Skip to content
Snippets Groups Projects
finddupes.c 5.15 KiB
Newer Older
/* finddupes.c */

/* Search for (and optionally delete) duplicate files in multiple	*/
/* directories based on size and either MD5 or CRC-32 "chksums".	*/

#include <stdio.h>
#include <time.h>

#include "dirwrap.h"
#ifdef USE_MD5
	#include "md5.h"
	/* NOTE(review): the four declarations below are used as members of the
	 * file_t record (searchdir() and compare_files() reach them through
	 * file_t pointers); the enclosing declaration is not visible in this
	 * listing. */
	char path[MAX_PATH + 1];      /* path of the file, as built by searchdir() */
	long length;                  /* file length, taken from flength() */
	time_t date;                  /* file date, taken from fdate() */
	BYTE chksum[MD5_DIGEST_SIZE]; /* checksum that compare_files() compares */
/* Number of records currently stored in the file array; searchdir()
 * increments it for every file it keeps. */
ulong   file_count = 0;

/* Computes the checksum of the file named fname: the visible code opens the
 * file in binary mode, reads `length` bytes of it into a heap buffer and
 * stores either an MD5 digest (USE_MD5) or a CRC-32 value through chksum.
 * NOTE(review): this listing is missing lines (the rest of the parameter
 * list, the local declarations, the error-path bodies and the return
 * statements), so the return-value contract cannot be confirmed from here. */
int fchksum(const char* fname, long length,
#ifdef USE_MD5
	if ((fp = fopen(fname, "rb")) == NULL) {

	/* A zero length short-circuits the allocation, so malloc() is only
	 * called for non-empty reads. */
	if (length && (buf = malloc(length)) == NULL) {
		printf("!Error allocating %ld bytes of memory for %s\n"
	/* A short read releases the buffer.
	 * NOTE(review): fread() returns size_t while length is a long, so this
	 * is a signed/unsigned comparison - confirm length can never be
	 * negative here. */
	if (fread(buf, sizeof(BYTE), length, fp) != length) {
		FREE_AND_NULL(buf);
	}

	fclose(fp);
	/* The whole buffer is hashed in one call; which algorithm is used is a
	 * compile-time choice. */
#ifdef USE_MD5
	MD5_calc(chksum, buf, length);
#else
	*chksum = crc32(buf, length);
#endif
	FREE_AND_NULL(buf);
	/* Body of the timestamp helper that the progress messages call as
	 * timestr() (its header line is not visible in this listing).
	 * It formats the current time with ctime(), whose fixed layout is
	 * "Www Mmm dd hh:mm:ss yyyy\n", and returns the "Mmm dd hh:mm:ss" part.
	 * The returned pointer refers to ctime()'s internal buffer, so it is
	 * only valid until the next ctime()/asctime() call. */
	char*  p;
	time_t t = time(NULL);
	p = ctime(&t);
	p[19] = 0;        /* chop off year and \n */
	return p + 4;    /* skip day-of-week */
/* Scans the directory `path` and, for every non-directory entry it keeps,
 * appends one file_t record (path, date, length, checksum) to the `file`
 * array and increments file_count.
 *
 *   path          - directory to scan
 *   recursive     - when set, sub-directories are scanned by a recursive call
 *   compare_bytes - byte limit forwarded to recursive calls and tested
 *                   against each file's length
 *
 * NOTE(review): this listing is missing lines (the declarations of dir and
 * ent, several if-bodies and the return statements), so the exact handling
 * of each error path and the return value cannot be confirmed from here. */
int searchdir(const char* path, BOOL recursive, ulong compare_bytes)
{
	file_t*        fp;
	char           fpath[MAX_PATH + 1];
	printf("%s begin searching %s\n", timestr(), path);
	if ((dir = opendir(path)) == NULL) {
	while ((ent = readdir(dir)) != NULL) {
		/* A key press is polled on every entry; the action taken is not
		 * visible in this listing. */
		if (kbhit())
		/* The "." and ".." entries are singled out here (presumably to
		 * be skipped - the body is not visible). */
		if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
		/* Build the entry's full path.
		 * NOTE(review): strcpy()/strcat() are unbounded, and no path
		 * separator is added in the visible lines - confirm that fpath
		 * cannot overflow and that a separator is appended elsewhere. */
		strcpy(fpath, path);
		strcat(fpath, ent->d_name);
		/* Directories are never recorded; they are only descended into
		 * when recursion was requested. */
		if (isdir(fpath)) {
			if (recursive)
				searchdir(fpath, recursive, compare_bytes);
			continue;
		}

		/* Grow the array by exactly one record per file.
		 * NOTE(review): assigning realloc()'s result straight back to
		 * `file` loses the old block if the call fails. */
		file = realloc(file, sizeof(file_t) * (file_count + 1));
		if (file == NULL) {
			/* NOTE(review): the argument has type size_t but is
			 * printed with %lu - confirm on targets where the two
			 * differ. */
			printf("!Error allocating %lu bytes\n", sizeof(file_t) * (file_count + 1));
		/* Fill in the new (zeroed) record at the end of the array. */
		fp = &file[file_count];
		memset(fp, 0, sizeof(file_t));
		strcpy(fp->path, fpath);
		fp->date = fdate(fp->path);
		if ((fp->length = flength(fp->path)) == -1) {
			printf("!Failed to get length of %s\n", fp->path);
		/* NOTE(review): fp->length is a long and compare_bytes is a
		 * ulong, so this comparison is performed unsigned. */
		if (compare_bytes && fp->length > compare_bytes)
		if (fchksum(fp->path, fp->length,
			continue;
		/* The record only counts once file_count is bumped; a `continue`
		 * above leaves the slot to be overwritten by the next entry. */
		file_count++;
		printf("%lu\r", file_count);
	}

	closedir(dir);
	printf("%s done searching %s\n", timestr(), path);
}

/* Ordering function for two file_t records, used to sort the file array:
 * by length (descending), then by checksum bytes (memcmp order), then by
 * date (later dates first).
 * NOTE(review): this listing is missing lines (the return statements after
 * the first two tests and the closing brace). */
int compare_files(const file_t *f1, const file_t *f2 )
{
	int result;
	/* Sort first by size (descending) */
	/* NOTE(review): the difference of two long values is stored in an int;
	 * confirm it cannot be truncated or overflow for very large files. */
	if ((result = f2->length - f1->length) != 0)
	/* Then by the raw checksum bytes. */
	if ((result = memcmp(&f1->chksum, &f2->chksum, sizeof(f1->chksum))) != 0)
	/* Finally by date, later first.
	 * NOTE(review): a time_t difference is narrowed to int here. */
	return f2->date - f1->date;
	/* Locals and command-line handling of the program entry point.
	 * NOTE(review): the function header and some declarations (for example
	 * the loop index i) are not visible in this listing.
	 *
	 * Options are processed left to right, and each directory argument is
	 * scanned as soon as it is encountered, so an option only affects the
	 * directories that follow it on the command line. */
	ulong dupe_count = 0;
	ulong del_files = 0;
	ulong del_bytes = 0;
	ulong compare_bytes = 0;
	BOOL  recursive = FALSE;
	BOOL  del_dupes = FALSE;
	BOOL  dir_specified = FALSE;

	for (i = 1; i < argc; i++) {
		if (!stricmp(argv[i], "-d"))
			del_dupes = TRUE;
		else if (!stricmp(argv[i], "-r"))
			recursive = TRUE;
		/* -b and -k consume the next argument, so one must exist:
		 * argv[argc] is a null pointer and must never reach atoi().
		 * When the value is missing, the option falls through to the
		 * usage branch below. */
		else if (!stricmp(argv[i], "-b") && i + 1 < argc)
			compare_bytes = atoi(argv[++i]);
		else if (!stricmp(argv[i], "-k") && i + 1 < argc)
			compare_bytes = atoi(argv[++i]) * 1024;
		else if (argv[i][0] == '-') {
			/* Unknown option (or option missing its value): show usage. */
			printf("%s [[-opt] [-opt] [...]] [[path] [path] [...]]\n", argv[0]);
			printf("-r\t search directories recursively\n");
			printf("-d\t delete duplicate files found\n");
			printf("-b n\t compare up to n bytes of each file\n");
			printf("-k n\t compare up to n kilobytes each of file\n");
			exit(0);
		}
		else {
			dir_specified = TRUE;
			searchdir(argv[i], recursive, compare_bytes);
		}
	}
	/* With no directory on the command line, scan the current one. */
	if (!dir_specified)
		searchdir(".", recursive, compare_bytes);

	/* NOTE(review): the statement that this brace closes is not visible in
	 * this listing. */
	}

	/* Sort the collected records so that candidate duplicates become
	 * neighbours, then compare each record with the one that follows it.
	 * NOTE(review): compare_files takes file_t pointers, while qsort()
	 * expects a comparator taking const void pointers - confirm a cast or
	 * wrapper is not needed on the target compilers. */
	printf("%s begin sorting (%lu files)\n", timestr(), file_count);
	qsort(file, file_count, sizeof(file_t), compare_files);
	printf("%s end sorting\n", timestr());

	printf("%s comparing (%lu files)\n", timestr(), file_count);

	/* NOTE(review): file_count is unsigned, so file_count - 1 wraps around
	 * when no files were collected - confirm an earlier check (not visible
	 * in this listing) rules that out. */
	for (i = 0; i < file_count - 1; i++) {
		/* NOTE(review): the statement controlled by this length test is
		 * not visible in this listing. */
		if (file[i].length != file[i + 1].length)
		if (memcmp(&file[i].chksum, &file[i + 1].chksum, sizeof(file[i].chksum)))
			continue; /* chksums must match */
		/* Render the checksum as text for the report line. */
#ifdef USE_MD5
		MD5_hex(hex, file[i].chksum);
#else
		sprintf(hex, "%08lx", file[i].chksum);
#endif
		/* NOTE(review): length is declared as long but printed with
		 * %7lu. */
		printf("Dupe: %s %7lu %s\n", hex, file[i].length, getfname(file[i].path));
		/* When deletion is enabled, the earlier record of the pair
		 * (file[i]) is the one removed; with compare_files' date
		 * tie-breaker that is the entry with the later date. */
		if (del_dupes) {
			fsize = flength(file[i].path);
			printf("Removing %s (%lu bytes)\n", file[i].path, fsize);
			/* NOTE(review): lines are missing here (note the unmatched
			 * brace below); as listed, del_bytes would grow when
			 * remove() fails, which is presumably not the intent. */
			if (remove(file[i].path) != 0)
				del_bytes += fsize;
			}
		}
		dupe_count++;
	}

	/* Final summary.
	 * NOTE(review): the condition guarding the second message is not
	 * visible in this listing. */
	printf("%s done (%lu duplicates found)\n", timestr(), dupe_count);
		printf("%lu bytes deleted in %lu files\n", del_bytes, del_files);