smbhash.c 11.2 KB
Newer Older
1 2 3 4 5 6
/* Synchronet message base (SMB) hash-related functions */

/****************************************************************************
 * @format.tab-size 4		(Plain Text/Source Code File Header)			*
 * @format.use-tabs true	(see http://www.synchro.net/ptsc_hdr.html)		*
 *																			*
7
 * Copyright Rob Swindell - http://www.synchro.net/copyright.html			*
8 9 10 11 12 13 14 15 16 17 18 19 20 21
 *																			*
 * This library is free software; you can redistribute it and/or			*
 * modify it under the terms of the GNU Lesser General Public License		*
 * as published by the Free Software Foundation; either version 2			*
 * of the License, or (at your option) any later version.					*
 * See the GNU Lesser General Public License for more details: lgpl.txt or	*
 * http://www.fsf.org/copyleft/lesser.html									*
 *																			*
 * For Synchronet coding style and modification guidelines, see				*
 * http://www.synchro.net/source.html										*
 *																			*
 * Note: If this box doesn't appear square, then you need to fix your tabs.	*
 ****************************************************************************/

22 23
#include <time.h>		/* time()	*/
#include <string.h>		/* strdup() */
deuce's avatar
deuce committed
24
#include <ctype.h>		/* isspace()*/
25 26
#include "smblib.h"
#include "md5.h"
27
#include "sha1.h"
28 29 30 31
#include "crc16.h"
#include "crc32.h"
#include "genwrap.h"

32
/* If return value is SMB_ERR_NOT_FOUND, hash file is left open */
33
int smb_findhash(smb_t* smb, hash_t** compare, hash_t* found_hash, 
34
						 long source_mask, BOOL mark)
35 36 37 38 39 40 41 42 43 44 45 46 47 48
{
	int		retval;
	BOOL	found=FALSE;
	size_t	c,count;
	hash_t	hash;

	if(found_hash!=NULL)
		memset(found_hash,0,sizeof(hash_t));

	if((retval=smb_open_hash(smb))!=SMB_SUCCESS)
		return(retval);

	COUNT_LIST_ITEMS(compare, count);

49
	if(count && source_mask!=SMB_HASH_SOURCE_NONE) {
50 51

		rewind(smb->hash_fp);
52
		clearerr(smb->hash_fp);
53 54 55 56 57 58 59
		while(!feof(smb->hash_fp)) {
			if(smb_fread(smb,&hash,sizeof(hash),smb->hash_fp)!=sizeof(hash))
				break;

			if(hash.flags==0)
				continue;		/* invalid hash record (!?) */

60 61 62
			if((source_mask&(1<<hash.source))==0)	/* not checking this source type */
				continue;

63 64 65 66
			for(c=0;compare[c]!=NULL;c++) {

				if(compare[c]->source!=hash.source)
					continue;	/* wrong source */
67 68
				if(compare[c]->length!=hash.length)
					continue;	/* wrong source length */
69 70
				if(compare[c]->flags&SMB_HASH_MARKED)
					continue;	/* already marked */
71
				if((compare[c]->flags&SMB_HASH_PROC_COMP_MASK)!=(hash.flags&SMB_HASH_PROC_COMP_MASK))
72 73 74
					continue;	/* wrong pre-process flags */
				if((compare[c]->flags&hash.flags&SMB_HASH_MASK)==0)	
					continue;	/* no matching hashes */
75
				if((compare[c]->flags&hash.flags&SMB_HASH_CRC16)
76
					&& compare[c]->data.crc16!=hash.data.crc16)
77
					continue;	/* wrong crc-16 */
78
				if((compare[c]->flags&hash.flags&SMB_HASH_CRC32)
79
					&& compare[c]->data.crc32!=hash.data.crc32)
80
					continue;	/* wrong crc-32 */
81
				if((compare[c]->flags&hash.flags&SMB_HASH_MD5)
82
					&& memcmp(compare[c]->data.md5,hash.data.md5,sizeof(hash.data.md5)))
83
					continue;	/* wrong MD5 */
84 85 86 87
				if((compare[c]->flags&hash.flags&SMB_HASH_SHA1)
					&& memcmp(compare[c]->data.sha1,hash.data.sha1,sizeof(hash.data.sha1)))
					continue;	/* wrong SHA1 */

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
				/* successful match! */
				break;	/* can't match more than one, so stop comparing */
			}

			if(compare[c]==NULL)
				continue;	/* no match */

			found=TRUE;

			if(found_hash!=NULL)
				memcpy(found_hash,&hash,sizeof(hash));

			if(!mark)
				break;

			compare[c]->flags|=SMB_HASH_MARKED;
		}
		if(found) {
			smb_close_hash(smb);
			return(SMB_SUCCESS);
		}
	}

	/* hash file left open */
	return(SMB_ERR_NOT_FOUND);
}

115
int smb_addhashes(smb_t* smb, hash_t** hashes, BOOL skip_marked)
116 117 118 119 120
{
	int		retval;
	size_t	h;

	COUNT_LIST_ITEMS(hashes, h);
rswindell's avatar
rswindell committed
121 122
	if(!h) { /* nothing to add */
		smb_close_hash(smb);
123
		return(SMB_SUCCESS);
rswindell's avatar
rswindell committed
124
	}
125 126 127 128 129 130 131 132 133 134 135 136 137

	if((retval=smb_open_hash(smb))!=SMB_SUCCESS)
		return(retval);

	fseek(smb->hash_fp,0,SEEK_END);

	for(h=0;hashes[h]!=NULL;h++) {

		/* skip hashes marked by smb_findhash() */
		if(skip_marked && hashes[h]->flags&SMB_HASH_MARKED)	
			continue;	
	
		/* can't think of any reason to strip SMB_HASH_MARKED flag right now */
138 139 140 141
		if(smb_fwrite(smb,hashes[h],sizeof(hash_t),smb->hash_fp)!=sizeof(hash_t)) {
			retval=SMB_ERR_WRITE;
			break;
		}
142 143 144 145
	}

	smb_close_hash(smb);

146
	return(retval);
147 148
}

149
static char* strip_chars(uchar* dst, const uchar* src, uchar* set)
150
{
151
	while(*src) {
deuce's avatar
deuce committed
152
		if(strchr((char *)set,*src)==NULL)
153
			*(dst++)=*src;
154
		src++;
155 156
	}
	*dst=0;
157

deuce's avatar
deuce committed
158
	return((char *)dst);
159 160
}

161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
static char* strip_ctrla(uchar* dst, const uchar* src)
{
	while(*src) {
		if(*src==CTRL_A) {
			src++;
			if(*src)
				src++;
		}
		else
			*(dst++)=*(src++);
	}
	*dst=0;

	return((char *)dst);
}

177 178
/* Allocates and calculates hashes of data (based on flags)					*/
/* Returns NULL on failure													*/
179
hash_t* smb_hash(ulong msgnum, uint32_t t, unsigned source, unsigned flags
180 181 182 183
						 ,const void* data, size_t length)
{
	hash_t*	hash;

184 185 186
	if(length==0)		/* Don't hash 0-length sources (e.g. empty/blank message bodies) */
		return(NULL);

187 188 189
	if((hash=(hash_t*)malloc(sizeof(hash_t)))==NULL)
		return(NULL);

190
	memset(hash,0,sizeof(hash_t));
191 192
	hash->number=msgnum;
	hash->time=t;
193
	hash->length=length;
194 195 196
	hash->source=source;
	hash->flags=flags;
	if(flags&SMB_HASH_CRC16)
197
		hash->data.crc16=crc16((char*)data,length);
198
	if(flags&SMB_HASH_CRC32)
199
		hash->data.crc32=crc32((char*)data,length);
200
	if(flags&SMB_HASH_MD5)
201 202 203
		MD5_calc(hash->data.md5,data,length);
	if(flags&SMB_HASH_SHA1)
		SHA1_calc(hash->data.sha1, data, length);
204 205 206 207 208 209 210

	return(hash);
}

/* Allocates and calculates hashes of data (based on flags)					*/
/* Supports string hash "pre-processing" (e.g. lowercase, strip whitespace)	*/
/* Returns NULL on failure													*/
211
hash_t* smb_hashstr(ulong msgnum, uint32_t t, unsigned source, unsigned flags
212 213
							,const char* str)
{
deuce's avatar
deuce committed
214
	char*	p=NULL;
215 216 217 218 219
	hash_t*	hash;

	if(flags&SMB_HASH_PROC_MASK) {	/* string pre-processing */
		if((p=strdup(str))==NULL)
			return(NULL);
220
		if(flags&SMB_HASH_STRIP_CTRL_A)
deuce's avatar
deuce committed
221
			strip_ctrla((uchar *)p,(uchar *)p);
222
		if(flags&SMB_HASH_STRIP_WSP)
deuce's avatar
deuce committed
223
			strip_chars((uchar *)p,(uchar *)p,(uchar *)" \t\r\n");
224 225 226 227
		if(flags&SMB_HASH_LOWERCASE)
			strlwr(p);
	}

228 229 230 231 232
	if(p!=NULL) {
		hash=smb_hash(msgnum, t, source, flags, p, strlen(p));
		free(p);
	} else
		hash=smb_hash(msgnum, t, source, flags, str, strlen(str));
233 234 235 236

	return(hash);
}

237
/* Allocates and calculates all hashes for a single message					*/
238
/* Returns NULL on failure													*/
239
hash_t** smb_msghashes(smbmsg_t* msg, const uchar* body, long source_mask)
240 241
{
	size_t		h=0;
242
	uchar		flags=SMB_HASH_CRC16|SMB_HASH_CRC32|SMB_HASH_MD5|SMB_HASH_SHA1;
243 244 245 246
	hash_t**	hashes;	/* This is a NULL-terminated list of hashes */
	hash_t*		hash;
	time_t		t=time(NULL);

247
	if((hashes=(hash_t**)malloc(sizeof(hash_t*)*(SMB_HASH_SOURCE_TYPES+1)))==NULL)
248 249
		return(NULL);

250
	memset(hashes, 0, sizeof(hash_t*)*(SMB_HASH_SOURCE_TYPES+1));
251

252
	if(msg->id!=NULL && (source_mask&(1<<SMB_HASH_SOURCE_MSG_ID)) &&
rswindell's avatar
rswindell committed
253
		(hash=smb_hashstr(msg->hdr.number, (uint32_t)t, SMB_HASH_SOURCE_MSG_ID, flags, msg->id))!=NULL)
254 255
		hashes[h++]=hash;

256
	if(msg->ftn_msgid!=NULL	&& (source_mask&(1<<SMB_HASH_SOURCE_FTN_ID)) &&
rswindell's avatar
rswindell committed
257
		(hash=smb_hashstr(msg->hdr.number, (uint32_t)t, SMB_HASH_SOURCE_FTN_ID, flags, msg->ftn_msgid))!=NULL)
258 259
		hashes[h++]=hash;

260
	if(body!=NULL && (source_mask&(1<<SMB_HASH_SOURCE_BODY)) &&
deuce's avatar
deuce committed
261
		(hash=smb_hashstr(msg->hdr.number, (uint32_t)t, SMB_HASH_SOURCE_BODY, flags|SMB_HASH_STRIP_WSP|SMB_HASH_STRIP_CTRL_A, (const char *)body))!=NULL)
262 263
		hashes[h++]=hash;

264 265 266 267 268 269 270 271 272 273 274 275
	if(msg->subj!=NULL && (source_mask&(1<<SMB_HASH_SOURCE_SUBJECT))) {
		char*	p=msg->subj;
		while(*p) {
			char* tp=strchr(p,':');
			char* sp=strchr(p,' ');
			if(tp!=NULL && (sp==NULL || tp<sp)) {
				p=tp+1;
				SKIP_WHITESPACE(p);
				continue;
			}
			break;
		}
rswindell's avatar
rswindell committed
276
		if((hash=smb_hashstr(msg->hdr.number, (uint32_t)t, SMB_HASH_SOURCE_SUBJECT, flags, p))!=NULL)
277 278
			hashes[h++]=hash;
	}
rswindell's avatar
rswindell committed
279

280 281 282
	return(hashes);
}

283
void smb_freehashes(hash_t** hashes)
284 285 286 287 288 289
{
	size_t		n;

	FREE_LIST(hashes,n);
}

290
/* Calculates and stores the hashes for a single message					*/
291
int smb_hashmsg(smb_t* smb, smbmsg_t* msg, const uchar* text, BOOL update)
292 293 294 295 296 297
{
	size_t		n;
	int			retval=SMB_SUCCESS;
	hash_t		found;
	hash_t**	hashes;	/* This is a NULL-terminated list of hashes */

298
	if(smb->status.attr&(SMB_EMAIL | SMB_NOHASH | SMB_FILE_DIRECTORY))
299 300
		return(SMB_SUCCESS);

301
	hashes=smb_msghashes(msg,text,SMB_HASH_SOURCE_DUPE);
302

303
	if(smb_findhash(smb, hashes, &found, SMB_HASH_SOURCE_DUPE, update)==SMB_SUCCESS && !update) {
304 305
		retval=SMB_DUPE_MSG;
		safe_snprintf(smb->last_error,sizeof(smb->last_error)
306
			,"%s duplicate %s: %s found in message #%lu", __FUNCTION__
307 308
			,smb_hashsourcetype(found.source)
			,smb_hashsource(msg,found.source)
309
			,(ulong)found.number);
310 311 312 313 314 315 316 317 318 319
	} else
		if((retval=smb_addhashes(smb,hashes,/* skip_marked? */TRUE))==SMB_SUCCESS)
			msg->flags|=MSG_FLAG_HASHED;

	FREE_LIST(hashes,n);

	return(retval);
}

/* length=0 specifies ASCIIZ data											*/
320
int smb_getmsgidx_by_hash(smb_t* smb, smbmsg_t* msg, unsigned source
321 322 323
								 ,unsigned flags, const void* data, size_t length)
{
	int			retval;
324
	size_t		n=2;
325 326 327
	hash_t**	hashes;
	hash_t		found;

328
	if((hashes=(hash_t**)calloc(n, sizeof(hash_t*)))==NULL)
329 330 331 332 333 334
		return(SMB_ERR_MEM);

	if(length==0)
		hashes[0]=smb_hashstr(0,0,source,flags,data);
	else
		hashes[0]=smb_hash(0,0,source,flags,data,length);
335 336
	if(hashes[0]==NULL) {
		FREE_LIST(hashes,n);
337
		return(SMB_ERR_MEM);
338
	}
339

340
	if((retval=smb_findhash(smb, hashes, &found, 1<<source, FALSE))==SMB_SUCCESS) {
341 342 343 344 345 346 347 348 349 350 351 352 353
		if(found.number==0)
			retval=SMB_FAILURE;	/* use better error value here? */
		else {
			msg->hdr.number=found.number;
			retval=smb_getmsgidx(smb, msg);
		}
	}

	FREE_LIST(hashes,n);

	return(retval);
}

354
int smb_getmsghdr_by_hash(smb_t* smb, smbmsg_t* msg, unsigned source
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
								 ,unsigned flags, const void* data, size_t length)
{
	int retval;

	if((retval=smb_getmsgidx_by_hash(smb,msg,source,flags,data,length))!=SMB_SUCCESS)
		return(retval);

	if((retval=smb_lockmsghdr(smb,msg))!=SMB_SUCCESS)
		return(retval);

	retval=smb_getmsghdr(smb,msg);

	smb_unlockmsghdr(smb,msg); 

	return(retval);
}

372
uint16_t smb_subject_crc(const char* subj)
373 374
{
	char*	str;
deuce's avatar
64-bit  
deuce committed
375
	uint16_t	crc;
376

377
	if(subj==NULL)
378
		return(0);
379

380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
	while(!strnicmp(subj,"RE:",3)) {
		subj+=3;
		while(*subj==' ')
			subj++; 
	}

	if((str=strdup(subj))==NULL)
		return(0xffff);

	strlwr(str);
	crc=crc16(str,0	/* auto-length */);
	free(str);

	return(crc);
}
395

396
uint16_t smb_name_crc(const char* name)
397 398
{
	char*	str;
deuce's avatar
64-bit  
deuce committed
399
	uint16_t	crc;
400 401

	if(name==NULL)
402
		return(0);
403 404 405 406 407 408 409 410 411 412

	if((str=strdup(name))==NULL)
		return(0xffff);

	strlwr(str);
	crc=crc16(str,0	/* auto-length */);
	free(str);

	return(crc);
}
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447

// Returns hashflags_t on success
int smb_hashfile(const char* path, off_t size, struct hash_data* data)
{
	char buf[256 * 1024];
	FILE*	fp;
	MD5 md5_ctx;
	SHA1_CTX sha1_ctx;

	if(size < 1)
		return 0;

	if((fp = fopen(path, "rb")) == NULL)
		return 0;

	MD5_open(&md5_ctx);
	SHA1Init(&sha1_ctx);
	data->crc16 = 0;
	data->crc32 = 0;
	off_t off = 0;
	while(!feof(fp) && off < size) {
		size_t rd = fread(buf, sizeof(uint8_t), sizeof(buf), fp);
		if(rd < 1)
			break;
		data->crc32 = crc32i(~data->crc32, buf, rd);
		data->crc16 = icrc16(data->crc16, buf, rd);
		MD5_digest(&md5_ctx, buf, rd);
		SHA1Update(&sha1_ctx, (unsigned char*)buf, rd);
		off += rd;
	}
	fclose(fp);
	MD5_close(&md5_ctx, data->md5);
	SHA1Final(&sha1_ctx, data->sha1);
	return SMB_HASH_CRC16 | SMB_HASH_CRC32 | SMB_HASH_MD5 | SMB_HASH_SHA1;
}