From 5e8810edd12cdd8dd071c5239752d6b2088d1a5c Mon Sep 17 00:00:00 2001
From: rswindell <>
Date: Mon, 30 Aug 2004 07:17:10 +0000
Subject: [PATCH] Significant upgrade (increased smblib version string to
 2.40):

New smb.hash file uses multiple hashing methods (CRC-16, CRC-32 and MD5) to
calculate hashes of various "distinguishing marks" (e.g. message ID and body
text) to create a permanent history of message "fingerprints" that can be used
for duplicate message detection or fast location of messages based on one
of these "distinguishing marks". For example, reading each message header
in a message base to find a message with a specific message-ID (i.e.
how get_msg_by_id() currently works) significantly slows down message
importing with auto-thread linkage (a newly added feature). This hash file
will be used to speed this up considerably as well as improve duplicate
message detection.

This feature is supported with the following new smblib functions:
smb_findhash, smb_hashmsg, smb_hash, smb_msghashes, and smb_addhashes.

smb_addmsghdr() currently calls smb_hashmsg(), but this will probably change
with yet another new function call (smb_addmsg?) which will have access to the
body text (a very useful hash for duplicate message detection).

Existing SMB message bases will not have an existing .hash file (chksmb will
report "missing hash" errors), but fixsmb can be used to create the .hash file
for existing message bases. Other than the chksmb errors, there are no real
problems with missing hashes, except for the potential for failed auto-thread
linkage when importing replies to existing messages. Over time, this will fix
itself as new messages are imported and their hashes are stored automatically.

Projects using smblib now must link with crc16.c, crc32.c, and md5.c.

Other changes:
Created GETMSGTXT_TAIL_ONLY convenience macro.
smb_open/close_ha/da functions are now just macros to the new smb_open_fp and
smb_close_fp functions. These functions were 99% redundant.
New macros smb_open/close_hash also point to smb_open/close_fp.
More use of SAFEPRINTF macros and safe_snprintf().
---
 src/smblib/smbdefs.h |  60 ++++++---
 src/smblib/smblib.c  | 308 ++++++++++++++++++++++++++++++++-----------
 src/smblib/smblib.h  |  23 +++-
 3 files changed, 294 insertions(+), 97 deletions(-)

diff --git a/src/smblib/smbdefs.h b/src/smblib/smbdefs.h
index 6ce72f999a..a4fa1a3ead 100644
--- a/src/smblib/smbdefs.h
+++ b/src/smblib/smbdefs.h
@@ -385,7 +385,7 @@ enum {
 /* Add new ones here */
 
     ,NET_TYPES
-    };
+};
 
 enum {
      AGENT_PERSON
@@ -396,7 +396,7 @@ enum {
 /* Add new ones here */
 
     ,AGENT_TYPES
-    };
+};
 
 enum {
      XLAT_NONE              /* No translation/End of translation list */
@@ -413,7 +413,7 @@ enum {
 /* Add new ones here */
 
     ,XLAT_TYPES
-    };
+};
 
 
 /************/
@@ -439,7 +439,7 @@ typedef struct _PACK {		/* Time with time-zone */
 	ulong	time;			/* Local time (unix format) */
 	short	zone;			/* Time zone */
 
-	} when_t;
+} when_t;
 
 typedef struct _PACK {		/* Index record */
 
@@ -451,7 +451,28 @@ typedef struct _PACK {		/* Index record */
 	ulong	number; 		/* number of message (1 based) */
 	ulong	time;			/* time/date message was imported/posted */
 
-	} idxrec_t;
+} idxrec_t;
+
+									/* valid bits in hash_t.flags		*/
+#define SMB_HASH_CRC16		(1<<0)	/* CRC-16 hash is valid				*/
+#define SMB_HASH_CRC32		(1<<1)	/* CRC-32 hash is valid				*/
+#define SMB_HASH_MD5		(1<<2)	/* MD5 digest is valid				*/
+#define SMB_HASH_MASK		0x0f	/* which hashes are valid			*/
+#define SMB_HASH_UPPERCASE	(1<<4)	/* Convert a-z to A-Z first			*/
+#define SMB_HASH_LOWERCASE	(1<<5)	/* Convert A-Z to a-z first			*/
+#define SMB_HASH_STRIP_WSP	(1<<6)	/* Strip white-space chars first	*/
+
+typedef struct _PACK {		/* Hash record (one message "fingerprint" in the .hash file) */
+
+	ulong	number;					/* Message number */
+	ulong	time;					/* Local time of fingerprinting */
+	uchar	source;					/* What was hashed (e.g. TEXT_BODY, RFC822MSGID, FIDOMSGID) */
+	uchar	flags;					/* SMB_HASH_* bits: which hashes are valid and pre-processing applied */
+	ushort	crc16;					/* CRC-16 of source (valid if SMB_HASH_CRC16 set in flags) */
+	ulong	crc32;					/* CRC-32 of source (valid if SMB_HASH_CRC32 set in flags) */
+	uchar	md5[MD5_DIGEST_SIZE];	/* MD5 digest of source (valid if SMB_HASH_MD5 set in flags) */
+
+} hash_t;
 
 typedef struct _PACK {		/* Message base header (fixed portion) */
 
@@ -459,7 +480,7 @@ typedef struct _PACK {		/* Message base header (fixed portion) */
     ushort  version;        /* version number (initially 100h for 1.00) */
     ushort  length;         /* length including this struct */
 
-	} smbhdr_t;
+} smbhdr_t;
 
 typedef struct _PACK {		/* Message base status header */
 
@@ -471,7 +492,7 @@ typedef struct _PACK {		/* Message base status header */
     ushort  max_age;        /* Maximum age of message to keep in sub (in days) */
 	ushort	attr;			/* Attributes for this message base (SMB_HYPER,etc) */
 
-	} smbstatus_t;
+} smbstatus_t;
 
 typedef struct _PACK {		/* Message header */
 
@@ -495,7 +516,7 @@ typedef struct _PACK {		/* Message header */
     /* 40 */ ulong	offset;				/* Offset for buffer into data file (0 or mod 256) */
 	/* 44 */ ushort	total_dfields;		/* Total number of data fields */
 
-	} msghdr_t;
+} msghdr_t;
 
 #define thread_orig	thread_back	/* for backwards compatibility with older code */
 
@@ -505,14 +526,14 @@ typedef struct _PACK {		/* Data field */
     ulong   offset;         /* Offset into buffer */ 
     ulong   length;         /* Length of data field */
 
-    } dfield_t;
+} dfield_t;
 
 typedef struct _PACK {		/* Header field */
 
 	ushort	type;
 	ushort	length; 		/* Length of buffer */
 
-	} hfield_t;
+} hfield_t;
 
 typedef struct _PACK {		/* FidoNet address (zone:net/node.point) */
 
@@ -521,7 +542,7 @@ typedef struct _PACK {		/* FidoNet address (zone:net/node.point) */
 	ushort	node;
 	ushort	point;
 
-    } fidoaddr_t;
+} fidoaddr_t;
 
 #if defined(PRAGMA_PACK)
 #pragma pack(pop)		/* original packing */
@@ -532,7 +553,7 @@ typedef struct _PACK {		/* Network (type and address) */
     ushort  type;
 	void	*addr;
 
-	} net_t;
+} net_t;
 
 typedef struct {				/* Message */
 
@@ -574,16 +595,17 @@ typedef struct {				/* Message */
 	ulong		priority;		/* Message priority (0 is lowest) */
 	ulong		cost;			/* Cost to download/read */
 
-	} smbmsg_t;
+} smbmsg_t;
 
 typedef struct {			/* Message base */
 
     char    file[128];      /* Path and base filename (no extension) */
-    FILE    *sdt_fp;        /* File pointer for data (.sdt) file */
-    FILE    *shd_fp;        /* File pointer for header (.shd) file */
-    FILE    *sid_fp;        /* File pointer for index (.sid) file */
-    FILE    *sda_fp;        /* File pointer for data allocation (.sda) file */
-    FILE    *sha_fp;        /* File pointer for header allocation (.sha) file */
+    FILE*	sdt_fp;			/* File pointer for data (.sdt) file */
+    FILE*	shd_fp;			/* File pointer for header (.shd) file */
+    FILE*	sid_fp;			/* File pointer for index (.sid) file */
+    FILE*	sda_fp;			/* File pointer for data allocation (.sda) file */
+    FILE*	sha_fp;			/* File pointer for header allocation (.sha) file */
+	FILE*	hash_fp;		/* File pointer for hash (.hash) file */
 	ulong	retry_time; 	/* Maximum number of seconds to retry opens/locks */
 	ulong	retry_delay;	/* Time-slice yield (milliseconds) while retrying */
 	smbstatus_t status; 	/* Status header record */
@@ -596,6 +618,6 @@ typedef struct {			/* Message base */
 	long	msgs;			/* Number of messages loaded (for user) */
 	long	curmsg;			/* Current message number (for user) */
 
-    } smb_t;
+} smb_t;
 
 #endif /* Don't add anything after this #endif statement */
diff --git a/src/smblib/smblib.c b/src/smblib/smblib.c
index e36ce2d4c5..c6fd005edf 100644
--- a/src/smblib/smblib.c
+++ b/src/smblib/smblib.c
@@ -53,7 +53,7 @@
 #include "filewrap.h"
 
 /* Use smb_ver() and smb_lib_ver() to obtain these values */
-#define SMBLIB_VERSION		"2.31"      /* SMB library version */
+#define SMBLIB_VERSION		"2.40"      /* SMB library version */
 #define SMB_VERSION 		0x0121		/* SMB format version */
 										/* High byte major, low byte minor */
 
@@ -106,8 +106,9 @@ int SMBCALL smb_open(smb_t* smb)
 		|| smb->retry_delay>(smb->retry_time*100))	/* at least ten retries */
 		smb->retry_delay=250;	/* milliseconds */
 	smb->shd_fp=smb->sdt_fp=smb->sid_fp=NULL;
+	smb->sha_fp=smb->sda_fp=smb->hash_fp=NULL;
 	smb->last_error[0]=0;
-	sprintf(str,"%s.shd",smb->file);
+	SAFEPRINTF(str,"%s.shd",smb->file);
 	if((file=sopen(str,O_RDWR|O_CREAT|O_BINARY,SH_DENYNO,S_IREAD|S_IWRITE))==-1) {
 		safe_snprintf(smb->last_error,sizeof(smb->last_error)
 			,"%d (%s) opening %s"
@@ -165,7 +166,7 @@ int SMBCALL smb_open(smb_t* smb)
 
 	setvbuf(smb->shd_fp,smb->shd_buf,_IOFBF,SHD_BLOCK_LEN);
 
-	sprintf(str,"%s.sdt",smb->file);
+	SAFEPRINTF(str,"%s.sdt",smb->file);
 	if((file=sopen(str,O_RDWR|O_CREAT|O_BINARY,SH_DENYNO,S_IREAD|S_IWRITE))==-1) {
 		safe_snprintf(smb->last_error,sizeof(smb->last_error)
 			,"%d (%s) opening %s"
@@ -185,7 +186,7 @@ int SMBCALL smb_open(smb_t* smb)
 
 	setvbuf(smb->sdt_fp,NULL,_IOFBF,2*1024);
 
-	sprintf(str,"%s.sid",smb->file);
+	SAFEPRINTF(str,"%s.sid",smb->file);
 	if((file=sopen(str,O_RDWR|O_CREAT|O_BINARY,SH_DENYNO,S_IREAD|S_IWRITE))==-1) {
 		safe_snprintf(smb->last_error,sizeof(smb->last_error)
 			,"%d (%s) opening %s"
@@ -225,74 +226,41 @@ void SMBCALL smb_close(smb_t* smb)
 }
 
 /****************************************************************************/
-/* Opens the data block allocation table message base 'smb->file'           */
+/* Opens a non-shareable file (FILE*) associated with a message base		*/
 /* Retrys for retry_time number of seconds									*/
 /* Return 0 on success, non-zero otherwise									*/
 /****************************************************************************/
-int SMBCALL smb_open_da(smb_t* smb)
+int SMBCALL smb_open_fp(smb_t* smb, FILE** fp)
 {
 	int 	file;
-	char	str[MAX_PATH+1];
+	char	path[MAX_PATH+1];
+	char*	ext;
 	time_t	start=0;
 
-	sprintf(str,"%s.sda",smb->file);
-	while(1) {
-		if((file=sopen(str,O_RDWR|O_CREAT|O_BINARY,SH_DENYRW,S_IREAD|S_IWRITE))!=-1)
-			break;
-		if(errno!=EACCES && errno!=EAGAIN) {
-			safe_snprintf(smb->last_error,sizeof(smb->last_error)
-				,"%d (%s) opening %s"
-				,errno,STRERROR(errno),str);
-			return(SMB_ERR_OPEN);
-		}
-		if(!start)
-			start=time(NULL);
-		else
-			if(time(NULL)-start>=(time_t)smb->retry_time) {
-				safe_snprintf(smb->last_error,sizeof(smb->last_error)
-					,"timeout opening %s (retry_time=%ld)"
-					,str,smb->retry_time);
-				return(SMB_ERR_TIMEOUT); 
-			}
-		SLEEP(smb->retry_delay);
-	}
-	if((smb->sda_fp=fdopen(file,"r+b"))==NULL) {
+	if(fp==&smb->sda_fp)
+		ext="sda";
+	else if(fp==&smb->sha_fp)
+		ext="sha";
+	else if(fp==&smb->hash_fp)
+		ext="hash";
+	else {
 		safe_snprintf(smb->last_error,sizeof(smb->last_error)
-			,"%d (%s) fdopening %s (%d)"
-			,errno,STRERROR(errno),str,file);
-		close(file);
-		return(SMB_ERR_OPEN); 
+			,"opening %s: Illegal FILE* pointer argument: %p"
+			,smb->file, fp);
+		return(SMB_ERR_OPEN);
 	}
-	setvbuf(smb->sda_fp,NULL,_IOFBF,2*1024);
-	return(SMB_SUCCESS);
-}
-
-void SMBCALL smb_close_da(smb_t* smb)
-{
-	if(smb->sda_fp!=NULL)
-		fclose(smb->sda_fp);
-	smb->sda_fp=NULL;
-}
+	SAFEPRINTF2(path,"%s.%s",smb->file,ext);
 
-/****************************************************************************/
-/* Opens the header block allocation table for message base 'smb.file'      */
-/* Retrys for smb.retry_time number of seconds								*/
-/* Return 0 on success, non-zero otherwise									*/
-/****************************************************************************/
-int SMBCALL smb_open_ha(smb_t* smb)
-{
-	int 	file;
-	char	str[MAX_PATH+1];
-	time_t	start=0;
+	if(*fp!=NULL)	/* Already open! */
+		return(SMB_SUCCESS);
 
-	sprintf(str,"%s.sha",smb->file);
 	while(1) {
-		if((file=sopen(str,O_RDWR|O_CREAT|O_BINARY,SH_DENYRW,S_IREAD|S_IWRITE))!=-1)
+		if((file=sopen(path,O_RDWR|O_CREAT|O_BINARY,SH_DENYRW,S_IREAD|S_IWRITE))!=-1)
 			break;
 		if(errno!=EACCES && errno!=EAGAIN) {
 			safe_snprintf(smb->last_error,sizeof(smb->last_error)
 				,"%d (%s) opening %s"
-				,errno,STRERROR(errno),str);
+				,errno,STRERROR(errno),path);
 			return(SMB_ERR_OPEN);
 		}
 		if(!start)
@@ -301,27 +269,29 @@ int SMBCALL smb_open_ha(smb_t* smb)
 			if(time(NULL)-start>=(time_t)smb->retry_time) {
 				safe_snprintf(smb->last_error,sizeof(smb->last_error)
 					,"timeout opening %s (retry_time=%ld)"
-					,str,smb->retry_time);
+					,path,smb->retry_time);
 				return(SMB_ERR_TIMEOUT); 
 			}
 		SLEEP(smb->retry_delay);
 	}
-	if((smb->sha_fp=fdopen(file,"r+b"))==NULL) {
+	if((*fp=fdopen(file,"r+b"))==NULL) {
 		safe_snprintf(smb->last_error,sizeof(smb->last_error)
 			,"%d (%s) fdopening %s (%d)"
-			,errno,STRERROR(errno),str,file);
+			,errno,STRERROR(errno),path,file);
 		close(file);
 		return(SMB_ERR_OPEN); 
 	}
-	setvbuf(smb->sha_fp,NULL,_IOFBF,2*1024);
+	setvbuf(*fp,NULL,_IOFBF,2*1024);
 	return(SMB_SUCCESS);
 }
 
-void SMBCALL smb_close_ha(smb_t* smb)
+void SMBCALL smb_close_fp(FILE** fp)
 {
-	if(smb->sha_fp!=NULL)
-		fclose(smb->sha_fp);
-	smb->sha_fp=NULL;
+	if(fp!=NULL) {
+		if(*fp!=NULL)
+			fclose(*fp);
+		*fp=NULL;
+	}
 }
 
 /****************************************************************************/
@@ -330,9 +300,9 @@ void SMBCALL smb_close_ha(smb_t* smb)
 /* Currently, this is only used while smbutil packs a message base.			*/
 /* This is achieved with a semaphore lock file (e.g. mail.lock).			*/
 /****************************************************************************/
-static char* smb_lockfname(smb_t* smb, char* fname)
+static char* smb_lockfname(smb_t* smb, char* fname, size_t maxlen)
 {
-	sprintf(fname,"%s.lock",smb->file);
+	safe_snprintf(fname,maxlen,"%s.lock",smb->file);
 	return(fname);
 }
 
@@ -341,7 +311,7 @@ int SMBCALL smb_lock(smb_t* smb)
 	char	str[MAX_PATH+1];
 	int		file;
 
-	smb_lockfname(smb,str);
+	smb_lockfname(smb,str,sizeof(str)-1);
 	if((file=open(str,O_CREAT|O_EXCL|O_RDWR,S_IREAD|S_IWRITE))==-1) {
 		safe_snprintf(smb->last_error,sizeof(smb->last_error)
 			,"%d (%s) creating %s"
@@ -356,7 +326,7 @@ int SMBCALL smb_unlock(smb_t* smb)
 {
 	char	str[MAX_PATH+1];
 
-	smb_lockfname(smb,str);
+	smb_lockfname(smb,str,sizeof(str)-1);
 	if(remove(str)!=0) {
 		safe_snprintf(smb->last_error,sizeof(smb->last_error)
 			,"%d (%s) removing %s"
@@ -370,7 +340,7 @@ int SMBCALL smb_islocked(smb_t* smb)
 {
 	char	str[MAX_PATH+1];
 
-	if(access(smb_lockfname(smb,str),0)!=0)
+	if(access(smb_lockfname(smb,str,sizeof(str)-1),0)!=0)
 		return(0);
 	safe_snprintf(smb->last_error,sizeof(smb->last_error),"%s exists",str);
 	return(1);
@@ -813,7 +783,6 @@ ulong SMBCALL smb_getmsgtxtlen(smbmsg_t* msg)
 	return(length);
 }
 
-
 static void set_convenience_ptr(smbmsg_t* msg, ushort hfield_type, void* hfield_dat)
 {
 	switch(hfield_type) {	/* convenience variables */
@@ -1350,7 +1319,7 @@ int SMBCALL smb_addcrc(smb_t* smb, ulong crc)
 	if(!smb->status.max_crcs)
 		return(SMB_SUCCESS);
 
-	sprintf(str,"%s.sch",smb->file);
+	SAFEPRINTF(str,"%s.sch",smb->file);
 	while(1) {
 		if((file=sopen(str,O_RDWR|O_CREAT|O_BINARY,SH_DENYRW,S_IREAD|S_IWRITE))!=-1)
 			break;
@@ -1492,6 +1461,7 @@ int SMBCALL smb_addmsghdr(smb_t* smb, smbmsg_t* msg, int storage)
 		smb->status.last_msg++;
 		smb->status.total_msgs++;
 		smb_putstatus(smb);
+		smb_hashmsg(smb,msg,NULL);
 	}
 	smb_unlocksmbhdr(smb);
 	return(i);
@@ -1681,11 +1651,11 @@ int SMBCALL smb_create(smb_t* smb)
 	rewind(smb->sid_fp);
 	chsize(fileno(smb->sid_fp),0L);
 
-	sprintf(str,"%s.sda",smb->file);
+	SAFEPRINTF(str,"%s.sda",smb->file);
 	remove(str);						/* if it exists, delete it */
-	sprintf(str,"%s.sha",smb->file);
+	SAFEPRINTF(str,"%s.sha",smb->file);
 	remove(str);                        /* if it exists, delete it */
-	sprintf(str,"%s.sch",smb->file);
+	SAFEPRINTF(str,"%s.sch",smb->file);
 	remove(str);
 	smb_unlocksmbhdr(smb);
 	return(SMB_SUCCESS);
@@ -2384,4 +2354,194 @@ int SMBCALL smb_updatethread(smb_t* smb, smbmsg_t* remsg, ulong newmsgnum)
 	return(retval);
 }
 
+/**************************/
+/* Hash-related functions */
+/**************************/
+
+/* Finds the first hash-file record matching any entry of the NULL-terminated 'compare' list; if the return value is SMB_ERR_NOT_FOUND, hash file is left open */
+int SMBCALL smb_findhash(smb_t* smb, hash_t** compare, hash_t* found)
+{
+	int		retval;
+	size_t	c;
+	hash_t	hash;
+
+	if(found!=NULL)
+		memset(found,0,sizeof(hash_t));
+
+	if((retval=smb_open_hash(smb))!=SMB_SUCCESS)
+		return(retval);
+
+	if(compare!=NULL) {
+		/* sequential scan of every record, starting at the beginning of the file */
+		rewind(smb->hash_fp);
+		while(!feof(smb->hash_fp)) {
+			memset(&hash,0,sizeof(hash));
+			if(smb_fread(smb,&hash,sizeof(hash),smb->hash_fp)!=sizeof(hash))
+				break;
+
+			if(hash.number==0 || hash.flags==0)
+				continue;		/* invalid hash record (!?) */
+
+			for(c=0;compare[c]!=NULL;c++) {
+				/* match: same source and pre-process flags, and every hash valid in both is equal */
+				if(compare[c]->source!=hash.source)	
+					continue;	/* wrong source */
+				if((compare[c]->flags&hash.flags&SMB_HASH_MASK)==0)	
+					continue;	/* no matching hashes */
+				if((compare[c]->flags&~SMB_HASH_MASK)!=(hash.flags&~SMB_HASH_MASK))
+					continue;	/* wrong pre-process flags */
+				if(compare[c]->flags&hash.flags&SMB_HASH_CRC16 
+					&& compare[c]->crc16!=hash.crc16)
+					continue;	/* wrong crc-16 */
+				if(compare[c]->flags&hash.flags&SMB_HASH_CRC32
+					&& compare[c]->crc32!=hash.crc32)
+					continue;	/* wrong crc-32 */
+				if(compare[c]->flags&hash.flags&SMB_HASH_MD5 
+					&& memcmp(compare[c]->md5,hash.md5,sizeof(hash.md5)))
+					continue;	/* wrong MD5 digest */
+
+				/* successful match! (record copied to 'found' when requested) */
+				if(found!=NULL)
+					memcpy(found,&hash,sizeof(hash));
+
+				smb_close_hash(smb);
+
+				return(SMB_SUCCESS);
+			}
+		}
+	}
+
+	/* no match (or no compare list): hash file left open */
+	return(SMB_ERR_NOT_FOUND);
+}
+
+/* Appends each hash in the NULL-terminated 'hashes' list to the hash file; returns SMB_SUCCESS, SMB_ERR_WRITE or an open error */
+int SMBCALL smb_addhashes(smb_t* smb, hash_t** hashes)
+{
+	int			retval;
+	size_t		h;
+
+	if((retval=smb_open_hash(smb))!=SMB_SUCCESS)
+		return(retval);
+
+	if(hashes!=NULL) {
+		fseek(smb->hash_fp,0,SEEK_END);
+		for(h=0;hashes[h]!=NULL;h++) {
+			if(smb_fwrite(smb,hashes[h],sizeof(hash_t),smb->hash_fp)!=sizeof(hash_t)) {
+				retval=SMB_ERR_WRITE;	/* stop writing, but still close the file below */
+				break;
+			}
+		}
+	}
+	smb_close_hash(smb);	/* always close: the file is opened non-shareable */
+	return(retval);
+}
+
+/* Removes every character found in 'set' from 'str' (modified in place)	*/
+/* Returns 'str'; no memory is allocated, so this cannot fail				*/
+static char* strip_chars(uchar* str, uchar* set)
+{
+	char*	src;
+	char*	dst;
+
+	/* dst never gets ahead of src, so no temporary copy is needed */
+	for(src=dst=(char*)str;*src;src++) {
+		if(strchr((char*)set,*src)==NULL)
+			*(dst++)=*src;
+	}
+	*dst=0;
+
+	return((char*)str);
+}
+
+/* Allocates and calculates hashes of data (based on flags)					*/
+/* Returns NULL on failure (or NULL data); caller frees the returned record	*/
+hash_t* SMBCALL smb_hash(ulong msgnum, ulong t, unsigned source, unsigned flags, uchar* data)
+{
+	uchar*	p=data;
+	size_t	len;
+	hash_t*	hash;
+
+	if(data==NULL || (hash=(hash_t*)calloc(1,sizeof(hash_t)))==NULL)
+		return(NULL);	/* calloc: hashes not requested in flags are stored as zero */
+	if(flags&~SMB_HASH_MASK) {	/* pre-processing (performed on a private copy) */
+		if((p=strdup(data))==NULL) {
+			free(hash);	/* don't leak the record when the copy fails */
+			return(NULL);
+		}
+		if(flags&SMB_HASH_UPPERCASE)
+			strupr(p);
+		if(flags&SMB_HASH_LOWERCASE)
+			strlwr(p);
+		if(flags&SMB_HASH_STRIP_WSP)
+			strip_chars(p," \t\r\n");
+	}
+	len=strlen(p);
+	hash->number=msgnum;
+	hash->time=t;
+	hash->source=source;
+	hash->flags=flags;
+	if(flags&SMB_HASH_CRC16)
+		hash->crc16=crc16(p,len);
+	if(flags&SMB_HASH_CRC32)
+		hash->crc32=crc32(p,len);
+	if(flags&SMB_HASH_MD5)
+		MD5_calc(hash->md5,p,len);
+	if(p!=data)	/* duped string */
+		free(p);
+
+	return(hash);
+}
+
+/* Allocates and calculates all hashes for a single message				*/
+/* Returns NULL if the list cannot be allocated, else a NULL-terminated list	*/
+hash_t** SMBCALL smb_msghashes(smb_t* smb, smbmsg_t* msg, uchar* text)
+{
+	size_t		h=0;
+	uchar		flags=SMB_HASH_CRC16|SMB_HASH_CRC32|SMB_HASH_MD5;
+	hash_t**	hashes;	/* This is a NULL-terminated list of hashes */
+	hash_t*		hash;
+	time_t		t=time(NULL);
+
+#define SMB_MAX_HASH_COUNT 4	/* up to 3 hashes (below) + NULL terminator */
+
+	if((hashes=(hash_t**)malloc(sizeof(hash_t*)*SMB_MAX_HASH_COUNT))==NULL)
+		return(NULL);
+
+	memset(hashes, 0, sizeof(hash_t*)*SMB_MAX_HASH_COUNT);
+	/* a source that is absent (or whose hash allocation fails) is simply left out of the list */
+	if(msg->id!=NULL
+		&& (hash=smb_hash(msg->hdr.number, t, RFC822MSGID, flags, msg->id))!=NULL)
+		hashes[h++]=hash;
+
+	if(msg->ftn_msgid!=NULL
+		&& (hash=smb_hash(msg->hdr.number, t, FIDOMSGID, flags, msg->ftn_msgid))!=NULL)
+		hashes[h++]=hash;
+
+	if(text!=NULL
+		&& (hash=smb_hash(msg->hdr.number, t, TEXT_BODY, flags|SMB_HASH_STRIP_WSP, text))!=NULL)
+		hashes[h++]=hash;
+
+	return(hashes);
+}
+
+/* Calculates and stores the hashes for a single message					*/
+/* Returns SMB_DUPE_MSG if a matching hash already exists (nothing stored)	*/
+int SMBCALL smb_hashmsg(smb_t* smb, smbmsg_t* msg, uchar* text)
+{
+	size_t		n;
+	int			retval=SMB_SUCCESS;
+	hash_t**	hashes;	/* This is a NULL-terminated list of hashes */
+
+	hashes=smb_msghashes(smb,msg,text);
+	retval=smb_findhash(smb, hashes, NULL);
+	if(retval==SMB_SUCCESS)
+		retval=SMB_DUPE_MSG;
+	else if(retval==SMB_ERR_NOT_FOUND)	/* any other error (e.g. open timeout) is returned as-is, not retried */
+		retval=smb_addhashes(smb,hashes);
+
+	FREE_LIST(hashes,n);
+	return(retval);
+}
+
 /* End of SMBLIB.C */
diff --git a/src/smblib/smblib.h b/src/smblib/smblib.h
index 6d8f110fdd..ec530a1f03 100644
--- a/src/smblib/smblib.h
+++ b/src/smblib/smblib.h
@@ -39,6 +39,9 @@
 #define _SMBLIB_H
 
 #include "lzh.h"
+#include "md5.h"
+#include "crc16.h"
+#include "crc32.h"
 
 #ifdef SMBEXPORT
 	#undef SMBEXPORT
@@ -82,12 +85,19 @@
 
 #define GETMSGTXT_TAILS 	(1<<0)		/* Get message tail(s)				*/
 #define GETMSGTXT_NO_BODY	(1<<1)		/* Do not retrieve message body		*/
+#define GETMSGTXT_TAIL_ONLY (GETMSGTXT_TAILS|GETMSGTXT_NO_BODY)
 
 #define SMB_IS_OPEN(smb)	((smb)->shd_fp!=NULL)
 
 /* Legacy API functions */
 #define smb_incmsg(smb,msg)	smb_incmsg_dfields(smb,msg,1)
 #define smb_incdat			smb_incmsgdat
+#define smb_open_da(smb)	smb_open_fp(smb,&(smb)->sda_fp)
+#define smb_close_da(smb)	smb_close_fp(&(smb)->sda_fp)
+#define smb_open_ha(smb)	smb_open_fp(smb,&(smb)->sha_fp)
+#define smb_close_ha(smb)	smb_close_fp(&(smb)->sha_fp)
+#define smb_open_hash(smb)	smb_open_fp(smb,&(smb)->hash_fp)
+#define smb_close_hash(smb)	smb_close_fp(&(smb)->hash_fp)
 
 #ifdef __cplusplus
 extern "C" {
@@ -97,10 +107,8 @@ SMBEXPORT int 		SMBCALL smb_ver(void);
 SMBEXPORT char*		SMBCALL smb_lib_ver(void);
 SMBEXPORT int 		SMBCALL smb_open(smb_t* smb);
 SMBEXPORT void		SMBCALL smb_close(smb_t* smb);
-SMBEXPORT int 		SMBCALL smb_open_da(smb_t* smb);
-SMBEXPORT void		SMBCALL smb_close_da(smb_t* smb);
-SMBEXPORT int 		SMBCALL smb_open_ha(smb_t* smb);
-SMBEXPORT void		SMBCALL smb_close_ha(smb_t* smb);
+SMBEXPORT int 		SMBCALL smb_open_fp(smb_t* smb, FILE**);
+SMBEXPORT void		SMBCALL smb_close_fp(FILE**);
 SMBEXPORT int 		SMBCALL smb_create(smb_t* smb);
 SMBEXPORT int 		SMBCALL smb_stack(smb_t* smb, int op);
 SMBEXPORT int 		SMBCALL smb_trunchdr(smb_t* smb);
@@ -155,6 +163,13 @@ SMBEXPORT int		SMBCALL	smb_copymsgmem(smb_t* smb, smbmsg_t* destmsg, smbmsg_t* s
 SMBEXPORT int		SMBCALL smb_tzutc(short timezone);
 SMBEXPORT int		SMBCALL smb_updatethread(smb_t* smb, smbmsg_t* remsg, ulong newmsgnum);
 
+/* hash-related functions */
+SMBEXPORT int		SMBCALL smb_findhash(smb_t* smb, hash_t** compare_list, hash_t* found);
+SMBEXPORT int		SMBCALL smb_hashmsg(smb_t* smb, smbmsg_t* msg, uchar* text);
+SMBEXPORT hash_t*	SMBCALL	smb_hash(ulong msgnum, ulong time, unsigned source, unsigned flags, uchar* str);
+SMBEXPORT hash_t**	SMBCALL smb_msghashes(smb_t* smb, smbmsg_t* msg, uchar* text);
+SMBEXPORT int		SMBCALL smb_addhashes(smb_t* smb, hash_t** hash_list);
+
 /* smbtxt.c */
 SMBEXPORT char*		SMBCALL smb_getmsgtxt(smb_t* smb, smbmsg_t* msg, ulong mode);
 
-- 
GitLab