/* Synchronet message base (SMB) message text library routines */

/* $Id$ */

/****************************************************************************
 * @format.tab-size 4		(Plain Text/Source Code File Header)			*
 * @format.use-tabs true	(see http://www.synchro.net/ptsc_hdr.html)		*
 *																			*
 * Copyright Rob Swindell - http://www.synchro.net/copyright.html			*
 *																			*
 * This library is free software; you can redistribute it and/or			*
 * modify it under the terms of the GNU Lesser General Public License		*
 * as published by the Free Software Foundation; either version 2			*
 * of the License, or (at your option) any later version.					*
 * See the GNU Lesser General Public License for more details: lgpl.txt or	*
 * http://www.fsf.org/copyleft/lesser.html									*
 *																			*
 * Anonymous FTP access to the most recent released source is available at	*
 * ftp://vert.synchro.net, ftp://cvs.synchro.net and ftp://ftp.synchro.net	*
 *																			*
 * Anonymous CVS access to the development source and modification history	*
 * is available at cvs.synchro.net:/cvsroot/sbbs, example:					*
 * cvs -d :pserver:anonymous@cvs.synchro.net:/cvsroot/sbbs login			*
 *     (just hit return, no password is necessary)							*
 * cvs -d :pserver:anonymous@cvs.synchro.net:/cvsroot/sbbs checkout src		*
 *																			*
 * For Synchronet coding style and modification guidelines, see				*
 * http://www.synchro.net/source.html										*
 *																			*
 * You are encouraged to submit any modifications (preferably in Unix diff	*
 * format) via e-mail to mods@synchro.net									*
 *																			*
 * Note: If this box doesn't appear square, then you need to fix your tabs.	*
 ****************************************************************************/

/* ANSI */
37 38
#include <stdlib.h>	/* malloc/realloc/free */
#include <string.h>	/* strlen */
39 40 41

/* SMB-specific */
#include "smblib.h"
42
#include "base64.h"
43

44
/****************************************************************************/
/* Assembles the text of 'msg' into one heap-allocated, NUL-terminated		*/
/* string (the caller releases it with free()), in this order:				*/
/*  1. SMB_COMMENT and SMTPSYSMSG header fields, one per line, followed by	*/
/*     a blank line (omitted when GETMSGTXT_NO_HFIELDS is set in 'mode')	*/
/*  2. SMB_POLL_ANSWER header fields as a numbered list (same flag)			*/
/*  3. TEXT_BODY data fields (omitted when GETMSGTXT_NO_BODY is set) and	*/
/*     TEXT_TAIL data fields (only when GETMSGTXT_TAILS is set), each read	*/
/*     from smb->sdt_fp, LZH-decoded when so flagged, stripped of trailing	*/
/*     NUL bytes and terminated with CRLF									*/
/* When GETMSGTXT_PLAIN is set, the text is passed to smb_getplaintext()	*/
/* and that function's result is returned when it is not NULL.				*/
/*																			*/
/* Returns NULL, with a description in smb->last_error, on an allocation	*/
/* failure or a failed read of LZH data. Data fields with an unsupported	*/
/* translation, an unreadable translation header, a failed seek or a		*/
/* malformed LZH length prefix are skipped.									*/
/****************************************************************************/
char* SMBCALL smb_getmsgtxt(smb_t* smb, smbmsg_t* msg, ulong mode)
{
	char*	buf;
	char*	lzhbuf;
	char*	p;
	char*	str;
	uint16_t	xlat;
	uint 	i;
	int		lzh;	/* BOOL */
	long	l=0,lzhlen,length;

	/* Start with an empty string so that a message with no text still returns a valid buffer */
	if((buf=(char*)malloc(sizeof(char)))==NULL) {
		sprintf(smb->last_error
			,"%s malloc failure of %" XP_PRIsize_t "u bytes for buffer"
			,__FUNCTION__, sizeof(char));
		return(NULL);
	}
	*buf=0;

	if(!(mode&GETMSGTXT_NO_HFIELDS)) {
		for(i=0;i<(uint)msg->total_hfields;i++) {			/* comment headers are part of text */
			if(msg->hfield[i].type!=SMB_COMMENT && msg->hfield[i].type!=SMTPSYSMSG)
				continue;
			str=(char*)msg->hfield_dat[i];
			length=strlen(str)+2;	/* +2 for crlf */
			if((p=(char*)realloc(buf,l+length+1))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+length+1);
				free(buf);
				return(NULL);
			}
			buf=p;
			l+=sprintf(buf+l,"%s\r\n",str);
		}
		if(l) {	/* Add a blank line after comments */
			if((p=(char*)realloc(buf,l+3))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+3);
				free(buf);
				return(NULL);
			}
			buf=p;
			l+=sprintf(buf+l,"\r\n");
		}
		unsigned answers = 0;
		for(i=0;i<(uint)msg->total_hfields;i++) {			/* Poll Answers are part of text */
			if(msg->hfield[i].type!=SMB_POLL_ANSWER)
				continue;
			char tmp[128];
			length = safe_snprintf(tmp, sizeof(tmp), "%2u: %s\r\n", ++answers, (char*)msg->hfield_dat[i]);
			if(length<1)	/* formatting failed: nothing usable in tmp */
				continue;
			if(length>=(long)sizeof(tmp))	/* never copy more than tmp can actually hold */
				length=sizeof(tmp)-1;
			if((p=(char*)realloc(buf,l+length+1))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+length+1);
				free(buf);
				return(NULL);
			}
			buf=p;
			memcpy(buf+l, tmp, length);
			l += length;
			buf[l] = 0;
		}
	}

	for(i=0;i<(uint)msg->hdr.total_dfields;i++) {
		if(msg->dfield[i].length<=sizeof(xlat))	/* no room for anything beyond the translation header */
			continue;
		switch(msg->dfield[i].type) {
			case TEXT_BODY:
				if(mode&GETMSGTXT_NO_BODY)
					continue;
				break;
			case TEXT_TAIL:
				if(!(mode&GETMSGTXT_TAILS))
					continue;
				break;
			default:	/* ignore other data types */
				continue;
		}
		if(fseek(smb->sdt_fp,msg->hdr.offset+msg->dfield[i].offset
			,SEEK_SET)!=0)	/* don't read from an unknown file position */
			continue;
		if(fread(&xlat, 1, sizeof(xlat), smb->sdt_fp) != sizeof(xlat))
			continue;
		lzh=0;
		if(xlat==XLAT_LZH) {	/* LZH data carries a second translation word after the LZH marker */
			lzh=1;
			if(fread(&xlat, 1, sizeof(xlat), smb->sdt_fp) != sizeof(xlat))
				continue;
		}
		if(xlat!=XLAT_NONE) 	/* no other translations currently supported */
			continue;

		length=msg->dfield[i].length-sizeof(xlat);
		if(lzh) {
			int32_t	lzhhdr;

			length-=sizeof(xlat);
			if(length<(long)sizeof(lzhhdr))	/* too short to even hold the decoded-length prefix */
				continue;
			if((lzhbuf=(char*)malloc(length))==NULL) {
				sprintf(smb->last_error
					,"%s malloc failure of %ld bytes for LZH buffer"
					, __FUNCTION__, length);
				free(buf);
				return(NULL);
			}
			if(smb_fread(smb,lzhbuf,length,smb->sdt_fp) != length) {
				sprintf(smb->last_error
					,"%s read failure of %ld bytes for LZH data"
					, __FUNCTION__, length);
				free(lzhbuf);
				free(buf);
				return(NULL);
			}
			/* The LZH data starts with the decoded length; copy it out rather than */
			/* dereferencing a (possibly misaligned) int32_t pointer into the buffer */
			memcpy(&lzhhdr,lzhbuf,sizeof(lzhhdr));
			lzhlen=lzhhdr;
			if(lzhlen<0) {	/* corrupt prefix: would under-size the text buffer */
				free(lzhbuf);
				continue;
			}
			if((p=(char*)realloc(buf,l+lzhlen+3L))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for text buffer"
					, __FUNCTION__, l+lzhlen+3L);
				free(lzhbuf);
				free(buf);
				return(NULL);
			}
			buf=p;
			lzh_decode((uint8_t *)lzhbuf,length,(uint8_t *)buf+l);
			free(lzhbuf);
			l+=lzhlen;
		}
		else {
			if((p=(char*)realloc(buf,l+length+3L))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for text buffer"
					, __FUNCTION__, l+length+3L);
				free(buf);
				return(NULL);
			}
			buf=p;
			p=buf+l;
			l+=fread(p,1,length,smb->sdt_fp);
		}
		if(!l)
			continue;
		/* Back up over any trailing NUL bytes, then terminate this field's text with CRLF */
		/* (the +3 in the allocations above reserves room for CR, LF and the terminator) */
		l--;
		while(l && buf[l]==0) l--;
		l++;
		*(buf+l)='\r';	/* CR */
		l++;
		*(buf+l)='\n';	/* LF */
		l++;
		*(buf+l)=0;
	}

	if(mode&GETMSGTXT_PLAIN) {
		char* plaintext = smb_getplaintext(msg, buf);
		if(plaintext != NULL)
			return plaintext;
	}
	return(buf);
}

204
/* Frees a heap-allocated message text buffer (e.g. one returned by smb_getmsgtxt()) */
/* A NULL 'buf' is ignored */
void SMBCALL smb_freemsgtxt(char* buf)
{
	if(buf==NULL)
		return;
	free(buf);
}
rswindell's avatar
rswindell committed
209

210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
/* Classification of a MIME part's Content-Transfer-Encoding header field */
/* (the "TRANFER" spelling is part of the identifiers used throughout this file) */
enum content_transfer_encoding {
	CONTENT_TRANFER_ENCODING_NONE = 0,				/* no decoding required */
	CONTENT_TRANFER_ENCODING_BASE64 = 1,			/* "base64" */
	CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE = 2,	/* "quoted-printable" */
	CONTENT_TRANFER_ENCODING_OTHER = 3				/* any other (unsupported) value */
};

/* Decode quoted-printable content-transfer-encoded text */
/* Ignores (strips) unsupported ctrl chars and non-ASCII chars */
/* Does not enforce 76 char line length limit */
char* qp_decode(char* buf)
{
	uchar*	p=(uchar*)buf;
	uchar*	dest=p;

225
	for(;*p != 0; p++) {
226
		if(*p==' ' || (*p>='!' && *p<='~' && *p!='=') || *p=='\t'|| *p=='\r'|| *p=='\n')
227 228 229
			*dest++=*p;
		else if(*p=='=') {
			p++;
230 231 232
			if(*p == '\r')	/* soft link break */
				p++;
			if(*p == 0)
233
				break;
234 235
			if(*p == '\n')
				continue;
236
			if(isxdigit(*p) && isxdigit(*(p+1))) {
237
				uchar ch = HEX_CHAR_TO_INT(*p) << 4;
238
				p++;
239 240 241
				ch |= HEX_CHAR_TO_INT(*p);
				if(ch == '\t' || ch >= ' ')
					*dest++=ch;
242 243 244 245 246 247
			} else {	/* bad encoding */
				*dest++='=';
				*dest++=*p;
			}
		}
	}
248 249
	*dest++='\r';
	*dest++='\n';
250 251 252 253
	*dest=0;
	return buf;
}

254
static enum content_transfer_encoding mime_getxferencoding(const char* beg, const char* end)
255
{
256
	const char* p = beg;
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278

	while(p < end) {
		SKIP_WHITESPACE(p);
		if(strnicmp(p, "content-transfer-encoding:", 26) != 0) {
			FIND_CHAR(p, '\n');
			continue;
		}
		p += 26;
		SKIP_WHITESPACE(p);
		if(strnicmp(p, "base64", 6) == 0)
			return CONTENT_TRANFER_ENCODING_BASE64;
		if(strnicmp(p, "quoted-printable", 16) == 0)
			return CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE;
		if(strnicmp(p, "7bit", 4) == 0 || strnicmp(p, "8bit", 4) == 0 || strnicmp(p, "binary", 6) == 0)
			return CONTENT_TRANFER_ENCODING_NONE;
		return CONTENT_TRANFER_ENCODING_OTHER;
	}

	return CONTENT_TRANFER_ENCODING_NONE;
}

/* ToDo: parse and return the "modification-date" value */
279
static BOOL mime_getattachment(const char* beg, const char* end, char* attachment, size_t attachment_len)
280
{
281
	char fname[MAX_PATH+1];
282
	const char* p = beg;
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311

	while(p < end) {
		SKIP_WHITESPACE(p);
		if(strnicmp(p, "content-disposition:", 20) != 0) {
			FIND_CHAR(p, '\n');
			continue;
		}
		p += 20;
		SKIP_WHITESPACE(p);
		if(strnicmp(p, "inline", 6) == 0) {
			FIND_CHAR(p, '\n');
			continue;
		}
		char* filename = strstr(p, "filename=");
		if(filename == NULL) {
			FIND_CHAR(p, '\n');
			continue;
		}
		filename += 9;
		char* term;
		if(*filename == '"') {
			filename++;
			term = strchr(filename, '"');
		} else
			term = strchr(filename, ';');
		if(term == NULL) {
			term = filename;
			FIND_WHITESPACE(term);
		}
312 313
		if(term - filename >= sizeof(fname))
			term = filename + sizeof(fname) - 1;
314 315
		memcpy(fname, filename, term - filename);
		fname[term - filename] = 0;
316 317
		if(attachment != NULL && attachment_len > 0) {
			strncpy(attachment, getfname(fname), attachment_len);
318
			attachment[attachment_len - 1] = '\0';
319
		}
320 321 322 323 324
		return TRUE;
	}
	return FALSE;
}

325 326 327 328 329 330 331 332 333 334 335 336 337 338
// Parses a MIME text/* content-type header field
// Any strings previously held in *subtype and *charset are freed and the pointers set to NULL first.
// When 'content_type' begins with "text/" (compared case-insensitively):
//   *subtype (if 'subtype' is non-NULL) receives a strdup'd copy of the subtype, e.g. "plain"
//   *charset (if 'charset' is non-NULL) receives a strdup'd copy of the charset= parameter value,
//            without surrounding quotes or a trailing ';', when that parameter is present
// Otherwise both are left NULL. Only the first 511 chars of 'content_type' are examined.
void SMBCALL smb_parse_content_type(const char* content_type, char** subtype, char** charset)
{
	if(subtype != NULL) {
		FREE_AND_NULL(*subtype);
	}
	if(charset != NULL) {
		FREE_AND_NULL(*charset);
	}
	if(content_type == NULL)
		return;
	char buf[512];
	SAFECOPY(buf, content_type);
	char* p;
	if((p = strstr(buf, "\r\n\r\n")) != NULL)	/* Don't parse past the end of header */
		*p = 0;
	if(strnicmp(buf, "text/", 5) != 0)	/* MIME type names are not case-sensitive */
		return;
	p = buf + 5;
	if(subtype != NULL) {
		if((*subtype = strdup(p)) != NULL) {
			char* tp = *subtype;
			FIND_WHITESPACE(tp);
			*tp = 0;
			tp = *subtype;
			FIND_CHAR(tp, ';');
			*tp = 0;
		}
	}
	if(charset != NULL && (p = strstr(p, "charset=")) != NULL) {
		p += 8;
		int quoted = (*p == '"');
		if(quoted)
			p++;
		char* tp = p;
		FIND_WHITESPACE(tp);
		*tp = 0;
		/* A quoted value ends at the closing quote; an unquoted value also ends at the */
		/* ';' that introduces the next parameter (e.g. "charset=utf-8; format=flowed") */
		p[strcspn(p, quoted ? "\"" : "\";")] = 0;
		*charset = strdup(p);
	}
}

368
/* Find the specified content-type in a MIME-encoded message body, recursively */
369
static const char* mime_getcontent(const char* buf, const char* content_type, const char* content_match
370
	,int depth, enum content_transfer_encoding* encoding, char** charset, char* attachment, size_t attachment_len, int index)
rswindell's avatar
rswindell committed
371
{
372
	const char*	txt;
rswindell's avatar
rswindell committed
373 374
	char*	p;
	char	boundary[256];
375 376 377
	char	match1[128];
	char	match2[128];
	int		match_len = 0;
378
	int		found = 0;
379

380
	if(content_match != NULL) {
381 382
		match_len = sprintf(match1, "%s;", content_match);
					sprintf(match2, "%s\r", content_match);
383
	}
rswindell's avatar
rswindell committed
384

385 386
	if(depth > 2)
		return NULL;
rswindell's avatar
rswindell committed
387
	if(content_type == NULL)	/* Not MIME-encoded */
388
		return NULL;
rswindell's avatar
rswindell committed
389 390 391 392
	if(strstr(content_type, "multipart/alternative;") == content_type)
		content_type += 22;
	else if(strstr(content_type, "multipart/mixed;") == content_type)
		content_type +=16;
393 394
	else if(strstr(content_type, "multipart/report;") == content_type)
		content_type +=17;
rswindell's avatar
rswindell committed
395
	else
396
		return NULL;
397
	p = strstr(content_type, "boundary=");
rswindell's avatar
rswindell committed
398
	if(p == NULL)
399
		return NULL;
400 401 402 403 404 405
	p += 9;
	if(*p == '"')
		p++;
	SAFEPRINTF(boundary, "--%s", p);
	if((p = strchr(boundary,'"')) != NULL)
		*p = 0;
rswindell's avatar
rswindell committed
406 407 408
	txt = buf;
	while((p = strstr(txt, boundary)) != NULL) {
		txt = p+strlen(boundary);
409 410
		if(strncmp(txt, "--\r\n", 4) == 0)
			break;
rswindell's avatar
rswindell committed
411 412 413 414
		SKIP_WHITESPACE(txt);
		p = strstr(txt, "\r\n\r\n");	/* End of header */
		if(p==NULL)
			continue;
415 416
		for(content_type = txt; content_type < p; content_type++) {
			SKIP_WHITESPACE(content_type);
417 418 419
			if(strnicmp(content_type, "Content-Type:", 13) == 0) {
				content_type += 13;
				SKIP_WHITESPACE(content_type);
420
				break;
421
			}
422 423 424 425
			FIND_CHAR(content_type, '\r');
		}
		if(content_type >= p)
			continue;
426
		const char* cp;
427
		if((match_len && strnicmp(content_type, match1, match_len) && strnicmp(content_type, match2, match_len))
428
			|| (attachment != NULL && !mime_getattachment(txt, p, attachment, attachment_len))) {
429 430
			if((cp = mime_getcontent(p, content_type, content_match, depth + 1, encoding, charset, attachment, attachment_len, index)) != NULL)
				return cp;
431 432 433
			continue;
		}
		if(found++ != index) {
434 435
			if((cp = mime_getcontent(p, content_type, content_match, depth + 1, encoding, charset, attachment, attachment_len, index)) != NULL)
				return cp;
436 437
			continue;
		}
438 439
		if(encoding != NULL)
			*encoding = mime_getxferencoding(txt, p);
440 441
		if(charset != NULL)
			smb_parse_content_type(content_type, NULL, charset);
442

443
		txt = p + 4;	// strlen("\r\n\r\n")
rswindell's avatar
rswindell committed
444 445 446
		SKIP_WHITESPACE(txt);
		if((p = strstr(txt, boundary)) != NULL)
			*p = 0;
447
		return txt;
rswindell's avatar
rswindell committed
448
	}
449 450 451
	return NULL;
}

452 453
/* Get just the (first) plain-text or HTML portion of a MIME-encoded multi-part message body */
/* Returns NULL if there is no MIME-encoded plain-text/html portion of the message */
454 455
char* SMBCALL smb_getplaintext(smbmsg_t* msg, char* buf)
{
456
	const char*	txt;
457
	enum content_transfer_encoding xfer_encoding = CONTENT_TRANFER_ENCODING_NONE;
458

459
	FREE_AND_NULL(msg->text_subtype);
460
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
461
		return NULL;
462
	txt = mime_getcontent(buf, msg->content_type, "text/plain", 0, &xfer_encoding, &msg->text_charset
463
		,/* attachment: */NULL, /* attachment_len: */0, /* index: */0);
464 465 466 467 468 469 470 471
	if(txt == NULL) {
		txt = mime_getcontent(buf, msg->content_type, "text/html", 0, &xfer_encoding, &msg->text_charset
			,/* attachment: */NULL, /* attachment_len: */0, /* index: */0);
		if(txt == NULL)
			return NULL;
		msg->text_subtype = strdup("html");
	} else
		msg->text_subtype = strdup("plain");
472 473 474 475 476 477 478 479 480 481 482 483 484

	memmove(buf, txt, strlen(txt)+1);
	if(*buf == 0)	/* No decoding necessary */
		return buf;
	if(xfer_encoding == CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE)
		qp_decode(buf);
	else if(xfer_encoding == CONTENT_TRANFER_ENCODING_BASE64) {
		char* decoded = strdup(buf);
		if(decoded == NULL)
			return NULL;
		if(b64_decode(decoded, strlen(decoded), buf, strlen(buf)) > 0)
			strcpy(buf, decoded);
		free(decoded);
485 486
	}

rswindell's avatar
rswindell committed
487 488
	return buf;
}
489

490
/* Get just a base64-encoded attachment (just one) from MIME-encoded message body */
491
/* This function is destructive (over-writes 'buf' with decoded attachment)! */
492
uint8_t* SMBCALL smb_getattachment(smbmsg_t* msg, char* buf, char* filename, size_t filename_len, uint32_t* filelen, int index)
493
{
494
	const char*	txt;
495 496
	enum content_transfer_encoding xfer_encoding = CONTENT_TRANFER_ENCODING_NONE;

497
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
498
		return NULL;
499
	txt = mime_getcontent(buf, msg->content_type, /* match-type: */NULL, 0, &xfer_encoding, /* charset: */NULL
500
		,/* attachment: */filename, filename_len, index);
501 502
	if(txt != NULL && xfer_encoding == CONTENT_TRANFER_ENCODING_BASE64) {
		memmove(buf, txt, strlen(txt)+1);
503 504 505
		int result = b64_decode(buf, strlen(buf), buf, strlen(buf));
		if(result < 1)
			return NULL;
506 507
		if(filelen != NULL)
			*filelen = result;
508
		return (uint8_t*)buf;
509 510 511 512
	}

	return NULL;	/* No attachment */
}
513 514 515 516 517

/* Return number of file attachments contained in MIME-encoded message body */
/* 'body' may be NULL if the body text is not already read/available, */
/* in which case the text is read with smb_getmsgtxt(smb, msg, GETMSGTXT_ALL) */
/* Attachments are counted by calling smb_getattachment() with increasing index values until it fails */
/* Returns 0 when the message is not MIME or the text can't be obtained */
ulong SMBCALL smb_countattachments(smb_t* smb, smbmsg_t* msg, const char* body)
{
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
		return 0;

	char* text = (body == NULL) ? smb_getmsgtxt(smb, msg, GETMSGTXT_ALL) : strdup(body);
	if(text == NULL)
		return 0;

	ulong total = 0;
	for(;;) {
		char fname[MAX_PATH + 1];
		/* smb_getattachment() over-writes its buffer, so each attempt gets a fresh copy */
		char* scratch = strdup(text);
		if(scratch == NULL)
			break;
		const int got = smb_getattachment(msg, scratch, fname, sizeof(fname), NULL, total) != NULL;
		free(scratch);
		if(!got)
			break;
		total++;
	}

	free(text);
	return total;
}