smbtxt.c 15.9 KB
Newer Older
1 2
/* Synchronet message base (SMB) message text library routines */

3
/* $Id: smbtxt.c,v 1.49 2019/11/19 22:04:55 rswindell Exp $ */
4 5 6 7 8

/****************************************************************************
 * @format.tab-size 4		(Plain Text/Source Code File Header)			*
 * @format.use-tabs true	(see http://www.synchro.net/ptsc_hdr.html)		*
 *																			*
rswindell's avatar
rswindell committed
9
 * Copyright Rob Swindell - http://www.synchro.net/copyright.html			*
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
 *																			*
 * This library is free software; you can redistribute it and/or			*
 * modify it under the terms of the GNU Lesser General Public License		*
 * as published by the Free Software Foundation; either version 2			*
 * of the License, or (at your option) any later version.					*
 * See the GNU Lesser General Public License for more details: lgpl.txt or	*
 * http://www.fsf.org/copyleft/lesser.html									*
 *																			*
 * Anonymous FTP access to the most recent released source is available at	*
 * ftp://vert.synchro.net, ftp://cvs.synchro.net and ftp://ftp.synchro.net	*
 *																			*
 * Anonymous CVS access to the development source and modification history	*
 * is available at cvs.synchro.net:/cvsroot/sbbs, example:					*
 * cvs -d :pserver:anonymous@cvs.synchro.net:/cvsroot/sbbs login			*
 *     (just hit return, no password is necessary)							*
 * cvs -d :pserver:anonymous@cvs.synchro.net:/cvsroot/sbbs checkout src		*
 *																			*
 * For Synchronet coding style and modification guidelines, see				*
 * http://www.synchro.net/source.html										*
 *																			*
 * You are encouraged to submit any modifications (preferably in Unix diff	*
 * format) via e-mail to mods@synchro.net									*
 *																			*
 * Note: If this box doesn't appear square, then you need to fix your tabs.	*
 ****************************************************************************/

/* ANSI */
37 38
#include <stdlib.h>	/* malloc/realloc/free */
#include <string.h>	/* strlen */
39 40 41

/* SMB-specific */
#include "smblib.h"
42
#include "base64.h"
43
#include "lzh.h"
44

45
/*
 * Returns the text of 'msg' as a single heap-allocated, NUL-terminated string
 * (to be released with free()), or NULL on failure with a description stored
 * in smb->last_error.
 *
 * The result is assembled, in order, from:
 *  - comment header fields (SMB_COMMENT, SMTPSYSMSG), one per line, followed
 *    by a blank line, then numbered poll answers (SMB_POLL_ANSWER); all of
 *    these are skipped when 'mode' includes GETMSGTXT_NO_HFIELDS
 *  - each TEXT_BODY data field (unless GETMSGTXT_NO_BODY) and each TEXT_TAIL
 *    data field (only with GETMSGTXT_TAILS), read from smb->sdt_fp,
 *    LZH-decoded when so flagged, stripped of trailing NULs and terminated
 *    with CRLF
 * Data fields that cannot be read, are too short, carry an invalid LZH length
 * or use an unsupported translation are skipped.
 *
 * With GETMSGTXT_PLAIN, if smb_getplaintext() finds a MIME text part, the
 * result is the header-field preamble followed by just that (decoded) part.
 */
char* SMBCALL smb_getmsgtxt(smb_t* smb, smbmsg_t* msg, ulong mode)
{
	char*	buf;
	char*	preamble;
	char*	lzhbuf;
	char*	p;
	char*	str;
	uint16_t	xlat;
	uint 	i;
	int		lzh;	/* BOOL */
	long	l=0,lzhlen,length;

	if((buf=(char*)malloc(sizeof(char)))==NULL) {
		sprintf(smb->last_error
			,"%s malloc failure of %" XP_PRIsize_t "u bytes for buffer"
			,__FUNCTION__, sizeof(char));
		return(NULL);
	}
	*buf=0;

	if(!(mode&GETMSGTXT_NO_HFIELDS)) {
		for(i=0;i<(uint)msg->total_hfields;i++) {			/* comment headers are part of text */
			if(msg->hfield[i].type!=SMB_COMMENT && msg->hfield[i].type!=SMTPSYSMSG)
				continue;
			str=(char*)msg->hfield_dat[i];
			length=strlen(str)+2;	/* +2 for crlf */
			if((p=(char*)realloc(buf,l+length+1))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+length+1);
				free(buf);
				return(NULL);
			}
			buf=p;
			l+=sprintf(buf+l,"%s\r\n",str);
		}
		if(l) {	/* Add a blank line after comments */
			if((p=(char*)realloc(buf,l+3))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+3);
				free(buf);
				return(NULL);
			}
			buf=p;
			l+=sprintf(buf+l,"\r\n");
		}
		unsigned answers = 0;
		for(i=0;i<(uint)msg->total_hfields;i++) {			/* Poll Answers are part of text */
			if(msg->hfield[i].type!=SMB_POLL_ANSWER)
				continue;
			char tmp[128];
			length = safe_snprintf(tmp, sizeof(tmp), "%2u: %s\r\n", ++answers, (char*)msg->hfield_dat[i]);
			if((p=(char*)realloc(buf,l+length+1))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+length+1);
				free(buf);
				return(NULL);
			}
			buf=p;
			memcpy(buf+l, tmp, length);
			l += length;
			buf[l] = 0;
		}
	}

	/* Snapshot of the header-field text; re-used below if GETMSGTXT_PLAIN replaces the body */
	if((preamble = strdup(buf)) == NULL) {
		sprintf(smb->last_error
			,"%s strdup failure of %ld bytes for preamble"
			, __FUNCTION__, l+1);
		free(buf);
		return(NULL);
	}

	for(i=0;i<(uint)msg->hdr.total_dfields;i++) {
		if(msg->dfield[i].length<=sizeof(xlat))
			continue;
		switch(msg->dfield[i].type) {
			case TEXT_BODY:
				if(mode&GETMSGTXT_NO_BODY)
					continue;
				break;
			case TEXT_TAIL:
				if(!(mode&GETMSGTXT_TAILS))
					continue;
				break;
			default:	/* ignore other data types */
				continue;
		}
		if(fseek(smb->sdt_fp,msg->hdr.offset+msg->dfield[i].offset
			,SEEK_SET) != 0)
			continue;	/* can't position to this field: don't read from an arbitrary offset */
		if(fread(&xlat, 1, sizeof(xlat), smb->sdt_fp) != sizeof(xlat))
			continue;
		lzh=0;
		if(xlat==XLAT_LZH) {
			lzh=1;
			if(fread(&xlat, 1, sizeof(xlat), smb->sdt_fp) != sizeof(xlat))
				continue;
		}
		if(xlat!=XLAT_NONE) 	/* no other translations currently supported */
			continue;

		length=msg->dfield[i].length-sizeof(xlat);
		if(lzh) {
			int32_t	decoded_len;

			length-=sizeof(xlat);
			if(length<(long)sizeof(decoded_len))	/* too short to even hold the decoded-length prefix */
				continue;
			if((lzhbuf=(char*)malloc(length))==NULL) {
				sprintf(smb->last_error
					,"%s malloc failure of %ld bytes for LZH buffer"
					, __FUNCTION__, length);
				free(buf);
				free(preamble);
				return(NULL);
			}
			if(smb_fread(smb,lzhbuf,length,smb->sdt_fp) != length) {
				sprintf(smb->last_error
					,"%s read failure of %ld bytes for LZH data"
					, __FUNCTION__, length);
				free(lzhbuf);
				free(buf);
				free(preamble);
				return(NULL);
			}
			/* The compressed data starts with the decoded length; copy it out rather than type-punning */
			memcpy(&decoded_len, lzhbuf, sizeof(decoded_len));
			if(decoded_len < 0) {	/* corrupt length: skip rather than under-allocate the output */
				free(lzhbuf);
				continue;
			}
			lzhlen=decoded_len;
			if((p=(char*)realloc(buf,l+lzhlen+3L))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for text buffer"
					, __FUNCTION__, l+lzhlen+3L);
				free(lzhbuf);
				free(buf);
				free(preamble);
				return(NULL);
			}
			buf=p;
			lzh_decode((uint8_t *)lzhbuf,length,(uint8_t *)buf+l);
			free(lzhbuf);
			l+=lzhlen;
		}
		else {
			if((p=(char*)realloc(buf,l+length+3L))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for text buffer"
					, __FUNCTION__, l+length+3L);
				free(buf);
				free(preamble);
				return(NULL);
			}
			buf=p;
			p=buf+l;
			l+=fread(p,1,length,smb->sdt_fp);
		}
		if(!l)
			continue;
		/* Strip trailing NUL padding, then terminate this field's text with CRLF */
		l--;
		while(l && buf[l]==0) l--;
		l++;
		*(buf+l)='\r';	/* CR */
		l++;
		*(buf+l)='\n';	/* LF */
		l++;
		*(buf+l)=0;
	}

	if(mode&GETMSGTXT_PLAIN) {
		/* On success smb_getplaintext() has rewritten 'buf' in place and returns it */
		char* plaintext = smb_getplaintext(msg, buf);
		if(plaintext != NULL) {
			buf = malloc(strlen(preamble) + strlen(plaintext) + 1);
			if(buf == NULL)
				buf = plaintext;	/* out of memory: return the plain text without the preamble */
			else {
				strcpy(buf, preamble);
				strcat(buf, plaintext);
				free(plaintext);
			}
		}
	}
	free(preamble);
	return(buf);
}

220
/* Releases 'buf' with free(); a NULL pointer is ignored. */
/* Counterpart to smb_getmsgtxt(), whose result is allocated with malloc/realloc. */
void SMBCALL smb_freemsgtxt(char* buf)
{
	if(buf == NULL)
		return;
	free(buf);
}
rswindell's avatar
rswindell committed
225

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
enum content_transfer_encoding {
	CONTENT_TRANFER_ENCODING_NONE,
	CONTENT_TRANFER_ENCODING_BASE64,
	CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE,
	CONTENT_TRANFER_ENCODING_OTHER
};

/* Decode quoted-printable content-transfer-encoded text */
/* Ignores (strips) unsupported ctrl chars and non-ASCII chars */
/* Does not enforce 76 char line length limit */
char* qp_decode(char* buf)
{
	uchar*	p=(uchar*)buf;
	uchar*	dest=p;

241
	for(;*p != 0; p++) {
242
		if(*p==' ' || (*p>='!' && *p<='~' && *p!='=') || *p=='\t'|| *p=='\r'|| *p=='\n')
243 244 245
			*dest++=*p;
		else if(*p=='=') {
			p++;
246 247 248
			if(*p == '\r')	/* soft link break */
				p++;
			if(*p == 0)
249
				break;
250 251
			if(*p == '\n')
				continue;
252
			if(IS_HEXDIGIT(*p) && IS_HEXDIGIT(*(p+1))) {
253
				uchar ch = HEX_CHAR_TO_INT(*p) << 4;
254
				p++;
255 256 257
				ch |= HEX_CHAR_TO_INT(*p);
				if(ch == '\t' || ch >= ' ')
					*dest++=ch;
258 259 260 261 262 263
			} else {	/* bad encoding */
				*dest++='=';
				*dest++=*p;
			}
		}
	}
264 265
	*dest++='\r';
	*dest++='\n';
266 267 268 269
	*dest=0;
	return buf;
}

270 271 272 273 274 275 276 277
/* Prefix test using strnicmp(): returns the length of 'match' when the first */
/* strlen(match) characters of 'buf' compare equal to it, otherwise 0 */
static size_t strStartsWith_i(const char* buf, const char* match)
{
	const size_t match_len = strlen(match);

	return (strnicmp(buf, match, match_len) == 0) ? match_len : 0;
}

278
static enum content_transfer_encoding mime_getxferencoding(const char* beg, const char* end)
279
{
280
	const char* p = beg;
281 282 283

	while(p < end) {
		SKIP_WHITESPACE(p);
284 285
		size_t len = strStartsWith_i(p, "content-transfer-encoding:");
		if(len < 1) {
286 287 288
			FIND_CHAR(p, '\n');
			continue;
		}
289
		p += len;
290 291 292 293 294 295 296 297 298 299 300 301 302 303
		SKIP_WHITESPACE(p);
		if(strnicmp(p, "base64", 6) == 0)
			return CONTENT_TRANFER_ENCODING_BASE64;
		if(strnicmp(p, "quoted-printable", 16) == 0)
			return CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE;
		if(strnicmp(p, "7bit", 4) == 0 || strnicmp(p, "8bit", 4) == 0 || strnicmp(p, "binary", 6) == 0)
			return CONTENT_TRANFER_ENCODING_NONE;
		return CONTENT_TRANFER_ENCODING_OTHER;
	}

	return CONTENT_TRANFER_ENCODING_NONE;
}

/* ToDo: parse and return the "modification-date" value */
304
static BOOL mime_getattachment(const char* beg, const char* end, char* attachment, size_t attachment_len)
305
{
306
	char fname[MAX_PATH+1];
307
	const char* p = beg;
308 309 310

	while(p < end) {
		SKIP_WHITESPACE(p);
311 312
		size_t len = strStartsWith_i(p, "content-disposition:");
		if(len < 1) {
313 314 315
			FIND_CHAR(p, '\n');
			continue;
		}
316
		p += len;
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331
		SKIP_WHITESPACE(p);
		if(strnicmp(p, "inline", 6) == 0) {
			FIND_CHAR(p, '\n');
			continue;
		}
		char* filename = strstr(p, "filename=");
		if(filename == NULL) {
			FIND_CHAR(p, '\n');
			continue;
		}
		filename += 9;
		char* term;
		if(*filename == '"') {
			filename++;
			term = strchr(filename, '"');
332 333 334
		} else {
			char* wsp = filename;
			FIND_WHITESPACE(wsp);
335
			term = strchr(filename, ';');
336 337 338
			if(term > wsp)
				term = wsp;
		}
339 340 341 342
		if(term == NULL) {
			term = filename;
			FIND_WHITESPACE(term);
		}
343 344
		if(term - filename >= sizeof(fname))
			term = filename + sizeof(fname) - 1;
345 346
		memcpy(fname, filename, term - filename);
		fname[term - filename] = 0;
347 348
		if(attachment != NULL && attachment_len > 0) {
			strncpy(attachment, getfname(fname), attachment_len);
349
			attachment[attachment_len - 1] = '\0';
350
		}
351 352 353 354 355
		return TRUE;
	}
	return FALSE;
}

356 357 358 359 360 361 362 363 364 365 366 367 368 369
// Parses a MIME text/* content-type header field value.
// Any strings already pointed to by *subtype and *charset are freed and the
// pointers reset to NULL first (either argument may itself be NULL to skip it).
// When 'content_type' begins with "text/", *subtype receives a newly allocated
// copy of the sub-type (the text up to the first white-space or ';') and
// *charset a newly allocated copy of the "charset=" parameter value, if present.
// Only the first 511 characters of 'content_type', and nothing beyond a blank
// line (CRLF CRLF), are examined.
void SMBCALL smb_parse_content_type(const char* content_type, char** subtype, char** charset)
{
	char	buf[512];
	char*	eoh;

	if(subtype != NULL) {
		FREE_AND_NULL(*subtype);
	}
	if(charset != NULL) {
		FREE_AND_NULL(*charset);
	}
	if(content_type == NULL)
		return;

	SAFECOPY(buf, content_type);
	eoh = strstr(buf, "\r\n\r\n");	/* Don't parse past the end of header */
	if(eoh != NULL)
		*eoh = 0;

	size_t len = strStartsWith_i(buf, "text/");
	if(len < 1)
		return;
	char* parms = buf + len;

	if(subtype != NULL) {
		char* copy = strdup(parms);
		*subtype = copy;
		if(copy != NULL) {
			char* cut = copy;
			FIND_WHITESPACE(cut);
			*cut = 0;
			cut = copy;
			FIND_CHAR(cut, ';');
			*cut = 0;
		}
	}

	if(charset == NULL)
		return;
	char* value = strcasestr(parms, " charset=");
	if(value == NULL)
		value = strcasestr(parms, ";charset=");
	if(value == NULL)
		return;
	value += 9;	/* length of either search string */

	BOOL quoted = FALSE;
	if(*value == '"') {
		quoted = TRUE;
		value++;
	}
	char* cut = value;
	FIND_WHITESPACE(cut);
	*cut = 0;
	cut = value;
	if(quoted) {
		FIND_CHAR(cut, '"');
	} else {
		FIND_CHAR(cut, ';');
	}
	*cut = 0;
	*charset = strdup(value);
}

408
/* Find the specified content-type in a multi-pat MIME-encoded message body, recursively */
409
static const char* mime_getcontent(const char* buf, const char* content_type, const char* content_match
410
	,int depth, enum content_transfer_encoding* encoding, char** charset, char* attachment, size_t attachment_len, int index)
rswindell's avatar
rswindell committed
411
{
412
	const char*	txt;
rswindell's avatar
rswindell committed
413 414
	char*	p;
	char	boundary[256];
415 416 417
	char	match1[128];
	char	match2[128];
	int		match_len = 0;
418
	int		found = 0;
419

420
	if(content_match != NULL) {
421 422
		match_len = sprintf(match1, "%s;", content_match);
					sprintf(match2, "%s\r", content_match);
423
	}
rswindell's avatar
rswindell committed
424

425 426
	if(depth > 2)
		return NULL;
rswindell's avatar
rswindell committed
427
	if(content_type == NULL)	/* Not MIME-encoded */
428
		return NULL;
429 430 431 432 433
	size_t len;
	if(((len = strStartsWith_i(content_type, "multipart/alternative;")) < 1)
	&& ((len = strStartsWith_i(content_type, "multipart/mixed;")) < 1)
	&& ((len = strStartsWith_i(content_type, "multipart/report;")) < 1)
	&& ((len = strStartsWith_i(content_type, "multipart/")) < 1))
434
		return NULL;
435 436
	content_type += len;
	p = strcasestr(content_type, "boundary=");
rswindell's avatar
rswindell committed
437
	if(p == NULL)
438
		return NULL;
439 440 441 442 443 444
	p += 9;
	if(*p == '"')
		p++;
	SAFEPRINTF(boundary, "--%s", p);
	if((p = strchr(boundary,'"')) != NULL)
		*p = 0;
rswindell's avatar
rswindell committed
445 446 447
	txt = buf;
	while((p = strstr(txt, boundary)) != NULL) {
		txt = p+strlen(boundary);
448 449
		if(strncmp(txt, "--\r\n", 4) == 0)
			break;
rswindell's avatar
rswindell committed
450 451 452 453
		SKIP_WHITESPACE(txt);
		p = strstr(txt, "\r\n\r\n");	/* End of header */
		if(p==NULL)
			continue;
454 455
		for(content_type = txt; content_type < p; content_type++) {
			SKIP_WHITESPACE(content_type);
456 457
			if((len = strStartsWith_i(content_type, "Content-Type:")) > 0) {
				content_type += len;
458
				SKIP_WHITESPACE(content_type);
459
				break;
460
			}
461 462 463 464
			FIND_CHAR(content_type, '\r');
		}
		if(content_type >= p)
			continue;
465
		const char* cp;
466
		if((match_len && strnicmp(content_type, match1, match_len) && strnicmp(content_type, match2, match_len))
467
			|| (attachment != NULL && !mime_getattachment(txt, p, attachment, attachment_len))) {
468 469
			if((cp = mime_getcontent(p, content_type, content_match, depth + 1, encoding, charset, attachment, attachment_len, index)) != NULL)
				return cp;
470 471 472
			continue;
		}
		if(found++ != index) {
473 474
			if((cp = mime_getcontent(p, content_type, content_match, depth + 1, encoding, charset, attachment, attachment_len, index)) != NULL)
				return cp;
475 476
			continue;
		}
477 478
		if(encoding != NULL)
			*encoding = mime_getxferencoding(txt, p);
479 480
		if(charset != NULL)
			smb_parse_content_type(content_type, NULL, charset);
481

482
		txt = p + 4;	// strlen("\r\n\r\n")
rswindell's avatar
rswindell committed
483 484 485
		SKIP_WHITESPACE(txt);
		if((p = strstr(txt, boundary)) != NULL)
			*p = 0;
486
		return txt;
rswindell's avatar
rswindell committed
487
	}
488 489 490
	return NULL;
}

491 492
/* Get just the (first) plain-text or HTML portion of a MIME-encoded multi-part message body */
/* Returns NULL if there is no MIME-encoded plain-text/html portion of the message */
493 494
char* SMBCALL smb_getplaintext(smbmsg_t* msg, char* buf)
{
495
	const char*	txt;
496
	enum content_transfer_encoding xfer_encoding = CONTENT_TRANFER_ENCODING_NONE;
497

498
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
499
		return NULL;
500
	txt = mime_getcontent(buf, msg->content_type, "text/plain", 0, &xfer_encoding, &msg->text_charset
501
		,/* attachment: */NULL, /* attachment_len: */0, /* index: */0);
502 503 504 505 506
	if(txt == NULL) {
		txt = mime_getcontent(buf, msg->content_type, "text/html", 0, &xfer_encoding, &msg->text_charset
			,/* attachment: */NULL, /* attachment_len: */0, /* index: */0);
		if(txt == NULL)
			return NULL;
507
		free(msg->text_subtype);
508
		msg->text_subtype = strdup("html");
509 510
	} else {
		free(msg->text_subtype);
511
		msg->text_subtype = strdup("plain");
512
	}
513 514 515 516 517 518 519 520 521 522 523 524 525

	memmove(buf, txt, strlen(txt)+1);
	if(*buf == 0)	/* No decoding necessary */
		return buf;
	if(xfer_encoding == CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE)
		qp_decode(buf);
	else if(xfer_encoding == CONTENT_TRANFER_ENCODING_BASE64) {
		char* decoded = strdup(buf);
		if(decoded == NULL)
			return NULL;
		if(b64_decode(decoded, strlen(decoded), buf, strlen(buf)) > 0)
			strcpy(buf, decoded);
		free(decoded);
526 527
	}

rswindell's avatar
rswindell committed
528 529
	return buf;
}
530

531
/* Get just a base64-encoded attachment (just one) from MIME-encoded message body */
532
/* This function is destructive (over-writes 'buf' with decoded attachment)! */
533
uint8_t* SMBCALL smb_getattachment(smbmsg_t* msg, char* buf, char* filename, size_t filename_len, uint32_t* filelen, int index)
534
{
535
	const char*	txt;
536 537
	enum content_transfer_encoding xfer_encoding = CONTENT_TRANFER_ENCODING_NONE;

538
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
539
		return NULL;
540
	txt = mime_getcontent(buf, msg->content_type, /* match-type: */NULL, 0, &xfer_encoding, /* charset: */NULL
541
		,/* attachment: */filename, filename_len, index);
542 543
	if(txt != NULL && xfer_encoding == CONTENT_TRANFER_ENCODING_BASE64) {
		memmove(buf, txt, strlen(txt)+1);
544 545 546
		int result = b64_decode(buf, strlen(buf), buf, strlen(buf));
		if(result < 1)
			return NULL;
547 548
		if(filelen != NULL)
			*filelen = result;
549
		return (uint8_t*)buf;
550 551 552 553
	}

	return NULL;	/* No attachment */
}
554 555 556 557 558

/* Return number of file attachments contained in MIME-encoded message body */
/* 'body' may be NULL if the body text is not already read/available, */
/* in which case the text is read using smb_getmsgtxt() */
/* Returns 0 for non-MIME messages or when the text can't be obtained/copied */
ulong SMBCALL smb_countattachments(smb_t* smb, smbmsg_t* msg, const char* body)
{
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
		return 0;

	char* buf = (body == NULL) ? smb_getmsgtxt(smb, msg, GETMSGTXT_ALL) : strdup(body);
	if(buf == NULL)
		return 0;

	ulong count = 0;
	for(;;) {
		/* smb_getattachment() over-writes its buffer, so each probe gets a fresh copy */
		char* scratch = strdup(buf);
		if(scratch == NULL)
			break;
		char filename[MAX_PATH + 1];
		uint8_t* attachment = smb_getattachment(msg, scratch, filename, sizeof(filename), NULL, count);
		free(scratch);
		if(attachment == NULL)	/* only tested against NULL, never dereferenced after the free */
			break;
		count++;
	}

	free(buf);
	return count;
}