/* Synchronet message base (SMB) message text library routines */

/* $Id: smbtxt.c,v 1.49 2019/11/19 22:04:55 rswindell Exp $ */

/****************************************************************************
 * @format.tab-size 4		(Plain Text/Source Code File Header)			*
 * @format.use-tabs true	(see http://www.synchro.net/ptsc_hdr.html)		*
 *																			*
 * Copyright Rob Swindell - http://www.synchro.net/copyright.html			*
 *																			*
 * This library is free software; you can redistribute it and/or			*
 * modify it under the terms of the GNU Lesser General Public License		*
 * as published by the Free Software Foundation; either version 2			*
 * of the License, or (at your option) any later version.					*
 * See the GNU Lesser General Public License for more details: lgpl.txt or	*
 * http://www.fsf.org/copyleft/lesser.html									*
 *																			*
 * Anonymous FTP access to the most recent released source is available at	*
 * ftp://vert.synchro.net, ftp://cvs.synchro.net and ftp://ftp.synchro.net	*
 *																			*
 * Anonymous CVS access to the development source and modification history	*
 * is available at cvs.synchro.net:/cvsroot/sbbs, example:					*
 * cvs -d :pserver:anonymous@cvs.synchro.net:/cvsroot/sbbs login			*
 *     (just hit return, no password is necessary)							*
 * cvs -d :pserver:anonymous@cvs.synchro.net:/cvsroot/sbbs checkout src		*
 *																			*
 * For Synchronet coding style and modification guidelines, see				*
 * http://www.synchro.net/source.html										*
 *																			*
 * You are encouraged to submit any modifications (preferably in Unix diff	*
 * format) via e-mail to mods@synchro.net									*
 *																			*
 * Note: If this box doesn't appear square, then you need to fix your tabs.	*
 ****************************************************************************/

/* ANSI */
#include <limits.h>	/* LONG_MAX */
#include <stdlib.h>	/* malloc/realloc/free */
#include <string.h>	/* strlen */

/* SMB-specific */
#include "smblib.h"
#include "base64.h"
#include "lzh.h"

/*
 * Builds the text of 'msg' as one heap-allocated, NUL-terminated string.
 *
 * The result is assembled, in this order, from:
 *  - every SMB_COMMENT and SMTPSYSMSG header field, each followed by CRLF,
 *    plus one blank line when at least one was present
 *  - every SMB_POLL_ANSWER header field, formatted as "%2u: %s\r\n" and
 *    numbered from 1
 *    (both of the above are skipped when GETMSGTXT_NO_HFIELDS is set)
 *  - every TEXT_BODY data field (unless GETMSGTXT_NO_BODY is set) and every
 *    TEXT_TAIL data field (only when GETMSGTXT_TAILS is set), read from
 *    smb->sdt_fp, LZH-decoded when so marked, stripped of trailing NUL
 *    bytes and terminated with CRLF
 *
 * When GETMSGTXT_PLAIN is set, the result of smb_getplaintext() is returned
 * instead, if that function finds a MIME text part.
 *
 * Returns NULL on allocation failure, on a failed read of LZH data, or when
 * an LZH field declares an unusable decoded length; smb->last_error then
 * describes the problem. Data fields that are too short, that cannot be
 * seeked to, or that use an unsupported translation are silently skipped.
 *
 * The caller owns the returned buffer (release it with smb_freemsgtxt()).
 */
char* SMBCALL smb_getmsgtxt(smb_t* smb, smbmsg_t* msg, ulong mode)
{
	char*	buf;
	char*	lzhbuf;
	char*	p;
	char*	str;
	uint16_t	xlat;
	uint 	i;
	int		lzh;	/* BOOL */
	long	l=0,lzhlen,length;
	int32_t	lzhsize;	/* decoded-length prefix of an LZH-compressed field */

	/* Start with an empty string so that a message without text still yields "" */
	if((buf=(char*)malloc(sizeof(char)))==NULL) {
		sprintf(smb->last_error
			,"%s malloc failure of %" XP_PRIsize_t "u bytes for buffer"
			,__FUNCTION__, sizeof(char));
		return(NULL);
	}
	*buf=0;

	if(!(mode&GETMSGTXT_NO_HFIELDS)) {
		for(i=0;i<(uint)msg->total_hfields;i++) {			/* comment headers are part of text */
			if(msg->hfield[i].type!=SMB_COMMENT && msg->hfield[i].type!=SMTPSYSMSG)
				continue;
			str=(char*)msg->hfield_dat[i];
			length=strlen(str)+2;	/* +2 for crlf */
			if((p=(char*)realloc(buf,l+length+1))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+length+1);
				free(buf);
				return(NULL);
			}
			buf=p;
			l+=sprintf(buf+l,"%s\r\n",str);
		}
		if(l) {	/* Add a blank line after comments */
			if((p=(char*)realloc(buf,l+3))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+3);
				free(buf);
				return(NULL);
			}
			buf=p;
			l+=sprintf(buf+l,"\r\n");
		}
		unsigned answers = 0;
		for(i=0;i<(uint)msg->total_hfields;i++) {			/* Poll Answers are part of text */
			if(msg->hfield[i].type!=SMB_POLL_ANSWER)
				continue;
			char tmp[128];
			length = safe_snprintf(tmp, sizeof(tmp), "%2u: %s\r\n", ++answers, (char*)msg->hfield_dat[i]);
			if((p=(char*)realloc(buf,l+length+1))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for comment buffer"
					, __FUNCTION__, l+length+1);
				free(buf);
				return(NULL);
			}
			buf=p;
			memcpy(buf+l, tmp, length);
			l += length;
			buf[l] = 0;
		}
	}

	for(i=0;i<(uint)msg->hdr.total_dfields;i++) {
		/* A field must hold more than just its translation word to carry text */
		if(msg->dfield[i].length<=sizeof(xlat))
			continue;
		switch(msg->dfield[i].type) {
			case TEXT_BODY:
				if(mode&GETMSGTXT_NO_BODY)
					continue;
				break;
			case TEXT_TAIL:
				if(!(mode&GETMSGTXT_TAILS))
					continue;
				break;
			default:	/* ignore other data types */
				continue;
		}
		/* Reading from the wrong position would yield garbage: skip the field instead */
		if(fseek(smb->sdt_fp,msg->hdr.offset+msg->dfield[i].offset
			,SEEK_SET)!=0)
			continue;
		if(fread(&xlat, 1, sizeof(xlat), smb->sdt_fp) != sizeof(xlat))
			continue;
		lzh=0;
		if(xlat==XLAT_LZH) {
			lzh=1;
			if(fread(&xlat, 1, sizeof(xlat), smb->sdt_fp) != sizeof(xlat))
				continue;
		}
		if(xlat!=XLAT_NONE) 	/* no other translations currently supported */
			continue;

		length=msg->dfield[i].length-sizeof(xlat);
		if(lzh) {
			length-=sizeof(xlat);
			/* The compressed data starts with its decoded length; anything shorter is unusable */
			if(length<(long)sizeof(lzhsize))
				continue;
			if((lzhbuf=(char*)malloc(length))==NULL) {
				sprintf(smb->last_error
					,"%s malloc failure of %ld bytes for LZH buffer"
					, __FUNCTION__, length);
				free(buf);
				return(NULL);
			}
			if(smb_fread(smb,lzhbuf,length,smb->sdt_fp) != length) {
				sprintf(smb->last_error
					,"%s read failure of %ld bytes for LZH data"
					, __FUNCTION__, length);
				free(lzhbuf);
				free(buf);
				return(NULL);
			}
			/* memcpy avoids an unaligned/aliasing-unsafe read through a cast pointer */
			memcpy(&lzhsize, lzhbuf, sizeof(lzhsize));
			lzhlen=lzhsize;
			/* A negative or overflowing length would under-allocate and corrupt 'l' */
			if(lzhlen<0 || lzhlen>LONG_MAX-3L-l) {
				sprintf(smb->last_error
					,"%s invalid LZH decoded length: %ld"
					, __FUNCTION__, lzhlen);
				free(lzhbuf);
				free(buf);
				return(NULL);
			}
			if((p=(char*)realloc(buf,l+lzhlen+3L))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for text buffer"
					, __FUNCTION__, l+lzhlen+3L);
				free(lzhbuf);
				free(buf);
				return(NULL);
			}
			buf=p;
			lzh_decode((uint8_t *)lzhbuf,length,(uint8_t *)buf+l);
			free(lzhbuf);
			l+=lzhlen;
		}
		else {
			/* +3 leaves room for the CR, LF and NUL appended below */
			if((p=(char*)realloc(buf,l+length+3L))==NULL) {
				sprintf(smb->last_error
					,"%s realloc failure of %ld bytes for text buffer"
					, __FUNCTION__, l+length+3L);
				free(buf);
				return(NULL);
			}
			buf=p;
			p=buf+l;
			l+=fread(p,1,length,smb->sdt_fp);
		}
		if(!l)
			continue;
		/* Drop trailing NUL bytes, then terminate the field with CRLF */
		l--;
		while(l && buf[l]==0) l--;
		l++;
		*(buf+l)='\r';	/* CR */
		l++;
		*(buf+l)='\n';	/* LF */
		l++;
		*(buf+l)=0;
	}

	if(mode&GETMSGTXT_PLAIN) {
		char* plaintext = smb_getplaintext(msg, buf);
		if(plaintext != NULL)
			return plaintext;
	}
	return(buf);
}

/* Releases a heap-allocated message text buffer (e.g. one returned by */
/* smb_getmsgtxt()). Passing NULL is allowed and does nothing. */
void SMBCALL smb_freemsgtxt(char* buf)
{
	free(buf);	/* free(NULL) is a no-op, so no explicit guard is needed */
}

/* Classification of a MIME part's Content-Transfer-Encoding header value */
enum content_transfer_encoding {
	CONTENT_TRANFER_ENCODING_NONE = 0,				/* body needs no decoding */
	CONTENT_TRANFER_ENCODING_BASE64 = 1,			/* body is base64-encoded */
	CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE = 2,	/* body is quoted-printable-encoded */
	CONTENT_TRANFER_ENCODING_OTHER = 3				/* some other, unsupported encoding */
};

/* Decode quoted-printable content-transfer-encoded text */
/* Ignores (strips) unsupported ctrl chars and non-ASCII chars */
/* Does not enforce 76 char line length limit */
char* qp_decode(char* buf)
{
	uchar*	p=(uchar*)buf;
	uchar*	dest=p;

226
	for(;*p != 0; p++) {
227
		if(*p==' ' || (*p>='!' && *p<='~' && *p!='=') || *p=='\t'|| *p=='\r'|| *p=='\n')
228 229 230
			*dest++=*p;
		else if(*p=='=') {
			p++;
231 232 233
			if(*p == '\r')	/* soft link break */
				p++;
			if(*p == 0)
234
				break;
235 236
			if(*p == '\n')
				continue;
237
			if(IS_HEXDIGIT(*p) && IS_HEXDIGIT(*(p+1))) {
238
				uchar ch = HEX_CHAR_TO_INT(*p) << 4;
239
				p++;
240 241 242
				ch |= HEX_CHAR_TO_INT(*p);
				if(ch == '\t' || ch >= ' ')
					*dest++=ch;
243 244 245 246 247 248
			} else {	/* bad encoding */
				*dest++='=';
				*dest++=*p;
			}
		}
	}
249 250
	*dest++='\r';
	*dest++='\n';
251 252 253 254
	*dest=0;
	return buf;
}

255 256 257 258 259 260 261 262
/* Case-insensitive prefix test. */
/* Returns strlen(match) when 'buf' begins with 'match', otherwise 0 */
/* (an empty 'match' therefore also yields 0). */
static size_t strStartsWith_i(const char* buf, const char* match)
{
	const size_t match_len = strlen(match);

	return (strnicmp(buf, match, match_len) != 0) ? 0 : match_len;
}

static enum content_transfer_encoding mime_getxferencoding(const char* beg, const char* end)
264
{
265
	const char* p = beg;
266 267 268

	while(p < end) {
		SKIP_WHITESPACE(p);
269 270
		size_t len = strStartsWith_i(p, "content-transfer-encoding:");
		if(len < 1) {
271 272 273
			FIND_CHAR(p, '\n');
			continue;
		}
274
		p += len;
275 276 277 278 279 280 281 282 283 284 285 286 287 288
		SKIP_WHITESPACE(p);
		if(strnicmp(p, "base64", 6) == 0)
			return CONTENT_TRANFER_ENCODING_BASE64;
		if(strnicmp(p, "quoted-printable", 16) == 0)
			return CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE;
		if(strnicmp(p, "7bit", 4) == 0 || strnicmp(p, "8bit", 4) == 0 || strnicmp(p, "binary", 6) == 0)
			return CONTENT_TRANFER_ENCODING_NONE;
		return CONTENT_TRANFER_ENCODING_OTHER;
	}

	return CONTENT_TRANFER_ENCODING_NONE;
}

/* ToDo: parse and return the "modification-date" value */
289
static BOOL mime_getattachment(const char* beg, const char* end, char* attachment, size_t attachment_len)
290
{
291
	char fname[MAX_PATH+1];
292
	const char* p = beg;
293 294 295

	while(p < end) {
		SKIP_WHITESPACE(p);
296 297
		size_t len = strStartsWith_i(p, "content-disposition:");
		if(len < 1) {
298 299 300
			FIND_CHAR(p, '\n');
			continue;
		}
301
		p += len;
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
		SKIP_WHITESPACE(p);
		if(strnicmp(p, "inline", 6) == 0) {
			FIND_CHAR(p, '\n');
			continue;
		}
		char* filename = strstr(p, "filename=");
		if(filename == NULL) {
			FIND_CHAR(p, '\n');
			continue;
		}
		filename += 9;
		char* term;
		if(*filename == '"') {
			filename++;
			term = strchr(filename, '"');
317 318 319
		} else {
			char* wsp = filename;
			FIND_WHITESPACE(wsp);
320
			term = strchr(filename, ';');
321 322 323
			if(term > wsp)
				term = wsp;
		}
324 325 326 327
		if(term == NULL) {
			term = filename;
			FIND_WHITESPACE(term);
		}
328 329
		if(term - filename >= sizeof(fname))
			term = filename + sizeof(fname) - 1;
330 331
		memcpy(fname, filename, term - filename);
		fname[term - filename] = 0;
332 333
		if(attachment != NULL && attachment_len > 0) {
			strncpy(attachment, getfname(fname), attachment_len);
334
			attachment[attachment_len - 1] = '\0';
335
		}
336 337 338 339 340
		return TRUE;
	}
	return FALSE;
}

341 342 343 344 345 346 347 348 349 350 351 352 353 354
// Parses a MIME text/* content-type header field value.
// Any strings previously held in '*subtype' and '*charset' are freed and the
// pointers reset to NULL first (either argument may be NULL to skip it).
// For a "text/..." content-type, '*subtype' receives a newly allocated copy of
// the sub-type (e.g. "plain") and '*charset' a newly allocated copy of the
// charset= parameter value, when present. Only the first 511 characters of
// 'content_type', up to the first blank line, are examined.
void SMBCALL smb_parse_content_type(const char* content_type, char** subtype, char** charset)
{
	if(subtype != NULL) {
		FREE_AND_NULL(*subtype);
	}
	if(charset != NULL) {
		FREE_AND_NULL(*charset);
	}
	if(content_type == NULL)
		return;

	char header[512];
	SAFECOPY(header, content_type);
	char* cut = strstr(header, "\r\n\r\n");	/* Don't parse past the end of header */
	if(cut != NULL)
		*cut = 0;

	size_t prefix = strStartsWith_i(header, "text/");
	if(prefix < 1)
		return;
	char* parms = header + prefix;	/* sub-type, followed by any parameters */

	if(subtype != NULL) {
		*subtype = strdup(parms);
		if(*subtype != NULL) {
			/* The sub-type ends at the first whitespace or ';' */
			char* stop = *subtype;
			FIND_WHITESPACE(stop);
			*stop = 0;
			stop = *subtype;
			FIND_CHAR(stop, ';');
			*stop = 0;
		}
	}

	if(charset == NULL)
		return;
	char* value = strcasestr(parms, " charset=");
	if(value == NULL)
		value = strcasestr(parms, ";charset=");
	if(value == NULL)
		return;
	value += 9;
	char terminator = ';';
	if(*value == '"') {	/* quoted value ends at the closing quote instead */
		terminator = '"';
		value++;
	}
	char* tail = value;
	FIND_WHITESPACE(tail);
	*tail = 0;
	tail = value;
	FIND_CHAR(tail, terminator);
	*tail = 0;
	*charset = strdup(value);
}

/* Find the specified content-type in a multi-pat MIME-encoded message body, recursively */
394
static const char* mime_getcontent(const char* buf, const char* content_type, const char* content_match
395
	,int depth, enum content_transfer_encoding* encoding, char** charset, char* attachment, size_t attachment_len, int index)
rswindell's avatar
rswindell committed
396
{
397
	const char*	txt;
rswindell's avatar
rswindell committed
398 399
	char*	p;
	char	boundary[256];
400 401 402
	char	match1[128];
	char	match2[128];
	int		match_len = 0;
403
	int		found = 0;
404

405
	if(content_match != NULL) {
406 407
		match_len = sprintf(match1, "%s;", content_match);
					sprintf(match2, "%s\r", content_match);
408
	}
rswindell's avatar
rswindell committed
409

410 411
	if(depth > 2)
		return NULL;
rswindell's avatar
rswindell committed
412
	if(content_type == NULL)	/* Not MIME-encoded */
413
		return NULL;
414 415 416 417 418
	size_t len;
	if(((len = strStartsWith_i(content_type, "multipart/alternative;")) < 1)
	&& ((len = strStartsWith_i(content_type, "multipart/mixed;")) < 1)
	&& ((len = strStartsWith_i(content_type, "multipart/report;")) < 1)
	&& ((len = strStartsWith_i(content_type, "multipart/")) < 1))
419
		return NULL;
420 421
	content_type += len;
	p = strcasestr(content_type, "boundary=");
rswindell's avatar
rswindell committed
422
	if(p == NULL)
423
		return NULL;
424 425 426 427 428 429
	p += 9;
	if(*p == '"')
		p++;
	SAFEPRINTF(boundary, "--%s", p);
	if((p = strchr(boundary,'"')) != NULL)
		*p = 0;
rswindell's avatar
rswindell committed
430 431 432
	txt = buf;
	while((p = strstr(txt, boundary)) != NULL) {
		txt = p+strlen(boundary);
433 434
		if(strncmp(txt, "--\r\n", 4) == 0)
			break;
rswindell's avatar
rswindell committed
435 436 437 438
		SKIP_WHITESPACE(txt);
		p = strstr(txt, "\r\n\r\n");	/* End of header */
		if(p==NULL)
			continue;
439 440
		for(content_type = txt; content_type < p; content_type++) {
			SKIP_WHITESPACE(content_type);
441 442
			if((len = strStartsWith_i(content_type, "Content-Type:")) > 0) {
				content_type += len;
443
				SKIP_WHITESPACE(content_type);
444
				break;
445
			}
446 447 448 449
			FIND_CHAR(content_type, '\r');
		}
		if(content_type >= p)
			continue;
450
		const char* cp;
451
		if((match_len && strnicmp(content_type, match1, match_len) && strnicmp(content_type, match2, match_len))
452
			|| (attachment != NULL && !mime_getattachment(txt, p, attachment, attachment_len))) {
453 454
			if((cp = mime_getcontent(p, content_type, content_match, depth + 1, encoding, charset, attachment, attachment_len, index)) != NULL)
				return cp;
455 456 457
			continue;
		}
		if(found++ != index) {
458 459
			if((cp = mime_getcontent(p, content_type, content_match, depth + 1, encoding, charset, attachment, attachment_len, index)) != NULL)
				return cp;
460 461
			continue;
		}
462 463
		if(encoding != NULL)
			*encoding = mime_getxferencoding(txt, p);
464 465
		if(charset != NULL)
			smb_parse_content_type(content_type, NULL, charset);
466

467
		txt = p + 4;	// strlen("\r\n\r\n")
rswindell's avatar
rswindell committed
468 469 470
		SKIP_WHITESPACE(txt);
		if((p = strstr(txt, boundary)) != NULL)
			*p = 0;
471
		return txt;
rswindell's avatar
rswindell committed
472
	}
473 474 475
	return NULL;
}

476 477
/* Get just the (first) plain-text or HTML portion of a MIME-encoded multi-part message body */
/* Returns NULL if there is no MIME-encoded plain-text/html portion of the message */
478 479
char* SMBCALL smb_getplaintext(smbmsg_t* msg, char* buf)
{
480
	const char*	txt;
481
	enum content_transfer_encoding xfer_encoding = CONTENT_TRANFER_ENCODING_NONE;
482

483
	FREE_AND_NULL(msg->text_subtype);
484
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
485
		return NULL;
486
	txt = mime_getcontent(buf, msg->content_type, "text/plain", 0, &xfer_encoding, &msg->text_charset
487
		,/* attachment: */NULL, /* attachment_len: */0, /* index: */0);
488 489 490 491 492 493 494 495
	if(txt == NULL) {
		txt = mime_getcontent(buf, msg->content_type, "text/html", 0, &xfer_encoding, &msg->text_charset
			,/* attachment: */NULL, /* attachment_len: */0, /* index: */0);
		if(txt == NULL)
			return NULL;
		msg->text_subtype = strdup("html");
	} else
		msg->text_subtype = strdup("plain");
496 497 498 499 500 501 502 503 504 505 506 507 508

	memmove(buf, txt, strlen(txt)+1);
	if(*buf == 0)	/* No decoding necessary */
		return buf;
	if(xfer_encoding == CONTENT_TRANFER_ENCODING_QUOTED_PRINTABLE)
		qp_decode(buf);
	else if(xfer_encoding == CONTENT_TRANFER_ENCODING_BASE64) {
		char* decoded = strdup(buf);
		if(decoded == NULL)
			return NULL;
		if(b64_decode(decoded, strlen(decoded), buf, strlen(buf)) > 0)
			strcpy(buf, decoded);
		free(decoded);
509 510
	}

rswindell's avatar
rswindell committed
511 512
	return buf;
}
513

514
/* Get just a base64-encoded attachment (just one) from MIME-encoded message body */
515
/* This function is destructive (over-writes 'buf' with decoded attachment)! */
516
uint8_t* SMBCALL smb_getattachment(smbmsg_t* msg, char* buf, char* filename, size_t filename_len, uint32_t* filelen, int index)
517
{
518
	const char*	txt;
519 520
	enum content_transfer_encoding xfer_encoding = CONTENT_TRANFER_ENCODING_NONE;

521
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
522
		return NULL;
523
	txt = mime_getcontent(buf, msg->content_type, /* match-type: */NULL, 0, &xfer_encoding, /* charset: */NULL
524
		,/* attachment: */filename, filename_len, index);
525 526
	if(txt != NULL && xfer_encoding == CONTENT_TRANFER_ENCODING_BASE64) {
		memmove(buf, txt, strlen(txt)+1);
527 528 529
		int result = b64_decode(buf, strlen(buf), buf, strlen(buf));
		if(result < 1)
			return NULL;
530 531
		if(filelen != NULL)
			*filelen = result;
532
		return (uint8_t*)buf;
533 534 535 536
	}

	return NULL;	/* No attachment */
}
537 538 539 540 541

/* Return number of file attachments contained in MIME-encoded message body */
/* 'body' may be NULL if the body text is not already read/available */
/* (the text is then read with smb_getmsgtxt(smb, msg, GETMSGTXT_ALL)). */
/* Returns 0 for non-MIME messages and when the text cannot be obtained. */
ulong SMBCALL smb_countattachments(smb_t* smb, smbmsg_t* msg, const char* body)
{
	if(msg->mime_version == NULL || msg->content_type == NULL)	/* not MIME */
		return 0;

	ulong count = 0;
	char* buf;

	if(body == NULL)
		buf = smb_getmsgtxt(smb, msg, GETMSGTXT_ALL);
	else
		buf = strdup(body);

	if(buf == NULL)
		return 0;

	/* smb_getattachment() is destructive, so each probe works on a fresh copy */
	char* tmp;
	while((tmp = strdup(buf)) != NULL) {
		char filename[MAX_PATH + 1];
		/* The returned pointer aliases 'tmp': evaluate it before 'tmp' is freed */
		BOOL found = smb_getattachment(msg, tmp, filename, sizeof(filename), NULL, (int)count) != NULL;
		free(tmp);
		if(!found)
			break;
		count++;
	}

	free(buf);
	return count;
}