From 3313a87eeba8d44474037e65bad16aae38dad717 Mon Sep 17 00:00:00 2001
From: rswindell <>
Date: Sun, 4 Aug 2019 22:48:38 +0000
Subject: [PATCH] More UTF-8 fun: - bstrlen() moved to sbbs_t, accepts an
 optional pmode argument so it can account for UTF-8 encoded strings correctly
 - JS console.strlen() now accepts an optional pmode argument (e.g. P_UTF8) -
 Renamed sbbs_t::utf8_to_cp437 to sbbs_t::print_utf8_as_cp437 - Create/use
 msghdr_field() to perform UTF-8->CP437 conversions as needed for
 printing/copying UTF-8 encoded message header fields. - Defined XTRN_UTF8
 misc setting flag. If a message editor does *not* have this flag, it is
 assumed to *not* support UTF-8. Will likely use this for UTF-8 doors at
 some point too (none known to exist, yet).

---
 src/sbbs3/con_out.cpp    | 36 ++++++++++++++++++++++++++++++++++--
 src/sbbs3/getmsg.cpp     | 25 +++++++++++++++++++++++++
 src/sbbs3/js_console.cpp | 14 +++++++++++---
 src/sbbs3/postmsg.cpp    | 11 +++++++----
 src/sbbs3/putmsg.cpp     |  2 +-
 src/sbbs3/readmail.cpp   | 10 +++++-----
 src/sbbs3/readmsgs.cpp   | 16 ++++++++--------
 src/sbbs3/sbbs.h         |  6 ++++--
 src/sbbs3/sbbsdefs.h     |  1 +
 src/sbbs3/str_util.c     | 24 ------------------------
 10 files changed, 96 insertions(+), 49 deletions(-)

diff --git a/src/sbbs3/con_out.cpp b/src/sbbs3/con_out.cpp
index 5e60f2ad1d..100968c4f2 100644
--- a/src/sbbs3/con_out.cpp
+++ b/src/sbbs3/con_out.cpp
@@ -123,13 +123,45 @@ int sbbs_t::bputs(const char *str, long mode)
 			if(term&UTF8)
 				outcom(str[l++]);
 			else
-				l += utf8_to_cp437(str + l, len - l);
+				l += print_utf8_as_cp437(str + l, len - l);
 		} else
 			outchar(str[l++]);
 	}
 	return(l);
 }
 
+/****************************************************************************/
+/* Returns the printed width (columns) of 'str': Ctrl-A code and UTF-8 aware	*/
+/****************************************************************************/
+size_t sbbs_t::bstrlen(const char *str, long mode)
+{
+	str = auto_utf8(str, &mode);	// the returned pointer and 'mode' are what get measured below
+	size_t count = 0;
+	const char* end = str + strlen(str);
+	while (str < end) {
+		int len = 1;	// bytes consumed by this iteration
+		if(*str == CTRL_A) {
+			str++;	// examine the code character following the Ctrl-A
+			if(*str == 0 || *str == 'Z')	// EOF
+				break;
+			if(*str == '[') // CR
+				count = 0;
+			else if(*str == '<' && count) // ND-Backspace
+				count--;
+		} else if(((*str) & 0x80) && (mode&P_UTF8)) {	// high-bit byte: decode as UTF-8
+			enum unicode_codepoint codepoint = UNICODE_UNDEFINED;
+			len = utf8_getc(str, end - str, &codepoint);
+			if(len < 1)	// nothing decoded: stop counting here
+				break;
+			count += unicode_width(codepoint);
+		} else
+			count++;
+		str += len;
+	}
+	return count;
+}
+
+
 /* Perform PETSCII terminal output translation (from ASCII/CP437) */
 unsigned char cp437_to_petscii(unsigned char ch)
 {
@@ -290,7 +322,7 @@ int sbbs_t::petscii_to_ansibbs(unsigned char ch)
 }
 
 // Return length of sequence
-size_t sbbs_t::utf8_to_cp437(const char* str, size_t len)
+size_t sbbs_t::print_utf8_as_cp437(const char* str, size_t len)
 {
 	if(((*str)&0x80) == 0) {
 		outchar(*str);
diff --git a/src/sbbs3/getmsg.cpp b/src/sbbs3/getmsg.cpp
index 234fbc64b4..1b28155b87 100644
--- a/src/sbbs3/getmsg.cpp
+++ b/src/sbbs3/getmsg.cpp
@@ -40,6 +40,8 @@
 
 #include "sbbs.h"
 #include "utf8.h"
+#include "unicode.h"
+#include "cp437defs.h"
 
 /****************************************************************************/
 /* Loads an SMB message from the open msg base the fastest way possible 	*/
@@ -130,6 +132,29 @@ const char* sbbs_t::msghdr_text(const smbmsg_t* msg, uint index)
 	return msghdr_utf8_text;
 }
 
+// Returns 'str' unchanged unless msg's header fields are UTF-8 and CP437 is required; then converts a copy in 'buf'
+// 'buf' must hold sizeof(msgghdr_field_cp437_str) bytes (NULL = use that member); never converts CP437->UTF-8
+const char* sbbs_t::msghdr_field(const smbmsg_t* msg, const char* str, char* buf, bool can_utf8)
+{
+	if(msg == NULL || str == NULL || !(msg->hdr.auxattr & MSG_HFIELDS_UTF8))
+		return str;
+
+	if(can_utf8 && term_supports(UTF8))	// caller allows UTF-8 and the terminal supports it
+		return str;
+
+	if(buf == NULL)
+		buf = msgghdr_field_cp437_str;
+
+	strncpy(buf, str, sizeof(msgghdr_field_cp437_str) - 1);
+	buf[sizeof(msgghdr_field_cp437_str) - 1] = '\0';	// strncpy() doesn't terminate a truncated copy
+	utf8_normalize_str(buf);
+	utf8_replace_chars(buf, unicode_to_cp437
+		,/* unsupported char: */CP437_INVERTED_QUESTION_MARK
+		,/* unsupported zero-width ch: */0
+		,/* decode error char: */CP437_INVERTED_EXCLAMATION_MARK);
+	return buf;
+}
+
 /****************************************************************************/
 /* Displays a message header to the screen                                  */
 /****************************************************************************/
diff --git a/src/sbbs3/js_console.cpp b/src/sbbs3/js_console.cpp
index e4eb9ffb14..1615c068a2 100644
--- a/src/sbbs3/js_console.cpp
+++ b/src/sbbs3/js_console.cpp
@@ -1119,20 +1119,28 @@ static JSBool
 js_strlen(JSContext *cx, uintN argc, jsval *arglist)
 {
 	jsval *argv=JS_ARGV(cx, arglist);
+	sbbs_t*		sbbs;
     JSString*	str;
 	char*		cstr;
 	jsrefcount	rc;
+	int32		pmode = 0;
+
+	if((sbbs=(sbbs_t*)js_GetClassPrivate(cx, JS_THIS_OBJECT(cx, arglist), &js_console_class))==NULL)
+		return(JS_FALSE);
 
 	JS_SET_RVAL(cx, arglist, JSVAL_VOID);
 
 	if((str=JS_ValueToString(cx, argv[0]))==NULL)
 		return(JS_FALSE);
 
+	if(argc > 1)
+		JS_ValueToInt32(cx, argv[1], &pmode);
+
 	JSSTRING_TO_MSTRING(cx, str, cstr, NULL);
 	if(cstr==NULL)
 		return JS_FALSE;
 	rc=JS_SUSPENDREQUEST(cx);
-	JS_SET_RVAL(cx, arglist, INT_TO_JSVAL(bstrlen(cstr)));
+	JS_SET_RVAL(cx, arglist, INT_TO_JSVAL(sbbs->bstrlen(cstr, pmode)));
 	free(cstr);
 	JS_RESUMEREQUEST(cx, rc);
     return(JS_TRUE);
@@ -2014,8 +2022,8 @@ static jsSyncMethodSpec js_console_functions[] = {
 	,JSDOCSTR("display a string double-wide on the screen (sending \"fullwidth\" Unicode characters when possible)")
 	,31702
 	},
-	{"strlen",			js_strlen,			1, JSTYPE_NUMBER,	JSDOCSTR("text")
-	,JSDOCSTR("returns the number of characters in text, excluding Ctrl-A codes")
+	{"strlen",			js_strlen,			1, JSTYPE_NUMBER,	JSDOCSTR("text [,mode=<tt>P_NONE</tt>]")
+	,JSDOCSTR("returns the printed-length (number of columns) of the specified <i>text</i>, accounting for Ctrl-A codes")
 	,310
 	},
 	{"printfile",		js_printfile,		1, JSTYPE_BOOLEAN,		JSDOCSTR("filename [,mode=<tt>P_NONE</tt>] [,orig_columns=0")
diff --git a/src/sbbs3/postmsg.cpp b/src/sbbs3/postmsg.cpp
index 86923d35a8..5326f08ce0 100644
--- a/src/sbbs3/postmsg.cpp
+++ b/src/sbbs3/postmsg.cpp
@@ -91,20 +91,23 @@ bool sbbs_t::postmsg(uint subnum, long wm_mode, smb_t* resmb, smbmsg_t* remsg)
 	uint	reason;
 
 	if(remsg) {
-		SAFECOPY(title, remsg->subj);
+		SAFECOPY(title, msghdr_field(remsg, remsg->subj, NULL, /* UTF8: */true));
 		if(remsg->hdr.attr&MSG_ANONYMOUS)
 			SAFECOPY(from,text[Anonymous]);
 		else
-			SAFECOPY(from,remsg->from);
+			SAFECOPY(from, msghdr_field(remsg, remsg->from, NULL, /* UTF8: */true));
 		// If user posted this message, reply to the original recipient again
 		if(remsg->to != NULL
 			&& ((remsg->from_ext != NULL && atoi(remsg->from_ext)==useron.number)
 				|| stricmp(useron.alias,remsg->from) == 0 || stricmp(useron.name,remsg->from) == 0))
-			SAFECOPY(touser,remsg->to);
+			SAFECOPY(touser, msghdr_field(remsg, remsg->to, NULL, /* UTF8: */true));
 		else
 			SAFECOPY(touser,from);
 		msgattr=(ushort)(remsg->hdr.attr&MSG_PRIVATE);
-		sprintf(top,text[RegardingByToOn],title,from,remsg->to
+		sprintf(top,text[RegardingByToOn]
+			,title
+			,from
+			,msghdr_field(remsg, remsg->to, NULL, /* UTF8: */true)
 			,timestr(remsg->hdr.when_written.time)
 			,smb_zonestr(remsg->hdr.when_written.zone,NULL));
 		if(remsg->tags != NULL)
diff --git a/src/sbbs3/putmsg.cpp b/src/sbbs3/putmsg.cpp
index ca2902eed6..e79642b299 100644
--- a/src/sbbs3/putmsg.cpp
+++ b/src/sbbs3/putmsg.cpp
@@ -382,7 +382,7 @@ char sbbs_t::putmsg(const char *buf, long mode, long org_cols)
 				if(term&UTF8)
 					outcom(str[l]);
 				else
-					skip = utf8_to_cp437(str + l, len - l);
+					skip = print_utf8_as_cp437(str + l, len - l);
 			} else
 				outchar(str[l]);
 			l += skip;
diff --git a/src/sbbs3/readmail.cpp b/src/sbbs3/readmail.cpp
index e9e8597840..2ef52fdc07 100644
--- a/src/sbbs3/readmail.cpp
+++ b/src/sbbs3/readmail.cpp
@@ -353,15 +353,15 @@ void sbbs_t::readmail(uint usernumber, int which, long lm_mode)
 				smb_getmsgidx(&smb,&msg);
 
 				if(!stricmp(str2,str))		/* Reply to sender */
-					SAFEPRINTF(str2,text[Regarding],msg.subj);
+					SAFEPRINTF(str2,text[Regarding], msghdr_field(&msg, msg.subj));
 				else						/* Reply to other */
-					SAFEPRINTF3(str2,text[RegardingByOn],msg.subj,msg.from
+					SAFEPRINTF3(str2,text[RegardingByOn], msghdr_field(&msg, msg.subj), msghdr_field(&msg, msg.from, tmp)
 						,timestr(msg.hdr.when_written.time));
 
 				p=strrchr(str,'@');
 				if(p) { 							/* name @addr */
 					replied=netmail(str,msg.subj,WM_NONE, &smb, &msg);
-					SAFEPRINTF(str2,text[DeleteMailQ],msg.from); 
+					SAFEPRINTF(str2,text[DeleteMailQ],msghdr_field(&msg, msg.from)); 
 				}
 				else {
 					if(!msg.from_net.type && !stricmp(str,msg.from))
@@ -372,7 +372,7 @@ void sbbs_t::readmail(uint usernumber, int which, long lm_mode)
 						replied=email(i,str2,msg.subj,WM_NONE, &smb, &msg);
 					else
 						replied=false;
-					SAFEPRINTF(str2,text[DeleteMailQ],msg.from); 
+					SAFEPRINTF(str2,text[DeleteMailQ],msghdr_field(&msg, msg.from)); 
 				}
 
 				if(replied==true && !(msg.hdr.attr&MSG_REPLIED)) {
@@ -442,7 +442,7 @@ void sbbs_t::readmail(uint usernumber, int which, long lm_mode)
 				forwardmail(&msg,i);
 				if(msg.hdr.attr&MSG_PERMANENT)
 					break;
-				SAFEPRINTF(str2,text[DeleteMailQ],msg.from);
+				SAFEPRINTF(str2,text[DeleteMailQ],msghdr_field(&msg, msg.from));
 				if(!yesno(str2))
 					break;
 				if(msg.total_hfields)
diff --git a/src/sbbs3/readmsgs.cpp b/src/sbbs3/readmsgs.cpp
index 3e0fe3b265..15aba09765 100644
--- a/src/sbbs3/readmsgs.cpp
+++ b/src/sbbs3/readmsgs.cpp
@@ -465,7 +465,7 @@ void sbbs_t::show_thread(uint32_t msgnum, post_t* post, unsigned curmsg, int thr
 		,(int)(cols-column-12)
 		,(int)(cols-column-12)
 		,msg.hdr.attr&MSG_ANONYMOUS && !sub_op(smb.subnum)
-			? text[Anonymous] : msg.from
+			? text[Anonymous] : msghdr_field(&msg, msg.from)
 		,(unsigned)i == curmsg ? '<' : ' '
 		,msg_listing_flag(smb.subnum, &msg, &post[i])
 		,unixtodstr(&cfg, msg.hdr.when_written.time, date));
@@ -728,7 +728,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find)
 				break;
 			}
 			bprintf("\1n\1l\1h\1bThread\1n\1b: \1h\1c");
-			bprintf("%-.*s\r\n", (int)(cols-(column+1)), msg.subj);
+			bprintf("%-.*s\r\n", (int)(cols-(column+1)), msghdr_field(&msg, msg.subj));
 			show_thread(first, post, smb.curmsg);
 			subscan[subnum].last = post[smb.curmsg].idx.number;
 		}
@@ -809,11 +809,11 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find)
 
 			if(sub_op(subnum) && (msg.hdr.attr&(MSG_MODERATED|MSG_VALIDATED)) == MSG_MODERATED) {
 				uint16_t msg_attr = msg.hdr.attr;
-				SAFEPRINTF2(str,text[ValidatePostQ],smb.curmsg+1,msg.subj);
+				SAFEPRINTF2(str,text[ValidatePostQ],smb.curmsg+1,msghdr_field(&msg, msg.subj));
 				if(!noyes(str))
 					msg_attr|=MSG_VALIDATED;
 				else {
-					SAFEPRINTF2(str,text[DeletePostQ],smb.curmsg+1,msg.subj);
+					SAFEPRINTF2(str,text[DeletePostQ],smb.curmsg+1,msghdr_field(&msg, msg.subj));
 					if(yesno(str))
 						msg_attr|=MSG_DELETE;
 				}
@@ -976,7 +976,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find)
 				if(msg.hdr.type == SMB_MSG_TYPE_POLL)
 					SAFEPRINTF(str, text[DeleteTextFileQ], "Poll");
 				else
-					SAFEPRINTF2(str,text[DeletePostQ], smb.curmsg+1, msg.subj);
+					SAFEPRINTF2(str,text[DeletePostQ], smb.curmsg+1, msghdr_field(&msg, msg.subj));
 				if(!(msg.hdr.attr&MSG_DELETE) && noyes(str)) {
 					domsg = false;
 					break;
@@ -1100,7 +1100,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find)
 					&& stricmp(msg.to,useron.alias))
 					break;
 				SAFEPRINTF2(str2,text[Regarding]
-					,msg.subj
+					,msghdr_field(&msg, msg.subj)
 					,timestr(msg.hdr.when_written.time));
 				if(msg.from_net.addr==NULL)
 					SAFECOPY(str,msg.from);
@@ -1214,7 +1214,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find)
 						if(msg.hfield[i].type == SMB_POLL_ANSWER)
 							strListPush(&answers, (char*)msg.hfield_dat[i]);
 					}
-					SAFEPRINTF(str, text[BallotHdr], msg.subj);
+					SAFEPRINTF(str, text[BallotHdr], msghdr_field(&msg, msg.subj));
 					i = mselect(str, answers, msg.hdr.votes ? msg.hdr.votes : 1, text[BallotAnswerFmt]
 						,text[PollAnswerChecked], nulstr, text[BallotVoteWhich]);
 					strListFree(&answers);
@@ -1344,7 +1344,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find)
 									errormsg(WHERE,ERR_READ,smb.file,msg.idx.number);
 									break; 
 								}
-								SAFEPRINTF2(str,text[DeletePostQ], smb.curmsg+1, msg.subj);
+								SAFEPRINTF2(str,text[DeletePostQ], smb.curmsg+1, msghdr_field(&msg, msg.subj));
 								if(movemsg(&msg,subnum) && yesno(str)) {
 									msg.idx.attr|=MSG_DELETE;
 									msg.hdr.attr=msg.idx.attr;
diff --git a/src/sbbs3/sbbs.h b/src/sbbs3/sbbs.h
index d2365d0b45..7f3689e862 100644
--- a/src/sbbs3/sbbs.h
+++ b/src/sbbs3/sbbs.h
@@ -690,6 +690,8 @@ public:
 	bool	msgtotxt(smb_t*, smbmsg_t*, const char *fname, bool header = true, ulong gettxt_mode = GETMSGTXT_ALL);
 	const char* msghdr_text(const smbmsg_t*, uint index);
 	char	msghdr_utf8_text[128];
+	const char* msghdr_field(const smbmsg_t*, const char* str, char* buf = NULL, bool can_utf8 = false);
+	char	msgghdr_field_cp437_str[128];
 	ulong	getlastmsg(uint subnum, uint32_t *ptr, time_t *t);
 	time_t	getmsgtime(uint subnum, ulong ptr);
 	ulong	getmsgnum(uint subnum, time_t t);
@@ -705,6 +707,7 @@ public:
 	int		bulkmailhdr(smb_t*, smbmsg_t*, uint usernum);
 
 	/* con_out.cpp */
+	size_t	bstrlen(const char *str, long mode = 0);
 	int		bputs(const char *str, long mode = 0);	/* BBS puts function */
 	int		rputs(const char *str, size_t len=0);	/* BBS raw puts function */
 	int		bprintf(const char *fmt, ...)			/* BBS printf function */
@@ -748,7 +751,7 @@ public:
 	bool	saveline(void);
 	bool	restoreline(void);
 	int		petscii_to_ansibbs(unsigned char);
-	size_t	utf8_to_cp437(const char*, size_t);
+	size_t	print_utf8_as_cp437(const char*, size_t);
 	int		attr(int);				/* Change text color/attributes */
 	void	ctrl_a(char);			/* Performs Ctrl-Ax attribute changes */
 	char*	auto_utf8(const char*, long* mode);
@@ -1406,7 +1409,6 @@ extern "C" {
 #endif
 
 /* str_util.c */
-size_t	bstrlen(const char *str);
 char*	backslashcolon(char *str);
 ulong	ahtoul(const char *str);	/* Converts ASCII hex to ulong */
 char *	hexplus(uint num, char *str); 	/* Hex plus for 3 digits up to 9000 */
diff --git a/src/sbbs3/sbbsdefs.h b/src/sbbs3/sbbsdefs.h
index deac93da44..7b13063903 100644
--- a/src/sbbs3/sbbsdefs.h
+++ b/src/sbbs3/sbbsdefs.h
@@ -412,6 +412,7 @@ typedef enum {						/* Values for xtrn_t.event				*/
 #define XTRN_NOECHO		(1<<20)		/* Don't echo stdin to stdout			*/
 #define QUOTEWRAP		(1<<21)		/* Word-wrap quoted message text		*/
 #define SAVECOLUMNS		(1<<22)		/* Save/share current terminal width	*/
+#define XTRN_UTF8		(1<<23)		/* External program supports UTF-8		*/
 #define XTRN_CONIO		(1<<31)		/* Intercept Windows Console I/O (Drwy)	*/
 
 									/* Bits in cfg.xtrn_misc				*/
diff --git a/src/sbbs3/str_util.c b/src/sbbs3/str_util.c
index 4d57a36081..e7302ca28a 100644
--- a/src/sbbs3/str_util.c
+++ b/src/sbbs3/str_util.c
@@ -352,30 +352,6 @@ str_list_t DLLCALL trashcan_list(scfg_t* cfg, const char* name)
 	return findstr_list(trashcan_fname(cfg, name, fname, sizeof(fname)));
 }
 
-/****************************************************************************/
-/* Returns the printed columns from 'str' accounting for Ctrl-A codes		*/
-/****************************************************************************/
-size_t bstrlen(const char *str)
-{
-	size_t i=0;
-
-	while(*str) {
-		if(*str==CTRL_A) {
-			str++;
-			if(*str==0 || *str=='Z')	/* EOF */
-				break;
-			if(*str=='[')
-				i=0;
-			else if(*str=='<' && i)
-				i--;
-		} else
-			i++;
-		if(!(*str)) break;
-		str++; 
-	}
-	return(i);
-}
-
 /****************************************************************************/
 /* Returns in 'string' a character representation of the number in l with   */
 /* commas.																	*/
-- 
GitLab