From 3313a87eeba8d44474037e65bad16aae38dad717 Mon Sep 17 00:00:00 2001 From: rswindell <> Date: Sun, 4 Aug 2019 22:48:38 +0000 Subject: [PATCH] More UTF-8 fun: - bstrlen() moved to sbbs_t, accepts an optional pmode argument so it can account for UTF-8 encoded strings correctly - JS console.strlen() now accepts an optional pmode argument (e.g. P_UTF8) - Renamed sbbs_t::utf8_to_cp437 to sbbs_t::print_utf8_as_cp437 - Create/use msghdr_field() to perform UTF-8->CP437 conversions as needed for printing/copying UTF-8 encoded message header fields. - Defined XTRN_UTF8 misc setting flag. If a message editor does *not* have this flag, it is assumed to *not* support UTF-8. Will likely use this for UTF-8 doors at some point too (none known to exist, yet). --- src/sbbs3/con_out.cpp | 36 ++++++++++++++++++++++++++++++++++-- src/sbbs3/getmsg.cpp | 25 +++++++++++++++++++++++++ src/sbbs3/js_console.cpp | 14 +++++++++++--- src/sbbs3/postmsg.cpp | 11 +++++++---- src/sbbs3/putmsg.cpp | 2 +- src/sbbs3/readmail.cpp | 10 +++++----- src/sbbs3/readmsgs.cpp | 16 ++++++++-------- src/sbbs3/sbbs.h | 6 ++++-- src/sbbs3/sbbsdefs.h | 1 + src/sbbs3/str_util.c | 24 ------------------------ 10 files changed, 96 insertions(+), 49 deletions(-) diff --git a/src/sbbs3/con_out.cpp b/src/sbbs3/con_out.cpp index 5e60f2ad1d..100968c4f2 100644 --- a/src/sbbs3/con_out.cpp +++ b/src/sbbs3/con_out.cpp @@ -123,13 +123,45 @@ int sbbs_t::bputs(const char *str, long mode) if(term&UTF8) outcom(str[l++]); else - l += utf8_to_cp437(str + l, len - l); + l += print_utf8_as_cp437(str + l, len - l); } else outchar(str[l++]); } return(l); } +/****************************************************************************/ +/* Returns the printed columns from 'str' accounting for Ctrl-A codes */ +/****************************************************************************/ +size_t sbbs_t::bstrlen(const char *str, long mode) +{ + str = auto_utf8(str, &mode); + size_t count = 0; + const char* end = str + strlen(str); + while 
(str < end) { + int len = 1; + if(*str == CTRL_A) { + str++; + if(*str == 0 || *str == 'Z') // EOF + break; + if(*str == '[') // CR + count = 0; + else if(*str == '<' && count) // ND-Backspace + count--; + } else if(((*str) & 0x80) && (mode&P_UTF8)) { + enum unicode_codepoint codepoint = UNICODE_UNDEFINED; + len = utf8_getc(str, end - str, &codepoint); + if(len < 1) + break; + count += unicode_width(codepoint);; + } else + count++; + str += len; + } + return count; +} + + /* Perform PETSCII terminal output translation (from ASCII/CP437) */ unsigned char cp437_to_petscii(unsigned char ch) { @@ -290,7 +322,7 @@ int sbbs_t::petscii_to_ansibbs(unsigned char ch) } // Return length of sequence -size_t sbbs_t::utf8_to_cp437(const char* str, size_t len) +size_t sbbs_t::print_utf8_as_cp437(const char* str, size_t len) { if(((*str)&0x80) == 0) { outchar(*str); diff --git a/src/sbbs3/getmsg.cpp b/src/sbbs3/getmsg.cpp index 234fbc64b4..1b28155b87 100644 --- a/src/sbbs3/getmsg.cpp +++ b/src/sbbs3/getmsg.cpp @@ -40,6 +40,8 @@ #include "sbbs.h" #include "utf8.h" +#include "unicode.h" +#include "cp437defs.h" /****************************************************************************/ /* Loads an SMB message from the open msg base the fastest way possible */ @@ -130,6 +132,29 @@ const char* sbbs_t::msghdr_text(const smbmsg_t* msg, uint index) return msghdr_utf8_text; } +// Returns a CP437 version of a message header field or UTF-8 if can_utf8 is true +// Doesn't do CP437->UTF-8 conversion +const char* sbbs_t::msghdr_field(const smbmsg_t* msg, const char* str, char* buf, bool can_utf8) +{ + if(msg == NULL || !(msg->hdr.auxattr & MSG_HFIELDS_UTF8)) + return str; + + if(can_utf8 && term_supports(UTF8)) + return str; + + if(buf == NULL) + buf = msgghdr_field_cp437_str; + + strncpy(buf, str, sizeof(msgghdr_field_cp437_str)); + utf8_normalize_str(buf); + utf8_replace_chars(buf, unicode_to_cp437 + ,/* unsupported char: */CP437_INVERTED_QUESTION_MARK + ,/* unsupported zero-width ch: */0 
+ ,/* decode error char: */CP437_INVERTED_EXCLAMATION_MARK); + + return buf; +} + /****************************************************************************/ /* Displays a message header to the screen */ /****************************************************************************/ diff --git a/src/sbbs3/js_console.cpp b/src/sbbs3/js_console.cpp index e4eb9ffb14..1615c068a2 100644 --- a/src/sbbs3/js_console.cpp +++ b/src/sbbs3/js_console.cpp @@ -1119,20 +1119,28 @@ static JSBool js_strlen(JSContext *cx, uintN argc, jsval *arglist) { jsval *argv=JS_ARGV(cx, arglist); + sbbs_t* sbbs; JSString* str; char* cstr; jsrefcount rc; + int32 pmode = 0; + + if((sbbs=(sbbs_t*)js_GetClassPrivate(cx, JS_THIS_OBJECT(cx, arglist), &js_console_class))==NULL) + return(JS_FALSE); JS_SET_RVAL(cx, arglist, JSVAL_VOID); if((str=JS_ValueToString(cx, argv[0]))==NULL) return(JS_FALSE); + if(argc > 1) + JS_ValueToInt32(cx, argv[1], &pmode); + JSSTRING_TO_MSTRING(cx, str, cstr, NULL); if(cstr==NULL) return JS_FALSE; rc=JS_SUSPENDREQUEST(cx); - JS_SET_RVAL(cx, arglist, INT_TO_JSVAL(bstrlen(cstr))); + JS_SET_RVAL(cx, arglist, INT_TO_JSVAL(sbbs->bstrlen(cstr, pmode))); free(cstr); JS_RESUMEREQUEST(cx, rc); return(JS_TRUE); @@ -2014,8 +2022,8 @@ static jsSyncMethodSpec js_console_functions[] = { ,JSDOCSTR("display a string double-wide on the screen (sending \"fullwidth\" Unicode characters when possible)") ,31702 }, - {"strlen", js_strlen, 1, JSTYPE_NUMBER, JSDOCSTR("text") - ,JSDOCSTR("returns the number of characters in text, excluding Ctrl-A codes") + {"strlen", js_strlen, 1, JSTYPE_NUMBER, JSDOCSTR("text [,mode=<tt>P_NONE</tt>]") + ,JSDOCSTR("returns the printed-length (number of columns) of the specified <i>text</i>, accounting for Ctrl-A codes") ,310 }, {"printfile", js_printfile, 1, JSTYPE_BOOLEAN, JSDOCSTR("filename [,mode=<tt>P_NONE</tt>] [,orig_columns=0") diff --git a/src/sbbs3/postmsg.cpp b/src/sbbs3/postmsg.cpp index 86923d35a8..5326f08ce0 100644 --- a/src/sbbs3/postmsg.cpp +++ 
b/src/sbbs3/postmsg.cpp @@ -91,20 +91,23 @@ bool sbbs_t::postmsg(uint subnum, long wm_mode, smb_t* resmb, smbmsg_t* remsg) uint reason; if(remsg) { - SAFECOPY(title, remsg->subj); + SAFECOPY(title, msghdr_field(remsg, remsg->subj, NULL, /* UTF8: */true)); if(remsg->hdr.attr&MSG_ANONYMOUS) SAFECOPY(from,text[Anonymous]); else - SAFECOPY(from,remsg->from); + SAFECOPY(from, msghdr_field(remsg, remsg->from, NULL, /* UTF8: */true)); // If user posted this message, reply to the original recipient again if(remsg->to != NULL && ((remsg->from_ext != NULL && atoi(remsg->from_ext)==useron.number) || stricmp(useron.alias,remsg->from) == 0 || stricmp(useron.name,remsg->from) == 0)) - SAFECOPY(touser,remsg->to); + SAFECOPY(touser, msghdr_field(remsg, remsg->to, NULL, /* UTF8: */true)); else SAFECOPY(touser,from); msgattr=(ushort)(remsg->hdr.attr&MSG_PRIVATE); - sprintf(top,text[RegardingByToOn],title,from,remsg->to + sprintf(top,text[RegardingByToOn] + ,title + ,from + ,msghdr_field(remsg, remsg->to, NULL, /* UTF8: */true) ,timestr(remsg->hdr.when_written.time) ,smb_zonestr(remsg->hdr.when_written.zone,NULL)); if(remsg->tags != NULL) diff --git a/src/sbbs3/putmsg.cpp b/src/sbbs3/putmsg.cpp index ca2902eed6..e79642b299 100644 --- a/src/sbbs3/putmsg.cpp +++ b/src/sbbs3/putmsg.cpp @@ -382,7 +382,7 @@ char sbbs_t::putmsg(const char *buf, long mode, long org_cols) if(term&UTF8) outcom(str[l]); else - skip = utf8_to_cp437(str + l, len - l); + skip = print_utf8_as_cp437(str + l, len - l); } else outchar(str[l]); l += skip; diff --git a/src/sbbs3/readmail.cpp b/src/sbbs3/readmail.cpp index e9e8597840..2ef52fdc07 100644 --- a/src/sbbs3/readmail.cpp +++ b/src/sbbs3/readmail.cpp @@ -353,15 +353,15 @@ void sbbs_t::readmail(uint usernumber, int which, long lm_mode) smb_getmsgidx(&smb,&msg); if(!stricmp(str2,str)) /* Reply to sender */ - SAFEPRINTF(str2,text[Regarding],msg.subj); + SAFEPRINTF(str2,text[Regarding], msghdr_field(&msg, msg.subj)); else /* Reply to other */ - 
SAFEPRINTF3(str2,text[RegardingByOn],msg.subj,msg.from + SAFEPRINTF3(str2,text[RegardingByOn], msghdr_field(&msg, msg.subj), msghdr_field(&msg, msg.from, tmp) ,timestr(msg.hdr.when_written.time)); p=strrchr(str,'@'); if(p) { /* name @addr */ replied=netmail(str,msg.subj,WM_NONE, &smb, &msg); - SAFEPRINTF(str2,text[DeleteMailQ],msg.from); + SAFEPRINTF(str2,text[DeleteMailQ],msghdr_field(&msg, msg.from)); } else { if(!msg.from_net.type && !stricmp(str,msg.from)) @@ -372,7 +372,7 @@ void sbbs_t::readmail(uint usernumber, int which, long lm_mode) replied=email(i,str2,msg.subj,WM_NONE, &smb, &msg); else replied=false; - SAFEPRINTF(str2,text[DeleteMailQ],msg.from); + SAFEPRINTF(str2,text[DeleteMailQ],msghdr_field(&msg, msg.from)); } if(replied==true && !(msg.hdr.attr&MSG_REPLIED)) { @@ -442,7 +442,7 @@ void sbbs_t::readmail(uint usernumber, int which, long lm_mode) forwardmail(&msg,i); if(msg.hdr.attr&MSG_PERMANENT) break; - SAFEPRINTF(str2,text[DeleteMailQ],msg.from); + SAFEPRINTF(str2,text[DeleteMailQ],msghdr_field(&msg, msg.from)); if(!yesno(str2)) break; if(msg.total_hfields) diff --git a/src/sbbs3/readmsgs.cpp b/src/sbbs3/readmsgs.cpp index 3e0fe3b265..15aba09765 100644 --- a/src/sbbs3/readmsgs.cpp +++ b/src/sbbs3/readmsgs.cpp @@ -465,7 +465,7 @@ void sbbs_t::show_thread(uint32_t msgnum, post_t* post, unsigned curmsg, int thr ,(int)(cols-column-12) ,(int)(cols-column-12) ,msg.hdr.attr&MSG_ANONYMOUS && !sub_op(smb.subnum) - ? text[Anonymous] : msg.from + ? text[Anonymous] : msghdr_field(&msg, msg.from) ,(unsigned)i == curmsg ? 
'<' : ' ' ,msg_listing_flag(smb.subnum, &msg, &post[i]) ,unixtodstr(&cfg, msg.hdr.when_written.time, date)); @@ -728,7 +728,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find) break; } bprintf("\1n\1l\1h\1bThread\1n\1b: \1h\1c"); - bprintf("%-.*s\r\n", (int)(cols-(column+1)), msg.subj); + bprintf("%-.*s\r\n", (int)(cols-(column+1)), msghdr_field(&msg, msg.subj)); show_thread(first, post, smb.curmsg); subscan[subnum].last = post[smb.curmsg].idx.number; } @@ -809,11 +809,11 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find) if(sub_op(subnum) && (msg.hdr.attr&(MSG_MODERATED|MSG_VALIDATED)) == MSG_MODERATED) { uint16_t msg_attr = msg.hdr.attr; - SAFEPRINTF2(str,text[ValidatePostQ],smb.curmsg+1,msg.subj); + SAFEPRINTF2(str,text[ValidatePostQ],smb.curmsg+1,msghdr_field(&msg, msg.subj)); if(!noyes(str)) msg_attr|=MSG_VALIDATED; else { - SAFEPRINTF2(str,text[DeletePostQ],smb.curmsg+1,msg.subj); + SAFEPRINTF2(str,text[DeletePostQ],smb.curmsg+1,msghdr_field(&msg, msg.subj)); if(yesno(str)) msg_attr|=MSG_DELETE; } @@ -976,7 +976,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find) if(msg.hdr.type == SMB_MSG_TYPE_POLL) SAFEPRINTF(str, text[DeleteTextFileQ], "Poll"); else - SAFEPRINTF2(str,text[DeletePostQ], smb.curmsg+1, msg.subj); + SAFEPRINTF2(str,text[DeletePostQ], smb.curmsg+1, msghdr_field(&msg, msg.subj)); if(!(msg.hdr.attr&MSG_DELETE) && noyes(str)) { domsg = false; break; @@ -1100,7 +1100,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find) && stricmp(msg.to,useron.alias)) break; SAFEPRINTF2(str2,text[Regarding] - ,msg.subj + ,msghdr_field(&msg, msg.subj) ,timestr(msg.hdr.when_written.time)); if(msg.from_net.addr==NULL) SAFECOPY(str,msg.from); @@ -1214,7 +1214,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find) if(msg.hfield[i].type == SMB_POLL_ANSWER) strListPush(&answers, (char*)msg.hfield_dat[i]); } - SAFEPRINTF(str, text[BallotHdr], msg.subj); + SAFEPRINTF(str, text[BallotHdr], 
msghdr_field(&msg, msg.subj)); i = mselect(str, answers, msg.hdr.votes ? msg.hdr.votes : 1, text[BallotAnswerFmt] ,text[PollAnswerChecked], nulstr, text[BallotVoteWhich]); strListFree(&answers); @@ -1344,7 +1344,7 @@ int sbbs_t::scanposts(uint subnum, long mode, const char *find) errormsg(WHERE,ERR_READ,smb.file,msg.idx.number); break; } - SAFEPRINTF2(str,text[DeletePostQ], smb.curmsg+1, msg.subj); + SAFEPRINTF2(str,text[DeletePostQ], smb.curmsg+1, msghdr_field(&msg, msg.subj)); if(movemsg(&msg,subnum) && yesno(str)) { msg.idx.attr|=MSG_DELETE; msg.hdr.attr=msg.idx.attr; diff --git a/src/sbbs3/sbbs.h b/src/sbbs3/sbbs.h index d2365d0b45..7f3689e862 100644 --- a/src/sbbs3/sbbs.h +++ b/src/sbbs3/sbbs.h @@ -690,6 +690,8 @@ public: bool msgtotxt(smb_t*, smbmsg_t*, const char *fname, bool header = true, ulong gettxt_mode = GETMSGTXT_ALL); const char* msghdr_text(const smbmsg_t*, uint index); char msghdr_utf8_text[128]; + const char* msghdr_field(const smbmsg_t*, const char* str, char* buf = NULL, bool can_utf8 = false); + char msgghdr_field_cp437_str[128]; ulong getlastmsg(uint subnum, uint32_t *ptr, time_t *t); time_t getmsgtime(uint subnum, ulong ptr); ulong getmsgnum(uint subnum, time_t t); @@ -705,6 +707,7 @@ public: int bulkmailhdr(smb_t*, smbmsg_t*, uint usernum); /* con_out.cpp */ + size_t bstrlen(const char *str, long mode = 0); int bputs(const char *str, long mode = 0); /* BBS puts function */ int rputs(const char *str, size_t len=0); /* BBS raw puts function */ int bprintf(const char *fmt, ...) 
/* BBS printf function */ @@ -748,7 +751,7 @@ public: bool saveline(void); bool restoreline(void); int petscii_to_ansibbs(unsigned char); - size_t utf8_to_cp437(const char*, size_t); + size_t print_utf8_as_cp437(const char*, size_t); int attr(int); /* Change text color/attributes */ void ctrl_a(char); /* Performs Ctrl-Ax attribute changes */ char* auto_utf8(const char*, long* mode); @@ -1406,7 +1409,6 @@ extern "C" { #endif /* str_util.c */ -size_t bstrlen(const char *str); char* backslashcolon(char *str); ulong ahtoul(const char *str); /* Converts ASCII hex to ulong */ char * hexplus(uint num, char *str); /* Hex plus for 3 digits up to 9000 */ diff --git a/src/sbbs3/sbbsdefs.h b/src/sbbs3/sbbsdefs.h index deac93da44..7b13063903 100644 --- a/src/sbbs3/sbbsdefs.h +++ b/src/sbbs3/sbbsdefs.h @@ -412,6 +412,7 @@ typedef enum { /* Values for xtrn_t.event */ #define XTRN_NOECHO (1<<20) /* Don't echo stdin to stdout */ #define QUOTEWRAP (1<<21) /* Word-wrap quoted message text */ #define SAVECOLUMNS (1<<22) /* Save/share current terminal width */ +#define XTRN_UTF8 (1<<23) /* External program supports UTF-8 */ #define XTRN_CONIO (1<<31) /* Intercept Windows Console I/O (Drwy) */ /* Bits in cfg.xtrn_misc */ diff --git a/src/sbbs3/str_util.c b/src/sbbs3/str_util.c index 4d57a36081..e7302ca28a 100644 --- a/src/sbbs3/str_util.c +++ b/src/sbbs3/str_util.c @@ -352,30 +352,6 @@ str_list_t DLLCALL trashcan_list(scfg_t* cfg, const char* name) return findstr_list(trashcan_fname(cfg, name, fname, sizeof(fname))); } -/****************************************************************************/ -/* Returns the printed columns from 'str' accounting for Ctrl-A codes */ -/****************************************************************************/ -size_t bstrlen(const char *str) -{ - size_t i=0; - - while(*str) { - if(*str==CTRL_A) { - str++; - if(*str==0 || *str=='Z') /* EOF */ - break; - if(*str=='[') - i=0; - else if(*str=='<' && i) - i--; - } else - i++; - if(!(*str)) break; - 
str++; - } - return(i); -} - /****************************************************************************/ /* Returns in 'string' a character representation of the number in l with */ /* commas. */ -- GitLab