diff --git a/src/encode/utf8.c b/src/encode/utf8.c index 20bae23fddb261dfea756090f21d3393dd7f8235..2f57eaf3cf1487e605be5ef7fee483d5fd434ae7 100644 --- a/src/encode/utf8.c +++ b/src/encode/utf8.c @@ -167,7 +167,7 @@ char* utf8_replace_chars(char* str, char (*lookup)(enum unicode_codepoint), char continue; } } - if(unicode_width(codepoint) == 0) { + if(unicode_is_zerowidth(codepoint)) { if(unsupported_zwch) *dest++ = unsupported_zwch; } @@ -191,7 +191,7 @@ bool utf8_str_is_valid(const char* str) } // Return the total printed-width of UTF-8 string (str) accounting for zero/half/full-width codepoints -size_t utf8_str_total_width(const char* str) +size_t utf8_str_total_width(const char* str, size_t zerowidth) { size_t count = 0; const char* end = str + strlen(str); @@ -200,14 +200,14 @@ size_t utf8_str_total_width(const char* str) int len = utf8_getc(str, end - str, &codepoint); if (len < 1) break; - count += unicode_width(codepoint); + count += unicode_width(codepoint, zerowidth); str += len; } return count; } // Return the count of chars within the specified width range in UTF-8 string (str) -size_t utf8_str_count_width(const char* str, size_t min_width, size_t max_width) +size_t utf8_str_count_width(const char* str, size_t min_width, size_t max_width, size_t zerowidth) { size_t count = 0; const char* end = str + strlen(str); @@ -216,7 +216,7 @@ size_t utf8_str_count_width(const char* str, size_t min_width, size_t max_width) int len = utf8_getc(str, end - str, &codepoint); if (len < 1) break; - size_t width = unicode_width(codepoint); + size_t width = unicode_width(codepoint, zerowidth); if(width >= min_width && width <= max_width) count++; str += len; diff --git a/src/encode/utf8.h b/src/encode/utf8.h index 1bfd4fe645c9680f55578dd0481506d81d70379e..a407457930ea64a42880843196a7c19648054c3c 100644 --- a/src/encode/utf8.h +++ b/src/encode/utf8.h @@ -39,10 +39,10 @@ int utf8_decode_firstbyte(char ch); bool utf8_str_is_valid(const char*); // Returns the fixed printed-width of the UTF-8 string -size_t utf8_str_total_width(const char*); +size_t utf8_str_total_width(const char*, size_t zerowidth); // Return the count of chars within the specified width range in UTF-8 string (str) -size_t utf8_str_count_width(const char*, size_t min_width, size_t max_width); +size_t utf8_str_count_width(const char*, size_t min_width, size_t max_width, size_t zerowidth); // Like strlcpy(), but doesn't leave a partial UTF-8 sequence at the end of dst size_t utf8_strlcpy(char* dst, const char* src, size_t size); diff --git a/src/sbbs3/answer.cpp b/src/sbbs3/answer.cpp index a6d5ebe3237b9bfd33c07cee38453b17ddf5b010..b47a0f8a1945ea44486be222a895e280eab889a2 100644 --- a/src/sbbs3/answer.cpp +++ b/src/sbbs3/answer.cpp @@ -501,8 +501,10 @@ bool sbbs_t::answer() if(x >= TERM_COLS_MIN && x <= TERM_COLS_MAX) cols=x; if(y >= TERM_ROWS_MIN && y <= TERM_ROWS_MAX) rows=y; } else { // second report - if(x < 3) // ZWNBSP didn't move cursor (more than one column) + if(x < 3) { // ZWNBSP didn't move cursor (more than one column) autoterm |= UTF8; + unicode_zerowidth = x - 1; + } } } else if(sscanf(p, "[=67;84;101;114;109;%u;%u", &x, &y) == 2 && *lastchar(p) == 'c') { lprintf(LOG_INFO,"received CTerm version report: %u.%u", x, y); diff --git a/src/sbbs3/atcodes.cpp b/src/sbbs3/atcodes.cpp index 5cac9aaf034a9a17603e4af26603d54959fb97c2..6d7464335e01d0db9c6f16fc25299d1a8aed0600 100644 --- a/src/sbbs3/atcodes.cpp +++ b/src/sbbs3/atcodes.cpp @@ -201,9 +201,9 @@ int sbbs_t::show_atcode(const char *instr, JSObject* obj) } if(pmode & P_UTF8) { if(term_supports(UTF8)) - fmt.disp_len += strlen(cp) - utf8_str_total_width(cp); + fmt.disp_len += strlen(cp) - utf8_str_total_width(cp, unicode_zerowidth); else - fmt.disp_len += strlen(cp) - utf8_str_count_width(cp, /* min: */1, /* max: */2); + fmt.disp_len += strlen(cp) - utf8_str_count_width(cp, /* min: */1, /* max: */2, unicode_zerowidth); } if(fmt.align == fmt.left) bprintf(pmode, "%-*.*s",fmt.disp_len,fmt.disp_len,cp); diff --git a/src/sbbs3/con_hi.cpp b/src/sbbs3/con_hi.cpp index 4d87c4877c4aceb4f3438c49254a8ddf9bb9aeb4..ca1ef5ccd09ec11e4c05d1bcb4d534d41e40152f 100644 --- a/src/sbbs3/con_hi.cpp +++ b/src/sbbs3/con_hi.cpp @@ -40,8 +40,9 @@ void sbbs_t::redrwstr(char *strin, int i, int l, int mode) column+=rprintf("%-*.*s",l,l,strin); cleartoeol(); if(i<l) { + auto_utf8(strin, mode); if(mode&P_UTF8) - l = utf8_str_total_width(strin); + l = utf8_str_total_width(strin, unicode_zerowidth); cursor_left(l-i); } } diff --git a/src/sbbs3/con_out.cpp b/src/sbbs3/con_out.cpp index b4699ac94571bddca95d65b112fdc870962d4df1..585380c8890761805031ce91d458943fc9fcfebc 100644 --- a/src/sbbs3/con_out.cpp +++ b/src/sbbs3/con_out.cpp @@ -29,7 +29,7 @@ char* sbbs_t::auto_utf8(const char* str, int& mode) { if(strncmp(str, "\xEF\xBB\xBF", 3) == 0) { mode |= P_UTF8; - return (char*)(str + 3); + return (char*)str; } if(mode & P_AUTO_UTF8) { if(!str_is_ascii(str) && utf8_str_is_valid(str)) @@ -155,7 +155,7 @@ size_t sbbs_t::bstrlen(const char *str, int mode) len = utf8_getc(str, end - str, &codepoint); if(len < 1) break; - count += unicode_width(codepoint);; + count += unicode_width(codepoint, unicode_zerowidth); } else count++; str += len; @@ -347,7 +347,7 @@ size_t sbbs_t::print_utf8_as_cp437(const char* str, size_t len) char ch = unicode_to_cp437(codepoint); if(ch) outchar(ch); - else if(unicode_width(codepoint) > 0) { + else if(unicode_width(codepoint, unicode_zerowidth) > 0) { outchar(CP437_INVERTED_QUESTION_MARK); char seq[32] = ""; for(size_t i = 0; i < len; i++) @@ -796,7 +796,7 @@ int sbbs_t::outchar(enum unicode_codepoint codepoint, const char* cp437_fallback if(len < 1) return len; putcom(str, len); - inc_column(unicode_width(codepoint)); + inc_column(unicode_width(codepoint, unicode_zerowidth)); return 0; } if(cp437_fallback == NULL) diff --git a/src/sbbs3/email.cpp b/src/sbbs3/email.cpp index 1668d01c9c0c0b3e906c469c4d59b92bc3873e44..62bb3953caaa2b8672f0924fd61ab20d32bbce8d 100644 --- a/src/sbbs3/email.cpp +++ b/src/sbbs3/email.cpp @@ -21,6 +21,7 @@ #include "sbbs.h" #include "cmdshell.h" +#include "utf8.h" /****************************************************************************/ /* Mails a message to usernumber. 'top' is a buffer to place at beginning */ @@ -47,9 +48,9 @@ bool sbbs_t::email(int usernumber, const char *top, const char *subj, int mode, smbmsg_t msg; if(subj != NULL) - SAFECOPY(title, subj); + SAFECOPY_UTF8(title, subj); if(remsg != NULL && title[0] == 0) - SAFECOPY(title, remsg->subj); + SAFECOPY_UTF8(title, remsg->subj); if(useron.etoday>=cfg.level_emailperday[useron.level] && !SYSOP && !(useron.exempt&FLAG('M'))) { bputs(text[TooManyEmailsToday]); diff --git a/src/sbbs3/getstr.cpp b/src/sbbs3/getstr.cpp index 0f13685164ff5ace373ac7eee1164c1fa5a872fe..d715feb38d2b4f489316f789c9b0e90f921c1345 100644 --- a/src/sbbs3/getstr.cpp +++ b/src/sbbs3/getstr.cpp @@ -80,7 +80,7 @@ size_t sbbs_t::getstr(char *strout, size_t maxlen, int mode, const str_list_t hi } SAFECOPY(undo,str1); - i=l=strlen(str1); + i=l=bstrlen(str1, P_AUTO_UTF8); if(mode&K_AUTODEL && str1[0] && !(mode&K_NOECHO)) { ch=getkey(mode|K_GETSTR); attr(atr); @@ -92,7 +92,7 @@ size_t sbbs_t::getstr(char *strout, size_t maxlen, int mode, const str_list_t hi else { for(i=0;i<l;i++) outchar(BS); - column+=rputs(str1); + column+=bputs(str1, P_AUTO_UTF8); i=l; } if(ch!=' ' && ch!=TAB) @@ -353,7 +353,7 @@ size_t sbbs_t::getstr(char *strout, size_t maxlen, int mode, const str_list_t hi break; case CTRL_R: /* Ctrl-R Redraw Line */ if(!(mode&K_NOECHO)) - redrwstr(str1,i,l,K_MSG); + redrwstr(str1,i,l, P_AUTO_UTF8); break; case TERM_KEY_INSERT: /* Ctrl-V Toggles Insert/Overwrite */ if(mode&K_NOECHO) diff --git a/src/sbbs3/js_global.c b/src/sbbs3/js_global.c index 82673a060ae86c0a2b7ebb1a1a056a4ff8eefec5..e50393ce4e58f78362457264726df441128fa6ca 100644 --- a/src/sbbs3/js_global.c +++ b/src/sbbs3/js_global.c @@ -4668,6 +4668,7 @@ js_utf8_get_width(JSContext *cx, uintN argc, jsval *arglist) jsval *argv=JS_ARGV(cx, arglist); char* str = NULL; jsrefcount rc; + int zerowidth = 1; JS_SET_RVAL(cx, arglist, JSVAL_VOID); @@ -4680,7 +4681,7 @@ js_utf8_get_width(JSContext *cx, uintN argc, jsval *arglist) return JS_TRUE; rc=JS_SUSPENDREQUEST(cx); - size_t width = utf8_str_total_width(str); + size_t width = utf8_str_total_width(str, zerowidth); JS_RESUMEREQUEST(cx, rc); free(str); diff --git a/src/sbbs3/postmsg.cpp b/src/sbbs3/postmsg.cpp index df331dbe1df19499f40d2203db3371cac3bd55df..7724f0ae5b10ad3e0b95ae25c1c7f7a4323441bb 100644 --- a/src/sbbs3/postmsg.cpp +++ b/src/sbbs3/postmsg.cpp @@ -92,18 +92,18 @@ bool sbbs_t::postmsg(int subnum, int wm_mode, smb_t* resmb, smbmsg_t* remsg) } if(remsg) { - SAFECOPY(title, msghdr_field(remsg, remsg->subj, NULL, term_supports(UTF8))); + SAFECOPY_UTF8(title, msghdr_field(remsg, remsg->subj, NULL, term_supports(UTF8))); if(remsg->hdr.attr&MSG_ANONYMOUS) SAFECOPY(from,text[Anonymous]); else - SAFECOPY(from, msghdr_field(remsg, remsg->from, NULL, term_supports(UTF8))); + SAFECOPY_UTF8(from, msghdr_field(remsg, remsg->from, NULL, term_supports(UTF8))); // If user posted this message, reply to the original recipient again if(remsg->to != NULL && ((remsg->from_ext != NULL && atoi(remsg->from_ext)==useron.number) || stricmp(useron.alias,remsg->from) == 0 || stricmp(useron.name,remsg->from) == 0)) - SAFECOPY(touser, msghdr_field(remsg, remsg->to, NULL, term_supports(UTF8))); + SAFECOPY_UTF8(touser, msghdr_field(remsg, remsg->to, NULL, term_supports(UTF8))); else - SAFECOPY(touser,from); + SAFECOPY_UTF8(touser,from); if(remsg->to != NULL) strListPush(&names, remsg->to); msgattr=(ushort)(remsg->hdr.attr&MSG_PRIVATE); diff --git a/src/sbbs3/readmsgs.cpp b/src/sbbs3/readmsgs.cpp index c8a8c90b97a95dc243438d8d01c5744069932a73..807c54ba107de47b5507370822f3169874059bef 100644 --- a/src/sbbs3/readmsgs.cpp +++ b/src/sbbs3/readmsgs.cpp @@ -1013,7 +1013,7 @@ int sbbs_t::scanposts(int subnum, int mode, const char *find) ,msghdr_field(&msg, msg.subj) ,timestr(msg.hdr.when_written.time))); if(msg.from_net.addr==NULL) - SAFECOPY(str,msg.from); + SAFECOPY_UTF8(str,msg.from); else if(msg.from_net.type==NET_FIDO) SAFEPRINTF2(str,"%s@%s",msg.from ,smb_faddrtoa((faddr_t *)msg.from_net.addr,tmp)); diff --git a/src/sbbs3/sbbs.h b/src/sbbs3/sbbs.h index 16332498914a5a3c49641df781533e713e9831bd..41e3c477e65d415cb98bc98b474191c20c489af1 100644 --- a/src/sbbs3/sbbs.h +++ b/src/sbbs3/sbbs.h @@ -266,6 +266,8 @@ extern int thread_suid_broken; /* NPTL is no longer broken */ #include "xpdatetime.h" #include "unicode_defs.h" +#define SAFECOPY_UTF8(dst, src) if(utf8_str_is_valid(src)) utf8_strlcpy(dst, src, sizeof dst); else SAFECOPY(dst, src) + /***********************/ /* Synchronet-specific */ /***********************/ @@ -577,6 +579,7 @@ public: int tabstop = 8; /* Current symmetric-tabstop (size) */ int lastlinelen = 0; /* The previously displayed line length */ int autoterm=0; /* Auto-detected terminal type */ + size_t unicode_zerowidth=0; char terminal[TELNET_TERM_MAXLEN+1]{}; // <- answer() writes to this int cterm_version=0;/* (MajorVer*1000) + MinorVer */ link_list_t savedlines{}; diff --git a/src/xpdev/unicode.c b/src/xpdev/unicode.c index 26254fc2ba7663763e8897d3a82ed56d596a3072..9030bb39443f6a7bc00afceb000d00c7ff7ac60f 100644 --- a/src/xpdev/unicode.c +++ b/src/xpdev/unicode.c @@ -294,10 +294,9 @@ enum unicode_codepoint cp437_unicode_tbl[] = /* 0xFF */ UNICODE_NO_BREAK_SPACE }; -size_t unicode_width(enum unicode_codepoint u) +bool unicode_is_zerowidth(enum unicode_codepoint u) { switch(u) { - case UNICODE_UNDEFINED: case UNICODE_ZERO_WIDTH_SPACE: case UNICODE_ZERO_WIDTH_NON_JOINER: case UNICODE_ZERO_WIDTH_JOINER: @@ -318,6 +317,15 @@ size_t unicode_width(enum unicode_codepoint u) case UNICODE_VARIATION_SELECTOR_15: case UNICODE_VARIATION_SELECTOR_16: case UNICODE_ZERO_WIDTH_NO_BREAK_SPACE: + return true; + } + return false; +} + +size_t unicode_width(enum unicode_codepoint u, size_t zerowidth) +{ + switch(u) { + case UNICODE_UNDEFINED: return 0; // Exceptions to the ranges (blocks/sub-blocks) in the default case case UNICODE_CIRCLED_NUMBER_TEN_ON_BLACK_SQUARE: @@ -383,6 +391,8 @@ size_t unicode_width(enum unicode_codepoint u) || (u >= UNICODE_BLOCK_EXTA_SYMBOLS_AND_PICTOGRAPHS_BEGIN && u <= UNICODE_BLOCK_EXTA_SYMBOLS_AND_PICTOGRAPHS_END) ) return 2; + if(unicode_is_zerowidth(u)) + return zerowidth; return 1; } } diff --git a/src/xpdev/unicode.h b/src/xpdev/unicode.h index 28faf3990558709fa63d63a204be6947fbfe7bb7..ca74b29c614fe50b83e32c015fdfa46550e99ead 100644 --- a/src/xpdev/unicode.h +++ b/src/xpdev/unicode.h @@ -23,6 +23,7 @@ #define UNICODE_H_ #include <stdlib.h> +#include "gen_defs.h" #include "unicode_defs.h" #if defined(__cplusplus) @@ -30,7 +31,8 @@ extern "C" { #endif extern enum unicode_codepoint cp437_unicode_tbl[]; -size_t unicode_width(enum unicode_codepoint); +bool unicode_is_zerowidth(enum unicode_codepoint); +size_t unicode_width(enum unicode_codepoint, size_t zerowidth); char unicode_to_cp437(enum unicode_codepoint); char unicode_to_latin1(enum unicode_codepoint);