diff --git a/src/sbbs3/con_out.cpp b/src/sbbs3/con_out.cpp
index 75c4a6666d92db361ac330e1fafd17c28d24e4ac..633dba2e55d8255190d0dc08e391122fc803ca9e 100644
--- a/src/sbbs3/con_out.cpp
+++ b/src/sbbs3/con_out.cpp
@@ -41,7 +41,8 @@
 /**********************************************************************/
 
 #include "sbbs.h"
-#include "cp437_utf8_tbl.h"
+#include "utf8.h"
+#include "cp437_unicode_tbl.h"
 
 /****************************************************************************/
 /* Outputs a NULL terminated string locally and remotely (if applicable) */
@@ -246,6 +247,261 @@ int sbbs_t::petscii_to_ansibbs(unsigned char ch)
 	return 0;
 }
 
+// Outputs the UTF-8 sequence at the start of 'str' as CP437.
+// Uses an exact cp437_unicode_tbl match when there is one, otherwise a
+// look-alike character from the switch below, otherwise an inverted question
+// mark (variation selectors are consumed without any output).
+// 'len' is the number of bytes available at 'str'.
+// Returns the number of bytes of 'str' consumed (always at least 1).
+size_t sbbs_t::utf8_to_cp437(const char* str, size_t len)
+{
+	if(((*str)&0x80) == 0) {	// 7-bit character: output as-is
+		outchar(*str);
+		return sizeof(char);
+	}
+	uint32_t codepoint = 0;
+	// utf8_getc() signals failure with a negative result (hence the "%d"
+	// below), so the result must stay signed until it has been checked:
+	// stored in a size_t, an error would look like an enormous length.
+	int result = utf8_getc(str, len, &codepoint);
+	if(result < 2) {
+		bprintf("Invalid UTF-8 sequence: %02X (error = %d)", (uchar)*str, result);
+		return 1;
+	}
+	len = result;
+	// Exact match in the CP437 table (index 0 is never searched)
+	for(int i = 1; i < 0x100; i++) {
+		if(cp437_unicode_tbl[i]
+			&& cp437_unicode_tbl[i] == codepoint) {
+			outchar(i);
+			return len;
+		}
+	}
+	char ch = 0;
+	// No exact match: fall back to a similar-looking character
+	switch(codepoint) {
+		case 0x00A9:	// COPYRIGHT SIGN
+			outchar('(');
+			outchar('C');
+			ch = ')';
+			break;
+		case 0x00AE:	// REGISTERED SIGN
+			outchar('(');
+			outchar('R');
+			ch = ')';
+			break;
+		case 0x00B4:	// ACUTE ACCENT
+			ch = '\'';
+			break;
+		case 0x00CD:	// LATIN CAPITAL LETTER I WITH ACUTE
+			ch = '\xA1';	// Lower-case Letter i with Acute
+			break;
+		case 0x2014:	// EM DASH
+			ch = '\xC4';
+			break;
+		case 0x2022:	// BULLET
+			ch = '\xF9';
+			break;
+		case 0x203E:	// OVERLINE
+		case 0x2500:	// Box Drawings Light Horizontal
+		case 0x2501:	// Box Drawings Heavy Horizontal
+		case 0x2504:	// Box Drawings Light Triple Dash Horizontal
+		case 0x2505:	// Box Drawings Heavy Triple Dash Horizontal
+		case 0x2508:	// Box Drawings Light Quadruple Dash Horizontal
+		case 0x2509:	// Box Drawings Heavy Quadruple Dash Horizontal
+		case 0x254C:	// Box Drawings Light Double Dash Horizontal
+		case 0x254D:	// Box Drawings Heavy Double Dash Horizontal
+		case 0x2574:	// Box Drawings Light Left
+		case 0x2576:	// Box Drawings Light Right
+		case 0x2578:	// Box Drawings Heavy Left
+		case 0x257A:	// Box Drawings Heavy Right
+		case 0x257C:	// Box Drawings Light Left and Heavy Right
+		case 0x257E:	// Box Drawings Heavy Left and Light Right
+			ch = '\xC4';
+			break;
+		case 0x2502:	// Box Drawings Light Vertical
+		case 0x2503:	// Box Drawings Heavy Vertical
+		case 0x2506:	// Box Drawings Light Triple Dash Vertical
+		case 0x2507:	// Box Drawings Heavy Triple Dash Vertical
+		case 0x250A:	// Box Drawings Light Quadruple Dash Vertical
+		case 0x250B:	// Box Drawings Heavy Quadruple Dash Vertical
+			ch = '\xB3';
+			break;
+		case 0x250C:	// BOX DRAWINGS LIGHT DOWN AND RIGHT
+		case 0x250D:
+		case 0x250E:
+		case 0x250F:	// BOX DRAWINGS HEAVY DOWN AND RIGHT
+			ch = '\xDA';
+			break;
+		case 0x2510:	// BOX DRAWINGS LIGHT DOWN AND LEFT
+		case 0x2511:
+		case 0x2512:
+		case 0x2513:	// BOX DRAWINGS HEAVY DOWN AND LEFT
+			ch = '\xBF';
+			break;
+		case 0x2514:	// BOX DRAWINGS LIGHT UP AND RIGHT
+		case 0x2515:
+		case 0x2516:
+		case 0x2517:	// BOX DRAWINGS HEAVY UP AND RIGHT
+			ch = '\xC0';
+			break;
+		case 0x2518:	// BOX DRAWINGS LIGHT UP AND LEFT
+		case 0x2519:
+		case 0x251A:
+		case 0x251B:	// BOX DRAWINGS HEAVY UP AND LEFT
+			ch = '\xD9';
+			break;
+		case 0x251C:	// BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+		case 0x251D:
+		case 0x251E:
+		case 0x251F:
+		case 0x2520:
+		case 0x2521:
+		case 0x2522:
+		case 0x2523:	// BOX DRAWINGS HEAVY VERTICAL AND RIGHT
+			ch = '\xC3';
+			break;
+		case 0x2524:	// BOX DRAWINGS LIGHT VERTICAL AND LEFT
+		case 0x2525:
+		case 0x2526:
+		case 0x2527:
+		case 0x2528:
+		case 0x2529:
+		case 0x252A:
+		case 0x252B:
+			ch = '\xB4';
+			break;
+		case 0x252C:	// BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+		case 0x252D:
+		case 0x252E:
+		case 0x252F:
+		case 0x2530:
+		case 0x2531:
+		case 0x2532:	// BOX DRAWINGS LEFT LIGHT AND RIGHT DOWN HEAVY
+		case 0x2533:	// BOX DRAWINGS HEAVY DOWN AND HORIZONTAL
+			ch = '\xC2';
+			break;
+		case 0x2534:	// BOX DRAWINGS LIGHT UP AND HORIZONTAL
+		case 0x2535:	// BOX DRAWINGS LEFT HEAVY AND RIGHT UP LIGHT
+		case 0x2536:	// BOX DRAWINGS RIGHT HEAVY AND LEFT UP LIGHT
+		case 0x2537:	// BOX DRAWINGS UP LIGHT AND HORIZONTAL HEAVY
+		case 0x2538:	// BOX DRAWINGS UP HEAVY AND HORIZONTAL LIGHT
+		case 0x2539:	// BOX DRAWINGS RIGHT LIGHT AND LEFT UP HEAVY
+		case 0x253A:	// BOX DRAWINGS LEFT LIGHT AND RIGHT UP HEAVY
+		case 0x253B:	// BOX DRAWINGS HEAVY UP AND HORIZONTAL
+			ch = '\xC1';
+			break;
+		case 0x253C:	// BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+		case 0x253D:	// BOX DRAWINGS LEFT HEAVY AND RIGHT VERTICAL LIGHT
+		case 0x253E:	// BOX DRAWINGS RIGHT HEAVY AND LEFT VERTICAL LIGHT
+		case 0x253F:	// BOX DRAWINGS VERTICAL LIGHT AND HORIZONTAL HEAVY
+		case 0x2540:	// BOX DRAWINGS UP HEAVY AND DOWN HORIZONTAL LIGHT
+		case 0x2541:	// BOX DRAWINGS DOWN HEAVY AND UP HORIZONTAL LIGHT
+		case 0x2542:	// BOX DRAWINGS VERTICAL HEAVY AND HORIZONTAL LIGHT
+		case 0x2543:	// BOX DRAWINGS LEFT UP HEAVY AND RIGHT DOWN LIGHT
+		case 0x2544:	// BOX DRAWINGS RIGHT UP HEAVY AND LEFT DOWN LIGHT
+		case 0x2545:	// BOX DRAWINGS LEFT DOWN HEAVY AND RIGHT UP LIGHT
+		case 0x2546:	// BOX DRAWINGS RIGHT DOWN HEAVY AND LEFT UP LIGHT
+		case 0x2547:	// BOX DRAWINGS DOWN LIGHT AND UP HORIZONTAL HEAVY
+		case 0x2548:	// BOX DRAWINGS UP LIGHT AND DOWN HORIZONTAL HEAVY
+		case 0x2549:	// BOX DRAWINGS RIGHT LIGHT AND LEFT VERTICAL HEAVY
+		case 0x254A:	// BOX DRAWINGS LEFT LIGHT AND RIGHT VERTICAL HEAVY
+		case 0x254B:	// BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL
+			ch = '\xC5';
+			break;
+		case 0x254E:	// BOX DRAWINGS LIGHT DOUBLE DASH VERTICAL
+		case 0x254F:	// BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL
+			ch = '|';
+			break;
+		case 0x256D:	// BOX DRAWINGS LIGHT ARC DOWN AND RIGHT
+			ch = '\xDA';
+			break;
+		case 0x256E:	// BOX DRAWINGS LIGHT ARC DOWN AND LEFT
+			ch = '\xBF';
+			break;
+		case 0x256F:	// BOX DRAWINGS LIGHT ARC UP AND LEFT
+			ch = '\xD9';
+			break;
+		case 0x2570:	// BOX DRAWINGS LIGHT ARC UP AND RIGHT
+			ch = '\xC0';
+			break;
+		case 0x2571:	// BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
+			ch = '/';
+			break;
+		case 0x2572:	// BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
+			ch = '\\';
+			break;
+		case 0x2573:	// BOX DRAWINGS LIGHT DIAGONAL CROSS
+			ch = 'X';
+			break;
+		case 0x2575:	// Box Drawings Light Up
+		case 0x2577:	// Box Drawings Light Down
+		case 0x2579:	// Box Drawings Heavy Up
+		case 0x257B:	// Box Drawings Heavy Down
+		case 0x257D:	// Box Drawings Light Up and Heavy Down
+		case 0x257F:	// Box Drawings Heavy Up and Light Down
+			ch = '\xB3';
+			break;
+		case 0x2581:	// Lower One Eighth Block
+			ch = '_';
+			break;
+		case 0x2582:	// Lower One Quarter Block
+		case 0x2583:	// Lower Three Eighths Block
+			ch = '\x16';
+			break;
+		case 0x2585:	// Lower Five Eighths Block
+		case 0x2586:	// Lower Three Quarters Block
+		case 0x2587:	// Lower Seven Eighths Block
+			ch = '\xDC';
+			break;
+		case 0x2588:	// Full Block
+		case 0x2589:	// Left Seven Eighths Block
+			ch = '\xDB';
+			break;
+		case 0x258A:	// Left Three Quarters Block
+		case 0x258B:	// Left Five Eighths Block
+		case 0x258C:	// Left Half Block
+		case 0x258D:	// Left Three Eighths Block
+		case 0x258E:	// Left One Quarter Block
+		case 0x258F:	// Left One Eighth Block
+			ch = '\xDD';
+			break;
+		case 0x2590:	// Right Half Block
+		case 0x2595:	// Right One Eighth Block
+			ch = '\xDE';
+			break;
+		case 0x2594:	// Upper One Eighth Block
+			ch = '\xDF';
+			break;
+		case 0xFE00:	// VARIATION SELECTOR-1
+		case 0xFE01:	// VARIATION SELECTOR-2
+		case 0xFE02:	// VARIATION SELECTOR-3
+		case 0xFE03:	// VARIATION SELECTOR-4
+		case 0xFE04:	// VARIATION SELECTOR-5
+		case 0xFE05:	// VARIATION SELECTOR-6
+		case 0xFE06:	// VARIATION SELECTOR-7
+		case 0xFE07:	// VARIATION SELECTOR-8
+		case 0xFE08:	// VARIATION SELECTOR-9
+		case 0xFE09:	// VARIATION SELECTOR-10
+		case 0xFE0A:	// VARIATION SELECTOR-11
+		case 0xFE0B:	// VARIATION SELECTOR-12
+		case 0xFE0C:	// VARIATION SELECTOR-13
+		case 0xFE0D:	// VARIATION SELECTOR-14
+		case 0xFE0E:	// VARIATION SELECTOR-15
+		case 0xFE0F:	// VARIATION SELECTOR-16
+			return len;	// consumed, nothing displayed
+	}
+	if(ch)
+		outchar(ch);
+	else {
+		outchar('\xA8');	// Inverted question mark
+		char seq[32] = "";
+		for(size_t i = 0; i < len; i++)
+			sprintf(seq + strlen(seq), "%02X ", (uchar)*(str + i));
+		lprintf(LOG_DEBUG, "Unsupported UTF-8 sequence: %s (U+%X)", seq, codepoint);
+	}
+	return len;
+}
 
 /****************************************************************************/
 /* Raw put string (remotely) */
@@ -403,12 +659,15 @@ int sbbs_t::outchar(char ch)
 	else
 		outchar_esc=0;
 	long term = term_supports();
-	const char* utf8 = NULL;
+	char utf8[UTF8_MAX_LEN + 1] = "";
 	if(!(term&PETSCII)) {
 		if((term&NO_EXASCII) && (ch&0x80))
 			ch = exascii_to_ascii_char(ch); /* seven bit table */
-		else if(term&UTF8)
-			utf8 = cp437_utf8_tbl[(uchar)ch];
+		else if(term&UTF8) {
+			uint32_t codepoint = cp437_unicode_tbl[(uchar)ch];
+			if(codepoint != 0)
+				utf8_putc(utf8, sizeof(utf8) - 1, codepoint);
+		}
 	}
 
 	if(ch==FF && lncntr > 0 && !tos) {
@@ -459,7 +718,7 @@ int sbbs_t::outchar(char ch)
 			if(ch == '\r' && (curatr&0xf0) != 0) // reverse video is disabled upon CR
 				curatr >>= 4;
 		} else {
-			if(utf8 != NULL)
+			if(utf8[0] != 0)
 				putcom(utf8);
 			else
 				outcom(ch);
diff --git a/src/sbbs3/getmsg.cpp b/src/sbbs3/getmsg.cpp
index 06b45be38f67de0e397c6065d16fab655b430bfa..198fe517fa4e19a9bf29dcaefc0ad6b6f1608e0f 100644
--- a/src/sbbs3/getmsg.cpp
+++ b/src/sbbs3/getmsg.cpp
@@ -39,6 +39,7 @@
 /***********************************************************************/
 
 #include "sbbs.h"
+#include "utf8.h"
 
 /****************************************************************************/
 /* Loads an SMB message from the open msg base the fastest way possible */
@@ -286,6 +287,11 @@ bool sbbs_t::show_msg(smb_t* smb, smbmsg_t* msg, long p_mode, post_t* post)
 	}
 	truncsp(p);
 	SKIP_CRLF(p);
+	if(smb_msg_is_utf8(msg)) {
+		if(!term_supports(UTF8))
+			utf8_normalize_str(txt);
+		p_mode |= P_UTF8;
+	}
 	putmsg(p, p_mode, msg->columns);
 	smb_freemsgtxt(txt);
 	if(column)
diff --git a/src/sbbs3/prntfile.cpp b/src/sbbs3/prntfile.cpp
index ef567c6cb12a052a60969193e206db746a760f0f..b73601fa53ae78f1cec10cbba9d2e7861a3c6d97 100644
--- a/src/sbbs3/prntfile.cpp
+++ b/src/sbbs3/prntfile.cpp
@@ -37,6 +37,7 @@
  ****************************************************************************/
 
 #include "sbbs.h"
+#include "utf8.h"
 
 /****************************************************************************/
 /* Prints a file remotely and locally, interpreting ^A sequences, checks */
@@ -62,6 +63,8 @@ bool sbbs_t::printfile(const char* fname, long mode, long org_cols)
 			mode|=P_NOPAUSE;
 		} else if(stricmp(p, ".seq") == 0) {
 			mode |= P_PETSCII;
+		} else if(stricmp(p, ".utf8") == 0) {
+			mode |= P_UTF8;
 		}
 	}
 
@@ -105,6 +108,8 @@ bool sbbs_t::printfile(const char* fname, long mode, long org_cols)
 			errormsg(WHERE,ERR_READ,fpath,length);
 		else {
 			buf[l]=0;
+			if((mode&P_UTF8) && !term_supports(UTF8))
+				utf8_normalize_str(buf);
 			putmsg(buf,mode,org_cols);
 		}
 		free(buf);
diff --git a/src/sbbs3/putmsg.cpp b/src/sbbs3/putmsg.cpp
index fe4ee05c7ce086cc0c55681e3e7b937bfda38609..5e9e00a297c19e055deb746e59e3eebe1cee586e 100644
--- a/src/sbbs3/putmsg.cpp
+++ b/src/sbbs3/putmsg.cpp
@@ -65,6 +65,10 @@ char sbbs_t::putmsg(const char *buf, long mode, long org_cols)
 	attr(LIGHTGRAY);
 	if(mode&P_NOPAUSE)
 		sys_status|=SS_PAUSEOFF;
+	if(strncmp(str, "\xEF\xBB\xBF", 3) == 0) {
+		mode |= P_UTF8;
+		str += 3;
+	}
 	long term = term_supports();
 	if(!(mode&P_NOATCODES) && memcmp(str, "@WRAPOFF@", 9) == 0) {
 		mode &= ~P_WORDWRAP;
@@ -92,7 +96,8 @@ char sbbs_t::putmsg(const char *buf, long mode, long org_cols)
 		}
 	}
 
-	while(str[l] && (mode&P_NOABORT || !msgabort()) && online) {
+	size_t len = strlen(str);
+	while(l < len && (mode&P_NOABORT || !msgabort()) && online) {
 		switch(str[l]) {
 			case '\r':
 			case '\n':
@@ -355,14 +360,20 @@ char sbbs_t::putmsg(const char *buf, long mode, long org_cols)
 				}
 				if(mode&P_CPM_EOF && str[l]==CTRL_Z)
 					break;
+				size_t skip = sizeof(char);
 				if(mode&P_PETSCII) {
 					if(term&PETSCII)
 						outcom(str[l]);
 					else
 						petscii_to_ansibbs(str[l]);
+				} else if((str[l]&0x80) && (mode&P_UTF8)) {
+					if(term&UTF8)
+						outcom(str[l]);
+					else
+						skip = utf8_to_cp437(str + l, len - l);
 				} else
 					outchar(str[l]);
-				l++;
+				l += skip;
 		}
 	}
 	if(!(mode&P_SAVEATR)) {
diff --git a/src/sbbs3/sbbs.h b/src/sbbs3/sbbs.h
index eb9d4381adafdeb6bd33794f9719c3c1365dbcbe..c29bf5dd0ede2a7d8d6c46aa84c73d6229f5b12b 100644
--- a/src/sbbs3/sbbs.h
+++ b/src/sbbs3/sbbs.h
@@ -734,6 +734,7 @@ public:
 	bool	saveline(void);
 	bool	restoreline(void);
 	int		petscii_to_ansibbs(unsigned char);
+	size_t	utf8_to_cp437(const char*, size_t);
 	int		attr(int);				/* Change text color/attributes */
 	void	ctrl_a(char);			/* Performs Ctrl-Ax attribute changes */
 
diff --git a/src/sbbs3/sbbsdefs.h b/src/sbbs3/sbbsdefs.h
index c098ecda7b3875c981f930da876c2eb43191ebf4..e4cd2a7ac29d22919ef329679d74eb41df3e8417 100644
--- a/src/sbbs3/sbbsdefs.h
+++ b/src/sbbs3/sbbsdefs.h
@@ -758,6 +758,7 @@ typedef enum { /* Values for xtrn_t.event */
 #define P_NOERROR	(1<<10)		/* Don't report error if file doesn't exist */
 #define P_PETSCII	(1<<11)		/* Message is native PETSCII */
 #define P_WRAP		(1<<12)		/* Wrap/split long-lines, ungracefully */
+#define P_UTF8		(1<<13)		/* Message is UTF-8 */
 
 /* Bits in 'mode' for listfiles */
 #define FL_ULTIME	(1<<0)		/* List files by upload time */