Synchronet now requires the libarchive development package (e.g. libarchive-dev on Debian-based Linux distros, libarchive.org for more info) to build successfully.

Commit 1340f563 authored by rswindell

Moved cp437_unicode_tbl and unicode_is_zerowidth() to (new file) unicode.c.

New function (derived from sbbs_t::utf8_to_cp437()): unicode_to_cp437()
New utf8 functions: utf8_replace_chars(), utf8_str_is_valid().
utf8_getc() enhancement: val arg may be NULL (for length/validation uses).

Convert quoted UTF-8 message text to CP437 when terminal is not UTF8.

Set Fido CHRS: UTF-8 header field when posted message is UTF-8.
parent c454fd57
......@@ -34,15 +34,9 @@
* Note: If this box doesn't appear square, then you need to fix your tabs. *
****************************************************************************/
/**********************************************************************/
/* Functions that pertain to console i/o - color, strings, chars etc. */
/* Called from functions everywhere */
/**********************************************************************/
#include "sbbs.h"
#include "utf8.h"
#include "cp437_unicode_tbl.h"
#include "unicode.h"
/****************************************************************************/
/* Outputs a NULL terminated string locally and remotely (if applicable) */
......@@ -267,222 +261,10 @@ size_t sbbs_t::utf8_to_cp437(const char* str, size_t len)
return len;
}
}
char ch = 0;
switch(codepoint) {
case 0x00A9: // COPYRIGHT SIGN
outchar('(');
outchar('C');
ch = ')';
break;
case 0x00AE: // REGISTERED SIGN
outchar('(');
outchar('R');
ch = ')';
break;
case 0x00B4: // ACUTE ACCENT
ch = '\'';
break;
case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
ch = '\xA1'; // Lower-case Letter i with Acute
break;
case 0x2014: // EM DASH
ch = '\xC4';
break;
case 0x2022: // BULLET
ch = '\xF9';
break;
case 0x203E: // OVERLINE
case 0x2500: // Box Drawings Light Horizontal
case 0x2501: // Box Drawings Heavy Horizontal
case 0x2504: // Box Drawings Light Triple Dash Horizontal
case 0x2505: // Box Drawings Heavy Triple Dash Horizontal
case 0x2508: // Box Drawings Light Quadruple Dash Horizontal
case 0x2509: // Box Drawings Heavy Quadruple Dash Horizontal
case 0x254C: // Box Drawings Light Double Dash Horizontal
case 0x254D: // Box Drawings Heavy Double Dash Horizontal
case 0x2574: // Box Drawings Light Left
case 0x2576: // Box Drawings Light Right
case 0x2578: // Box Drawings Heavy Left
case 0x257A: // Box Drawings Heavy Right
case 0x257C: // Box Drawings Light Left and Heavy Right
case 0x257E: // Box Drawings Heavy Left and Light Right
ch = '\xC4';
break;
case 0x2502: // Box Drawings Light Vertical
case 0x2503: // Box Drawings Heavy Vertical
case 0x2506: // Box Drawings Light Triple Dash Vertical
case 0x2507: // Box Drawings Heavy Triple Dash Vertical
case 0x250A: // Box Drawings Light Quadruple Dash Vertical
case 0x250B: // Box Drawings Heavy Quadruple Dash Vertical
ch = '\xB3';
break;
case 0x250C: // BOX DRAWINGS LIGHT DOWN AND RIGHT
case 0x250D:
case 0x250E:
case 0x250F: // BOX DRAWINGS HEAVY DOWN AND RIGHT
ch = '\xDA';
break;
case 0x2510: // BOX DRAWINGS LIGHT DOWN AND LEFT
case 0x2511:
case 0x2512:
case 0x2513: // BOX DRAWINGS HEAVY DOWN AND LEFT
ch = '\xBF';
break;
case 0x2514: // BOX DRAWINGS LIGHT UP AND RIGHT
case 0x2515:
case 0x2516:
case 0x2517: // BOX DRAWINGS HEAVY UP AND RIGHT
ch = '\xC0';
break;
case 0x2518: // BOX DRAWINGS LIGHT UP AND LEFT
case 0x2519:
case 0x251A:
case 0x251B: // BOX DRAWINGS HEAVY UP AND LEFT
ch = '\xD9';
break;
case 0x251C: // BOX DRAWINGS LIGHT VERTICAL AND RIGHT
case 0x251D:
case 0x251E:
case 0x251F:
case 0x2520:
case 0x2521:
case 0x2522:
case 0x2523: // BOX DRAWINGS HEAVY VERTICAL AND RIGHT
ch = '\xC3';
break;
case 0x2524: // BOX DRAWINGS LIGHT VERTICAL AND LEFT
case 0x2525:
case 0x2526:
case 0x2527:
case 0x2528:
case 0x2529:
case 0x252A:
case 0x252B:
ch = '\xB4';
break;
case 0x252C: // BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
case 0x252D:
case 0x252E:
case 0x252F:
case 0x2530:
case 0x2531:
case 0x2532: // BOX DRAWINGS LEFT LIGHT AND RIGHT DOWN HEAVY
case 0x2533: // BOX DRAWINGS HEAVY DOWN AND HORIZONTAL
ch = '\xC2';
break;
case 0x2534: // BOX DRAWINGS LIGHT UP AND HORIZONTAL
case 0x2535: // BOX DRAWINGS LEFT HEAVY AND RIGHT UP LIGHT
case 0x2536: // BOX DRAWINGS RIGHT HEAVY AND LEFT UP LIGHT
case 0x2537: // BOX DRAWINGS UP LIGHT AND HORIZONTAL HEAVY
case 0x2538: // BOX DRAWINGS UP HEAVY AND HORIZONTAL LIGHT
case 0x2539: // BOX DRAWINGS RIGHT LIGHT AND LEFT UP HEAVY
case 0x253A: // BOX DRAWINGS LEFT LIGHT AND RIGHT UP HEAVY
case 0x253B: // BOX DRAWINGS HEAVY UP AND HORIZONTAL
ch = '\xC1';
break;
case 0x253C: // BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
case 0x253D: // BOX DRAWINGS LEFT HEAVY AND RIGHT VERTICAL LIGHT
case 0x253E: // BOX DRAWINGS RIGHT HEAVY AND LEFT VERTICAL LIGHT
case 0x253F: // BOX DRAWINGS VERTICAL LIGHT AND HORIZONTAL HEAVY
case 0x2540: // BOX DRAWINGS UP HEAVY AND DOWN HORIZONTAL LIGHT
case 0x2541: // BOX DRAWINGS DOWN HEAVY AND UP HORIZONTAL LIGHT
case 0x2542: // BOX DRAWINGS VERTICAL HEAVY AND HORIZONTAL LIGHT
case 0x2543: // BOX DRAWINGS LEFT UP HEAVY AND RIGHT DOWN LIGHT
case 0x2544: // BOX DRAWINGS RIGHT UP HEAVY AND LEFT DOWN LIGHT
case 0x2545: // BOX DRAWINGS LEFT DOWN HEAVY AND RIGHT UP LIGHT
case 0x2546: // BOX DRAWINGS RIGHT DOWN HEAVY AND LEFT UP LIGHT
case 0x2547: // BOX DRAWINGS DOWN LIGHT AND UP HORIZONTAL HEAVY
case 0x2548: // BOX DRAWINGS UP LIGHT AND DOWN HORIZONTAL HEAVY
case 0x2549: // BOX DRAWINGS RIGHT LIGHT AND LEFT VERTICAL HEAVY
case 0x254A: // BOX DRAWINGS LEFT LIGHT AND RIGHT VERTICAL HEAVY
case 0x254B: // BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL
ch = '\xC5';
break;
case 0x254E: // BOX DRAWINGS LIGHT DOUBLE DASH VERTICAL
case 0x254F: // BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL
ch = '|';
break;
case 0x256D: // BOX DRAWINGS LIGHT ARC DOWN AND RIGHT
ch = '\xDA';
break;
case 0x256E: // BOX DRAWINGS LIGHT ARC DOWN AND LEFT
ch = '\xBF';
break;
case 0x256F: // BOX DRAWINGS LIGHT ARC UP AND LEFT
ch = '\xD9';
break;
case 0x2570: // BOX DRAWINGS LIGHT ARC UP AND RIGHT
ch = '\xC0';
break;
case 0x2571: // BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
ch = '/';
break;
case 0x2572: // BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
ch = '\\';
break;
case 0x2573: // BOX DRAWINGS LIGHT DIAGONAL CROSS
ch = 'X';
break;
case 0x2575: // Box Drawings Light Up
case 0x2577: // Box Drawings Light Down
case 0x2579: // Box Drawings Heavy Up
case 0x257B: // Box Drawings Heavy Down
case 0x257D: // Box Drawings Light Up and Heavy Down
case 0x257F: // Box Drawings Heavy Up and Light Down
ch = '\xB3';
break;
case 0x2581: // Lower One Eighth Block
ch = '_';
break;
case 0x2582: // Lower One Quarter Block
case 0x2583: // Lower Three Eighths Block
ch = '\x16';
break;
case 0x2585: // Lower Five Eighths Block
case 0x2586: // Lower Three Quarters Block
case 0x2587: // Lower Seven Eighths Block
ch = '\xDC';
break;
case 0x2588: // Full Block
case 0x2589: // Left Seven Eighths Block
ch = '\xDB';
break;
case 0x258A: // Left Three Quarters Block
case 0x258B: // Left Five Eighths Block
case 0x258C: // Left Half Block
case 0x258D: // Left Three Eighths Block
case 0x258E: // Left One Quarter Block
case 0x258F: // Left One Eighth Block
ch = '\xDD';
break;
case 0x2590: // Right Half Block
case 0x2595: // Right One Eighth Block
ch = '\xDE';
break;
case 0x2594: // Upper One Eighth Block
ch = '\xDF';
break;
case 0xFE00: // VARIATION SELECTOR-1
case 0xFE01: // VARIATION SELECTOR-2
case 0xFE02: // VARIATION SELECTOR-3
case 0xFE03: // VARIATION SELECTOR-4
case 0xFE04: // VARIATION SELECTOR-5
case 0xFE05: // VARIATION SELECTOR-6
case 0xFE06: // VARIATION SELECTOR-7
case 0xFE07: // VARIATION SELECTOR-8
case 0xFE08: // VARIATION SELECTOR-9
case 0xFE09: // VARIATION SELECTOR-10
case 0xFE0A: // VARIATION SELECTOR-11
case 0xFE0B: // VARIATION SELECTOR-12
case 0xFE0C: // VARIATION SELECTOR-13
case 0xFE0D: // VARIATION SELECTOR-14
case 0xFE0E: // VARIATION SELECTOR-15
case 0xFE0F: // VARIATION SELECTOR-16
return len;
}
char ch = unicode_to_cp437(codepoint);
if(ch)
outchar(ch);
else {
else if(!unicode_is_zerowidth(codepoint)) {
outchar('\xA8'); // Inverted question mark
char seq[32] = "";
for(size_t i = 0; i < len; i++)
......@@ -495,6 +277,7 @@ size_t sbbs_t::utf8_to_cp437(const char* str, size_t len)
/****************************************************************************/
/* Raw put string (remotely) */
/* Performs Telnet IAC escaping */
/* Performs charset translations */
/* Performs saveline buffering (for restoreline) */
/* DOES NOT expand ctrl-A codes, track columns, lines, auto-pause, etc. */
/****************************************************************************/
......@@ -509,8 +292,6 @@ int sbbs_t::rputs(const char *str, size_t len)
long term = term_supports();
char utf8[UTF8_MAX_LEN + 1] = "";
for(l=0;l<len && online;l++) {
if(str[l]==(char)TELNET_IAC && !(telnet_mode&TELNET_MODE_OFF))
outcom(TELNET_IAC); /* Must escape Telnet IAC char (255) */
uchar ch = str[l];
utf8[0] = 0;
if(term&PETSCII)
......@@ -524,9 +305,12 @@ int sbbs_t::rputs(const char *str, size_t len)
}
if(utf8[0])
putcom(utf8);
else
else {
if(outcom(ch)!=0)
break;
if(ch == (char)TELNET_IAC && !(telnet_mode&TELNET_MODE_OFF))
outcom(TELNET_IAC); /* Must escape Telnet IAC char (255) */
}
if(lbuflen<LINE_BUFSIZE)
lbuf[lbuflen++] = ch;
}
......
......@@ -2339,15 +2339,6 @@ void js_cleanup(JSRuntime* js_runtime, JSContext* js_cx, JSObject** js_glob)
}
#endif
/****************************************************************************/
/* Returns true when every byte of the NUL-terminated string 'str' is a     */
/* 7-bit (US-ASCII, 0x01-0x7F) value; an empty string yields true.          */
/* 'str' must not be NULL.                                                  */
/* Each byte is examined as an unsigned value so that the result does not   */
/* depend on whether plain 'char' is signed on the target platform.         */
/****************************************************************************/
bool strIsPlainAscii(const char* str)
{
	for(const char* p = str; *p != 0; p++) {
		/* Any byte with the high bit set is outside the ASCII range */
		if((unsigned char)*p > 0x7F)
			return false;
	}
	return true;
}
static size_t strStartsWith_i(const char* buf, const char* match)
{
size_t len = strlen(match);
......@@ -2435,14 +2426,14 @@ bool normalize_hfield_value(char* str)
mimehdr_q_decode(tmp);
if(charset == MIMEHDR_CHARSET_UTF8)
utf8_normalize_str(tmp);
if(charset == MIMEHDR_CHARSET_CP437 || strIsPlainAscii(tmp))
if(charset == MIMEHDR_CHARSET_CP437 || str_is_ascii(tmp))
p = tmp;
}
else if(encoding == 'B'
&& b64_decode(tmp, sizeof(tmp), tmp, strlen(tmp)) > 0) { // base64
if(charset == MIMEHDR_CHARSET_UTF8)
utf8_normalize_str(tmp);
if(charset == MIMEHDR_CHARSET_CP437 || strIsPlainAscii(tmp))
if(charset == MIMEHDR_CHARSET_CP437 || str_is_ascii(tmp))
p = tmp;
}
}
......
......@@ -88,6 +88,7 @@ bool sbbs_t::postmsg(uint subnum, long wm_mode, smb_t* resmb, smbmsg_t* remsg)
FILE* fp;
smbmsg_t msg;
uint reason;
bool utf8 = false;
if(remsg) {
SAFECOPY(title, remsg->subj);
......@@ -215,7 +216,7 @@ bool sbbs_t::postmsg(uint subnum, long wm_mode, smb_t* resmb, smbmsg_t* remsg)
if(!writemsg(str,top,title,wm_mode,subnum,touser
,/* from: */cfg.sub[subnum]->misc&SUB_NAME ? useron.name : useron.alias
,&editor)
,&editor, &utf8)
|| (length=(long)flength(str))<1) { /* Bugfix Aug-20-2003: Reject negative length */
bputs(text[Aborted]);
smb_close(&smb);
......@@ -291,6 +292,9 @@ bool sbbs_t::postmsg(uint subnum, long wm_mode, smb_t* resmb, smbmsg_t* remsg)
add_msg_ids(&cfg, &smb, &msg, remsg);
editor_info_to_msg(&msg, editor);
if(utf8)
smb_hfield_str(&msg, FIDOCTRL, FIDO_CHARSET_UTF8);
if((cfg.sub[subnum]->misc&SUB_MSGTAGS)
&& (tags[0] || text[TagMessageQ][0] == 0 || !noyes(text[TagMessageQ]))) {
......
......@@ -651,7 +651,7 @@ public:
/* writemsg.cpp */
void automsg(void);
bool writemsg(const char *str, const char *top, char *subj, long mode, uint subnum
,const char *to, const char* from, char** editor=NULL);
,const char *to, const char* from, char** editor=NULL, bool* utf8=NULL);
char* quotes_fname(int xedit, char* buf, size_t len);
char* msg_tmp_fname(int xedit, char* fname, size_t len);
char putmsg(const char *str, long mode, long org_cols = 0);
......@@ -1128,6 +1128,7 @@ extern "C" {
DLLEXPORT char * DLLCALL ultoac(ulong l,char *str);
DLLEXPORT char * DLLCALL rot13(char* str);
DLLEXPORT uint32_t DLLCALL str_to_bits(uint32_t currval, const char *str);
DLLEXPORT BOOL DLLCALL str_is_ascii(const char*);
/* msg_id.c */
DLLEXPORT char * DLLCALL ftn_msgid(sub_t*, smbmsg_t*, char* msgid, size_t);
......
......@@ -168,7 +168,7 @@
<ItemGroup>
<ClCompile Include="..\comio\comio.c" />
<ClCompile Include="..\comio\comio_win32.c" />
<ClCompile Include="..\encode\cp437_unicode_tbl.c" />
<ClCompile Include="..\encode\unicode.c" />
<ClCompile Include="..\encode\utf8.c" />
<ClCompile Include="..\encode\uucode.c" />
<ClCompile Include="..\encode\yenc.c" />
......
......@@ -616,6 +616,15 @@ char DLLCALL exascii_to_ascii_char(uchar ch)
return ch;
}
/****************************************************************************/
/* Returns TRUE when every byte of the NUL-terminated string 'str' is a     */
/* 7-bit (US-ASCII, 0x01-0x7F) value; an empty string yields TRUE.          */
/* 'str' must not be NULL.                                                  */
/* Each byte is examined as an unsigned value so that the result does not   */
/* depend on whether plain 'char' is signed on the target platform.         */
/****************************************************************************/
BOOL DLLCALL str_is_ascii(const char* str)
{
	for(const char* p = str; *p != 0; p++) {
		/* Any byte with the high bit set is outside the ASCII range */
		if((unsigned char)*p > 0x7F)
			return FALSE;
	}
	return TRUE;
}
/****************************************************************************/
/* Convert string from IBM extended ASCII to just ASCII */
/****************************************************************************/
......
......@@ -36,6 +36,8 @@
#include "sbbs.h"
#include "wordwrap.h"
#include "utf8.h"
#include "unicode.h"
#define MAX_LINES 10000
#define MAX_LINE_LEN (cols - 1)
......@@ -103,6 +105,13 @@ bool sbbs_t::quotemsg(smb_t* smb, smbmsg_t* msg, bool tails)
if((buf=smb_getmsgtxt(smb, msg, mode)) != NULL) {
strip_invalid_attr(buf);
truncsp(buf);
if(smb_msg_is_utf8(msg) && !term_supports(UTF8)) {
utf8_normalize_str(buf);
utf8_replace_chars(buf, unicode_to_cp437
,/* unsupported char: */'\xA8' /* Inverted question mark */
,/* unsupported zero-width ch: */0
,/* decode error char: */ '\xAD' /* inverted exclamation mark */);
}
if(!useron_xedit || (useron_xedit && (cfg.xedit[useron_xedit-1]->misc&QUOTEWRAP))) {
int wrap_cols = 0;
if(useron_xedit > 0)
......@@ -177,8 +186,10 @@ int sbbs_t::process_edited_text(char* buf, FILE* stream, long mode, unsigned* li
len++;
}
if(buf[l])
if(buf[l]) {
bprintf(text[NoMoreLines], i);
buf[l] = 0;
}
if(lines!=NULL)
*lines=i;
......@@ -224,7 +235,7 @@ int sbbs_t::process_edited_file(const char* src, const char* dest, long mode, un
/* 'dest' contains a text description of where the message is going. */
/****************************************************************************/
bool sbbs_t::writemsg(const char *fname, const char *top, char *subj, long mode, uint subnum
,const char *to, const char* from, char** editor)
,const char *to, const char* from, char** editor, bool* utf8)
{
char str[256],quote[128],c,*buf,*p,*tp
,useron_level;
......@@ -629,6 +640,8 @@ bool sbbs_t::writemsg(const char *fname, const char *top, char *subj, long mode,
return(false);
}
l=process_edited_text(buf,stream,mode,&lines,cfg.level_linespermsg[useron_level]);
if(utf8 != NULL)
*utf8 = (!str_is_ascii(buf) && utf8_str_is_valid(buf));
if(!(mode&(WM_EXTDESC|WM_ANON))) {
/* Signature file */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment