...
 
Commits (2)
  • Rob Swindell's avatar
    Improved parsing of "charset" parameter in MIME Content-Type header. · ad79c591
    Rob Swindell authored
    In Issue #177, the reported problem message header was "Content-Type: text/plain; charset=utf-8; format=flowed"
    The fact that the "charset" value was not quoted and not space delimited means the charset would have been parsed as "utf-8;", which would not provide an exact match (against "utf-8") in smb_msg_is_utf8() and thus the message body would not be considered to be utf-8 encoded.
    
    The solution is to terminate the "charset" parameter value at the semicolon, if it exists, and the value was not quoted.
    
    Also, for good measure, only search for " charset" or ";charset" to avoid false-positive parameter matches, like "notcharset".
    ad79c591
  • Rob Swindell's avatar
    Reduce unnecessary posted-to username searches. Auto-UTF8 in savemsg(). · a852cced
    Rob Swindell authored
    When posting to "All" or replying to a message that was posted by a networked (not local) user, don't attempt to search for that username in the local user base to notify them of the message posted to them.
    
    savemsg() is the underlying C function for saving a new message (mail or post) to a message base from several places, including JS modules. If there was no "charset" specified in the passed message header, check to see if the message body text is non-ASCII but valid UTF-8, and if it is, set the FIDOCHARSET header field to the appropriate UTF-8 charset identifier string. This would *also* address Issue #177 reported by Michael J. Ryan. It's possible, but unlikely, that a message text would contain valid CP437 that *also* happened to be valid UTF-8. If that does happen to occur (e.g. in posted ANSI art?), we might want to revert this enhancement to savemsg() and always leave it to the callers to detect/specify the charset.
    a852cced
......@@ -346,7 +346,9 @@ bool sbbs_t::postmsg(uint subnum, long wm_mode, smb_t* resmb, smbmsg_t* remsg)
,cfg.grp[cfg.sub[subnum]->grp]->sname,cfg.sub[subnum]->lname);
logline("P+",str);
if(!(msgattr & MSG_ANONYMOUS)) {
if(!(msgattr & MSG_ANONYMOUS)
&& stricmp(touser, "All") != 0
&& (remsg == NULL || remsg->from_net.type == NET_NONE)) {
if(cfg.sub[subnum]->misc&SUB_NAME)
i = userdatdupe(0, U_NAME, LEN_NAME, touser);
else
......@@ -414,6 +416,8 @@ extern "C" int DLLCALL msg_client_hfields(smbmsg_t* msg, client_t* client)
/* Note: finds signature delimiter automatically and (if applicable) separates msgbuf into body and tail */
/* Adds/generates Message-IDs when needed */
/* Auto-sets the UTF-8 indicators for UTF-8 encoded header fields and body text */
/* If you want to save a message body with CP437 chars that also happen to be valid UTF-8 sequences, you'll need to preset the ftn_charset header */
extern "C" int DLLCALL savemsg(scfg_t* cfg, smb_t* smb, smbmsg_t* msg, client_t* client, const char* server, char* msgbuf, smbmsg_t* remsg)
{
ushort xlat=XLAT_NONE;
......@@ -494,6 +498,9 @@ extern "C" int DLLCALL savemsg(scfg_t* cfg, smb_t* smb, smbmsg_t* msg, client_t*
|| (msg->subj != NULL && !str_is_ascii(msg->subj) && utf8_str_is_valid(msg->subj)))
msg->hdr.auxattr |= MSG_HFIELDS_UTF8;
if(msg->ftn_charset == NULL && !str_is_ascii(msgbuf) && utf8_str_is_valid(msgbuf))
smb_hfield_str(msg, FIDOCHARSET, FIDO_CHARSET_UTF8);
msgbuf = strdup(msgbuf);
if(msgbuf == NULL)
return SMB_FAILURE;
......
......@@ -367,15 +367,22 @@ void SMBCALL smb_parse_content_type(const char* content_type, char** subtype, ch
*tp = 0;
}
}
if(charset != NULL && (p = strcasestr(p, "charset=")) != NULL) {
p += 8;
if(*p == '"')
if(charset != NULL && ((p = strcasestr(p, " charset=")) != NULL || (p = strcasestr(p, ";charset=")) != NULL)) {
BOOL quoted = FALSE;
p += 9;
if(*p == '"') {
quoted = TRUE;
p++;
}
char* tp = p;
FIND_WHITESPACE(tp);
*tp = 0;
tp = p;
FIND_CHAR(tp, '"');
if(quoted) {
FIND_CHAR(tp, '"');
} else {
FIND_CHAR(tp, ';');
}
*tp = 0;
*charset = strdup(p);
}
......