Commit 0a0e2a8d authored by rswindell
Browse files

Replacement for typehtml.src - converts HTML to plain-text/Ctrl-A format.

Work in progress.
parent c9fba31f
// typehtml.js
// Convert HTML to plain-text with (optional) Synchronet attribute (Ctrl-A) codes
// Planned replacement for exec/typehtml.src (Baja version)
// $Id$
// Attribute sequences inserted into the output for each supported HTML element.
// Each sequence is one or more pairs of Ctrl-A (0x01) followed by an attribute
// code character.  Ctrl-A is always spelled "\x01": the legacy octal form "\1"
// is rejected in strict-mode code and is ambiguous when followed by a digit.
// These are intentionally variables (not constants): monochrome mode blanks them.
var NORMAL         = "\x01N\x01H";
var HEADING1       = "\x01H\x01Y";
var HEADING2       = "\x01H\x01C";
var HEADING3       = "\x01H\x01M";
var HEADING4       = "\x01H\x01G";
var HEADING5       = "\x01H\x01B";
var HEADING6       = "\x01H\x01R";
var BOLD           = "\x01H\x01C\x014";
var ITALIC         = "\x01H\x01G\x012";
var UNDERLINE      = "\x01H\x01W\x016";
var STRIKE_THROUGH = "\x01N\x01K\x017";
// Starts a new line and draws an "o" bullet for each list item
var LIST_ITEM      = "\r\n \x01H\x01Wo \x01G";
// Command-line parsing: "-mono" (case-insensitive) selects monochrome output;
// any other argument is taken as the name of the HTML file to display
// (if several are given, the last one wins).
var f;
var mono=false;	// Must be declared: reading an undeclared variable later would throw a ReferenceError
var i;
for(i=0; i<argv.length; i++) {	// Index loop: for..in would also visit any enumerable non-index properties
	switch(argv[i].toLowerCase()) {
		case "-mono":
			mono=true;
			break;
		default:
			f = new File(argv[i]);
			break;
	}
}
// No filename given: show usage and terminate with a non-zero exit code
if(f===undefined) {
	print("usage: typehtml [-mono] <filename>");
	exit(1);
}
// Open the input file for reading; on failure report the error number and
// terminate using that number as the exit code.
// NOTE(review): errno is not defined in this script - presumably supplied by the host environment.
if(!f.open("r")) {
	alert("Error " + errno + " opening " + f.name);
	exit(errno);
}
// Read the entire file into a single string, then release the file immediately
var buf=f.read(f.length);
f.close();
// Monochrome output requested?
// Blank every attribute sequence so that only plain text is emitted,
// and swap the list bullet for its attribute-free equivalent.
if(mono) {
	NORMAL = HEADING1 = HEADING2 = HEADING3 = HEADING4 = HEADING5 = HEADING6 = "";
	BOLD = ITALIC = UNDERLINE = STRIKE_THROUGH = "";
	LIST_ITEM = "\r\n o ";
}
// Convert the HTML held in buf to plain text by a series of global
// search-and-replace passes, then decode entities, word-wrap and print.
//
// Pattern conventions used below:
//   \b after a tag name  - keeps <li> from matching <link>, <th> from matching
//                          <thead>, <head> from matching <header>, etc.
//   [^<>]* for attributes - stops at the tag's own closing '>' so that a literal
//                          '>' in the following text is not swallowed with the tag.
// The order of the passes matters: white-space is collapsed before any "\r\n" is
// inserted, recognised tags are translated before the catch-all strip, and
// entities are decoded last so that "&lt;b&gt;" is never mistaken for a tag.

// Reduce white-space
buf=buf.replace(/>\s*\n/g,">");	// Drop white-space through end-of-line following a tag
buf=buf.replace(/\s+/g," ");	// Collapse each run of white-space into a single space

// Strip content that is never meant to be displayed (non-greedy, so only up to the first terminator)
buf=buf.replace(/<!--[\s\S]*?-->/g,"");							// Comments
buf=buf.replace(/<head\b[^<>]*>[\s\S]*?<\/head>/gi,"");			// The header
buf=buf.replace(/<script\b[^<>]*>[\s\S]*?<\/script>/gi,"");		// Script source
buf=buf.replace(/<style\b[^<>]*>[\s\S]*?<\/style>/gi,"");		// Style sheets

// Visual white-space
buf=buf.replace(/<br\b[^<>]*>/gi,"\r\n");	// <br>, <br/>, <br /> and <br attr=...> become \r\n
buf=buf.replace(/<p\b[^<>]*>/gi,"\r\n");	// <p> (with or without attributes) becomes \r\n
buf=buf.replace(/<tr\b[^<>]*>/gi,"\r\n");	// Each table row starts on a new line
buf=buf.replace(/<\/(p|ul|ol|caption|table)>/gi,"\r\n");	// </p>, </ul>, </ol>, </caption>, </table> become \r\n
buf=buf.replace(/<t[dh]\b[^<>]*>/gi," ");	// Table cells (<td>, <th>) are separated by a space

// Text attributes (<strong> is treated as <b>, <strike> as <s>)
buf=buf.replace(/<(b|strong)\b[^<>]*>/gi,BOLD);
buf=buf.replace(/<i\b[^<>]*>/gi,ITALIC);
buf=buf.replace(/<u\b[^<>]*>/gi,UNDERLINE);
buf=buf.replace(/<(s|strike)\b[^<>]*>/gi,STRIKE_THROUGH);
buf=buf.replace(/<\/(b|strong|i|u|s|strike)>/gi,NORMAL);

// Headings: <h1>..<h6> start on a new line in their own attribute;
// the closing tag restores the normal attribute and ends the line
var heading_attr=[HEADING1,HEADING2,HEADING3,HEADING4,HEADING5,HEADING6];
buf=buf.replace(/<h([1-6])\b[^<>]*>/gi,function(tag,level) {
	return "\r\n" + heading_attr[level-1];
});
buf=buf.replace(/<\/h[1-6]>/gi,NORMAL + "\r\n");

// Lists
buf=buf.replace(/<li\b[^<>]*>/gi,LIST_ITEM);

// Strip unsupported tags
buf=buf.replace(/<[^<>]*>/g,"");

// Translate &xxx; codes to ASCII and ex-ASCII
// NOTE(review): html_decode() and word_wrap() are not defined in this script - presumably host-provided
buf=html_decode(buf);
buf=word_wrap(buf);
print(buf);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment