/* wordwrap.c */
/* $Id$ */
/****************************************************************************
* @format.tab-size 4 (Plain Text/Source Code File Header) *
* @format.use-tabs true (see http://www.synchro.net/ptsc_hdr.html) *
* *
* Copyright 2011 Rob Swindell - http://www.synchro.net/copyright.html *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* See the GNU General Public License for more details: gpl.txt or *
* http://www.fsf.org/copyleft/gpl.html *
* *
* Anonymous FTP access to the most recent released source is available at *
* ftp://vert.synchro.net, ftp://cvs.synchro.net and ftp://ftp.synchro.net *
* *
* Anonymous CVS access to the development source and modification history *
* is available at cvs.synchro.net:/cvsroot/sbbs, example: *
* cvs -d :pserver:anonymous@cvs.synchro.net:/cvsroot/sbbs login *
* (just hit return, no password is necessary) *
* cvs -d :pserver:anonymous@cvs.synchro.net:/cvsroot/sbbs checkout src *
* *
* For Synchronet coding style and modification guidelines, see *
* http://www.synchro.net/source.html *
* *
* You are encouraged to submit any modifications (preferably in Unix diff *
* format) via e-mail to mods@synchro.net *
* *
* Note: If this box doesn't appear square, then you need to fix your tabs. *
****************************************************************************/
#include <ctype.h>
#include <genwrap.h>
#include <limits.h> /* INT_MAX */
#include <stdlib.h> /* realloc */
#include <string.h> /* memcpy, strlen, strcmp */
#include "wordwrap.h"
/*
 * A quote prefix taken from the start of a line.
 */
struct prefix {
	int		cols;	/* Number of output columns the prefix occupies */
	char	*bytes;	/* Allocated copy of the prefix bytes */
};
/*
 * States of the quote prefix parser.  The order of FIRST through END is
 * significant: the parser steps from one to the next by incrementing.
 */
enum prefix_pos {
	PREFIX_START,		/* A space is expected ahead of a quote marker */
	PREFIX_FIRST,		/* First character of a marker */
	PREFIX_SECOND,		/* Second character of a marker */
	PREFIX_THIRD,		/* Third character of a marker */
	PREFIX_END,			/* The closing '>' is required here */
	PREFIX_PAD,			/* The space following '>' is required here */
	PREFIX_FINISHED		/* Parsing is over */
};
/*
 * Parses a quote prefix from the start of the passed text.
 *
 * A prefix is a run of one or more quote markers.  Each marker is an
 * optional leading space, up to three non-space characters, a '>' and a
 * single trailing space (e.g. "> ", " ab> " or "> > ").  CTRL-A codes met
 * while scanning are stepped over and do not count as columns, but they are
 * part of the copied bytes.  Scanning stops at the end of the string, at a
 * CR, LF or TAB, or at the first character that cannot belong to a marker.
 *
 * Returns a struct whose bytes member is a malloc()ed, NUL-terminated copy
 * of the prefix (an empty string when the text has no prefix) and whose
 * cols member is the number of columns the prefix takes up in the output.
 * The caller owns bytes and must free() it.  If the allocation fails,
 * bytes is NULL and cols is 0.
 */
static struct prefix parse_prefix(const char *text)
{
	enum prefix_pos expect = PREFIX_START;
	const char *pos = text;
	const char *end = text;
	struct prefix ret = {0, NULL};
	size_t len;
	int cols = 0;

	// Quote may begin the line or have a space before it.
	if (text[0] != ' ')
		expect = PREFIX_FIRST;

	for (; *pos && expect != PREFIX_FINISHED; pos++) {
		// Skip CTRL-A Codes
		// NOTE(review): a doubled CTRL-A is not treated as a literal here;
		// the second CTRL-A starts another code - confirm that is intended.
		while (*pos == '\x01') {
			pos++;
			if (*pos != '\x01' && *pos != 0)
				pos++;
		}
		// If end of line or message, or obviously outside of prefixes, exit loop.
		if (*pos == 0 || *pos == '\n' || *pos == '\r' || *pos == '\t')
			break;
		cols++;
		switch (expect) {
			// If there's no space before the quote mark, it's not a prefix.
			case PREFIX_START:
				if (*pos == ' ')
					expect = PREFIX_FIRST;
				else
					expect = PREFIX_FINISHED;
				break;
			// A space ends the prefix, '>' closes the marker, anything
			// else is one of the (at most three) characters before '>'.
			case PREFIX_FIRST:
			case PREFIX_SECOND:
			case PREFIX_THIRD:
				if (*pos == ' ')
					expect = PREFIX_FINISHED;
				else if (*pos == '>')
					expect = PREFIX_PAD;
				else
					expect++;
				break;
			// Next char must be >
			case PREFIX_END:
				if (*pos == '>')
					expect = PREFIX_PAD;
				else
					expect = PREFIX_FINISHED;
				break;
			// Must be a space after the '>'
			case PREFIX_PAD:
				if (*pos == ' ') {
					// A complete marker: remember how far the prefix reaches.
					ret.cols = cols;
					end = pos+1;
					if (*(pos+1) == ' ')
						expect = PREFIX_START;
					else
						expect = PREFIX_FIRST;
				}
				else
					expect = PREFIX_FINISHED;
				break;
			default:
				expect = PREFIX_FINISHED;
				break;
		}
	}

	// Copy everything up to the end of the last complete marker
	// (zero bytes when no marker was found).
	len = (size_t)(end - text);
	ret.bytes = (char *)malloc(len + 1);
	if (ret.bytes == NULL) {
		ret.cols = 0;
		return ret;
	}
	memcpy(ret.bytes, text, len);
	ret.bytes[len] = 0;
	return ret;
}
/*
 * Appends len bytes from append to a malloc()ed buffer, realloc()ing if
 * needed.
 *
 * *outbuf is the start of the buffer, *outp the current write position and
 * *outlen the allocated size in bytes; all three are updated when the buffer
 * grows.  Each call first stores a NUL at *outp, so the buffer is always
 * kept at least one byte larger than the data it holds.
 *
 * The size is doubled as many times as it takes for the new data to fit.
 * If the buffer cannot be grown (realloc() fails, or the size would not fit
 * in an int) the data is not appended and the buffer is left as it was.
 * A negative len is ignored.
 */
static void outbuf_append(char **outbuf, char **outp, const char *append, int len, int *outlen)
{
	size_t	used;
	size_t	needed;
	size_t	new_size;
	char	*p;

	if (len < 0)
		return;
	/* Terminate outbuf */
	**outp=0;
	used = (size_t)(*outp - *outbuf);
	/* One spare byte is kept for the terminator written by the next call */
	needed = used + (size_t)len + 1;
	if (*outlen <= 0 || needed > (size_t)*outlen) {
		/* Not enough room, keep doubling the size until there is. */
		new_size = (*outlen > 0) ? (size_t)*outlen : 1;
		do {
			if (new_size > (size_t)INT_MAX / 2)
				return;	/* *outlen could not hold the new size */
			new_size *= 2;
		} while (new_size < needed);
		p = realloc(*outbuf, new_size);
		if (p == NULL) {
			/* Can't do it. */
			return;
		}
		/* Set outp for new buffer */
		*outp = p + used;
		*outbuf = p;
		*outlen = (int)new_size;
	}
	memcpy(*outp, append, (size_t)len);
	*outp += len;
}
/*
 * Length of one "section" of text, which is either a word or a run of
 * whitespace.  The length is kept both in bytes and in columns.
 */
struct section_len {
	int	bytes;	/* Number of bytes in the section */
	int	len;	/* Number of columns the section occupies */
};
/*
 * Gets the length of a run of whitespace starting at the beginning
 * of buf, which occurs in column col.
 *
 * The column is needed for tab size calculations: a tab advances to the
 * next column that is a multiple of 8, every other whitespace character
 * counts as one column.
 *
 * Returns the number of bytes and columns in the run (both 0 when buf
 * does not start with whitespace).
 */
static struct section_len get_ws_len(const char *buf, int col)
{
	struct section_len ret = {0,0};

	for (ret.bytes = 0; buf[ret.bytes]; ret.bytes++) {
		/* Cast first: passing a negative char to isspace() is undefined */
		if (!isspace((unsigned char)buf[ret.bytes]))
			break;
		if (buf[ret.bytes] == '\t') {
			/* Move at least one column, then on to the next tab stop */
			do {
				ret.len++;
			} while ((ret.len + col) % 8);
			continue;
		}
		ret.len++;
	}
	return ret;
}
/*
 * Gets the length of a word, optionally limiting the max number
 * of columns to consume to maxlen.
 *
 * A word ends at the first whitespace character or at the end of the
 * string.  Within a word:
 *  - '\x1f' bytes take no columns;
 *  - a CTRL-A code (CTRL-A plus the byte after it) takes no columns;
 *  - a doubled CTRL-A is a literal and takes one column;
 *  - a backspace takes one column away.
 *
 * When maxlen <= 0, returns the word length in cols and bytes.
 * When maxlen > 0, returns the number of cols and bytes up to
 * maxlen cols (used to find the number of bytes to fill a specific
 * number of columns).
 */
static struct section_len get_word_len(const char *buf, int maxlen)
{
	struct section_len ret = {0,0};

	for (ret.bytes = 0; ; ret.bytes++) {
		if (!buf[ret.bytes])
			break;
		else if (isspace((unsigned char)buf[ret.bytes]))
			break;
		else if (buf[ret.bytes] == '\x1f')
			continue;
		else if (buf[ret.bytes] == '\x01') {
			if (buf[ret.bytes+1] == 0) {
				/*
				 * Truncated code at the very end of the string: count the
				 * CTRL-A byte and stop on the terminator rather than
				 * stepping over it.
				 */
				ret.bytes++;
				break;
			}
			if (buf[ret.bytes+1] != '\x01') {
				/* Zero-width code: step onto the attribute byte, the loop steps past it */
				ret.bytes++;
				continue;
			}
			/* Literal CTRL-A: never cut between the two bytes of the pair */
			if (maxlen > 0 && ret.len >= maxlen)
				break;
			ret.bytes++;
			ret.len++;
			continue;
		}
		else if (buf[ret.bytes] == '\b') {
			// This doesn't handle BS the same way... but it's kinda BS anyway.
			ret.len--;
			continue;
		}
		if (maxlen > 0 && ret.len >= maxlen)
			break;
		ret.len++;
	}
	return ret;
}
/*
 * A "paragraph": everything from either the beginning of the message or
 * the previous hard CR, up to the next hard CR or the end of the message.
 */
struct paragraph {
	struct prefix	prefix;		/* Quote prefix belonging to the paragraph */
	char			*text;		/* Allocated, NUL-terminated text; NULL in the array terminator */
	size_t			alloc_size;	/* Number of bytes allocated for text */
};
/*
 * Free()s the allocations in an array of paragraphs. If count is
 * provided, that many paragraphs are freed. If count == -1, frees
 * up to the first paragraph with a NULL text member.
 *
 * Only the prefix bytes and text of each paragraph are released; the
 * array itself is left for the caller to free.  A NULL array is accepted
 * and ignored.
 */
static void free_paragraphs(struct paragraph *paragraph, int count)
{
	int i;

	if (paragraph == NULL)
		return;
	for (i = 0; count == -1 || i < count; i++) {
		FREE_AND_NULL(paragraph[i].prefix.bytes);
		/* In "until terminator" mode the NULL text marks the last entry */
		if (count == -1 && paragraph[i].text == NULL)
			break;
		FREE_AND_NULL(paragraph[i].text);
	}
}
/*
 * Adds count bytes to the end of a paragraph's text, growing the
 * allocation when it is too small.
 *
 * The current length is taken with strlen(), so the text must be
 * NUL-terminated on entry; it is NUL-terminated again on return.  The
 * allocation is doubled with realloc() until the old text, the new bytes
 * and the terminator all fit.
 *
 * Returns TRUE on success.  Returns FALSE when realloc() fails, in which
 * case nothing is appended.
 */
static BOOL paragraph_append(struct paragraph *paragraph, const char *bytes, size_t count)
{
	const size_t used = strlen(paragraph->text);
	const size_t needed = used + count + 1;

	for (;;) {
		size_t doubled;
		char *grown;

		if (needed <= paragraph->alloc_size)
			break;
		doubled = paragraph->alloc_size * 2;
		grown = realloc(paragraph->text, doubled);
		if (grown == NULL)
			return FALSE;
		paragraph->text = grown;
		paragraph->alloc_size = doubled;
	}

	memcpy(paragraph->text + used, bytes, count);
	paragraph->text[used + count] = 0;
	return TRUE;
}
/*
 * This unwraps a message into infinite line length paragraphs.
 * Optionally, each with separate prefix.
 *
 * inbuf is the NUL-terminated message, oldlen the line length the message
 * was wrapped to (used to guess whether a line break is soft or hard) and
 * handle_quotes selects whether quote prefixes are parsed off the lines.
 * When handle_quotes is FALSE no prefix parsing is done at all and every
 * paragraph has an empty prefix (cols 0, bytes NULL).
 *
 * CRs, backspaces and '\x1f' bytes are stripped.  A line break is hard
 * (ends the paragraph) when any of these holds:
 *  - the next line has a different quote prefix;
 *  - the next line is empty or starts with whitespace;
 *  - the paragraph so far holds nothing but whitespace;
 *  - the first word of the next line would have fit on this line.
 * Otherwise the break is soft: the next line's prefix is skipped and the
 * lines are joined with a single space.
 *
 * The returned malloc()ed array will have the text member of the last
 * paragraph set to NULL.  Returns NULL if an allocation fails.
 */
static struct paragraph *word_unwrap(char *inbuf, int oldlen, BOOL handle_quotes)
{
	unsigned inpos=0;
	struct prefix new_prefix;
	size_t new_prefix_len;
	size_t alloc_len;
	int incol;
	int paragraph = 0;
	struct paragraph *ret = NULL;
	struct paragraph *newret = NULL;
	BOOL paragraph_done;
	int next_word_len;

	/* Initial size of each paragraph's text; always room for the terminator */
	alloc_len = (oldlen > 0) ? (size_t)oldlen + 1 : 1;

	while(inbuf[inpos]) {
		incol = 0;
		/* Start of a new paragraph (ie: after a hard CR) */
		newret = realloc(ret, (paragraph+1) * sizeof(struct paragraph));
		if (newret == NULL) {
			free_paragraphs(ret, paragraph);
			free(ret);
			return NULL;
		}
		ret = newret;
		ret[paragraph].prefix.cols = 0;
		ret[paragraph].prefix.bytes = NULL;
		ret[paragraph].alloc_size = alloc_len;
		ret[paragraph].text = (char *)malloc(alloc_len);
		if (ret[paragraph].text == NULL)
			goto fail_return;
		ret[paragraph].text[0] = 0;
		if (handle_quotes) {
			ret[paragraph].prefix = parse_prefix(inbuf+inpos);
			if (ret[paragraph].prefix.bytes == NULL)
				goto fail_return;
			inpos += strlen(ret[paragraph].prefix.bytes);
			incol = ret[paragraph].prefix.cols;
		}

		/* The message may end right after the prefix */
		paragraph_done = (inbuf[inpos] == 0) ? TRUE : FALSE;
		while(!paragraph_done) {
			switch(inbuf[inpos]) {
				case '\r':		// Strip CRs and add them in later.
				case '\b':		// Strip backspaces.
				case '\x1f':	// Strip delete chars.
					break;
				case '\x01':	// CTRL-A code.
					if (inbuf[inpos+1] == 0) {
						// Truncated code at the end of the message: drop it
						// so nothing ever reads past the terminator.
						break;
					}
					if (inbuf[inpos+1] == '\x01') {
						// This is a literal CTRL-A... col advances and we can wrap
						incol++;
					}
					if (!paragraph_append(&ret[paragraph], inbuf+inpos, 2))
						goto fail_return;
					inpos++;
					break;
				case '\n':		// End of line... figure out if it's soft or hard...
					// First, check if we're at the end...
					if (inbuf[inpos+1] == 0)
						break;
					// Now, if the prefix changes, it's hard.
					if (handle_quotes) {
						new_prefix = parse_prefix(&inbuf[inpos+1]);
						if (new_prefix.bytes == NULL)
							goto fail_return;
						if (new_prefix.cols != ret[paragraph].prefix.cols
						    || strcmp(new_prefix.bytes, ret[paragraph].prefix.bytes) != 0) {
							FREE_AND_NULL(new_prefix.bytes);
							paragraph_done = TRUE;
							break;
						}
						new_prefix_len = strlen(new_prefix.bytes);
					}
					else {
						new_prefix.cols = 0;
						new_prefix.bytes = NULL;
						new_prefix_len = 0;
					}
					// If the next line start with whitespace, it's hard
					switch(inbuf[inpos+1+new_prefix_len]) {
						case 0:
						case ' ':
						case '\t':
						case '\r':
						case '\n':
							paragraph_done = TRUE;
							break;
					}
					if (paragraph_done) {
						FREE_AND_NULL(new_prefix.bytes);
						break;
					}
					// If this paragraph was only whitespace, it's hard.
					if(strspn(ret[paragraph].text, " \t\r") == strlen(ret[paragraph].text)) {
						FREE_AND_NULL(new_prefix.bytes);
						paragraph_done = TRUE;
						break;
					}
					// If the first word on the next line would have fit here, it's hard
					next_word_len = get_word_len(inbuf+inpos+1+new_prefix_len, -1).len;
					if ((incol + next_word_len + 1 - 1) < oldlen) {
						FREE_AND_NULL(new_prefix.bytes);
						paragraph_done = TRUE;
						break;
					}
					// Soft break: skip the next line's prefix so it does not
					// end up inside the paragraph text, then join with a space.
					inpos += new_prefix_len;
					incol = new_prefix.cols;
					FREE_AND_NULL(new_prefix.bytes);
					if (!paragraph_append(&ret[paragraph], " ", 1))
						goto fail_return;
					break;
				case '\t':		// Tab... bah.
					if (!paragraph_append(&ret[paragraph], inbuf+inpos, 1))
						goto fail_return;
					incol++;
					while(incol%8)
						incol++;
					break;
				default:
					if (!paragraph_append(&ret[paragraph], inbuf+inpos, 1))
						goto fail_return;
					incol++;
					break;
			}
			inpos++;
			if (inbuf[inpos] == 0)
				paragraph_done = TRUE;
		}
		paragraph++;
	}

	/* Add the terminating entry (NULL text) */
	newret = realloc(ret, (paragraph+1) * sizeof(struct paragraph));
	if (newret == NULL) {
		free_paragraphs(ret, paragraph);
		free(ret);
		return NULL;
	}
	ret = newret;
	memset(&ret[paragraph], 0, sizeof(ret[0]));
	return ret;

fail_return:
	/* The paragraph being built counts too, hence paragraph+1 */
	free_paragraphs(ret, paragraph+1);
	free(ret);
	return NULL;
}
/*
 * Wraps a set of infinite line length paragraphs to the specified length
 * optionally prepending the prefixes.
 *
 * paragraph is an array terminated by an entry whose text is NULL, outlen
 * is the maximum number of columns per output line (values below 1 are
 * treated as 1) and handle_quotes selects whether each output line starts
 * with the paragraph's quote prefix.  Every output line ends in "\r\n";
 * words too long for a line are chopped.
 *
 * Returns a malloc()ed, NUL-terminated string which the caller must
 * free(), or NULL if paragraph is NULL or the buffer cannot be allocated.
 * outbuf_append() reports no errors, so if the buffer cannot be grown
 * later on, the bytes that did not fit are missing from the result.
 */
static char *wrap_paragraphs(struct paragraph *paragraph, int outlen, BOOL handle_quotes)
{
	int outcol;
	char *outbuf = NULL;
	char *outp = NULL;
	int outbuf_size;
	char *prefix_copy;
	int prefix_cols;
	int prefix_bytes;
	size_t prefix_len;
	size_t keep;
	char *inp;
	struct section_len ws_len;
	struct section_len word_len;

	if (paragraph == NULL)
		return NULL;
	/* A line must hold at least one column or no word could ever be placed */
	if (outlen < 1)
		outlen = 1;
	outbuf_size = outlen;
	outbuf = (char *)malloc(outbuf_size);
	if (outbuf == NULL)
		return NULL;
	outp = outbuf;
	while(paragraph->text) {
		/* No prefix unless quotes are handled and the paragraph has one */
		prefix_copy = NULL;
		prefix_cols = 0;
		prefix_bytes = 0;
		if (handle_quotes && paragraph->prefix.bytes != NULL) {
			prefix_copy = paragraph->prefix.bytes;
			if (paragraph->prefix.cols > (outlen / 2)) {
				// Massive prefix... chop it down...
				// Start outlen/2 bytes from its end and back up to a space,
				// without ever going before the start of the prefix.
				prefix_len = strlen(prefix_copy);
				keep = (size_t)(outlen / 2);
				if (prefix_len > keep)
					prefix_copy += prefix_len - keep;
				while (prefix_copy > paragraph->prefix.bytes && *prefix_copy != ' ')
					prefix_copy--;
				// NOTE(review): get_word_len() stops at whitespace, so when
				// the scan ends on a space the chopped prefix is empty.
				// Confirm whether the tail of the prefix was meant to be kept.
				word_len = get_word_len(prefix_copy, -1);
				prefix_cols = word_len.len;
				prefix_bytes = word_len.bytes;
				if (prefix_cols >= outlen) {
					/* A prefix filling the whole line would leave no room for text */
					prefix_cols = 0;
					prefix_bytes = 0;
				}
			}
			else {
				prefix_cols = paragraph->prefix.cols;
				prefix_bytes = (int)strlen(prefix_copy);
			}
		}
		inp = paragraph->text;
		if (*inp == 0)
			outbuf_append(&outbuf, &outp, "\r\n", 2, &outbuf_size);
		while (*inp) {
			// First, add the prefix...
			if (prefix_bytes > 0)
				outbuf_append(&outbuf, &outp, prefix_copy, prefix_bytes, &outbuf_size);
			outcol = prefix_cols;
			// Now add words until the line is full...
			while(1) {
				if (*inp == 0)
					break;
				ws_len = get_ws_len(inp, outcol);
				word_len = get_word_len(inp+ws_len.bytes, -1);
				// Do we need to chop a long word?
				if (word_len.len > (outlen - prefix_cols))
					word_len = get_word_len(inp + ws_len.bytes, outlen - outcol);
				if (outcol + ws_len.len + word_len.len > outlen) {
					// Doesn't fit: the whitespace is dropped at the line break
					inp += ws_len.bytes;
					break;
				}
				outbuf_append(&outbuf, &outp, inp, ws_len.bytes, &outbuf_size);
				inp += ws_len.bytes;
				outcol += ws_len.len;
				outbuf_append(&outbuf, &outp, inp, word_len.bytes, &outbuf_size);
				inp += word_len.bytes;
				outcol += word_len.len;
			}
			outbuf_append(&outbuf, &outp, "\r\n", 2, &outbuf_size);
		}
		paragraph++;
	}
	/* Append the terminating NUL */
	outbuf_append(&outbuf, &outp, "", 1, &outbuf_size);
	return outbuf;
}
/*
 * Re-wraps a message to a new line length.
 *
 * inbuf is the NUL-terminated message, len the line length to wrap to,
 * oldlen the line length the message was wrapped to before (used while
 * unwrapping to tell soft line breaks from hard ones) and handle_quotes
 * selects whether quote prefixes are recognized and repeated on each
 * wrapped line.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL if
 * inbuf is NULL or memory could not be allocated.
 */
char* wordwrap(char* inbuf, int len, int oldlen, BOOL handle_quotes)
{
	char* outbuf;
	struct paragraph *paragraphs;

	if (inbuf == NULL)
		return NULL;
	paragraphs = word_unwrap(inbuf, oldlen, handle_quotes);
	if (paragraphs == NULL)
		return NULL;
	/* Unwrap with the old length, wrap with the new one */
	outbuf = wrap_paragraphs(paragraphs, len, handle_quotes);
	free_paragraphs(paragraphs, -1);
	free(paragraphs);
	return outbuf;
}