From 0b044181607801ed86886b54bcb3c0c7cc111ea2 Mon Sep 17 00:00:00 2001
From: deuce <>
Date: Thu, 16 Apr 2020 14:46:47 +0000
Subject: [PATCH] Make the to/from unicode API generic so it can support
 multiple codepages. This is planned to work with the built-in fonts... no
 real idea how to make this work with dynamic fonts at this time.

We'll also need a line drawing set thing ala curses, but I'll worry about
that when/if I update uifc32 for unicode.
---
 src/conio/curs_cio.c       |  17 +----
 src/conio/sdl_con.c        |   6 +-
 src/conio/utf8_codepages.c | 136 ++++++++++++++++++++++++++-----------
 src/conio/utf8_codepages.h |  19 ++----
 src/conio/x_events.c       |   4 +-
 5 files changed, 112 insertions(+), 70 deletions(-)

diff --git a/src/conio/curs_cio.c b/src/conio/curs_cio.c
index ec317d3974..d065a10374 100644
--- a/src/conio/curs_cio.c
+++ b/src/conio/curs_cio.c
@@ -312,12 +312,7 @@ static int _putch(unsigned char ch, BOOL refresh_now)
 			wch[0]=ch;
 			break;
 		case CIOLIB_MODE_CURSES:
-			if (ch < 32)
-				wch[0] = cp437_ext_table[ch];
-			else if (ch > 127)
-				wch[0] = cp437_unicode_table[ch - 128];
-			else
-				wch[0] = ch;
+			wch[0] = cp_from_unicode_cp_ext(CIOLIB_CP437, ch, ch);
 			break;
 	}
 
@@ -633,7 +628,7 @@ int curs_gettext(int sx, int sy, int ex, int ey, void *fillbuf)
 						}
 						break;
 					case CIOLIB_MODE_CURSES:
-						thischar = cp437_from_unicode_cp_ext(ext_char, '?');
+						thischar = cp_from_unicode_cp_ext(CIOLIB_CP437, ext_char, '?');
 						break;
 				}
 			}
@@ -1084,13 +1079,7 @@ int curs_getch(void)
 
 			default:
 				// TODO: May not be right for wide...
-				if (ch > 127) {
-					ch = (unsigned char)cp437_from_unicode_cp(ch, 0);
-					if (ch == 0) {
-						curs_nextgetch=0xff;
-						ch=0;
-					}
-				}
+				ch = cp_from_unicode_cp(CIOLIB_CP437, ch, 0);
 				break;
 		}
 	}
diff --git a/src/conio/sdl_con.c b/src/conio/sdl_con.c
index 22bae5325e..bd24b7eb7d 100644
--- a/src/conio/sdl_con.c
+++ b/src/conio/sdl_con.c
@@ -276,7 +276,7 @@ void exit_sdl_con(void)
 void sdl_copytext(const char *text, size_t buflen)
 {
 	size_t outlen;
-	uint8_t *u8 = cp437_to_utf8(text, buflen, &outlen);
+	uint8_t *u8 = cp_to_utf8(CIOLIB_CP437, text, buflen, &outlen);
 	sdl.SetClipboardText((char *)u8);
 	free(u8);
 }
@@ -285,7 +285,7 @@ char *sdl_getcliptext(void)
 {
 	uint8_t *u8 = (uint8_t *)sdl.GetClipboardText();
 	char *ret;
-	ret = utf8_to_cp437(u8, '?');
+	ret = utf8_to_cp(CIOLIB_CP437, u8, '?', strlen((char *)u8), NULL);
 	sdl.free(u8);
 	return ret;
 }
@@ -709,7 +709,7 @@ sdl_add_keys(uint8_t *utf8s)
 	char *chars;
 	char *p;
 
-	chars = utf8_to_cp437(utf8s, '\x0d');
+	chars = utf8_to_cp(CIOLIB_CP437, utf8s, '\x0d', strlen((char *)utf8s), NULL);
 	if (chars) {
 		for (p = chars; *p; p++) {
 			if (*p == '\x0d')
diff --git a/src/conio/utf8_codepages.c b/src/conio/utf8_codepages.c
index e92a61963a..d70fcedd3a 100644
--- a/src/conio/utf8_codepages.c
+++ b/src/conio/utf8_codepages.c
@@ -2,8 +2,26 @@
 #include <stdlib.h>
 #include "utf8_codepages.h"
 
+struct ciolib_cpmap {
+	uint32_t	unicode;
+	uint8_t		cpchar;
+};
+
+struct codepage_def {
+	char name[32];
+	enum ciolib_codepage cp;
+	uint8_t *(*to_utf8)(const char *cp437str, size_t buflen, size_t *outlen, struct codepage_def *cpdef);
+	char *(*utf8_to)(const uint8_t *utf8str, char unmapped, size_t buflen, size_t *outlen, struct codepage_def *cpdef);
+	uint8_t (*from_unicode_cpoint)(uint32_t cpoint, char unmapped, struct codepage_def *cpdef);
+	uint8_t (*from_unicode_cpoint_ext)(uint32_t cpoint, char unmapped, struct codepage_def *cpdef);
+	struct ciolib_cpmap *cp_table;
+	size_t cp_table_sz;
+	uint32_t *cp_unicode_table;
+	uint32_t *cp_ext_unicode_table;
+};
+
 // Sorted by unicode codepoint...
-struct cp437map cp437_table[160] = {
+static struct ciolib_cpmap cp437_table[160] = {
 	{0x0000, 0},   {0x00A0, 255}, {0x00A1, 173}, {0x00A2, 155},
 	{0x00A3, 156}, {0x00A5, 157}, {0x00A7, 21},  {0x00AA, 166},
 	{0x00AB, 174}, {0x00AC, 170}, {0x00B0, 248}, {0x00B1, 241},
@@ -46,14 +64,14 @@ struct cp437map cp437_table[160] = {
 	{0x2665, 3},   {0x2666, 4},   {0x266A, 13},  {0x266B, 14},
 };
 
-uint32_t cp437_ext_table[32] = {
+static uint32_t cp437_ext_table[32] = {
 	0x0000, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
 	0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C,
 	0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8,
 	0x2191, 0x2193, 0x2192, 0x2190, 0x221F, 0x2194, 0x25B2, 0x25BC
 };
 
-uint32_t cp437_unicode_table[128] = {
+static uint32_t cp437_unicode_table[128] = {
 	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, 
 	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, 
 	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, 
@@ -76,7 +94,7 @@ static int
 cmptab(const void *key, const void *entry)
 {
 	const uint32_t *pkey = key;
-	const struct cp437map *pentry = entry;
+	const struct ciolib_cpmap *pentry = entry;
 
 	if (*pkey == pentry->unicode)
 		return 0;
@@ -85,28 +103,26 @@ cmptab(const void *key, const void *entry)
 	return 1;
 }
 
-uint8_t
-cp437_from_unicode_cp(uint32_t cp, char unmapped)
+static uint8_t
+cptable_from_unicode_cpoint(uint32_t cpoint, char unmapped, struct codepage_def *cpdef)
 {
-	struct cp437map *mapped;
+	struct ciolib_cpmap *mapped;
 
-	if (cp < 128)
-		return cp;
-	mapped = bsearch(&cp, cp437_table, 
-	    sizeof(cp437_table) / sizeof(cp437_table[0]),
-	    sizeof(cp437_table[0]), cmptab);
+	if (cpoint < 128)
+		return cpoint;
+	mapped = bsearch(&cpoint, cpdef->cp_table, cpdef->cp_table_sz, sizeof(cpdef->cp_table[0]), cmptab);
 	if (mapped == NULL)
 		return unmapped;
-	return mapped->cp437;
+	return mapped->cpchar;
 }
 
-uint8_t
-cp437_from_unicode_cp_ext(uint32_t cp, char unmapped)
+static uint8_t
+cptable_from_unicode_cpoint_ext(uint32_t cpoint, char unmapped, struct codepage_def *cpdef)
 {
-	if (cp < 32) {
-		return cp437_ext_table[cp];
+	if (cpoint < 32) {
+		return cpdef->cp_ext_unicode_table[cpoint];
 	}
-	return cp437_from_unicode_cp(cp, unmapped);
+	return cptable_from_unicode_cpoint(cpoint, unmapped, cpdef);
 }
 
 static int
@@ -199,8 +215,8 @@ utf8_bytes(uint32_t cp)
 	return -1;
 }
 
-uint8_t *
-cp437_to_utf8(const char *cp437str, size_t buflen, size_t *outlen)
+static uint8_t *
+cpstr_to_utf8(const char *cpstr, size_t buflen, size_t *outlen, struct codepage_def *cpdef)
 {
 	size_t needed = 0;
 	int cplen;
@@ -211,7 +227,7 @@ cp437_to_utf8(const char *cp437str, size_t buflen, size_t *outlen)
 
 	// Calculate the number of bytes needed
 	for (idx = 0; idx < buflen; idx++) {
-		ch = cp437str[idx];
+		ch = cpstr[idx];
 		if (ch == 0)
 			cplen = 4;
 		else if (ch < 128)
@@ -229,7 +245,7 @@ cp437_to_utf8(const char *cp437str, size_t buflen, size_t *outlen)
 
 	rp = ret;
 	for (idx = 0; idx < buflen; idx++) {
-		ch = cp437str[idx];
+		ch = cpstr[idx];
 		if (ch == 0) {
 			*(rp++) = 0xef;
 			*(rp++) = 0xbf;
@@ -241,13 +257,15 @@ cp437_to_utf8(const char *cp437str, size_t buflen, size_t *outlen)
 			cplen = 1;
 		}
 		else {
-			cplen = write_cp(rp, cp437_unicode_table[ch - 128]);
+			cplen = write_cp(rp, cpdef->cp_unicode_table[ch - 128]);
 			if (cplen < 1)
 				goto error;
 		}
 		rp += cplen;
 	}
 	*rp = 0;
+	if (outlen)
+		*outlen = rp - ret;
 	return ret;
 
 error:
@@ -263,12 +281,12 @@ error:
  * Does not normalize the unicode, just a simple mapping
  * (TODO: Normalize into combined chars etc)
  */
-char *
-utf8_to_cp437(const uint8_t *utf8str, char unmapped)
+static char *
+utf8_to_cpstr(const uint8_t *utf8str, char unmapped, size_t inlen, size_t *outlen, struct codepage_def *cpdef)
 {
-	const uint8_t *p;
+	size_t idx;
 	char *rp;
-	size_t outlen = 0;
+	size_t outsz = 0;
 	int incode = 0;
 	uint32_t codepoint;
 	char *ret = NULL;
@@ -276,41 +294,41 @@ utf8_to_cp437(const uint8_t *utf8str, char unmapped)
 	// TODO: Normalize UTF-8...
 
 	// Calculate the number of code points and validate.
-	for (p = utf8str; *p; p++) {
+	for (idx = 0; idx < inlen; idx++) {
 		if (incode) {
-			switch (*p & 0xc0) {
+			switch (utf8str[idx] & 0xc0) {
 				case 0x80:
 					incode--;
 					if (incode == 0)
-						outlen++;
+						outsz++;
 					break;
 				default:
 					goto error;
 			}
 		}
 		else {
-			if (*p & 0x80) {
-				if ((*p & 0xe0) == 0xc0)
+			if (utf8str[idx] & 0x80) {
+				if ((utf8str[idx] & 0xe0) == 0xc0)
 					incode = 1;
-				else if ((*p & 0xf0) == 0xe0)
+				else if ((utf8str[idx] & 0xf0) == 0xe0)
 					incode = 2;
-				else if ((*p & 0xf8) == 0xf0)
+				else if ((utf8str[idx] & 0xf8) == 0xf0)
 					incode = 3;
 				else
 					goto error;
 			}
 			else
-				outlen++;
+				outsz++;
 		}
 	}
-	ret = malloc(outlen + 1);
+	ret = malloc(outsz + 1);
 	if (ret == NULL)
 		goto error;
 	rp = ret;
 
 	// Fill the string...
-	while (*utf8str) {
-		utf8str += read_cp(utf8str, &codepoint);
+	for (idx = 0; idx < inlen; idx++) {
+		utf8str += read_cp(&utf8str[idx], &codepoint);
 		if (codepoint == 0xffff || codepoint == 0xfffe)
 			goto error;
 		if (codepoint < 128)
@@ -319,13 +337,53 @@ utf8_to_cp437(const uint8_t *utf8str, char unmapped)
 		else if (codepoint == 0xa6) 
 			*(rp++) = '|';
 		else {
-			*(rp++) = cp437_from_unicode_cp(codepoint, unmapped);
+			*(rp++) = cptable_from_unicode_cpoint(codepoint, unmapped, cpdef);
 		}
 	}
 	*rp = 0;
+	if (outlen)
+		*outlen = rp - ret;
 
 	return ret;
 error:
 	free(ret);
 	return NULL;
 }
+
+struct codepage_def ciolib_cp[CIOLIB_CP_COUNT] = {
+	{"CP437", CIOLIB_CP437, cpstr_to_utf8, utf8_to_cpstr, cptable_from_unicode_cpoint, cptable_from_unicode_cpoint_ext, 
+		cp437_table, sizeof(cp437_table) / sizeof(cp437_table[0]),
+		cp437_unicode_table, cp437_ext_table},
+};
+
+uint8_t *cp_to_utf8(enum ciolib_codepage cp, const char *cpstr, size_t buflen, size_t *outlen)
+{
+	if (cp < 0 || cp >= CIOLIB_CP_COUNT)
+		return NULL;
+
+	return ciolib_cp[cp].to_utf8(cpstr, buflen, outlen, &ciolib_cp[cp]);
+}
+
+char *utf8_to_cp(enum ciolib_codepage cp, const uint8_t *utf8str, char unmapped, size_t buflen, size_t *outlen)
+{
+	if (cp < 0 || cp >= CIOLIB_CP_COUNT)
+		return NULL;
+
+	return ciolib_cp[cp].utf8_to(utf8str, unmapped, buflen, outlen, &ciolib_cp[cp]);
+}
+
+uint8_t cp_from_unicode_cp(enum ciolib_codepage cp, uint32_t cpoint, char unmapped)
+{
+	if (cp < 0 || cp >= CIOLIB_CP_COUNT)
+		return unmapped;
+
+	return ciolib_cp[cp].from_unicode_cpoint(cpoint, unmapped, &ciolib_cp[cp]);
+}
+
+uint8_t cp_from_unicode_cp_ext(enum ciolib_codepage cp, uint32_t cpoint, char unmapped)
+{
+	if (cp < 0 || cp >= CIOLIB_CP_COUNT)
+		return unmapped;
+
+	return ciolib_cp[cp].from_unicode_cpoint_ext(cpoint, unmapped, &ciolib_cp[cp]);
+}
diff --git a/src/conio/utf8_codepages.h b/src/conio/utf8_codepages.h
index 381b3200da..26bf50232d 100644
--- a/src/conio/utf8_codepages.h
+++ b/src/conio/utf8_codepages.h
@@ -1,19 +1,14 @@
 #ifndef UTF8_CODEPAGES_H
 #define UTF8_CODEPAGES_H
 
-struct cp437map {
-	uint32_t	unicode;
-	uint8_t		cp437;
+enum ciolib_codepage {
+	CIOLIB_CP437,
+	CIOLIB_CP_COUNT
 };
 
-// Sorted by unicode codepoint...
-extern struct cp437map cp437_table[160];
-extern uint32_t cp437_unicode_table[128];
-extern uint32_t cp437_ext_table[32];
-
-uint8_t *cp437_to_utf8(const char *cp437str, size_t buflen, size_t *outlen);
-char *utf8_to_cp437(const uint8_t *utf8str, char unmapped);
-uint8_t cp437_from_unicode_cp(uint32_t cp, char unmapped);
-uint8_t cp437_from_unicode_cp_ext(uint32_t cp, char unmapped);
+uint8_t *cp_to_utf8(enum ciolib_codepage cp, const char *cpstr, size_t buflen, size_t *outlen);
+char *utf8_to_cp(enum ciolib_codepage cp, const uint8_t *utf8str, char unmapped, size_t buflen, size_t *outlen);
+uint8_t cp_from_unicode_cp(enum ciolib_codepage cp, uint32_t cpoint, char unmapped);
+uint8_t cp_from_unicode_cp_ext(enum ciolib_codepage cp, uint32_t cpoint, char unmapped);
 
 #endif
diff --git a/src/conio/x_events.c b/src/conio/x_events.c
index 7733e73382..9ffc0f2047 100644
--- a/src/conio/x_events.c
+++ b/src/conio/x_events.c
@@ -665,7 +665,7 @@ static int x11_event(XEvent *ev)
 						x11.XGetWindowProperty(dpy, win, ev->xselection.property, 0, bytes_left, True, AnyPropertyType, &pastebuf_format, &format, &len, &dummy, (unsigned char **)&pastebuf);
 						if (x11.utf8 && pastebuf_format == x11.utf8) {
 							char *opb = pastebuf;
-							pastebuf = (char *)utf8_to_cp437((uint8_t *)pastebuf, '?');
+							pastebuf = (char *)utf8_to_cp(CIOLIB_CP437, (uint8_t *)pastebuf, '?', strlen(pastebuf), NULL);
 							if (pastebuf == NULL)
 								pastebuf = opb;
 							else
@@ -706,7 +706,7 @@ static int x11_event(XEvent *ev)
 						respond.xselection.property=req->property;
 					}
 					else if(req->target == x11.utf8) {
-						uint8_t *utf8_str = cp437_to_utf8(copybuf, strlen(copybuf), NULL);
+						uint8_t *utf8_str = cp_to_utf8(CIOLIB_CP437, copybuf, strlen(copybuf), NULL);
 						if (utf8_str != NULL) {
 							x11.XChangeProperty(dpy, req->requestor, req->property, x11.utf8, 8, PropModeReplace, utf8_str, strlen((char *)utf8_str));
 							respond.xselection.property=req->property;
-- 
GitLab