From 7c9fbc7e77eb80e60f5094b38b7c2a3cedf5006d Mon Sep 17 00:00:00 2001
From: rswindell <>
Date: Wed, 10 Jul 2019 00:02:40 +0000
Subject: [PATCH] enum-ification (use enum unicode_codepoint instead of
 uint32_t). Replaced unicode_is_zerowidth() with unicode_width(), in
 preparation for "fullwidth" char support. Added UNICODE_UNDEFINED definition
 (0x0000) (UNICODE_NULL is already defined, at least in MSVC).

---
 src/encode/utf8.c        | 12 ++++++------
 src/encode/utf8.h        |  8 ++++----
 src/xpdev/unicode.c      | 20 +++++++++++++++-----
 src/xpdev/unicode.h      | 10 +++++-----
 src/xpdev/unicode_defs.h | 37 ++++++++++++++++++++++++-------------
 5 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/src/encode/utf8.c b/src/encode/utf8.c
index b8afb75edc..c7c427e072 100644
--- a/src/encode/utf8.c
+++ b/src/encode/utf8.c
@@ -141,7 +141,7 @@ char* utf8_normalize_str(char* str)
 
 /* Replace all multi-byte UTF-8 sequences with 'ch' or 'zwch' (when non-zero) */
 /* When ch and zwch are 0, effectively strips all UTF-8 chars from str */
-char* utf8_replace_chars(char* str, char (*lookup)(uint32_t), char unsupported_ch, char unsupported_zwch, char error_ch)
+char* utf8_replace_chars(char* str, char (*lookup)(enum unicode_codepoint), char unsupported_ch, char unsupported_zwch, char error_ch)
 {
 	char* end = str + strlen(str);
 	char* dest = str;
@@ -153,7 +153,7 @@ char* utf8_replace_chars(char* str, char (*lookup)(uint32_t), char unsupported_c
 			len = 1;
 			continue;
 		}
-		uint32_t codepoint = 0;
+		enum unicode_codepoint codepoint = 0;
 		len = utf8_getc(src, end - src, &codepoint);
 		if(len < 2) {
 			if(error_ch)
@@ -168,7 +168,7 @@ char* utf8_replace_chars(char* str, char (*lookup)(uint32_t), char unsupported_c
 				continue;
 			}
 		}
-		if(unicode_is_zerowidth(codepoint)) {
+		if(unicode_width(codepoint) == 0) {
 			if(unsupported_zwch)
 				*dest++ = unsupported_zwch;
 		} 
@@ -200,7 +200,7 @@ int cp437_to_utf8_str(const char* str, char* dest, size_t maxlen, unsigned char
 			retval = -1;
 			break;
 		}
-		uint32_t codepoint = 0;
+		enum unicode_codepoint codepoint = 0;
 		if(*p >= minval)
 			codepoint = cp437_unicode_tbl[*p];
 		if(codepoint) {
@@ -241,7 +241,7 @@ int cp437_to_utf8_str(const char* str, char* dest, size_t maxlen, unsigned char
  * -4 = character encoded incorrectly (not minimal length).
  */
 
-int utf8_getc(const char *str, size_t len, uint32_t* val)
+int utf8_getc(const char *str, size_t len, enum unicode_codepoint* val)
 {
     const unsigned char *p;
     unsigned long value;
@@ -339,7 +339,7 @@ int utf8_getc(const char *str, size_t len, uint32_t* val)
  * most 6 characters.
  */
 
-int utf8_putc(char *str, size_t len, uint32_t value)
+int utf8_putc(char *str, size_t len, enum unicode_codepoint value)
 {
     if (!str)
         len = 6;                /* Maximum we will need */
diff --git a/src/encode/utf8.h b/src/encode/utf8.h
index 2d3293cb17..e0a3cb8d04 100644
--- a/src/encode/utf8.h
+++ b/src/encode/utf8.h
@@ -36,9 +36,9 @@
 #ifndef UTF8_H_
 #define UTF8_H_
 
-#include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
+#include "unicode_defs.h"
 
 #define UTF8_MAX_LEN 6	// Longest possible UTF-8 sequence
 
@@ -57,17 +57,17 @@ char* utf8_normalize_str(char* str);
 // 'unsupported_ch' is the character used to replace unsupported Unicode codepoints (optional)
 // 'unsupported_zwch' is the character used to replace unsupported zero-width Unicode codepoints (optional)
 // 'error_ch' is the character used to replace invalid UTF-8 sequence bytes (optional)
-char* utf8_replace_chars(char* str, char (*lookup)(uint32_t), char unsupported_ch, char unsupported_zwch, char error_ch);
+char* utf8_replace_chars(char* str, char (*lookup)(enum unicode_codepoint), char unsupported_ch, char unsupported_zwch, char error_ch);
 
 // Convert a CP437 char string (src) to UTF-8 string (dest) up to 'maxlen' chars long (sans NUL-terminator)
 // 'minval' can be used to limit the range of converted chars
 int cp437_to_utf8_str(const char* src, char* dest, size_t maxlen, unsigned char minval);
 
 // Decode a UTF-8 sequence to a UNICODE code point
-int utf8_getc(const char* str, size_t len, uint32_t* codepoint);
+int utf8_getc(const char* str, size_t len, enum unicode_codepoint* codepoint);
 
 // Encode a UNICODE code point into a UTF-8 sequence (str)
-int utf8_putc(char* str, size_t len, uint32_t codepoint);
+int utf8_putc(char* str, size_t len, enum unicode_codepoint codepoint);
 
 #if defined(__cplusplus)
 }
diff --git a/src/xpdev/unicode.c b/src/xpdev/unicode.c
index 355132b18f..8fea13355f 100644
--- a/src/xpdev/unicode.c
+++ b/src/xpdev/unicode.c
@@ -47,7 +47,7 @@
 // CP437 character to/from UNICODE code point conversion
 // The CP437 character value is the index into the table.
 // If the value at that index is 0, no translation is needed (1:1 mapping).
-uint32_t cp437_unicode_tbl[] =
+enum unicode_codepoint cp437_unicode_tbl[] =
 {
 	/* 0x00 */ 0,
 	/* 0x01 */ 0x263A,
@@ -308,7 +308,7 @@ uint32_t cp437_unicode_tbl[] =
 	/* 0xFF */ 0x00A0
 };
 
-bool unicode_is_zerowidth(uint32_t u)
+size_t unicode_width(enum unicode_codepoint u)
 {
 	switch(u) {
 		case UNICODE_ZERO_WIDTH_SPACE:
@@ -331,12 +331,13 @@ bool unicode_is_zerowidth(uint32_t u)
 		case UNICODE_VARIATION_SELECTOR_15:
 		case UNICODE_VARIATION_SELECTOR_16:
 		case UNICODE_ZERO_WIDTH_NO_BREAK_SPACE:
-			return true;
+			return 0;
+		/* TODO: return 2 for "fullwidth" chars */
 	}
-	return false;
+	return 1;
 }
 
-char unicode_to_cp437(uint32_t codepoint)
+char unicode_to_cp437(enum unicode_codepoint codepoint)
 {
 	switch(codepoint) {
 		case 0:													return '\0';
@@ -444,6 +445,15 @@ char unicode_to_cp437(uint32_t codepoint)
 		case UNICODE_EM_SPACE:
 			return ' ';
 
+		case UNICODE_SQUARE_ROOT:								return CP437_CHAR_SQUARE_ROOT;
+		case UNICODE_CHECK_MARK:
+		case UNICODE_HEAVY_CHECK_MARK:							return CP437_CHAR_CHECK_MARK;
+
+		case UNICODE_MULTIPLICATION_X:
+		case UNICODE_HEAVY_MULTIPLICATION_X:
+		case UNICODE_BALLOT_X:
+		case UNICODE_HEAVY_BALLOT_X:							return 'x';
+
 		case UNICODE_OVERLINE:
 		case 0x2500: // Box Drawings Light Horizontal
 		case 0x2501: // Box Drawings Heavy Horizontal
diff --git a/src/xpdev/unicode.h b/src/xpdev/unicode.h
index 6dda706351..2b534a79d4 100644
--- a/src/xpdev/unicode.h
+++ b/src/xpdev/unicode.h
@@ -36,16 +36,16 @@
 #ifndef UNICODE_H_
 #define UNICODE_H_
 
-#include <stdint.h>
-#include <stdbool.h>
+#include <stdlib.h>
+#include "unicode_defs.h"
 
 #if defined(__cplusplus)
 extern "C" {
 #endif
 
-extern uint32_t cp437_unicode_tbl[];
-bool unicode_is_zerowidth(uint32_t);
-char unicode_to_cp437(uint32_t);
+extern enum unicode_codepoint cp437_unicode_tbl[];
+size_t unicode_width(enum unicode_codepoint);
+char unicode_to_cp437(enum unicode_codepoint);
 
 #if defined(__cplusplus)
 }
diff --git a/src/xpdev/unicode_defs.h b/src/xpdev/unicode_defs.h
index 97bf03aca6..555bdab7fd 100644
--- a/src/xpdev/unicode_defs.h
+++ b/src/xpdev/unicode_defs.h
@@ -37,8 +37,9 @@
 #define UNICODE_DEFS_H_
 
 enum unicode_codepoint {
-	UNICODE_NO_BREAK_SPACE = 0x00A0,
+	UNICODE_UNDEFINED = 0x0000,	// Not named UNICODE_NULL: that name is already defined (at least in MSVC)
 
+	UNICODE_NO_BREAK_SPACE = 0x00A0,
 	UNICODE_INVERTED_EXCLAMATION_MARK = 0x00A1,
 	UNICODE_CENT_SIGN = 0x00A2,
 	UNICODE_POUND_SIGN = 0x00A3,
@@ -121,18 +122,6 @@ enum unicode_codepoint {
 	UNICODE_LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = 0x00FF,
 	UNICODE_LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS = 0x0178,
 
-	UNICODE_EN_QUAD = 0x2000,
-	UNICODE_EM_QUAD = 0x2001,
-	UNICODE_EN_SPACE = 0x2002,
-	UNICODE_EM_SPACE = 0x2003,
-	UNICODE_ZERO_WIDTH_SPACE = 0x200B,
-	UNICODE_ZERO_WIDTH_NON_JOINER = 0x200C,
-	UNICODE_ZERO_WIDTH_JOINER = 0x200D,
-	UNICODE_EM_DASH = 0x2014,
-	UNICODE_BULLET = 0x2022,
-	UNICODE_DOUBLE_EXCLAMATION_MARK = 0x203c,
-	UNICODE_OVERLINE = 0x203E,
-
 	UNICODE_GREEK_CAPITAL_LETTER_HETA = 0x0370,
 	UNICODE_GREEK_SMALL_LETTER_HETA = 0x0371,
 	UNICODE_GREEK_CAPITAL_LETTER_ARCHAIC_SAMPI = 0x0372,
@@ -239,8 +228,30 @@ enum unicode_codepoint {
 	UNICODE_GREEK_LETTER_SAMPI = 0x03E0,
 	UNICODE_GREEK_SMALL_LETTER_SAMPI = 0x03E1,
 
+	UNICODE_EN_QUAD = 0x2000,
+	UNICODE_EM_QUAD = 0x2001,
+	UNICODE_EN_SPACE = 0x2002,
+	UNICODE_EM_SPACE = 0x2003,
+	UNICODE_ZERO_WIDTH_SPACE = 0x200B,
+	UNICODE_ZERO_WIDTH_NON_JOINER = 0x200C,
+	UNICODE_ZERO_WIDTH_JOINER = 0x200D,
+	UNICODE_EM_DASH = 0x2014,
+	UNICODE_BULLET = 0x2022,
+	UNICODE_DOUBLE_EXCLAMATION_MARK = 0x203c,
+	UNICODE_OVERLINE = 0x203E,
+
+	UNICODE_SQUARE_ROOT = 0x221A,
+
 	UNICODE_BLACK_SQUARE = 0x25A0,
 
+	UNICODE_CHECK_MARK = 0x2713,
+	UNICODE_HEAVY_CHECK_MARK = 0x2714,
+
+	UNICODE_MULTIPLICATION_X = 0x2715,
+	UNICODE_HEAVY_MULTIPLICATION_X = 0x2716,
+	UNICODE_BALLOT_X = 0x2717,
+	UNICODE_HEAVY_BALLOT_X = 0x2718,
+
 	UNICODE_VARIATION_SELECTOR_1 = 0xFE00,
 	UNICODE_VARIATION_SELECTOR_2 = 0xFE01,
 	UNICODE_VARIATION_SELECTOR_3 = 0xFE02,
-- 
GitLab