From 1d23efcd8480b1021ca00fbb14db41b113a2f2b1 Mon Sep 17 00:00:00 2001
From: rswindell <>
Date: Mon, 8 Jul 2019 04:23:48 +0000
Subject: [PATCH] Add function cp437_to_utf8_str().

---
 src/encode/utf8.c | 28 ++++++++++++++++++++++++++++
 src/encode/utf8.h | 15 +++++++++++++--
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/src/encode/utf8.c b/src/encode/utf8.c
index a210e6d0c9..94daa78948 100644
--- a/src/encode/utf8.c
+++ b/src/encode/utf8.c
@@ -34,6 +34,7 @@
  ****************************************************************************/
 
 #include "utf8.h"
+#include "unicode.h"
 #include <stdbool.h>
 #include <string.h>
 
@@ -218,6 +219,33 @@ bool utf8_str_is_valid(const char* str)
 	return true;
 }
 
+// Convert NUL-terminated CP437 'str' to UTF-8 in 'dest' (maxlen + 1 bytes, always NUL-terminated).
+// Chars >= 'minval' with a non-zero cp437_unicode_tbl entry are encoded; the rest are copied as-is.
+// Returns the number of bytes stored in 'dest' (sans NUL), or a negative value when the output
+// had to be truncated or utf8_putc() failed (the partial result is still NUL-terminated).
+int cp437_to_utf8_str(const char* str, char* dest, size_t maxlen, unsigned char minval)
+{
+	size_t outlen = 0;
+	int len = 1;
+	const unsigned char* p = (const unsigned char*)str;	// index the table with 0..255, never negative
+	for(; *p != 0 && outlen < maxlen; p++) {
+		uint32_t codepoint = (*p >= minval) ? cp437_unicode_tbl[*p] : 0;
+		if(codepoint == 0) {	// below 'minval' or unmapped: pass the byte through
+			dest[outlen++] = *p;
+			continue;
+		}
+		len = utf8_putc(dest + outlen, maxlen - outlen, codepoint);
+		if(len < 1)	// sequence did not fit in the remaining space (or encode error)
+			break;
+		outlen += len;
+	}
+	dest[outlen] = 0;
+	if(*p != 0)	// stopped before the end of input: 'dest' full or utf8_putc() failed
+		return (len < 0) ? len : -1;
+	return (int)outlen;
+}
+
+
 // From openssl/crypto/asn1/a_utf8.c:
 /*
  * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
diff --git a/src/encode/utf8.h b/src/encode/utf8.h
index c959b4457e..2d3293cb17 100644
--- a/src/encode/utf8.h
+++ b/src/encode/utf8.h
@@ -48,13 +48,24 @@ extern "C" {
 
 // Returns true if the string is valid UTF-8
 bool utf8_str_is_valid(const char*);
+
 // Normalizes (to ASCII) chars in UTF-8 string 'str', in-place, resulting in string <= original in length
 char* utf8_normalize_str(char* str);
-// Replace or strip UTF-8 sequences in str
-// If table ('tbl') of unicode codepoints if non-NULL is an array of 256 codepoints to map to 8-bit chars
+
+// Replace or strip UTF-8 sequences in str (in-place)
+// 'lookup' is a Unicode codepoint look-up function (optional)
+// 'unsupported_ch' is the character used to replace unsupported Unicode codepoints (optional)
+// 'unsupported_zwch' is the character used to replace unsupported zero-width Unicode codepoints (optional)
+// 'error_ch' is the character used to replace invalid UTF-8 sequence bytes (optional)
 char* utf8_replace_chars(char* str, char (*lookup)(uint32_t), char unsupported_ch, char unsupported_zwch, char error_ch);
+
+// Convert a CP437 char string (src) to a UTF-8 string (dest) of up to 'maxlen' bytes plus a NUL-terminator ('dest' must hold maxlen + 1 bytes)
+// Only chars >= 'minval' are converted; chars below 'minval' are copied to 'dest' unchanged
+int cp437_to_utf8_str(const char* src, char* dest, size_t maxlen, unsigned char minval);
+
 // Decode a UTF-8 sequence to a UNICODE code point
 int utf8_getc(const char* str, size_t len, uint32_t* codepoint);
+
 // Encode a UNICODE code point into a UTF-8 sequence (str)
 int utf8_putc(char* str, size_t len, uint32_t codepoint);
 
-- 
GitLab