From 1c9816d3b6653ea915810e883da9dd3e5e2e77b8 Mon Sep 17 00:00:00 2001
From: "Rob Swindell (on Windows 11)" <rob@synchro.net>
Date: Thu, 15 Feb 2024 21:24:53 -0800
Subject: [PATCH] Create utf8_strlcpy() which does a "safe" truncated-string
 copy

(doesn't leave a partial UTF-8 sequence at the end of the destination string).

This calls memcpy() for every char, which may not be performant, but I didn't
(yet) do any profiling. Hence, no premature optimization either.
---
 src/encode/utf8.c | 22 ++++++++++++++++++++++
 src/encode/utf8.h |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/src/encode/utf8.c b/src/encode/utf8.c
index 56c5bd710f..20bae23fdd 100644
--- a/src/encode/utf8.c
+++ b/src/encode/utf8.c
@@ -224,6 +224,28 @@ size_t utf8_str_count_width(const char* str, size_t min_width, size_t max_width)
 	return count;
 }
 
+// Like strlcpy(), but doesn't leave a partial UTF-8 sequence at the end of dst.
+// Returns the number of src bytes scanned; returns 0 without touching dst if size is 0.
+size_t utf8_strlcpy(char* dst, const char* src, size_t size)
+{
+	size_t i;
+	int len;
+
+	if (size < 1)
+		return 0;
+	for (i = 0; src[i] != '\0'; i += len) {
+		len = utf8_decode_firstbyte(src[i]);
+		if (len < 1 || memchr(src + i, '\0', len) != NULL) // invalid, or sequence cut short by the NUL
+			break; // never copy from or step past the end of src
+		if (i + len < size) { // whole sequence fits, leaving room for the terminator
+			memcpy(dst, src + i, len);
+			dst += len;
+		}
+	}
+	*dst = '\0';
+	return i;
+}
+
 int cp437_to_utf8_str(const char* str, char* dest, size_t maxlen, unsigned char minval)
 {
 	int retval = 0;
diff --git a/src/encode/utf8.h b/src/encode/utf8.h
index 99e8e50c62..1bfd4fe645 100644
--- a/src/encode/utf8.h
+++ b/src/encode/utf8.h
@@ -44,6 +44,9 @@ size_t utf8_str_total_width(const char*);
 // Return the count of chars within the specified width range in UTF-8 string (str)
 size_t utf8_str_count_width(const char*, size_t min_width, size_t max_width);
 
+// Truncating copy like strlcpy() (dst gets at most size-1 bytes plus NUL) that doesn't end dst with a partial UTF-8 sequence
+size_t utf8_strlcpy(char* dst, const char* src, size_t size);
+
 // Normalizes (to ASCII) chars in UTF-8 string 'str', in-place, resulting in string <= original in length
 char* utf8_normalize_str(char* str);
 
-- 
GitLab