From 1c9816d3b6653ea915810e883da9dd3e5e2e77b8 Mon Sep 17 00:00:00 2001
From: "Rob Swindell (on Windows 11)" <rob@synchro.net>
Date: Thu, 15 Feb 2024 21:24:53 -0800
Subject: [PATCH] Create utf8_strlcpy() which does a "safe" truncated-string copy

(doesn't leave a partial UTF-8 sequence at the end of the destination string).

This calls memcpy() for every char, which may not be performant, but I didn't
(yet) do any profiling. Hence, no premature optimization either.
---
Note: utf8_strlcpy() as carried here also stops at a final UTF-8 sequence
that is cut short by the terminator of src; the blob ids on the index lines
predate that adjustment.

 src/encode/utf8.c | 36 ++++++++++++++++++++++++++++++++++++
 src/encode/utf8.h |  3 +++
 2 files changed, 39 insertions(+)

diff --git a/src/encode/utf8.c b/src/encode/utf8.c
index 56c5bd710f..20bae23fdd 100644
--- a/src/encode/utf8.c
+++ b/src/encode/utf8.c
@@ -224,6 +224,42 @@ size_t utf8_str_count_width(const char* str, size_t min_width, size_t max_width)
 	return count;
 }
 
+// Like strlcpy(), but doesn't leave a partial UTF-8 sequence at the end of dst.
+// Copies complete UTF-8 sequences from src into dst (a buffer of 'size' bytes)
+// and NUL-terminates dst whenever size is non-zero.
+// Scanning of src stops at its terminator, at a byte for which
+// utf8_decode_firstbyte() returns less than 1, or at a final sequence that is
+// cut short by the terminator (so src is never read beyond its NUL).
+// Returns the number of src bytes scanned (0 when size is 0); a return value
+// >= size means that the copy in dst was truncated.
+size_t utf8_strlcpy(char* dst, const char* src, size_t size)
+{
+	size_t i;
+	int len;
+	int n;
+
+	if (size < 1)
+		return 0;
+
+	for (i = 0; src[i] != '\0'; i += len) {
+		len = utf8_decode_firstbyte(src[i]);
+		if (len < 1)
+			break;
+		// An incomplete trailing sequence must not carry us past the terminator
+		for (n = 1; n < len && src[i + n] != '\0'; n++)
+			;
+		if (n < len)
+			break;
+		// After the first sequence that doesn't fit, i >= size: only counting continues
+		if (i + len < size) {
+			memcpy(dst, src + i, len);
+			dst += len;
+		}
+	}
+	*dst = '\0';
+	return i;
+}
+
 int cp437_to_utf8_str(const char* str, char* dest, size_t maxlen, unsigned char minval)
 {
 	int retval = 0;
diff --git a/src/encode/utf8.h b/src/encode/utf8.h
index 99e8e50c62..1bfd4fe645 100644
--- a/src/encode/utf8.h
+++ b/src/encode/utf8.h
@@ -44,6 +44,9 @@ size_t utf8_str_total_width(const char*);
 // Return the count of chars within the specified width range in UTF-8 string (str)
 size_t utf8_str_count_width(const char*, size_t min_width, size_t max_width);
 
+// Like strlcpy(), but doesn't leave a partial UTF-8 sequence at the end of dst
+size_t utf8_strlcpy(char* dst, const char* src, size_t size);
+
 // Normalizes (to ASCII) chars in UTF-8 string 'str', in-place, resulting in string <= original in length
 char* utf8_normalize_str(char* str);
 
-- 
GitLab