diff --git a/src/encode/utf8.c b/src/encode/utf8.c
index 16cc15c39863320d83b469ea7fd5844deb2b104f..56c5bd710f29cbc599c1f4e90e9c3fcf8576143a 100644
--- a/src/encode/utf8.c
+++ b/src/encode/utf8.c
@@ -23,6 +23,20 @@
 #include "unicode.h"
 #include <string.h>
 
+int utf8_decode_firstbyte(char ch)
+{
+	/* Classify a UTF-8 lead byte by its high-bit prefix only; returns the sequence length it announces, not a code point */
+	if ((ch & 0x80) == 0)
+		return 1; // 0xxxxxxx: single-byte sequence
+	if ((ch & 0xe0) == 0xc0)
+		return 2; // 110xxxxx: first byte of a 2-byte sequence
+	if ((ch & 0xf0) == 0xe0)
+		return 3; // 1110xxxx: first byte of a 3-byte sequence
+	if ((ch & 0xf8) == 0xf0)
+		return 4; // 11110xxx: first byte of a 4-byte sequence
+	return 0; // error: 10xxxxxx (continuation byte) or 11111xxx cannot start a sequence
+}
+
 char* utf8_normalize_str(char* str)
 {
 	char* dest = str;
diff --git a/src/encode/utf8.h b/src/encode/utf8.h
index a16d9e3ffe910017ecb99c4ae55abb60d98d0efe..99e8e50c62218e6c8c14da6856d45e90e24feb11 100644
--- a/src/encode/utf8.h
+++ b/src/encode/utf8.h
@@ -32,6 +32,9 @@
 extern "C" {
 #endif
 
+// Classify a UTF-8 first byte: returns the length in bytes (1-4) of the sequence it starts, or 0 if ch cannot start one
+int utf8_decode_firstbyte(char ch);
+
 // Returns true if the string is valid UTF-8
 bool utf8_str_is_valid(const char*);