Skip to content
Snippets Groups Projects
Commit 5e4964c4 authored by Rob Swindell's avatar Rob Swindell :speech_balloon:
Browse files

Add utf8_decode_firstbyte()

for use when all you need is the sequence length indicated by (and the validity of) the first byte in
a UTF-8 sequence.
parent 4f291f94
No related branches found
No related tags found
No related merge requests found
......@@ -23,6 +23,20 @@
#include "unicode.h"
#include <string.h>
/*
 * Classify a UTF-8 lead byte by its high-order bit pattern.
 *
 * ch: the first byte of a (possible) UTF-8 character sequence.
 *
 * Returns the total length in bytes (1-4) of the sequence that 'ch'
 * introduces, or 0 if 'ch' matches none of the lead-byte patterns
 * (e.g. a continuation byte 10xxxxxx, or 11111xxx).
 *
 * Only the prefix bits are examined, so every byte that matches a
 * lead-byte pattern is accepted (0xC0, for example, yields 2).
 */
int utf8_decode_firstbyte(char ch)
{
	/* Use an unsigned copy so the shifts never see a sign-extended value */
	const unsigned char lead = (unsigned char)ch;

	if ((lead >> 7) == 0x00)    /* 0xxxxxxx */
		return 1;
	if ((lead >> 5) == 0x06)    /* 110xxxxx */
		return 2;
	if ((lead >> 4) == 0x0e)    /* 1110xxxx */
		return 3;
	if ((lead >> 3) == 0x1e)    /* 11110xxx */
		return 4;
	return 0; /* not a valid lead-byte pattern */
}
char* utf8_normalize_str(char* str)
{
char* dest = str;
......
......@@ -32,6 +32,9 @@
extern "C" {
#endif
// Decode a UTF-8 first (lead) byte: returns the total length in bytes of the
// character sequence it introduces (1-4, counting the lead byte itself),
// or 0 on error (the byte does not match any lead-byte bit pattern)
int utf8_decode_firstbyte(char ch);
// Returns true if the string is valid UTF-8
bool utf8_str_is_valid(const char*);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment