Skip to content
Snippets Groups Projects
Commit c33f773d authored by Deucе's avatar Deucе :ok_hand_tone4:
Browse files

New functions: utf8_to_cp437_str(), latin1_to_utf8_str(), and utf8_to_latin1_str()

parent 07ceba90
No related branches found
No related tags found
No related merge requests found
......@@ -251,6 +251,139 @@ int cp437_to_utf8_str(const char* str, char* dest, size_t maxlen, unsigned char
return retval;
}
/*
 * Convert a NUL-terminated UTF-8 string (src) to a CP437 string (dest).
 *
 *   src     NUL-terminated UTF-8 input
 *   dest    output buffer; needs room for 'maxlen' bytes plus a NUL-terminator
 *   maxlen  maximum number of bytes to store in dest (sans NUL-terminator)
 *   minval  accepted for signature parity with the other *_str converters,
 *           but not used by this function
 *   outlen  when non-NULL, receives the number of bytes stored in dest
 *
 * Code points for which unicode_to_cp437() returns 0 are omitted from dest.
 * dest is always NUL-terminated.
 *
 * Returns -1 when dest fills up before src is exhausted, the utf8_getc()
 * result (< 1) when decoding stops early, otherwise the byte length of the
 * last decoded sequence (0 for an empty src).
 */
int utf8_to_cp437_str(const char *src, char *dest, size_t maxlen, unsigned char minval, size_t *outlen)
{
	/* Upper bound on one encoded sequence: RFC 3629 allows 4 bytes, the
	 * legacy RFC 2279 forms up to 6; 6 is enough for any decoder. */
	const size_t max_seq = 6;
	int retval = 0;
	size_t lcl_outlen;

	(void)minval;
	if (outlen == NULL)
		outlen = &lcl_outlen;
	*outlen = 0;
	for (const char *p = src; *p != 0; p += retval) {
		if (*outlen >= maxlen) {
			retval = -1;
			break;
		}
		/* utf8_getc() wants the number of *source* bytes available.
		 * Count them (stopping at the terminator) rather than passing
		 * the space left in dest, which is unrelated to the input and
		 * caused valid multi-byte sequences to be rejected whenever
		 * dest was nearly full. */
		size_t avail = 0;
		while (avail < max_seq && p[avail] != 0)
			avail++;
		enum unicode_codepoint codepoint;
		retval = utf8_getc(p, avail, &codepoint);
		if (retval < 1)
			break;
		unsigned char ch = unicode_to_cp437(codepoint);
		if (ch != 0) {
			dest[*outlen] = ch;
			(*outlen)++;
		}
	}
	dest[*outlen] = 0;
	return retval;
}
/*
 * Convert a NUL-terminated Latin1 string (str) to a UTF-8 string (dest).
 *
 * Bytes below 'minval' are copied to dest unchanged; every other byte is
 * encoded through utf8_putc().  At most 'maxlen' bytes are stored, followed
 * by a NUL-terminator.  When 'outlen' is non-NULL it receives the number of
 * bytes stored (sans NUL-terminator).
 *
 * Returns -1 when dest fills up before str is exhausted, the utf8_putc()
 * result (< 1) when encoding fails, otherwise the result of the most recent
 * utf8_putc() call (0 if it was never called).
 */
int latin1_to_utf8_str(const char* str, char* dest, size_t maxlen, unsigned char minval, size_t *outlen)
{
	size_t scratch_len;
	size_t *written = (outlen != NULL) ? outlen : &scratch_len;
	const unsigned char *in = (const unsigned char *)str;
	int status = 0;

	*written = 0;
	while (*in != 0) {
		if (*written >= maxlen) {
			status = -1;
			break;
		}
		unsigned char byte = *in;
		if (byte < minval) {
			/* Below the conversion threshold: pass the byte through as-is */
			dest[*written] = byte;
			*written += 1;
		} else {
			enum unicode_codepoint codepoint = byte;
			status = utf8_putc(dest + *written, maxlen - *written, codepoint);
			if (status < 1)
				break;
			*written += status;
		}
		in++;
	}
	dest[*written] = 0;
	return status;
}
/*
 * Convert a NUL-terminated UTF-8 string (src) to a Latin1 string (dest).
 *
 *   src     NUL-terminated UTF-8 input
 *   dest    output buffer; needs room for 'maxlen' bytes plus a NUL-terminator
 *   maxlen  maximum number of bytes to store in dest (sans NUL-terminator)
 *   minval  accepted for signature parity with the other *_str converters,
 *           but not used by this function
 *   outlen  when non-NULL, receives the number of bytes stored in dest
 *
 * Code points for which unicode_to_latin1() returns 0 are omitted from dest.
 * dest is always NUL-terminated.
 *
 * Returns -1 when dest fills up before src is exhausted, the utf8_getc()
 * result (< 1) when decoding stops early, otherwise the byte length of the
 * last decoded sequence (0 for an empty src).
 */
int utf8_to_latin1_str(const char *src, char *dest, size_t maxlen, unsigned char minval, size_t *outlen)
{
	/* Upper bound on one encoded sequence: RFC 3629 allows 4 bytes, the
	 * legacy RFC 2279 forms up to 6; 6 is enough for any decoder. */
	const size_t max_seq = 6;
	int retval = 0;
	size_t lcl_outlen;

	(void)minval;
	if (outlen == NULL)
		outlen = &lcl_outlen;
	*outlen = 0;
	for (const char *p = src; *p != 0; p += retval) {
		if (*outlen >= maxlen) {
			retval = -1;
			break;
		}
		/* utf8_getc() wants the number of *source* bytes available.
		 * Count them (stopping at the terminator) rather than passing
		 * the space left in dest, which is unrelated to the input and
		 * caused valid multi-byte sequences to be rejected whenever
		 * dest was nearly full. */
		size_t avail = 0;
		while (avail < max_seq && p[avail] != 0)
			avail++;
		enum unicode_codepoint codepoint;
		retval = utf8_getc(p, avail, &codepoint);
		if (retval < 1)
			break;
		unsigned char ch = unicode_to_latin1(codepoint);
		if (ch != 0) {
			dest[*outlen] = ch;
			(*outlen)++;
		}
	}
	dest[*outlen] = 0;
	return retval;
}
// From openssl/crypto/asn1/a_utf8.c:
/*
* Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
/* UTF8 utilities */
/*-
* This parses a UTF8 string one codepoint at a time. It is passed a pointer
* to the string and the size of the string (in bytes). It sets 'value' to
* the value of the current codepoint. It returns the number of bytes read
* or a negative error code:
* -1 = string too short
* -2 = illegal character
* -3 = subsequent characters not of the form 10xxxxxx
* -4 = character encoded incorrectly (not minimal length).
*/
int utf8_getc(const char *str, size_t len, enum unicode_codepoint* val)
{
const unsigned char *p;
unsigned long value;
int ret;
if (len <= 0)
return 0;
p = (const unsigned char*)str;
/* Check syntax and work out the encoded value (if correct) */
if ((*p & 0x80) == 0) {
value = *p++ & 0x7f;
ret = 1;
} else if ((*p & 0xe0) == 0xc0) {
if (len < 2)
return -1;
if ((p[1] & 0xc0) != 0x80)
return -3;
value = (*p++ & 0x1f) << 6;
value |= *p++ & 0x3f;
if (value < 0x80)
return -4;
ret = 2;
} else if ((*p & 0xf0) == 0xe0) {
if (len < 3)
return -1;
/* Non-zero when 'value' falls inside the surrogate block, i.e. between
 * UNICODE_BLOCK_SURROGATE_BEGIN and UNICODE_BLOCK_SURROGATE_END inclusive.
 * The argument is parenthesized so expression arguments (e.g. a ternary)
 * are compared as a whole; it is evaluated twice, so avoid side effects. */
#define is_unicode_surrogate(value) \
	((value) >= UNICODE_BLOCK_SURROGATE_BEGIN && (value) <= UNICODE_BLOCK_SURROGATE_END)
......
......@@ -68,6 +68,13 @@ char* utf8_replace_chars(char* str, char (*lookup)(enum unicode_codepoint), char
// Convert a CP437 char string (src) to UTF-8 string (dest) up to 'maxlen' chars long (sans NUL-terminator)
// 'minval' can be used to limit the range of converted chars
int cp437_to_utf8_str(const char* src, char* dest, size_t maxlen, unsigned char minval);
// Convert a UTF-8 string (src) to a CP437 char string (dest) up to 'maxlen' bytes long (sans NUL-terminator)
// Code points that unicode_to_cp437() maps to 0 are omitted from dest; 'minval' is accepted but not used.
// On return, *outlen is set to the number of bytes written to dest unless it is NULL.
// Returns a value < 1 if conversion stopped early (-1 when dest is full)
int utf8_to_cp437_str(const char *src, char *dest, size_t maxlen, unsigned char minval, size_t *outlen);
// Convert a Latin1 char string (src) to UTF-8 string (dest) up to 'maxlen' bytes long (sans NUL-terminator)
// 'minval' can be used to limit the range of converted chars (bytes below 'minval' are copied unchanged).
// On return, *outlen is set to the number of bytes written to dest unless it is NULL
int latin1_to_utf8_str(const char* str, char* dest, size_t maxlen, unsigned char minval, size_t *outlen);
// Convert a UTF-8 string (src) to a Latin1 char string (dest) up to 'maxlen' bytes long (sans NUL-terminator)
// Code points that unicode_to_latin1() maps to 0 are omitted from dest; 'minval' is accepted but not used.
// On return, *outlen is set to the number of bytes written to dest unless it is NULL.
// Returns a value < 1 if conversion stopped early (-1 when dest is full)
int utf8_to_latin1_str(const char *src, char *dest, size_t maxlen, unsigned char minval, size_t *outlen);
// Decode a UTF-8 sequence of at most 'len' bytes to a UNICODE code point
// Returns the number of bytes read, 0 when 'len' is 0, or a negative error code
int utf8_getc(const char* str, size_t len, enum unicode_codepoint* codepoint);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment