Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Main
Synchronet
Commits
1d23efcd
Commit
1d23efcd
authored
Jul 08, 2019
by
rswindell
Browse files
Add function cp437_to_utf8_str().
parent
b4bfa4fe
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
41 additions
and
2 deletions
+41
-2
src/encode/utf8.c
src/encode/utf8.c
+28
-0
src/encode/utf8.h
src/encode/utf8.h
+13
-2
No files found.
src/encode/utf8.c
View file @
1d23efcd
...
...
@@ -34,6 +34,7 @@
****************************************************************************/
#include "utf8.h"
#include "unicode.h"
#include <stdbool.h>
#include <string.h>
...
...
@@ -218,6 +219,33 @@ bool utf8_str_is_valid(const char* str)
return
true
;
}
int
cp437_to_utf8_str
(
const
char
*
str
,
char
*
dest
,
size_t
maxlen
,
unsigned
char
minval
)
{
int
retval
=
0
;
size_t
outlen
=
0
;
for
(
const
unsigned
char
*
p
=
str
;
*
p
!=
0
;
p
++
)
{
if
(
outlen
>=
maxlen
)
{
retval
=
-
1
;
break
;
}
uint32_t
codepoint
=
0
;
if
(
*
p
>=
minval
)
codepoint
=
cp437_unicode_tbl
[
*
p
];
if
(
codepoint
)
{
retval
=
utf8_putc
(
dest
+
outlen
,
maxlen
-
outlen
,
codepoint
);
if
(
retval
<
1
)
break
;
outlen
+=
retval
;
}
else
{
*
(
dest
+
outlen
)
=
*
p
;
outlen
++
;
}
}
*
(
dest
+
outlen
)
=
0
;
return
retval
;
}
// From openssl/crypto/asn1/a_utf8.c:
/*
* Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
...
...
src/encode/utf8.h
View file @
1d23efcd
...
...
@@ -48,13 +48,24 @@ extern "C" {
// Returns true if the string is valid UTF-8
bool
utf8_str_is_valid
(
const
char
*
);
// Normalizes (to ASCII) chars in UTF-8 string 'str', in-place, resulting in string <= original in length
char
*
utf8_normalize_str
(
char
*
str
);
// Replace or strip UTF-8 sequences in str
// If table ('tbl') of unicode codepoints if non-NULL is an array of 256 codepoints to map to 8-bit chars
// Replace or strip UTF-8 sequences in str (in-place)
// 'lookup' is a Unicode codepoint look-up function (optional)
// 'unsupported_ch' is the character used to replace unsupported Unicode codepoints (optional)
// 'unsupported_zwch' is the character used to replace unsupported zero-width Unicode codepoints (optional)
// 'error_ch' is the character used to replace invalid UTF-8 sequence bytes (optional)
char
*
utf8_replace_chars
(
char
*
str
,
char
(
*
lookup
)(
uint32_t
),
char
unsupported_ch
,
char
unsupported_zwch
,
char
error_ch
);
// Convert a CP437 char string (src) to UTF-8 string (dest) up to 'maxlen' chars long (sans NUL-terminator)
// 'minval' can be used to limit the range of converted chars
int
cp437_to_utf8_str
(
const
char
*
src
,
char
*
dest
,
size_t
maxlen
,
unsigned
char
minval
);
// Decode a UTF-8 sequence to a UNICODE code point
int
utf8_getc
(
const
char
*
str
,
size_t
len
,
uint32_t
*
codepoint
);
// Encode a UNICODE code point into a UTF-8 sequence (str)
int
utf8_putc
(
char
*
str
,
size_t
len
,
uint32_t
codepoint
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment