Skip to content
Snippets Groups Projects
Commit a05acc7b authored by Rob Swindell's avatar Rob Swindell :speech_balloon:
Browse files

Better Unicode character display-width calculation (in columns)

When tested with Windows Terminal version 1.18.10301.0 against all codepoints
between 0x100 and 0x1FFFF, this change reduces the number of width mismatches
from more than 8000 to 349. The "zero width" mismatches are probably the most
important/glaring and will need to be addressed next (since some terminals
don't move the cursor for zero-width characters, and some do).
parent 0e14245f
No related branches found
No related tags found
1 merge request: !455 Update branch with changes from master
......@@ -319,15 +319,68 @@ size_t unicode_width(enum unicode_codepoint u)
case UNICODE_VARIATION_SELECTOR_16:
case UNICODE_ZERO_WIDTH_NO_BREAK_SPACE:
return 0;
// Exceptions to the ranges (blocks/sub-blocks) in the default case
case UNICODE_CIRCLED_NUMBER_TEN_ON_BLACK_SQUARE:
case UNICODE_CIRCLED_NUMBER_TWENTY_ON_BLACK_SQUARE:
case UNICODE_CIRCLED_NUMBER_THIRTY_ON_BLACK_SQUARE:
case UNICODE_CIRCLED_NUMBER_FORTY_ON_BLACK_SQUARE:
case UNICODE_CIRCLED_NUMBER_FIFTY_ON_BLACK_SQUARE:
case UNICODE_CIRCLED_NUMBER_SIXTY_ON_BLACK_SQUARE:
case UNICODE_CIRCLED_NUMBER_SEVENTY_ON_BLACK_SQUARE:
case UNICODE_CIRCLED_NUMBER_EIGHTY_ON_BLACK_SQUARE:
case UNICODE_HOT_PEPPER:
case UNICODE_FORK_AND_KNIFE_WITH_PLATE:
return 1;
case UNICODE_MAHJONG_TILE_RED_DRAGON:
case UNICODE_PLAYING_CARD_BLACK_JOKER:
case UNICODE_NEGATIVE_SQUARED_AB:
case UNICODE_HEAVY_EQUAL_SIGN:
return 2;
// Ranges
default:
if( (u >= UNICODE_BLOCK_CJK_RADICALS_SUPPLEMENT_BEGIN && u <= UNICIDE_BLOCK_YI_RADICALS_END)
if( (u >= UNICODE_SUBBLOCK_YIJING_HEXAGRAM_BEGIN && u <= UNICODE_SUBBLOCK_YIJING_HEXAGRAM_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS1_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS1_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS2_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS2_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS3_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS3_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS4_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS4_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS5_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS5_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS6_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS6_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS7_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS7_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS8_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS8_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS9_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS9_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS10_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS10_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS11_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS11_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS12_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS12_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS13_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS13_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS14_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS14_END)
|| (u >= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS15_BEGIN && u <= UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS15_END)
)
return 1;
if( (u >= UNICODE_SUBBLOCK_FULLWIDTH_MISC_SYMBOLS_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_MISC_SYMBOLS_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_HANGUL_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_HANGUL_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_CHARS_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_CHARS_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_SYMBOLS_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_SYMBOLS_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_KHITAN_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_KHITAN_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_ENCLOSED_ALPHANUM_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_ENCLOSED_ALPHANUM_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_REG_INDICATOR_SYM_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_REG_INDICATOR_SYM_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_ENC_IDEOG_SUPP_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_ENC_IDEOG_SUPP_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_GEOM_SHAPES_EXT_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_GEOM_SHAPES_EXT_END)
|| (u >= UNICODE_BLOCK_CJK_RADICALS_SUPPLEMENT_BEGIN && u <= UNICIDE_BLOCK_YI_RADICALS_END)
|| (u >= UNICIDE_BLOCK_HANGUL_JAMO_EXTENDED_A_BEGIN && u <= UNICIDE_BLOCK_HANGUL_JAMO_EXTENDED_A_END)
|| (u >= UNICIDE_BLOCK_HANGUL_SYLLABLES_BEGIN && u <= UNICIDE_BLOCK_HANGUL_SYLLABLES_END)
|| (u >= UNICODE_BLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_BEGIN && u <= UNICODE_BLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_END)
|| (u >= UNICODE_BLOCK_VERTICAL_FORMS_BEGIN && u <= UNICODE_BLOCK_VERTICAL_FORMS_END)
|| (u >= UNICODE_BLOCK_CJK_COMPATIBILITY_FORMS_BEGIN && u <= UNICODE_BLOCK_CJK_COMPATIBILITY_FORMS_END)
|| (u >= UNICODE_BLOCK_SMALL_FORM_VARIANTS_BEGIN && u <= UNICODE_BLOCK_SMALL_FORM_VARIANTS_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_CHARS_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_CHARS_END)
|| (u >= UNICODE_SUBBLOCK_FULLWIDTH_SYMBOLS_BEGIN && u <= UNICODE_SUBBLOCK_FULLWIDTH_SYMBOLS_END)
|| (u >= UNICODE_BLOCK_TANGUT_BEGIN && u <= UNICODE_BLOCK_TANGUT_END)
|| (u >= UNICODE_BLOCK_TANGUT_COMPONENTS_BEGIN && u <= UNICODE_BLOCK_TANGUT_COMPONENTS_END)
|| (u >= UNICODE_BLOCK_KANA_SUPPLEMENT_BEGIN && u <= UNICODE_BLOCK_KANA_SUPPLEMENT_END)
|| (u >= UNICODE_BLOCK_KANA_EXTENDED_A_BEGIN && u <= UNICODE_BLOCK_KANA_EXTENDED_A_END)
|| (u >= UNICODE_BLOCK_NUSHU_BEGIN && u <= UNICODE_BLOCK_NUSHU_END)
|| (u >= UNICODE_BLOCK_EMOTICONS_BEGIN && u <= UNICODE_BLOCK_EMOTICONS_END)
|| (u >= UNICODE_BLOCK_MISC_SYMBOLS_AND_PICTOGRAPHS_BEGIN && u <= UNICODE_BLOCK_MISC_SYMBOLS_AND_PICTOGRAPHS_END)
|| (u >= UNICODE_BLOCK_SUPP_SYMBOLS_AND_PICTOGRAPHS_BEGIN && u <= UNICODE_BLOCK_SUPP_SYMBOLS_AND_PICTOGRAPHS_END)
|| (u >= UNICODE_BLOCK_EXTA_SYMBOLS_AND_PICTOGRAPHS_BEGIN && u <= UNICODE_BLOCK_EXTA_SYMBOLS_AND_PICTOGRAPHS_END)
)
return 2;
return 1;
......
......@@ -625,6 +625,15 @@ enum unicode_codepoint {
UNICODE_KATAKANA_LETTER_TU = 0x30C4,
UNICODE_CIRCLED_NUMBER_TEN_ON_BLACK_SQUARE = 0x3248,
UNICODE_CIRCLED_NUMBER_TWENTY_ON_BLACK_SQUARE = 0x3249,
UNICODE_CIRCLED_NUMBER_THIRTY_ON_BLACK_SQUARE = 0x324A,
UNICODE_CIRCLED_NUMBER_FORTY_ON_BLACK_SQUARE = 0x324B,
UNICODE_CIRCLED_NUMBER_FIFTY_ON_BLACK_SQUARE = 0x324C,
UNICODE_CIRCLED_NUMBER_SIXTY_ON_BLACK_SQUARE = 0x324D,
UNICODE_CIRCLED_NUMBER_SEVENTY_ON_BLACK_SQUARE = 0x324E,
UNICODE_CIRCLED_NUMBER_EIGHTY_ON_BLACK_SQUARE = 0x324F,
UNICODE_VARIATION_SELECTOR_1 = 0xFE00,
UNICODE_VARIATION_SELECTOR_2 = 0xFE01,
UNICODE_VARIATION_SELECTOR_3 = 0xFE02,
......@@ -680,6 +689,13 @@ enum unicode_codepoint {
UNICODE_REPLACEMENT_CHARACTER = 0xFFFD,
UNICODE_MAHJONG_TILE_RED_DRAGON = 0x1F004,
UNICODE_PLAYING_CARD_BLACK_JOKER = 0x1F0CF,
UNICODE_NEGATIVE_SQUARED_AB = 0x1F18E,
UNICODE_HOT_PEPPER = 0x1F336,
UNICODE_FORK_AND_KNIFE_WITH_PLATE = 0x1F37D,
UNICODE_HEAVY_EQUAL_SIGN = 0x1F7F0,
UNICODE_MAX = 0x10FFFF,
UNICODE_LIMIT
};
......@@ -715,6 +731,8 @@ enum unicode_codepoint {
#define UNICODE_BLOCK_KATAKANA_PHONETIC_EXTENSIONS_END 0x31FF // Fullwidth
// NOTE: the "UNICIDE" spelling (sic) of the six names below is the spelling
// that unicode_width() references in its range checks, so the names must be
// kept as-is unless every user is renamed at the same time.
// Each BEGIN/END pair is an inclusive codepoint range.
#define UNICIDE_BLOCK_YI_RADICALS_BEGIN 0xA490 // Fullwidth
#define UNICIDE_BLOCK_YI_RADICALS_END 0xA4CF // Fullwidth
#define UNICIDE_BLOCK_HANGUL_JAMO_EXTENDED_A_BEGIN 0xA960 // Fullwidth
#define UNICIDE_BLOCK_HANGUL_JAMO_EXTENDED_A_END 0xA97F // Fullwidth
#define UNICIDE_BLOCK_HANGUL_SYLLABLES_BEGIN 0xAC00 // Fullwidth
#define UNICIDE_BLOCK_HANGUL_SYLLABLES_END 0xD7AF // Fullwidth
#define UNICODE_BLOCK_SURROGATE_BEGIN 0xD800
......@@ -729,13 +747,79 @@ enum unicode_codepoint {
#define UNICODE_BLOCK_SMALL_FORM_VARIANTS_END 0xFE6F
// Whole-block range. The range checks in unicode_width() use the narrower
// UNICODE_SUBBLOCK_FULLWIDTH_CHARS and UNICODE_SUBBLOCK_FULLWIDTH_SYMBOLS
// ranges (defined under "Sub-Blocks" below), both of which lie inside it.
#define UNICODE_BLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS_BEGIN 0xFF00
#define UNICODE_BLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS_END 0xFFEF
#define UNICODE_BLOCK_TANGUT_BEGIN 0x17000 // Fullwidth
#define UNICODE_BLOCK_TANGUT_END 0x187FF // Fullwidth
#define UNICODE_BLOCK_TANGUT_COMPONENTS_BEGIN 0x18800 // Fullwidth
#define UNICODE_BLOCK_TANGUT_COMPONENTS_END 0x18AFF // Fullwidth
// The range checks in unicode_width() use UNICODE_SUBBLOCK_FULLWIDTH_KHITAN
// (0x18B00-0x18CD5), which is shorter than this whole-block range.
#define UNICODE_BLOCK_KHITAN_SMALL_SCRIPT_BEGIN 0x18B00 // Fullwidth
#define UNICODE_BLOCK_KHITAN_SMALL_SCRIPT_END 0x18CFF // Fullwidth
#define UNICODE_BLOCK_KANA_SUPPLEMENT_BEGIN 0x1B000 // Fullwidth
#define UNICODE_BLOCK_KANA_SUPPLEMENT_END 0x1B0FF // Fullwidth
#define UNICODE_BLOCK_KANA_EXTENDED_A_BEGIN 0x1B100 // Fullwidth
#define UNICODE_BLOCK_KANA_EXTENDED_A_END 0x1B12F // Fullwidth
#define UNICODE_BLOCK_NUSHU_BEGIN 0x1B170 // Fullwidth
#define UNICODE_BLOCK_NUSHU_END 0x1B2FF // Fullwidth
// The range checks in unicode_width() use UNICODE_SUBBLOCK_FULLWIDTH_ENC_IDEOG_SUPP
// (0x1F200-0x1F265), which is shorter than this whole-block range.
#define UNICODE_BLOCK_ENCLOSED_IDEOG_SUPPLEMENT_BEGIN 0x1F200 // Fullwidth
#define UNICODE_BLOCK_ENCLOSED_IDEOG_SUPPLEMENT_END 0x1F2FF // Fullwidth
// UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS1 through SYMBOLS11 (0x1F321-0x1F5FA) lie
// inside this block; unicode_width() tests them first and returns 1 for them.
#define UNICODE_BLOCK_MISC_SYMBOLS_AND_PICTOGRAPHS_BEGIN 0x1F300 // Fullwidth
#define UNICODE_BLOCK_MISC_SYMBOLS_AND_PICTOGRAPHS_END 0x1F5FF // Fullwidth
// UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS12 through SYMBOLS15 (0x1F650-0x1F6EA) lie
// inside this range; unicode_width() tests them first and returns 1 for them.
// NOTE(review): the Unicode "Emoticons" block proper is U+1F600..U+1F64F, so
// this range appears to extend over the following blocks as well -- confirm
// that the wider END value is intentional before changing it.
#define UNICODE_BLOCK_EMOTICONS_BEGIN 0x1F600
#define UNICODE_BLOCK_EMOTICONS_END 0x1F6FF
#define UNICODE_BLOCK_SUPP_SYMBOLS_AND_PICTOGRAPHS_BEGIN 0x1F900 // Fullwidth
#define UNICODE_BLOCK_SUPP_SYMBOLS_AND_PICTOGRAPHS_END 0x1F9FF // Fullwidth
#define UNICODE_BLOCK_EXTA_SYMBOLS_AND_PICTOGRAPHS_BEGIN 0x1FA70 // Fullwidth
#define UNICODE_BLOCK_EXTA_SYMBOLS_AND_PICTOGRAPHS_END 0x1FAFF // Fullwidth
// Sub-Blocks
// Inclusive codepoint ranges used by the range checks in unicode_width():
//  - UNICODE_SUBBLOCK_YIJING_HEXAGRAM and UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLSn
//    are tested first and yield a width of 1.
//  - UNICODE_SUBBLOCK_FULLWIDTH_* are tested afterwards and yield a width of 2.
#define UNICODE_SUBBLOCK_FULLWIDTH_HANGUL_BEGIN 0x1100
#define UNICODE_SUBBLOCK_FULLWIDTH_HANGUL_END 0x115F
#define UNICODE_SUBBLOCK_FULLWIDTH_MISC_SYMBOLS_BEGIN 0x2648
#define UNICODE_SUBBLOCK_FULLWIDTH_MISC_SYMBOLS_END 0x2653
#define UNICODE_SUBBLOCK_YIJING_HEXAGRAM_BEGIN 0x4DC0
#define UNICODE_SUBBLOCK_YIJING_HEXAGRAM_END 0x4DFF
#define UNICODE_SUBBLOCK_FULLWIDTH_CHARS_BEGIN 0xFF01
#define UNICODE_SUBBLOCK_FULLWIDTH_CHARS_END 0xFF60
#define UNICODE_SUBBLOCK_FULLWIDTH_SYMBOLS_BEGIN 0xFFE0
#define UNICODE_SUBBLOCK_FULLWIDTH_SYMBOLS_END 0xFFE6
#define UNICODE_SUBBLOCK_FULLWIDTH_KHITAN_BEGIN 0x18B00
#define UNICODE_SUBBLOCK_FULLWIDTH_KHITAN_END 0x18CD5
#define UNICODE_SUBBLOCK_FULLWIDTH_ENCLOSED_ALPHANUM_BEGIN 0x1F191
#define UNICODE_SUBBLOCK_FULLWIDTH_ENCLOSED_ALPHANUM_END 0x1F19A
#define UNICODE_SUBBLOCK_FULLWIDTH_REG_INDICATOR_SYM_BEGIN 0x1F1E6
#define UNICODE_SUBBLOCK_FULLWIDTH_REG_INDICATOR_SYM_END 0x1F1FF
#define UNICODE_SUBBLOCK_FULLWIDTH_ENC_IDEOG_SUPP_BEGIN 0x1F200
#define UNICODE_SUBBLOCK_FULLWIDTH_ENC_IDEOG_SUPP_END 0x1F265
// Width-1 exception ranges, numbered in ascending codepoint order.
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS1_BEGIN 0x1F321
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS1_END 0x1F32C
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS2_BEGIN 0x1F394
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS2_END 0x1F39F
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS3_BEGIN 0x1F3CB
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS3_END 0x1F3CE
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS4_BEGIN 0x1F3D4
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS4_END 0x1F3DF
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS5_BEGIN 0x1F3F1
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS5_END 0x1F3F3
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS6_BEGIN 0x1F3F5
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS6_END 0x1F3F7
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS7_BEGIN 0x1F53E
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS7_END 0x1F54A
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS8_BEGIN 0x1F568
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS8_END 0x1F579
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS9_BEGIN 0x1F57B
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS9_END 0x1F594
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS10_BEGIN 0x1F597
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS10_END 0x1F5A3
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS11_BEGIN 0x1F5A5
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS11_END 0x1F5FA
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS12_BEGIN 0x1F650
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS12_END 0x1F67F
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS13_BEGIN 0x1F6C6
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS13_END 0x1F6CB
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS14_BEGIN 0x1F6CD
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS14_END 0x1F6CF
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS15_BEGIN 0x1F6E0
#define UNICODE_SUBBLOCK_HALFWIDTH_SYMBOLS15_END 0x1F6EA
#define UNICODE_SUBBLOCK_FULLWIDTH_GEOM_SHAPES_EXT_BEGIN 0x1F7E0
#define UNICODE_SUBBLOCK_FULLWIDTH_GEOM_SHAPES_EXT_END 0x1F7EB
#endif // Don't add anything after this line
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment