summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2020-04-17 06:42:40 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2020-04-17 06:42:40 -0700
commit: 7fbc8ed7bdb1b5f5a5a4b83cf84c5343adbde201 (patch)
tree: 345ea5458df39cbb0b0f2beb8a4f5f38612a3b56
parent: 6b7374d7179d9be8ef2fb39928917cc3fd37453a (diff)
download: txr-7fbc8ed7bdb1b5f5a5a4b83cf84c5343adbde201.tar.gz
txr-7fbc8ed7bdb1b5f5a5a4b83cf84c5343adbde201.tar.bz2
txr-7fbc8ed7bdb1b5f5a5a4b83cf84c5343adbde201.zip
unicode: character width upkeep.
Updating the regex for matching code points corresponding to wide and full width characters, with reference to the old 1998 document: http://www.unicode.org/reports/tr11-2/ More to follow.

I neglected to record where the original data came from, and left the table uncommented. In some cases it has more coverage than the 1998 document; in some cases less.

* regex.c (create_wide_cs): Extend the 1100-115F range to 11F9 to cover all of Korean Hangeul. Replace two occurrences of 3000-303E with one 3000-303F. Merge 3250-32FE with 3300-4DB5, and extend to 4DBF. Add private use range E000-E757.
-rw-r--r-- regex.c 9
1 files changed, 4 insertions, 5 deletions
diff --git a/regex.c b/regex.c
index a4dc69eb..2de3daa2 100644
--- a/regex.c
+++ b/regex.c
@@ -3226,14 +3226,13 @@ static char_set_t *create_wide_cs(void)
char_set_t *cs = char_set_create(cst, 0, 1);
- char_set_add_range(cs, 0x1100, 0x115F);
+ char_set_add_range(cs, 0x1100, 0x11F9);
char_set_add_range(cs, 0x2329, 0x232A);
char_set_add_range(cs, 0x2E80, 0x2E99);
char_set_add_range(cs, 0x2E9B, 0x2EF3);
char_set_add_range(cs, 0x2F00, 0x2FD5);
char_set_add_range(cs, 0x2FF0, 0x2FFB);
- char_set_add_range(cs, 0x3000, 0x303E);
- char_set_add_range(cs, 0x3000, 0x303E);
+ char_set_add_range(cs, 0x3000, 0x303F);
char_set_add_range(cs, 0x3041, 0x3096);
char_set_add_range(cs, 0x3099, 0x30FF);
char_set_add_range(cs, 0x3105, 0x312D);
@@ -3242,13 +3241,13 @@ static char_set_t *create_wide_cs(void)
char_set_add_range(cs, 0x31C0, 0x31E3);
char_set_add_range(cs, 0x31F0, 0x321E);
char_set_add_range(cs, 0x3220, 0x3247);
- char_set_add_range(cs, 0x3250, 0x32FE);
- char_set_add_range(cs, 0x3300, 0x4DB5);
+ char_set_add_range(cs, 0x3250, 0x4DBF);
char_set_add_range(cs, 0x4E00, 0x9FFF);
char_set_add_range(cs, 0xA000, 0xA48C);
char_set_add_range(cs, 0xA490, 0xA4C6);
char_set_add_range(cs, 0xA960, 0xA97C);
char_set_add_range(cs, 0xAC00, 0xD7A3);
+ char_set_add_range(cs, 0xE000, 0xE757);
char_set_add_range(cs, 0xF900, 0xFAFF);
char_set_add_range(cs, 0xFE10, 0xFE19);
char_set_add_range(cs, 0xFE30, 0xFE52);