[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] Implement graphemewidth
- Subject: [PATCH] Implement graphemewidth
- From: "S. Gilles" <sgilles@xxxxxxxxxxxx>
- Reply-to: myrddin-dev@xxxxxxxxxxxxxx
- Date: Fri, 27 Oct 2017 00:15:16 -0400
- To: "myrddin-dev" <myrddin-dev@xxxxxxxxxxxxxx>
- Cc: "S. Gilles" <sgilles@xxxxxxxxxxxx>
- Moved tables into chartype (so cellwidth goes there)
- Moved strcellwidth into utf.myr
- Added tests for cellwidth, strcellwidth, and fmt
For now, I've kept my interval lists. Since we aren't combining
sets in any sort of way, an inversion list doesn't make anything
easier, and would be a bit less clear to read (in my opinion).
Plus, since reading Gillam, I agree that the Right Way to do this
would be to use something like a compact array (which is definitely
what musl uses). That's a project for mkchartab, and would affect
the whole file.
---
lib/std/chartype.myr | 490 ++++++++++++++++++++++++++++++++++++++++++++++
lib/std/fmt.myr | 10 +-
lib/std/test/chartype.myr | 15 ++
lib/std/test/fmt.myr | 4 +
lib/std/test/utf.myr | Bin 0 -> 1285 bytes
lib/std/utf.myr | 29 +++
6 files changed, 539 insertions(+), 9 deletions(-)
create mode 100644 lib/std/test/utf.myr
diff --git a/lib/std/chartype.myr b/lib/std/chartype.myr
index dd31e200..6465b258 100644
--- a/lib/std/chartype.myr
+++ b/lib/std/chartype.myr
@@ -26,6 +26,8 @@ pkg std =
const totitle : (c : char -> char)
generic charval : (c : char, base : int -> @a::(integral,numeric))
+
+ const cellwidth : (c : char -> int)
;;
extern const put : (fmt : byte[:], args : ... -> size)
@@ -1061,6 +1063,450 @@ const rtotitle1 = [
0x01f3, 499 /* dz Dz */
]
+type interval = struct
+ first : char /* lowest char in the range (inclusive, see in_range) */
+ last : char /* highest char in the range (inclusive, see in_range) */
+;;
+
+/*
+ * Chars that cellwidth reports as 0 cells wide, sorted for in_range. Generated by
+ *
+ *     uniset cat:Me,Mn,Cf | \
+ *     sed -r 's/^([^.]+)[.][.]([^.]+)$/\t[.first = 0x\1, .last = 0x\2],/' | \
+ *     sed -r 's/^([^.]+)$/\t[.first = 0x\1, .last = 0x\1],/'
+ */
+const width0 : interval[:] = [
+ [.first = 0xad, .last = 0xad],
+ [.first = 0x300, .last = 0x36f],
+ [.first = 0x483, .last = 0x489],
+ [.first = 0x591, .last = 0x5bd],
+ [.first = 0x5bf, .last = 0x5bf],
+ [.first = 0x5c1, .last = 0x5c2],
+ [.first = 0x5c4, .last = 0x5c5],
+ [.first = 0x5c7, .last = 0x5c7],
+ [.first = 0x600, .last = 0x605],
+ [.first = 0x610, .last = 0x61a],
+ [.first = 0x61c, .last = 0x61c],
+ [.first = 0x64b, .last = 0x65f],
+ [.first = 0x670, .last = 0x670],
+ [.first = 0x6d6, .last = 0x6dd],
+ [.first = 0x6df, .last = 0x6e4],
+ [.first = 0x6e7, .last = 0x6e8],
+ [.first = 0x6ea, .last = 0x6ed],
+ [.first = 0x70f, .last = 0x70f],
+ [.first = 0x711, .last = 0x711],
+ [.first = 0x730, .last = 0x74a],
+ [.first = 0x7a6, .last = 0x7b0],
+ [.first = 0x7eb, .last = 0x7f3],
+ [.first = 0x816, .last = 0x819],
+ [.first = 0x81b, .last = 0x823],
+ [.first = 0x825, .last = 0x827],
+ [.first = 0x829, .last = 0x82d],
+ [.first = 0x859, .last = 0x85b],
+ [.first = 0x8d4, .last = 0x902],
+ [.first = 0x93a, .last = 0x93a],
+ [.first = 0x93c, .last = 0x93c],
+ [.first = 0x941, .last = 0x948],
+ [.first = 0x94d, .last = 0x94d],
+ [.first = 0x951, .last = 0x957],
+ [.first = 0x962, .last = 0x963],
+ [.first = 0x981, .last = 0x981],
+ [.first = 0x9bc, .last = 0x9bc],
+ [.first = 0x9c1, .last = 0x9c4],
+ [.first = 0x9cd, .last = 0x9cd],
+ [.first = 0x9e2, .last = 0x9e3],
+ [.first = 0xa01, .last = 0xa02],
+ [.first = 0xa3c, .last = 0xa3c],
+ [.first = 0xa41, .last = 0xa42],
+ [.first = 0xa47, .last = 0xa48],
+ [.first = 0xa4b, .last = 0xa4d],
+ [.first = 0xa51, .last = 0xa51],
+ [.first = 0xa70, .last = 0xa71],
+ [.first = 0xa75, .last = 0xa75],
+ [.first = 0xa81, .last = 0xa82],
+ [.first = 0xabc, .last = 0xabc],
+ [.first = 0xac1, .last = 0xac5],
+ [.first = 0xac7, .last = 0xac8],
+ [.first = 0xacd, .last = 0xacd],
+ [.first = 0xae2, .last = 0xae3],
+ [.first = 0xafa, .last = 0xaff],
+ [.first = 0xb01, .last = 0xb01],
+ [.first = 0xb3c, .last = 0xb3c],
+ [.first = 0xb3f, .last = 0xb3f],
+ [.first = 0xb41, .last = 0xb44],
+ [.first = 0xb4d, .last = 0xb4d],
+ [.first = 0xb56, .last = 0xb56],
+ [.first = 0xb62, .last = 0xb63],
+ [.first = 0xb82, .last = 0xb82],
+ [.first = 0xbc0, .last = 0xbc0],
+ [.first = 0xbcd, .last = 0xbcd],
+ [.first = 0xc00, .last = 0xc00],
+ [.first = 0xc3e, .last = 0xc40],
+ [.first = 0xc46, .last = 0xc48],
+ [.first = 0xc4a, .last = 0xc4d],
+ [.first = 0xc55, .last = 0xc56],
+ [.first = 0xc62, .last = 0xc63],
+ [.first = 0xc81, .last = 0xc81],
+ [.first = 0xcbc, .last = 0xcbc],
+ [.first = 0xcbf, .last = 0xcbf],
+ [.first = 0xcc6, .last = 0xcc6],
+ [.first = 0xccc, .last = 0xccd],
+ [.first = 0xce2, .last = 0xce3],
+ [.first = 0xd00, .last = 0xd01],
+ [.first = 0xd3b, .last = 0xd3c],
+ [.first = 0xd41, .last = 0xd44],
+ [.first = 0xd4d, .last = 0xd4d],
+ [.first = 0xd62, .last = 0xd63],
+ [.first = 0xdca, .last = 0xdca],
+ [.first = 0xdd2, .last = 0xdd4],
+ [.first = 0xdd6, .last = 0xdd6],
+ [.first = 0xe31, .last = 0xe31],
+ [.first = 0xe34, .last = 0xe3a],
+ [.first = 0xe47, .last = 0xe4e],
+ [.first = 0xeb1, .last = 0xeb1],
+ [.first = 0xeb4, .last = 0xeb9],
+ [.first = 0xebb, .last = 0xebc],
+ [.first = 0xec8, .last = 0xecd],
+ [.first = 0xf18, .last = 0xf19],
+ [.first = 0xf35, .last = 0xf35],
+ [.first = 0xf37, .last = 0xf37],
+ [.first = 0xf39, .last = 0xf39],
+ [.first = 0xf71, .last = 0xf7e],
+ [.first = 0xf80, .last = 0xf84],
+ [.first = 0xf86, .last = 0xf87],
+ [.first = 0xf8d, .last = 0xf97],
+ [.first = 0xf99, .last = 0xfbc],
+ [.first = 0xfc6, .last = 0xfc6],
+ [.first = 0x102d, .last = 0x1030],
+ [.first = 0x1032, .last = 0x1037],
+ [.first = 0x1039, .last = 0x103a],
+ [.first = 0x103d, .last = 0x103e],
+ [.first = 0x1058, .last = 0x1059],
+ [.first = 0x105e, .last = 0x1060],
+ [.first = 0x1071, .last = 0x1074],
+ [.first = 0x1082, .last = 0x1082],
+ [.first = 0x1085, .last = 0x1086],
+ [.first = 0x108d, .last = 0x108d],
+ [.first = 0x109d, .last = 0x109d],
+ [.first = 0x135d, .last = 0x135f],
+ [.first = 0x1712, .last = 0x1714],
+ [.first = 0x1732, .last = 0x1734],
+ [.first = 0x1752, .last = 0x1753],
+ [.first = 0x1772, .last = 0x1773],
+ [.first = 0x17b4, .last = 0x17b5],
+ [.first = 0x17b7, .last = 0x17bd],
+ [.first = 0x17c6, .last = 0x17c6],
+ [.first = 0x17c9, .last = 0x17d3],
+ [.first = 0x17dd, .last = 0x17dd],
+ [.first = 0x180b, .last = 0x180e],
+ [.first = 0x1885, .last = 0x1886],
+ [.first = 0x18a9, .last = 0x18a9],
+ [.first = 0x1920, .last = 0x1922],
+ [.first = 0x1927, .last = 0x1928],
+ [.first = 0x1932, .last = 0x1932],
+ [.first = 0x1939, .last = 0x193b],
+ [.first = 0x1a17, .last = 0x1a18],
+ [.first = 0x1a1b, .last = 0x1a1b],
+ [.first = 0x1a56, .last = 0x1a56],
+ [.first = 0x1a58, .last = 0x1a5e],
+ [.first = 0x1a60, .last = 0x1a60],
+ [.first = 0x1a62, .last = 0x1a62],
+ [.first = 0x1a65, .last = 0x1a6c],
+ [.first = 0x1a73, .last = 0x1a7c],
+ [.first = 0x1a7f, .last = 0x1a7f],
+ [.first = 0x1ab0, .last = 0x1abe],
+ [.first = 0x1b00, .last = 0x1b03],
+ [.first = 0x1b34, .last = 0x1b34],
+ [.first = 0x1b36, .last = 0x1b3a],
+ [.first = 0x1b3c, .last = 0x1b3c],
+ [.first = 0x1b42, .last = 0x1b42],
+ [.first = 0x1b6b, .last = 0x1b73],
+ [.first = 0x1b80, .last = 0x1b81],
+ [.first = 0x1ba2, .last = 0x1ba5],
+ [.first = 0x1ba8, .last = 0x1ba9],
+ [.first = 0x1bab, .last = 0x1bad],
+ [.first = 0x1be6, .last = 0x1be6],
+ [.first = 0x1be8, .last = 0x1be9],
+ [.first = 0x1bed, .last = 0x1bed],
+ [.first = 0x1bef, .last = 0x1bf1],
+ [.first = 0x1c2c, .last = 0x1c33],
+ [.first = 0x1c36, .last = 0x1c37],
+ [.first = 0x1cd0, .last = 0x1cd2],
+ [.first = 0x1cd4, .last = 0x1ce0],
+ [.first = 0x1ce2, .last = 0x1ce8],
+ [.first = 0x1ced, .last = 0x1ced],
+ [.first = 0x1cf4, .last = 0x1cf4],
+ [.first = 0x1cf8, .last = 0x1cf9],
+ [.first = 0x1dc0, .last = 0x1df9],
+ [.first = 0x1dfb, .last = 0x1dff],
+ [.first = 0x200b, .last = 0x200f],
+ [.first = 0x202a, .last = 0x202e],
+ [.first = 0x2060, .last = 0x2064],
+ [.first = 0x2066, .last = 0x206f],
+ [.first = 0x20d0, .last = 0x20f0],
+ [.first = 0x2cef, .last = 0x2cf1],
+ [.first = 0x2d7f, .last = 0x2d7f],
+ [.first = 0x2de0, .last = 0x2dff],
+ [.first = 0x302a, .last = 0x302d],
+ [.first = 0x3099, .last = 0x309a],
+ [.first = 0xa66f, .last = 0xa672],
+ [.first = 0xa674, .last = 0xa67d],
+ [.first = 0xa69e, .last = 0xa69f],
+ [.first = 0xa6f0, .last = 0xa6f1],
+ [.first = 0xa802, .last = 0xa802],
+ [.first = 0xa806, .last = 0xa806],
+ [.first = 0xa80b, .last = 0xa80b],
+ [.first = 0xa825, .last = 0xa826],
+ [.first = 0xa8c4, .last = 0xa8c5],
+ [.first = 0xa8e0, .last = 0xa8f1],
+ [.first = 0xa926, .last = 0xa92d],
+ [.first = 0xa947, .last = 0xa951],
+ [.first = 0xa980, .last = 0xa982],
+ [.first = 0xa9b3, .last = 0xa9b3],
+ [.first = 0xa9b6, .last = 0xa9b9],
+ [.first = 0xa9bc, .last = 0xa9bc],
+ [.first = 0xa9e5, .last = 0xa9e5],
+ [.first = 0xaa29, .last = 0xaa2e],
+ [.first = 0xaa31, .last = 0xaa32],
+ [.first = 0xaa35, .last = 0xaa36],
+ [.first = 0xaa43, .last = 0xaa43],
+ [.first = 0xaa4c, .last = 0xaa4c],
+ [.first = 0xaa7c, .last = 0xaa7c],
+ [.first = 0xaab0, .last = 0xaab0],
+ [.first = 0xaab2, .last = 0xaab4],
+ [.first = 0xaab7, .last = 0xaab8],
+ [.first = 0xaabe, .last = 0xaabf],
+ [.first = 0xaac1, .last = 0xaac1],
+ [.first = 0xaaec, .last = 0xaaed],
+ [.first = 0xaaf6, .last = 0xaaf6],
+ [.first = 0xabe5, .last = 0xabe5],
+ [.first = 0xabe8, .last = 0xabe8],
+ [.first = 0xabed, .last = 0xabed],
+ [.first = 0xfb1e, .last = 0xfb1e],
+ [.first = 0xfe00, .last = 0xfe0f],
+ [.first = 0xfe20, .last = 0xfe2f],
+ [.first = 0xfeff, .last = 0xfeff],
+ [.first = 0xfff9, .last = 0xfffb],
+ [.first = 0x101fd, .last = 0x101fd],
+ [.first = 0x102e0, .last = 0x102e0],
+ [.first = 0x10376, .last = 0x1037a],
+ [.first = 0x10a01, .last = 0x10a03],
+ [.first = 0x10a05, .last = 0x10a06],
+ [.first = 0x10a0c, .last = 0x10a0f],
+ [.first = 0x10a38, .last = 0x10a3a],
+ [.first = 0x10a3f, .last = 0x10a3f],
+ [.first = 0x10ae5, .last = 0x10ae6],
+ [.first = 0x11001, .last = 0x11001],
+ [.first = 0x11038, .last = 0x11046],
+ [.first = 0x1107f, .last = 0x11081],
+ [.first = 0x110b3, .last = 0x110b6],
+ [.first = 0x110b9, .last = 0x110ba],
+ [.first = 0x110bd, .last = 0x110bd],
+ [.first = 0x11100, .last = 0x11102],
+ [.first = 0x11127, .last = 0x1112b],
+ [.first = 0x1112d, .last = 0x11134],
+ [.first = 0x11173, .last = 0x11173],
+ [.first = 0x11180, .last = 0x11181],
+ [.first = 0x111b6, .last = 0x111be],
+ [.first = 0x111ca, .last = 0x111cc],
+ [.first = 0x1122f, .last = 0x11231],
+ [.first = 0x11234, .last = 0x11234],
+ [.first = 0x11236, .last = 0x11237],
+ [.first = 0x1123e, .last = 0x1123e],
+ [.first = 0x112df, .last = 0x112df],
+ [.first = 0x112e3, .last = 0x112ea],
+ [.first = 0x11300, .last = 0x11301],
+ [.first = 0x1133c, .last = 0x1133c],
+ [.first = 0x11340, .last = 0x11340],
+ [.first = 0x11366, .last = 0x1136c],
+ [.first = 0x11370, .last = 0x11374],
+ [.first = 0x11438, .last = 0x1143f],
+ [.first = 0x11442, .last = 0x11444],
+ [.first = 0x11446, .last = 0x11446],
+ [.first = 0x114b3, .last = 0x114b8],
+ [.first = 0x114ba, .last = 0x114ba],
+ [.first = 0x114bf, .last = 0x114c0],
+ [.first = 0x114c2, .last = 0x114c3],
+ [.first = 0x115b2, .last = 0x115b5],
+ [.first = 0x115bc, .last = 0x115bd],
+ [.first = 0x115bf, .last = 0x115c0],
+ [.first = 0x115dc, .last = 0x115dd],
+ [.first = 0x11633, .last = 0x1163a],
+ [.first = 0x1163d, .last = 0x1163d],
+ [.first = 0x1163f, .last = 0x11640],
+ [.first = 0x116ab, .last = 0x116ab],
+ [.first = 0x116ad, .last = 0x116ad],
+ [.first = 0x116b0, .last = 0x116b5],
+ [.first = 0x116b7, .last = 0x116b7],
+ [.first = 0x1171d, .last = 0x1171f],
+ [.first = 0x11722, .last = 0x11725],
+ [.first = 0x11727, .last = 0x1172b],
+ [.first = 0x11a01, .last = 0x11a06],
+ [.first = 0x11a09, .last = 0x11a0a],
+ [.first = 0x11a33, .last = 0x11a38],
+ [.first = 0x11a3b, .last = 0x11a3e],
+ [.first = 0x11a47, .last = 0x11a47],
+ [.first = 0x11a51, .last = 0x11a56],
+ [.first = 0x11a59, .last = 0x11a5b],
+ [.first = 0x11a8a, .last = 0x11a96],
+ [.first = 0x11a98, .last = 0x11a99],
+ [.first = 0x11c30, .last = 0x11c36],
+ [.first = 0x11c38, .last = 0x11c3d],
+ [.first = 0x11c3f, .last = 0x11c3f],
+ [.first = 0x11c92, .last = 0x11ca7],
+ [.first = 0x11caa, .last = 0x11cb0],
+ [.first = 0x11cb2, .last = 0x11cb3],
+ [.first = 0x11cb5, .last = 0x11cb6],
+ [.first = 0x11d31, .last = 0x11d36],
+ [.first = 0x11d3a, .last = 0x11d3a],
+ [.first = 0x11d3c, .last = 0x11d3d],
+ [.first = 0x11d3f, .last = 0x11d45],
+ [.first = 0x11d47, .last = 0x11d47],
+ [.first = 0x16af0, .last = 0x16af4],
+ [.first = 0x16b30, .last = 0x16b36],
+ [.first = 0x16f8f, .last = 0x16f92],
+ [.first = 0x1bc9d, .last = 0x1bc9e],
+ [.first = 0x1bca0, .last = 0x1bca3],
+ [.first = 0x1d167, .last = 0x1d169],
+ [.first = 0x1d173, .last = 0x1d182],
+ [.first = 0x1d185, .last = 0x1d18b],
+ [.first = 0x1d1aa, .last = 0x1d1ad],
+ [.first = 0x1d242, .last = 0x1d244],
+ [.first = 0x1da00, .last = 0x1da36],
+ [.first = 0x1da3b, .last = 0x1da6c],
+ [.first = 0x1da75, .last = 0x1da75],
+ [.first = 0x1da84, .last = 0x1da84],
+ [.first = 0x1da9b, .last = 0x1da9f],
+ [.first = 0x1daa1, .last = 0x1daaf],
+ [.first = 0x1e000, .last = 0x1e006],
+ [.first = 0x1e008, .last = 0x1e018],
+ [.first = 0x1e01b, .last = 0x1e021],
+ [.first = 0x1e023, .last = 0x1e024],
+ [.first = 0x1e026, .last = 0x1e02a],
+ [.first = 0x1e8d0, .last = 0x1e8d6],
+ [.first = 0x1e944, .last = 0x1e94a],
+ [.first = 0xe0001, .last = 0xe0001],
+ [.first = 0xe0020, .last = 0xe007f],
+ [.first = 0xe0100, .last = 0xe01ef],
+][:]
+
+/*
+ * Chars that cellwidth reports as 2 cells wide, sorted for in_range. Generated by
+ *
+ *     uniset eaw:W,F - cat:Me,Mn,Cf | \
+ *     sed -r 's/^([^.]+)[.][.]([^.]+)$/\t[.first = 0x\1, .last = 0x\2],/' | \
+ *     sed -r 's/^([^.]+)$/\t[.first = 0x\1, .last = 0x\1],/'
+ */
+const width2 : interval[:] = [
+ [.first = 0x1100, .last = 0x115f],
+ [.first = 0x231a, .last = 0x231b],
+ [.first = 0x2329, .last = 0x232a],
+ [.first = 0x23e9, .last = 0x23ec],
+ [.first = 0x23f0, .last = 0x23f0],
+ [.first = 0x23f3, .last = 0x23f3],
+ [.first = 0x25fd, .last = 0x25fe],
+ [.first = 0x2614, .last = 0x2615],
+ [.first = 0x2648, .last = 0x2653],
+ [.first = 0x267f, .last = 0x267f],
+ [.first = 0x2693, .last = 0x2693],
+ [.first = 0x26a1, .last = 0x26a1],
+ [.first = 0x26aa, .last = 0x26ab],
+ [.first = 0x26bd, .last = 0x26be],
+ [.first = 0x26c4, .last = 0x26c5],
+ [.first = 0x26ce, .last = 0x26ce],
+ [.first = 0x26d4, .last = 0x26d4],
+ [.first = 0x26ea, .last = 0x26ea],
+ [.first = 0x26f2, .last = 0x26f3],
+ [.first = 0x26f5, .last = 0x26f5],
+ [.first = 0x26fa, .last = 0x26fa],
+ [.first = 0x26fd, .last = 0x26fd],
+ [.first = 0x2705, .last = 0x2705],
+ [.first = 0x270a, .last = 0x270b],
+ [.first = 0x2728, .last = 0x2728],
+ [.first = 0x274c, .last = 0x274c],
+ [.first = 0x274e, .last = 0x274e],
+ [.first = 0x2753, .last = 0x2755],
+ [.first = 0x2757, .last = 0x2757],
+ [.first = 0x2795, .last = 0x2797],
+ [.first = 0x27b0, .last = 0x27b0],
+ [.first = 0x27bf, .last = 0x27bf],
+ [.first = 0x2b1b, .last = 0x2b1c],
+ [.first = 0x2b50, .last = 0x2b50],
+ [.first = 0x2b55, .last = 0x2b55],
+ [.first = 0x2e80, .last = 0x2e99],
+ [.first = 0x2e9b, .last = 0x2ef3],
+ [.first = 0x2f00, .last = 0x2fd5],
+ [.first = 0x2ff0, .last = 0x2ffb],
+ [.first = 0x3000, .last = 0x3029],
+ [.first = 0x302e, .last = 0x303e],
+ [.first = 0x3041, .last = 0x3096],
+ [.first = 0x309b, .last = 0x30ff],
+ [.first = 0x3105, .last = 0x312e],
+ [.first = 0x3131, .last = 0x318e],
+ [.first = 0x3190, .last = 0x31ba],
+ [.first = 0x31c0, .last = 0x31e3],
+ [.first = 0x31f0, .last = 0x321e],
+ [.first = 0x3220, .last = 0x3247],
+ [.first = 0x3250, .last = 0x32fe],
+ [.first = 0x3300, .last = 0x4dbf],
+ [.first = 0x4e00, .last = 0xa48c],
+ [.first = 0xa490, .last = 0xa4c6],
+ [.first = 0xa960, .last = 0xa97c],
+ [.first = 0xac00, .last = 0xd7a3],
+ [.first = 0xf900, .last = 0xfaff],
+ [.first = 0xfe10, .last = 0xfe19],
+ [.first = 0xfe30, .last = 0xfe52],
+ [.first = 0xfe54, .last = 0xfe66],
+ [.first = 0xfe68, .last = 0xfe6b],
+ [.first = 0xff01, .last = 0xff60],
+ [.first = 0xffe0, .last = 0xffe6],
+ [.first = 0x16fe0, .last = 0x16fe1],
+ [.first = 0x17000, .last = 0x187ec],
+ [.first = 0x18800, .last = 0x18af2],
+ [.first = 0x1b000, .last = 0x1b11e],
+ [.first = 0x1b170, .last = 0x1b2fb],
+ [.first = 0x1f004, .last = 0x1f004],
+ [.first = 0x1f0cf, .last = 0x1f0cf],
+ [.first = 0x1f18e, .last = 0x1f18e],
+ [.first = 0x1f191, .last = 0x1f19a],
+ [.first = 0x1f200, .last = 0x1f202],
+ [.first = 0x1f210, .last = 0x1f23b],
+ [.first = 0x1f240, .last = 0x1f248],
+ [.first = 0x1f250, .last = 0x1f251],
+ [.first = 0x1f260, .last = 0x1f265],
+ [.first = 0x1f300, .last = 0x1f320],
+ [.first = 0x1f32d, .last = 0x1f335],
+ [.first = 0x1f337, .last = 0x1f37c],
+ [.first = 0x1f37e, .last = 0x1f393],
+ [.first = 0x1f3a0, .last = 0x1f3ca],
+ [.first = 0x1f3cf, .last = 0x1f3d3],
+ [.first = 0x1f3e0, .last = 0x1f3f0],
+ [.first = 0x1f3f4, .last = 0x1f3f4],
+ [.first = 0x1f3f8, .last = 0x1f43e],
+ [.first = 0x1f440, .last = 0x1f440],
+ [.first = 0x1f442, .last = 0x1f4fc],
+ [.first = 0x1f4ff, .last = 0x1f53d],
+ [.first = 0x1f54b, .last = 0x1f54e],
+ [.first = 0x1f550, .last = 0x1f567],
+ [.first = 0x1f57a, .last = 0x1f57a],
+ [.first = 0x1f595, .last = 0x1f596],
+ [.first = 0x1f5a4, .last = 0x1f5a4],
+ [.first = 0x1f5fb, .last = 0x1f64f],
+ [.first = 0x1f680, .last = 0x1f6c5],
+ [.first = 0x1f6cc, .last = 0x1f6cc],
+ [.first = 0x1f6d0, .last = 0x1f6d2],
+ [.first = 0x1f6eb, .last = 0x1f6ec],
+ [.first = 0x1f6f4, .last = 0x1f6f8],
+ [.first = 0x1f910, .last = 0x1f93e],
+ [.first = 0x1f940, .last = 0x1f94c],
+ [.first = 0x1f950, .last = 0x1f96b],
+ [.first = 0x1f980, .last = 0x1f997],
+ [.first = 0x1f9c0, .last = 0x1f9c0],
+ [.first = 0x1f9d0, .last = 0x1f9e6],
+ [.first = 0x20000, .last = 0x2fffd],
+ [.first = 0x30000, .last = 0x3fffd],
+][:]
+
const findc = {c, t, n, nelt, ret
var p, m
@@ -1245,3 +1691,47 @@ generic charval = {c, base -> @a::(numeric,integral)
;;
-> v
}
+
+/* Binary search: is c in some interval of t? t must be sorted and disjoint. */
+const in_range = { c : char, t : interval[:]
+	/* Empty table, or c outside the table's overall span: no match. */
+	if t.len == 0 || c < t[0].first || c > t[t.len - 1].last
+		-> false
+	;;
+
+	while t.len >= 1
+		var j : size = t.len / 2
+		if c < t[j].first
+			t = t[0:j]	/* continue in the intervals below t[j] */
+		elif c > t[j].last
+			t = t[j+1:]	/* continue in the intervals above t[j] */
+		else
+			-> true
+		;;
+	;;
+	-> false
+}
+
+/*
+ * Cell width of c, following the general idea described at
+ * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ *
+ * Returns -1, 0, 1 or 2; the branches below are tried in order.
+ */
+const cellwidth = { c : char
+	if c == 0
+		/* NUL */
+		-> 0
+	elif c == -1 || c < 0x20 || (c >= 0x7f && c <= 0x9f)
+		/* the -1 char value, or a control char: no sensible width */
+		-> -1
+	elif in_range(c, width0)
+		/* listed as occupying no cell */
+		-> 0
+	elif in_range(c, width2)
+		/* listed as occupying two cells */
+		-> 2
+	;;
+
+	-> 1
+}
diff --git a/lib/std/fmt.myr b/lib/std/fmt.myr
index 08b814db..1f542d31 100644
--- a/lib/std/fmt.myr
+++ b/lib/std/fmt.myr
@@ -553,7 +553,7 @@ const strfmt = {sb, str, params
;;
;;
else
- for i = 0; i < w - graphemewidth(str); i++
+ for i = 0; i < w - strcellwidth(str); i++
sbputc(sb, p)
;;
sbputs(sb, str)
@@ -564,14 +564,6 @@ const isprint = {b
-> b >= (' ' : byte) && b < ('~' : byte)
}
-/*
-Hah. like we're going to put in the work to actually
-count graphemes.
-*/
-const graphemewidth = {str
- -> str.len
-}
-
const digitchars = [
'0','1','2','3','4',
'5','6','7','8','9',
diff --git a/lib/std/test/chartype.myr b/lib/std/test/chartype.myr
index d0df1d2b..2b324a71 100644
--- a/lib/std/test/chartype.myr
+++ b/lib/std/test/chartype.myr
@@ -20,4 +20,19 @@ const main = {
std.assert(!std.isalnum('!'), "! should not be isalnum\n")
std.assert(!std.isspace('@'), "@ should not be isspace\n")
std.assert(!std.isblank('@'), "@ should not be isblank\n")
+
+ std.assert(std.cellwidth(std.Badchar) == -1, "Badchar should have width -1\n")
+ std.assert(std.cellwidth((0 : char)) == 0, "U+0000 should have width 0\n")
+ std.assert(std.cellwidth('') == -1, "^E should have width -1\n")
+ std.assert(std.cellwidth('a') == 1, "'a' should have width 1\n")
+ std.assert(std.cellwidth('ê') == 1, "'ê' should have width 1\n")
+ std.assert(std.cellwidth('̀') == 0, "U+300 should have width 0\n")
+ std.assert(std.cellwidth('ϼ') == 1, "ϼ should have width 1\n")
+ std.assert(std.cellwidth('Ї') == 1, "Ї should have width 1\n")
+ std.assert(std.cellwidth('⊲') == 1, "⊲ should have width 1\n")
+ std.assert(std.cellwidth(' ') == 2, "U+3000 should have width 2\n")
+ std.assert(std.cellwidth('ギ') == 2, "ギ should have width 2\n")
+ std.assert(std.cellwidth('匌') == 2, "匌 should have width 2\n")
+ std.assert(std.cellwidth('') == 0, "U+00AD should have width 0\n")
+ std.assert(std.cellwidth('ࠠ') == 0, "U+820 should have width 0\n")
}
diff --git a/lib/std/test/fmt.myr b/lib/std/test/fmt.myr
index 5d0cfa83..c1a17acc 100644
--- a/lib/std/test/fmt.myr
+++ b/lib/std/test/fmt.myr
@@ -56,6 +56,10 @@ const builtins = {
check("\\n\\r\\xff", "{e}", "\n\r\xff")
check("{}barbaz", "{{}}bar{}", "baz")
check("{barbaz}", "{{bar{}}}", "baz")
+ check(" Tlön", "{w=10}", "Tlön")
+ check(" háček", "{w=10}", "háček")
+ check("_____即是多多即", "{w=15,p=_}", "即是多多即")
+ check(" τῶν ῾Ελλήνων", "{w=15}", "τῶν ῾Ελλήνων")
check("abcd", "{}", "abcd")
check("123", "{}", 123)
check("7b", "{x}", 123)
diff --git a/lib/std/test/utf.myr b/lib/std/test/utf.myr
new file mode 100644
index 00000000..0308de27
Binary files /dev/null and b/lib/std/test/utf.myr differ
diff --git a/lib/std/utf.myr b/lib/std/utf.myr
index 6c655a33..439254ca 100644
--- a/lib/std/utf.myr
+++ b/lib/std/utf.myr
@@ -1,3 +1,5 @@
+use "extremum"
+use "chartype"
use "die"
use "types"
@@ -12,6 +14,7 @@ pkg std =
const decode : (buf : byte[:] -> char)
const strstep : (str : byte[:] -> (char, byte[:]))
+ const strcellwidth : (str : byte[:] -> size)
;;
const charlen = {c
@@ -101,3 +104,29 @@ const strstep = {str
-> ((chr : char), str[len:])
}
+
+/* Cells needed to print str; Badchar counts 1, every control char 0. */
+const strcellwidth = {str
+	var s : byte[:] = str
+	var c : char = Badchar
+	var n : size = 0
+
+	while s.len > 0
+		(c, s) = strstep(s)
+		if c == Badchar
+			/* Something will probably be printed as U+FFFD */
+			n++
+		elif c >= 0x20 && c < 0x7f
+			/* Printable ASCII: one cell, no table lookup needed */
+			n++
+		else
+			/* cellwidth is 0 or -1 for NUL and C0/DEL/C1 controls */
+			var w : int = cellwidth(c)
+			if w > 0
+				n += (w : size)
+			;;
+		;;
+	;;
+
+	-> n
+}
--
2.14.3