Eigenstate: myrddin-dev mailing list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] implement graphemewidth


---
Also rename it to cellwidth, because that's what I really want it
to do and I'm not sure if what I implemented really deals with
graphemes.

The goal is the following:

        use std
        const main = {
                std.put("|0123456789|\n")        /* |0123456789| */
                std.put("|{w=10}|\n", "foobar")  /* |    foobar| */
                std.put("|{w=10}|\n", "施氏食")  /* |    施氏食| */
                std.put("|{w=10}|\n", "человек") /* |   человек| */
        }

I wasn't particularly happy with any of the high-performance
implementations of wcwidth() I surveyed (in particular, musl's is
too clever for me to understand), so I ended up using the approach
of http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c . That appears to
be based on a pretty old version of Unicode, however, because it
looks like a lot of the exceptions aren't necessary anymore.

Doing a full binary search is probably wasteful, but if someone
wants to process that much non-ASCII data, they are probably in a
better position to contribute vectorized, SSE2-aware, triaxilating
frequency algorithms than I am.

As a disclaimer: I'm not a Unicode guy, I just get emails from
people with non-ASCII names with text like “H⁰(P•) ≅ ℤ”. I haven't
even tried to verify that any of the more exotic scripts work as
expected.
---
 lib/std/bld.sub       |   1 +
 lib/std/cellwidth.myr | 526 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/std/fmt.myr       |  11 +-
 3 files changed, 529 insertions(+), 9 deletions(-)
 create mode 100644 lib/std/cellwidth.myr

diff --git a/lib/std/bld.sub b/lib/std/bld.sub
index a834dbb3..8e1639d5 100644
--- a/lib/std/bld.sub
+++ b/lib/std/bld.sub
@@ -10,6 +10,7 @@ lib std {inc=.} =
 	bigint.myr
 	bitset.myr
 	blat.myr
+	cellwidth.myr
 	chartype.myr
 	chomp.myr
 	clear.myr
diff --git a/lib/std/cellwidth.myr b/lib/std/cellwidth.myr
new file mode 100644
index 00000000..18c031bd
--- /dev/null
+++ b/lib/std/cellwidth.myr
@@ -0,0 +1,526 @@
+use "extremum"
+use "types"
+use "utf"
+
+pkg std =
+	/* Cells c takes when printed: 0, 1 or 2, or -1 for Badchar and control characters */
+	const cellwidth : (c : char -> int)
+
+	/* Total cells str takes when printed; str is decoded one char at a time with strstep */
+	const strcellwidth : (str : byte[:] -> size)
+;;
+
+type interval = struct
+	first : char	/* lowest codepoint of the range; in_range treats it as inclusive */
+	last : char	/* highest codepoint of the range; in_range treats it as inclusive */
+;;
+
+/* Binary search: true iff c lies in some interval of t (t sorted, non-overlapping); an empty t yields false */
+const in_range = { c : char, t : interval[:]
+	if t.len == 0 || c < t[0].first || c > t[t.len - 1].last
+		-> false
+	;;
+
+	while t.len >= 1
+		var j : size = t.len / 2	/* probe the middle interval */
+		if c < t[j].first
+			t = t[0:j]	/* c can only be below the probe */
+		elif c > t[j].last
+			t = t[j+1:]	/* c can only be above the probe */
+		else
+			-> true
+		;;
+	;;
+
+	-> false
+}
+
+const cellwidth = { c : char
+	/*
+	 * Same overall scheme as the wcwidth() at
+	 * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+	 *
+	 * width0 and width2 are autogenerated interval tables.
+	 */
+	var cells : int = 1
+
+	if c == 0
+		/* NUL occupies nothing */
+		cells = 0
+	elif c == Badchar || c < 0x20 || (c >= 0x7f && c <= 0x9f)
+		/* Badchar, C0 controls, DEL and 0x80..0x9f have no width */
+		cells = -1
+	elif in_range(c, width0)
+		/* listed in the zero-cell table */
+		cells = 0
+	elif in_range(c, width2)
+		/* listed in the two-cell table */
+		cells = 2
+	;;
+
+	-> cells
+}
+
+const strcellwidth = {str
+	var rest : byte[:] = str
+	var ch : char = Badchar
+	var total : size = 0
+
+	while rest.len > 0
+		(ch, rest) = strstep(rest)
+		/* C0 controls and DEL match neither arm below: zero cells */
+		if ch == Badchar || (ch >= 0x20 && ch < 0x7f)
+			/*
+			 * An undecodable sequence will probably be shown as
+			 * U+FFFD; printable ASCII is always one cell.
+			 */
+			total++
+		elif ch > 0x7f
+			/*
+			 * Beyond ASCII: defer to cellwidth. abs() turns a
+			 * -1 answer into one cell.
+			 */
+			total += (abs(cellwidth(ch)) : size)
+		;;
+	;;
+
+	-> total
+}
+
+/*
+ * Codepoints for which cellwidth returns 0. in_range binary-searches this
+ * table, so entries must stay sorted and non-overlapping. Generated by
+ * uniset cat:Me,Mn,Cf - 00ad | \
+ *      sed -r 's/^([^.]+)[.][.]([^.]+)$/\t[.first = 0x\1, .last = 0x\2],/' | \
+ *      sed -r 's/^([^.]+)$/\t[.first = 0x\1, .last = 0x\1],/'
+ */
+const width0 : interval[:] = [
+	[.first = 0x300, .last = 0x36f],
+	[.first = 0x483, .last = 0x489],
+	[.first = 0x591, .last = 0x5bd],
+	[.first = 0x5bf, .last = 0x5bf],
+	[.first = 0x5c1, .last = 0x5c2],
+	[.first = 0x5c4, .last = 0x5c5],
+	[.first = 0x5c7, .last = 0x5c7],
+	[.first = 0x600, .last = 0x605],
+	[.first = 0x610, .last = 0x61a],
+	[.first = 0x61c, .last = 0x61c],
+	[.first = 0x64b, .last = 0x65f],
+	[.first = 0x670, .last = 0x670],
+	[.first = 0x6d6, .last = 0x6dd],
+	[.first = 0x6df, .last = 0x6e4],
+	[.first = 0x6e7, .last = 0x6e8],
+	[.first = 0x6ea, .last = 0x6ed],
+	[.first = 0x70f, .last = 0x70f],
+	[.first = 0x711, .last = 0x711],
+	[.first = 0x730, .last = 0x74a],
+	[.first = 0x7a6, .last = 0x7b0],
+	[.first = 0x7eb, .last = 0x7f3],
+	[.first = 0x816, .last = 0x819],
+	[.first = 0x81b, .last = 0x823],
+	[.first = 0x825, .last = 0x827],
+	[.first = 0x829, .last = 0x82d],
+	[.first = 0x859, .last = 0x85b],
+	[.first = 0x8d4, .last = 0x902],
+	[.first = 0x93a, .last = 0x93a],
+	[.first = 0x93c, .last = 0x93c],
+	[.first = 0x941, .last = 0x948],
+	[.first = 0x94d, .last = 0x94d],
+	[.first = 0x951, .last = 0x957],
+	[.first = 0x962, .last = 0x963],
+	[.first = 0x981, .last = 0x981],
+	[.first = 0x9bc, .last = 0x9bc],
+	[.first = 0x9c1, .last = 0x9c4],
+	[.first = 0x9cd, .last = 0x9cd],
+	[.first = 0x9e2, .last = 0x9e3],
+	[.first = 0xa01, .last = 0xa02],
+	[.first = 0xa3c, .last = 0xa3c],
+	[.first = 0xa41, .last = 0xa42],
+	[.first = 0xa47, .last = 0xa48],
+	[.first = 0xa4b, .last = 0xa4d],
+	[.first = 0xa51, .last = 0xa51],
+	[.first = 0xa70, .last = 0xa71],
+	[.first = 0xa75, .last = 0xa75],
+	[.first = 0xa81, .last = 0xa82],
+	[.first = 0xabc, .last = 0xabc],
+	[.first = 0xac1, .last = 0xac5],
+	[.first = 0xac7, .last = 0xac8],
+	[.first = 0xacd, .last = 0xacd],
+	[.first = 0xae2, .last = 0xae3],
+	[.first = 0xafa, .last = 0xaff],
+	[.first = 0xb01, .last = 0xb01],
+	[.first = 0xb3c, .last = 0xb3c],
+	[.first = 0xb3f, .last = 0xb3f],
+	[.first = 0xb41, .last = 0xb44],
+	[.first = 0xb4d, .last = 0xb4d],
+	[.first = 0xb56, .last = 0xb56],
+	[.first = 0xb62, .last = 0xb63],
+	[.first = 0xb82, .last = 0xb82],
+	[.first = 0xbc0, .last = 0xbc0],
+	[.first = 0xbcd, .last = 0xbcd],
+	[.first = 0xc00, .last = 0xc00],
+	[.first = 0xc3e, .last = 0xc40],
+	[.first = 0xc46, .last = 0xc48],
+	[.first = 0xc4a, .last = 0xc4d],
+	[.first = 0xc55, .last = 0xc56],
+	[.first = 0xc62, .last = 0xc63],
+	[.first = 0xc81, .last = 0xc81],
+	[.first = 0xcbc, .last = 0xcbc],
+	[.first = 0xcbf, .last = 0xcbf],
+	[.first = 0xcc6, .last = 0xcc6],
+	[.first = 0xccc, .last = 0xccd],
+	[.first = 0xce2, .last = 0xce3],
+	[.first = 0xd00, .last = 0xd01],
+	[.first = 0xd3b, .last = 0xd3c],
+	[.first = 0xd41, .last = 0xd44],
+	[.first = 0xd4d, .last = 0xd4d],
+	[.first = 0xd62, .last = 0xd63],
+	[.first = 0xdca, .last = 0xdca],
+	[.first = 0xdd2, .last = 0xdd4],
+	[.first = 0xdd6, .last = 0xdd6],
+	[.first = 0xe31, .last = 0xe31],
+	[.first = 0xe34, .last = 0xe3a],
+	[.first = 0xe47, .last = 0xe4e],
+	[.first = 0xeb1, .last = 0xeb1],
+	[.first = 0xeb4, .last = 0xeb9],
+	[.first = 0xebb, .last = 0xebc],
+	[.first = 0xec8, .last = 0xecd],
+	[.first = 0xf18, .last = 0xf19],
+	[.first = 0xf35, .last = 0xf35],
+	[.first = 0xf37, .last = 0xf37],
+	[.first = 0xf39, .last = 0xf39],
+	[.first = 0xf71, .last = 0xf7e],
+	[.first = 0xf80, .last = 0xf84],
+	[.first = 0xf86, .last = 0xf87],
+	[.first = 0xf8d, .last = 0xf97],
+	[.first = 0xf99, .last = 0xfbc],
+	[.first = 0xfc6, .last = 0xfc6],
+	[.first = 0x102d, .last = 0x1030],
+	[.first = 0x1032, .last = 0x1037],
+	[.first = 0x1039, .last = 0x103a],
+	[.first = 0x103d, .last = 0x103e],
+	[.first = 0x1058, .last = 0x1059],
+	[.first = 0x105e, .last = 0x1060],
+	[.first = 0x1071, .last = 0x1074],
+	[.first = 0x1082, .last = 0x1082],
+	[.first = 0x1085, .last = 0x1086],
+	[.first = 0x108d, .last = 0x108d],
+	[.first = 0x109d, .last = 0x109d],
+	[.first = 0x135d, .last = 0x135f],
+	[.first = 0x1712, .last = 0x1714],
+	[.first = 0x1732, .last = 0x1734],
+	[.first = 0x1752, .last = 0x1753],
+	[.first = 0x1772, .last = 0x1773],
+	[.first = 0x17b4, .last = 0x17b5],
+	[.first = 0x17b7, .last = 0x17bd],
+	[.first = 0x17c6, .last = 0x17c6],
+	[.first = 0x17c9, .last = 0x17d3],
+	[.first = 0x17dd, .last = 0x17dd],
+	[.first = 0x180b, .last = 0x180e],
+	[.first = 0x1885, .last = 0x1886],
+	[.first = 0x18a9, .last = 0x18a9],
+	[.first = 0x1920, .last = 0x1922],
+	[.first = 0x1927, .last = 0x1928],
+	[.first = 0x1932, .last = 0x1932],
+	[.first = 0x1939, .last = 0x193b],
+	[.first = 0x1a17, .last = 0x1a18],
+	[.first = 0x1a1b, .last = 0x1a1b],
+	[.first = 0x1a56, .last = 0x1a56],
+	[.first = 0x1a58, .last = 0x1a5e],
+	[.first = 0x1a60, .last = 0x1a60],
+	[.first = 0x1a62, .last = 0x1a62],
+	[.first = 0x1a65, .last = 0x1a6c],
+	[.first = 0x1a73, .last = 0x1a7c],
+	[.first = 0x1a7f, .last = 0x1a7f],
+	[.first = 0x1ab0, .last = 0x1abe],
+	[.first = 0x1b00, .last = 0x1b03],
+	[.first = 0x1b34, .last = 0x1b34],
+	[.first = 0x1b36, .last = 0x1b3a],
+	[.first = 0x1b3c, .last = 0x1b3c],
+	[.first = 0x1b42, .last = 0x1b42],
+	[.first = 0x1b6b, .last = 0x1b73],
+	[.first = 0x1b80, .last = 0x1b81],
+	[.first = 0x1ba2, .last = 0x1ba5],
+	[.first = 0x1ba8, .last = 0x1ba9],
+	[.first = 0x1bab, .last = 0x1bad],
+	[.first = 0x1be6, .last = 0x1be6],
+	[.first = 0x1be8, .last = 0x1be9],
+	[.first = 0x1bed, .last = 0x1bed],
+	[.first = 0x1bef, .last = 0x1bf1],
+	[.first = 0x1c2c, .last = 0x1c33],
+	[.first = 0x1c36, .last = 0x1c37],
+	[.first = 0x1cd0, .last = 0x1cd2],
+	[.first = 0x1cd4, .last = 0x1ce0],
+	[.first = 0x1ce2, .last = 0x1ce8],
+	[.first = 0x1ced, .last = 0x1ced],
+	[.first = 0x1cf4, .last = 0x1cf4],
+	[.first = 0x1cf8, .last = 0x1cf9],
+	[.first = 0x1dc0, .last = 0x1df9],
+	[.first = 0x1dfb, .last = 0x1dff],
+	[.first = 0x200b, .last = 0x200f],
+	[.first = 0x202a, .last = 0x202e],
+	[.first = 0x2060, .last = 0x2064],
+	[.first = 0x2066, .last = 0x206f],
+	[.first = 0x20d0, .last = 0x20f0],
+	[.first = 0x2cef, .last = 0x2cf1],
+	[.first = 0x2d7f, .last = 0x2d7f],
+	[.first = 0x2de0, .last = 0x2dff],
+	[.first = 0x302a, .last = 0x302d],
+	[.first = 0x3099, .last = 0x309a],
+	[.first = 0xa66f, .last = 0xa672],
+	[.first = 0xa674, .last = 0xa67d],
+	[.first = 0xa69e, .last = 0xa69f],
+	[.first = 0xa6f0, .last = 0xa6f1],
+	[.first = 0xa802, .last = 0xa802],
+	[.first = 0xa806, .last = 0xa806],
+	[.first = 0xa80b, .last = 0xa80b],
+	[.first = 0xa825, .last = 0xa826],
+	[.first = 0xa8c4, .last = 0xa8c5],
+	[.first = 0xa8e0, .last = 0xa8f1],
+	[.first = 0xa926, .last = 0xa92d],
+	[.first = 0xa947, .last = 0xa951],
+	[.first = 0xa980, .last = 0xa982],
+	[.first = 0xa9b3, .last = 0xa9b3],
+	[.first = 0xa9b6, .last = 0xa9b9],
+	[.first = 0xa9bc, .last = 0xa9bc],
+	[.first = 0xa9e5, .last = 0xa9e5],
+	[.first = 0xaa29, .last = 0xaa2e],
+	[.first = 0xaa31, .last = 0xaa32],
+	[.first = 0xaa35, .last = 0xaa36],
+	[.first = 0xaa43, .last = 0xaa43],
+	[.first = 0xaa4c, .last = 0xaa4c],
+	[.first = 0xaa7c, .last = 0xaa7c],
+	[.first = 0xaab0, .last = 0xaab0],
+	[.first = 0xaab2, .last = 0xaab4],
+	[.first = 0xaab7, .last = 0xaab8],
+	[.first = 0xaabe, .last = 0xaabf],
+	[.first = 0xaac1, .last = 0xaac1],
+	[.first = 0xaaec, .last = 0xaaed],
+	[.first = 0xaaf6, .last = 0xaaf6],
+	[.first = 0xabe5, .last = 0xabe5],
+	[.first = 0xabe8, .last = 0xabe8],
+	[.first = 0xabed, .last = 0xabed],
+	[.first = 0xfb1e, .last = 0xfb1e],
+	[.first = 0xfe00, .last = 0xfe0f],
+	[.first = 0xfe20, .last = 0xfe2f],
+	[.first = 0xfeff, .last = 0xfeff],
+	[.first = 0xfff9, .last = 0xfffb],
+	[.first = 0x101fd, .last = 0x101fd],
+	[.first = 0x102e0, .last = 0x102e0],
+	[.first = 0x10376, .last = 0x1037a],
+	[.first = 0x10a01, .last = 0x10a03],
+	[.first = 0x10a05, .last = 0x10a06],
+	[.first = 0x10a0c, .last = 0x10a0f],
+	[.first = 0x10a38, .last = 0x10a3a],
+	[.first = 0x10a3f, .last = 0x10a3f],
+	[.first = 0x10ae5, .last = 0x10ae6],
+	[.first = 0x11001, .last = 0x11001],
+	[.first = 0x11038, .last = 0x11046],
+	[.first = 0x1107f, .last = 0x11081],
+	[.first = 0x110b3, .last = 0x110b6],
+	[.first = 0x110b9, .last = 0x110ba],
+	[.first = 0x110bd, .last = 0x110bd],
+	[.first = 0x11100, .last = 0x11102],
+	[.first = 0x11127, .last = 0x1112b],
+	[.first = 0x1112d, .last = 0x11134],
+	[.first = 0x11173, .last = 0x11173],
+	[.first = 0x11180, .last = 0x11181],
+	[.first = 0x111b6, .last = 0x111be],
+	[.first = 0x111ca, .last = 0x111cc],
+	[.first = 0x1122f, .last = 0x11231],
+	[.first = 0x11234, .last = 0x11234],
+	[.first = 0x11236, .last = 0x11237],
+	[.first = 0x1123e, .last = 0x1123e],
+	[.first = 0x112df, .last = 0x112df],
+	[.first = 0x112e3, .last = 0x112ea],
+	[.first = 0x11300, .last = 0x11301],
+	[.first = 0x1133c, .last = 0x1133c],
+	[.first = 0x11340, .last = 0x11340],
+	[.first = 0x11366, .last = 0x1136c],
+	[.first = 0x11370, .last = 0x11374],
+	[.first = 0x11438, .last = 0x1143f],
+	[.first = 0x11442, .last = 0x11444],
+	[.first = 0x11446, .last = 0x11446],
+	[.first = 0x114b3, .last = 0x114b8],
+	[.first = 0x114ba, .last = 0x114ba],
+	[.first = 0x114bf, .last = 0x114c0],
+	[.first = 0x114c2, .last = 0x114c3],
+	[.first = 0x115b2, .last = 0x115b5],
+	[.first = 0x115bc, .last = 0x115bd],
+	[.first = 0x115bf, .last = 0x115c0],
+	[.first = 0x115dc, .last = 0x115dd],
+	[.first = 0x11633, .last = 0x1163a],
+	[.first = 0x1163d, .last = 0x1163d],
+	[.first = 0x1163f, .last = 0x11640],
+	[.first = 0x116ab, .last = 0x116ab],
+	[.first = 0x116ad, .last = 0x116ad],
+	[.first = 0x116b0, .last = 0x116b5],
+	[.first = 0x116b7, .last = 0x116b7],
+	[.first = 0x1171d, .last = 0x1171f],
+	[.first = 0x11722, .last = 0x11725],
+	[.first = 0x11727, .last = 0x1172b],
+	[.first = 0x11a01, .last = 0x11a06],
+	[.first = 0x11a09, .last = 0x11a0a],
+	[.first = 0x11a33, .last = 0x11a38],
+	[.first = 0x11a3b, .last = 0x11a3e],
+	[.first = 0x11a47, .last = 0x11a47],
+	[.first = 0x11a51, .last = 0x11a56],
+	[.first = 0x11a59, .last = 0x11a5b],
+	[.first = 0x11a8a, .last = 0x11a96],
+	[.first = 0x11a98, .last = 0x11a99],
+	[.first = 0x11c30, .last = 0x11c36],
+	[.first = 0x11c38, .last = 0x11c3d],
+	[.first = 0x11c3f, .last = 0x11c3f],
+	[.first = 0x11c92, .last = 0x11ca7],
+	[.first = 0x11caa, .last = 0x11cb0],
+	[.first = 0x11cb2, .last = 0x11cb3],
+	[.first = 0x11cb5, .last = 0x11cb6],
+	[.first = 0x11d31, .last = 0x11d36],
+	[.first = 0x11d3a, .last = 0x11d3a],
+	[.first = 0x11d3c, .last = 0x11d3d],
+	[.first = 0x11d3f, .last = 0x11d45],
+	[.first = 0x11d47, .last = 0x11d47],
+	[.first = 0x16af0, .last = 0x16af4],
+	[.first = 0x16b30, .last = 0x16b36],
+	[.first = 0x16f8f, .last = 0x16f92],
+	[.first = 0x1bc9d, .last = 0x1bc9e],
+	[.first = 0x1bca0, .last = 0x1bca3],
+	[.first = 0x1d167, .last = 0x1d169],
+	[.first = 0x1d173, .last = 0x1d182],
+	[.first = 0x1d185, .last = 0x1d18b],
+	[.first = 0x1d1aa, .last = 0x1d1ad],
+	[.first = 0x1d242, .last = 0x1d244],
+	[.first = 0x1da00, .last = 0x1da36],
+	[.first = 0x1da3b, .last = 0x1da6c],
+	[.first = 0x1da75, .last = 0x1da75],
+	[.first = 0x1da84, .last = 0x1da84],
+	[.first = 0x1da9b, .last = 0x1da9f],
+	[.first = 0x1daa1, .last = 0x1daaf],
+	[.first = 0x1e000, .last = 0x1e006],
+	[.first = 0x1e008, .last = 0x1e018],
+	[.first = 0x1e01b, .last = 0x1e021],
+	[.first = 0x1e023, .last = 0x1e024],
+	[.first = 0x1e026, .last = 0x1e02a],
+	[.first = 0x1e8d0, .last = 0x1e8d6],
+	[.first = 0x1e944, .last = 0x1e94a],
+	[.first = 0xe0001, .last = 0xe0001],
+	[.first = 0xe0020, .last = 0xe007f],
+	[.first = 0xe0100, .last = 0xe01ef],
+][:]
+
+/*
+ * Codepoints for which cellwidth returns 2. in_range binary-searches this
+ * table, so entries must stay sorted and non-overlapping. Generated by
+ * uniset eaw:W,F - cat:Me,Mn,Cf - 00ad | \
+ *      sed -r 's/^([^.]+)[.][.]([^.]+)$/\t[.first = 0x\1, .last = 0x\2],/' | \
+ *      sed -r 's/^([^.]+)$/\t[.first = 0x\1, .last = 0x\1],/'
+ */
+const width2 : interval[:] = [
+	[.first = 0x1100, .last = 0x115f],
+	[.first = 0x231a, .last = 0x231b],
+	[.first = 0x2329, .last = 0x232a],
+	[.first = 0x23e9, .last = 0x23ec],
+	[.first = 0x23f0, .last = 0x23f0],
+	[.first = 0x23f3, .last = 0x23f3],
+	[.first = 0x25fd, .last = 0x25fe],
+	[.first = 0x2614, .last = 0x2615],
+	[.first = 0x2648, .last = 0x2653],
+	[.first = 0x267f, .last = 0x267f],
+	[.first = 0x2693, .last = 0x2693],
+	[.first = 0x26a1, .last = 0x26a1],
+	[.first = 0x26aa, .last = 0x26ab],
+	[.first = 0x26bd, .last = 0x26be],
+	[.first = 0x26c4, .last = 0x26c5],
+	[.first = 0x26ce, .last = 0x26ce],
+	[.first = 0x26d4, .last = 0x26d4],
+	[.first = 0x26ea, .last = 0x26ea],
+	[.first = 0x26f2, .last = 0x26f3],
+	[.first = 0x26f5, .last = 0x26f5],
+	[.first = 0x26fa, .last = 0x26fa],
+	[.first = 0x26fd, .last = 0x26fd],
+	[.first = 0x2705, .last = 0x2705],
+	[.first = 0x270a, .last = 0x270b],
+	[.first = 0x2728, .last = 0x2728],
+	[.first = 0x274c, .last = 0x274c],
+	[.first = 0x274e, .last = 0x274e],
+	[.first = 0x2753, .last = 0x2755],
+	[.first = 0x2757, .last = 0x2757],
+	[.first = 0x2795, .last = 0x2797],
+	[.first = 0x27b0, .last = 0x27b0],
+	[.first = 0x27bf, .last = 0x27bf],
+	[.first = 0x2b1b, .last = 0x2b1c],
+	[.first = 0x2b50, .last = 0x2b50],
+	[.first = 0x2b55, .last = 0x2b55],
+	[.first = 0x2e80, .last = 0x2e99],
+	[.first = 0x2e9b, .last = 0x2ef3],
+	[.first = 0x2f00, .last = 0x2fd5],
+	[.first = 0x2ff0, .last = 0x2ffb],
+	[.first = 0x3000, .last = 0x3029],
+	[.first = 0x302e, .last = 0x303e],
+	[.first = 0x3041, .last = 0x3096],
+	[.first = 0x309b, .last = 0x30ff],
+	[.first = 0x3105, .last = 0x312e],
+	[.first = 0x3131, .last = 0x318e],
+	[.first = 0x3190, .last = 0x31ba],
+	[.first = 0x31c0, .last = 0x31e3],
+	[.first = 0x31f0, .last = 0x321e],
+	[.first = 0x3220, .last = 0x3247],
+	[.first = 0x3250, .last = 0x32fe],
+	[.first = 0x3300, .last = 0x4dbf],
+	[.first = 0x4e00, .last = 0xa48c],
+	[.first = 0xa490, .last = 0xa4c6],
+	[.first = 0xa960, .last = 0xa97c],
+	[.first = 0xac00, .last = 0xd7a3],
+	[.first = 0xf900, .last = 0xfaff],
+	[.first = 0xfe10, .last = 0xfe19],
+	[.first = 0xfe30, .last = 0xfe52],
+	[.first = 0xfe54, .last = 0xfe66],
+	[.first = 0xfe68, .last = 0xfe6b],
+	[.first = 0xff01, .last = 0xff60],
+	[.first = 0xffe0, .last = 0xffe6],
+	[.first = 0x16fe0, .last = 0x16fe1],
+	[.first = 0x17000, .last = 0x187ec],
+	[.first = 0x18800, .last = 0x18af2],
+	[.first = 0x1b000, .last = 0x1b11e],
+	[.first = 0x1b170, .last = 0x1b2fb],
+	[.first = 0x1f004, .last = 0x1f004],
+	[.first = 0x1f0cf, .last = 0x1f0cf],
+	[.first = 0x1f18e, .last = 0x1f18e],
+	[.first = 0x1f191, .last = 0x1f19a],
+	[.first = 0x1f200, .last = 0x1f202],
+	[.first = 0x1f210, .last = 0x1f23b],
+	[.first = 0x1f240, .last = 0x1f248],
+	[.first = 0x1f250, .last = 0x1f251],
+	[.first = 0x1f260, .last = 0x1f265],
+	[.first = 0x1f300, .last = 0x1f320],
+	[.first = 0x1f32d, .last = 0x1f335],
+	[.first = 0x1f337, .last = 0x1f37c],
+	[.first = 0x1f37e, .last = 0x1f393],
+	[.first = 0x1f3a0, .last = 0x1f3ca],
+	[.first = 0x1f3cf, .last = 0x1f3d3],
+	[.first = 0x1f3e0, .last = 0x1f3f0],
+	[.first = 0x1f3f4, .last = 0x1f3f4],
+	[.first = 0x1f3f8, .last = 0x1f43e],
+	[.first = 0x1f440, .last = 0x1f440],
+	[.first = 0x1f442, .last = 0x1f4fc],
+	[.first = 0x1f4ff, .last = 0x1f53d],
+	[.first = 0x1f54b, .last = 0x1f54e],
+	[.first = 0x1f550, .last = 0x1f567],
+	[.first = 0x1f57a, .last = 0x1f57a],
+	[.first = 0x1f595, .last = 0x1f596],
+	[.first = 0x1f5a4, .last = 0x1f5a4],
+	[.first = 0x1f5fb, .last = 0x1f64f],
+	[.first = 0x1f680, .last = 0x1f6c5],
+	[.first = 0x1f6cc, .last = 0x1f6cc],
+	[.first = 0x1f6d0, .last = 0x1f6d2],
+	[.first = 0x1f6eb, .last = 0x1f6ec],
+	[.first = 0x1f6f4, .last = 0x1f6f8],
+	[.first = 0x1f910, .last = 0x1f93e],
+	[.first = 0x1f940, .last = 0x1f94c],
+	[.first = 0x1f950, .last = 0x1f96b],
+	[.first = 0x1f980, .last = 0x1f997],
+	[.first = 0x1f9c0, .last = 0x1f9c0],
+	[.first = 0x1f9d0, .last = 0x1f9e6],
+	[.first = 0x20000, .last = 0x2fffd],
+	[.first = 0x30000, .last = 0x3fffd],
+][:]
diff --git a/lib/std/fmt.myr b/lib/std/fmt.myr
index 08b814db..5c6f9043 100644
--- a/lib/std/fmt.myr
+++ b/lib/std/fmt.myr
@@ -1,4 +1,5 @@
 use "alloc"
+use "cellwidth"
 use "consts"
 use "chartype"
 use "die"
@@ -553,7 +554,7 @@ const strfmt = {sb, str, params
 			;;
 		;;
 	else
-		for i = 0; i < w - graphemewidth(str); i++
+		for i = 0; i < w - strcellwidth(str); i++
 			sbputc(sb, p)
 		;;
 		sbputs(sb, str)
@@ -564,14 +565,6 @@ const isprint = {b
 	-> b >= (' ' : byte) && b < ('~' : byte)
 }
 
-/*
-Hah. like we're going to put in the work to actually
-count graphemes.
-*/
-const graphemewidth = {str
-	-> str.len
-}
-
 const digitchars = [
 	'0','1','2','3','4',
 	'5','6','7','8','9',
-- 
2.14.3


Follow-Ups:
Re: [PATCH] implement graphemewidth, Ori Bernstein <ori@xxxxxxxxxxxxxx>