Eigenstate: myrddin-dev mailing list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Fix use of width uninitialized in graphemestep


This would result in infinite loops in pretty much any caller. Also
fix handling of \r, \n, \t, which need to form their own graphemes,
but which aren't nicely marked as positive width by cellwidth.
---
If my assumptions are correct, the tests correctly tested the path
which was exercised, but since the test code was so simple, width
never started out non-zero. If that's not the case, I'd be interested
in seeing the correct explanation.
---
 lib/std/test/utf.myr | 15 ++++++++++++---
 lib/std/utf.myr      | 12 +++++++++---
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/lib/std/test/utf.myr b/lib/std/test/utf.myr
index 8778b05c..e4d0f47b 100644
--- a/lib/std/test/utf.myr
+++ b/lib/std/test/utf.myr
@@ -30,10 +30,13 @@ const main = {
 		"wrong width of Cuneiform")
 
 	/* graphemestep() */
-	var s = "a史cЯx̀̀̀̀̀yz̉"
+	var s = "̀a史c\tЯx̀̀̀̀̀\nz̉"
 	var sub, rest
 
 	(sub, rest) = std.graphemestep(s)
+	std.assert(std.streq(sub, "̀"), "didn't get U+0300 as next grapheme")
+
+	(sub, rest) = std.graphemestep(rest)
 	std.assert(std.streq(sub, "a"), "didn't get \"a\" as next grapheme")
 
 	(sub, rest) = std.graphemestep(rest)
@@ -42,6 +45,9 @@ const main = {
 	(sub, rest) = std.graphemestep(rest)
 	std.assert(std.streq(sub, "c"), "didn't get \"c\" as next grapheme")
 
+	(sub, rest) = std.graphemestep(rest)
+	std.assert(std.streq(sub, "\t"), "didn't get \"\\t\" as next grapheme")
+
 	(sub, rest) = std.graphemestep(rest)
 	std.assert(std.streq(sub, "Я"), "didn't get \"Я\" as next grapheme")
 
@@ -49,7 +55,7 @@ const main = {
 	std.assert(std.streq(sub, "x̀̀̀̀̀"), "didn't get \"x̀̀̀̀̀\" as next grapheme")
 
 	(sub, rest) = std.graphemestep(rest)
-	std.assert(std.streq(sub, "y"), "didn't get \"y\" as next grapheme")
+	std.assert(std.streq(sub, "\n"), "didn't get \"\\n\" as next grapheme")
 
 	(sub, rest) = std.graphemestep(rest)
 	std.assert(std.streq(sub, "z̉"), "didn't get \"z̉\" as next grapheme")
@@ -59,9 +65,12 @@ const main = {
 
 
 	/* with excessive combiners */
-	s = "c̸̶̡̡̗̣͕̪͖ͯ͑̈̄̿͊ͣ̈́͝ḧ̵̸̛̥͚̭̣͈͖̼͈͓͓̫͍́̓ͪͫ̋͘͡a̢̩̱̠̘̹̤̯͚̦̰̼̯̲̞͆͂̿ͬ̂͋͒̈ͅͅo̷̷̶̥͖̼̮̳̗͚ͦ̉̆̅̃̍ͤ̆͑ͣ̽́̚s̓̍̍̄͏̖̞̟̱́͡͡͝"
+	s = "\tc̸̶̡̡̗̣͕̪͖ͯ͑̈̄̿͊ͣ̈́͝ḧ̵̸̛̥͚̭̣͈͖̼͈͓͓̫͍́̓ͪͫ̋͘͡a̢̩̱̠̘̹̤̯͚̦̰̼̯̲̞͆͂̿ͬ̂͋͒̈ͅͅo̷̷̶̥͖̼̮̳̗͚ͦ̉̆̅̃̍ͤ̆͑ͣ̽́̚s̓̍̍̄͏̖̞̟̱́͡͡͝"
 
 	(sub, rest) = std.graphemestep(s)
+	std.assert(std.streq(sub, "\t"), "didn't get \"\\t\" as next grapheme")
+
+	(sub, rest) = std.graphemestep(rest)
 	std.assert(std.streq(sub, "c̸̶̡̡̗̣͕̪͖ͯ͑̈̄̿͊ͣ̈́͝"), "didn't get \"c̸̶̡̡̗̣͕̪͖ͯ͑̈̄̿͊ͣ̈́͝\" as next grapheme")
 
 	(sub, rest) = std.graphemestep(rest)
diff --git a/lib/std/utf.myr b/lib/std/utf.myr
index 9e297b33..6536791c 100644
--- a/lib/std/utf.myr
+++ b/lib/std/utf.myr
@@ -68,14 +68,20 @@ const graphemestep = {str
 	var len = 0
 	var rest = str
 	var c
-	var cn
-	var width
+	var cn = 0
+	var width = 0
 
 	while rest.len > 0
 		(c, rest) = charstep(rest)
 		cn = cellwidth(c)
 
-		if (cn > 0 || c == Badchar) && width > 0
+		if (c == '\r' || c == '\n' || c == '\t')
+			if len == 0
+				-> (str[:1], str[1:])
+			else
+				-> (str[:len], str[len:])
+			;;
+		elif (cn > 0 || c == Badchar) && len > 0
 			-> (str[:len], str[len:])
 		elif c == Badchar
 			-> (str[:1], str[1:])
-- 
2.15.0


Follow-Ups:
Re: [PATCH] Fix use of width uninitialized in graphemestepOri Bernstein <ori@xxxxxxxxxxxxxx>
References:
Re: [PATCH 2/2 v2] Implement graphemestepOri Bernstein <ori@xxxxxxxxxxxxxx>