ref: 76fc43acecea75b362d7e56557a0466cbda94572
parent: 4e018273cd06542a0605cebd840dbcb8b14ad906
author: S. Gilles <sgilles@math.umd.edu>
date: Mon Nov 6 19:16:11 EST 2017
Fix use of uninitialized width in graphemestep. This would result in infinite loops in pretty much any caller. Also fix handling of \r, \n, \t, which need to form their own graphemes, but which aren't nicely marked as positive width by cellwidth.
--- a/lib/std/test/utf.myr
+++ b/lib/std/test/utf.myr
@@ -30,10 +30,13 @@
"wrong width of Cuneiform")
/* graphemestep() */
- var s = "a史cЯx̀̀̀̀̀yz̉"
+ var s = "̀a史c\tЯx̀̀̀̀̀\nz̉"
var sub, rest
(sub, rest) = std.graphemestep(s)
+ std.assert(std.streq(sub, "̀"), "didn't get U+0300 as next grapheme")
+
+ (sub, rest) = std.graphemestep(rest)
std.assert(std.streq(sub, "a"), "didn't get \"a\" as next grapheme")
(sub, rest) = std.graphemestep(rest)
@@ -43,6 +46,9 @@
std.assert(std.streq(sub, "c"), "didn't get \"c\" as next grapheme")
(sub, rest) = std.graphemestep(rest)
+ std.assert(std.streq(sub, "\t"), "didn't get \"\\t\" as next grapheme")
+
+ (sub, rest) = std.graphemestep(rest)
std.assert(std.streq(sub, "Я"), "didn't get \"Я\" as next grapheme")
(sub, rest) = std.graphemestep(rest)
@@ -49,7 +55,7 @@
std.assert(std.streq(sub, "x̀̀̀̀̀"), "didn't get \"x̀̀̀̀̀\" as next grapheme")
(sub, rest) = std.graphemestep(rest)
- std.assert(std.streq(sub, "y"), "didn't get \"y\" as next grapheme")
+ std.assert(std.streq(sub, "\n"), "didn't get \"\\n\" as next grapheme")
(sub, rest) = std.graphemestep(rest)
std.assert(std.streq(sub, "z̉"), "didn't get \"z̉\" as next grapheme")
@@ -59,9 +65,12 @@
/* with excessive combiners */
- s = "c̸̶̡̡̗̣͕̪͖ͯ͑̈̄̿͊ͣ̈́͝ḧ̵̸̛̥͚̭̣͈͖̼͈͓͓̫͍́̓ͪͫ̋͘͡a̢̩̱̠̘̹̤̯͚̦̰̼̯̲̞͆͂̿ͬ̂͋͒̈ͅͅo̷̷̶̥͖̼̮̳̗͚ͦ̉̆̅̃̍ͤ̆͑ͣ̽́̚s̓̍̍̄͏̖̞̟̱́͡͡͝"
+ s = "\tc̸̶̡̡̗̣͕̪͖ͯ͑̈̄̿͊ͣ̈́͝ḧ̵̸̛̥͚̭̣͈͖̼͈͓͓̫͍́̓ͪͫ̋͘͡a̢̩̱̠̘̹̤̯͚̦̰̼̯̲̞͆͂̿ͬ̂͋͒̈ͅͅo̷̷̶̥͖̼̮̳̗͚ͦ̉̆̅̃̍ͤ̆͑ͣ̽́̚s̓̍̍̄͏̖̞̟̱́͡͡͝"
(sub, rest) = std.graphemestep(s)
+ std.assert(std.streq(sub, "\t"), "didn't get \"\\t\" as next grapheme")
+
+ (sub, rest) = std.graphemestep(rest)
std.assert(std.streq(sub, "c̸̶̡̡̗̣͕̪͖ͯ͑̈̄̿͊ͣ̈́͝"), "didn't get \"c̸̶̡̡̗̣͕̪͖ͯ͑̈̄̿͊ͣ̈́͝\" as next grapheme")
(sub, rest) = std.graphemestep(rest)
--- a/lib/std/utf.myr
+++ b/lib/std/utf.myr
@@ -68,14 +68,20 @@
var len = 0
var rest = str
var c
- var cn
- var width
+ var cn = 0
+ var width = 0
while rest.len > 0
(c, rest) = charstep(rest)
cn = cellwidth(c)
- if (cn > 0 || c == Badchar) && width > 0
+ if (c == '\r' || c == '\n' || c == '\t')
+ if len == 0
+ -> (str[:1], str[1:])
+ else
+ -> (str[:len], str[len:])
+ ;;
+ elif (cn > 0 || c == Badchar) && len > 0
-> (str[:len], str[len:])
elif c == Badchar
-> (str[:1], str[1:])