ref: cba8dceb7246552cc446b60cef4d5a3069907faf
dir: /lib/std/striter.myr/
use "die"
use "types"
use "utf"
use "strfind"
use "option"
use "chartype"
use "slpush"
use "alloc"

pkg std =
	/* iterator state: `rest` is the not-yet-consumed tail of the string */
	type chariter = struct
		rest	: byte[:]
	;;

	type graphemeiter = struct
		rest	: byte[:]
	;;

	/* iterator state: `idx` is the byte offset of the next codepoint in `str` */
	type charoffiter = struct
		str	: byte[:]
		idx	: size
	;;

	/* iterator state: `rest` is the unsplit tail, `split` is the delimiter */
	type splititer = struct
		rest	: byte[:]
		split	: byte[:]
	;;

	/* iterator state: `idx` is the byte offset where the next scan starts */
	type tokiter = struct
		str	: byte[:]
		idx	: size
	;;

	impl iterable chariter		-> char
	impl iterable graphemeiter	-> char[:]
	impl iterable charoffiter	-> (char, size)
	impl iterable splititer		-> byte[:]
	impl iterable tokiter		-> byte[:]

	const bychar		: (str : byte[:] -> chariter)
	const bygrapheme	: (str : byte[:] -> graphemeiter)
	const bycharoff		: (str : byte[:] -> charoffiter)
	const bysplit		: (str : byte[:], split : byte[:] -> splititer)
	const bytok		: (str : byte[:] -> tokiter)
;;

/*
 * Iterate through a string char by char,
 * decoding the utf8 bytes into a single
 * codepoint.
 *
 * Iteration stops once no bytes remain.
 */
impl iterable chariter -> char =
	__iternext__ = {ci, c
		if ci.rest.len == 0
			-> false
		;;
		/* charstep hands back the decoded char and the remaining bytes */
		(c#, ci.rest) = charstep(ci.rest)
		-> true
	}

	/* nothing is allocated per step, so there is nothing to release */
	__iterfin__ = {ci, c
	}
;;

const bychar = {str
	-> [.rest = str]
}

/*
 * Iterate through a string grapheme by grapheme,
 * returning a slice of characters composing the
 * grapheme.
 *
 * The returned slice is freshly built for every
 * step and is released by __iterfin__, so a caller
 * that needs to keep it past that point must copy it.
 */
impl iterable graphemeiter -> char[:] =
	__iternext__ = {ci, g : char[:]#
		var gb, gc : char[:]

		if ci.rest.len == 0
			-> false
		;;
		(gb, ci.rest) = graphemestep(ci.rest)

		/*
		 * Graphemestep returns bytes, but we
		 * want a slice of chars.
		 */
		gc = [][:]
		for c : std.bychar(gb)
			std.slpush(&gc, c)
		;;
		g# = gc
		-> true
	}

	/* releases the char slice built by __iternext__ */
	__iterfin__ = {ci, g
		std.slfree(g#)
	}
;;

const bygrapheme = {str
	-> [.rest = str]
}

/*
 * Iterates through a string character by
 * character, similar to chariter, but returns
 * the offset into the string of the codepoint.
 * For example,
 *	"ὐbὐc"
 * would return the sequence:
 *	(ὐ, 0), (b, 3), (ὐ, 4), (c, 7)
 *
 * The offset is the byte index at which the
 * codepoint starts, not the index just past it.
 */
impl iterable charoffiter -> (char, size) =
	__iternext__ = {ci, cv
		var c

		if ci.idx == ci.str.len
			-> false
		;;
		c = std.decode(ci.str[ci.idx:])
		/*
		 * Record the offset where this codepoint begins
		 * BEFORE stepping past it; advancing first would
		 * report the offset of the following codepoint.
		 */
		cv# = (c, ci.idx)
		ci.idx += std.charlen(c)
		-> true
	}

	/* nothing is allocated per step, so there is nothing to release */
	__iterfin__ = {ci, c
	}
;;

const bycharoff = {s
	-> [.str=s, .idx=0]
}

/*
 * Iterates through the splits of a string by a
 * delimiter, skipping the delimiter.
 *
 * When no further delimiter is found, the remaining
 * text is yielded as the last piece only if it is
 * non-empty; an empty remainder ends the iteration
 * without yielding anything.
 */
impl iterable splititer -> byte[:] =
	__iternext__ = {si, sp
		match std.strfind(si.rest, si.split)
		| `Some off:
			/* yield up to the delimiter, then resume just past it */
			sp# = si.rest[:off]
			si.rest = si.rest[off + si.split.len:]
			-> true
		| `None:
			if si.rest.len > 0
				sp# = si.rest
				si.rest = ""
				-> true
			;;
		;;
		-> false
	}

	/* yielded pieces are slices of the input, so there is nothing to release */
	__iterfin__ = {ci, c
	}
;;

const bysplit = {str, split
	-> [.rest = str, .split = split]
}

/*
 * Tokenizes a string by spaces, iterating over
 * the results.
 *
 * Each step skips any run of isspace() characters,
 * then yields the following run of non-space
 * characters. Iteration ends when that run is empty.
 */
impl iterable tokiter -> byte[:] =
	__iternext__ = {it, sp
		var s, lo, hi, c

		s = it.str
		lo = it.idx
		/* skip leading whitespace to find the token start */
		while lo < s.len
			c = std.decode(s[lo:])
			if !isspace(c)
				break
			;;
			lo += charlen(c)
		;;
		/* extend to the next whitespace, or the end of the string */
		hi = lo
		while hi < s.len
			c = std.decode(s[hi:])
			if isspace(c)
				break
			;;
			hi += charlen(c)
		;;
		it.idx = hi
		sp# = s[lo:hi]
		/* an empty token means only whitespace (or nothing) was left */
		-> hi > lo
	}

	/* yielded tokens are slices of the input, so there is nothing to release */
	__iterfin__ = {ci, c
	}
;;

const bytok = {str
	-> [.str = str, .idx = 0]
}