ref: da5bfc6d15a50130afdcd37ed9af737a81724506
dir: /mparse/tok.myr/
use std

use "types.use"
use "tokdefs.use"
use "util.use"

pkg parse =
	/*
	  Tokenizer state.
	    next: one token of lookahead, filled by tokpeek and drained by toknext.
	    rest: the part of the input that has not been consumed yet.
	    data: the whole input buffer; released by tokclose.
	    loc:  the source location of the read position.
	*/
	type tokstream = struct
		next	: std.option((srcloc, tok))
		rest	: byte[:]
		data	: byte[:]
		loc	: srcloc
	;;

	const tokinit	: (path : byte[:] -> tokstream#)
	const tokinitf	: (fd : std.fd, path : byte[:] -> tokstream#)
	const tokclose	: (ts : tokstream# -> void)
	const toknext	: (ts : tokstream# -> (srcloc, tok))
	const tokpeek	: (ts : tokstream# -> (srcloc, tok))
;;

/* value the character readers below are compared against to detect end of input */
const Eof = std.Badchar

/*
  Reads the file at `path` and returns a token stream over its contents.
  Exits through std.fatal if the file cannot be read.
*/
const tokinit = {path
	match std.slurp(path)
	| `std.Ok data:	-> mkparser(path, data)
	| `std.Fail e:	std.fatal("could not read file {}: {}\n", path, e)
	;;
}

/*
  Reads everything from `fd` and returns a token stream over it. `name` is
  the file name recorded in source locations and used in the error message.
  Exits through std.fatal if the descriptor cannot be read.
*/
const tokinitf = {fd, name
	match std.fslurp(fd)
	| `std.Ok data:	-> mkparser(name, data)
	/* report the name, not the descriptor number */
	| `std.Fail e:	std.fatal("could not read file {}: {}\n", name, e)
	;;
}

/* Allocates a token stream positioned at line 1, column 1 of `data`. */
const mkparser = {name, data
	-> std.mk([
		.loc = [.file=name, .line=1, .col=1],
		.next=`std.None,
		.rest=data,
		.data=data,
	])
}

/* Frees the input buffer and the stream itself. */
const tokclose = {ts
	std.slfree(ts.data)
	std.free(ts)
}

/*
  Returns the next token and its location, consuming it. A token buffered
  by an earlier tokpeek is handed out first.
*/
const toknext = {ts
	var t

	match ts.next
	| `std.Some tok:
		ts.next = `std.None
		-> tok
	| `std.None:
		t = tokread(ts)
		-> t
	;;
}

/*
  Returns the next token and its location without consuming it. The token
  is kept in ts.next so that repeated peeks return the same value.
*/
const tokpeek = {ts
	var tok

	match ts.next
	| `std.Some t:
		-> t
	| `std.None:
		tok = tokread(ts)
		ts.next = `std.Some tok
		-> tok
	;;
}

/*
  Reads one token from the input. Leading blanks and comments are skipped,
  then the first character decides which kind of token is lexed. The
  location returned is the one recorded after the skip.
*/
const tokread = {ts
	var c, loc

	skipspace(ts)
	loc = ts.loc
	c = peekc(ts)
	if ts.rest.len == 0
		-> (loc, `Teof)
	elif c == '\n'
		takec(ts)
		ts.loc.line++
		ts.loc.col = 1
		-> (loc, `Tendln)
	elif c == '\''
		-> (loc, chrlit(ts))
	elif c == '"'
		-> (loc, strlit(ts))
	elif c == '@'
		-> (loc, typaram(ts))
	elif std.isdigit(c)
		-> (loc, numlit(ts))
	elif isident(c)
		-> (loc, kwident(ts))
	else
		-> (loc, oper(ts))
	;;
}

/*
  Skips blanks, line comments and block comments. A newline stops the
  skip, since it is a token of its own, unless a backslash was seen
  earlier in this call, in which case the newline is swallowed.
*/
const skipspace = {ts
	var ignorenl

	ignorenl = false
	while true
		match peekc(ts)
		| '\n':
			if ignorenl
				takec(ts)
				ts.loc.line++
				ts.loc.col = 1
			else
				break
			;;
		| '\\':
			ignorenl = true
			takec(ts)
		| '/':
			match npeekc(ts, 1)
			| '/':	skipto(ts, '\n')
			| '*':	skipcomment(ts)
			| _:	break
			;;
		| c:
			if std.isspace(c)
				takec(ts)
			else
				break
			;;
		;;
	;;
}

/*
  Skips a block comment, tracking nesting depth so that nested comments
  are balanced. Reports an error if the input ends inside the comment.
*/
const skipcomment = {ts
	var depth, startln

	depth = 0
	startln = ts.loc.line
	while true
		match takec(ts)
		| '/':
			if matchc(ts, '*')
				depth++
			;;
		| '*':
			if matchc(ts, '/')
				depth--
			;;
		| '\n':
			ts.loc.line++
			ts.loc.col = 1
		| Eof:
			err(ts.loc, "file ended in comment starting on line {}\n", startln)
		| _:
		;;
		if depth == 0
			break
		;;
	;;
}

/*
  Lexes a character literal. The opening quote has been peeked but not
  consumed. A backslash introduces an escape, decoded by unescape.
*/
const chrlit = {ts
	var c, close

	takec(ts)
	c = takec(ts)
	if c == '\\'
		c = unescape(ts)
	;;
	close = takec(ts)
	if close != '\''
		err(ts.loc, "expected closing ' in character literal, got {}\n", close)
	;;
	-> `Tchrlit c
}

/*
  Lexes a string literal. The opening quote has been peeked but not
  consumed. A raw newline or end of input inside the literal is an error.
*/
const strlit = {ts
	var sb

	takec(ts)
	sb = std.mksb()
	while true
		match takec(ts)
		| Eof:	err(ts.loc, "unexpected EOF within string literal\n")
		| '\n':	err(ts.loc, "unexpected \\n within string literal\n")
		| '"':	break
		| '\\':	std.sbputc(sb, unescape(ts))
		| c:	std.sbputc(sb, c)
		;;
	;;
	-> `Tstrlit std.sbfin(sb)
}

/*
  Decodes one escape sequence. The backslash has already been consumed;
  this reads the escape code and any digits that belong to it.
*/
const unescape = {ts
	var c, c1, c2

	c = takec(ts)
	/* we've already seen the '\' */
	match c
	| 'n':	-> '\n'
	| 'r':	-> '\r'
	| 't':	-> '\t'
	| 'b':	-> '\b'
	| '"':	-> '\"'
	| '\'':	-> '\''
	| 'v':	-> '\v'
	| '\\':	-> '\\'
	| '0':	-> '\0'
	| 'u':	-> utfesc(ts)
	| 'x':
		/* exactly two hex digits follow */
		c1 = takec(ts)
		if !std.isxdigit(c1)
			err(ts.loc, "expected hex digit, got {}\n", c1)
		;;
		c2 = takec(ts)
		if !std.isxdigit(c2)
			err(ts.loc, "expected hex digit, got {}\n", c2)
		;;
		-> 16*std.charval(c1, 16) + std.charval(c2, 16)
	| esc:
		err(ts.loc, "unknown escape code \\{}\n", esc)
	;;
}

/*
  Decodes the braced hex digits of a \u escape; the 'u' has already been
  consumed. Values above 0x10FFFF are rejected.
*/
const utfesc = {ts
	var c, v

	if takec(ts) != '{'
		err(ts.loc, "\\u escape sequence without initial '{'\n")
	;;
	v = 0
	c = std.Badchar
	while true
		c = takec(ts)
		if std.isxdigit(c)
			v *= 16
			v += std.charval(c, 16)
		else
			break
		;;
		if v > 0x10FFFF
			err(ts.loc, "invalid codepoint in \\u escape sequence\n")
		;;
	;;
	/* the character that ended the digit run must be the closing brace */
	if c != '}'
		err(ts.loc, "\\u escape sequence without closing '}'\n")
	;;
	-> v
}

/*
  Lexes a type parameter: an '@' followed by an identifier. A keyword in
  that position is an error.
*/
const typaram = {ts
	takec(ts)
	match kwident(ts)
	| `Tident id:	-> `Ttyparam id
	| kw:	err(ts.loc, "'{}' used as type parameter\n", kw)
	;;
}

/*
  Lexes a numeric literal. A leading 0x, 0b or 0o selects base 16, 2 or 8
  and is consumed here; anything else is base 10. The prefix is peeked
  rather than matched so that a plain leading zero stays part of the
  digits handed to number: a bare 0 must not reach the integer parser as
  an empty string.
*/
const numlit = {ts
	var base

	base = 10
	if peekc(ts) == '0'
		match npeekc(ts, 1)
		| 'x':	base = 16
		| 'b':	base = 2
		| 'o':	base = 8
		| _:
		;;
		if base != 10
			/* drop the two prefix characters */
			takec(ts)
			takec(ts)
		;;
	;;
	-> number(ts, base)
}

/*
  Lexes the body of a number in the given base; any base prefix has
  already been consumed. Digits are gathered into a buffer with the '_'
  separators dropped, so the text that gets parsed is exactly the digits
  that were consumed. An optional 'u' marks the literal unsigned, and an
  optional size suffix selects the width: l is 64 bits, i is 32, s is 16
  and b is 8; with no suffix the width is recorded as 0.
*/
const number = {ts, base
	var sb, digits
	var isfloat, issigned
	var v, dv, bits

	sb = std.mksb()
	isfloat = false
	for var c = peekc(ts); std.isxdigit(c) || c == '.' || c == '_'; c = peekc(ts)
		takec(ts)
		if c == '_'
			continue
		elif c == '.'
			isfloat = true
		else
			dv = std.charval(c, base)
			if dv < 0
				err(ts.loc, "digit {} out of range of base {}\n", c, base)
			;;
		;;
		std.sbputc(sb, c)
	;;
	digits = std.sbfin(sb)

	if isfloat
		if base != 10
			err(ts.loc, "floats must be in base 10\n")
		;;
		/* TODO: produce a float literal token once float parsing exists */
		std.fatal("unable to parse floats: fuck me\n")
	else
		/* reachable when a base prefix is followed by no digits */
		if digits.len == 0
			err(ts.loc, "expected digits in base {} integer literal\n", base)
		;;
		issigned = true
		if matchc(ts, 'u')
			issigned = false
		;;
		match peekc(ts)
		| 'l':	bits = 64
		| 'i':	bits = 32
		| 's':	bits = 16
		| 'b':	bits = 8
		| _:	bits = 0
		;;
		/* a recognized size suffix is part of the literal: consume it */
		if bits != 0
			takec(ts)
		;;
		/* every digit was checked against the base above */
		v = std.get(std.intparsebase(digits, base))
		std.slfree(digits)
		-> `Tintlit (v, bits, issigned)
	;;
}

/*
  Lexes an identifier and maps it to a keyword token when it is one.
  The text returned by identstr points into the input buffer, so it is
  duplicated only when it is kept inside an identifier token.
*/
const kwident = {ts
	match identstr(ts)
	| "$":		-> `Tidxlen
	| "_":		-> `Tgap
	| "$noret":	-> `Tattr `Attrnoret
	| "break":	-> `Tbreak
	| "castto":	-> `Tcast
	| "const":	-> `Tconst
	| "continue":	-> `Tcontinue
	| "elif":	-> `Telif
	| "else":	-> `Telse
	| "extern":	-> `Tattr `Attrextern
	| "false":	-> `Tboollit false
	| "for":	-> `Tfor
	| "generic":	-> `Tgeneric
	| "goto":	-> `Tgoto
	| "if":		-> `Tif
	| "impl":	-> `Timpl
	| "in":		-> `Tin
	| "match":	-> `Tmatch
	| "pkg":	-> `Tpkg
	| "pkglocal":	-> `Tattr `Attrpkglocal
	| "sizeof":	-> `Tsizeof
	| "struct":	-> `Tstruct
	| "trait":	-> `Ttrait
	| "true":	-> `Tboollit true
	| "type":	-> `Ttype
	| "union":	-> `Tunion
	| "use":	-> `Tuse
	| "var":	-> `Tvar
	| "void":	-> `Tvoidlit
	| "while":	-> `Twhile
	| ident:	-> `Tident std.sldup(ident)
	;;
}

/*
  Lexes an operator or punctuation token. The first character is
  consumed up front; matchc then extends it to the longest operator.
*/
const oper = {ts
	var t, chr

	chr = takec(ts)
	match chr
	| '{':	t = `Tobrace
	| '}':	t = `Tcbrace
	| '(':	t = `Toparen
	| ')':	t = `Tcparen
	| '[':	t = `Tosqbrac
	| ']':	t = `Tcsqbrac
	| ',':	t = `Tcomma
	| '`':	t = `Ttick
	| '#':	t = `Tderef
	| '~':	t = `Tbnot
	| ':':
		if matchc(ts, ':')
			t = `Twith
		else
			t = `Tcolon
		;;
	| ';':
		if matchc(ts, ';')
			t = `Tendblk
		else
			t = `Tendln
		;;
	| '.':
		/*
		  the first dot is already consumed, so the other two dots of
		  an ellipsis are the next two characters of the input
		*/
		if peekc(ts) == '.' && npeekc(ts, 1) == '.'
			takec(ts)
			takec(ts)
			t = `Tellipsis
		else
			t = `Tdot
		;;
	| '+':
		if matchc(ts, '=')
			t = `Taddeq
		elif matchc(ts, '+')
			t = `Tinc
		else
			t = `Tplus
		;;
	| '-':
		if matchc(ts, '=')
			t = `Tsubeq
		elif matchc(ts, '-')
			t = `Tdec
		elif matchc(ts, '>')
			t = `Tret
		else
			t = `Tminus
		;;
	| '*':
		if matchc(ts, '=')
			t = `Tmuleq
		else
			t = `Tmul
		;;
	| '/':
		if matchc(ts, '=')
			t = `Tdiveq
		else
			t = `Tdiv
		;;
	| '%':
		if matchc(ts, '=')
			t = `Tmodeq
		else
			t = `Tmod
		;;
	| '=':
		if matchc(ts, '=')
			t = `Teq
		else
			t = `Tasn
		;;
	| '|':
		if matchc(ts, '=')
			t = `Tboreq
		elif matchc(ts, '|')
			t = `Tlor
		else
			t = `Tbor
		;;
	| '&':
		if matchc(ts, '=')
			t = `Tbandeq
		elif matchc(ts, '&')
			t = `Tland
		else
			t = `Tband
		;;
	| '^':
		if matchc(ts, '=')
			t = `Tbxoreq
		else
			t = `Tbxor
		;;
	| '<':
		if matchc(ts, '=')
			t = `Tle
		elif matchc(ts, '<')
			if matchc(ts, '=')
				t = `Tbsleq
			else
				t = `Tbsl
			;;
		else
			t = `Tlt
		;;
	| '>':
		if matchc(ts, '=')
			t = `Tge
		elif matchc(ts, '>')
			if matchc(ts, '=')
				t = `Tbsreq
			else
				t = `Tbsr
			;;
		else
			t = `Tgt
		;;
	| '!':
		if matchc(ts, '=')
			t = `Tne
		else
			t = `Tlnot
		;;
	| c:
		t = `Terror
		err(ts.loc, "junk character {}", c)
	;;
	-> t
}

/*
  Consumes the longest run of identifier characters and returns it. The
  result is a slice of the input buffer, not a copy. Returns the empty
  string, consuming nothing, at end of input or when the next character
  is a digit.
*/
const identstr = {ts
	var i, str

	/* ASCII */
	if ts.rest.len == 0 || std.isdigit(ts.rest[0] castto(char))
		-> ""
	;;
	for i = 0; i < ts.rest.len; i++
		if !isident(ts.rest[i] castto(char))
			break
		;;
	;;
	str = ts.rest[:i]
	ts.rest = ts.rest[i:]
	-> str
}

/*
  True for characters that may appear in an identifier: ASCII letters,
  digits, '_' and '$'.
*/
const isident = {c
	-> c & 0x80 == 0 && \
		(c >= 'a' && c <= 'z' || \
		 c >= 'A' && c <= 'Z' || \
		 c >= '0' && c <= '9' || \
		 c == '_' || c == '$')
}

/* Returns the next character without consuming it. */
const peekc = {ts
	-> std.decode(ts.rest)
}

/*
  Returns the character `n` positions past the next one without consuming
  anything; npeekc(ts, 0) is the same character peekc returns.
*/
const npeekc = {ts, n
	var c, s

	s = ts.rest
	for var i = 0; i < n; i++
		(c, s) = std.strstep(s)
	;;
	-> std.decode(s)
}

/* Consumes and returns the next character. */
const takec = {ts
	var c, s

	(c, s) = std.strstep(ts.rest)
	ts.rest = s
	-> c
}

/*
  Consumes characters up to, but not including, the next `chr`, or to the
  end of input if there is none. The terminator is left in place so that
  the newline ending a line comment is still seen by the caller.
*/
const skipto = {ts, chr
	while ts.rest.len != 0 && peekc(ts) != chr
		takec(ts)
	;;
}

/*
  Consumes the next character and returns true if it equals `chr`;
  otherwise leaves the input untouched and returns false.
*/
const matchc = {ts, chr
	var c, s

	(c, s) = std.strstep(ts.rest)
	if c == chr
		ts.rest = s
		-> true
	else
		-> false
	;;
}