ref: 3dde0d66b484358b413dde842959e8a832a6a991
parent: e25ab2a93db2cff8ee634e0f82244a66137279ba
author: Ori Bernstein <ori@eigenstate.org>
date: Wed Apr 29 17:23:39 EDT 2015
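Add regex parsing to libregex. I'd like to use the same regex parser for source indexing and parser generation.
This exports regex.parse, moves the syntax tree type from compile.myr to types.myr (renamed from tree to ast), and renames the internal parser entry point from parse to regexparse.
Also included: parse/gram.y no longer forces array size expressions to Tyuint32, and parse/infer.c calls tyresolve on union member types before tyfix.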
--- a/libregex/compile.myr
+++ b/libregex/compile.myr
@@ -4,38 +4,14 @@
use "ranges.use"
pkg regex =
+ const parse : (re : byte[:] -> std.result(ast#, status)) /* parses a pattern and yields its ast rather than a compiled regex */
const compile : (re : byte[:] -> std.result(regex#, status))
const dbgcompile : (re : byte[:] -> std.result(regex#, status))
const free : (re : regex# -> void)
;;
-type tree = union
- /* basic string building */
- `Alt (tree#, tree#)
- `Cat (tree#, tree#)
-
- /* repetition */
- `Star tree#
- `Rstar tree#
- `Plus tree#
- `Rplus tree#
- `Quest tree#
-
- /* end matches */
- `Byte byte
- `Chr char
- `Ranges char[2][:]
-
- /* meta */
- `Cap (std.size, tree#) /* id, tree */
- `Bol /* beginning of line */
- `Eol /* end of line */
- `Bow /* beginning of word */
- `Eow /* end of word */
-;;
-
type parseresult = union
- `Some tree#
+ `Some ast# /* a successfully parsed ast node */
`None
`Fail status
;;
@@ -42,23 +18,36 @@
/* Compiles a pattern into a regex */
const compile = {pat
- -> regexcompile(std.zalloc(), pat)
+ -> regexcompile(std.mk([.pat = pat, .nmatch = 1])) /* nmatch = 1: the whole match */
}
+/* Parses pat into an ast without compiling it. A `None result or unconsumed input yields `Incomplete; other parse failures are passed through. */
+const parse = {pat
+	var re
+	re = std.mk([.pat = pat, .nmatch = 1])	/* nmatch = 1: the whole match. NOTE(review): re is never released on any path here -- confirm and free it */
+	match regexparse(re)
+	| `None:	-> `std.Fail `Incomplete
+	| `Fail f:	-> `std.Fail f
+	| `Some t:
+		if re.pat.len > 0
+			astfree(t)	/* leftover input: release the tree before failing, as regexcompile does */
+			-> `std.Fail `Incomplete
+		;;
+		-> `std.Ok t
+	;;
+}
+
/* Compiles a pattern into a debug regex. This can be verbose. */
const dbgcompile = {pat
var re
- re = std.zalloc()
- re.debug = true
- -> regexcompile(re, pat)
+ re = std.mk([.pat = pat, .nmatch = 1, .debug = true]) /* nmatch = 1: the whole match; debug turned on up front */
+ -> regexcompile(re)
}
/* compiles a pattern into an allocated regex */
-const regexcompile = {re, pat
- re.pat = pat
- re.nmatch = 1 /* whole match */
- match parse(re)
+const regexcompile = {re
+ match regexparse(re)
| `None: -> `std.Fail (`Incomplete)
| `Fail f: -> `std.Fail f
| `Some t:
@@ -67,6 +56,7 @@
an incorrectly encoded char
*/
if re.pat.len > 0
+ astfree(t)
-> `std.Fail (`Incomplete)
;;
dump(re, t, 0)
@@ -473,7 +463,7 @@
}
/* parses an expression */
-const parse = {re
+const regexparse = {re
match altexpr(re)
| `Some t:
if re.pat.len == 0
@@ -490,7 +480,7 @@
}
const altexpr = {re
- var ret : tree#
+ var ret
match catexpr(re)
| `Some t:
--- a/libregex/types.myr
+++ b/libregex/types.myr
@@ -11,6 +11,32 @@
`Badescape
;;
+ type ast = union /* regex syntax tree; regex.parse returns a pointer to one of these */
+ /* basic string building */
+ `Alt (ast#, ast#)
+ `Cat (ast#, ast#)
+
+ /* repetition */
+ `Star ast#
+ `Rstar ast# /* NOTE(review): presumably the reluctant (non-greedy) form of `Star -- confirm */
+ `Plus ast#
+ `Rplus ast# /* NOTE(review): presumably the reluctant (non-greedy) form of `Plus -- confirm */
+ `Quest ast#
+
+ /* end matches */
+ `Byte byte
+ `Chr char
+ `Ranges char[2][:] /* slice of two-char arrays; presumably [lo, hi] bounds -- confirm */
+
+ /* meta */
+ `Cap (std.size, ast#) /* id, ast */
+ `Bol /* beginning of line */
+ `Eol /* end of line */
+ `Bow /* beginning of word */
+ `Eow /* end of word */
+ ;;
+
+
type regex = struct
/* compile state */
debug : bool
--- a/parse/gram.y
+++ b/parse/gram.y
@@ -467,10 +467,7 @@
compoundtype
: functype {$$ = $1;}
| type Tosqbrac Tcolon Tcsqbrac {$$ = mktyslice($2->loc, $1);}
- | type Tosqbrac expr Tcsqbrac {
- $3->expr.type = mktype($3->loc, Tyuint32);
- $$ = mktyarray($2->loc, $1, $3);
- }
+ | type Tosqbrac expr Tcsqbrac {$$ = mktyarray($2->loc, $1, $3);}
| type Tosqbrac Tellipsis Tcsqbrac {$$ = mktyarray($2->loc, $1, NULL);}
| type Tderef {$$ = mktyptr($2->loc, $1);}
| Tat Tident {$$ = mktyparam($1->loc, $2->id);}
--- a/parse/infer.c
+++ b/parse/infer.c
@@ -1712,8 +1712,10 @@
st->intype--;
} else if (t->type == Tyunion) {
for (i = 0; i < t->nmemb; i++) {
- if (t->udecls[i]->etype)
+ if (t->udecls[i]->etype) {
+ tyresolve(st, t->udecls[i]->etype);
t->udecls[i]->etype = tyfix(st, ctx, t->udecls[i]->etype, noerr);
+ }
}
} else if (t->type == Tyname) {
for (i = 0; i < t->narg; i++)