shithub: mc

ref: 4008733e9bc1a9285b70d055f6aeaa4b0a39483b
dir: /lib/regex/types.myr/

View raw version
use std

pkg regex =
	/*
	 * Status values for the regex package.
	 *
	 * NOTE(review): nothing in this file shows where these values are
	 * produced or consumed. The per-tag notes below are inferred from the
	 * tag names and payload types only -- confirm against the code that
	 * constructs them.
	 */
	type status = union
		`Noimpl			/* presumably: feature not implemented */
		`Incomplete		/* presumably: pattern ended early */
		`Unbalanced char	/* carries a char; presumably the unmatched delimiter */
		`Emptyparen		/* presumably: a group with nothing inside */
		`Badrep char		/* carries a char; presumably the offending repetition operator */
		`Badrange byte[:]	/* carries a byte slice; presumably the offending range text */
		`Badescape char		/* carries a char; presumably the offending escaped character */
	;;

	/*
	 * State for a single regex. The fields fall into three groups,
	 * marked by the section comments below: compile state, VM state,
	 * and debug state.
	 *
	 * NOTE(review): the code that fills in these fields lives outside
	 * this file; per-field notes marked "presumably" are inferred from
	 * names and types and should be confirmed there.
	 */
	type regex = struct
		/* compile state */
		debug	: bool		/* presumably: enables debug output */
		trace	: bool		/* presumably: enables trace recording */
		pat	: byte[:]	/* presumably: the pattern text */
		idx	: std.size	/* presumably: current offset into pat */
		nmatch	: std.size	/* presumably: number of match groups */

		/* VM state */
		runq	: rethread#	/* pointer to a thread; presumably the head of the run queue (see rethread.next) */
		expired	: rethread#	/* presumably: head of a list of finished threads */
		expiredtail	: rethread#	/* presumably: tail of that list */
		proglen	: std.size	/* presumably: number of instructions in use in prog */
		prog	: reinst[:]	/* the instruction slice */
		nthr	: std.size	/* presumably: count of threads */
		str	: byte[:]	/* presumably: the subject bytes being matched */
		strp	: std.size	/* presumably: current offset into str */
		nexttid : std.size	/* presumably: next value to assign to rethread.tid */

		/* debug state */
		astloc	: std.htab(ast#, std.size)#	/* table keyed by ast node pointer, holding a std.size per node */
		traces	: std.bitset#[:]		/* slice of bitset pointers */
		pcidx	: std.size[:]
		lastip	: std.size
		lastthr	: std.size
	;;

	/*
	 * Syntax tree node for a pattern. Child nodes are held by pointer
	 * (ast#), so trees are built from separately allocated nodes.
	 */
	type ast = union
		/* basic string building */
		`Alt	(ast#, ast#)	/* two child nodes */
		`Cat	(ast#, ast#)	/* two child nodes */

		/*
		 * repetition
		 *
		 * NOTE(review): the meaning of the bool carried by Star and
		 * Plus is not visible in this file (possibly a greedy vs.
		 * reluctant flag) -- confirm against the parser.
		 */
		`Star	(ast#, bool)
		`Plus	(ast#, bool)
		`Quest	ast#	/* one child node, no flag */

		/* end matches */
		`Chr	char		/* a single character */
		`Ranges	char[2][:]	/* slice of 2-element char arrays; presumably (lo, hi) pairs -- TODO confirm */

		/* meta */
		`Cap	(std.size, ast#) /* id, ast */
		`Bol	/* beginning of line */
		`Eol	/* end of line */
		`Bow	/* beginning of word */
		`Eow	/* end of word */
	;;

	/*
	 * One thread of the regex VM. Declared pkglocal. Threads are
	 * chained through `next`; struct regex holds rethread# pointers
	 * (runq, expired, expiredtail) into such chains.
	 */
	pkglocal type rethread = struct
		next	: rethread#	/* run queue link */

		tid	: std.size	/* just for debugging */
		ip	: std.size	/* the instruction pointer */
		dead	: bool		/* thread died */
		matched	: bool		/* thread matched */

		/*
		 * One std.size[2] entry per element of the slice.
		 * NOTE(review): presumably a (start, end) offset pair per
		 * match group -- confirm against the VM code.
		 */
		mgroup	: std.size[2][:]	/* match starts */
	;;

	/*
	 * One instruction of the regex VM program (struct regex stores them
	 * in `prog`). Declared pkglocal.
	 *
	 * NOTE(review): operand meanings marked "presumably" are inferred
	 * from names and types; the interpreter is not in this file.
	 */
	pkglocal type reinst = union
		/* direct consumers */
		`Ibyte	byte		/* a single byte operand */
		`Irange	(byte, byte)	/* two byte operands; presumably the low and high bounds */

		/* groups */
		`Ilbra	std.size	/* presumably: match group id for an opening bracket */
		`Irbra	std.size	/* presumably: match group id for a closing bracket */

		/* anchors */
		`Ibol
		`Ieol
		`Ibow
		`Ieow

		/* control flow */
		`Ifork	(std.size, std.size)	/* presumably: two target instruction indices */
		`Ijmp	std.size		/* presumably: target instruction index */
		`Imatch	std.size		/* presumably: id of the match being reported */
	;;
;;