shithub: mc

Download patch

ref: 75ddf8edac3ea43fee7b7bdafe9e7799fb3571aa
parent: 929f3dcbc45901d30b40f06826443424825c251e
author: Ori Bernstein <ori@markovcorp.com>
date: Thu Dec 28 09:59:28 EST 2017

Performance increase.

	Actually using integers over unions speeds things up a lot.

--- a/lib/regex/compile.myr
+++ b/lib/regex/compile.myr
@@ -73,9 +73,32 @@
 		append(re, `Imatch id, t)
 		idump(re)
 		astfree(t)
+		re.code = convert(re.prog)
 		-> `std.Ok re
 	;;
 	-> `std.Err (`Noimpl)
+}
+
+const convert = {prog
+	var bc
+
+	bc = [][:]
+	for p : prog
+		match p
+		| `Ibyte b:		std.slpush(&bc, OpByte  | ((b : recode) << 4))
+		| `Irange (a, b):	std.slpush(&bc, OpRange | ((a : recode) << 4) | ((b : recode) << 16))
+		| `Ilbra m:		std.slpush(&bc, OpLbra  | ((m : recode) << 4))
+		| `Irbra m:		std.slpush(&bc, OpRbra  | ((m : recode) << 4))
+		| `Ibol:		std.slpush(&bc, OpBol)
+		| `Ieol:		std.slpush(&bc, OpEol)
+		| `Ibow:		std.slpush(&bc, OpBow)
+		| `Ieow:		std.slpush(&bc, OpEow)
+		| `Ijmp d:		std.slpush(&bc, OpJmp   | ((d : recode) << 4))
+		| `Imatch m:		std.slpush(&bc, OpMatch | ((m : recode) << 4))
+		| `Ifork (a, b):	std.slpush(&bc, OpFork  | ((a : recode) << 4) | ((b : recode) << 34))
+		;;
+	;;
+	-> bc
 }
 
 const free = {re
--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -252,10 +252,6 @@
 			thr = re.runq
 			re.runq = thr.next
 
-			if re.trace
-				trace(re, thr, "\nrunning tid={}, ip={}, s[{}]={}\n", \
-					thr.tid, thr.ip, re.strp, std.decode(re.str[re.strp:]))
-			;;
 			ip = thr.ip
 			consumed = step(re, thr, -1)
 			while !consumed
@@ -263,13 +259,12 @@
 			;;
 
 			if std.bshas(states, thr.ip)
-				die(re, thr, "there can be only one")
+				die(re, thr)
 			;;
 
 			if thr.dead
 				thrfree(re, thr)
 			elif thr.matched
-				trace(re, thr, "new bestmatch\n")
 				if bestmatch != Zthr
 					thrfree(re, bestmatch)
 				;;
@@ -311,117 +306,112 @@
  consumed, false otherwise.
 */
 const step = {re, thr, curip
-	var str
-	var nthr
+	var str, nthr, inst
 
 	str = re.str
-	match re.prog[thr.ip]
+	inst = re.code[thr.ip]
+	if re.trace
+		itrace(re, thr, re.prog[thr.ip])
+	;;
+	if re.debug
+		std.bsput(re.traces[thr.tid], thr.ip)
+	;;
+	match inst & 0xf
 	/* Char matching. Consume exactly one byte from the string. */
-	| `Ibyte b:
-		trace(re, thr, "\t{}:\tByte {} ({})\n", thr.ip, b, (b : char))
-		if !within(re, str)
-			die(re, thr, "end of string")
-		elif b != str[re.strp]
-			die(re, thr, "not right char")
+	| OpRange:
+		var lo = (inst >>  4 : byte)
+		var hi = (inst >> 16 : byte)
+		
+		if !within(re, str) || lo > str[re.strp] || hi < str[re.strp]
+			die(re, thr)
 		else
-			hit(re, thr)
 			thr.ip++
-			trace(re, thr, "\t\tmatched {} with {}\n", b, str[re.strp])
 		;;
-	| `Irange (start, end):
-		trace(re, thr, "\t{}:\tRange ({}, {}) /* {} - {} */\n", thr.ip, start, end, (start : char), (end : char))
-		if !within(re, str) || start > str[re.strp] || end < str[re.strp]
-			die(re, thr, "bad range")
+	| OpByte:
+		var b = (inst >> 4 : byte)
+		if !within(re, str)
+			die(re, thr)
+		elif b != str[re.strp]
+			die(re, thr)
 		else
-			hit(re, thr)
 			thr.ip++
 		;;
+	| OpFork:
+		var lip = ((inst >>  4) & 0x3fffffff : std.size)
+		var rip = ((inst >> 34) & 0x3fffffff : std.size)
+		if rip != curip
+			nthr = mkthread(re, rip)
+			nthr.next = re.runq
+			nthr.mgroup = thr.mgroup
+			re.runq = nthr
+		;;
+		if re.debug
+			std.slpush(&re.traces, std.bsdup(re.traces[thr.tid]))
+		;;
+		thr.ip = lip
+		-> false
 	/*
 	  Non-consuming. All of these return false, and expect step to be
 	  called again until exactly one byte is consumed from the string.
 	 */
-	| `Ibol:
-		trace(re, thr, "\t{}:\tBol\n", thr.ip)
+	| OpJmp:
+		var ip = (inst >> 4 : std.size)
+		thr.ip = ip
+		-> false
+	| OpMatch:
+		var id = (inst >> 4 : std.size)
+		re.lastthr = thr.tid
+		finish(re, thr)
+		-> true
+	| OpLbra:
+		var m = (inst >> 4 : std.size)
+		thr.mgroup[m][0] = re.strp
+		thr.ip++
+		-> false
+	| OpRbra:
+		var m = (inst >> 4 : std.size)
+		thr.mgroup[m][1] = re.strp
+		thr.ip++
+		-> false
+	| OpBol:
 		if re.strp == 0 || str[re.strp - 1] == ('\n' : byte)
-			hit(re, thr)
 			thr.ip++
 			-> false
 		else
-			die(re, thr, "not beginning of line")
+			die(re, thr)
 		;;
-	| `Ieol:
-		trace(re, thr, "\t{}:\tEol\n", thr.ip)
+	| OpEol:
 		if re.strp == str.len || str[re.strp] == ('\n' : byte)
-			hit(re, thr)
 			thr.ip++
 			-> false
 		else
-			die(re, thr, "not end of line")
+			die(re, thr)
 		;;
 	/* check for word characters */
-	| `Ibow:
-		trace(re, thr, "\t{}:\tBow\n", thr.ip)
+	| OpBow:
 		if iswordchar(str[re.strp:]) && (re.strp == 0 || !iswordchar(prevchar(str, re.strp)))
-			hit(re, thr)
 			thr.ip++
 			-> false
 		else
-			die(re, thr, "not beginning of word")
+			die(re, thr)
 		;;
-	| `Ieow:
-		trace(re, thr, "\t{}:\tEow\n", thr.ip)
+	| OpEow:
 		if re.strp == str.len && iswordchar(prevchar(str, re.strp))
-			hit(re, thr)
 			thr.ip++
 			-> false
 		elif re.strp > 0 && !iswordchar(str[re.strp:]) && iswordchar(prevchar(str, re.strp))
-			hit(re, thr)
 			thr.ip++
 			-> false
 		else
-			die(re, thr, "not end of word")
+			die(re, thr)
 		;;
-	| `Ilbra m:
-		trace(re, thr, "\t{}:\tLbra {}\n", thr.ip, m)
-		thr.mgroup[m][0] = re.strp
-		hit(re, thr)
-		thr.ip++
-		-> false
-	| `Irbra m:
-		trace(re, thr, "\t{}:\tRbra {}\n", thr.ip, m)
-		thr.mgroup[m][1] = re.strp
-		hit(re, thr)
-		thr.ip++
-		-> false
-	| `Ifork (lip, rip):
-		trace(re, thr, "\t{}:\tFork ({}, {})\n", thr.ip, lip, rip)
-		hit(re, thr)
-		if rip != curip
-			nthr = mkthread(re, rip)
-			nthr.next = re.runq
-			nthr.mgroup = thr.mgroup
-			re.runq = nthr
-		;;
-		if re.debug
-			std.slpush(&re.traces, std.bsdup(re.traces[thr.tid]))
-		;;
-		thr.ip = lip
-		-> false
-	| `Ijmp ip:
-		trace(re, thr, "\t{}:\tJmp {}\n", thr.ip, ip)
-		hit(re, thr)
-		thr.ip = ip
-		-> false
-	| `Imatch id:
-		trace(re, thr, "\t{}:\tMatch\n", thr.ip)
-		re.lastthr = thr.tid
-		finish(re, thr)
-		-> true
+	| _:
+		std.die("bad match")
 	;;
 	-> true
 }
 
-const die = {re, thr, msg
+const die = {re, thr
         /*
  	  we can have die called on a thread
 	  multiple times, eg, if it has a bad
@@ -429,7 +419,6 @@
 	  thread is in. We should only decrement
 	  the number of threads for that once.
 	 */
-	trace(re, thr, "\t\tdie {}: {}\n", thr.tid, msg)
         if !thr.dead
 		re.nthr--
 	;;
@@ -439,7 +428,6 @@
 }
 
 const finish = {re, thr
-	trace(re, thr, "finish {}\n", thr.tid)
 	thr.matched = true
 	re.nthr--
 }
@@ -478,6 +466,25 @@
 	-> re.strp < str.len
 }
 
+const itrace = {re, thr, inst
+	match inst
+	| `Ibyte b:	trace(re, thr, "\t{}: Byte ({})\n", thr.ip, b)
+	| `Irange (lo, hi):	trace(re, thr, "\t{}: Range {}, {}\n", thr.ip, lo, hi)
+	| `Ilbra m:	trace(re, thr, "\t{}: Lbra {}\n", thr.ip, m)
+	| `Irbra m:	trace(re, thr, "\t{}: Rbra {}\n", thr.ip, m)
+	/* anchors */
+	| `Ibol:	trace(re, thr, "\t{}: Bol\n", thr.ip)
+	| `Ieol:	trace(re, thr, "\t{}: Eol\n", thr.ip)
+	| `Ibow:	trace(re, thr, "\t{}: Bow\n", thr.ip)
+	| `Ieow:	trace(re, thr, "\t{}: Eow\n", thr.ip)
+
+	/* control flow */
+	| `Ifork (l, r):	trace(re, thr, "\t{}: Fork {}, {}\n", thr.ip, l, r)
+	| `Ijmp	ip:	trace(re, thr, "\t{}: Jmp {}\n", thr.ip, ip)
+	| `Imatch m:	trace(re, thr, "\t{}: Match {}\n", thr.ip, m)
+	;;
+}
+
 const trace : (re : regex#, thr : rethread#, msg : byte[:], args : ... -> void) = {re, thr, msg, args
 	var ap
 
@@ -491,12 +498,6 @@
 		;;
 		std.put("^\n")
 	;;
-}
-
-const hit = {re, thr
-        if re.debug
-                std.bsput(re.traces[thr.tid], thr.ip)
-        ;;
 }
 
 /* must be called with i >= 1 */
--- a/lib/regex/types.myr
+++ b/lib/regex/types.myr
@@ -27,6 +27,7 @@
 		nfree	: std.size
 		proglen	: std.size
 		prog	: reinst[:]
+		code	: recode[:]
 		nthr	: std.size
 		str	: byte[:]
 		strp	: std.size
@@ -70,9 +71,24 @@
 		dead	: bool		/* thread died */
 		matched	: bool		/* thread matched */
 
-		mgroup	: std.size[2][32]	/* match starts */
+		mgroup	: std.size[2][16]	/* match starts */
 	;;
 
+
+	pkglocal type recode = uint64
+
+	/* can have at most up to 0xf ops */
+	const OpByte	: recode = 0x0
+	const OpRange	: recode = 0x1
+	const OpLbra	: recode = 0x2
+	const OpRbra	: recode = 0x3
+	const OpBol	: recode = 0x4
+	const OpEol	: recode = 0x5
+	const OpBow	: recode = 0x6
+	const OpEow	: recode = 0x7
+	const OpFork	: recode = 0x8
+	const OpJmp	: recode = 0x9
+	const OpMatch	: recode = 0xa
 
 	pkglocal type reinst = union
 		/* direct consumers */