shithub: mc

ref: 847f11bb87ebfa7e4de48c05d65c83758d9220e1
dir: /lib/regex/redump.myr/

View raw version
use std
use bio
use regex

/*
 * Entry point. Parses the command line, compiles the regex given as
 * the first positional argument (the -v flag is handed through as the
 * second argument of regex.dbgcompile), and then runs the regex over
 * every line of each input file named after the pattern. When no input
 * files are given, lines are read from file descriptor 0 instead.
 */
const main = {args
	var cmd, comp
	var verbose
	var fd

	verbose = false
	cmd = std.optparse(args, &[
		.argdesc = "regex [inputs...]",
		.minargs = 1,
		.opts = [
			[.opt='v', .desc="dump verbose regex output"]
		][:],
	])
	for opt : cmd.opts
		match opt
		| ('v', _):	verbose = true
		| _:	std.fatal("Unknown argument")
		;;
	;;

	/* the flag is passed straight through; no need to branch on it */
	comp = regex.dbgcompile(cmd.args[0], verbose)
	match comp
	| `std.Err m:
		std.fatal("unable to compile regex: {}\n", m)
	| `std.Ok re:
		if cmd.args.len > 1
			/*
			 * cmd.args[0] is the pattern itself; only the
			 * remaining arguments name input files.
			 */
			runall(re, cmd.args[1:])
		else
			fd = bio.mkfile(0, bio.Rd)
			dump(re, fd)
			bio.close(fd)
		;;
	;;
}

/*
 * Opens each path in `files` for reading and dumps the match
 * results for every line it contains. A path that cannot be
 * opened aborts the program with a diagnostic.
 */
const runall = {re, files
	for path : files
		match bio.open(path, bio.Rd)
		| `std.Err err:
			std.fatal("failed to open {}: {}\n", path, err)
		| `std.Ok input:
			dump(re, input)
			bio.close(input)
		;;
	;;
}

/*
 * Reads `fd` one line at a time, executes `re` against each line,
 * and reports the outcome through show(). Each line is freed once
 * it has been shown. The loop ends at end of file, or at the first
 * read error after printing a diagnostic.
 */
const dump = {re, fd
	while true
		match bio.readln(fd)
		| `std.Ok ln:
			show(re, ln, regex.exec(re, ln))
			std.slfree(ln)
		| `std.Err `bio.Eof:
			break
		| `std.Err e:
			/* end the message with a newline so later output starts on its own line */
			std.put("error reading from input: {}\n", e)
			break
		;;
	;;
}

/*
 * Renders a match in a way that's pleasant to read. There are
 * two cases here.
 *
 * 1) The pattern matched. In this case, we want to show the
 *    regions of the pattern that contributed to the match.
 *
 * 2) The pattern did not match. In this case, we want to show
 *    the location of the failed match.
 *
 * In both cases, we render a caret that describes the position
 * of the match. Unfortunately, for the coverage code we don't
 * have a great way of mapping whole subranges, so the caret can
 * be slightly truncated. Fixing this isn't worth the complexity.
 */
const show = {re, ln, mg
	match mg
	| `std.None:
		/*
		 * Failure: print the pattern with a caret at the pattern
		 * index recorded for the last instruction, then the input
		 * line with a caret one before the current string position.
		 * NOTE(review): assumes re.strp is at least 1 here -- confirm
		 * against the interpreter.
		 */
		std.put("Match failed at {}:\n", re.lastip)
		std.put("\t{}\n", re.pat)
		showpos(re, re.pcidx[re.lastip])
		std.put("\t{}\n", ln)
		showpos(re, re.strp - 1)
	| `std.Some groups:
		/* success: the whole match first, then each numbered group */
		std.put("Matched: {}\n", groups[0])
		for var g = 1; g < groups.len; g++
			std.put("\tgroup {}: {}\n", g, groups[g])
		;;
		std.put("coverage:\n")
		std.put("\t{}\n", re.pat)
		showcoverage(re)
	;;
}

/*
 * Position marker used when a match fails. Prints a tab,
 * then `idx` tildes, then a caret and a newline, giving
 * an arrow such as:
 *
 *    ~~~~~^
 *
 * measured from the start of the line.
 */
const showpos = {re, idx
	var col

	std.put("\t")
	col = 0
	while col < idx
		std.put("~")
		col++
	;;
	std.put("^\n")
}

/*
 * Coverage carets for success. This tries to output
 * a '^' for every section of the pattern that matched.
 *
 *   (this|that)
 *    ^^^^
 *
 * Every instruction index present in the trace bitset of
 * the last thread is looked up in re.pcidx, and the result
 * is marked in a scratch slice with one slot per pattern
 * byte. The slice is then printed as a row of '^' and ' '.
 */
const showcoverage = {re
	var hit
	var idx

	hit = std.slzalloc(re.pat.len)
	for var ip = 0; ip < re.proglen; ip++
		if !std.bshas(re.traces[re.lastthr], ip)
			continue
		;;
		idx = re.pcidx[ip]
		/* skip any index that does not land inside the pattern */
		if idx >= 0 && idx < hit.len
			hit[idx] = true
		;;
	;;

	std.put("\t")
	for h : hit
		if h
			std.put("^")
		else
			std.put(" ")
		;;
	;;
	std.put("\n")

	/* this runs once per matched line, so release the scratch slice */
	std.slfree(hit)
}