ref: fe1eb39db7ae6904924f3ab1f6f9b34416f2eb1b
dir: /sys/src/cmd/dict/canonind.awk/
# turn output of mkindex into form needed by dict
#
# Reads the raw index named by the single argument.  For each record,
# $1 is a number (printed with %d; presumably the entry's offset --
# confirm against mkindex) and $2..$NF are words.  One "word<TAB>number"
# line is produced per non-empty word.  A word with a parenthesised part,
# e.g. "colo(u)r", produces two lines: one without the parenthesised
# text ("color") and one with it, parentheses removed ("colour").
# The lines are collected in the scratch file "junk" in the current
# directory, then sorted with duplicates removed onto standard output.
#
# Exit status: 0 on success, 1 on a usage error or if the raw index
# could not be read.
BEGIN {
	if(ARGC != 2) {
		print "usage: awk -F' ' -f canonind.awk rawindex > index"
		# an uninitialized variable here would evaluate to 0 (success)
		exit 1
	}
	file = ARGV[1]
	# cleared so awk does not also treat it as a main-input file
	ARGV[1] = ""
	nout = 0
	# getline returns 1 per record, 0 at end of file, -1 on error;
	# keep the last result so a read error can be told from end of file
	while ((st = (getline < file)) > 0) {
		for(i = 2; i <= NF; i++) {
			w = $i
			if(length(w) == 0)
				continue
			b = index(w, "(")
			e = index(w, ")")
			if(b && e && b < e) {
				w1 = substr(w, 1, b-1)		# text before "("
				w2 = substr(w, b+1, e-b-1)	# text between "(" and ")"
				w3 = substr(w, e+1)		# text after ")"
				printf "%s%s\t%d\n", w1, w3, $1 > "junk"
				printf "%s%s%s\t%d\n", w1, w2, w3, $1 > "junk"
				nout += 2
			} else {
				printf "%s\t%d\n", w, $1 > "junk"
				nout++
			}
		}
	}
	# if nothing was written, "junk" was never created: skip sort and rm
	if(nout > 0) {
		# flush and close the scratch file before another process reads it
		close("junk")
		# records are tab-separated, so the sort field delimiter must be a
		# tab; it is spelled \t so it cannot be lost to whitespace mangling.
		# keys: word ignoring case, then word exactly, then number numerically
		system("sort -u -t'\t' +0f -1 +0 -1 +1n -2 < junk")
		system("rm junk")
	}
	if(st < 0)
		exit 1
	exit 0
}