shithub: 9intro

ref: 2b99422480d596ebc26921c87c6bb81a07949f3e
dir: /idx/gen.key/

View raw version
#!/bin/rc
awk ' # gen.key
#   Input: Each input line has one of the following two forms:
#	string                   (tab) numlist
#	string " %key " sort.key (tab) numlist
#   Output: Each output line has the form:
#	sort.key (tab) string (tab) numlist

BEGIN {
	# Records are tab-separated on input and are written tab-separated too.
	FS = "\t"
	OFS = "\t"
}

# Entry with an explicit sort key: $1 has the form  string " %key " sort.key
# The marker is tested against $1 only, the same field that index() searches.
# A " %key " that occurs solely in the number list therefore does not fire
# this rule (index() would return 0 there and the printed fields would be
# mangled); such a line falls through to normal key generation instead.
$1 ~ / %key / {
	marker = " %key "
	at = index($1, marker)
	# sort.key is the text after the marker, the display string the text before it
	print substr($1, at + length(marker)), substr($1, 1, at - 1), $2
	next	# record is complete; skip the key-generation rule
}

	{ # no sort.key was supplied: derive one from the string, then emit the record
	numlist = $2
	key = $1

	# Strip troff font changes (\fX, \f(XX) and point-size changes
	# (\sN, \sNN, \s+N, \s-N) from the key
	gsub(/\\f\(..|\\f.|\\s[+-][0-9]|\\s[0-9][0-9]?/, "", key)

	# underscore becomes 0, so "foo_gorp" sorts before "food"
	gsub(/_/, "0", key)

	# % is the quote character and ~ stands for a space.
	# Each quoted literal is parked behind a QQnQQ placeholder so that the
	# clean-up below leaves it alone.  %% is hidden first so that its second
	# % is not taken as the start of another quoted literal.
	quoted = (key ~ /%/)
	if (quoted) {
		gsub(/%%/,  "QQ0QQ", key)
		gsub(/%\[/, "QQ1QQ", key)
		gsub(/%\]/, "QQ2QQ", key)
		gsub(/%\{/, "QQ3QQ", key)
		gsub(/%\}/, "QQ4QQ", key)
		gsub(/%~/,  "QQ5QQ", key)
	}

	gsub(/%e/, "\\", key)		# %e stands for the troff escape character
	gsub(/~/, " ", key)		# an unquoted tilde becomes a space
	gsub(/[%\[\]\{\}]/, "", key)	# drop leftover % and the font-changing []{}

	if (quoted) {  # put the literals back, without their escape character
		gsub(/QQ0QQ/, "%", key)
		gsub(/QQ1QQ/, "[", key)
		gsub(/QQ2QQ/, "]", key)
		gsub(/QQ3QQ/, "{", key)
		gsub(/QQ4QQ/, "}", key)
		gsub(/QQ5QQ/, "~", key)
	}

	# Leading blanks steer the later sort:
	if (key ~ /^[^a-zA-Z]+$/)	# purely nonalphabetic keys go first
		key = "  " key
	else if (key ~ /^[0-9]/)	# keys with leading digits come next
		key = " " key
					# otherwise whatever final.sort does

	# Output: sort.key (tab) string (tab) numlist
	print key, $1, numlist
	}
' $*