shithub: asif

Download patch

ref: 2850b592998277aef88f3e6769063b741d850d58
parent: d13e97eec68079ee7efabb2714427276dcd9ea6f
author: qwx <qwx@sciops.net>
date: Tue Aug 11 19:03:50 EDT 2020

add simple (and broken) descriptive statistics in awk

--- /dev/null
+++ b/descstat.awk
@@ -1,0 +1,110 @@
+#!/bin/awk -f
+# assumption: no missing values/indices in input data
+
+# swap(X, a, b): exchange the elements X[a] and X[b] in place
+function swap(X, a, b,    t) {
+	t = X[b]
+	X[b] = X[a]
+	X[a] = t
+}
+
+# select(X, k): return the k-th smallest value (1-based) of X[1..length(X)].
+# Quickselect from numerical recipes 3rd ed; partially rearranges X in place.
+function select(X, k,    i, j, m, l, ir, p)
+{
+	l = 1
+	ir = length(X)
+	for(;;){
+		if(ir <= l+1){	# at most two candidates left
+			if(ir == l+1 && X[ir] < X[l])
+				swap(X, l, ir)
+			return X[k] + 0	# numeric, but not truncated like int()
+		}
+		m = int((l + ir) / 2)	# awk "/" is floating point; a fractional index is a different element
+		swap(X, m, l+1)
+		if(X[l] > X[ir])
+			swap(X, l, ir)
+		if(X[l+1] > X[ir])
+			swap(X, l+1, ir)
+		if(X[l] > X[l+1])
+			swap(X, l, l+1)
+		i = l + 1	# here X[l] <= X[l+1] <= X[ir], which bounds both scans below
+		j = ir
+		p = X[l+1]	# partitioning element
+		for(;;){
+			do i++; while(X[i] < p)
+			do j--; while(X[j] > p)
+			if(j < i)
+				break
+			swap(X, i, j)
+		}
+		X[l+1] = X[j]
+		X[j] = p
+		if(j >= k)
+			ir = j - 1	# k-th element lies at or left of j
+		if(j <= k)
+			l = i	# k-th element lies at or right of j
+	}
+}
+
+# max(X): largest value in X, or the string "-inf" when X is empty.
+# Seeded from the data: comparing against the constant "-inf" is a string compare.
+function max(X,    n, i, seen) {
+	for(i in X)
+		if(seen++ == 0 || X[i] > n)
+			n = X[i]
+	return seen ? n : "-inf"
+}
+
+# min(X): smallest value in X, or the string "inf" when X is empty.
+# Seeded from the data rather than string-compared against the constant "inf".
+function min(X,    n, i, seen) {
+	for(i in X)
+		if(seen++ == 0 || X[i] < n)
+			n = X[i]
+	return seen ? n : "inf"
+}
+
+# sum(X): add up every value stored in X; the result is unset if X is empty
+function sum(X,    key, total) {
+	for(key in X)
+		total = total + X[key]
+	return total
+}
+
+# mean(X): arithmetic mean, sum(X) over length(X); X must not be empty
+function mean(X,    total) {
+	total = sum(X); return total / length(X)
+}
+
+# var(X): sample variance: squared deviations from mean(X), summed, over length(X) - 1
+function var(X,    k, acc, mu) {
+	mu = mean(X)
+	for(k in X)
+		acc = acc + (X[k] - mu) ^ 2
+	return acc / (length(X) - 1)
+}
+
+# sd(X): sample standard deviation of X, the square root of var(X)
+function sd(X,    v) {
+	v = var(X); return sqrt(v)
+}
+
+# FIXME: this is wrong and produces wrong results in subsequent stuff
+# select is busted
+# rearranges X
+# median(X): middle value of X for an odd count, otherwise the mean of the
+# two middle values; select() reorders X as a side effect
+function median(X,    hi) {
+	hi = select(X, int(length(X) / 2 + 1))
+	if(length(X) % 2 == 0)
+		hi = (select(X, int(length(X) / 2)) + hi) / 2
+	return hi
+}
+
+# freq(X): tally how often each value of X occurs into the global array ans
+function freq(X,    i) {
+	delete ans	# discard counts left over from a previous call
+	for(i in X)
+		ans[X[i]]++
+}