shithub: mc

ref: 90d9a10c0234f5868c2e86882aae72fc931d53fd
dir: /lib/math/sum-impl.myr/

View raw version
use std

/* For references, see [Mul+10] section 6.3 */
pkg math =
	pkglocal const kahan_sum32 : (l : flt32[:] -> flt32)
	pkglocal const kahan_sum64 : (l : flt64[:] -> flt64)

	pkglocal const priest_sum32 : (l : flt32[:] -> flt32)
	pkglocal const priest_sum64 : (l : flt64[:] -> flt64)

	/* Backend for priest_sum; currently not useful enough to expose */
	pkglocal generic double_compensated_sum : (l : @f[:] -> (@f, @f)) :: numeric,floating @f
;;

type doomed_flt32_arr = flt32[:]
type doomed_flt64_arr = flt64[:]

impl disposable doomed_flt32_arr =
	__dispose__ = {a : doomed_flt32_arr; std.slfree((a : flt32[:])) }
;;

impl disposable doomed_flt64_arr =
	__dispose__ = {a : doomed_flt64_arr; std.slfree((a : flt64[:])) }
;;

/*
   Kahan's compensated summation. Fast and reasonably accurate,
   although cancellation can cause relative error blowup. For
   something slower, but more accurate, use something like Priest's
   doubly compensated sums.
 */
pkglocal const kahan_sum32 = {l; -> kahan_sum_gen(l)}
pkglocal const kahan_sum64 = {l; -> kahan_sum_gen(l)}

generic kahan_sum_gen = {l : @f[:] :: numeric,floating @f
	if l.len == 0
		-> (0.0 : @f)
	;;

	var s = (0.0 : @f)
	var c = (0.0 : @f)
	var y = (0.0 : @f)
	var t = (0.0 : @f)

	for x : l
		y = x - c
		t = s + y
		c = (t - s) - y
		s = t
	;;

	-> s
}

/*
   Priest's doubly compensated summation. Extremely accurate, but
   relatively slow. For situations in which cancellation is not
   expected, something like Kahan's compensated summation may be
   more useful.
 */
pkglocal const priest_sum32 = {l : flt32[:]
	var l2 = std.sldup(l)
	std.sort(l2, mag_cmp32)
	auto (l2 : doomed_flt32_arr)
	var s, c
	(s, c) = double_compensated_sum(l2)
	-> s
}

const mag_cmp32 = {f : flt32, g : flt32
	var u = std.flt32bits(f) & ~(1 << 31)
	var v = std.flt32bits(g) & ~(1 << 31)
	-> std.numcmp(v, u)
}

pkglocal const priest_sum64 = {l : flt64[:]
	var l2 = std.sldup(l)
	std.sort(l, mag_cmp64)
	auto (l2 : doomed_flt64_arr)
	var s, c
	(s, c) = double_compensated_sum(l2)
	-> s
}

const mag_cmp64 = {f : flt64, g : flt64
	var u = std.flt64bits(f) & ~(1 << 63)
	var v = std.flt64bits(g) & ~(1 << 63)
	-> std.numcmp(v, u)
}

generic double_compensated_sum = {l : @f[:] :: numeric,floating @f
	/* l should be sorted in descending order */
	if l.len == 0
		-> ((0.0 : @f), (0.0 : @f))
	;;

	var s = (0.0 : @f)
	var c = (0.0 : @f)

	for x : l
		var y = c + x
		var u = x - (y - c)
		var t = (y + s)
		var v = (y - (t - s))
		var z = u + v
		s = t + z
		c = z - (s - t)
	;;

	-> (s, c)
}