shithub: mc

Download patch

ref: 96907bd2e4deb4adbdcb668060a9376b929e9805
parent: 8092374b4e87b78053a68b2e5cc7159941a3f46f
author: S. Gilles <sgilles@math.umd.edu>
date: Tue Apr 17 19:13:34 EDT 2018

Implement exp.

--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -1,6 +1,9 @@
 lib math =
 	fpmath.myr
 
+	# exp
+	exp-impl.myr
+
 	# rounding (to actual integers)
 	round-impl+posixy-x64-sse4.s
 	round-impl.myr
--- /dev/null
+++ b/lib/math/exp-impl.myr
@@ -1,0 +1,236 @@
+use std
+
+use "fpmath"
+
+/* See [Mul16] (6.2.1), and in particular [Tan89] (which this notation follows) */
+pkg math =
+	pkglocal const exp32 : (f : flt32 -> flt32)
+	pkglocal const exp64 : (f : flt64 -> flt64)
+;;
+
+extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
+extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
+extern const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
+extern const horner_polyu64 : (f : flt64, a : uint64[:] -> flt64)
+
+type fltdesc(@f, @u, @i) = struct
+	explode : (f : @f -> (bool, @i, @u))
+	assem : (n : bool, e : @i, s : @u -> @f)
+	fma : (x : @f, y : @f, z : @f -> @f)
+	horner : (f : @f, a : @u[:] -> @f)
+	sgnmask : @u
+	tobits : (f : @f -> @u)
+	frombits : (u : @u -> @f)
+	inf : @u
+	thresh_1_min : @u
+	thresh_1_max : @u
+	thresh_2 : @u
+	inv_L : @u
+	L1 : @u
+	L2 : @u
+	nabs : @u
+	Ai : @u[:]
+	S : (@u, @u)[32]
+;;
+
+/* Coefficients for p(t), locally approximating exp1m */
+const Ai32 : uint32[:] = [
+		0x00000000,
+		0x00000000,
+		0x3f000044,
+		0x3e2aaaec,
+	][:]
+
+const Ai64 : uint64[:] = [
+		0x0000000000000000,
+		0x0000000000000000,
+		0x3fe0000000000000,
+		0x3fc5555555548f7c,
+		0x3fa5555555545d4e,
+		0x3f811115b7aa905e,
+		0x3f56c1728d739765,
+	][:]
+
+/* Double-precise expansions of 2^(J/32) */
+const S32 : (uint32, uint32)[32] = [
+		(0x3f800000, 0x00000000),
+		(0x3f82cd80, 0x35531585),
+		(0x3f85aac0, 0x34d9f312),
+		(0x3f889800, 0x35e8092e),
+		(0x3f8b95c0, 0x3471f546),
+		(0x3f8ea400, 0x36e62d17),
+		(0x3f91c3c0, 0x361b9d59),
+		(0x3f94f4c0, 0x36bea3fc),
+		(0x3f9837c0, 0x36c14637),
+		(0x3f9b8d00, 0x36e6e755),
+		(0x3f9ef500, 0x36c98247),
+		(0x3fa27040, 0x34c0c312),
+		(0x3fa5fec0, 0x36354d8b),
+		(0x3fa9a140, 0x3655a754),
+		(0x3fad5800, 0x36fba90b),
+		(0x3fb123c0, 0x36d6074b),
+		(0x3fb504c0, 0x36cccfe7),
+		(0x3fb8fb80, 0x36bd1d8c),
+		(0x3fbd0880, 0x368e7d60),
+		(0x3fc12c40, 0x35cca667),
+		(0x3fc56700, 0x36a84554),
+		(0x3fc9b980, 0x36f619b9),
+		(0x3fce2480, 0x35c151f8),
+		(0x3fd2a800, 0x366c8f89),
+		(0x3fd744c0, 0x36f32b5a),
+		(0x3fdbfb80, 0x36de5f6c),
+		(0x3fe0ccc0, 0x36776155),
+		(0x3fe5b900, 0x355cef90),
+		(0x3feac0c0, 0x355cfba5),
+		(0x3fefe480, 0x36e66f73),
+		(0x3ff52540, 0x36f45492),
+		(0x3ffa8380, 0x36cb6dc9),
+	]
+
+const S64 : (uint64, uint64)[32] = [
+		(0x3ff0000000000000, 0x0000000000000000),
+		(0x3ff059b0d3158540, 0x3d0a1d73e2a475b4),
+		(0x3ff0b5586cf98900, 0x3ceec5317256e308),
+		(0x3ff11301d0125b40, 0x3cf0a4ebbf1aed93),
+		(0x3ff172b83c7d5140, 0x3d0d6e6fbe462876),
+		(0x3ff1d4873168b980, 0x3d053c02dc0144c8),
+		(0x3ff2387a6e756200, 0x3d0c3360fd6d8e0b),
+		(0x3ff29e9df51fdec0, 0x3d009612e8afad12),
+		(0x3ff306fe0a31b700, 0x3cf52de8d5a46306),
+		(0x3ff371a7373aa9c0, 0x3ce54e28aa05e8a9),
+		(0x3ff3dea64c123400, 0x3d011ada0911f09f),
+		(0x3ff44e0860618900, 0x3d068189b7a04ef8),
+		(0x3ff4bfdad5362a00, 0x3d038ea1cbd7f621),
+		(0x3ff5342b569d4f80, 0x3cbdf0a83c49d86a),
+		(0x3ff5ab07dd485400, 0x3d04ac64980a8c8f),
+		(0x3ff6247eb03a5580, 0x3cd2c7c3e81bf4b7),
+		(0x3ff6a09e667f3bc0, 0x3ce921165f626cdd),
+		(0x3ff71f75e8ec5f40, 0x3d09ee91b8797785),
+		(0x3ff7a11473eb0180, 0x3cdb5f54408fdb37),
+		(0x3ff82589994cce00, 0x3cf28acf88afab35),
+		(0x3ff8ace5422aa0c0, 0x3cfb5ba7c55a192d),
+		(0x3ff93737b0cdc5c0, 0x3d027a280e1f92a0),
+		(0x3ff9c49182a3f080, 0x3cf01c7c46b071f3),
+		(0x3ffa5503b23e2540, 0x3cfc8b424491caf8),
+		(0x3ffae89f995ad380, 0x3d06af439a68bb99),
+		(0x3ffb7f76f2fb5e40, 0x3cdbaa9ec206ad4f),
+		(0x3ffc199bdd855280, 0x3cfc2220cb12a092),
+		(0x3ffcb720dcef9040, 0x3d048a81e5e8f4a5),
+		(0x3ffd5818dcfba480, 0x3cdc976816bad9b8),
+		(0x3ffdfc97337b9b40, 0x3cfeb968cac39ed3),
+		(0x3ffea4afa2a490c0, 0x3cf9858f73a18f5e),
+		(0x3fff50765b6e4540, 0x3c99d3e12dd8a18b),
+	]
+
+const exp32 = {f : flt32
+	const d : fltdesc(flt32, uint32, int32) = [
+		.explode = std.flt32explode,
+		.assem = std.flt32assem,
+		.fma = fma32,
+		.horner = horner_polyu32,
+		.sgnmask = (1 << 31),
+		.tobits = std.flt32bits,
+		.frombits = std.flt32frombits,
+		.inf = 0x7f800000,
+		.thresh_1_min = 0xc2cff1b4, /* ln(2^(-127 - 23)) ~= -103.9720770839917 */
+		.thresh_1_max = 0x42b17218, /* ln([2 - 2^(-24)] * 2^127) ~= 88.72283905206 */
+		.inv_L = 0x4238aa3b, /* L = log(2) / 32, this is 1/L in working precision */
+		.L1 = 0x3cb17200, /* L1 and L2 sum to give L in high precision, and L1 */
+		.L2 = 0x333fbe8e, /* has some trailing zeroes. */
+		.nabs = 9, /* L1 has 9 trailing zeroes in binary */
+		.Ai = Ai32,
+		.S = S32,
+	]
+
+	-> expgen(f, d)
+}
+
+const exp64 = {f : flt64
+	const d : fltdesc(flt64, uint64, int64) = [
+		.explode = std.flt64explode,
+		.assem = std.flt64assem,
+		.fma = fma64,
+		.horner = horner_polyu64,
+		.sgnmask = (1 << 63),
+		.tobits = std.flt64bits,
+		.frombits = std.flt64frombits,
+		.inf = 0x7ff0000000000000,
+		.thresh_1_min = 0xc0874910d52d3052, /* ln(2^(-1023 - 52)) ~= -745.1332191019 */
+		.thresh_1_max = 0x40862e42fefa39ef, /* ln([2 - 2^(-53)] * 2^1023) ~= 709.78271289 */
+		.inv_L = 0x40471547652b82fe, /* as in exp32 */
+		.L1 = 0x3f962e42fef00000,
+		.L2 = 0x3d8473de6af278ed,
+		.nabs = 20,
+		.Ai = Ai64,
+		.S = S64,
+	]
+
+	-> expgen(f, d)
+}
+
+generic expgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i
+	var b = d.tobits(f)
+	var n, e, s
+	(n, e, s) = d.explode(f)
+
+	/*
+	   Detect if exp(f) would round to outside representability.
+	   We don't do bias adjustment, so Tang's thresholds can
+	   be tightened.
+	 */
+	if !n && b >= d.thresh_1_max
+		-> d.frombits(d.inf)
+	elif n && b >= d.thresh_1_min
+		-> (0.0 : @f)
+	;;
+
+	/* Detect if exp(f) is close to 1. */
+	if (b & ~d.sgnmask) <= d.thresh_2
+		-> (1.0 : @f)
+	;;
+
+	/* Argument reduction to [ -ln(2)/64, ln(2)/64 ] */
+	var inv_L = d.frombits(d.inv_L)
+	var finv_L = f * inv_L
+
+	var N = rn(finv_L)
+	var N2  = N % (32 : @i)
+	if N2 < 0
+		N2 += (32 : @i)
+	;;
+	var N1 = N - N2
+
+	var R1 : @f, R2 : @f
+	var Nf = (N : @f)
+
+	/*
+	   There are few enough significant bits that these are all
+	   exact, without need for an FMA. R1 + R2 will approximate
+	   (very well) f reduced into [ -ln(2)/64, ln(2)/64 ]
+	 */
+	if std.abs(N) >= (1 << d.nabs)
+		R1 = (f - (N1 : @f) * d.frombits(d.L1)) - ((N2 : @f) * d.frombits(d.L1))
+	else
+		R1 = f - (N : @f) * d.frombits(d.L1)
+	;;
+	R2 = -1.0 * (N : @f) * d.frombits(d.L2)
+
+	var M = (N1 : @i) / 32
+	var J = (N2 : std.size)
+
+	/* Compute a polynomial approximation of exp1m */
+	var R = R1 + R2
+	var Q = d.horner(R, d.Ai)
+	var P = R1 + (R2 + Q)
+
+	/* Expand out from reduced range */
+	var Su_hi, Su_lo
+	(Su_hi, Su_lo) = d.S[J]
+	var S_hi = d.frombits(Su_hi)
+	var S_lo = d.frombits(Su_lo)
+
+	var S = S_hi + S_lo
+	var exp = d.assem(false, M, 0) * (S_hi + (S_lo + (P * S)))
+
+	-> exp
+}
--- a/lib/math/fpmath.myr
+++ b/lib/math/fpmath.myr
@@ -3,11 +3,15 @@
 pkg math =
 	trait fpmath @f =
 
+		/* exp-impl */
+		exp : (f : @f -> @f)
+
 		/* fma-impl */
 		fma : (x : @f, y : @f, z : @f -> @f)
 
 		/* poly-impl */
 		horner_poly : (x : @f, a : @f[:] -> @f)
+		horner_polyu : (x : @f, a : @u[:] -> @f)
 
 		/* sqrt-impl */
 		sqrt : (f : @f -> @f)
@@ -62,7 +66,10 @@
 impl fpmath flt32 =
 	fma = {x, y, z; -> fma32(x, y, z)}
 
+	exp = {f; -> exp32(f)}
+
 	horner_poly = {f, a; -> horner_poly32(f, a)}
+	horner_polyu = {f, a; -> horner_polyu32(f, a)}
 
 	sqrt = {f; -> sqrt32(f)}
 
@@ -78,7 +85,10 @@
 impl fpmath flt64 =
 	fma = {x, y, z; -> fma64(x, y, z)}
 
+	exp = {f; -> exp64(f)}
+
 	horner_poly = {f, a; -> horner_poly64(f, a)}
+	horner_polyu = {f, a; -> horner_polyu64(f, a)}
 
 	sqrt = {f; -> sqrt64(f)}
 
@@ -93,11 +103,17 @@
 extern const rn32 : (f : flt32 -> int32)
 extern const rn64 : (f : flt64 -> int64)
 
+extern const exp32 : (x : flt32 -> flt32)
+extern const exp64 : (x : flt64 -> flt64)
+
 extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
 extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
 
 extern const horner_poly32 : (f : flt32, a : flt32[:] -> flt32)
 extern const horner_poly64 : (f : flt64, a : flt64[:] -> flt64)
+
+extern const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
+extern const horner_polyu64 : (f : flt64, a : uint64[:] -> flt64)
 
 extern const sqrt32 : (x : flt32 -> flt32)
 extern const sqrt64 : (x : flt64 -> flt64)
--- a/lib/math/poly-impl.myr
+++ b/lib/math/poly-impl.myr
@@ -4,6 +4,9 @@
 pkg math =
         pkglocal const horner_poly32 : (f : flt32, a : flt32[:] -> flt32)
         pkglocal const horner_poly64 : (f : flt64, a : flt64[:] -> flt64)
+
+        pkglocal const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
+        pkglocal const horner_polyu64 : (f : flt64, a : uint64[:] -> flt64)
 ;;
 
 extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
@@ -21,6 +24,22 @@
         var r : flt64 = 0.0
         for var j = a.len - 1; j >= 0; j--
                 r = fma64(r, f, a[j])
+        ;;
+        -> r
+}
+
+const horner_polyu32 = {f : flt32, a : uint32[:]
+        var r : flt32 = 0.0
+        for var j = a.len - 1; j >= 0; j--
+                r = fma32(r, f, std.flt32frombits(a[j]))
+        ;;
+        -> r
+}
+
+const horner_polyu64 = {f : flt64, a : uint64[:]
+        var r : flt64 = 0.0
+        for var j = a.len - 1; j >= 0; j--
+                r = fma64(r, f, std.flt64frombits(a[j]))
         ;;
         -> r
 }
--- a/lib/math/references
+++ b/lib/math/references
@@ -13,3 +13,9 @@
 [Mul16]
 J. M. Muller. Elementary functions : algorithms and implementation.
 Third edition. New York: Birkhäuser, 2016. isbn: 9781489979810.
+
+[Tan89]
+Ping-Tak Peter Tang. “Table-driven Implementation of the Exponential
+Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.
+Softw. 15.2 (June 1989), pp. 144–157. issn: 0098-3500.  doi:
+10.1145/63522.214389. url: http://doi.acm.org/10.1145/63522.214389.
--- a/lib/math/sqrt-impl.myr
+++ b/lib/math/sqrt-impl.myr
@@ -95,7 +95,7 @@
 	-> sqrtgen(f, d)
 }
 
-generic sqrtgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable,fpmath @f, numeric,integral @u, numeric,integral @i
+generic sqrtgen = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i
 	var n : bool, e : @i, s : @u, e2 : @i
 	(n, e, s) = d.explode(f)
 
@@ -146,10 +146,10 @@
 	;;
 
 	/* split up "x_{n+1} = x_n (3 - ax_n^2)/2" */
-	var epsn : @f = d.fma(-1.0 * a, xn * xn, 1.0)
-	var rn : @f = 0.5 * epsn
-	var gn : @f = a * xn
-	var hn : @f = 0.5 * xn
+	var epsn = d.fma(-1.0 * a, xn * xn, 1.0)
+	var rn = 0.5 * epsn
+	var gn = a * xn
+	var hn = 0.5 * xn
 	for var j = 0; j < d.iterlim; ++j
 		rn = d.fma(-1.0 * gn, hn, 0.5)
 		gn = d.fma(gn, rn, gn)
@@ -172,11 +172,11 @@
 		var r_plus_ulp : @f = d.frombits(d.tobits(r) + 1)
 		var r_minus_ulp : @f = d.frombits(d.tobits(r) - 1)
 
-		var delta_1 : @f = d.fma(r, r_minus_ulp, -1.0 * f)
+		var delta_1 = d.fma(r, r_minus_ulp, -1.0 * f)
 		if d.tobits(delta_1) & d.sgnmask == 0
 			r = r_minus_ulp
 		else
-			var delta_2 : @f = d.fma(r, r_plus_ulp, -1.0 * f)
+			var delta_2 = d.fma(r, r_plus_ulp, -1.0 * f)
 			if d.tobits(delta_2) & d.sgnmask != 0
 				r = r_plus_ulp
 			else
--- /dev/null
+++ b/lib/math/test/exp-impl.myr
@@ -1,0 +1,43 @@
+use std
+use math
+use testr
+
+const main = {
+	testr.run([
+		[.name="exp-01", .fn = exp01],
+		[.name="exp-02", .fn = exp02],
+	][:])
+}
+
+const exp01 = {c
+	var inputs : (uint32, uint32)[:] = [
+		(0x00000000, 0x3f800000),
+		(0x34000000, 0x3f800001),
+		(0x3c000000, 0x3f810101),
+		(0x42000000, 0x568fa1fe),
+	][:]
+
+	for (x, y) : inputs
+		var xf : flt32 = std.flt32frombits(x)
+		var yf : flt32 = std.flt32frombits(y)
+		var rf = math.exp(xf)
+		testr.check(c, rf == yf,
+			"exp(0x{b=16,w=8,p=0}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+			x, y, std.flt32bits(rf))
+	;;
+}
+
+const exp02 = {c
+	var inputs : (uint64, uint64)[:] = [
+		(0x0000000000000000, 0x3ff0000000000000),
+	][:]
+
+	for (x, y) : inputs
+		var xf : flt64 = std.flt64frombits(x)
+		var yf : flt64 = std.flt64frombits(y)
+		var rf = math.exp(xf)
+		testr.check(c, rf == yf,
+			"exp(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+			x, y, std.flt64bits(rf))
+	;;
+}