shithub: mc

Download patch

ref: 6ac5eec951dc80bc612e5c6784429e43ffff02ad
parent: 2002d2fa999d7efeb71aa64a32a1a87c2f4b8399
author: S. Gilles <sgilles@math.umd.edu>
date: Sat Jun 8 23:18:07 EDT 2019

Rewrite powr to use log-overkill.

--- a/lib/math/powr-impl.myr
+++ b/lib/math/powr-impl.myr
@@ -2,6 +2,7 @@
 
 use "fpmath"
 use "log-impl"
+use "log-overkill"
 use "util"
 
 /*
@@ -14,12 +15,6 @@
    example, IEEE 754-2008 does not specify what powr(infty, y) must
    return when y is not 0.0 (an erratum was planned in 2010, but
    does not appear to have been released as of 2018).
-
-   As a note: unlike many other functions in this library, there
-   has been no serious analysis of the accuracy and speed of this
-   particular implementation. Interested observers wishing to improve
-   this library will probably find this file goldmine of mistakes,
-   both theoretical and practical.
  */
 pkg math =
 	pkglocal const powr32 : (x : flt32, y : flt32 -> flt32)
@@ -38,17 +33,9 @@
 	emax : @i
 	emin : @i
 	sgnmask : @u
-	sig8mask : @u
-	sig8last : @u
-	split_prec_mask : @u
-	split_prec_mask2 : @u
-	C : (@u, @u)[:]
-	eps_inf_border : @u
-	eps_zero_border : @u
-	exp_inf_border : @u
+	log_overkill : (x : @f -> (@f, @f))
+	split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f))
 	exp_zero_border : @u
-	exp_subnormal_border : @u
-	itercount : @u
 ;;
 
 const desc32 : fltdesc(flt32, uint32, int32) =  [
@@ -63,17 +50,9 @@
 	.emax = 127,
 	.emin = -126,
 	.sgnmask = 1 << 31,
-	.sig8mask = 0xffff0000, /* Mask to get 8 significant bits */
-	.sig8last = 16, /* Last bit kept when masking */
-	.split_prec_mask = 0xffff0000, /* 16 trailing zeros */
-	.split_prec_mask2 = 0xfffff000, /* 12 trailing zeros */
-	.C = accurate_logs32[0:130], /* See log-impl.myr */
-	.eps_inf_border = 0x4eb00f34, /* maximal y st. (1.00..1)^y < oo */
-	.eps_zero_border = 0x4ecff1b4, /* minimal y st. (0.99..9)^y > 0 */
-	.exp_inf_border = 0x42b17218, /* maximal y such that e^y < oo */
+	.log_overkill = logoverkill32,
+	.split_mul = split_mul32,
 	.exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */
-	.exp_subnormal_border = 0xc2aeac50, /* minimal y such that e^y is normal */
-	.itercount = 4, /* How many iterations of Taylor series for (1 + f)^y' */
 ]
 
 const desc64 : fltdesc(flt64, uint64, int64) =  [
@@ -88,19 +67,20 @@
 	.emax = 1023,
 	.emin = -1022,
 	.sgnmask = 1 << 63,
-	.sig8mask = 0xffffe00000000000, /* Mask to get 8 significant bits */
-	.sig8last = 45, /* Last bit kept when masking */
-	.split_prec_mask = 0xffffff0000000000, /* 40 trailing zeroes */
-	.split_prec_mask2 = 0xfffffffffffc0000, /* 18 trailing zeroes */
-	.C = accurate_logs64[0:130], /* See log-impl.myr */
-	.eps_inf_border = 0x43d628b76e3a7b61, /* maximal y st. (1.00..1)^y < oo */
-	.eps_zero_border = 0x43d74e9c65eceee0, /*  minimal y st. (0.99..9)^y > 0 */
-	.exp_inf_border = 0x40862e42fefa39ef, /* maximal y such that e^y < oo */
+	.log_overkill = logoverkill64,
+	.split_mul = hl_mult,
 	.exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */
-	.exp_subnormal_border = 0xc086232bdd7abcd2, /* minimal y such that e^y is normal */
-	.itercount = 8,
 ]
 
+const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32
+	var x : flt64 = (x_h : flt64) + (x_l : flt64)
+	var y : flt64 = (y_h : flt64) + (y_l : flt64)
+	var z = x * y
+	var z_h : flt32 = (z : flt32)
+	var z_l : flt32 = ((z - (z_h : flt64)) : flt32)
+	-> (z_h, z_l)
+}
+
 const powr32 = {x : flt32, y : flt32
 	-> powrgen(x, y, desc32)
 }
@@ -182,225 +162,41 @@
 		;;
 	;;
 
-	/* Normalize x and y */
-	if xe < d.emin
-		var first_1 = find_first1_64((xs : uint64), (d.precision : int64))
-		var offset = (d.precision : @u) - 1 - (first_1 : @u)
-		xs = xs << offset
-		xe = d.emin - offset
-	;;
-
-	if ye < d.emin
-		var first_1 = find_first1_64((ys : uint64), (d.precision : int64))
-		var offset = (d.precision : @u) - 1 - (first_1 : @u)
-		ys = ys << offset
-		ye = d.emin - offset
-	;;
-
 	/*
-           Split x into 2^N * F * (1 + f), with F = 1 + j/128 (some
-           j) and f tiny. Compute F naively by truncation. Compute
-           f via f = (x' - 1 - F)/(1 + F), where 1/(1 + F) is
-           precomputed and x' is x/2^N. 128 is chosen so that we
-           can borrow some constants from log-impl.myr.
-
-           [Tan90] hints at a method of computing x^y which may be
-           comparable to this approach, but which is unfortunately
-           has not been elaborated on (as far as I can discover).
+	   Just do the dumb thing: compute exp( log(x) · y ). All the hard work
+	   goes into computing log(x) with high enough precision that our exp()
+	   implementation becomes the weakest link. The Table Maker's Dilemma
+	   says that quantifying "high enough" is a very difficult problem, but
+	   experimentally twice the precision of @f appears quite good enough.
 	 */
-	var N = xe
-	var j, F, Fn, Fe, Fs
-	var xprime = d.assem(false, 0, xs)
+	var ln_x_hi, ln_x_lo
+	(ln_x_hi, ln_x_lo) = d.log_overkill(x)
 
-	if need_round_away(0, (xs : uint64), (d.sig8last : int64))
-		F = d.frombits((d.tobits(xprime) & d.sig8mask) + (1 << d.sig8last))
-	else
-		F = d.frombits(d.tobits(xprime) & d.sig8mask)
-	;;
+	var final_exp_hi, final_exp_lo
+	(final_exp_hi, final_exp_lo) = d.split_mul(ln_x_hi, ln_x_lo, y, 0.0)
 
-	(Fn, Fe, Fs) = d.explode(F)
-
-	if Fe != 0
-		j = 128
-	else
-		j = 0x7f & ((d.sig8mask & Fs) >> d.sig8last)
-	;;
-
-	var f = (xprime - F)/F
-
-	/*
-	   y could actually be above integer infinity, in which
-	   case x^y is most certainly infinity or 0. More importantly,
-	   we can't safely compute M (below).
-	 */
-	if x > (1.0 : @f)
-		if y > d.frombits(d.eps_inf_border)
+	if d.tobits(final_exp_hi) & d.expmask == d.inf
+		/*
+		   split_mul doesn't actually preserve the sign of infinity, so we can't
+		   trust final_exp_hi to get it.
+		 */
+		if (d.tobits(ln_x_hi) & d.sgnmask) == (yb & d.sgnmask)
+			/* e^+Inf */
 			-> d.frombits(d.inf)
-		elif -y > d.frombits(d.eps_inf_border)
-			-> (0.0 : @f)
+		else
+			/* e^-Inf */
+			-> 0.0
 		;;
-	elif x < (1.0 : @f)
-		if y > d.frombits(d.eps_zero_border) && x < (1.0 : @f)
-			-> (0.0 : @f)
-		elif -y > d.frombits(d.eps_zero_border) && x < (1.0 : @f)
-			-> d.frombits(d.inf)
-		;;
 	;;
 
-	/* Split y into M + y', with |y'| <= 0.5 and M an integer */
-	var M = floor(y)
-	var yprime = y - M
-	if yprime > (0.5 : @f)
-		M += (1.0 : @f)
-		yprime = y - M
-	elif yprime < (-0.5 : @f)
-		M -= (1.0: @f)
-		yprime = y - M
+	if final_exp_hi < d.frombits(d.exp_zero_border)
+		-> 0.0
 	;;
 
-	/*
-	   We'll multiply y' by log(2) and try to keep extra
-	   precision, so we need to split y'. Since the high word
-	   of C has 24 - 10 = 14 significant bits (53 - 16 = 37 in
-	   flt64 case), we ensure 15 (39) trailing zeroes in
-	   yprime_hi.  (We also need this for y'*N, M, &c).
-	 */
-	var yprime_hi = d.frombits(d.tobits(yprime) & d.split_prec_mask)
-	var yprime_lo = yprime - yprime_hi
-	var yprimeN_hi = d.frombits(d.tobits((N : @f) * yprime) & d.split_prec_mask)
-	var yprimeN_lo = fma((N : @f),  yprime, -yprimeN_hi)
-	var M_hi = d.frombits(d.tobits(M) & d.split_prec_mask)
-	var M_lo = M - M_hi
-
-	/* 
-	   At this point, we've built out
-	   
-	       x^y = [ 2^N * F * (1 + f) ]^(M + y')
-	
-	   where N, M are integers, F is well-known, and f, y' are
-	   tiny. So we can get to computing
-
-	        /-1-\     /-------------------2--------------------------\     /-3--\
-	       2^(N*M) * exp(log(F)*y' + log2*N*y' + log(F)*M + M*log(1+f)) * (1+f)^y'
-
-	   where 1 can be handled by scale2, 2 we can mostly fake
-	   by sticking high-precision values for log(F) and log(2)
-	   through exp(), and 3 is composed of small numbers,
-	   therefore can be reasonably approximated by a Taylor
-	   expansion.
-	 */
-
-	/* t2 */
-	var log2_lo, log2_hi, Cu_hi, Cu_lo
-	(log2_hi, log2_lo) = d.C[128]
-	(Cu_hi, Cu_lo) = d.C[j]
-
-	var es : @f[20]
-	std.slfill(es[:], (0.0 : @f))
-
-	/* log(F) * y' */
-	es[0] = d.frombits(Cu_hi) * yprime_hi
-	es[1] = d.frombits(Cu_lo) * yprime_hi
-	es[2] = d.frombits(Cu_hi) * yprime_lo
-	es[3] = d.frombits(Cu_lo) * yprime_lo
-
-	/* log(2) * N * y' */
-	es[4] = d.frombits(log2_hi) * yprimeN_hi
-	es[5] = d.frombits(log2_lo) * yprimeN_hi
-	es[6] = d.frombits(log2_hi) * yprimeN_lo
-	es[7] = d.frombits(log2_lo) * yprimeN_lo
-
-	/* log(F) * M */
-	es[8] = d.frombits(Cu_hi) * M_hi
-	es[9] = d.frombits(Cu_lo) * M_hi
-	es[10] = d.frombits(Cu_hi) * M_lo
-	es[11] = d.frombits(Cu_lo) * M_lo
-
-	/* log(1 + f) * M */
-	var lf = log1p(f)
-	var lf_hi = d.frombits(d.tobits(lf) & d.split_prec_mask)
-	var lf_lo = lf - lf_hi
-	es[12] = lf_hi * M_hi
-	es[13] = lf_lo * M_hi
-	es[14] = lf_hi * M_lo
-	es[15] = lf_lo * M_lo
-
-	/*
-	   The correct way to handle this would be to compare
-	   magnitudes of eis and parenthesize the additions correctly.
-	   We take the cheap way out.
-	 */
-	var exp_hi = priest_sum(es[0:16])
-
-	/*
-	   We would like to just compute exp(exp_hi) * exp(exp_lo).
-	   However, if that takes us into subnormal territory, yet
-	   N * M is large, that will throw away a few bits of
-	   information. We can correct for this by adding in a few
-	   copies of P*log(2), then subtract off P when we compute
-	   scale2() at the end.
-
-	   We also have to be careful that P doesn't have too many
-	   significant bits, otherwise we throw away some information
-	   of log2_hi.
-	 */
-	var P = -rn(exp_hi / d.frombits(log2_hi))
-	var P_f = (P : @f)
-	P_f = d.frombits(d.tobits(P_f) & d.split_prec_mask2)
-	P = rn(P_f)
-
-	es[16] = P_f * d.frombits(log2_hi)
-	es[17] = P_f * d.frombits(log2_lo)
-	exp_hi = priest_sum(es[0:18])
-	es[18] = -exp_hi
-	var exp_lo = priest_sum(es[0:19])
-
-
-	var t2 = exp(exp_hi) * exp(exp_lo)
-
-	/*
-	   t3: Abbreviated Taylor expansion for (1 + f)^y' - 1.
-	   Since f is on the order of 2^-7 (and y' is on the order
-	   of 2^-1), we need to go up to f^3 for single-precision,
-	   and f^7 for double. We can then compute (1 + t3) * t2
-
-	   The expansion is \Sum_{k=1}^{\infty} {y' \choose k} x^k
-	 */
-	var terms : @f[10] = [
-		(0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),
-		(0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),
-	]
-	var current = (1.0 : @f)
-	for var j = 0; j <= d.itercount; ++j
-		current = current * f * (yprime - (j : @f)) / ((j : @f) + (1.0 : @f))
-		terms[j] = current
+	var z_hi = exp(final_exp_hi)
+	if d.tobits(z_hi) & d.expmask == d.inf
+		-> z_hi
 	;;
-	var t3 = priest_sum(terms[0:d.itercount + 1])
 
-	var total_exp_f = (N : @f) * M - (P : @f)
-	if total_exp_f > ((d.emax - d.emin + d.precision + 1) : @f)
-		-> d.frombits(d.inf)
-	elif total_exp_f < -((d.emax - d.emin + d.precision + 1) : @f)
-		-> (0.0 : @f)
-	;;
-
-	/*
-	   Pull t2's exponent out so that we don't hit subnormal
-	   calculation with the t3 multiplication
-	 */
-	var t2n, t2e, t2s
-	(t2n, t2e, t2s) = d.explode(t2)
-
-	if t2e < d.emin
-		var t2_first_1 = find_first1_64((t2s : uint64), (d.precision : int64))
-		var t2_offset = (d.precision : @u) - 1 - (t2_first_1 : @u)
-		t2s = t2s << t2_offset
-		t2e = d.emin - (t2_offset : @i)
-	;;
-
-	t2 = d.assem(t2n, 0, t2s)
-	P -= t2e
-
-	var base = fma(t2, t3, t2)
-	-> scale2(base, N * rn(M) - P)
+	-> z_hi * (1.0 + final_exp_lo)
 }
--- a/lib/math/test/powr-impl.myr
+++ b/lib/math/test/powr-impl.myr
@@ -22,8 +22,6 @@
 		(0x653f944a, 0xbf7c2388, 0x1a3c784b),
 		(0x545ba67c, 0xc0c7e947, 0x00000000),
 		(0x3fca6b0d, 0x44ff18e0, 0x7f800000),
-		// (0x3f74c7a7, 0x44feae20, 0x000265c6),
-		// (0x3f7ebd6c, 0xc5587884, 0x4bc9ab07),
 	][:]
 
 	for (x, y, z) : inputs
@@ -40,6 +38,16 @@
 const powr02 = {c
 	var inputs : (uint64, uint64, uint64)[:] = [
 		(0x0000000000000000, 0x0000000000000000, 0x0000000000000000),
+		(0x0d30ad40d8223045, 0xffb56d6e33cd6ad2, 0x7ff0000000000000),
+		(0x1f192b55704d3500, 0xff8a52f877359f3c, 0x7ff0000000000000),
+		(0x7fe6c53d8cef3d27, 0x4bcca2e651c57788, 0x7ff0000000000000),
+		(0x7fe78a3740493383, 0xe84e801c38a71fc9, 0x0000000000000000),
+		(0x7fea162ffbabd3bc, 0x02414c7fa33dd7db, 0x3ff0000000000000),
+		(0x7fe087a9112a21d8, 0x0f2b7a9584736b41, 0x3ff0000000000000),
+		(0x7fe5d78f049c0918, 0xd3a145ba5b0fdb9b, 0x0000000000000000),
+		(0x7febb342860b3386, 0xe758bd2af063aec2, 0x0000000000000000),
+
+		(0x000342cdeeb18fc9, 0xbe645d513ed4670d, 0x3ff0001c3d5eaa62),
 	][:]
 
 	for (x, y, z) : inputs
@@ -57,7 +65,7 @@
 	var inputs : (uint32, uint32, uint32, uint32)[:] = [
 		(0x1bd2244e, 0x3a647973, 0x3f7535a1, 0x3f7535a0),
 		(0x3f264a46, 0x423c927a, 0x30c9b8d3, 0x30c9b8d4),
-		(0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f7),
+		(0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f5),
 		(0x3bbd11f6, 0x3cc159b1, 0x3f62ac1b, 0x3f62ac1a),
 		(0x3f7ca5b7, 0xc309857a, 0x40c41bbf, 0x40c41bc0),
 		(0x3f6a1a65, 0x43e16065, 0x226731e2, 0x226731e3),