shithub: mc

--- a/lib/math/powr-impl.myr

+++ b/lib/math/powr-impl.myr

@@ -2,6 +2,7 @@

 use "fpmath"

 use "log-impl"

+use "log-overkill"

 use "util"

/*

@@ -14,12 +15,6 @@

    example, IEEE 754-2008 does not specify what powr(infty, y) must

    return when y is not 0.0 (an erratum was planned in 2010, but

    does not appear to have been released as of 2018).

-   As a note: unlike many other functions in this library, there

-   has been no serious analysis of the accuracy and speed of this

-   particular implementation. Interested observers wishing to improve

-   this library will probably find this file goldmine of mistakes,

-   both theoretical and practical.

*/

 pkg math =

 	pkglocal const powr32 : (x : flt32, y : flt32 -> flt32)

@@ -38,17 +33,9 @@

 	emax : @i

 	emin : @i

 	sgnmask : @u

-	sig8mask : @u

-	sig8last : @u

-	split_prec_mask : @u

-	split_prec_mask2 : @u

-	C : (@u, @u)[:]

-	eps_inf_border : @u

-	eps_zero_border : @u

-	exp_inf_border : @u

+	log_overkill : (x : @f -> (@f, @f))

+	split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f))

 	exp_zero_border : @u

-	exp_subnormal_border : @u

-	itercount : @u

;;

 const desc32 : fltdesc(flt32, uint32, int32) =  [

@@ -63,17 +50,9 @@

 	.emax = 127,

 	.emin = -126,

 	.sgnmask = 1 << 31,

-	.sig8mask = 0xffff0000, /* Mask to get 8 significant bits */

-	.sig8last = 16, /* Last bit kept when masking */

-	.split_prec_mask = 0xffff0000, /* 16 trailing zeros */

-	.split_prec_mask2 = 0xfffff000, /* 12 trailing zeros */

-	.C = accurate_logs32[0:130], /* See log-impl.myr */

-	.eps_inf_border = 0x4eb00f34, /* maximal y st. (1.00..1)^y < oo */

-	.eps_zero_border = 0x4ecff1b4, /* minimal y st. (0.99..9)^y > 0 */

-	.exp_inf_border = 0x42b17218, /* maximal y such that e^y < oo */

+	.log_overkill = logoverkill32,

+	.split_mul = split_mul32,

 	.exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */

-	.exp_subnormal_border = 0xc2aeac50, /* minimal y such that e^y is normal */

-	.itercount = 4, /* How many iterations of Taylor series for (1 + f)^y' */

 const desc64 : fltdesc(flt64, uint64, int64) =  [

@@ -88,19 +67,20 @@

 	.emax = 1023,

 	.emin = -1022,

 	.sgnmask = 1 << 63,

-	.sig8mask = 0xffffe00000000000, /* Mask to get 8 significant bits */

-	.sig8last = 45, /* Last bit kept when masking */

-	.split_prec_mask = 0xffffff0000000000, /* 40 trailing zeroes */

-	.split_prec_mask2 = 0xfffffffffffc0000, /* 18 trailing zeroes */

-	.C = accurate_logs64[0:130], /* See log-impl.myr */

-	.eps_inf_border = 0x43d628b76e3a7b61, /* maximal y st. (1.00..1)^y < oo */

-	.eps_zero_border = 0x43d74e9c65eceee0, /*  minimal y st. (0.99..9)^y > 0 */

-	.exp_inf_border = 0x40862e42fefa39ef, /* maximal y such that e^y < oo */

+	.log_overkill = logoverkill64,

+	.split_mul = hl_mult,

 	.exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */

-	.exp_subnormal_border = 0xc086232bdd7abcd2, /* minimal y such that e^y is normal */

-	.itercount = 8,

+const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32

+	var x : flt64 = (x_h : flt64) + (x_l : flt64)

+	var y : flt64 = (y_h : flt64) + (y_l : flt64)

+	var z = x * y

+	var z_h : flt32 = (z : flt32)

+	var z_l : flt32 = ((z - (z_h : flt64)) : flt32)

+	-> (z_h, z_l)

+}

 const powr32 = {x : flt32, y : flt32

 	-> powrgen(x, y, desc32)

@@ -182,225 +162,41 @@

;;

;;

-	/* Normalize x and y */

-	if xe < d.emin

-		var first_1 = find_first1_64((xs : uint64), (d.precision : int64))

-		var offset = (d.precision : @u) - 1 - (first_1 : @u)

-		xs = xs << offset

-		xe = d.emin - offset

-	;;

-	if ye < d.emin

-		var first_1 = find_first1_64((ys : uint64), (d.precision : int64))

-		var offset = (d.precision : @u) - 1 - (first_1 : @u)

-		ys = ys << offset

-		ye = d.emin - offset

-	;;

/*

-           Split x into 2^N * F * (1 + f), with F = 1 + j/128 (some

-           j) and f tiny. Compute F naively by truncation. Compute

-           f via f = (x' - 1 - F)/(1 + F), where 1/(1 + F) is

-           precomputed and x' is x/2^N. 128 is chosen so that we

-           can borrow some constants from log-impl.myr.

-           [Tan90] hints at a method of computing x^y which may be

-           comparable to this approach, but which is unfortunately

-           has not been elaborated on (as far as I can discover).

+	   Just do the dumb thing: compute exp( log(x) · y ). All the hard work

+	   goes into computing log(x) with high enough precision that our exp()

+	   implementation becomes the weakest link. The Table Maker's Dilemma

+	   says that quantifying "high enough" is a very difficult problem, but

+	   experimentally twice the precision of @f appears quite good enough.

*/

-	var N = xe

-	var j, F, Fn, Fe, Fs

-	var xprime = d.assem(false, 0, xs)

+	var ln_x_hi, ln_x_lo

+	(ln_x_hi, ln_x_lo) = d.log_overkill(x)

-	if need_round_away(0, (xs : uint64), (d.sig8last : int64))

-		F = d.frombits((d.tobits(xprime) & d.sig8mask) + (1 << d.sig8last))

-	else

-		F = d.frombits(d.tobits(xprime) & d.sig8mask)

-	;;

+	var final_exp_hi, final_exp_lo

+	(final_exp_hi, final_exp_lo) = d.split_mul(ln_x_hi, ln_x_lo, y, 0.0)

-	(Fn, Fe, Fs) = d.explode(F)

-	if Fe != 0

-		j = 128

-	else

-		j = 0x7f & ((d.sig8mask & Fs) >> d.sig8last)

-	;;

-	var f = (xprime - F)/F

-	/*

-	   y could actually be above integer infinity, in which

-	   case x^y is most certainly infinity or 0. More importantly,

-	   we can't safely compute M (below).

-	 */

-	if x > (1.0 : @f)

-		if y > d.frombits(d.eps_inf_border)

+	if d.tobits(final_exp_hi) & d.expmask == d.inf

+		/*

+		   split_mul doesn't actually preserve the sign of infinity, so we can't

+		   trust final_exp_hi to get it.

+		 */

+		if (d.tobits(ln_x_hi) & d.sgnmask) == (yb & d.sgnmask)

+			/* e^+Inf */

 			-> d.frombits(d.inf)

-		elif -y > d.frombits(d.eps_inf_border)

-			-> (0.0 : @f)

+		else

+			/* e^-Inf */

+			-> 0.0

;;

-	elif x < (1.0 : @f)

-		if y > d.frombits(d.eps_zero_border) && x < (1.0 : @f)

-			-> (0.0 : @f)

-		elif -y > d.frombits(d.eps_zero_border) && x < (1.0 : @f)

-			-> d.frombits(d.inf)

-		;;

;;

-	/* Split y into M + y', with |y'| <= 0.5 and M an integer */

-	var M = floor(y)

-	var yprime = y - M

-	if yprime > (0.5 : @f)

-		M += (1.0 : @f)

-		yprime = y - M

-	elif yprime < (-0.5 : @f)

-		M -= (1.0: @f)

-		yprime = y - M

+	if final_exp_hi < d.frombits(d.exp_zero_border)

+		-> 0.0

;;

-	/*

-	   We'll multiply y' by log(2) and try to keep extra

-	   precision, so we need to split y'. Since the high word

-	   of C has 24 - 10 = 14 significant bits (53 - 16 = 37 in

-	   flt64 case), we ensure 15 (39) trailing zeroes in

-	   yprime_hi.  (We also need this for y'*N, M, &c).

-	 */

-	var yprime_hi = d.frombits(d.tobits(yprime) & d.split_prec_mask)

-	var yprime_lo = yprime - yprime_hi

-	var yprimeN_hi = d.frombits(d.tobits((N : @f) * yprime) & d.split_prec_mask)

-	var yprimeN_lo = fma((N : @f),  yprime, -yprimeN_hi)

-	var M_hi = d.frombits(d.tobits(M) & d.split_prec_mask)

-	var M_lo = M - M_hi

-	/*

-	   At this point, we've built out

-	       x^y = [ 2^N * F * (1 + f) ]^(M + y')

-	   where N, M are integers, F is well-known, and f, y' are

-	   tiny. So we can get to computing

-	        /-1-\     /-------------------2--------------------------\     /-3--\

-	       2^(N*M) * exp(log(F)*y' + log2*N*y' + log(F)*M + M*log(1+f)) * (1+f)^y'

-	   where 1 can be handled by scale2, 2 we can mostly fake

-	   by sticking high-precision values for log(F) and log(2)

-	   through exp(), and 3 is composed of small numbers,

-	   therefore can be reasonably approximated by a Taylor

-	   expansion.

-	 */

-	/* t2 */

-	var log2_lo, log2_hi, Cu_hi, Cu_lo

-	(log2_hi, log2_lo) = d.C[128]

-	(Cu_hi, Cu_lo) = d.C[j]

-	var es : @f[20]

-	std.slfill(es[:], (0.0 : @f))

-	/* log(F) * y' */

-	es[0] = d.frombits(Cu_hi) * yprime_hi

-	es[1] = d.frombits(Cu_lo) * yprime_hi

-	es[2] = d.frombits(Cu_hi) * yprime_lo

-	es[3] = d.frombits(Cu_lo) * yprime_lo

-	/* log(2) * N * y' */

-	es[4] = d.frombits(log2_hi) * yprimeN_hi

-	es[5] = d.frombits(log2_lo) * yprimeN_hi

-	es[6] = d.frombits(log2_hi) * yprimeN_lo

-	es[7] = d.frombits(log2_lo) * yprimeN_lo

-	/* log(F) * M */

-	es[8] = d.frombits(Cu_hi) * M_hi

-	es[9] = d.frombits(Cu_lo) * M_hi

-	es[10] = d.frombits(Cu_hi) * M_lo

-	es[11] = d.frombits(Cu_lo) * M_lo

-	/* log(1 + f) * M */

-	var lf = log1p(f)

-	var lf_hi = d.frombits(d.tobits(lf) & d.split_prec_mask)

-	var lf_lo = lf - lf_hi

-	es[12] = lf_hi * M_hi

-	es[13] = lf_lo * M_hi

-	es[14] = lf_hi * M_lo

-	es[15] = lf_lo * M_lo

-	/*

-	   The correct way to handle this would be to compare

-	   magnitudes of eis and parenthesize the additions correctly.

-	   We take the cheap way out.

-	 */

-	var exp_hi = priest_sum(es[0:16])

-	/*

-	   We would like to just compute exp(exp_hi) * exp(exp_lo).

-	   However, if that takes us into subnormal territory, yet

-	   N * M is large, that will throw away a few bits of

-	   information. We can correct for this by adding in a few

-	   copies of P*log(2), then subtract off P when we compute

-	   scale2() at the end.

-	   We also have to be careful that P doesn't have too many

-	   significant bits, otherwise we throw away some information

-	   of log2_hi.

-	 */

-	var P = -rn(exp_hi / d.frombits(log2_hi))

-	var P_f = (P : @f)

-	P_f = d.frombits(d.tobits(P_f) & d.split_prec_mask2)

-	P = rn(P_f)

-	es[16] = P_f * d.frombits(log2_hi)

-	es[17] = P_f * d.frombits(log2_lo)

-	exp_hi = priest_sum(es[0:18])

-	es[18] = -exp_hi

-	var exp_lo = priest_sum(es[0:19])

-	var t2 = exp(exp_hi) * exp(exp_lo)

-	/*

-	   t3: Abbreviated Taylor expansion for (1 + f)^y' - 1.

-	   Since f is on the order of 2^-7 (and y' is on the order

-	   of 2^-1), we need to go up to f^3 for single-precision,

-	   and f^7 for double. We can then compute (1 + t3) * t2

-	   The expansion is \Sum_{k=1}^{\infty} {y' \choose k} x^k

-	 */

-	var terms : @f[10] = [

-		(0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),

-		(0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),  (0.0 : @f),

-	]

-	var current = (1.0 : @f)

-	for var j = 0; j <= d.itercount; ++j

-		current = current * f * (yprime - (j : @f)) / ((j : @f) + (1.0 : @f))

-		terms[j] = current

+	var z_hi = exp(final_exp_hi)

+	if d.tobits(z_hi) & d.expmask == d.inf

+		-> z_hi

;;

-	var t3 = priest_sum(terms[0:d.itercount + 1])

-	var total_exp_f = (N : @f) * M - (P : @f)

-	if total_exp_f > ((d.emax - d.emin + d.precision + 1) : @f)

-		-> d.frombits(d.inf)

-	elif total_exp_f < -((d.emax - d.emin + d.precision + 1) : @f)

-		-> (0.0 : @f)

-	;;

-	/*

-	   Pull t2's exponent out so that we don't hit subnormal

-	   calculation with the t3 multiplication

-	 */

-	var t2n, t2e, t2s

-	(t2n, t2e, t2s) = d.explode(t2)

-	if t2e < d.emin

-		var t2_first_1 = find_first1_64((t2s : uint64), (d.precision : int64))

-		var t2_offset = (d.precision : @u) - 1 - (t2_first_1 : @u)

-		t2s = t2s << t2_offset

-		t2e = d.emin - (t2_offset : @i)

-	;;

-	t2 = d.assem(t2n, 0, t2s)

-	P -= t2e

-	var base = fma(t2, t3, t2)

-	-> scale2(base, N * rn(M) - P)

+	-> z_hi * (1.0 + final_exp_lo)

--- a/lib/math/test/powr-impl.myr

+++ b/lib/math/test/powr-impl.myr

@@ -22,8 +22,6 @@

 		(0x653f944a, 0xbf7c2388, 0x1a3c784b),

 		(0x545ba67c, 0xc0c7e947, 0x00000000),

 		(0x3fca6b0d, 0x44ff18e0, 0x7f800000),

-		// (0x3f74c7a7, 0x44feae20, 0x000265c6),

-		// (0x3f7ebd6c, 0xc5587884, 0x4bc9ab07),

 	][:]

 	for (x, y, z) : inputs

@@ -40,6 +38,16 @@

 const powr02 = {c

 	var inputs : (uint64, uint64, uint64)[:] = [

 		(0x0000000000000000, 0x0000000000000000, 0x0000000000000000),

+		(0x0d30ad40d8223045, 0xffb56d6e33cd6ad2, 0x7ff0000000000000),

+		(0x1f192b55704d3500, 0xff8a52f877359f3c, 0x7ff0000000000000),

+		(0x7fe6c53d8cef3d27, 0x4bcca2e651c57788, 0x7ff0000000000000),

+		(0x7fe78a3740493383, 0xe84e801c38a71fc9, 0x0000000000000000),

+		(0x7fea162ffbabd3bc, 0x02414c7fa33dd7db, 0x3ff0000000000000),

+		(0x7fe087a9112a21d8, 0x0f2b7a9584736b41, 0x3ff0000000000000),

+		(0x7fe5d78f049c0918, 0xd3a145ba5b0fdb9b, 0x0000000000000000),

+		(0x7febb342860b3386, 0xe758bd2af063aec2, 0x0000000000000000),

+		(0x000342cdeeb18fc9, 0xbe645d513ed4670d, 0x3ff0001c3d5eaa62),

 	][:]

 	for (x, y, z) : inputs

@@ -57,7 +65,7 @@

 	var inputs : (uint32, uint32, uint32, uint32)[:] = [

 		(0x1bd2244e, 0x3a647973, 0x3f7535a1, 0x3f7535a0),

 		(0x3f264a46, 0x423c927a, 0x30c9b8d3, 0x30c9b8d4),

-		(0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f7),

+		(0x61fb73d0, 0xbfd2666c, 0x06c539f6, 0x06c539f5),

 		(0x3bbd11f6, 0x3cc159b1, 0x3f62ac1b, 0x3f62ac1a),

 		(0x3f7ca5b7, 0xc309857a, 0x40c41bbf, 0x40c41bc0),

 		(0x3f6a1a65, 0x43e16065, 0x226731e2, 0x226731e3),