ref: 1d77b810242a5824469d6f8cf462ee708ea25fe3
parent: 303798469e8afe0e514c667f3c8c7ae2c65e58c5
author: S. Gilles <sgilles@math.umd.edu>
date: Wed May 1 12:03:44 EDT 2019
Add pown function.
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -24,6 +24,7 @@
# x^n
log-overkill.myr
+ pown-impl.myr
# x^y
powr-impl.myr
--- a/lib/math/fpmath.myr
+++ b/lib/math/fpmath.myr
@@ -23,6 +23,9 @@
horner_poly : (x : @f, a : @f[:] -> @f)
horner_polyu : (x : @f, a : @u[:] -> @f)
+ /* pown-impl */
+ pown : (x : @f, n : @i -> @f)
+
/* powr-impl */
powr : (x : @f, y : @f -> @f)
@@ -103,6 +106,8 @@
horner_poly = {x, a; -> horner_poly32(x, a)}
horner_polyu = {x, a; -> horner_polyu32(x, a)}
+ pown = {x, n; -> pown32(x, n)}
+
powr = {x, y; -> powr32(x, y)}
scale2 = {x, m; -> scale232(x, m)}
@@ -139,6 +144,8 @@
horner_poly = {x, a; -> horner_poly64(x, a)}
horner_polyu = {x, a; -> horner_polyu64(x, a)}
+
+ pown = {x, n; -> pown64(x, n)}
powr = {x, y; -> powr64(x, y)}
--- a/lib/math/impls.myr
+++ b/lib/math/impls.myr
@@ -29,6 +29,9 @@
pkglocal extern const horner_polyu32 : (x : flt32, a : uint32[:] -> flt32)
pkglocal extern const horner_polyu64 : (x : flt64, a : uint64[:] -> flt64)
+ pkglocal extern const pown32 : (x : flt32, n : int32 -> flt32)
+ pkglocal extern const pown64 : (x : flt64, n : int64 -> flt64)
+
pkglocal extern const powr32 : (x : flt32, y : flt32 -> flt32)
pkglocal extern const powr64 : (x : flt64, y : flt64 -> flt64)
--- /dev/null
+++ b/lib/math/pown-impl.myr
@@ -1,0 +1,219 @@
+use std
+
+use "fpmath"
+use "log-impl"
+use "log-overkill"
+use "sum-impl"
+use "util"
+
+/*
+ This is an implementation of pown: computing x^n where n is an
+ integer. We sort of follow [PEB04], but without their high-radix
+ log_2.
+ */
+pkg math =
+ pkglocal const pown32 : (x : flt32, n : int32 -> flt32)
+ pkglocal const pown64 : (x : flt64, n : int64 -> flt64)
+;;
+
+type fltdesc(@f, @u, @i) = struct
+ explode : (f : @f -> (bool, @i, @u))
+ assem : (n : bool, e : @i, s : @u -> @f)
+ tobits : (f : @f -> @u)
+ frombits : (u : @u -> @f)
+ C : (@u, @u)[:]
+ one_over_ln2_hi : @u
+ one_over_ln2_lo : @u
+ nan : @u
+ inf : @u
+ neginf : @u
+ magcmp : (f : @f, g : @f -> std.order)
+ two_by_two : (x : @f, y : @f -> (@f, @f))
+ log_overkill : (x : @f -> (@f, @f))
+ emin : @i
+ emax : @i
+ imax : @i
+ imin : @i
+;;
+
+const desc32 : fltdesc(flt32, uint32, int32) = [
+ .explode = std.flt32explode,
+ .assem = std.flt32assem,
+ .tobits = std.flt32bits,
+ .frombits = std.flt32frombits,
+ .C = accurate_logs32[0:130], /* See log-impl.myr */
+ .one_over_ln2_hi = 0x3fb8aa3b, /* 1/ln(2), top part */
+ .one_over_ln2_lo = 0x32a57060, /* 1/ln(2), bottom part */
+ .nan = 0x7fc00000,
+ .inf = 0x7f800000,
+ .neginf = 0xff800000,
+ .magcmp = mag_cmp32,
+ .two_by_two = two_by_two32,
+ .log_overkill = logoverkill32,
+ .emin = -126,
+ .emax = 127,
+ .imax = 2147483647, /* For detecting overflow in final exponent */
+ .imin = -2147483648,
+]
+
+const desc64 : fltdesc(flt64, uint64, int64) = [
+ .explode = std.flt64explode,
+ .assem = std.flt64assem,
+ .tobits = std.flt64bits,
+ .frombits = std.flt64frombits,
+ .C = accurate_logs64[0:130], /* See log-impl.myr */
+ .one_over_ln2_hi = 0x3ff71547652b82fe,
+ .one_over_ln2_lo = 0x3c7777d0ffda0d24,
+ .nan = 0x7ff8000000000000,
+ .inf = 0x7ff0000000000000,
+ .neginf = 0xfff0000000000000,
+ .magcmp = mag_cmp64,
+ .two_by_two = two_by_two64,
+ .log_overkill = logoverkill64,
+ .emin = -1022,
+ .emax = 1023,
+ .imax = 9223372036854775807,
+ .imin = -9223372036854775808,
+]
+
+const pown32 = {x : flt32, n : int32
+ -> powngen(x, n, desc32)
+}
+
+const pown64 = {x : flt64, n : int64
+ -> powngen(x, n, desc64)
+}
+
+generic powngen = {x : @f, n : @i, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i
+ var xb
+ xb = d.tobits(x)
+
+ var xn : bool, xe : @i, xs : @u
+ (xn, xe, xs) = d.explode(x)
+
+ var nf : @f = (n : @f)
+
+ /*
+ Special cases. Note we do not follow IEEE exceptions.
+ */
+ if n == 0
+ /*
+ Anything^0 is 1. We're taking the view that x is a tiny range of reals,
+ so a dense subset of them are 1, even if x is 0.0.
+ */
+ -> 1.0
+ elif std.isnan(x)
+ /* Propagate NaN (why doesn't this come first? Ask IEEE.) */
+ -> d.frombits(d.nan)
+ elif (x == 0.0)
+ if n < 0 && (n % 2 == 1) && xn
+ /* (+/- 0)^n = +/- oo */
+ -> d.frombits(d.neginf)
+ elif n < 0
+ -> d.frombits(d.inf)
+ elif n % 2 == 1
+ /* (+/- 0)^n = +/- 0 (n odd) */
+ -> d.assem(xn, d.emin - 1, 0)
+ else
+ -> 0.0
+ ;;
+ elif n == 1
+ /* Anything^1 is itself */
+ -> x
+ ;;
+
+ /* (-f)^n = (-1)^n * (f)^n. Figure this out now, then pretend f >= 0.0 */
+ var ult_sgn = 1.0
+ if xn && (n % 2 == 1 || n % 2 == -1)
+ ult_sgn = -1.0
+ ;;
+
+ /*
+ Compute (with x = xs * 2^e)
+
+ x^n = 2^(n*log2(xs)) * 2^(n*e)
+
+ = 2^(I + F) * 2^(n*e)
+ = 2^(F) * 2^(I+n*e)
+
+ Since n and e, and I are all integers, we can get the last part from
+ scale2. The hard part is computing I and F, and then computing 2^F.
+ */
+ var ln_xs_hi, ln_xs_lo
+ (ln_xs_hi, ln_xs_lo) = d.log_overkill(d.assem(false, 0, xs))
+
+ /* Now x^n = 2^(n * [ ln_xs / ln(2) ]) * 2^(n + e) */
+
+ var ls1 : @f[8]
+ (ls1[0], ls1[1]) = d.two_by_two(ln_xs_hi, d.frombits(d.one_over_ln2_hi))
+ (ls1[2], ls1[3]) = d.two_by_two(ln_xs_hi, d.frombits(d.one_over_ln2_lo))
+ (ls1[4], ls1[5]) = d.two_by_two(ln_xs_lo, d.frombits(d.one_over_ln2_hi))
+ (ls1[6], ls1[7]) = d.two_by_two(ln_xs_lo, d.frombits(d.one_over_ln2_lo))
+
+ /*
+ Now log2(xs) = Sum(ls1), so
+
+ x^n = 2^(n * Sum(ls1)) * 2^(n * e)
+ */
+ var E1, E2
+ (E1, E2) = double_compensated_sum(ls1[0:8])
+ var ls2 : @f[5]
+ var ls2s : @f[5]
+ var I = 0
+ (ls2[0], ls2[1]) = d.two_by_two(E1, nf)
+ (ls2[2], ls2[3]) = d.two_by_two(E2, nf)
+ ls2[4] = 0.0
+
+ /* Now x^n = 2^(Sum(ls2)) * 2^(n + e) */
+
+ for var j = 0; j < 5; ++j
+ var i = rn(ls2[j])
+ I += i
+ ls2[j] -= (i : @f)
+ ;;
+
+ var F1, F2
+ std.slcp(ls2s[0:5], ls2[0:5])
+ std.sort(ls2s[0:5], d.magcmp)
+ (F1, F2) = double_compensated_sum(ls2s[0:5])
+
+ if (F1 < 0.0 || F1 > 1.0)
+ var i = rn(F1)
+ I += i
+ ls2[4] -= (i : @f)
+ std.slcp(ls2s[0:5], ls2[0:5])
+ std.sort(ls2s[0:5], d.magcmp)
+ (F1, F2) = double_compensated_sum(ls2s[0:5])
+ ;;
+
+ /* Now, x^n = 2^(F1 + F2) * 2^(I + n*e). */
+ var ls3 : @f[6]
+ var log2_hi, log2_lo
+ (log2_hi, log2_lo) = d.C[128]
+ (ls3[0], ls3[1]) = d.two_by_two(F1, d.frombits(log2_hi))
+ (ls3[2], ls3[3]) = d.two_by_two(F1, d.frombits(log2_lo))
+ (ls3[4], ls3[5]) = d.two_by_two(F2, d.frombits(log2_hi))
+ var G1, G2
+ (G1, G2) = double_compensated_sum(ls3[0:6])
+
+ var base1 = exp(G1)
+ var base2 = exp(G2)
+ var pow_xen = xe * n
+ var pow = pow_xen + I
+ if pow_xen / n != xe || (I > 0 && d.imax - I < pow_xen) || (I < 0 && d.imin - I > pow_xen)
+ /*
+ The exponent overflowed. There's no way this is representable. We need
+ to at least recover the correct sign. If the overflow was from the
+ multiplication, then the sign we want is the sign that pow_xen should
+ have been. If the overflow was from the addition, then we still want
+ the sign that pow_xen should have had.
+ */
+ if (xe > 0) == (n > 0)
+ pow = 2 * d.emax
+ else
+ pow = 2 * d.emin
+ ;;
+ ;;
+
+ -> ult_sgn * scale2(base1 * base2, pow)
+}
--- /dev/null
+++ b/lib/math/test/pown-impl.myr
@@ -1,0 +1,119 @@
+use std
+use math
+use testr
+
+const main = {
+ math.fptrap(false)
+ testr.run([
+ [.name="pown-01", .fn = pown01],
+ [.name="pown-02", .fn = pown02],
+ [.name="pown-03", .fn = pown03],
+ ][:])
+}
+
+const int32fromuint32 = {b; -> (&b : int32#)#}
+const int64fromuint64 = {b; -> (&b : int64#)#}
+
+/* Mostly obtained from fpmath-consensus */
+const pown01 = {c
+ var inputs : (uint32, uint32, uint32)[:] = [
+ (0x000000f6, 0x00000000, 0x3f800000),
+ (0x00000000, 0x3d800000, 0x00000000),
+ (0x946fc13b, 0x3b21efc7, 0x80000000),
+ (0xb76e98b6, 0xdbeb6637, 0xff800000),
+ (0xc04825b7, 0x53cdd772, 0x7f800000),
+ (0x3f7ff8c0, 0x000a53ba, 0x097c15ec),
+ (0xbefb4dd0, 0xffffffc6, 0x5d3b4cbc),
+ (0xbf77a88b, 0x0000038f, 0xa9b0342c),
+ (0xbd5cdf23, 0xffffffeb, 0xebb17f33),
+ (0x3fb66246, 0x000000e0, 0x78ac13ae),
+ (0x3bfec3ab, 0x00000005, 0x2df9e189),
+ (0x3f7762db, 0x000002b8, 0x2e466343),
+ (0x3e245431, 0xfffffff7, 0x4b582b71),
+ (0xbf73903f, 0x00000328, 0x2276ffa9),
+ (0x3f6a088a, 0xfffffe4d, 0x5b9dcb2e),
+ (0xc06650d6, 0x00000019, 0xd691b004),
+ (0x3f751050, 0x00000731, 0x0583b3e2),
+ (0x3f8124aa, 0x00001e6d, 0x7171d834),
+ (0xbf6c06b0, 0x000001b2, 0x260cb0e7),
+ (0x38307acf, 0x00000003, 0x29a7bd3a),
+ (0x3f7fafa1, 0x00005c97, 0x2a835867),
+ (0xbf80fb78, 0xfffffdcb, 0xbc5a0a5b),
+ (0xbff532bd, 0xffffffe7, 0xb3bc09eb),
+ (0xbf804fe0, 0x00002cbb, 0xd39529b4),
+ (0xbf7eaac3, 0x000026c1, 0x9a1b5779),
+ (0xb6ecbef2, 0xfffffff9, 0xfb5d43d7),
+ (0x40c1d332, 0xffffffef, 0x29628a12),
+ ][:]
+
+ for (x, y, z) : inputs
+ var xf : flt32 = std.flt32frombits(x)
+ var yi : int32 = int32fromuint32(y)
+ var zf : flt32 = std.flt32frombits(z)
+ var rf = math.pown(xf, yi)
+ testr.check(c, rf == zf,
+ "pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+ x, yi, z, std.flt32bits(rf))
+ ;;
+}
+
+/* Mostly obtained from fpmath-consensus */
+const pown02 = {c
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0x212c65442137286a, 0x000000007f47df7c, 0x0000000000000000),
+ (0x9add65abd325b6eb, 0xfc2b1173d2f9d7c5, 0xfff0000000000000),
+ (0xc00616fb023b43b5, 0x57ce36258314fd54, 0x7ff0000000000000),
+ (0xc12300696c144c98, 0x0000000000000032, 0x7c152603516d90a8),
+ (0xbfd3dfdfc81e60e0, 0xfffffffffffffe8a, 0x675fe457fecdc77b),
+ (0xbff03687f3c6d148, 0xffffffffffffa5ce, 0x2465ab45a663c6e7),
+ (0x3feae36a83f91b30, 0xfffffffffffff2a9, 0x75864016b705ed6e),
+ (0x3feb9a4d0abc8192, 0xffffffffffffff7f, 0x41a6cb5da02a95f7),
+ (0xbfbf733faaeccb42, 0xffffffffffffffc8, 0x4a851d5e1c674b7c),
+ (0xbff0a5cfa8cc163b, 0xfffffffffffffc2c, 0x3c6dbc034c342e8e),
+ (0xbfee9beac6a3caf0, 0xffffffffffffef92, 0x50c94fc31f862949),
+ (0x4cba94fc5ab93856, 0xfffffffffffffffe, 0x26572fe2438caeb6),
+ (0x3feccc63016da0b8, 0x00000000000010a3, 0x17735a1a2b8d1115),
+ (0x3ff06cc390fe6413, 0x0000000000006183, 0x7aec68e2525e0756),
+ (0xbfef7da61a3fc45e, 0x0000000000003c20, 0x29ac311e57d77bb2),
+ (0xbd17d3142424b4ce, 0x000000000000000d, 0x9b061d29ecc312d7),
+ (0xbd9b46e7dcf5ddd7, 0xffffffffffffffee, 0x69d1b75168c2d601),
+ (0x3fb885ed105fdf42, 0x000000000000004b, 0x301272ca384d27ab),
+ (0xbfedd75714c016db, 0x00000000000004a5, 0xb8723796de107a57),
+ (0x3ff25198b971bb27, 0x00000000000012f3, 0x7b21bc435390825e),
+ (0x3fef791958603e0e, 0xffffffffffff85b6, 0x6ecebfe2dc493669),
+ (0xc017043172d0152b, 0x00000000000000e9, 0xe4b2c1666379afdc),
+ (0xc0325800cfeffb8e, 0x00000000000000d8, 0x78983c24a5e29e19),
+ (0xbfee2ae3cd3208ec, 0x00000000000006b7, 0xb6cb06585f39893d),
+ ][:]
+
+ for (x, y, z) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var yi : int64 = int64fromuint64(y)
+ var zf : flt64 = std.flt64frombits(z)
+ var rf = math.pown(xf, yi)
+ testr.check(c, rf == zf,
+ "pown(0x{b=16,w=16,p=0}, {}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, yi, z, std.flt64bits(rf))
+ ;;
+}
+
+/* Here we quarantine off some known-bad results */
+const pown03 = {c
+ var inputs : (uint32, uint32, uint32, uint32)[:] = [
+ (0x3f80040a, 0xfff6d0a0, 0x09f93f64, 0x09f93f63),
+ (0x409d7f5a, 0xffffffd5, 0x0e0c8fa0, 0x0e0c8f9f),
+ (0xbfb588b4, 0x0000008c, 0x62be78b9, 0x62be78ba),
+ (0xb5e0d8c0, 0x00000002, 0x2c457c08, 0x2c457c07),
+ ][:]
+
+ for (x, y, z_perfect, z_accepted) : inputs
+ var xf : flt32 = std.flt32frombits(x)
+ var yi : int32 = int32fromuint32(y)
+ var zf_perfect : flt32 = std.flt32frombits(z_perfect)
+ var zf_accepted : flt32 = std.flt32frombits(z_accepted)
+ var rf = math.pown(xf, yi)
+ testr.check(c, rf == zf_perfect || rf == zf_accepted,
+ "pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, will also accept 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+ x, yi, z_perfect, z_accepted, std.flt32bits(rf))
+ ;;
+}