ref: 77246bfe1b9f147b1b255e811fb8b2032c49dc9e
dir: /lib/math/powr-impl.myr/
use std
use "fpmath"
use "impls"
use "log-impl"
use "log-overkill"
use "util"
/*
This is an implementation of powr, not pow, so the special cases
are tailored more closely to the mathematical x^y = e^(y * log(x))
than to historical C implementations (pow was aligned to the C99
standard, which was aligned to codify existing practice).
Even then, some parts of the powr specification are unclear. For
example, IEEE 754-2008 does not specify what powr(infty, y) must
return when y is not 0.0 (an erratum was planned in 2010, but
does not appear to have been released as of 2018).
*/
pkg math =
pkglocal const powr32 : (x : flt32, y : flt32 -> flt32)
pkglocal const powr64 : (x : flt64, y : flt64 -> flt64)
;;
type fltdesc(@f, @u, @i) = struct
explode : (f : @f -> (bool, @i, @u))
assem : (n : bool, e : @i, s : @u -> @f)
tobits : (f : @f -> @u)
frombits : (u : @u -> @f)
nan : @u
inf : @u
expmask : @u
precision : @u
emax : @i
emin : @i
sgnmask : @u
log_overkill : (x : @f -> (@f, @f))
fma : (x : @f, y : @f, z : @f -> @f)
split_mul : (x_h : @f, x_l : @f, y_h : @f, y_l : @f -> (@f, @f))
exp_zero_border : @u
;;
const desc32 : fltdesc(flt32, uint32, int32) = [
.explode = std.flt32explode,
.assem = std.flt32assem,
.tobits = std.flt32bits,
.frombits = std.flt32frombits,
.nan = 0x7fc00000,
.inf = 0x7f800000,
.expmask = 0x7f800000, /* mask to detect inf or NaN (inf, repeated for clarity) */
.precision = 24,
.emax = 127,
.emin = -126,
.sgnmask = 1 << 31,
.log_overkill = logoverkill32,
.fma = fma32,
.split_mul = split_mul32,
.exp_zero_border = 0xc2cff1b4, /* minimal y such that e^y > 0 */
]
const desc64 : fltdesc(flt64, uint64, int64) = [
.explode = std.flt64explode,
.assem = std.flt64assem,
.tobits = std.flt64bits,
.frombits = std.flt64frombits,
.nan = 0x7ff8000000000000,
.inf = 0x7ff0000000000000,
.expmask = 0x7ff0000000000000,
.precision = 53,
.emax = 1023,
.emin = -1022,
.sgnmask = 1 << 63,
.log_overkill = logoverkill64,
.fma = fma64,
.split_mul = hl_mult,
.exp_zero_border = 0xc0874910d52d3052, /* minimal y such that e^y > 0 */
]
const split_mul32 = {x_h : flt32, x_l : flt32, y_h : flt32, y_l : flt32
var x : flt64 = (x_h : flt64) + (x_l : flt64)
var y : flt64 = (y_h : flt64) + (y_l : flt64)
var z = x * y
var z_h : flt32 = (z : flt32)
var z_l : flt32 = ((z - (z_h : flt64)) : flt32)
-> (z_h, z_l)
}
const powr32 = {x : flt32, y : flt32
-> powrgen(x, y, desc32)
}
const powr64 = {x : flt64, y : flt64
-> powrgen(x, y, desc64)
}
generic powrgen = {x : @f, y : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i
var xb, yb
xb = d.tobits(x)
yb = d.tobits(y)
var xn : bool, xe : @i, xs : @u
var yn : bool, ye : @i, ys : @u
(xn, xe, xs) = d.explode(x)
(yn, ye, ys) = d.explode(y)
/*
Special cases. Note we do not follow IEEE exceptions.
*/
if std.isnan(x) || std.isnan(y)
/* Propagate NaN */
-> d.frombits(d.nan)
elif (xb & ~d.sgnmask == 0)
if (yb & ~d.sgnmask == 0)
/* 0^0 is undefined. */
-> d.frombits(d.nan)
elif yn
/* 0^(< 0) is infinity */
-> d.frombits(d.inf)
else
/* otherwise, 0^y = 0. */
-> (0.0 : @f)
;;
elif xn
/*
(< 0)^(anything) is undefined. This comes from
thinking of floating-point numbers as representing
small ranges of real numbers. If you really want
to compute (-1.23)^5, use pown.
*/
-> d.frombits(d.nan)
elif (xb & ~d.sgnmask == d.inf)
if (yb & ~d.sgnmask == 0)
/* oo^0 is undefined */
-> d.frombits(d.nan)
elif yn
/* +/-oo^(< 0) is +/-0 */
-> d.assem(xn, 0, 0)
elif xn
/* (-oo)^(anything) is undefined */
-> d.frombits(d.nan)
else
/* oo^(> 0) is oo */
-> d.frombits(d.inf)
;;
elif std.eq(y, (1.0 : @f))
/* x^1 = x */
-> x
elif yb & ~d.sgnmask == 0
/* (finite, positive)^0 = 1 */
-> (1.0 : @f)
elif std.eq(x, (1.0 : @f))
if yb & ~d.sgnmask == d.inf
/* 1^oo is undefined */
-> d.frombits(d.nan)
else
/* 1^(finite, positive) = 1 */
-> (1.0 : @f)
;;
elif yb & ~d.sgnmask == d.inf
if xe < 0
/* (0 < x < 1)^oo = 0 */
-> (0.0 : @f)
else
/* (x > 1)^oo = oo */
-> d.frombits(d.inf)
;;
;;
/*
Just do the dumb thing: compute exp( log(x) · y ). All the hard work
goes into computing log(x) with high enough precision that our exp()
implementation becomes the weakest link. The Table Maker's Dilemma
says that quantifying "high enough" is a very difficult problem, but
experimentally twice the precision of @f appears quite good enough.
*/
var ln_x_hi, ln_x_lo
(ln_x_hi, ln_x_lo) = d.log_overkill(x)
var final_exp_hi, final_exp_lo
(final_exp_hi, final_exp_lo) = d.split_mul(ln_x_hi, ln_x_lo, y, 0.0)
if d.tobits(final_exp_hi) & d.expmask == d.inf
/*
split_mul doesn't actually preserve the sign of infinity, so we can't
trust final_exp_hi to get it.
*/
if (d.tobits(ln_x_hi) & d.sgnmask) == (yb & d.sgnmask)
/* e^+Inf */
-> d.frombits(d.inf)
else
/* e^-Inf */
-> 0.0
;;
;;
if final_exp_hi < d.frombits(d.exp_zero_border)
-> 0.0
;;
var z_hi = exp(final_exp_hi)
if d.tobits(z_hi) & d.expmask == d.inf
-> z_hi
;;
-> d.fma(z_hi, final_exp_lo, z_hi)
}