ref: d24386b8f13e0d790203a81c7a7eb598c5b6bf18
parent: dbe60482e84a28a55ae6d9f5a7c9728e00f690d3
author: S. Gilles <sgilles@math.umd.edu>
date: Wed Mar 21 06:29:44 EDT 2018
Implement and test fma32
--- a/lib/math/fpmath-fma-impl.myr
+++ b/lib/math/fpmath-fma-impl.myr
@@ -14,20 +14,39 @@
var zd : flt64 = flt64fromflt32(z)
var prod : flt64 = xd * yd
var r : flt64 = prod + zd
- var r32 : flt32 = flt32fromflt64(r)
- var rn, re, rs
- (rn, re, rs) = std.flt64explode(r)
- if re == 1023 || r - prod == zd || rs & 0x1fffffff != 0x10000000
- -> r32
+ var zn, ze, zs
+ (zn, ze, zs) = std.flt32explode(z)
+ if ze == -127
+ ze = -126
;;
+ var pn, pe, ps
+ (pn, pe, ps) = std.flt64explode(prod)
+ if pe == -1023
+ pe = -1022
+ ;;
- /*
- This is a non-infinity, non-NaN, non-exact result, and
- the bits we cut off in truncating to r32 are exactly a
- halfway case, so rounding might be difficult.
- */
- -> 0.0
+ var beyond : int64 = 0
+ if pe - 52 > (ze : int64) - 23
+ /*
+ Identify all the bits of z that were too far
+ away to be added into the product. If there are
+ any, they will be used later to influence rounding
+ away from the halfway mark.
+ */
+ var shift = std.min((pe - 52) - ((ze : int64) - 23), 64)
+ var zs1 : uint64 = (zs : uint64)
+ if zs1 & shr((-1 : uint64), 64 - shift) != 0
+ if zn == pn
+ beyond = 1
+ else
+ beyond = -1
+ ;;
+ ;;
+ ;;
+
+ var r32 : flt32 = flt32fromflt64(r, beyond)
+ -> r32
}
const flt64fromflt32 = {f : flt32
@@ -36,8 +55,8 @@
var xs : uint64 = (s : uint64)
var xe : int64 = (e : int64)
- if e == 127
- -> std.flt64assem(n, 1023, xs)
+ if e == 128
+ -> std.flt64assem(n, 1024, xs)
elif e == -127
/*
All subnormals in single precision (except 0.0s)
@@ -44,31 +63,31 @@
can be upgraded to double precision, since the
exponent range is so much wider.
*/
- var first1 = find_first1_64(xs, 52)
+ var first1 = find_first1_64(xs, 23)
if first1 < 0
-> std.flt64assem(n, -1023, 0)
;;
xs = xs << (52 - (first1 : uint64))
- xe = -127 + (52 - first1)
+ xe = -126 - (23 - first1)
-> std.flt64assem(n, xe, xs)
;;
- -> std.flt64assem(n, xe, xs)
+ -> std.flt64assem(n, xe, xs << (52 - 23))
}
-const flt32fromflt64 = {f : flt64
- var n, e, s
+const flt32fromflt64 = {f : flt64, beyond : int64
+ var n : bool, e : int64, s : uint64
(n, e, s) = std.flt64explode(f)
var ts : uint32
var te : int32 = (e : int32)
- if e >= 127
+ if e >= 128
if e == 1023 && s != 0
/* NaN */
- -> std.flt32assem(n, 127, 1)
+ -> std.flt32assem(n, 128, 1)
else
/* infinity */
- -> std.flt32assem(n, 127, 0)
+ -> std.flt32assem(n, 128, 0)
;;
;;
@@ -75,8 +94,12 @@
if e >= -126
/* normal */
ts = ((s >> (52 - 23)) : uint32)
- if need_round_away(0, s, 52 - 23)
+ if need_round_away(0, s, 52 - 23, beyond)
ts++
+ if ts & (1 << 24) != 0
+ ts >>= 1
+ te++
+ ;;
;;
-> std.flt32assem(n, te, ts)
;;
@@ -88,8 +111,10 @@
/* subnormal (at least, it will be) */
te = -127
- ts = (shr(s, (52 - 23) + (-1022 - e)) : uint32)
- if need_round_away(0, s, (52 - 23) + (-1022 - e))
+ var shift : int64 = (52 - 23) + (-126 - e)
+ var ts1 = shr(s, shift)
+ ts = (ts1 : uint32)
+ if need_round_away(0, s, shift, beyond)
ts++
;;
-> std.flt32assem(n, te, ts)
@@ -249,7 +274,7 @@
res_s |= shl(res_l, -12 + (res_lastbit_e + 64) - (-1022))
;;
- if need_round_away(res_h, res_l, res_first1 + (-1074 - res_firstbit_e))
+ if need_round_away(res_h, res_l, res_first1 + (-1074 - res_firstbit_e), 0)
res_s++
;;
@@ -272,13 +297,13 @@
if res_first1 - 52 >= 64
res_s = shr(res_h, (res_first1 : int64) - 64 - 52)
- if need_round_away(res_h, res_l, res_first1 - 52)
+ if need_round_away(res_h, res_l, res_first1 - 52, 0)
res_s++
;;
elif res_first1 - 52 >= 0
res_s = shl(res_h, 64 - (res_first1 - 52))
res_s |= shr(res_l, res_first1 - 52)
- if need_round_away(res_h, res_l, res_first1 - 52)
+ if need_round_away(res_h, res_l, res_first1 - 52, 0)
res_s++
;;
else
@@ -302,7 +327,7 @@
}
/* >> and <<, but without wrapping when the shift is >= 64 */
-const shr = {u : uint64, s : int64
+const shr : (u : uint64, s : int64 -> uint64) = {u : uint64, s : int64
if (s : uint64) >= 64
-> 0
else
@@ -310,7 +335,7 @@
;;
}
-const shl = {u : uint64, s : int64
+const shl : (u : uint64, s : int64 -> uint64) = {u : uint64, s : int64
if (s : uint64) >= 64
-> 0
else
@@ -450,17 +475,21 @@
- following bitpos_last is a 1, then a zero sequence, and the
round would be to even
+
+ The beyond parameter indicates whether there is lingering
+ addition/subtraction past the range of l. This adds a bit more
+ information about rounding before we hit round-to-even.
*/
-const need_round_away = {h : uint64, l : uint64, bitpos_last : int64
+const need_round_away = {h : uint64, l : uint64, bitpos_last : int64, beyond : int64
var first_omitted_is_1 = false
- var nonzero_beyond = false
+ var nonzero_beyond = beyond > 0
if bitpos_last > 64
first_omitted_is_1 = h & shl(1, bitpos_last - 1 - 64) != 0
- nonzero_beyond = h & shr((-1 : uint64), 2 + 64 - (bitpos_last - 64)) != 0
+ nonzero_beyond = nonzero_beyond || h & shr((-1 : uint64), 2 + 64 - (bitpos_last - 64)) != 0
nonzero_beyond = nonzero_beyond || (l != 0)
else
first_omitted_is_1 = l & shl(1, bitpos_last - 1) != 0
- nonzero_beyond = l & shr((-1 : uint64), 2 + 64 - bitpos_last) != 0
+ nonzero_beyond = nonzero_beyond || l & shr((-1 : uint64), 1 + 64 - bitpos_last) != 0
;;
if !first_omitted_is_1
@@ -479,5 +508,5 @@
hl_is_odd = l & shl(1, bitpos_last) != 0
;;
- -> hl_is_odd
+ -> hl_is_odd && (beyond >= 0)
}
--- a/lib/math/test/fpmath-fma-impl.myr
+++ b/lib/math/test/fpmath-fma-impl.myr
@@ -10,7 +10,39 @@
}
const fma01 = {c
- /* Put the flt32 tests here */
+ var inputs : (uint32, uint32, uint32, uint32)[:] = [
+ /*
+ These are mostly obtained by running fpmath-consensus
+ with seed 1234. Each (mostly) covers a different
+ corner case.
+ */
+ (0x000009a4, 0x00000000, 0x00000002, 0x00000002),
+ (0x69000000, 0x90008002, 0x68348026, 0x68348026),
+ (0x334802ab, 0x49113e8d, 0x90aea62e, 0x3ce2f4c3),
+ (0x5c35d8c1, 0x12dcb6e2, 0x6c1a8cc2, 0x6c1a8cc2),
+ (0xf6266d83, 0x2b3e04e8, 0x62f99bda, 0x62bbd79e),
+ (0x7278e907, 0x75f6c0f1, 0xf6f9b8e0, 0x7f800000),
+ (0xd7748eeb, 0x6737b23e, 0x68e3bbc7, 0xff2f7c71),
+ (0x7f373de4, 0x3dcf90f0, 0xd22ac17c, 0x7d9492ca),
+ (0xb50fce04, 0x00cd486d, 0x03800000, 0x03800000),
+ (0xbb600000, 0x43b7161a, 0x8684d442, 0xbfa03357),
+ (0xf26f8a00, 0x4bfac000, 0xc74ba9fc, 0xfeeaa06c),
+ (0x55d1fa60, 0x32f20000, 0x1b1fea3d, 0x49467eaf),
+ (0x29e26c00, 0x62352000, 0xa0e845af, 0x4ca032a9),
+ (0x287650f8, 0x7cd00000, 0x94e85d5e, 0x65c821c9),
+ (0x7689f580, 0x91418000, 0xaa2822ae, 0xc8508e21),
+ (0xbd813cc0, 0x421f0000, 0x9f098e17, 0xc0208977),
+ ][:]
+
+ for (x, y, z, r) : inputs
+ var xf : flt32 = std.flt32frombits(x)
+ var yf : flt32 = std.flt32frombits(y)
+ var zf : flt32 = std.flt32frombits(z)
+ var rf = math.fma(xf, yf, zf)
+ testr.check(c, rf == std.flt32frombits(r),
+ "0x{b=16,w=8,p=0} * 0x{b=16,w=8,p=0} + 0x{b=16,w=8,p=0} should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+ x, y, z, r, std.flt32bits(rf))
+ ;;
}
const fma02 = {c
@@ -47,6 +79,7 @@
var zf : flt64 = std.flt64frombits(z)
var rf = math.fma(xf, yf, zf)
testr.check(c, rf == std.flt64frombits(r),
- "0x{b=16,w=16,p=0} * 0x{b=16,w=16,p=0} + 0x{b=16,w=16,p=0} should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}", x, y, z, r, std.flt64bits(rf))
+ "0x{b=16,w=16,p=0} * 0x{b=16,w=16,p=0} + 0x{b=16,w=16,p=0} should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+ x, y, z, r, std.flt64bits(rf))
;;
}