shithub: mc

Download patch

ref: d24386b8f13e0d790203a81c7a7eb598c5b6bf18
parent: dbe60482e84a28a55ae6d9f5a7c9728e00f690d3
author: S. Gilles <sgilles@math.umd.edu>
date: Wed Mar 21 06:29:44 EDT 2018

Implement and test fma32

--- a/lib/math/fpmath-fma-impl.myr
+++ b/lib/math/fpmath-fma-impl.myr
@@ -14,20 +14,39 @@
 	var zd : flt64 = flt64fromflt32(z)
 	var prod : flt64 = xd * yd
 	var r : flt64 = prod + zd
-	var r32 : flt32 = flt32fromflt64(r)
-	var rn, re, rs
-	(rn, re, rs) = std.flt64explode(r)
 
-	if re == 1023 || r - prod == zd || rs & 0x1fffffff != 0x10000000
-		-> r32
+	var zn, ze, zs
+	(zn, ze, zs) = std.flt32explode(z)
+	if ze == -127
+		ze = -126
 	;;
+	var pn, pe, ps
+	(pn, pe, ps) = std.flt64explode(prod)
+	if pe == -1023
+		pe = -1022
+	;;
 
-	/*
-           This is a non-infinity, non-NaN, non-exact result, and
-           the bits we cut off in truncating to r32 are exactly a
-           halfway case, so rounding might be difficult.
-	 */
-	-> 0.0
+	var beyond : int64 = 0
+	if pe - 52 > (ze : int64) - 23
+		/*
+		   Identify all the bits of z that were too far
+		   away to be added into the product. If there are
+		   any, they will be used later to influence rounding
+		   away from the halfway mark.
+		 */
+		var shift = std.min((pe - 52) - ((ze : int64) - 23), 64)
+		var zs1 : uint64 = (zs : uint64)
+		if zs1 & shr((-1 : uint64), 64 - shift) != 0
+			if zn == pn
+				beyond = 1
+			else
+				beyond = -1
+			;;
+		;;
+	;;
+
+	var r32 : flt32 = flt32fromflt64(r, beyond)
+	-> r32
 }
 
 const flt64fromflt32 = {f : flt32
@@ -36,8 +55,8 @@
 	var xs : uint64 = (s : uint64)
 	var xe : int64 = (e : int64)
 
-	if e == 127
-		-> std.flt64assem(n, 1023, xs)
+	if e == 128
+		-> std.flt64assem(n, 1024, xs)
 	elif e == -127
 		/*
 		  All subnormals in single precision (except 0.0s)
@@ -44,31 +63,31 @@
 		  can be upgraded to double precision, since the
 		  exponent range is so much wider.
 		 */
-		var first1 = find_first1_64(xs, 52)
+		var first1 = find_first1_64(xs, 23)
 		if first1 < 0
 			-> std.flt64assem(n, -1023, 0)
 		;;
 		xs = xs << (52 - (first1 : uint64))
-		xe = -127 + (52 - first1)
+		xe = -126 - (23 - first1)
 		-> std.flt64assem(n, xe, xs)
 	;;
 
-	-> std.flt64assem(n, xe, xs)
+	-> std.flt64assem(n, xe, xs << (52 - 23))
 }
 
-const flt32fromflt64 = {f : flt64
-	var n, e, s
+const flt32fromflt64 = {f : flt64, beyond : int64
+	var n : bool, e : int64, s : uint64
 	(n, e, s) = std.flt64explode(f)
 	var ts : uint32
 	var te : int32 = (e : int32)
 
-	if e >= 127
+	if e >= 128
 		if e == 1023 && s != 0
 			/* NaN */
-			-> std.flt32assem(n, 127, 1)
+			-> std.flt32assem(n, 128, 1)
 		else
 			/* infinity */
-			-> std.flt32assem(n, 127, 0)
+			-> std.flt32assem(n, 128, 0)
 		;;
 	;;
 
@@ -75,8 +94,12 @@
 	if e >= -126
 		/* normal */
 		ts = ((s >> (52 - 23)) : uint32)
-		if need_round_away(0, s, 52 - 23)
+		if need_round_away(0, s, 52 - 23, beyond)
 			ts++
+			if ts & (1 << 24) != 0
+				ts >>= 1
+				te++
+			;;
 		;;
 		-> std.flt32assem(n, te, ts)
 	;;
@@ -88,8 +111,10 @@
 
 	/* subnormal (at least, it will be) */
 	te = -127
-	ts = (shr(s, (52 - 23) + (-1022 - e)) : uint32)
-	if need_round_away(0, s, (52 - 23) + (-1022 - e))
+	var shift : int64 = (52 - 23) + (-126 - e)
+	var ts1 = shr(s, shift)
+	ts = (ts1 : uint32)
+	if need_round_away(0, s, shift, beyond)
 		ts++
 	;;
 	-> std.flt32assem(n, te, ts)
@@ -249,7 +274,7 @@
 			res_s |= shl(res_l, -12 + (res_lastbit_e + 64) - (-1022))
 		;;
 
-		if need_round_away(res_h, res_l, res_first1 + (-1074 - res_firstbit_e))
+		if need_round_away(res_h, res_l, res_first1 + (-1074 - res_firstbit_e), 0)
 			res_s++
 		;;
 
@@ -272,13 +297,13 @@
 
 	if res_first1 - 52 >= 64
 		res_s = shr(res_h, (res_first1 : int64) - 64 - 52)
-		if need_round_away(res_h, res_l, res_first1 - 52)
+		if need_round_away(res_h, res_l, res_first1 - 52, 0)
 			res_s++
 		;;
 	elif res_first1 - 52 >= 0
 		res_s = shl(res_h, 64 - (res_first1 - 52))
 		res_s |= shr(res_l, res_first1 - 52)
-		if need_round_away(res_h, res_l, res_first1 - 52)
+		if need_round_away(res_h, res_l, res_first1 - 52, 0)
 			res_s++
 		;;
 	else
@@ -302,7 +327,7 @@
 }
 
 /* >> and <<, but without wrapping when the shift is >= 64 */
-const shr = {u : uint64, s : int64
+const shr : (u : uint64, s : int64 -> uint64) = {u : uint64, s : int64
 	if (s : uint64) >= 64
 		-> 0
 	else
@@ -310,7 +335,7 @@
 	;;
 }
 
-const shl = {u : uint64, s : int64
+const shl : (u : uint64, s : int64 -> uint64) = {u : uint64, s : int64
 	if (s : uint64) >= 64
 		-> 0
 	else
@@ -450,17 +475,21 @@
 
     - following bitpos_last is a 1, then a zero sequence, and the
       round would be to even
+
+   The beyond parameter indicates whether there is lingering
+   addition/subtraction past the range of l. This adds a bit more
+   information about rounding before we hit round-to-even.
  */
-const need_round_away = {h : uint64, l : uint64, bitpos_last : int64
+const need_round_away = {h : uint64, l : uint64, bitpos_last : int64, beyond : int64
 	var first_omitted_is_1 = false
-	var nonzero_beyond = false
+	var nonzero_beyond = beyond > 0
 	if bitpos_last > 64
 		first_omitted_is_1 = h & shl(1, bitpos_last - 1 - 64) != 0
-		nonzero_beyond = h & shr((-1 : uint64), 2 + 64 - (bitpos_last - 64)) != 0
+		nonzero_beyond = nonzero_beyond || h & shr((-1 : uint64), 2 + 64 - (bitpos_last - 64)) != 0
 		nonzero_beyond = nonzero_beyond || (l != 0)
 	else
 		first_omitted_is_1 = l & shl(1, bitpos_last - 1) != 0
-		nonzero_beyond = l & shr((-1 : uint64), 2 + 64 - bitpos_last) != 0
+		nonzero_beyond = nonzero_beyond || l & shr((-1 : uint64), 1 + 64 - bitpos_last) != 0
 	;;
 
 	if !first_omitted_is_1
@@ -479,5 +508,5 @@
 		hl_is_odd = l & shl(1, bitpos_last) != 0
 	;;
 
-	-> hl_is_odd
+	-> hl_is_odd && (beyond >= 0)
 }
--- a/lib/math/test/fpmath-fma-impl.myr
+++ b/lib/math/test/fpmath-fma-impl.myr
@@ -10,7 +10,39 @@
 }
 
 const fma01 = {c
-	/* Put the flt32 tests here */
+	var inputs : (uint32, uint32, uint32, uint32)[:] = [
+		/*
+		   These are mostly obtained by running fpmath-consensus
+		   with seed 1234. Each (mostly) covers a different
+		   corner case.
+		 */
+		(0x000009a4, 0x00000000, 0x00000002, 0x00000002),
+		(0x69000000, 0x90008002, 0x68348026, 0x68348026),
+		(0x334802ab, 0x49113e8d, 0x90aea62e, 0x3ce2f4c3),
+		(0x5c35d8c1, 0x12dcb6e2, 0x6c1a8cc2, 0x6c1a8cc2),
+		(0xf6266d83, 0x2b3e04e8, 0x62f99bda, 0x62bbd79e),
+		(0x7278e907, 0x75f6c0f1, 0xf6f9b8e0, 0x7f800000),
+		(0xd7748eeb, 0x6737b23e, 0x68e3bbc7, 0xff2f7c71),
+		(0x7f373de4, 0x3dcf90f0, 0xd22ac17c, 0x7d9492ca),
+		(0xb50fce04, 0x00cd486d, 0x03800000, 0x03800000),
+		(0xbb600000, 0x43b7161a, 0x8684d442, 0xbfa03357),
+		(0xf26f8a00, 0x4bfac000, 0xc74ba9fc, 0xfeeaa06c),
+		(0x55d1fa60, 0x32f20000, 0x1b1fea3d, 0x49467eaf),
+		(0x29e26c00, 0x62352000, 0xa0e845af, 0x4ca032a9),
+		(0x287650f8, 0x7cd00000, 0x94e85d5e, 0x65c821c9),
+		(0x7689f580, 0x91418000, 0xaa2822ae, 0xc8508e21),
+		(0xbd813cc0, 0x421f0000, 0x9f098e17, 0xc0208977),
+	][:]
+
+	for (x, y, z, r) : inputs
+		var xf : flt32 = std.flt32frombits(x)
+		var yf : flt32 = std.flt32frombits(y)
+		var zf : flt32 = std.flt32frombits(z)
+		var rf = math.fma(xf, yf, zf)
+		testr.check(c, rf == std.flt32frombits(r),
+			"0x{b=16,w=8,p=0} * 0x{b=16,w=8,p=0} + 0x{b=16,w=8,p=0} should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
+			x, y, z, r, std.flt32bits(rf))
+	;;
 }
 
 const fma02 = {c
@@ -47,6 +79,7 @@
 		var zf : flt64 = std.flt64frombits(z)
 		var rf = math.fma(xf, yf, zf)
 		testr.check(c, rf == std.flt64frombits(r),
-			"0x{b=16,w=16,p=0} * 0x{b=16,w=16,p=0} + 0x{b=16,w=16,p=0} should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}", x, y, z, r, std.flt64bits(rf))
+			"0x{b=16,w=16,p=0} * 0x{b=16,w=16,p=0} + 0x{b=16,w=16,p=0} should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+			x, y, z, r, std.flt64bits(rf))
 	;;
 }