shithub: mc

Download patch

ref: 007b768f79b8a45846062086451484a32c1e30fd
parent: 373facebac71ac1e6743d30152543627ad115cf7
author: S. Gilles <sgilles@math.umd.edu>
date: Mon Mar 12 00:24:33 EDT 2018

Make fltXYexplode and fltXYassem inverses of each other

The exponent and significand (mantissa) returned from fltXYexplode
are the numerical values, not just the bit patterns. Informally,

    flt64explode(1.23 x 10^101)

would return (false, 123000..., 101), where the significand has
precisely 52 bits after the initial `1'.

--- a/lib/std/fltbits.myr
+++ b/lib/std/fltbits.myr
@@ -20,6 +20,9 @@
 const flt64frombits	= {bits;	-> (&bits : flt64#)#}
 const flt32frombits	= {bits;	-> (&bits : flt32#)#}
 
+const Dblbias = 1023
+const Fltbias = 127
+
 const flt64explode = {flt
 	var bits, isneg, mant, uexp, exp
 
@@ -31,16 +34,10 @@
 	/* add back the implicit bit if this is not a denormal */
 	if uexp != 0
 		mant |= 1ul << 52
-		exp = (uexp : int64)
-	else
-		exp = 1
 	;;
-	/*
-	   adjust for exponent bias. nb: because we are
-	   treating the mantissa as m.0 instead of 0.m,
-	   our exponent bias needs to be offset by the
-	   size of m
-	*/
+
+	/* adjust for exponent bias */
+	exp = (uexp : int64) - Dblbias
 	-> (isneg, mant, exp)
 }
 
@@ -55,16 +52,10 @@
 	/* add back the implicit bit if this is not a denormal */
 	if uexp != 0
 		mant |= 1 << 23
-		exp = (uexp : int32)
-	else
-		exp = 1
 	;;
-	/*
-	   adjust for exponent bias. nb: because we are
-	   treating the mantissa as m.0 instead of 0.m,
-	   our exponent bias needs to be offset by the
-	   size of m
-	*/
+
+	/* adjust for exponent bias */
+	exp = (uexp : int32) - Fltbias
 	-> (isneg, mant, exp)
 }
 
@@ -72,7 +63,7 @@
 	var s, m, e
 
 	s = (sign : uint64)
-	e = (exp : uint64) & 0x7ff
+	e = (exp + Dblbias : uint64) & 0x7ff
 	m = (mant : uint64) & ((1ul<<52) - 1)
 	-> std.flt64frombits((s << 63) | (e << 52) | m)
 }
@@ -81,7 +72,7 @@
 	var s, m, e
 
 	s = (sign : uint32)
-	e = (exp : uint32) & 0xff
+	e = (exp + Fltbias : uint32) & 0xff
 	m = (mant : uint32) & ((1<<23) - 1)
 	-> std.flt32frombits(s << 31 | e << 23 | m)
 
--- a/lib/std/fltfmt.myr
+++ b/lib/std/fltfmt.myr
@@ -25,7 +25,8 @@
 	var isneg, exp, mant
 
 	(isneg, mant, exp) = flt64explode(val)
-	dragon4(sb, isneg, mant, (exp - 52 : int64), Dblbias, mode, precision)
+	exp = max(exp, 1 - Dblbias)
+	dragon4(sb, isneg, mant, exp - 52, Dblbias, mode, precision)
 }
 
 const flt32bfmt = {sb, val, mode, precision
@@ -32,6 +33,7 @@
 	var isneg, exp, mant
 
 	(isneg, mant, exp) = flt32explode(val)
+	exp = (max((exp : int64), 1 - Fltbias) : int32)
 	dragon4(sb, isneg, (mant : uint64), (exp - 23 : int64), Fltbias, mode, precision)
 }
 
@@ -64,9 +66,9 @@
 	/* initialize */
 	roundup = false
 	u = mkbigint(0)
-	r = bigshli(mkbigint(f), max(e - p, 0))
-	s = bigshli(mkbigint(1), max(0, -(e - p)))
-	mm = bigshli(mkbigint(1), max((e - p), 0))
+	r = bigshli(mkbigint(f), max(e, 0))
+	s = bigshli(mkbigint(1), max(0, -e))
+	mm = bigshli(mkbigint(1), max(e, 0))
 	mp = bigdup(mm)
 
 	/* fixup: unequal gaps */
--- a/lib/std/test/fltbits.myr
+++ b/lib/std/test/fltbits.myr
@@ -9,6 +9,8 @@
 		[.name = "bits-roundtrip-64", .fn = bitsround64],
 		[.name = "flt32bits", .fn = flt32bits],
 		[.name = "flt64bits", .fn = flt64bits],
+		[.name = "explode-roundtrip-32", .fn = exploderound32],
+		[.name = "explode-roundtrip-64", .fn = exploderound64],
 	][:])
 }
 
@@ -96,3 +98,64 @@
 		testr.check(c, u == uprime, "flt64bits wrong for {}:  0x{x} != 0x{x}", f, u, uprime)
 	;;
 }
+
+const exploderound32 = {c
+	for f : [1.0, 0.00001, 123.45, 1111111111111111.2, -1.9, -0.0001, std.flt32nan()][:]
+		var n, e, s
+		(n, e, s) = std.flt32explode(f)
+		var g = std.flt32assem(n, e, s)
+		testr.check(c, f == g, "assem o explode non-identity: {} != {}", f, g)
+	;;
+
+	/*
+	   The exponents and significands need to be rather specific
+	   in order for flt32assem to work as expected
+	 */
+	for (n, e, s) : [
+			(false, 0, -127),
+			(true, 0, -127),
+			(false, 0x399, -127),
+			(true, 0x23, -127),
+			(false, (1 << 23) | 0x23, 45),
+			(true, (1 << 23) | 0x3a2, -12),
+			(true, (1 << 23) | 0x3a1, -126),
+		][:]
+		var m, f, t
+		(m, f, t) = std.flt32explode(std.flt32assem(n, e, s))
+		testr.check(c, n == m, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+		testr.check(c, e == f, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+		testr.check(c, s == t, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+	;;
+}
+
+const exploderound64 = {c
+	for f : [1.0, 0.00001, 123.45, 1111111111111e+309, -1.9, -0.0001, std.flt64nan()][:]
+		var n, e, s
+		(n, e, s) = std.flt64explode(f)
+		var g = std.flt64assem(n, e, s)
+		testr.check(c, f == g, "assem o explode non-identity: {} != {}", f, g)
+	;;
+
+	/*
+	   The exponents and significands need to be rather specific
+	   in order for flt64assem to work as expected
+	 */
+	for (n, e, s) : [
+			(false, 0, -1023),
+			(true, 0, -1023),
+			(false, 0x399, -1023),
+			(true, 0x23, -1023),
+			(false, (1 << 52) | 0xa33bc, 45),
+			(true, (1 << 52) | 0x3, -12),
+			(true, (1 << 52) | 0x11aabbcc, -200),
+			(true, (1 << 52) | 0x3a1, 543),
+			(true, (1 << 52) | 0x99aa228, 1001),
+		][:]
+		var m, f, t
+		(m, f, t) = std.flt64explode(std.flt64assem(n, e, s))
+		testr.check(c, n == m, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+		testr.check(c, e == f, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+		testr.check(c, s == t, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+	;;
+}
+
--- a/lib/std/test/fmt.myr
+++ b/lib/std/test/fmt.myr
@@ -66,6 +66,7 @@
 	check("7b", "{x}", 123)
 	check("0x7b", "0x{x}", 123)
 	check("0.0", "{}", 0.0)
+	check("-0.0", "{}", -0.0)
 	check("0.3", "{}", 0.3)
 	check("0.3", "{}", (0.3 : flt32))
 	check("1.0", "{}", 1.0)