ref: 007b768f79b8a45846062086451484a32c1e30fd
parent: 373facebac71ac1e6743d30152543627ad115cf7
author: S. Gilles <sgilles@math.umd.edu>
date: Mon Mar 12 00:24:33 EDT 2018
Make fltXYexplode and fltXYassem inverses of each other. The exponent and significand (mantissa) returned from fltXYexplode are the numerical values, not just the bit patterns. Informally, flt64explode(1.23 x 10^101) would return (false, 123000..., 101), where the significand has precisely 52 bits after the initial `1'.
--- a/lib/std/fltbits.myr
+++ b/lib/std/fltbits.myr
@@ -20,6 +20,9 @@
const flt64frombits = {bits; -> (&bits : flt64#)#}
const flt32frombits = {bits; -> (&bits : flt32#)#}
+const Dblbias = 1023
+const Fltbias = 127
+
const flt64explode = {flt
var bits, isneg, mant, uexp, exp
@@ -31,16 +34,10 @@
/* add back the implicit bit if this is not a denormal */
if uexp != 0
mant |= 1ul << 52
- exp = (uexp : int64)
- else
- exp = 1
;;
- /*
- adjust for exponent bias. nb: because we are
- treating the mantissa as m.0 instead of 0.m,
- our exponent bias needs to be offset by the
- size of m
- */
+
+ /* adjust for exponent bias */
+ exp = (uexp : int64) - Dblbias
-> (isneg, mant, exp)
}
@@ -55,16 +52,10 @@
/* add back the implicit bit if this is not a denormal */
if uexp != 0
mant |= 1 << 23
- exp = (uexp : int32)
- else
- exp = 1
;;
- /*
- adjust for exponent bias. nb: because we are
- treating the mantissa as m.0 instead of 0.m,
- our exponent bias needs to be offset by the
- size of m
- */
+
+ /* adjust for exponent bias */
+ exp = (uexp : int32) - Fltbias
-> (isneg, mant, exp)
}
@@ -72,7 +63,7 @@
var s, m, e
s = (sign : uint64)
- e = (exp : uint64) & 0x7ff
+ e = (exp + Dblbias : uint64) & 0x7ff
m = (mant : uint64) & ((1ul<<52) - 1)
-> std.flt64frombits((s << 63) | (e << 52) | m)
}
@@ -81,7 +72,7 @@
var s, m, e
s = (sign : uint32)
- e = (exp : uint32) & 0xff
+ e = (exp + Fltbias : uint32) & 0xff
m = (mant : uint32) & ((1<<23) - 1)
-> std.flt32frombits(s << 31 | e << 23 | m)
--- a/lib/std/fltfmt.myr
+++ b/lib/std/fltfmt.myr
@@ -25,7 +25,8 @@
var isneg, exp, mant
(isneg, mant, exp) = flt64explode(val)
- dragon4(sb, isneg, mant, (exp - 52 : int64), Dblbias, mode, precision)
+ exp = max(exp, 1 - Dblbias)
+ dragon4(sb, isneg, mant, exp - 52, Dblbias, mode, precision)
}
const flt32bfmt = {sb, val, mode, precision
@@ -32,6 +33,7 @@
var isneg, exp, mant
(isneg, mant, exp) = flt32explode(val)
+ exp = (max((exp : int64), 1 - Fltbias) : int32)
dragon4(sb, isneg, (mant : uint64), (exp - 23 : int64), Fltbias, mode, precision)
}
@@ -64,9 +66,9 @@
/* initialize */
roundup = false
u = mkbigint(0)
- r = bigshli(mkbigint(f), max(e - p, 0))
- s = bigshli(mkbigint(1), max(0, -(e - p)))
- mm = bigshli(mkbigint(1), max((e - p), 0))
+ r = bigshli(mkbigint(f), max(e, 0))
+ s = bigshli(mkbigint(1), max(0, -e))
+ mm = bigshli(mkbigint(1), max(e, 0))
mp = bigdup(mm)
/* fixup: unequal gaps */
--- a/lib/std/test/fltbits.myr
+++ b/lib/std/test/fltbits.myr
@@ -9,6 +9,8 @@
[.name = "bits-roundtrip-64", .fn = bitsround64],
[.name = "flt32bits", .fn = flt32bits],
[.name = "flt64bits", .fn = flt64bits],
+ [.name = "explode-roundtrip-32", .fn = exploderound32],
+ [.name = "explode-roundtrip-64", .fn = exploderound64],
][:])
}
@@ -96,3 +98,64 @@
testr.check(c, u == uprime, "flt64bits wrong for {}: 0x{x} != 0x{x}", f, u, uprime)
;;
}
+
+const exploderound32 = {c
+ for f : [1.0, 0.00001, 123.45, 1111111111111111.2, -1.9, -0.0001, std.flt32nan()][:]
+ var n, e, s
+ (n, e, s) = std.flt32explode(f)
+ var g = std.flt32assem(n, e, s)
+ testr.check(c, f == g, "assem o explode non-identity: {} != {}", f, g)
+ ;;
+
+ /*
+ The exponents and significands need to be rather specific
+ in order for flt32assem to work as expected
+ */
+ for (n, e, s) : [
+ (false, 0, -127),
+ (true, 0, -127),
+ (false, 0x399, -127),
+ (true, 0x23, -127),
+ (false, (1 << 23) | 0x23, 45),
+ (true, (1 << 23) | 0x3a2, -12),
+ (true, (1 << 23) | 0x3a1, -126),
+ ][:]
+ var m, f, t
+ (m, f, t) = std.flt32explode(std.flt32assem(n, e, s))
+ testr.check(c, n == m, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+ testr.check(c, e == f, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+ testr.check(c, s == t, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+ ;;
+}
+
+const exploderound64 = {c
+ for f : [1.0, 0.00001, 123.45, 1111111111111e+309, -1.9, -0.0001, std.flt64nan()][:]
+ var n, e, s
+ (n, e, s) = std.flt64explode(f)
+ var g = std.flt64assem(n, e, s)
+ testr.check(c, f == g, "assem o explode non-identity: {} != {}", f, g)
+ ;;
+
+ /*
+ The exponents and significands need to be rather specific
+ in order for flt64assem to work as expected
+ */
+ for (n, e, s) : [
+ (false, 0, -1023),
+ (true, 0, -1023),
+ (false, 0x399, -1023),
+ (true, 0x23, -1023),
+ (false, (1 << 52) | 0xa33bc, 45),
+ (true, (1 << 52) | 0x3, -12),
+ (true, (1 << 52) | 0x11aabbcc, -200),
+ (true, (1 << 52) | 0x3a1, 543),
+ (true, (1 << 52) | 0x99aa228, 1001),
+ ][:]
+ var m, f, t
+ (m, f, t) = std.flt64explode(std.flt64assem(n, e, s))
+ testr.check(c, n == m, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+ testr.check(c, e == f, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+ testr.check(c, s == t, "explode o assem non-identity: {} != {}", (n, e, s), (m, f, t))
+ ;;
+}
+
--- a/lib/std/test/fmt.myr
+++ b/lib/std/test/fmt.myr
@@ -66,6 +66,7 @@
check("7b", "{x}", 123)
check("0x7b", "0x{x}", 123)
check("0.0", "{}", 0.0)
+ check("-0.0", "{}", -0.0)
check("0.3", "{}", 0.3)
check("0.3", "{}", (0.3 : flt32))
check("1.0", "{}", 1.0)