ref: 4acd9ee51a541f94e81457b1a7fb3accbd790696
parent: 55f8d064387dcf0229b6020d5a8e57e660d358c7
author: S. Gilles <sgilles@math.umd.edu>
date: Thu Apr 12 23:08:12 EDT 2018
Break out some fpmath functions to a utility file.
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -16,5 +16,8 @@
# summation
	sum-impl.myr

+ # util
+ util.myr
+
lib ../std:std
;;
--- a/lib/math/fma-impl.myr
+++ b/lib/math/fma-impl.myr
@@ -1,5 +1,7 @@
use std

+use "util"
+
pkg math =
pkglocal const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)
pkglocal const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)
@@ -136,83 +138,6 @@
-> flt32fromflt64(std.flt64assem(rn, re, rs))
}

-const flt64fromflt32 = {f : flt32
- var n, e, s
- (n, e, s) = std.flt32explode(f)
- var xs : uint64 = (s : uint64)
- var xe : int64 = (e : int64)
-
- if e == 128
- -> std.flt64assem(n, 1024, xs)
- elif e == -127
- /*
- All subnormals in single precision (except 0.0s)
- can be upgraded to double precision, since the
- exponent range is so much wider.
- */
- var first1 = find_first1_64(xs, 23)
- if first1 < 0
- -> std.flt64assem(n, -1023, 0)
- ;;
- xs = xs << (52 - (first1 : uint64))
- xe = -126 - (23 - first1)
- -> std.flt64assem(n, xe, xs)
- ;;
-
- -> std.flt64assem(n, xe, xs << (52 - 23))
-}
-
-const flt32fromflt64 = {f : flt64
- var n : bool, e : int64, s : uint64
- (n, e, s) = std.flt64explode(f)
- var ts : uint32
- var te : int32 = (e : int32)
-
- if e >= 128
-		if e == 1024 && s != 0
- /* NaN */
- -> std.flt32assem(n, 128, 1)
- else
- /* infinity */
- -> std.flt32assem(n, 128, 0)
- ;;
- ;;
-
- if e >= -127
- /* normal */
- ts = ((s >> (52 - 23)) : uint32)
- if need_round_away(0, s, 52 - 23)
- ts++
- if ts & (1 << 24) != 0
- ts >>= 1
- te++
- ;;
- ;;
- if te >= -126
- -> std.flt32assem(n, te, ts)
- ;;
- ;;
-
- /* subnormal already, will have to go to 0 */
- if e == -1023
- -> std.flt32assem(n, -127, 0)
- ;;
-
- /* subnormal (at least, it will be) */
- te = -127
- var shift : int64 = (52 - 23) + (-126 - e)
- var ts1 = shr(s, shift)
- ts = (ts1 : uint32)
- if need_round_away(0, s, shift)
- ts++
- if ts & (1 << 23) != 0
- /* false alarm, it's normal again */
- te++
- ;;
- ;;
- -> std.flt32assem(n, te, ts)
-}
-
pkglocal const fma64 = {x : flt64, y : flt64, z : flt64
var xn : bool, yn : bool, zn : bool
var xe : int64, ye : int64, ze : int64
@@ -419,23 +344,6 @@
-> std.flt64assem(res_n, res_firstbit_e, res_s)
}

-/* >> and <<, but without wrapping when the shift is >= 64 */
-const shr : (u : uint64, s : int64 -> uint64) = {u : uint64, s : int64
- if (s : uint64) >= 64
- -> 0
- else
- -> u >> (s : uint64)
- ;;
-}
-
-const shl : (u : uint64, s : int64 -> uint64) = {u : uint64, s : int64
- if (s : uint64) >= 64
- -> 0
- else
- -> u << (s : uint64)
- ;;
-}
-
/*
Add (a << s) to [ h ][ l ], where if s < 0 then a corresponding
right-shift is used. This is aligned such that if s == 0, then
@@ -535,67 +443,4 @@
	;;

-> `std.Equal
-}
-
-/* Find the first 1 bit in a bitstring */
-const find_first1_64 : (b : uint64, start : int64 -> int64) = {b : uint64, start : int64
- for var j = start; j >= 0; --j
- var m = shl(1, j)
- if b & m != 0
- -> j
- ;;
- ;;
-
- -> -1
-}
-
-const find_first1_64_hl = {h, l, start
- var first1_h = find_first1_64(h, start - 64)
- if first1_h >= 0
- -> first1_h + 64
- ;;
-
- -> find_first1_64(l, 63)
-}
-
-/*
- For [ h ][ l ], where bitpos_last is the position of the last
- bit that was included in the truncated result (l's last bit has
- position 0), decide whether rounding up/away is needed. This is
- true if
-
- - following bitpos_last is a 1, then a non-zero sequence, or
-
- - following bitpos_last is a 1, then a zero sequence, and the
- round would be to even
- */
-const need_round_away = {h : uint64, l : uint64, bitpos_last : int64
- var first_omitted_is_1 = false
- var nonzero_beyond = false
- if bitpos_last > 64
- first_omitted_is_1 = h & shl(1, bitpos_last - 1 - 64) != 0
-		nonzero_beyond = nonzero_beyond || h & shr((-1 : uint64), 1 + 64 - (bitpos_last - 64)) != 0
- nonzero_beyond = nonzero_beyond || (l != 0)
- else
- first_omitted_is_1 = l & shl(1, bitpos_last - 1) != 0
- nonzero_beyond = nonzero_beyond || l & shr((-1 : uint64), 1 + 64 - bitpos_last) != 0
- ;;
-
- if !first_omitted_is_1
- -> false
- ;;
-
- if nonzero_beyond
- -> true
- ;;
-
- var hl_is_odd = false
-
- if bitpos_last >= 64
- hl_is_odd = h & shl(1, bitpos_last - 64) != 0
- else
- hl_is_odd = l & shl(1, bitpos_last) != 0
- ;;
-
- -> hl_is_odd
}
--- /dev/null
+++ b/lib/math/util.myr
@@ -0,0 +1,215 @@
+use std
+
+pkg math =
+ const flt32fromflt64 : (f : flt64 -> flt32)
+ const flt64fromflt32 : (x : flt32 -> flt64)
+
+ /* For use in various normalizations */
+ const find_first1_64 : (b : uint64, start : int64 -> int64)
+ const find_first1_64_hl : (h : uint64, l : uint64, start : int64 -> int64)
+
+ /* >> and <<, but without wrapping when the shift is >= 64 */
+ const shr : (u : uint64, s : int64 -> uint64)
+ const shl : (u : uint64, s : int64 -> uint64)
+
+ /* Whether RN() requires incrementing after truncating */
+ const need_round_away : (h : uint64, l : uint64, bitpos_last : int64 -> bool)
+;;
+
+const flt64fromflt32 = {f : flt32
+ var n, e, s
+ (n, e, s) = std.flt32explode(f)
+ var xs : uint64 = (s : uint64)
+ var xe : int64 = (e : int64)
+
+ if e == 128
+ -> std.flt64assem(n, 1024, xs)
+ elif e == -127
+ /*
+ All subnormals in single precision (except 0.0s)
+ can be upgraded to double precision, since the
+ exponent range is so much wider.
+ */
+ var first1 = find_first1_64(xs, 23)
+ if first1 < 0
+ -> std.flt64assem(n, -1023, 0)
+ ;;
+ xs = xs << (52 - (first1 : uint64))
+ xe = -126 - (23 - first1)
+ -> std.flt64assem(n, xe, xs)
+ ;;
+
+ -> std.flt64assem(n, xe, xs << (52 - 23))
+}
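+
+/*
+   Every flt32 value is exactly representable as a flt64, so this
+   conversion never rounds. For example, the smallest flt32
+   subnormal (s == 1, e == -127) has its first 1 bit at position 0,
+   so it widens to the normal flt64 with xe == -149 and xs == 1 << 52.
+*/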
+
+const flt32fromflt64 = {f : flt64
+ var n : bool, e : int64, s : uint64
+ (n, e, s) = std.flt64explode(f)
+ var ts : uint32
+ var te : int32 = (e : int32)
+
+ if e >= 128
+		if e == 1024 && s != 0
+ /* NaN */
+ -> std.flt32assem(n, 128, 1)
+ else
+ /* infinity */
+ -> std.flt32assem(n, 128, 0)
+ ;;
+ ;;
+
+ if e >= -127
+ /* normal */
+ ts = ((s >> (52 - 23)) : uint32)
+ if need_round_away(0, s, 52 - 23)
+ ts++
+ if ts & (1 << 24) != 0
+ ts >>= 1
+ te++
+ ;;
+ ;;
+ if te >= -126
+ -> std.flt32assem(n, te, ts)
+ ;;
+ ;;
+
+ /* subnormal already, will have to go to 0 */
+ if e == -1023
+ -> std.flt32assem(n, -127, 0)
+ ;;
+
+ /* subnormal (at least, it will be) */
+ te = -127
+ var shift : int64 = (52 - 23) + (-126 - e)
+ var ts1 = shr(s, shift)
+ ts = (ts1 : uint32)
+ if need_round_away(0, s, shift)
+ ts++
+ if ts & (1 << 23) != 0
+ /* false alarm, it's normal again */
+ te++
+ ;;
+ ;;
+ -> std.flt32assem(n, te, ts)
+}
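+
+/*
+   For example, a finite flt64 with e in [128, 1023] is too large
+   for a flt32 and collapses to infinity above, while a flt64 just
+   below the flt32 normal range takes the subnormal path, where
+   need_round_away inspects the bits shifted out of the mantissa.
+*/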
+
+/* >> and <<, but without wrapping when the shift is >= 64 */
+const shr = {u : uint64, s : int64
+ if (s : uint64) >= 64
+ -> 0
+ else
+ -> u >> (s : uint64)
+ ;;
+}
+
+const shl = {u : uint64, s : int64
+ if (s : uint64) >= 64
+ -> 0
+ else
+ -> u << (s : uint64)
+ ;;
+}
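+
+/*
+   For example, shr(1, 64) == 0 and shl(1, 64) == 0, where a bare
+   >> or << would wrap the shift count on most hardware, while
+   shl(1, 63) == 0x8000000000000000 as usual.
+*/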
+
+/* Find the first 1 bit in a bitstring */
+const find_first1_64 = {b : uint64, start : int64
+ for var j = start; j >= 0; --j
+ var m = shl(1, j)
+ if b & m != 0
+ -> j
+ ;;
+ ;;
+
+ -> -1
+}
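+
+/*
+   For example, find_first1_64(0b0101, 63) == 2: the scan runs from
+   bit start down to bit 0 and returns the position of the most
+   significant set bit, or -1 if no set bit is found.
+*/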
+
+const find_first1_64_hl = {h, l, start
+ var first1_h = find_first1_64(h, start - 64)
+ if first1_h >= 0
+ -> first1_h + 64
+ ;;
+
+ -> find_first1_64(l, 63)
+}
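+
+/*
+   find_first1_64_hl treats [ h ][ l ] as a single 128-bit string:
+   for example, h == 1, l == 0, start == 64 yields 64.
+*/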
+
+/*
+ For [ h ][ l ], where bitpos_last is the position of the last
+ bit that was included in the truncated result (l's last bit has
+ position 0), decide whether rounding up/away is needed. This is
+ true if
+
+ - following bitpos_last is a 1, then a non-zero sequence, or
+
+ - following bitpos_last is a 1, then a zero sequence, and the
+ round would be to even
+ */
+const need_round_away = {h : uint64, l : uint64, bitpos_last : int64
+ var first_omitted_is_1 = false
+ var nonzero_beyond = false
+ if bitpos_last > 64
+ first_omitted_is_1 = h & shl(1, bitpos_last - 1 - 64) != 0
+		nonzero_beyond = nonzero_beyond || h & shr((-1 : uint64), 1 + 64 - (bitpos_last - 64)) != 0
+ nonzero_beyond = nonzero_beyond || (l != 0)
+ else
+ first_omitted_is_1 = l & shl(1, bitpos_last - 1) != 0
+ nonzero_beyond = nonzero_beyond || l & shr((-1 : uint64), 1 + 64 - bitpos_last) != 0
+ ;;
+
+ if !first_omitted_is_1
+ -> false
+ ;;
+
+ if nonzero_beyond
+ -> true
+ ;;
+
+ var hl_is_odd = false
+
+ if bitpos_last >= 64
+ hl_is_odd = h & shl(1, bitpos_last - 64) != 0
+ else
+ hl_is_odd = l & shl(1, bitpos_last) != 0
+ ;;
+
+ -> hl_is_odd
+}
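+
+/*
+   Worked examples, with h == 0 and bitpos_last == 3:
+
+   - l == 0b10111: the first omitted bit (bit 2) is 1 and bits 1..0
+     are nonzero, so the truncation must round away: true.
+
+   - l == 0b1100: the omitted bits are exactly 0b100, a tie, so we
+     round to even; the retained part (0b1) is odd, so the result
+     is again true.
+*/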