ref: bf035b686f58f139ee0b04746d3215e7bb8413e9
parent: 0dd6c9d5124707e4c69116745469e21f7e076429
author: S. Gilles <sgilles@math.umd.edu>
date: Thu May 3 07:13:58 EDT 2018
Implment scale2. This is, perhaps, a conforming implementation of scalB, but it is highly probable that, in the future, it will be desireable for scalB to accept arbitrary, exponents, not simply integers. Furthermore, we find scalB a non-intuitive name. Therefore, calling the function ‘scale2’ serves two purposes.
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -15,6 +15,9 @@
# polynomial evaluation methods
poly-impl.myr
+ # scalb (multiply x by 2^m)
+ scale2-impl.myr
+
# sqrt
sqrt-impl+posixy-x64-sse2.s
sqrt-impl.myr
--- a/lib/math/fpmath.myr
+++ b/lib/math/fpmath.myr
@@ -14,6 +14,9 @@
horner_poly : (x : @f, a : @f[:] -> @f)
horner_polyu : (x : @f, a : @u[:] -> @f)
+ /* scale2-impl */
+ scale2 : (f : @f, m : @i -> @f)
+
/* sqrt-impl */
sqrt : (f : @f -> @f)
@@ -73,6 +76,8 @@
horner_poly = {f, a; -> horner_poly32(f, a)}
horner_polyu = {f, a; -> horner_polyu32(f, a)}
+ scale2 = {f, m; -> scale232(f, m)}
+
sqrt = {f; -> sqrt32(f)}
kahan_sum = {l; -> kahan_sum32(l) }
@@ -93,6 +98,8 @@
horner_poly = {f, a; -> horner_poly64(f, a)}
horner_polyu = {f, a; -> horner_polyu64(f, a)}
+ scale2 = {f, m; -> scale264(f, m)}
+
sqrt = {f; -> sqrt64(f)}
kahan_sum = {l; -> kahan_sum64(l) }
@@ -120,6 +127,9 @@
extern const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
extern const horner_polyu64 : (f : flt64, a : uint64[:] -> flt64)
+
+extern const scale232 : (f : flt32, m : int32 -> flt32)
+extern const scale264 : (f : flt64, m : int64 -> flt64)
extern const sqrt32 : (x : flt32 -> flt32)
extern const sqrt64 : (x : flt64 -> flt64)
--- /dev/null
+++ b/lib/math/scale2-impl.myr
@@ -1,0 +1,73 @@
+use std
+
+use "fpmath"
+use "util"
+
+/*
+ The scaleB function recommended by the 2008 revision of IEEE
+ 754. Since only integer exponents are considered, the naive
+ approach works quite well.
+ */
+pkg math =
+ const scale232 : (f : flt32, m : int32 -> flt32)
+ const scale264 : (f : flt64, m : int64 -> flt64)
+;;
+
+const scale232 = {f : flt32, m : int32
+ var n, e, s
+ (n, e, s) = std.flt32explode(f)
+ (n, e, s) = scale2gen(n, e, s, -126, 127, 24, m)
+ -> std.flt32assem(n, e, s)
+}
+
+const scale264 = {f : flt64, m : int64
+ var n, e, s
+ (n, e, s) = std.flt64explode(f)
+ (n, e, s) = scale2gen(n, e, s, -1022, 1023, 53, m)
+ -> std.flt64assem(n, e, s)
+}
+
+generic scale2gen = {n : bool, e : @i, s : @u, emin : @i, emax : @i, p : @u, m : @i :: numeric, integral @i, numeric, integral @u
+ if e < emin && s == 0
+ -> (n, e, s)
+ elif m == 0
+ -> (n, e, s)
+ elif m < 0
+ var sprime = s
+ var eprime = e
+ if e < emin
+ sprime = s >> (-m)
+ if need_round_away(0, (s : uint64), (-m : int64))
+ sprime++
+ ;;
+ eprime = emin - 1
+ elif e + m < emin
+ sprime = s >> (emin - m - e)
+ if need_round_away(0, (s : uint64), ((emin - m - e) : int64))
+ sprime++
+ ;;
+ eprime = emin - 1
+ else
+ eprime = e + m
+ ;;
+ -> (n, eprime, sprime)
+ ;;
+
+ if e < emin
+ var first_1 = find_first1_64((s : uint64), (p : int64))
+ var shift = p - (first_1 : @u) - 1
+ if m >= (shift : @i)
+ s = s << shift
+ m -= (shift : @i)
+ e = emin
+ else
+ -> (n, e, s << m)
+ ;;
+ ;;
+
+ if e + m > emax && s != 0
+ -> (n, emax + 1, 1 << (p - 1))
+ ;;
+
+ -> (n, e + m, s)
+}
--- /dev/null
+++ b/lib/math/test/scale2-impl.myr
@@ -1,0 +1,95 @@
+use std
+use math
+use testr
+
+const main = {
+ testr.run([
+ [.name = "scale2-01", .fn = scale201],
+ [.name = "scale2-02", .fn = scale202],
+ [.name = "scale2-03", .fn = scale203],
+ [.name = "scale2-04", .fn = scale204],
+ ][:])
+}
+
+const scale201 = {c
+ var inputsf : (flt32, int32, flt32)[:] = [
+ (0.0, 1, 0.0),
+ (-0.0, 2, -0.0),
+ (1.0, 3, 8.0),
+ (1.0, -3, 0.125),
+ (23.0, 2, 92.0),
+ (184.2, 10, 188620.8),
+ (0.00000234, 15, 0.07667712),
+ (1834.2, -31, 0.0000008541159331798554),
+ (4321.22341, 0, 4321.22341),
+ ][:]
+
+ for (f, m, g) : inputsf
+ testr.eq(c, math.scale2(f, m), g)
+ ;;
+}
+
+const scale202 = {c
+ var inputsf : (flt64, int64, flt64)[:] = [
+ (0.0, 1, 0.0),
+ (-0.0, 2, -0.0),
+ (1.0, 3, 8.0),
+ (1.0, -3, 0.125),
+ (23.0, 2, 92.0),
+ (184.2, 10, 188620.8),
+ (0.00000234, 15, 0.07667712),
+ (1834.2, -31, 0.0000008541159331798554),
+ (4321.22341, 0, 4321.22341),
+ ][:]
+
+ for (f, m, g) : inputsf
+ testr.eq(c, math.scale2(f, m), g)
+ ;;
+}
+
+const scale203 = {c
+ var inputsb : (uint32, int32, uint32)[:] = [
+ (0x00000000, 1, 0x00000000),
+ (0x7f38aa32, 0, 0x7f38aa32),
+ (0xaaaaaaaa, 0, 0xaaaaaaaa),
+ (0x43000000, -3, 0x41800000),
+ (0x000030a0, -8, 0x00000031),
+ (0x002f3030, -8, 0x00002f30),
+ (0x032f3030, -20, 0x0000015e),
+ (0x032aafff, -8, 0x00155600),
+ (0x002aafff, 8, 0x03aabffc),
+ (0x0000af31, 2, 0x0002bcc4),
+ (0x0000af31, 260, 0x7eaf3100),
+ (0x0000af31, 266, 0x7f800000),
+ ][:]
+
+ for (u, m, v) : inputsb
+ var f = std.flt32frombits(u)
+ var g = math.scale2(f, m)
+ var w = std.flt32bits(g)
+ testr.check(c, v == w, "scale2(0x{w=8,b=16,p=0}, {}) should be 0x{w=8,b=16,p=0}, was 0x{w=8,b=16,p=0}", u, m, v, w)
+ ;;
+}
+
+const scale204 = {c
+ var inputsb : (uint64, int64, uint64)[:] = [
+ (0x0000000000000000, 1, 0x0000000000000000),
+ (0x7f83785551aa873c, 0, 0x7f83785551aa873c),
+ (0xc2b00000aabbccdd, -1080, 0x800000400002aaef),
+ (0xc644fa802f33cfbd, -1, 0xc634fa802f33cfbd),
+ (0x8004fa802f33cfbd, -1, 0x80027d401799e7de),
+ (0x8004fa8fffffffff, -1, 0x80027d4800000000),
+ (0x0082aaffffffffff, 8, 0x0102aaffffffffff),
+ (0x000000ffffffffff, 1, 0x000001fffffffffe),
+ (0x000000ffffffffff, 1000, 0x3dcfffffffffe000),
+ (0x000000ffffffffff, 2000, 0x7c4fffffffffe000),
+ (0x000000ffffffffff, 2400, 0x7ff0000000000000),
+ ][:]
+
+ for (u, m, v) : inputsb
+ var f = std.flt64frombits(u)
+ var g = math.scale2(f, m)
+ var w = std.flt64bits(g)
+ testr.check(c, v == w, "scale2(0x{w=16,b=16,p=0}, {}) should be 0x{w=16,b=16,p=0}, was 0x{w=16,b=16,p=0}", u, m, v, w)
+ ;;
+}