shithub: mc

Download patch

ref: bf035b686f58f139ee0b04746d3215e7bb8413e9
parent: 0dd6c9d5124707e4c69116745469e21f7e076429
author: S. Gilles <sgilles@math.umd.edu>
date: Thu May 3 07:13:58 EDT 2018

Implment scale2.

This is, perhaps, a conforming implementation of scalB, but it is
highly probable that, in the future, it will be desireable for scalB
to accept arbitrary, exponents, not simply integers. Furthermore,
we find scalB a non-intuitive name. Therefore, calling the function
‘scale2’ serves two purposes.

--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -15,6 +15,9 @@
 	# polynomial evaluation methods
 	poly-impl.myr
 
+	# scalb (multiply x by 2^m)
+	scale2-impl.myr
+
 	# sqrt
 	sqrt-impl+posixy-x64-sse2.s
 	sqrt-impl.myr
--- a/lib/math/fpmath.myr
+++ b/lib/math/fpmath.myr
@@ -14,6 +14,9 @@
 		horner_poly : (x : @f, a : @f[:] -> @f)
 		horner_polyu : (x : @f, a : @u[:] -> @f)
 
+		/* scale2-impl */
+		scale2 : (f : @f, m : @i -> @f)
+
 		/* sqrt-impl */
 		sqrt : (f : @f -> @f)
 
@@ -73,6 +76,8 @@
 	horner_poly = {f, a; -> horner_poly32(f, a)}
 	horner_polyu = {f, a; -> horner_polyu32(f, a)}
 
+	scale2 = {f, m; -> scale232(f, m)}
+
 	sqrt = {f; -> sqrt32(f)}
 
 	kahan_sum = {l; -> kahan_sum32(l) }
@@ -93,6 +98,8 @@
 	horner_poly = {f, a; -> horner_poly64(f, a)}
 	horner_polyu = {f, a; -> horner_polyu64(f, a)}
 
+	scale2 = {f, m; -> scale264(f, m)}
+
 	sqrt = {f; -> sqrt64(f)}
 
 	kahan_sum = {l; -> kahan_sum64(l) }
@@ -120,6 +127,9 @@
 
 extern const horner_polyu32 : (f : flt32, a : uint32[:] -> flt32)
 extern const horner_polyu64 : (f : flt64, a : uint64[:] -> flt64)
+
+extern const scale232 : (f : flt32, m : int32 -> flt32)
+extern const scale264 : (f : flt64, m : int64 -> flt64)
 
 extern const sqrt32 : (x : flt32 -> flt32)
 extern const sqrt64 : (x : flt64 -> flt64)
--- /dev/null
+++ b/lib/math/scale2-impl.myr
@@ -1,0 +1,73 @@
+use std
+
+use "fpmath"
+use "util"
+
+/*
+   The scaleB function recommended by the 2008 revision of IEEE
+   754. Since only integer exponents are considered, the naive
+   approach works quite well.
+ */
+pkg math =
+	const scale232 : (f : flt32, m : int32 -> flt32)
+	const scale264 : (f : flt64, m : int64 -> flt64)
+;;
+
+const scale232 = {f : flt32, m : int32
+	var n, e, s
+	(n, e, s) = std.flt32explode(f)
+	(n, e, s) = scale2gen(n, e, s, -126, 127, 24, m)
+	-> std.flt32assem(n, e, s)
+}
+
+const scale264 = {f : flt64, m : int64
+	var n, e, s
+	(n, e, s) = std.flt64explode(f)
+	(n, e, s) = scale2gen(n, e, s, -1022, 1023, 53, m)
+	-> std.flt64assem(n, e, s)
+}
+
+generic scale2gen = {n : bool, e : @i, s : @u, emin : @i, emax : @i, p : @u, m : @i :: numeric, integral @i, numeric, integral @u
+	if e < emin && s == 0
+		-> (n, e, s)
+	elif m == 0
+		-> (n, e, s)
+	elif m < 0
+		var sprime = s
+		var eprime = e
+		if e < emin
+			sprime = s >> (-m)
+			if need_round_away(0, (s : uint64), (-m : int64))
+				sprime++
+			;;
+			eprime = emin - 1
+		elif e + m < emin
+			sprime = s >> (emin - m - e)
+			if need_round_away(0, (s : uint64), ((emin - m - e) : int64))
+				sprime++
+			;;
+			eprime = emin - 1
+		else
+			eprime = e + m
+		;;
+		-> (n, eprime, sprime)
+	;;
+
+	if e < emin
+		var first_1 = find_first1_64((s : uint64), (p : int64))
+		var shift = p - (first_1 : @u) - 1
+		if m >= (shift : @i)
+			s = s << shift
+			m -= (shift : @i)
+			e = emin
+		else
+			-> (n, e, s << m)
+		;;
+	;;
+
+	if e + m > emax && s != 0
+		-> (n, emax + 1, 1 << (p - 1))
+	;;
+
+	-> (n, e + m, s)
+}
--- /dev/null
+++ b/lib/math/test/scale2-impl.myr
@@ -1,0 +1,95 @@
+use std
+use math
+use testr
+
+const main = {
+	testr.run([
+		[.name = "scale2-01",    .fn = scale201],
+		[.name = "scale2-02",    .fn = scale202],
+		[.name = "scale2-03",    .fn = scale203],
+		[.name = "scale2-04",    .fn = scale204],
+	][:])
+}
+
+const scale201 = {c
+	var inputsf : (flt32, int32, flt32)[:] = [
+		(0.0, 1, 0.0),
+		(-0.0, 2, -0.0),
+		(1.0, 3, 8.0),
+		(1.0, -3, 0.125),
+		(23.0, 2, 92.0),
+		(184.2, 10, 188620.8),
+		(0.00000234, 15, 0.07667712),
+		(1834.2, -31, 0.0000008541159331798554),
+		(4321.22341, 0, 4321.22341),
+	][:]
+
+	for (f, m, g) : inputsf
+		testr.eq(c, math.scale2(f, m), g)
+	;;
+}
+
+const scale202 = {c
+	var inputsf : (flt64, int64, flt64)[:] = [
+		(0.0, 1, 0.0),
+		(-0.0, 2, -0.0),
+		(1.0, 3, 8.0),
+		(1.0, -3, 0.125),
+		(23.0, 2, 92.0),
+		(184.2, 10, 188620.8),
+		(0.00000234, 15, 0.07667712),
+		(1834.2, -31, 0.0000008541159331798554),
+		(4321.22341, 0, 4321.22341),
+	][:]
+
+	for (f, m, g) : inputsf
+		testr.eq(c, math.scale2(f, m), g)
+	;;
+}
+
+const scale203 = {c
+	var inputsb : (uint32, int32, uint32)[:] = [
+		(0x00000000,   1, 0x00000000),
+		(0x7f38aa32,   0, 0x7f38aa32),
+		(0xaaaaaaaa,   0, 0xaaaaaaaa),
+		(0x43000000,  -3, 0x41800000),
+		(0x000030a0,  -8, 0x00000031),
+		(0x002f3030,  -8, 0x00002f30),
+		(0x032f3030, -20, 0x0000015e),
+		(0x032aafff,  -8, 0x00155600),
+		(0x002aafff,   8, 0x03aabffc),
+		(0x0000af31,   2, 0x0002bcc4),
+		(0x0000af31, 260, 0x7eaf3100),
+		(0x0000af31, 266, 0x7f800000),
+	][:]
+
+	for (u, m, v) : inputsb
+		var f = std.flt32frombits(u)
+		var g = math.scale2(f, m)
+		var w = std.flt32bits(g)
+		testr.check(c, v == w, "scale2(0x{w=8,b=16,p=0}, {}) should be 0x{w=8,b=16,p=0}, was 0x{w=8,b=16,p=0}", u, m, v, w)
+	;;
+}
+
+const scale204 = {c
+	var inputsb : (uint64, int64, uint64)[:] = [
+		(0x0000000000000000,     1, 0x0000000000000000),
+		(0x7f83785551aa873c,     0, 0x7f83785551aa873c),
+		(0xc2b00000aabbccdd, -1080, 0x800000400002aaef),
+		(0xc644fa802f33cfbd,    -1, 0xc634fa802f33cfbd),
+		(0x8004fa802f33cfbd,    -1, 0x80027d401799e7de),
+		(0x8004fa8fffffffff,    -1, 0x80027d4800000000),
+		(0x0082aaffffffffff,     8, 0x0102aaffffffffff), 
+		(0x000000ffffffffff,     1, 0x000001fffffffffe),
+		(0x000000ffffffffff,  1000, 0x3dcfffffffffe000),
+		(0x000000ffffffffff,  2000, 0x7c4fffffffffe000),
+		(0x000000ffffffffff,  2400, 0x7ff0000000000000),
+	][:]
+
+	for (u, m, v) : inputsb
+		var f = std.flt64frombits(u)
+		var g = math.scale2(f, m)
+		var w = std.flt64bits(g)
+		testr.check(c, v == w, "scale2(0x{w=16,b=16,p=0}, {}) should be 0x{w=16,b=16,p=0}, was 0x{w=16,b=16,p=0}", u, m, v, w)
+	;;
+}