ref: f6e3a00fc29a7770b39c1ad51e4cfbff305bda38
parent: 9843290d44b6266758c478838d57a9478af39aa3
author: S. Gilles <sgilles@math.umd.edu>
date: Wed Jun 27 20:09:38 EDT 2018
Fix some errors in sin-impl. - Remez algorithms don't work when the function to be approximated vanishes (they track relative error, which would go to infinity), so the sin_coeffs needed to be recalculated. - sin and cos were swapped when retrieving from C, so sin(0) was 1. - Negative post-reduction x values needed to be handled better (negating sign before other potential permutations). - Fix huge_reduction by calculating xb from x - xhi, not x. - Change pi/4 for pi/2 (I forgot I was in the middle of doing this halfway through writing it.)
--- a/lib/math/ancillary/generate-minimax-by-Remez.gp
+++ b/lib/math/ancillary/generate-minimax-by-Remez.gp
@@ -37,8 +37,8 @@
return(abs(f(x) - p_eval(n, v, x)));
}
-{ find_M(f, n, v, a, b) = my(X, gran, l, lnext, len, xprev, xcur, xnext, yprev, ycur, ynext, thisa, thisb, k, j);
- gran = 100 * n;
+{ find_M(f, n, v, a, b, depth) = my(X, gran, l, lnext, len, xprev, xcur, xnext, yprev, ycur, ynext, thisa, thisb, k, j);
+ gran = 10000 * depth;
l = listcreate();
xprev = a - (b - a)/gran;
@@ -64,7 +64,7 @@
);
vecsort(l, 2);
- if(length(l) < n + 2 || l[1][2] < 2^(-100), return("q"),);
+ if(length(l) < n + 2 || l[1][2] < 2^(-2000), return("q"),);
len = length(l);
lnext = listcreate();
@@ -111,17 +111,25 @@
for(j = 1, 100,
v = remez_step(f, n, a, b, c);
print("v = ", v);
- c = find_M(f, n, v, a, b);
+ c = find_M(f, n, v, a, b, j);
if(c == "q", return,);
print("c = ", c);
);
}
+{ sinoverx(x) =
+ return(if(x == 0, 1, sin(x)/x));
+}
+
print("\n");
-print("Minimaxing sin(x), degree 7, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
-find_minimax(sin, 7, -Pi/1024, Pi/1024)
+print("Minimaxing sin(x) / x, degree 6, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
+find_minimax(sinoverx, 6, -Pi/1024, Pi/1024)
print("\n");
+print("(You'll need to add a 0x0 at the beginning to make a degree 7...\n");
+print("\n");
+print("\n");
print("Minimaxing cos(x), degree 7, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
find_minimax(cos, 7, -Pi/1024, Pi/1024)
print("\n");
-
+print("\n");
+print("Remember that there's that extra, ugly E term at the end of the vector that you want to lop off.\n");
--- a/lib/math/ancillary/pi-constants.c
+++ b/lib/math/ancillary/pi-constants.c
@@ -47,10 +47,10 @@
}
printf("\n");
- printf("1000 bits of pi/4:\n");
+ printf("1000 bits of pi/2:\n");
for (int bits_obtained = 0; bits_obtained < 1000; bits_obtained += 53) {
- mpfr_div_si(pi, pi, 4, MPFR_RNDN);
+ mpfr_div_si(pi, pi, 2, MPFR_RNDN);
d = mpfr_get_d(pi, MPFR_RNDN);
u = FLT64_TO_UINT64(d);
printf("%#018lx\n", u);
@@ -59,9 +59,9 @@
}
printf("\n");
- printf("Pre-computed 4/pi:\n");
+ printf("Pre-computed 2/pi:\n");
mpfr_const_pi(pi, MPFR_RNDN);
- mpfr_set_si(t, 4, MPFR_RNDN);
+ mpfr_set_si(t, 2, MPFR_RNDN);
mpfr_div(pi, t, pi, MPFR_RNDN);
d = mpfr_get_d(pi, MPFR_RNDN);
u = FLT64_TO_UINT64(d);
--- a/lib/math/sin-impl.myr
+++ b/lib/math/sin-impl.myr
@@ -50,16 +50,16 @@
/* Split precision down the middle */
const twentysix_bits_mask = (0xffffffffffffffff << 27)
-/* Pi/4 in lots of detail, for range reducing sin(2^18) or so */
-const pi_over_4 : uint64[4] = [
- 0x3fe921fb54442d18,
- 0x3c61a62633145c07,
- 0xb8cf1976b7ed8fbc,
- 0x3544cf98e804177d,
+/* Pi/2 in lots of detail, for range reducing sin(2^18) or so */
+const pi_over_2 : uint64[4] = [
+ 0x3ff921fb54442d18,
+ 0x3c81a62633145c07,
+ 0xb8ff1976b7ed8fbc,
+ 0x3584cf98e804177d,
]
/* Pre-computed inverses */
-const four_over_pi : uint64 = 0x3ff45f306dc9c883 /* 1/(pi/4) */
+const two_over_pi : uint64 = 0x3fe45f306dc9c883 /* 1/(pi/2) */
const oneohtwofour_over_pi : uint64 = 0x40745f306dc9c883 /* 1/(pi/(4 * 256)) */
/*
@@ -67,14 +67,14 @@
and cos on [-Pi/1024, Pi/1024] (generated by a Remez algorithm).
*/
const sin_coeff : uint64[8] = [
- 0xb791800000000000,
+ 0x0000000000000000,
0x3ff0000000000000,
- 0x38b6000000000000,
+ 0xb8c7400000000000,
0xbfc5555555555555,
- 0xb9cc000000000000,
- 0x3f81111111111024,
- 0x3ab0000000000000,
- 0xbf2a019f99ab90ae,
+ 0x39d0000000000000,
+ 0x3f81111111111061,
+ 0xbacc000000000000,
+ 0xbf2a019fa7ee0417,
]
const cos_coeff : uint64[8] = [
@@ -83,7 +83,7 @@
0xbfe0000000000000,
0x39a0000000000000,
0x3fa55555555553ee,
- 000000000000000000,
+ 0x0000000000000000,
0xbf56c16b9bfd9fd6,
0xbbec000000000000,
]
@@ -447,7 +447,6 @@
var swap_sin_cos : bool = false
var then_negate_sin : bool = false
var then_negate_cos : bool = false
- N = ((N % 4) + 4) % 4
match N
| 1:
/* sin(x + pi/2) = cos(x), cos(x + pi/2) = -sin(x) */
@@ -464,11 +463,12 @@
| _:
;;
+ var first_negate_sin : bool = false
if x1 < 0.0
x1 = -x1
x2 = -x2
x3 = -x3
- N = -N
+ first_negate_sin = true
;;
/* Figure out where in the C table x lies */
@@ -480,7 +480,7 @@
;;
var xi, sin_xi, cos_xi, sin_delta, cos_delta
- (xi, sin_xi, cos_xi) = C[j]
+ (xi, cos_xi, sin_xi) = C[j]
/*
Compute x - xi with compensated summation. Because xi
@@ -510,6 +510,7 @@
cos_delta += delta2 * fma64(delta1, 2.0*std.flt64frombits(cos_coeff[2]), std.flt64frombits(cos_coeff[1]))
var q : flt64[4]
+
if (want_sin && !swap_sin_cos) || (want_cos && swap_sin_cos)
(q[0], q[1]) = two_by_two(std.flt64frombits(sin_xi), cos_delta)
(q[2], q[3]) = two_by_two(std.flt64frombits(cos_xi), sin_delta)
@@ -518,6 +519,7 @@
/* TODO: use double-compensation instead */
(sin_ret, _, _) = triple_compensate_sum(q[:])
;;
+
if (want_cos && !swap_sin_cos) || (want_sin && swap_sin_cos)
(q[0], q[1]) = two_by_two(std.flt64frombits(cos_xi), cos_delta)
(q[2], q[3]) = two_by_two(std.flt64frombits(sin_xi), sin_delta)
@@ -529,8 +531,12 @@
(cos_ret, _, _) = triple_compensate_sum(q[:])
;;
+ if first_negate_sin
+ sin_ret = -sin_ret
+ ;;
+
if swap_sin_cos
- (sin_ret, cos_ret) = (cos_ret, sin_ret)
+ std.swap(&sin_ret, &cos_ret)
;;
if then_negate_sin
@@ -550,10 +556,10 @@
var Nf : flt64
/*
- We actually only care about N's parity. If x is very
- large, the ultimate N that we end up using might not be
+ We actually only care about N mod 4. If x is very large,
+ the ultimate N that we end up using might not be
representable (either as an int64 or flt64), so we instead
- just keep track of the parity exactly.
+ just keep track of the mod 4 part exactly.
*/
/*
@@ -572,31 +578,33 @@
however.
*/
var x1, x2, x3
- var q : flt64[12]
(x1, x2, x3) = huge_reduce_2pi(x)
- var pi_o_4_0 : flt64 = std.flt64frombits(pi_over_4[0])
- var pi_o_4_1 : flt64 = std.flt64frombits(pi_over_4[1])
- var pi_o_4_2 : flt64 = std.flt64frombits(pi_over_4[2])
- var pi_o_4_3 : flt64 = std.flt64frombits(pi_over_4[3])
+ var pi_o_2_0 : flt64 = std.flt64frombits(pi_over_2[0])
+ var pi_o_2_1 : flt64 = std.flt64frombits(pi_over_2[1])
+ var pi_o_2_2 : flt64 = std.flt64frombits(pi_over_2[2])
+ var pi_o_2_3 : flt64 = std.flt64frombits(pi_over_2[3])
+ var total_N = 0
+ var q : flt64[11]
while true
- N = rn(x1 * std.flt64frombits(four_over_pi))
+ N = rn(x1 * std.flt64frombits(two_over_pi))
Nf = (-N : flt64)
(q[0], q[1], q[2]) = (x1, x2, x3)
- (q[3], q[ 4]) = two_by_two(Nf, pi_o_4_0)
- (q[5], q[ 6]) = two_by_two(Nf, pi_o_4_1)
- (q[7], q[ 8]) = two_by_two(Nf, pi_o_4_2)
- (q[9], q[10]) = two_by_two(Nf, pi_o_4_3)
+ (q[3], q[ 4]) = two_by_two(Nf, pi_o_2_0)
+ (q[5], q[ 6]) = two_by_two(Nf, pi_o_2_1)
+ (q[7], q[ 8]) = two_by_two(Nf, pi_o_2_2)
+ (q[9], q[10]) = two_by_two(Nf, pi_o_2_3)
(x1, x2, x3) = triple_compensate_sum(q[:])
+ total_N += (N % 4)
- if le_33(x1, x2, x3, pi_o_4_0, pi_o_4_1, pi_o_4_2) && \
- le_33(-pi_o_4_0, -pi_o_4_1, -pi_o_4_2, x1, x2, x3)
+ if le_33(x1, x2, x3, pi_o_2_0, pi_o_2_1, pi_o_2_2) && \
+ le_33(-pi_o_2_0, -pi_o_2_1, -pi_o_2_2, x1, x2, x3)
break
;;
;;
- -> (N, x1, x2, x3)
+ -> (((total_N % 4) + 4) % 4, x1, x2, x3)
}
@@ -632,6 +640,8 @@
var b1 : flt64, b2 : flt64, b3 : flt64
var c1 : flt64, c2 : flt64, c3 : flt64
var u1 : uint64, u2 : uint64, u3 : uint64
+ var xcur = x
+
if j == 0
(u1, u2, u3) = R[0]
a1 = std.flt64frombits(u1)
@@ -638,6 +648,8 @@
a2 = std.flt64frombits(u2)
a3 = std.flt64frombits(u3)
var xhi = std.flt64frombits(b & ((~0) << (52 + (25 - e : uint64))))
+ xcur -= xhi
+ b = std.flt64bits(xcur)
xa = scale2(xhi, -25)
(b1, b2, b3) = (x - xhi, 0.0, 0.0)
@@ -649,6 +661,8 @@
a3 = std.flt64frombits(u3)
var e1 : int64 = 50*(j : int64) + 25
var xhi = std.flt64frombits(b & ((~0) << (52 + (e1 - e : uint64))))
+ xcur -= xhi
+ b = std.flt64bits(xcur)
xa = scale2(xhi, -e1)
/* j > 0 in this branch */
@@ -658,6 +672,8 @@
b3 = std.flt64frombits(u3)
var e2 : int64 = 50*(j - 1 : int64) + 25
var xmi = std.flt64frombits(b & ((~0) << (52 + (e2 - e : uint64))))
+ xcur -= xmi
+ b = std.flt64bits(xcur)
xb = scale2(xmi, -e2)
if j == 1
@@ -674,7 +690,6 @@
;;
;;
-
/*
Now we need to combine all this. Even worse, when we
multiply the two together, we need to keep full precision
@@ -741,7 +756,7 @@
/* Multiply out */
var a1 : flt64 = xh * yh
var a2 : flt64 = xh * yl
- var a3 : flt64 = yl * yh
+ var a3 : flt64 = xl * yh
var a4 : flt64 = xl * yl
/* By-hand compensated summation */
@@ -748,7 +763,7 @@
/* TODO: convert to normal compensated summation, move to sum-impl.myr */
var yy, u, t, v, z, s, c
if a2 < a3
- (a3, a2) = (a2, a3)
+ std.swap(&a3, &a2)
;;
s = a1
--- /dev/null
+++ b/lib/math/test/sin-impl.myr
@@ -1,0 +1,106 @@
+use std
+use math
+use testr
+
+const main = {
+ math.fptrap(false)
+ testr.run([
+ [.name="sin-cos-01", .fn = sincos01], /* flt32 */
+ [.name="sin-cos-02", .fn = sincos02], /* flt64 */
+ [.name="sin-cos-03", .fn = sincos03], /* off-by-1-ulp quarantine */
+ [.name="sin-cos-04", .fn = sincos04], /* exhaustively test C */
+ [.name="sin-cos-05", .fn = sincos05], /* NaN handling */
+ ][:])
+}
+
+const same32 = {a, b
+ if a == b
+ -> true
+ ;;
+
+ if std.isnan(std.flt32frombits(a)) && std.isnan(std.flt32frombits(b))
+ -> true
+ ;;
+
+ -> false
+}
+
+const same64 = {a, b
+ if a == b
+ -> true
+ ;;
+
+ if std.isnan(std.flt64frombits(a)) && std.isnan(std.flt64frombits(b))
+ -> true
+ ;;
+
+ -> false
+}
+
+const sincos01 = {c
+ var inputs : (uint32, uint32, uint32)[:] = [
+ (0x00000000, 0x00000000, 0x3f800000),
+ (0x3f000000, 0x3ef57744, 0x3f60a940),
+ (0x6e000000, 0xbec002e4, 0xbf6d50ea),
+ ][:]
+
+ for (x, ys, yc) : inputs
+ var xf : flt32 = std.flt32frombits(x)
+ var rsf1, rcf1, rsf2, rcf2
+ (rsf1, rcf1) = math.sincos(xf)
+ rsf2 = math.sin(xf)
+ rcf2 = math.cos(xf)
+
+ var rsu1 = std.flt32bits(rsf1)
+ var rcu1 = std.flt32bits(rcf1)
+ var rsu2 = std.flt32bits(rsf2)
+ var rcu2 = std.flt32bits(rcf2)
+
+
+ testr.check(c, rsf1 == rsf2 && rcf1 == rcf2,
+ "sincos(0x{b=16,w=8,p=0}) is (0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}), individual results (0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0})",
+ x, rsu1, rcu1, rsu2, rcu2)
+
+ testr.check(c, same32(rsu1, ys) && same32(rcu1, yc),
+ "sincos(0x{b=16,w=8,p=0}) should be (0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}), was (0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0})",
+ x, ys, yc, rsu1, rcu1)
+ ;;
+}
+
+const sincos02 = {c
+ var inputs : (uint64, uint64, uint64)[:] = [
+ (0x0000000000000000, 0x0000000000000000, 0x3ff0000000000000),
+ (0x4100000000000000, 0xbfeff8bd7b10d6b0, 0x3fa58ced65ec8b50),
+ ][:]
+
+ for (x, ys, yc) : inputs
+ var xf : flt64 = std.flt64frombits(x)
+ var rsf1, rcf1, rsf2, rcf2
+ (rsf1, rcf1) = math.sincos(xf)
+ rsf2 = math.sin(xf)
+ rcf2 = math.cos(xf)
+
+ var rsu1 = std.flt64bits(rsf1)
+ var rcu1 = std.flt64bits(rcf1)
+ var rsu2 = std.flt64bits(rsf2)
+ var rcu2 = std.flt64bits(rcf2)
+
+
+ testr.check(c, rsf1 == rsf2 && rcf1 == rcf2,
+ "sincos(0x{b=16,w=16,p=0}) is (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), individual results (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ x, rsu1, rcu1, rsu2, rcu2)
+
+ testr.check(c, same64(rsu1, ys) && same64(rcu1, yc),
+ "sincos(0x{b=16,w=16,p=0}) should be (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), was (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ x, ys, yc, rsu1, rcu1)
+ ;;
+}
+
+const sincos03 = {c
+}
+
+const sincos04 = {c
+}
+
+const sincos05 = {c
+}