ref: 9afaec2ee7ed8864f9f28c39533e2f2094301fba
parent: 09e7cfb56388b1ea79ee7e100e52cb5b34e6017c
author: S. Gilles <sgilles@math.umd.edu>
date: Sat Jun 30 13:36:51 EDT 2018
Eliminate loop sorts in sin/cos reduction.
--- a/lib/math/sin-impl.myr
+++ b/lib/math/sin-impl.myr
@@ -624,23 +624,32 @@
/* Compute initial reduction -- this might not be sufficient. */
total_N = rn(x1 * std.flt64frombits(two_over_pi))
Nf = (-total_N : flt64)
- (q[0], q[ 1], q[2]) = (x1, x2, x3)
- (q[3], q[ 4]) = two_by_two(Nf, pi_o_2_0)
- (q[5], q[ 6]) = two_by_two(Nf, pi_o_2_1)
+ (q[0], q[ 4], q[5]) = (x1, x2, x3)
+ (q[1], q[ 3]) = two_by_two(Nf, pi_o_2_0)
+ (q[2], q[ 6]) = two_by_two(Nf, pi_o_2_1)
(q[7], q[ 8]) = two_by_two(Nf, pi_o_2_2)
(q[9], q[10]) = two_by_two(Nf, pi_o_2_3)
- std.sort(q[:], fltabscmp)
+
+ /*
+ Sorting is very slow, but it's only the top five or so
+ that are in question
+ */
+ std.sort(q[0:5], fltabscmp)
(x1, x2) = double_compensated_sum(q[:])
while !(le_22(x1, x2, pi_o_4_0, pi_o_4_1) && le_22(-pi_o_4_0, -pi_o_4_1, x1, x2))
N = rn(x1 * std.flt64frombits(two_over_pi))
Nf = (-N : flt64)
- (q[0], q[1]) = (x1, x2)
- (q[2], q[3]) = two_by_two(Nf, pi_o_2_0)
+
+ /*
+ Sorting is slow. We know that x1 is roughly
+ cancelled by Nf * pi_o_2_0, so line those up.
+ */
+ (q[0], q[2]) = (x1, x2)
+ (q[1], q[3]) = two_by_two(Nf, pi_o_2_0)
(q[4], q[5]) = two_by_two(Nf, pi_o_2_1)
(q[6], q[7]) = two_by_two(Nf, pi_o_2_2)
(q[8], q[9]) = two_by_two(Nf, pi_o_2_3)
- std.sort(q[0:10], fltabscmp)
(x1, x2) = double_compensated_sum(q[0:10])
total_N += (N % 4)
;;
@@ -737,11 +746,10 @@
(q[14], q[15]) = two_by_two(xc, c2)
(q[16], q[17]) = two_by_two(xc, c3)
- -> triple_compensate_sum(q[:])
+ -> triple_compensated_sum(q[:])
}
-/* TODO: move to sum-impl.myr, figure out a good API to allow optional sorting */
-const triple_compensate_sum = {q : flt64[:]
+const triple_compensated_sum = {q : flt64[:]
/* TODO: verify, with GAPPA or something, that this is correct. */
std.sort(q, fltabscmp)
var s1 : flt64, s2 : flt64, s3
@@ -788,7 +796,6 @@
var a4 : flt64 = xl * yl
/* By-hand compensated summation */
- /* TODO: convert to normal compensated summation, move to sum-impl.myr */
var yy, u, t, v, z, s, c
if a2 < a3
std.swap(&a3, &a2)