ref: 43a9edcd25ed37e9f043caa8498261af39cab081
parent: 1019b884273ecfba87c33205a50084981820376f
author: S. Gilles <sgilles@math.umd.edu>
date: Sun Jul 22 16:30:34 EDT 2018
Correct typo in cotangent calculations.
--- a/lib/math/ancillary/generate-minimax-by-Remez.gp
+++ b/lib/math/ancillary/generate-minimax-by-Remez.gp
@@ -156,4 +156,10 @@
print("\n");
print("---\n");
print("\n");
+print("Minmimaxing tan(x) / x, degree 10, on [0, 15.5/256]:");
+find_minimax(tanoverx, 10, 0, 15.5/256)
+print("\n");
+print("(You'll need to add a 0x0 at the beginning to make a degree 11...\n");
+print("\n");
+print("---\n");
print("Remember that there's that extra, ugly E term at the end of the vector that you want to lop off.\n");
--- a/lib/math/tan-impl.myr
+++ b/lib/math/tan-impl.myr
@@ -48,6 +48,24 @@
]
/*
+ Coefficients for a minimax polynomial approximating p(x), with
+ tan(x) = x*p(x^2), expanding out to a degree 11 polynomial for
+ tan(x). This is slower than the tan_coeff version and overkill
+ for the typical range of delta.
+ */
+const tan_coeff_good : uint64[6] = [
+ 0x3ff0000000000000,
+ 0x3fd5555555555557,
+ 0x3fc1111111124b1b,
+ 0x3faba1ba5c206e2d,
+ 0x3f96660414be66b8,
+ 0x3f829ece970a9ba2,
+]
+
+/* Split 21 zeros out, for special cot() computation */
+const split_mask : uint64 = 0xffffffffffffffff << 21
+
+/*
The Highly Accurate Tables for use in a [GB91]-type algorithm;
generated by ancillary/generate-triples-for-GB91.c.
@@ -56,48 +74,6 @@
*/
const C : (uint64, uint64, uint64)[257] = [
/* xi cot(xi) tan(xi) */
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
(0x0000000000000000, 0x7ff0000000000000, 0x0000000000000000),
(0x3f6921fb43c5e5fb, 0x40745f2c4ad38437, 0x3f6922006eb65bc4),
(0x3f7921fb69404898, 0x40645f1f9b724978, 0x3f7922101511c956),
@@ -433,6 +409,32 @@
(delta2, _) = fast2sum(deltat, x2)
var p1, p2
+ /*
+ Since cot() can blow up close to 0, just fall back to polynomial approximation
+ */
+ if x1 < 0.060546875
+ var s = x1 * x1
+ p1 = x1 * horner_polyu(s, tan_coeff_good[:])
+ p2 = x2 + 3.0*s*x2
+
+ if want_tan
+ (ret1, ret2) = fast2sum(p1, p2)
+ goto have_result
+ ;;
+
+ (p1, p2) = fast2sum(p1, p2)
+
+ var f = std.flt64frombits(std.flt64bits(p1) & split_mask)
+ var g = (p1 - f) + p2
+
+ var u0 = 1.0/f
+ var u1 = std.flt64frombits(std.flt64bits(u0) & split_mask)
+ var u2 = u0 - u1
+
+ (ret1, ret2) = fast2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
+ goto have_result
+ ;;
+
if want_tan
(p1, p2) = ptan(delta1, delta2)
var num = cot + tan
@@ -444,7 +446,7 @@
(q1, q2) = fast2sum(q1, q2)
(ret1, ret2) = fast2sum(q1, q2 + q3)
else
- (p1, p2) = pcot(delta1, delta2)
+ (p1, p2) = ptan(delta1, delta2)
var num = cot + tan
var den = (tan + p1) + p2
var f = num/den
@@ -469,7 +471,7 @@
var s : flt64 = x1 * x1
var p : flt64 = horner_polyu(s, tan_coeff[:])
var r1, r2
- (r1, r2) = two_by_two(s, x1)
+ (r1, r2) = two_by_two(p, x1)
-> fast2sum(r1, r2 + x2)
}
--- a/lib/math/test/tan-impl.myr
+++ b/lib/math/test/tan-impl.myr
@@ -40,6 +40,7 @@
const tancot01 = {c
var inputs : (uint32, uint32, uint32)[:] = [
(0x00000000, 0x00000000, 0x7f800000),
+ (0x01000000, 0x01000000, 0x7e000000),
][:]
for (x, yt, yc) : inputs
@@ -64,10 +65,14 @@
const tancot02 = {c
var inputs : (uint64, uint64, uint64)[:] = [
(0x0000000000000000, 0x0000000000000000, 0x7ff0000000000000),
- (0x41bb951f1572eba5, 0xbc8f54f5227a4e84, 0x3ff0000000000000), /* [GB91]'s "Xhard" */
+ (0x5101000000000000, 0xbff4f77bbc53c8f9, 0xbfe86b6d64c43ec0),
+ (0x4b01000000000000, 0xbfe96f60bbc6c837, 0xbff421332f057cb5),
+ (0x41bb951f1572eba5, 0xbc8f54f5227a4e84, 0xc35057584c429b3a), /* [GB91]'s "Xhard" */
][:]
+var n = 0
for (x, yt, yc) : inputs
+n++
var xf : flt64 = std.flt64frombits(x)
var rtf, rcf
rtf = math.tan(xf)
@@ -81,7 +86,7 @@
x, yt, rtu)
testr.check(c, same64(rcu, yc),
- "cot(0x{b=16,w=16,p=0}) should be (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), was (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+ "cot(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
x, yc, rcu)
;;
}
@@ -88,9 +93,7 @@
const tancot03 = {c
var inputs : (uint64, uint64, uint64, uint64, uint64)[:] = [
- (0x5101000000000000, 0x3fe9706123d509f1, 0xbfe369af9695aba1, 0x3fe9706123d509f0, 0xbfe369af9695aba0),
- (0xf83b13a6a142b6d5, 0xbf5a86f4edeb02f2, 0x3feffffd404efc20, 0xbf5a86f4edeb02f1, 0x3feffffd404efc20),
- (0x4b01000000000000, 0xbfe3e9527dc75f12, 0x3fe90cf80997c963, 0xbfe3e9527dc75f13, 0x3fe90cf80997c964),
+ (0xf83b13a6a142b6d5, 0xbf5a86f73542c78a, 0xc0834d0a344cbe85, 0xbf5a86f73542c789, 0xc0834d0a344cbe85),
][:]
for (x, yt_perfect, yc_perfect, yt_acceptable, yc_acceptable) : inputs