shithub: mc

Download patch

ref: 43a9edcd25ed37e9f043caa8498261af39cab081
parent: 1019b884273ecfba87c33205a50084981820376f
author: S. Gilles <sgilles@math.umd.edu>
date: Sun Jul 22 16:30:34 EDT 2018

Correct typo in cotangent calculations.

--- a/lib/math/ancillary/generate-minimax-by-Remez.gp
+++ b/lib/math/ancillary/generate-minimax-by-Remez.gp
@@ -156,4 +156,10 @@
 print("\n");
 print("---\n");
 print("\n");
+print("Minmimaxing tan(x) / x, degree 10, on [0, 15.5/256]:");
+find_minimax(tanoverx, 10, 0, 15.5/256)
+print("\n");
+print("(You'll need to add a 0x0 at the beginning to make a degree 11...\n");
+print("\n");
+print("---\n");
 print("Remember that there's that extra, ugly E term at the end of the vector that you want to lop off.\n");
--- a/lib/math/tan-impl.myr
+++ b/lib/math/tan-impl.myr
@@ -48,6 +48,24 @@
 ]
 
 /*
+   Coefficients for a minimax polynomial approximating p(x), with
+   tan(x) = x*p(x^2), expanding out to a degree 11 polynomial for
+   tan(x). This is slower than the tan_coeff version and overkill
+   for the typical range of delta.
+ */
+const tan_coeff_good : uint64[6] = [
+	0x3ff0000000000000,
+	0x3fd5555555555557,
+	0x3fc1111111124b1b,
+	0x3faba1ba5c206e2d,
+	0x3f96660414be66b8,
+	0x3f829ece970a9ba2,
+]
+
+/* Split 21 zeros out, for special cot() computation */
+const split_mask : uint64 = 0xffffffffffffffff << 21
+
+/*
    The Highly Accurate Tables for use in a [GB91]-type algorithm;
    generated by ancillary/generate-triples-for-GB91.c.
 
@@ -56,48 +74,6 @@
  */
 const C : (uint64, uint64, uint64)[257] = [
 	/*       xi	       cot(xi)	    tan(xi)      */
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 	(0x0000000000000000, 0x7ff0000000000000, 0x0000000000000000),
 	(0x3f6921fb43c5e5fb, 0x40745f2c4ad38437, 0x3f6922006eb65bc4),
 	(0x3f7921fb69404898, 0x40645f1f9b724978, 0x3f7922101511c956),
@@ -433,6 +409,32 @@
 	(delta2, _) = fast2sum(deltat, x2)
 	var p1, p2
 
+	/*
+	  Since cot() can blow up close to 0, just fall back to polynomial approximation
+	 */
+	if x1 < 0.060546875
+		var s = x1 * x1
+		p1 = x1 * horner_polyu(s, tan_coeff_good[:])
+		p2 = x2 + 3.0*s*x2
+
+		if want_tan
+			(ret1, ret2) = fast2sum(p1, p2)
+			goto have_result
+		;;
+
+		(p1, p2) = fast2sum(p1, p2)
+
+		var f = std.flt64frombits(std.flt64bits(p1) & split_mask)
+		var g = (p1 - f) + p2
+
+		var u0 = 1.0/f
+		var u1 = std.flt64frombits(std.flt64bits(u0) & split_mask)
+		var u2 = u0 - u1
+
+		(ret1, ret2) = fast2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))
+		goto have_result
+	;;
+
 	if want_tan
 		(p1, p2) = ptan(delta1, delta2)
 		var num = cot + tan
@@ -444,7 +446,7 @@
 		(q1, q2) = fast2sum(q1, q2)
 		(ret1, ret2) = fast2sum(q1, q2 + q3)
 	else
-		(p1, p2) = pcot(delta1, delta2)
+		(p1, p2) = ptan(delta1, delta2)
 		var num = cot + tan
 		var den = (tan + p1) + p2
 		var f = num/den
@@ -469,7 +471,7 @@
 	var s : flt64 = x1 * x1
 	var p : flt64 = horner_polyu(s, tan_coeff[:])
 	var r1, r2
-	(r1, r2) = two_by_two(s, x1)
+	(r1, r2) = two_by_two(p, x1)
 	-> fast2sum(r1, r2 + x2)
 }
 
--- a/lib/math/test/tan-impl.myr
+++ b/lib/math/test/tan-impl.myr
@@ -40,6 +40,7 @@
 const tancot01 = {c
 	var inputs : (uint32, uint32, uint32)[:] = [
 		(0x00000000, 0x00000000, 0x7f800000),
+		(0x01000000, 0x01000000, 0x7e000000),
 	][:]
 
 	for (x, yt, yc) : inputs
@@ -64,10 +65,14 @@
 const tancot02 = {c
 	var inputs : (uint64, uint64, uint64)[:] = [
 		(0x0000000000000000, 0x0000000000000000, 0x7ff0000000000000),
-		(0x41bb951f1572eba5, 0xbc8f54f5227a4e84, 0x3ff0000000000000), /* [GB91]'s "Xhard" */
+		(0x5101000000000000, 0xbff4f77bbc53c8f9, 0xbfe86b6d64c43ec0),
+		(0x4b01000000000000, 0xbfe96f60bbc6c837, 0xbff421332f057cb5),
+		(0x41bb951f1572eba5, 0xbc8f54f5227a4e84, 0xc35057584c429b3a), /* [GB91]'s "Xhard" */
 	][:]
 
+var n = 0
 	for (x, yt, yc) : inputs
+n++
 		var xf : flt64 = std.flt64frombits(x)
 		var rtf, rcf
 		rtf = math.tan(xf)
@@ -81,7 +86,7 @@
 			x, yt, rtu)
 
 		testr.check(c, same64(rcu, yc),
-			"cot(0x{b=16,w=16,p=0}) should be (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), was (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",
+			"cot(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
 			x, yc, rcu)
 	;;
 }
@@ -88,9 +93,7 @@
 
 const tancot03 = {c
 	var inputs : (uint64, uint64, uint64, uint64, uint64)[:] = [
-		(0x5101000000000000, 0x3fe9706123d509f1, 0xbfe369af9695aba1, 0x3fe9706123d509f0, 0xbfe369af9695aba0),
-		(0xf83b13a6a142b6d5, 0xbf5a86f4edeb02f2, 0x3feffffd404efc20, 0xbf5a86f4edeb02f1, 0x3feffffd404efc20),
-		(0x4b01000000000000, 0xbfe3e9527dc75f12, 0x3fe90cf80997c963, 0xbfe3e9527dc75f13, 0x3fe90cf80997c964),
+		(0xf83b13a6a142b6d5, 0xbf5a86f73542c78a, 0xc0834d0a344cbe85, 0xbf5a86f73542c789, 0xc0834d0a344cbe85),
 	][:]
 
 	for (x, yt_perfect, yc_perfect, yt_acceptable, yc_acceptable) : inputs