shithub: mc

--- a/lib/math/ancillary/generate-minimax-by-Remez.gp

+++ b/lib/math/ancillary/generate-minimax-by-Remez.gp

@@ -156,4 +156,10 @@

 print("\n");

 print("---\n");

 print("\n");

+print("Minmimaxing tan(x) / x, degree 10, on [0, 15.5/256]:");

+find_minimax(tanoverx, 10, 0, 15.5/256)

+print("\n");

+print("(You'll need to add a 0x0 at the beginning to make a degree 11...\n");

+print("\n");

+print("---\n");

 print("Remember that there's that extra, ugly E term at the end of the vector that you want to lop off.\n");

--- a/lib/math/tan-impl.myr

+++ b/lib/math/tan-impl.myr

@@ -48,6 +48,24 @@

/*

+   Coefficients for a minimax polynomial approximating p(x), with

+   tan(x) = x*p(x^2), expanding out to a degree 11 polynomial for

+   tan(x). This is slower than the tan_coeff version and overkill

+   for the typical range of delta.

+ */

+const tan_coeff_good : uint64[6] = [

+	0x3ff0000000000000,

+	0x3fd5555555555557,

+	0x3fc1111111124b1b,

+	0x3faba1ba5c206e2d,

+	0x3f96660414be66b8,

+	0x3f829ece970a9ba2,

+]

+/* Split 21 zeros out, for special cot() computation */

+const split_mask : uint64 = 0xffffffffffffffff << 21

+/*

    The Highly Accurate Tables for use in a [GB91]-type algorithm;

    generated by ancillary/generate-triples-for-GB91.c.

@@ -56,48 +74,6 @@

*/

 const C : (uint64, uint64, uint64)[257] = [

 	/*       xi	       cot(xi)	    tan(xi)      */

 	(0x0000000000000000, 0x7ff0000000000000, 0x0000000000000000),

 	(0x3f6921fb43c5e5fb, 0x40745f2c4ad38437, 0x3f6922006eb65bc4),

 	(0x3f7921fb69404898, 0x40645f1f9b724978, 0x3f7922101511c956),

@@ -433,6 +409,32 @@

 	(delta2, _) = fast2sum(deltat, x2)

 	var p1, p2

+	/*

+	  Since cot() can blow up close to 0, just fall back to polynomial approximation

+	 */

+	if x1 < 0.060546875

+		var s = x1 * x1

+		p1 = x1 * horner_polyu(s, tan_coeff_good[:])

+		p2 = x2 + 3.0*s*x2

+		if want_tan

+			(ret1, ret2) = fast2sum(p1, p2)

+			goto have_result

+		;;

+		(p1, p2) = fast2sum(p1, p2)

+		var f = std.flt64frombits(std.flt64bits(p1) & split_mask)

+		var g = (p1 - f) + p2

+		var u0 = 1.0/f

+		var u1 = std.flt64frombits(std.flt64bits(u0) & split_mask)

+		var u2 = u0 - u1

+		(ret1, ret2) = fast2sum(u0 - u0*u0*g, u0*((1.0 - u1*f) - u2*f))

+		goto have_result

+	;;

 	if want_tan

 		(p1, p2) = ptan(delta1, delta2)

 		var num = cot + tan

@@ -444,7 +446,7 @@

 		(q1, q2) = fast2sum(q1, q2)

 		(ret1, ret2) = fast2sum(q1, q2 + q3)

 	else

-		(p1, p2) = pcot(delta1, delta2)

+		(p1, p2) = ptan(delta1, delta2)

 		var num = cot + tan

 		var den = (tan + p1) + p2

 		var f = num/den

@@ -469,7 +471,7 @@

 	var s : flt64 = x1 * x1

 	var p : flt64 = horner_polyu(s, tan_coeff[:])

 	var r1, r2

-	(r1, r2) = two_by_two(s, x1)

+	(r1, r2) = two_by_two(p, x1)

 	-> fast2sum(r1, r2 + x2)

--- a/lib/math/test/tan-impl.myr

+++ b/lib/math/test/tan-impl.myr

@@ -40,6 +40,7 @@

 const tancot01 = {c

 	var inputs : (uint32, uint32, uint32)[:] = [

 		(0x00000000, 0x00000000, 0x7f800000),

+		(0x01000000, 0x01000000, 0x7e000000),

 	][:]

 	for (x, yt, yc) : inputs

@@ -64,10 +65,14 @@

 const tancot02 = {c

 	var inputs : (uint64, uint64, uint64)[:] = [

 		(0x0000000000000000, 0x0000000000000000, 0x7ff0000000000000),

-		(0x41bb951f1572eba5, 0xbc8f54f5227a4e84, 0x3ff0000000000000), /* [GB91]'s "Xhard" */

+		(0x5101000000000000, 0xbff4f77bbc53c8f9, 0xbfe86b6d64c43ec0),

+		(0x4b01000000000000, 0xbfe96f60bbc6c837, 0xbff421332f057cb5),

+		(0x41bb951f1572eba5, 0xbc8f54f5227a4e84, 0xc35057584c429b3a), /* [GB91]'s "Xhard" */

 	][:]

+var n = 0

 	for (x, yt, yc) : inputs

+n++

 		var xf : flt64 = std.flt64frombits(x)

 		var rtf, rcf

 		rtf = math.tan(xf)

@@ -81,7 +86,7 @@

 			x, yt, rtu)

 		testr.check(c, same64(rcu, yc),

-			"cot(0x{b=16,w=16,p=0}) should be (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0}), was (0x{b=16,w=16,p=0}, 0x{b=16,w=16,p=0})",

+			"cot(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",

 			x, yc, rcu)

;;

@@ -88,9 +93,7 @@

 const tancot03 = {c

 	var inputs : (uint64, uint64, uint64, uint64, uint64)[:] = [

-		(0x5101000000000000, 0x3fe9706123d509f1, 0xbfe369af9695aba1, 0x3fe9706123d509f0, 0xbfe369af9695aba0),

-		(0xf83b13a6a142b6d5, 0xbf5a86f4edeb02f2, 0x3feffffd404efc20, 0xbf5a86f4edeb02f1, 0x3feffffd404efc20),

-		(0x4b01000000000000, 0xbfe3e9527dc75f12, 0x3fe90cf80997c963, 0xbfe3e9527dc75f13, 0x3fe90cf80997c964),

+		(0xf83b13a6a142b6d5, 0xbf5a86f73542c78a, 0xc0834d0a344cbe85, 0xbf5a86f73542c789, 0xc0834d0a344cbe85),

 	][:]

 	for (x, yt_perfect, yc_perfect, yt_acceptable, yc_acceptable) : inputs

--

⑨