shithub: mc

ref: 5a95abf7cc557ebccb3aa80c8ce9679eb82dcaa1
parent: 4aa0e6c6b8a1a23b144b0382d6156aaa55b2c7fd
author: S. Gilles <sgilles@math.umd.edu>
date: Thu Jun 21 11:23:23 EDT 2018

Add ancillary programs for libmath.

Some constants in libmath come from papers, but some come from long
computations. Those computations should be reproducible, so that
10 years from now someone can answer the questions "Why are they
using 0x3f6921fb53cc441b?" and "Would 0x3f6921fb42e71072 be better?".

--- /dev/null
+++ b/lib/math/ancillary/KM06-calc.gp
@@ -1,0 +1,37 @@
+\\ Implementations of some functions from [KM06]. The exact ranges
+\\ were found by manual fiddling.
+
+{ betap(amin, amax, p, n) = my(l, s);
+        l = amax^(1/p);
+        s = amin^(1/p);
+        return(polroots(l^(1 - 1/(2^(n-1))) * (3/s - (x - s) * (p+1)/(s^2)) * (x - s)^2 - s^(1 - 1/(2^(n-1))) * (3/l - (x - l) * (p+1)/(l^2)) * (x - l)^2));
+}
+
+{ betam(amin, amax, p, n) = my(l, s);
+        l = amax^(1/p);
+        s = amin^(1/p);
+        return(polroots(l^(1 - 1/(2^(n-1))) * (3/s - (x - s) * (p+1)/(s^2)) * (x - s)^2 + s^(1 - 1/(2^(n-1))) * (3/l - (x - l) * (p+1)/(l^2)) * (x - l)^2));
+}
+
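+\\ beta() collects the roots from betap() and betam(), keeps the ones that
+\\ are (numerically) real, and returns one lying between amin^(1/p) and
+\\ amax^(1/p): the candidate starting value.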
+{ beta(amin, amax, p, n) = my(plus, minus, alsmaller, allarger, l, r);
+        plus = betap(amin, amax, p, n);
+        minus = betam(amin, amax, p, n);
+        alsmaller = min(amax^(1/p), amin^(1/p));
+        allarger = max(amax^(1/p), amin^(1/p));
+        l = List();
+        for(i=1, length(plus), if(imag(plus[i]) < 0.001 && imag(plus[i]) > -0.001, listput(l, real(plus[i])), ));
+        for(i=1, length(minus), if(imag(minus[i]) < 0.001 && imag(minus[i]) > -0.001, listput(l, real(minus[i])), ));
+        r=0.0;
+        for(i=1, length(l), if(l[i] <= allarger && l[i] >= alsmaller, r=l[i], ));
+        return(r);
+}
+
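+\\ maxerr() runs n steps of the Newton iteration x -> (x/p)*(p - 1 + a*x^(-p))
+\\ toward a^(1/p), starting from beta(), for a = amin and a = amax, and
+\\ returns the larger of the two final errors.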
+{ maxerr(amin, amax, p, n) = my(x1, x2, e1, e2);
+        x1 = List([beta(amin, amax, p, n)]);
+        x2 = List([beta(amin, amax, p, n)]);
+        for(i=1, n, listput(x1, x1[i]/p * (p - 1 + amin * x1[i]^(-p))));
+        for(i=1, n, listput(x2, x2[i]/p * (p - 1 + amax * x2[i]^(-p))));
+        e1 = abs(amin^(1/p) - x1[n + 1]);
+        e2 = abs(amax^(1/p) - x2[n + 1]);
+        return(max(e1, e2));
+}
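+
+\\ A (hypothetical) session might evaluate beta(1, 2, 2, 3) to get a starting
+\\ value for sqrt on [1, 2], then maxerr(1, 2, 2, 3) to check whether 3
+\\ iterations from it reach the target precision.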
--- /dev/null
+++ b/lib/math/ancillary/README
@@ -1,0 +1,4 @@
+The files in this directory are only useful for understanding the
+derivation of constants. If you wish to improve a file like
+sqrt-impl.myr and are curious how certain constants were derived,
+this directory might hold some answers.
--- /dev/null
+++ b/lib/math/ancillary/generate-triples-for-GB91.c
@@ -1,0 +1,193 @@
+/* cc -o generate-triples-for-GB91 generate-triples-for-GB91.c -lmpfr */
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <mpfr.h>
+
+/*
+   Find triples (xi, sin(xi), cos(xi)) very close to machine numbers
+   for use in the algorithm of [GB91]. See [Lut95] for a better way
+   of calculating these, which we don't follow.
+
+   The idea is that, by good fortune (or more persuasive arguments),
+   there exist floating-point numbers xi that are very close to
+   numbers of the form (i/N)(π/4). They are so close, in fact, that
+   the exact binary expansion of (i/N)(π/4) is xi, followed by a
+   bunch of zeroes, then the irrational, unpredictable part.
+
+   Then, when we discretize the input for sin(x) to some xi + delta,
+   the precomputed sin(xi) and cos(xi) can be stored as single
+   floating-point numbers; the extra zeroes in the infinite-precision
+   result mean that we get some precision for free. Compare with
+   the constants for log-impl.myr or exp-impl.myr, which require
+   more space to be stored.
+ */
+
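+/*
+   Sketch of how such a table is typically consumed, following [GB91]:
+   with x = xi + delta,
+       sin(x) = sin(xi)*cos(delta) + cos(xi)*sin(delta)
+       cos(x) = cos(xi)*cos(delta) - sin(xi)*sin(delta)
+   where cos(delta) and sin(delta) come from short polynomials in the
+   small delta.
+ */
+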
+/* These casts break strict aliasing; build with -fno-strict-aliasing (or use memcpy) */
+#define FLT64_TO_UINT64(f) (*((uint64_t *) ((char *) &f)))
+#define UINT64_TO_FLT64(u) (*((double *) ((char *) &u)))
+#define xmin(a, b) ((a) < (b) ? (a) : (b))
+#define xmax(a, b) ((a) > (b) ? (a) : (b))
+
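+/* Raw (biased) exponent of a flt64: 0 for zero/subnormals, 0x7ff for Inf/NaN */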
+#define EXP_OF_FLT64(f) (((FLT64_TO_UINT64(f)) >> 52) & 0x7ff)
+
+static int N = 256;
+
+/* Returns 0 on success, -1 on failure */
+static int find_triple_64(int i, int min_leeway, int perfect_leeway)
+{
+        /*
+           Using mpfr is entirely overkill for this; [Lut95] includes
+           PASCAL fragments that use almost entirely integer
+           arithmetic, and the initial calculation doesn't need to
+           be so precise. No matter.
+         */
+        mpfr_t xi;
+        mpfr_t xip1;
+        mpfr_t cos;
+        mpfr_t sin;
+        double xip1_d;
+        double t;
+        uint64_t sin_u;
+        uint64_t cos_u;
+        int e1;
+        int e2;
+        uint64_t xip1_u;
+        double xi_initial;
+        uint64_t xi_initial_u;
+        double xi_current;
+        uint64_t xi_current_u;
+        long int r = 0;
+        long int best_r = 0;
+        int sgn = 1;
+        int ml = min_leeway;
+        int best_l = 0;
+        uint64_t best_xi_u;
+        uint64_t best_sin_u;
+        uint64_t best_cos_u;
+        time_t start;
+        time_t end;
+
+        start = time(0);
+        mpfr_init2(xi, 100);
+        mpfr_init2(xip1, 100);
+        mpfr_init2(cos, 100);
+        mpfr_init2(sin, 100);
+
+        /* start out at xi = πi/(4N) */
+        mpfr_const_pi(xi, MPFR_RNDN);
+        mpfr_mul_si(xip1, xi, (long int) (i + 1), MPFR_RNDN);
+        mpfr_mul_si(xi, xi, (long int) i, MPFR_RNDN);
+        mpfr_div_si(xi, xi, (long int) 4 * N, MPFR_RNDN);
+        mpfr_div_si(xip1, xip1, (long int) 4 * N, MPFR_RNDN);
+        xip1_d = mpfr_get_d(xip1, MPFR_RNDN);
+        xip1_u = FLT64_TO_UINT64(xip1_d);
+        xi_initial = mpfr_get_d(xi, MPFR_RNDN);
+        xi_initial_u = FLT64_TO_UINT64(xi_initial);
+
+        while (1) {
+                xi_current_u = xi_initial_u + (sgn * r);
+                xi_current = UINT64_TO_FLT64(xi_current_u);
+                mpfr_set_d(xi, xi_current, MPFR_RNDN);
+
+                /* Test if cos(xi) has enough zeroes */
+                mpfr_cos(cos, xi, MPFR_RNDN);
+                t = mpfr_get_d(cos, MPFR_RNDN);
+                cos_u = FLT64_TO_UINT64(t);
+                e1 = EXP_OF_FLT64(t);
+                mpfr_sub_d(cos, cos, t, MPFR_RNDN);
+                t = mpfr_get_d(cos, MPFR_RNDN);
+                e2 = EXP_OF_FLT64(t);
+
+                if (e2 == 0) {
+
+                        /* Damn; the remainder is zero or subnormal. i = 0 or N? */
+                        return -1;
+                }
+
+                if (e1 - e2 < (52 + min_leeway)) {
+                        goto inc;
+                }
+
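+                /*
+                   e1 - e2 - 52 is roughly the number of bits beyond the
+                   stored 53-bit significand on which the double cos(xi)
+                   still agrees with the exact value: the "leeway".
+                 */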
+                ml = xmax(min_leeway, e1 - e2 - 52);
+
+                /* Test if sin(xi) has enough zeroes */
+                mpfr_sin(sin, xi, MPFR_RNDN);
+                t = mpfr_get_d(sin, MPFR_RNDN);
+                sin_u = FLT64_TO_UINT64(t);
+                e1 = EXP_OF_FLT64(t);
+                mpfr_sub_d(sin, sin, t, MPFR_RNDN);
+                t = mpfr_get_d(sin, MPFR_RNDN);
+                e2 = EXP_OF_FLT64(t);
+
+                if (e2 == 0) {
+
+                        /* Damn; the remainder is zero or subnormal. i = 0 or N? */
+                        return -1;
+                }
+
+                if (e1 - e2 < (52 + min_leeway)) {
+                        goto inc;
+                }
+
+                ml = xmin(ml, e1 - e2 - 52);
+
+                /* Hurrah, this is valid */
+                if (ml > best_l) {
+                        best_l = ml;
+                        best_xi_u = xi_current_u;
+                        best_cos_u = cos_u;
+                        best_sin_u = sin_u;
+                        best_r = sgn * r;
+
+                        /* If this is super-good, don't bother finding more */
+                        if (best_l >= perfect_leeway) {
+                                break;
+                        }
+                }
+
+inc:
+
+                /* Step outward: alternate xi_initial + r and xi_initial - r, growing r */
+                sgn *= -1;
+
+                if (sgn < 0) {
+                        r++;
+                } else if (r > (1 << 29) || xi_current_u > xip1_u) {
+                        /*
+                           This is taking too long, give up looking
+                           for perfection and take the best we've
+                           got. A sweep of 1 << 28 finishes in ~60
+                           hrs on my personal machine as I write
+                           this.
+                         */
+                        break;
+                }
+        }
+
+        end = time(0);
+
+        if (best_l > min_leeway) {
+                printf(
+                        "[ .a = %#018lx, .c = %#018lx, .s = %#018lx ], /* i = %03d, l = %02d, r = %010ld, t = %ld */ \n",
+                        best_xi_u, best_cos_u, best_sin_u, i, best_l, best_r,
+                        end -
+                        start);
+
+                return 0;
+        } else {
+                return -1;
+        }
+}
+
+int main(void)
+{
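+        /* The starting index appears hand-tweaked; adjust to resume or restrict a sweep */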
+        for (int i = 190; i <= N; ++i) {
+                if (find_triple_64(i, 11, 20) < 0) {
+                        printf("CANNOT FIND SUITABLE CANDIDATE FOR i = %03d\n", i);
+                }
+        }
+
+        return 0;
+}
--- a/lib/math/references
+++ b/lib/math/references
@@ -1,5 +1,11 @@
 References
 
+[GB91]
+Shmuel Gal and Boris Bachelis. “An accurate elementary mathematical
+library for the IEEE floating point standard”. In: ACM Trans. Math.
+Software 17.1 (1991), pp. 26–45. issn: 0098-3500. doi:
+10.1145/103147.103151. url: https://doi.acm.org/10.1145/103147.103151.
+
 [KM06]
 Peter Kornerup and Jean-Michel Muller. “Choosing starting values
 for certain Newton–Raphson iterations”. In: Theoretical Computer
@@ -6,6 +12,11 @@
 Science 351 (1 2006), pp. 101–110. doi:
 https://doi.org/10.1016/j.tcs.2005.09.056.
 
+[Lut95]
+Wolfram Luther. “Highly accurate tables for elementary functions”.
+In: BIT Numerical Mathematics 35.3 (Sept. 1995), pp. 352–360. doi:
+10.1007/BF01732609. url: https://doi.org/10.1007/BF01732609.
+
 [Mar00]
 Peter Markstein. IA-64 and elementary functions : speed and precision.
 Upper Saddle River, NJ: Prentice Hall, 2000. isbn: 9780130183484.
@@ -21,17 +32,17 @@
 [Tan89]
 Ping-Tak Peter Tang. “Table-driven Implementation of the Exponential
 Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.
-Softw. 15.2 (June 1989), pp. 144–157. issn: 0098-3500.  doi:
+Softw. 15.2 (June 1989), pp. 144–157. issn: 0098-3500. doi:
 10.1145/63522.214389. url: http://doi.acm.org/10.1145/63522.214389.
 
 [Tan90]
 Ping-Tak Peter Tang. “Table-driven Implementation of the Logarithm
 Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.
-Softw. 16.4 (Dec. 1990), pp. 378–400. issn: 0098-3500.  doi:
+Softw. 16.4 (Dec. 1990), pp. 378–400. issn: 0098-3500. doi:
 10.1145/98267.98294. url: http://doi.acm.org/10.1145/98267.98294.
 
 [Tan92]
 Ping Tak Peter Tang. “Table-driven Implementation of the Expm1
 Function in IEEE Floating-point Arithmetic”. In: ACM Trans. Math.
-Softw. 18.2 (June 1992), pp. 211–222. issn: 0098-3500.  doi:
+Softw. 18.2 (June 1992), pp. 211–222. issn: 0098-3500. doi:
 10.1145/146847.146928. url: http://doi.acm.org/10.1145/146847.146928.