shithub: mc

ref: 0f79e7dd998f35f84ce794cc5adeaad683ac0022
dir: /lib/math/ancillary/generate-triples-for-GB91.c/

View raw version
/* cc -o generate-triples-for-GB91 generate-triples-for-GB91.c -lmpfr # -fno-strict-aliasing */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#include <mpfr.h>

/*
   Find triples (xi, sin(xi), cos(xi)) very close to machine numbers
   for use in the algorithm of [GB91]. See [Lut95] for a better way
   of calculating these, which we don't follow.

   The idea is that, by good fortune (or more persuasive arguments),
   there exist floating-point numbers xi that are very close to
   numbers of the form (i/N)(π/4). They are so close, in fact, that
   the exact binary expansion of (i/N)(π/4) is xi, followed by a
   bunch of zeroes, then the irrational, unpredictable part.

   Then, when we discretize the input for sin(x) to some xi + delta,
   the precomputed sin(xi) and cos(xi) can be stored as single
   floating-point numbers; the extra zeroes in the infinite-precision
   result mean that we get some precision for free. Compare with
   the constants for log-impl.myr or exp-impl.myr, where the constants
   require more space to be stored.
 */

/* Something something -fno-strict-aliasing */
#define FLT64_TO_UINT64(f) (*((uint64_t *) ((char *) &f)))
#define UINT64_TO_FLT64(u) (*((double *) ((char *) &u)))
#define xmin(a, b) ((a) < (b) ? (a) : (b))
#define xmax(a, b) ((a) > (b) ? (a) : (b))

#define EXP_OF_FLT64(f) (((FLT64_TO_UINT64(f)) >> 52) & 0x7ff)

static int N = 256;

/* Returns >= zero iff successful */
static int find_triple_64(int i, int min_leeway, int perfect_leeway)
{
        /*
           Using mpfr is not entirely overkill for this; [Lut95]
           includes PASCAL fragments that use almost entirely integer
           arithmetic... but the error term in that only handles
           up to 13 extra bits of zeroes or so. We proudly boast
           at least 16 bits of extra zeroes in all cases.
         */
        mpfr_t xi;
        mpfr_t xip1;
        mpfr_t cos;
        mpfr_t sin;
        double xip1_d;
        double t;
        uint64_t sin_u;
        uint64_t cos_u;
        int e1;
        int e2;
        uint64_t xip1_u;
        double xi_initial;
        uint64_t xi_initial_u;
        double xi_current;
        uint64_t xi_current_u;
        long int r = 0;
        long int best_r = 0;
        int sgn = 1;
        int ml = min_leeway;
        int best_l = 0;
        uint64_t best_xi_u;
        uint64_t best_sin_u;
        uint64_t best_cos_u;
        time_t start;
        time_t end;

        start = time(0);
        mpfr_init2(xi, 100);
        mpfr_init2(xip1, 100);
        mpfr_init2(cos, 100);
        mpfr_init2(sin, 100);

        /* start out at xi = πi/(4N) */
        mpfr_const_pi(xi, MPFR_RNDN);
        mpfr_mul_si(xip1, xi, (long int) (i + 1), MPFR_RNDN);
        mpfr_mul_si(xi, xi, (long int) i, MPFR_RNDN);
        mpfr_div_si(xi, xi, (long int) 4 * N, MPFR_RNDN);
        mpfr_div_si(xip1, xip1, (long int) 4 * N, MPFR_RNDN);
        xip1_d = mpfr_get_d(xip1, MPFR_RNDN);
        xip1_u = FLT64_TO_UINT64(xip1_d);
        xi_initial = mpfr_get_d(xi, MPFR_RNDN);
        xi_initial_u = FLT64_TO_UINT64(xi_initial);

        while (1) {
                xi_current_u = xi_initial_u + (sgn * r);
                xi_current = UINT64_TO_FLT64(xi_current_u);
                mpfr_set_d(xi, xi_current, MPFR_RNDN);

                /* Test if cos(xi) has enough zeroes */
                mpfr_cos(cos, xi, MPFR_RNDN);
                t = mpfr_get_d(cos, MPFR_RNDN);
                cos_u = FLT64_TO_UINT64(t);
                e1 = EXP_OF_FLT64(t);
                mpfr_sub_d(cos, cos, t, MPFR_RNDN);
                t = mpfr_get_d(cos, MPFR_RNDN);
                e2 = EXP_OF_FLT64(t);

                if (e2 == -1024) {

                        /* Damn; this is too close to a subnormal. i = 0 or N? */
                        return -1;
                }

                if (e1 - e2 < (52 + min_leeway)) {
                        goto inc;
                }

                ml = xmax(min_leeway, e1 - e2 - 52);

                /* Test if sin(xi) has enough zeroes */
                mpfr_sin(sin, xi, MPFR_RNDN);
                t = mpfr_get_d(sin, MPFR_RNDN);
                sin_u = FLT64_TO_UINT64(t);
                e1 = EXP_OF_FLT64(t);
                mpfr_sub_d(sin, sin, t, MPFR_RNDN);
                t = mpfr_get_d(sin, MPFR_RNDN);
                e2 = EXP_OF_FLT64(t);

                if (e2 == -1024) {

                        /* Damn; this is too close to a subnormal. i = 0 or N? */
                        return -1;
                }

                if (e1 - e2 < (52 + min_leeway)) {
                        goto inc;
                }

                ml = xmin(ml, e1 - e2 - 52);

                /* Hurrah, this is valid */
                if (ml > best_l) {
                        best_l = ml;
                        best_xi_u = xi_current_u;
                        best_cos_u = cos_u;
                        best_sin_u = sin_u;
                        best_r = sgn * r;

                        /* If this is super-good, don't bother finding more */
                        if (best_l >= perfect_leeway) {
                                break;
                        }
                }

inc:

                /* Increment */
                sgn *= -1;

                if (sgn < 0) {
                        r++;
                } else if (r > (1 << 29) ||
                           xi_current_u > xip1_u) {
                        /*
                           This is taking too long, give up looking
                           for perfection and take the best we've
                           got. A sweep of 1 << 28 finishes in ~60
                           hrs on my personal machine as I write
                           this.
                         */
                        break;
                }
        }

        end = time(0);

        if (best_l > min_leeway) {
                printf(
                        "(%#018lx, %#018lx, %#018lx), /* i = %03d, l = %02d, r = %010ld, t = %ld */ \n",
                        best_xi_u, best_cos_u, best_sin_u, i, best_l, best_r,
                        end -
                        start);

                return 0;
        } else {
                return -1;
        }
}

int main(void)
{
        for (int i = 190; i <= N; ++i) {
                if (find_triple_64(i, 11, 20) < 0) {
                        /*
                           For some reason, i = 190 requires special
                           handling; drop the range limitations on
                           r and come back in a week.

                           Other indices (4, 47, 74, &c) also benefit
                           from this special handling, so the
                           contents of sin-impl.myr might not exactly
                           match the output of this particular file.
                         */
                        printf("CANNOT FIND SUITABLE CANDIDATE FOR i = %03d\n",
                               i);
                }
        }

        return 0;
}