src/libm/k_rem_pio2.c
author Philipp Wiesemann <philipp.wiesemann@arcor.de>
Sun, 28 Dec 2014 22:00:24 +0100
changeset 9301 7377a9a3aed6
parent 8670 0c15c8a2f8c3
child 10650 b6ec7005ca15
permissions -rw-r--r--
Renamed README-emscripten.txt to README-emscripten.md.
slouken@2757
     1
/* @(#)k_rem_pio2.c 5.1 93/09/24 */
slouken@2757
     2
/*
slouken@2757
     3
 * ====================================================
slouken@2757
     4
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
slouken@2757
     5
 *
slouken@2757
     6
 * Developed at SunPro, a Sun Microsystems, Inc. business.
slouken@2757
     7
 * Permission to use, copy, modify, and distribute this
slouken@2757
     8
 * software is freely granted, provided that this notice
slouken@2757
     9
 * is preserved.
slouken@2757
    10
 * ====================================================
slouken@2757
    11
 */
slouken@2757
    12
slouken@2757
    13
#if defined(LIBM_SCCS) && !defined(lint)
slouken@3162
    14
static const char rcsid[] =
slouken@2757
    15
    "$NetBSD: k_rem_pio2.c,v 1.7 1995/05/10 20:46:25 jtc Exp $";
slouken@2757
    16
#endif
slouken@2757
    17
slouken@2757
    18
/*
slouken@2757
    19
 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
slouken@2757
    20
 * double x[],y[]; int e0,nx,prec; int ipio2[];
slouken@2757
    21
 *
slouken@2757
    22
 * __kernel_rem_pio2 return the last three digits of N with
slouken@2757
    23
 *		y = x - N*pi/2
slouken@2757
    24
 * so that |y| < pi/2.
slouken@2757
    25
 *
slouken@2757
    26
 * The method is to compute the integer (mod 8) and fraction parts of
slouken@2757
    27
 * (2/pi)*x without doing the full multiplication. In general we
slouken@2757
    28
 * skip the part of the product that are known to be a huge integer (
slouken@2757
    29
 * more accurately, = 0 mod 8 ). Thus the number of operations are
slouken@2757
    30
 * independent of the exponent of the input.
slouken@2757
    31
 *
slouken@2757
    32
 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
slouken@2757
    33
 *
slouken@2757
    34
 * Input parameters:
slouken@2757
    35
 * 	x[]	The input value (must be positive) is broken into nx
slouken@2757
    36
 *		pieces of 24-bit integers in double precision format.
slouken@2757
    37
 *		x[i] will be the i-th 24 bit of x. The scaled exponent
slouken@2757
    38
 *		of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
slouken@2757
    39
 *		match x's up to 24 bits.
slouken@2757
    40
 *
slouken@2757
    41
 *		Example of breaking a double positive z into x[0]+x[1]+x[2]:
slouken@2757
    42
 *			e0 = ilogb(z)-23
slouken@2757
    43
 *			z  = scalbn(z,-e0)
slouken@2757
    44
 *		for i = 0,1,2
slouken@2757
    45
 *			x[i] = floor(z)
slouken@2757
    46
 *			z    = (z-x[i])*2**24
slouken@2757
    47
 *
slouken@2757
    48
 *
slouken@2757
    49
 *	y[]	ouput result in an array of double precision numbers.
slouken@2757
    50
 *		The dimension of y[] is:
slouken@2757
    51
 *			24-bit  precision	1
slouken@2757
    52
 *			53-bit  precision	2
slouken@2757
    53
 *			64-bit  precision	2
slouken@2757
    54
 *			113-bit precision	3
slouken@2757
    55
 *		The actual value is the sum of them. Thus for 113-bit
slouken@2757
    56
 *		precison, one may have to do something like:
slouken@2757
    57
 *
slouken@2757
    58
 *		long double t,w,r_head, r_tail;
slouken@2757
    59
 *		t = (long double)y[2] + (long double)y[1];
slouken@2757
    60
 *		w = (long double)y[0];
slouken@2757
    61
 *		r_head = t+w;
slouken@2757
    62
 *		r_tail = w - (r_head - t);
slouken@2757
    63
 *
slouken@2757
    64
 *	e0	The exponent of x[0]
slouken@2757
    65
 *
slouken@2757
    66
 *	nx	dimension of x[]
slouken@2757
    67
 *
slouken@2757
    68
 *  	prec	an integer indicating the precision:
slouken@2757
    69
 *			0	24  bits (single)
slouken@2757
    70
 *			1	53  bits (double)
slouken@2757
    71
 *			2	64  bits (extended)
slouken@2757
    72
 *			3	113 bits (quad)
slouken@2757
    73
 *
slouken@2757
    74
 *	ipio2[]
slouken@2757
    75
 *		integer array, contains the (24*i)-th to (24*i+23)-th
slouken@2757
    76
 *		bit of 2/pi after binary point. The corresponding
slouken@2757
    77
 *		floating value is
slouken@2757
    78
 *
slouken@2757
    79
 *			ipio2[i] * 2^(-24(i+1)).
slouken@2757
    80
 *
slouken@2757
    81
 * External function:
slouken@2757
    82
 *	double scalbn(), floor();
slouken@2757
    83
 *
slouken@2757
    84
 *
slouken@2757
    85
 * Here is the description of some local variables:
slouken@2757
    86
 *
slouken@2757
    87
 * 	jk	jk+1 is the initial number of terms of ipio2[] needed
slouken@2757
    88
 *		in the computation. The recommended value is 2,3,4,
slouken@2757
    89
 *		6 for single, double, extended,and quad.
slouken@2757
    90
 *
slouken@2757
    91
 * 	jz	local integer variable indicating the number of
slouken@2757
    92
 *		terms of ipio2[] used.
slouken@2757
    93
 *
slouken@2757
    94
 *	jx	nx - 1
slouken@2757
    95
 *
slouken@2757
    96
 *	jv	index for pointing to the suitable ipio2[] for the
slouken@2757
    97
 *		computation. In general, we want
slouken@2757
    98
 *			( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
slouken@2757
    99
 *		is an integer. Thus
slouken@2757
   100
 *			e0-3-24*jv >= 0 or (e0-3)/24 >= jv
slouken@2757
   101
 *		Hence jv = max(0,(e0-3)/24).
slouken@2757
   102
 *
slouken@2757
   103
 *	jp	jp+1 is the number of terms in PIo2[] needed, jp = jk.
slouken@2757
   104
 *
slouken@2757
   105
 * 	q[]	double array with integral value, representing the
slouken@2757
   106
 *		24-bits chunk of the product of x and 2/pi.
slouken@2757
   107
 *
slouken@2757
   108
 *	q0	the corresponding exponent of q[0]. Note that the
slouken@2757
   109
 *		exponent for q[i] would be q0-24*i.
slouken@2757
   110
 *
slouken@2757
   111
 *	PIo2[]	double precision array, obtained by cutting pi/2
slouken@2757
   112
 *		into 24 bits chunks.
slouken@2757
   113
 *
slouken@2757
   114
 *	f[]	ipio2[] in floating point
slouken@2757
   115
 *
slouken@2757
   116
 *	iq[]	integer array by breaking up q[] in 24-bits chunk.
slouken@2757
   117
 *
slouken@2757
   118
 *	fq[]	final product of x*(2/pi) in fq[0],..,fq[jk]
slouken@2757
   119
 *
slouken@2757
   120
 *	ih	integer. If >0 it indicates q[] is >= 0.5, hence
slouken@2757
   121
 *		it also indicates the *sign* of the result.
slouken@2757
   122
 *
slouken@2757
   123
 */
slouken@2757
   124
slouken@2757
   125
slouken@2757
   126
/*
slouken@2757
   127
 * Constants:
slouken@2757
   128
 * The hexadecimal values are the intended ones for the following
slouken@2757
   129
 * constants. The decimal values may be used, provided that the
slouken@2757
   130
 * compiler will convert from decimal to binary accurately enough
slouken@2757
   131
 * to produce the hexadecimal values shown.
slouken@2757
   132
 */
slouken@2757
   133
slouken@6044
   134
#include "math_libm.h"
slouken@2757
   135
#include "math_private.h"
slouken@2757
   136
icculus@8670
   137
#include "SDL_assert.h"
icculus@8670
   138
slouken@2757
   139
libm_hidden_proto(scalbn)
slouken@2757
   140
    libm_hidden_proto(floor)
slouken@2757
   141
#ifdef __STDC__
slouken@2757
   142
     static const int init_jk[] = { 2, 3, 4, 6 };       /* initial value for jk */
slouken@2757
   143
#else
slouken@2757
   144
     static int init_jk[] = { 2, 3, 4, 6 };
slouken@2757
   145
#endif
slouken@2757
   146
slouken@2757
   147
#ifdef __STDC__
slouken@2757
   148
static const double PIo2[] = {
slouken@2757
   149
#else
slouken@2757
   150
static double PIo2[] = {
slouken@2757
   151
#endif
slouken@2757
   152
    1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
slouken@2757
   153
    7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
slouken@2757
   154
    5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
slouken@2757
   155
    3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
slouken@2757
   156
    1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
slouken@2757
   157
    1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
slouken@2757
   158
    2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
slouken@2757
   159
    2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
slouken@2757
   160
};
slouken@2757
   161
slouken@2757
   162
#ifdef __STDC__
slouken@2757
   163
static const double
slouken@2757
   164
#else
slouken@2757
   165
static double
slouken@2757
   166
#endif
slouken@2757
   167
  zero = 0.0, one = 1.0, two24 = 1.67772160000000000000e+07,    /* 0x41700000, 0x00000000 */
slouken@2757
   168
    twon24 = 5.96046447753906250000e-08;        /* 0x3E700000, 0x00000000 */
slouken@2757
   169
slouken@2757
   170
#ifdef __STDC__
slouken@2757
   171
int attribute_hidden
slouken@2757
   172
__kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
slouken@2757
   173
                  const int32_t * ipio2)
slouken@2757
   174
#else
slouken@2757
   175
int attribute_hidden
slouken@2757
   176
__kernel_rem_pio2(x, y, e0, nx, prec, ipio2)
slouken@2757
   177
     double x[], y[];
slouken@2757
   178
     int e0, nx, prec;
slouken@2757
   179
     int32_t ipio2[];
slouken@2757
   180
#endif
slouken@2757
   181
{
slouken@2757
   182
    int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
slouken@2757
   183
    double z, fw, f[20], fq[20], q[20];
slouken@2757
   184
slouken@2757
   185
    /* initialize jk */
icculus@8670
   186
    SDL_assert((prec >= 0) && (prec < SDL_arraysize(init_jk)));
slouken@2757
   187
    jk = init_jk[prec];
icculus@8670
   188
    SDL_assert((jk >= 2) && (jk <= 6));
slouken@2757
   189
    jp = jk;
slouken@2757
   190
slouken@2757
   191
    /* determine jx,jv,q0, note that 3>q0 */
icculus@8670
   192
    SDL_assert(nx > 0);
slouken@2757
   193
    jx = nx - 1;
slouken@2757
   194
    jv = (e0 - 3) / 24;
slouken@2757
   195
    if (jv < 0)
slouken@2757
   196
        jv = 0;
slouken@2757
   197
    q0 = e0 - 24 * (jv + 1);
slouken@2757
   198
slouken@2757
   199
    /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
slouken@2757
   200
    j = jv - jx;
slouken@2757
   201
    m = jx + jk;
slouken@2757
   202
    for (i = 0; i <= m; i++, j++)
slouken@2757
   203
        f[i] = (j < 0) ? zero : (double) ipio2[j];
slouken@2757
   204
slouken@2757
   205
    /* compute q[0],q[1],...q[jk] */
slouken@2757
   206
    for (i = 0; i <= jk; i++) {
slouken@2757
   207
        for (j = 0, fw = 0.0; j <= jx; j++)
slouken@2757
   208
            fw += x[j] * f[jx + i - j];
slouken@2757
   209
        q[i] = fw;
slouken@2757
   210
    }
slouken@2757
   211
slouken@2757
   212
    jz = jk;
slouken@2757
   213
  recompute:
slouken@2757
   214
    /* distill q[] into iq[] reversingly */
slouken@2757
   215
    for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
slouken@2757
   216
        fw = (double) ((int32_t) (twon24 * z));
slouken@2757
   217
        iq[i] = (int32_t) (z - two24 * fw);
slouken@2757
   218
        z = q[j - 1] + fw;
slouken@2757
   219
    }
slouken@2757
   220
slouken@2757
   221
    /* compute n */
slouken@2757
   222
    z = scalbn(z, q0);          /* actual value of z */
slouken@2757
   223
    z -= 8.0 * floor(z * 0.125);        /* trim off integer >= 8 */
slouken@2757
   224
    n = (int32_t) z;
slouken@2757
   225
    z -= (double) n;
slouken@2757
   226
    ih = 0;
slouken@2757
   227
    if (q0 > 0) {               /* need iq[jz-1] to determine n */
slouken@2757
   228
        i = (iq[jz - 1] >> (24 - q0));
slouken@2757
   229
        n += i;
slouken@2757
   230
        iq[jz - 1] -= i << (24 - q0);
slouken@2757
   231
        ih = iq[jz - 1] >> (23 - q0);
slouken@2757
   232
    } else if (q0 == 0)
slouken@2757
   233
        ih = iq[jz - 1] >> 23;
slouken@2757
   234
    else if (z >= 0.5)
slouken@2757
   235
        ih = 2;
slouken@2757
   236
slouken@2757
   237
    if (ih > 0) {               /* q > 0.5 */
slouken@2757
   238
        n += 1;
slouken@2757
   239
        carry = 0;
slouken@2757
   240
        for (i = 0; i < jz; i++) {      /* compute 1-q */
slouken@2757
   241
            j = iq[i];
slouken@2757
   242
            if (carry == 0) {
slouken@2757
   243
                if (j != 0) {
slouken@2757
   244
                    carry = 1;
slouken@2757
   245
                    iq[i] = 0x1000000 - j;
slouken@2757
   246
                }
slouken@2757
   247
            } else
slouken@2757
   248
                iq[i] = 0xffffff - j;
slouken@2757
   249
        }
slouken@2757
   250
        if (q0 > 0) {           /* rare case: chance is 1 in 12 */
slouken@2757
   251
            switch (q0) {
slouken@2757
   252
            case 1:
slouken@2757
   253
                iq[jz - 1] &= 0x7fffff;
slouken@2757
   254
                break;
slouken@2757
   255
            case 2:
slouken@2757
   256
                iq[jz - 1] &= 0x3fffff;
slouken@2757
   257
                break;
slouken@2757
   258
            }
slouken@2757
   259
        }
slouken@2757
   260
        if (ih == 2) {
slouken@2757
   261
            z = one - z;
slouken@2757
   262
            if (carry != 0)
slouken@2757
   263
                z -= scalbn(one, q0);
slouken@2757
   264
        }
slouken@2757
   265
    }
slouken@2757
   266
slouken@2757
   267
    /* check if recomputation is needed */
slouken@2757
   268
    if (z == zero) {
slouken@2757
   269
        j = 0;
slouken@2757
   270
        for (i = jz - 1; i >= jk; i--)
slouken@2757
   271
            j |= iq[i];
slouken@2757
   272
        if (j == 0) {           /* need recomputation */
slouken@2757
   273
            for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
slouken@2757
   274
slouken@2757
   275
            for (i = jz + 1; i <= jz + k; i++) {        /* add q[jz+1] to q[jz+k] */
slouken@2757
   276
                f[jx + i] = (double) ipio2[jv + i];
slouken@2757
   277
                for (j = 0, fw = 0.0; j <= jx; j++)
slouken@2757
   278
                    fw += x[j] * f[jx + i - j];
slouken@2757
   279
                q[i] = fw;
slouken@2757
   280
            }
slouken@2757
   281
            jz += k;
slouken@2757
   282
            goto recompute;
slouken@2757
   283
        }
slouken@2757
   284
    }
slouken@2757
   285
slouken@2757
   286
    /* chop off zero terms */
slouken@2757
   287
    if (z == 0.0) {
slouken@2757
   288
        jz -= 1;
slouken@2757
   289
        q0 -= 24;
slouken@2757
   290
        while (iq[jz] == 0) {
slouken@2757
   291
            jz--;
slouken@2757
   292
            q0 -= 24;
slouken@2757
   293
        }
slouken@2757
   294
    } else {                    /* break z into 24-bit if necessary */
slouken@2757
   295
        z = scalbn(z, -q0);
slouken@2757
   296
        if (z >= two24) {
slouken@2757
   297
            fw = (double) ((int32_t) (twon24 * z));
slouken@2757
   298
            iq[jz] = (int32_t) (z - two24 * fw);
slouken@2757
   299
            jz += 1;
slouken@2757
   300
            q0 += 24;
slouken@2757
   301
            iq[jz] = (int32_t) fw;
slouken@2757
   302
        } else
slouken@2757
   303
            iq[jz] = (int32_t) z;
slouken@2757
   304
    }
slouken@2757
   305
slouken@2757
   306
    /* convert integer "bit" chunk to floating-point value */
slouken@2757
   307
    fw = scalbn(one, q0);
slouken@2757
   308
    for (i = jz; i >= 0; i--) {
slouken@2757
   309
        q[i] = fw * (double) iq[i];
slouken@2757
   310
        fw *= twon24;
slouken@2757
   311
    }
slouken@2757
   312
slouken@2757
   313
    /* compute PIo2[0,...,jp]*q[jz,...,0] */
slouken@2757
   314
    for (i = jz; i >= 0; i--) {
slouken@2757
   315
        for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
slouken@2757
   316
            fw += PIo2[k] * q[i + k];
slouken@2757
   317
        fq[jz - i] = fw;
slouken@2757
   318
    }
slouken@2757
   319
slouken@2757
   320
    /* compress fq[] into y[] */
slouken@2757
   321
    switch (prec) {
slouken@2757
   322
    case 0:
slouken@2757
   323
        fw = 0.0;
slouken@2757
   324
        for (i = jz; i >= 0; i--)
slouken@2757
   325
            fw += fq[i];
slouken@2757
   326
        y[0] = (ih == 0) ? fw : -fw;
slouken@2757
   327
        break;
slouken@2757
   328
    case 1:
slouken@2757
   329
    case 2:
slouken@2757
   330
        fw = 0.0;
slouken@2757
   331
        for (i = jz; i >= 0; i--)
slouken@2757
   332
            fw += fq[i];
slouken@2757
   333
        y[0] = (ih == 0) ? fw : -fw;
slouken@2757
   334
        fw = fq[0] - fw;
slouken@2757
   335
        for (i = 1; i <= jz; i++)
slouken@2757
   336
            fw += fq[i];
slouken@2757
   337
        y[1] = (ih == 0) ? fw : -fw;
slouken@2757
   338
        break;
slouken@2757
   339
    case 3:                    /* painful */
slouken@2757
   340
        for (i = jz; i > 0; i--) {
slouken@2757
   341
            fw = fq[i - 1] + fq[i];
slouken@2757
   342
            fq[i] += fq[i - 1] - fw;
slouken@2757
   343
            fq[i - 1] = fw;
slouken@2757
   344
        }
slouken@2757
   345
        for (i = jz; i > 1; i--) {
slouken@2757
   346
            fw = fq[i - 1] + fq[i];
slouken@2757
   347
            fq[i] += fq[i - 1] - fw;
slouken@2757
   348
            fq[i - 1] = fw;
slouken@2757
   349
        }
slouken@2757
   350
        for (fw = 0.0, i = jz; i >= 2; i--)
slouken@2757
   351
            fw += fq[i];
slouken@2757
   352
        if (ih == 0) {
slouken@2757
   353
            y[0] = fq[0];
slouken@2757
   354
            y[1] = fq[1];
slouken@2757
   355
            y[2] = fw;
slouken@2757
   356
        } else {
slouken@2757
   357
            y[0] = -fq[0];
slouken@2757
   358
            y[1] = -fq[1];
slouken@2757
   359
            y[2] = -fw;
slouken@2757
   360
        }
slouken@2757
   361
    }
slouken@2757
   362
    return n & 7;
slouken@2757
   363
}