src/libm/k_rem_pio2.c
author Ryan C. Gordon <icculus@icculus.org>
Sun, 23 Mar 2014 18:56:47 -0400
changeset 8670 0c15c8a2f8c3
parent 6044 35448a5ea044
child 10650 b6ec7005ca15
permissions -rw-r--r--
Tossed in some SDL_asserts to make static analyzer happier.

(Most of its complaints here are that these ints can be negative, although
they wouldn't been in sane cases. Checking sanity is what assertions do, and
it placates the analyzer appropriately.)
     1 /* @(#)k_rem_pio2.c 5.1 93/09/24 */
     2 /*
     3  * ====================================================
     4  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     5  *
     6  * Developed at SunPro, a Sun Microsystems, Inc. business.
     7  * Permission to use, copy, modify, and distribute this
     8  * software is freely granted, provided that this notice
     9  * is preserved.
    10  * ====================================================
    11  */
    12 
    13 #if defined(LIBM_SCCS) && !defined(lint)
    14 static const char rcsid[] =
    15     "$NetBSD: k_rem_pio2.c,v 1.7 1995/05/10 20:46:25 jtc Exp $";
    16 #endif
    17 
    18 /*
    19  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
    20  * double x[],y[]; int e0,nx,prec; int ipio2[];
    21  *
    22  * __kernel_rem_pio2 return the last three digits of N with
    23  *		y = x - N*pi/2
    24  * so that |y| < pi/2.
    25  *
    26  * The method is to compute the integer (mod 8) and fraction parts of
    27  * (2/pi)*x without doing the full multiplication. In general we
    28  * skip the part of the product that are known to be a huge integer (
    29  * more accurately, = 0 mod 8 ). Thus the number of operations are
    30  * independent of the exponent of the input.
    31  *
    32  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
    33  *
    34  * Input parameters:
    35  * 	x[]	The input value (must be positive) is broken into nx
    36  *		pieces of 24-bit integers in double precision format.
    37  *		x[i] will be the i-th 24 bit of x. The scaled exponent
    38  *		of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
    39  *		match x's up to 24 bits.
    40  *
    41  *		Example of breaking a double positive z into x[0]+x[1]+x[2]:
    42  *			e0 = ilogb(z)-23
    43  *			z  = scalbn(z,-e0)
    44  *		for i = 0,1,2
    45  *			x[i] = floor(z)
    46  *			z    = (z-x[i])*2**24
    47  *
    48  *
    49  *	y[]	ouput result in an array of double precision numbers.
    50  *		The dimension of y[] is:
    51  *			24-bit  precision	1
    52  *			53-bit  precision	2
    53  *			64-bit  precision	2
    54  *			113-bit precision	3
    55  *		The actual value is the sum of them. Thus for 113-bit
    56  *		precison, one may have to do something like:
    57  *
    58  *		long double t,w,r_head, r_tail;
    59  *		t = (long double)y[2] + (long double)y[1];
    60  *		w = (long double)y[0];
    61  *		r_head = t+w;
    62  *		r_tail = w - (r_head - t);
    63  *
    64  *	e0	The exponent of x[0]
    65  *
    66  *	nx	dimension of x[]
    67  *
    68  *  	prec	an integer indicating the precision:
    69  *			0	24  bits (single)
    70  *			1	53  bits (double)
    71  *			2	64  bits (extended)
    72  *			3	113 bits (quad)
    73  *
    74  *	ipio2[]
    75  *		integer array, contains the (24*i)-th to (24*i+23)-th
    76  *		bit of 2/pi after binary point. The corresponding
    77  *		floating value is
    78  *
    79  *			ipio2[i] * 2^(-24(i+1)).
    80  *
    81  * External function:
    82  *	double scalbn(), floor();
    83  *
    84  *
    85  * Here is the description of some local variables:
    86  *
    87  * 	jk	jk+1 is the initial number of terms of ipio2[] needed
    88  *		in the computation. The recommended value is 2,3,4,
    89  *		6 for single, double, extended,and quad.
    90  *
    91  * 	jz	local integer variable indicating the number of
    92  *		terms of ipio2[] used.
    93  *
    94  *	jx	nx - 1
    95  *
    96  *	jv	index for pointing to the suitable ipio2[] for the
    97  *		computation. In general, we want
    98  *			( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
    99  *		is an integer. Thus
   100  *			e0-3-24*jv >= 0 or (e0-3)/24 >= jv
   101  *		Hence jv = max(0,(e0-3)/24).
   102  *
   103  *	jp	jp+1 is the number of terms in PIo2[] needed, jp = jk.
   104  *
   105  * 	q[]	double array with integral value, representing the
   106  *		24-bits chunk of the product of x and 2/pi.
   107  *
   108  *	q0	the corresponding exponent of q[0]. Note that the
   109  *		exponent for q[i] would be q0-24*i.
   110  *
   111  *	PIo2[]	double precision array, obtained by cutting pi/2
   112  *		into 24 bits chunks.
   113  *
   114  *	f[]	ipio2[] in floating point
   115  *
   116  *	iq[]	integer array by breaking up q[] in 24-bits chunk.
   117  *
   118  *	fq[]	final product of x*(2/pi) in fq[0],..,fq[jk]
   119  *
   120  *	ih	integer. If >0 it indicates q[] is >= 0.5, hence
   121  *		it also indicates the *sign* of the result.
   122  *
   123  */
   124 
   125 
   126 /*
   127  * Constants:
   128  * The hexadecimal values are the intended ones for the following
   129  * constants. The decimal values may be used, provided that the
   130  * compiler will convert from decimal to binary accurately enough
   131  * to produce the hexadecimal values shown.
   132  */
   133 
   134 #include "math_libm.h"
   135 #include "math_private.h"
   136 
   137 #include "SDL_assert.h"
   138 
   139 libm_hidden_proto(scalbn)
   140     libm_hidden_proto(floor)
   141 #ifdef __STDC__
   142      static const int init_jk[] = { 2, 3, 4, 6 };       /* initial value for jk */
   143 #else
   144      static int init_jk[] = { 2, 3, 4, 6 };
   145 #endif
   146 
   147 #ifdef __STDC__
   148 static const double PIo2[] = {
   149 #else
   150 static double PIo2[] = {
   151 #endif
   152     1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
   153     7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
   154     5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
   155     3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
   156     1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
   157     1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
   158     2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
   159     2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
   160 };
   161 
   162 #ifdef __STDC__
   163 static const double
   164 #else
   165 static double
   166 #endif
   167   zero = 0.0, one = 1.0, two24 = 1.67772160000000000000e+07,    /* 0x41700000, 0x00000000 */
   168     twon24 = 5.96046447753906250000e-08;        /* 0x3E700000, 0x00000000 */
   169 
   170 #ifdef __STDC__
   171 int attribute_hidden
   172 __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
   173                   const int32_t * ipio2)
   174 #else
   175 int attribute_hidden
   176 __kernel_rem_pio2(x, y, e0, nx, prec, ipio2)
   177      double x[], y[];
   178      int e0, nx, prec;
   179      int32_t ipio2[];
   180 #endif
   181 {
   182     int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
   183     double z, fw, f[20], fq[20], q[20];
   184 
   185     /* initialize jk */
   186     SDL_assert((prec >= 0) && (prec < SDL_arraysize(init_jk)));
   187     jk = init_jk[prec];
   188     SDL_assert((jk >= 2) && (jk <= 6));
   189     jp = jk;
   190 
   191     /* determine jx,jv,q0, note that 3>q0 */
   192     SDL_assert(nx > 0);
   193     jx = nx - 1;
   194     jv = (e0 - 3) / 24;
   195     if (jv < 0)
   196         jv = 0;
   197     q0 = e0 - 24 * (jv + 1);
   198 
   199     /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
   200     j = jv - jx;
   201     m = jx + jk;
   202     for (i = 0; i <= m; i++, j++)
   203         f[i] = (j < 0) ? zero : (double) ipio2[j];
   204 
   205     /* compute q[0],q[1],...q[jk] */
   206     for (i = 0; i <= jk; i++) {
   207         for (j = 0, fw = 0.0; j <= jx; j++)
   208             fw += x[j] * f[jx + i - j];
   209         q[i] = fw;
   210     }
   211 
   212     jz = jk;
   213   recompute:
   214     /* distill q[] into iq[] reversingly */
   215     for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
   216         fw = (double) ((int32_t) (twon24 * z));
   217         iq[i] = (int32_t) (z - two24 * fw);
   218         z = q[j - 1] + fw;
   219     }
   220 
   221     /* compute n */
   222     z = scalbn(z, q0);          /* actual value of z */
   223     z -= 8.0 * floor(z * 0.125);        /* trim off integer >= 8 */
   224     n = (int32_t) z;
   225     z -= (double) n;
   226     ih = 0;
   227     if (q0 > 0) {               /* need iq[jz-1] to determine n */
   228         i = (iq[jz - 1] >> (24 - q0));
   229         n += i;
   230         iq[jz - 1] -= i << (24 - q0);
   231         ih = iq[jz - 1] >> (23 - q0);
   232     } else if (q0 == 0)
   233         ih = iq[jz - 1] >> 23;
   234     else if (z >= 0.5)
   235         ih = 2;
   236 
   237     if (ih > 0) {               /* q > 0.5 */
   238         n += 1;
   239         carry = 0;
   240         for (i = 0; i < jz; i++) {      /* compute 1-q */
   241             j = iq[i];
   242             if (carry == 0) {
   243                 if (j != 0) {
   244                     carry = 1;
   245                     iq[i] = 0x1000000 - j;
   246                 }
   247             } else
   248                 iq[i] = 0xffffff - j;
   249         }
   250         if (q0 > 0) {           /* rare case: chance is 1 in 12 */
   251             switch (q0) {
   252             case 1:
   253                 iq[jz - 1] &= 0x7fffff;
   254                 break;
   255             case 2:
   256                 iq[jz - 1] &= 0x3fffff;
   257                 break;
   258             }
   259         }
   260         if (ih == 2) {
   261             z = one - z;
   262             if (carry != 0)
   263                 z -= scalbn(one, q0);
   264         }
   265     }
   266 
   267     /* check if recomputation is needed */
   268     if (z == zero) {
   269         j = 0;
   270         for (i = jz - 1; i >= jk; i--)
   271             j |= iq[i];
   272         if (j == 0) {           /* need recomputation */
   273             for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
   274 
   275             for (i = jz + 1; i <= jz + k; i++) {        /* add q[jz+1] to q[jz+k] */
   276                 f[jx + i] = (double) ipio2[jv + i];
   277                 for (j = 0, fw = 0.0; j <= jx; j++)
   278                     fw += x[j] * f[jx + i - j];
   279                 q[i] = fw;
   280             }
   281             jz += k;
   282             goto recompute;
   283         }
   284     }
   285 
   286     /* chop off zero terms */
   287     if (z == 0.0) {
   288         jz -= 1;
   289         q0 -= 24;
   290         while (iq[jz] == 0) {
   291             jz--;
   292             q0 -= 24;
   293         }
   294     } else {                    /* break z into 24-bit if necessary */
   295         z = scalbn(z, -q0);
   296         if (z >= two24) {
   297             fw = (double) ((int32_t) (twon24 * z));
   298             iq[jz] = (int32_t) (z - two24 * fw);
   299             jz += 1;
   300             q0 += 24;
   301             iq[jz] = (int32_t) fw;
   302         } else
   303             iq[jz] = (int32_t) z;
   304     }
   305 
   306     /* convert integer "bit" chunk to floating-point value */
   307     fw = scalbn(one, q0);
   308     for (i = jz; i >= 0; i--) {
   309         q[i] = fw * (double) iq[i];
   310         fw *= twon24;
   311     }
   312 
   313     /* compute PIo2[0,...,jp]*q[jz,...,0] */
   314     for (i = jz; i >= 0; i--) {
   315         for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
   316             fw += PIo2[k] * q[i + k];
   317         fq[jz - i] = fw;
   318     }
   319 
   320     /* compress fq[] into y[] */
   321     switch (prec) {
   322     case 0:
   323         fw = 0.0;
   324         for (i = jz; i >= 0; i--)
   325             fw += fq[i];
   326         y[0] = (ih == 0) ? fw : -fw;
   327         break;
   328     case 1:
   329     case 2:
   330         fw = 0.0;
   331         for (i = jz; i >= 0; i--)
   332             fw += fq[i];
   333         y[0] = (ih == 0) ? fw : -fw;
   334         fw = fq[0] - fw;
   335         for (i = 1; i <= jz; i++)
   336             fw += fq[i];
   337         y[1] = (ih == 0) ? fw : -fw;
   338         break;
   339     case 3:                    /* painful */
   340         for (i = jz; i > 0; i--) {
   341             fw = fq[i - 1] + fq[i];
   342             fq[i] += fq[i - 1] - fw;
   343             fq[i - 1] = fw;
   344         }
   345         for (i = jz; i > 1; i--) {
   346             fw = fq[i - 1] + fq[i];
   347             fq[i] += fq[i - 1] - fw;
   348             fq[i - 1] = fw;
   349         }
   350         for (fw = 0.0, i = jz; i >= 2; i--)
   351             fw += fq[i];
   352         if (ih == 0) {
   353             y[0] = fq[0];
   354             y[1] = fq[1];
   355             y[2] = fw;
   356         } else {
   357             y[0] = -fq[0];
   358             y[1] = -fq[1];
   359             y[2] = -fw;
   360         }
   361     }
   362     return n & 7;
   363 }