src/libm/e_rem_pio2.c
author Ryan C. Gordon <icculus@icculus.org>
Thu, 21 Apr 2016 03:16:44 -0400
changeset 11729 d1ce8396c356
parent 11683 48bcba563d9c
child 12084 f0092ea3a038
permissions -rw-r--r--
Initial shot at a renderer target for Apple's Metal API.

This isn't complete, but is enough to run testsprite2. It's currently
Mac-only; with a little work to figure out how to properly glue in a Metal
layer to a UIView, this will likely work on iOS, too.

This is only wired up to the configure script right now, and disabled by
default. CMake and Xcode still need their bits filled in as appropriate.
     1 /*
     2  * ====================================================
     3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     4  *
     5  * Developed at SunPro, a Sun Microsystems, Inc. business.
     6  * Permission to use, copy, modify, and distribute this
     7  * software is freely granted, provided that this notice
     8  * is preserved.
     9  * ====================================================
    10  */
    11 
    12 /* __ieee754_rem_pio2(x,y)
    13  *
    14  * return the remainder of x rem pi/2 in y[0]+y[1]
    15  * use __kernel_rem_pio2()
    16  */
    17 
    18 #include "math_libm.h"
    19 #include "math_private.h"
    20 
    21 /*
    22  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
    23  */
    24 static const int32_t two_over_pi[] = {
    25 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
    26 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
    27 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
    28 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
    29 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
    30 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
    31 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
    32 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
    33 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
    34 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
    35 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
    36 };
    37 
    38 static const int32_t npio2_hw[] = {
    39 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
    40 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
    41 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
    42 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
    43 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
    44 0x404858EB, 0x404921FB,
    45 };
    46 
    47 /*
    48  * invpio2:  53 bits of 2/pi
    49  * pio2_1:   first  33 bit of pi/2
    50  * pio2_1t:  pi/2 - pio2_1
    51  * pio2_2:   second 33 bit of pi/2
    52  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
    53  * pio2_3:   third  33 bit of pi/2
    54  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
    55  */
    56 
    57 static const double
    58 zero =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
    59 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
    60 two24 =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
    61 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
    62 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
    63 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
    64 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
    65 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
    66 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
    67 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
    68 
    69 int32_t attribute_hidden __ieee754_rem_pio2(double x, double *y)
    70 {
    71 	double z=0.0,w,t,r,fn;
    72 	double tx[3];
    73 	int32_t e0,i,j,nx,n,ix,hx;
    74 	u_int32_t low;
    75 
    76 	GET_HIGH_WORD(hx,x);		/* high word of x */
    77 	ix = hx&0x7fffffff;
    78 	if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
    79 	    {y[0] = x; y[1] = 0; return 0;}
    80 	if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
    81 	    if(hx>0) {
    82 		z = x - pio2_1;
    83 		if(ix!=0x3ff921fb) { 	/* 33+53 bit pi is good enough */
    84 		    y[0] = z - pio2_1t;
    85 		    y[1] = (z-y[0])-pio2_1t;
    86 		} else {		/* near pi/2, use 33+33+53 bit pi */
    87 		    z -= pio2_2;
    88 		    y[0] = z - pio2_2t;
    89 		    y[1] = (z-y[0])-pio2_2t;
    90 		}
    91 		return 1;
    92 	    } else {	/* negative x */
    93 		z = x + pio2_1;
    94 		if(ix!=0x3ff921fb) { 	/* 33+53 bit pi is good enough */
    95 		    y[0] = z + pio2_1t;
    96 		    y[1] = (z-y[0])+pio2_1t;
    97 		} else {		/* near pi/2, use 33+33+53 bit pi */
    98 		    z += pio2_2;
    99 		    y[0] = z + pio2_2t;
   100 		    y[1] = (z-y[0])+pio2_2t;
   101 		}
   102 		return -1;
   103 	    }
   104 	}
   105 	if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
   106 	    t  = fabs(x);
   107 	    n  = (int32_t) (t*invpio2+half);
   108 	    fn = (double)n;
   109 	    r  = t-fn*pio2_1;
   110 	    w  = fn*pio2_1t;	/* 1st round good to 85 bit */
   111 	    if(n<32&&ix!=npio2_hw[n-1]) {
   112 		y[0] = r-w;	/* quick check no cancellation */
   113 	    } else {
   114 	        u_int32_t high;
   115 	        j  = ix>>20;
   116 	        y[0] = r-w;
   117 		GET_HIGH_WORD(high,y[0]);
   118 	        i = j-((high>>20)&0x7ff);
   119 	        if(i>16) {  /* 2nd iteration needed, good to 118 */
   120 		    t  = r;
   121 		    w  = fn*pio2_2;
   122 		    r  = t-w;
   123 		    w  = fn*pio2_2t-((t-r)-w);
   124 		    y[0] = r-w;
   125 		    GET_HIGH_WORD(high,y[0]);
   126 		    i = j-((high>>20)&0x7ff);
   127 		    if(i>49)  {	/* 3rd iteration need, 151 bits acc */
   128 		    	t  = r;	/* will cover all possible cases */
   129 		    	w  = fn*pio2_3;
   130 		    	r  = t-w;
   131 		    	w  = fn*pio2_3t-((t-r)-w);
   132 		    	y[0] = r-w;
   133 		    }
   134 		}
   135 	    }
   136 	    y[1] = (r-y[0])-w;
   137 	    if(hx<0) 	{y[0] = -y[0]; y[1] = -y[1]; return -n;}
   138 	    else	 return n;
   139 	}
   140     /*
   141      * all other (large) arguments
   142      */
   143 	if(ix>=0x7ff00000) {		/* x is inf or NaN */
   144 	    y[0]=y[1]=x-x; return 0;
   145 	}
   146     /* set z = scalbn(|x|,ilogb(x)-23) */
   147 	GET_LOW_WORD(low,x);
   148 	SET_LOW_WORD(z,low);
   149 	e0 	= (ix>>20)-1046;	/* e0 = ilogb(z)-23; */
   150 	SET_HIGH_WORD(z, ix - ((int32_t)(e0<<20)));
   151 	for(i=0;i<2;i++) {
   152 		tx[i] = (double)((int32_t)(z));
   153 		z     = (z-tx[i])*two24;
   154 	}
   155 	tx[2] = z;
   156 	nx = 3;
   157 	while(tx[nx-1]==zero) nx--;	/* skip zero term */
   158 	n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
   159 	if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   160 	return n;
   161 }