src/libm/e_rem_pio2.c
author Ryan C. Gordon <icculus@icculus.org>
Thu, 21 Apr 2016 03:16:44 -0400
changeset 11729 d1ce8396c356
parent 11683 48bcba563d9c
child 12084 f0092ea3a038
permissions -rw-r--r--
Initial shot at a renderer target for Apple's Metal API.

This isn't complete, but is enough to run testsprite2. It's currently
Mac-only; with a little work to figure out how to properly glue in a Metal
layer to a UIView, this will likely work on iOS, too.

This is only wired up to the configure script right now, and disabled by
default. CMake and Xcode still need their bits filled in as appropriate.
slouken@2756
     1
/*
slouken@2756
     2
 * ====================================================
slouken@2756
     3
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
slouken@2756
     4
 *
slouken@2756
     5
 * Developed at SunPro, a Sun Microsystems, Inc. business.
slouken@2756
     6
 * Permission to use, copy, modify, and distribute this
slouken@2756
     7
 * software is freely granted, provided that this notice
slouken@2756
     8
 * is preserved.
slouken@2756
     9
 * ====================================================
slouken@2756
    10
 */
slouken@2756
    11
slouken@2756
    12
/* __ieee754_rem_pio2(x,y)
slouken@2756
    13
 *
slouken@2756
    14
 * return the remainder of x rem pi/2 in y[0]+y[1]
slouken@2756
    15
 * use __kernel_rem_pio2()
slouken@2756
    16
 */
slouken@2756
    17
slouken@6044
    18
#include "math_libm.h"
slouken@2756
    19
#include "math_private.h"
slouken@2756
    20
slouken@2756
    21
/*
slouken@2756
    22
 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
slouken@2756
    23
 */
slouken@11683
    24
static const int32_t two_over_pi[] = {
slouken@11683
    25
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
slouken@11683
    26
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
slouken@11683
    27
0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
slouken@11683
    28
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
slouken@11683
    29
0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
slouken@11683
    30
0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
slouken@11683
    31
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
slouken@11683
    32
0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
slouken@11683
    33
0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
slouken@11683
    34
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
slouken@11683
    35
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
slouken@11683
    36
};
slouken@2756
    37
slouken@2756
    38
static const int32_t npio2_hw[] = {
slouken@11683
    39
0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
slouken@11683
    40
0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
slouken@11683
    41
0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
slouken@11683
    42
0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
slouken@11683
    43
0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
slouken@11683
    44
0x404858EB, 0x404921FB,
slouken@2756
    45
};
slouken@2756
    46
slouken@2756
    47
/*
slouken@2756
    48
 * invpio2:  53 bits of 2/pi
slouken@2756
    49
 * pio2_1:   first  33 bit of pi/2
slouken@2756
    50
 * pio2_1t:  pi/2 - pio2_1
slouken@2756
    51
 * pio2_2:   second 33 bit of pi/2
slouken@2756
    52
 * pio2_2t:  pi/2 - (pio2_1+pio2_2)
slouken@2756
    53
 * pio2_3:   third  33 bit of pi/2
slouken@2756
    54
 * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
slouken@2756
    55
 */
slouken@2756
    56
slouken@2756
    57
static const double
slouken@11683
    58
zero =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
slouken@11683
    59
half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
slouken@11683
    60
two24 =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
slouken@11683
    61
invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
slouken@11683
    62
pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
slouken@11683
    63
pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
slouken@11683
    64
pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
slouken@11683
    65
pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
slouken@11683
    66
pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
slouken@11683
    67
pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
slouken@2756
    68
slouken@11683
    69
int32_t attribute_hidden __ieee754_rem_pio2(double x, double *y)
slouken@2756
    70
{
slouken@11683
    71
	double z=0.0,w,t,r,fn;
slouken@11683
    72
	double tx[3];
slouken@11683
    73
	int32_t e0,i,j,nx,n,ix,hx;
slouken@11683
    74
	u_int32_t low;
slouken@2756
    75
slouken@11683
    76
	GET_HIGH_WORD(hx,x);		/* high word of x */
slouken@11683
    77
	ix = hx&0x7fffffff;
slouken@11683
    78
	if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
slouken@11683
    79
	    {y[0] = x; y[1] = 0; return 0;}
slouken@11683
    80
	if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
slouken@11683
    81
	    if(hx>0) {
slouken@11683
    82
		z = x - pio2_1;
slouken@11683
    83
		if(ix!=0x3ff921fb) { 	/* 33+53 bit pi is good enough */
slouken@11683
    84
		    y[0] = z - pio2_1t;
slouken@11683
    85
		    y[1] = (z-y[0])-pio2_1t;
slouken@11683
    86
		} else {		/* near pi/2, use 33+33+53 bit pi */
slouken@11683
    87
		    z -= pio2_2;
slouken@11683
    88
		    y[0] = z - pio2_2t;
slouken@11683
    89
		    y[1] = (z-y[0])-pio2_2t;
slouken@11683
    90
		}
slouken@11683
    91
		return 1;
slouken@11683
    92
	    } else {	/* negative x */
slouken@11683
    93
		z = x + pio2_1;
slouken@11683
    94
		if(ix!=0x3ff921fb) { 	/* 33+53 bit pi is good enough */
slouken@11683
    95
		    y[0] = z + pio2_1t;
slouken@11683
    96
		    y[1] = (z-y[0])+pio2_1t;
slouken@11683
    97
		} else {		/* near pi/2, use 33+33+53 bit pi */
slouken@11683
    98
		    z += pio2_2;
slouken@11683
    99
		    y[0] = z + pio2_2t;
slouken@11683
   100
		    y[1] = (z-y[0])+pio2_2t;
slouken@11683
   101
		}
slouken@11683
   102
		return -1;
slouken@11683
   103
	    }
slouken@11683
   104
	}
slouken@11683
   105
	if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
slouken@11683
   106
	    t  = fabs(x);
slouken@11683
   107
	    n  = (int32_t) (t*invpio2+half);
slouken@11683
   108
	    fn = (double)n;
slouken@11683
   109
	    r  = t-fn*pio2_1;
slouken@11683
   110
	    w  = fn*pio2_1t;	/* 1st round good to 85 bit */
slouken@11683
   111
	    if(n<32&&ix!=npio2_hw[n-1]) {
slouken@11683
   112
		y[0] = r-w;	/* quick check no cancellation */
slouken@11683
   113
	    } else {
slouken@11683
   114
	        u_int32_t high;
slouken@11683
   115
	        j  = ix>>20;
slouken@11683
   116
	        y[0] = r-w;
slouken@11683
   117
		GET_HIGH_WORD(high,y[0]);
slouken@11683
   118
	        i = j-((high>>20)&0x7ff);
slouken@11683
   119
	        if(i>16) {  /* 2nd iteration needed, good to 118 */
slouken@11683
   120
		    t  = r;
slouken@11683
   121
		    w  = fn*pio2_2;
slouken@11683
   122
		    r  = t-w;
slouken@11683
   123
		    w  = fn*pio2_2t-((t-r)-w);
slouken@11683
   124
		    y[0] = r-w;
slouken@11683
   125
		    GET_HIGH_WORD(high,y[0]);
slouken@11683
   126
		    i = j-((high>>20)&0x7ff);
slouken@11683
   127
		    if(i>49)  {	/* 3rd iteration need, 151 bits acc */
slouken@11683
   128
		    	t  = r;	/* will cover all possible cases */
slouken@11683
   129
		    	w  = fn*pio2_3;
slouken@11683
   130
		    	r  = t-w;
slouken@11683
   131
		    	w  = fn*pio2_3t-((t-r)-w);
slouken@11683
   132
		    	y[0] = r-w;
slouken@11683
   133
		    }
slouken@11683
   134
		}
slouken@11683
   135
	    }
slouken@11683
   136
	    y[1] = (r-y[0])-w;
slouken@11683
   137
	    if(hx<0) 	{y[0] = -y[0]; y[1] = -y[1]; return -n;}
slouken@11683
   138
	    else	 return n;
slouken@11683
   139
	}
slouken@2756
   140
    /*
slouken@2756
   141
     * all other (large) arguments
slouken@2756
   142
     */
slouken@11683
   143
	if(ix>=0x7ff00000) {		/* x is inf or NaN */
slouken@11683
   144
	    y[0]=y[1]=x-x; return 0;
slouken@11683
   145
	}
slouken@2756
   146
    /* set z = scalbn(|x|,ilogb(x)-23) */
slouken@11683
   147
	GET_LOW_WORD(low,x);
slouken@11683
   148
	SET_LOW_WORD(z,low);
slouken@11683
   149
	e0 	= (ix>>20)-1046;	/* e0 = ilogb(z)-23; */
slouken@11683
   150
	SET_HIGH_WORD(z, ix - ((int32_t)(e0<<20)));
slouken@11683
   151
	for(i=0;i<2;i++) {
slouken@11683
   152
		tx[i] = (double)((int32_t)(z));
slouken@11683
   153
		z     = (z-tx[i])*two24;
slouken@11683
   154
	}
slouken@11683
   155
	tx[2] = z;
slouken@11683
   156
	nx = 3;
slouken@11683
   157
	while(tx[nx-1]==zero) nx--;	/* skip zero term */
slouken@11683
   158
	n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
slouken@11683
   159
	if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
slouken@11683
   160
	return n;
slouken@2756
   161
}