src/libm/e_pow.c
author Ryan C. Gordon <icculus@icculus.org>
Thu, 21 Apr 2016 03:16:44 -0400
changeset 11729 d1ce8396c356
parent 11711 8a982ed61896
child 12420 4a6c91d9cc33
permissions -rw-r--r--
Initial shot at a renderer target for Apple's Metal API.

This isn't complete, but is enough to run testsprite2. It's currently
Mac-only; with a little work to figure out how to properly glue in a Metal
layer to a UIView, this will likely work on iOS, too.

This is only wired up to the configure script right now, and disabled by
default. CMake and Xcode still need their bits filled in as appropriate.
     1 /*
     2  * ====================================================
     3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     4  *
     5  * Developed at SunPro, a Sun Microsystems, Inc. business.
     6  * Permission to use, copy, modify, and distribute this
     7  * software is freely granted, provided that this notice
     8  * is preserved.
     9  * ====================================================
    10  */
    11 
    12 /* __ieee754_pow(x,y) return x**y
    13  *
    14  *		      n
    15  * Method:  Let x =  2   * (1+f)
    16  *	1. Compute and return log2(x) in two pieces:
    17  *		log2(x) = w1 + w2,
    18  *	   where w1 has 53-24 = 29 bit trailing zeros.
    19  *	2. Perform y*log2(x) = n+y' by simulating muti-precision
    20  *	   arithmetic, where |y'|<=0.5.
    21  *	3. Return x**y = 2**n*exp(y'*log2)
    22  *
    23  * Special cases:
    24  *	1.  +-1 ** anything  is 1.0
    25  *	2.  +-1 ** +-INF     is 1.0
    26  *	3.  (anything) ** 0  is 1
    27  *	4.  (anything) ** 1  is itself
    28  *	5.  (anything) ** NAN is NAN
    29  *	6.  NAN ** (anything except 0) is NAN
    30  *	7.  +-(|x| > 1) **  +INF is +INF
    31  *	8.  +-(|x| > 1) **  -INF is +0
    32  *	9.  +-(|x| < 1) **  +INF is +0
    33  *	10  +-(|x| < 1) **  -INF is +INF
    34  *	11. +0 ** (+anything except 0, NAN)               is +0
    35  *	12. -0 ** (+anything except 0, NAN, odd integer)  is +0
    36  *	13. +0 ** (-anything except 0, NAN)               is +INF
    37  *	14. -0 ** (-anything except 0, NAN, odd integer)  is +INF
    38  *	15. -0 ** (odd integer) = -( +0 ** (odd integer) )
    39  *	16. +INF ** (+anything except 0,NAN) is +INF
    40  *	17. +INF ** (-anything except 0,NAN) is +0
    41  *	18. -INF ** (anything)  = -0 ** (-anything)
    42  *	19. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
    43  *	20. (-anything except 0 and inf) ** (non-integer) is NAN
    44  *
    45  * Accuracy:
    46  *	pow(x,y) returns x**y nearly rounded. In particular
    47  *			pow(integer,integer)
    48  *	always returns the correct integer provided it is
    49  *	representable.
    50  *
    51  * Constants :
    52  * The hexadecimal values are the intended ones for the following
    53  * constants. The decimal values may be used, provided that the
    54  * compiler will convert from decimal to binary accurately enough
    55  * to produce the hexadecimal values shown.
    56  */
    57 
    58 #include "math_libm.h"
    59 #include "math_private.h"
    60 
    61 #if defined(_MSC_VER)           /* Handle Microsoft VC++ compiler specifics. */
    62 /* C4756: overflow in constant arithmetic */
    63 #pragma warning ( disable : 4756 )
    64 #endif
    65 
    66 static const double
    67 bp[] = {1.0, 1.5,},
    68 dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
    69 dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
    70 zero    =  0.0,
    71 one	=  1.0,
    72 two	=  2.0,
    73 two53	=  9007199254740992.0,	/* 0x43400000, 0x00000000 */
    74 huge	=  1.0e300,
    75 tiny    =  1.0e-300,
    76 	/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
    77 L1  =  5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */
    78 L2  =  4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */
    79 L3  =  3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */
    80 L4  =  2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */
    81 L5  =  2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */
    82 L6  =  2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */
    83 P1   =  1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
    84 P2   = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
    85 P3   =  6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
    86 P4   = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
    87 P5   =  4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
    88 lg2  =  6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
    89 lg2_h  =  6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
    90 lg2_l  = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */
    91 ovt =  8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */
    92 cp    =  9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
    93 cp_h  =  9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */
    94 cp_l  = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
    95 ivln2    =  1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
    96 ivln2_h  =  1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
    97 ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
    98 
    99 double attribute_hidden __ieee754_pow(double x, double y)
   100 {
   101 	double z,ax,z_h,z_l,p_h,p_l;
   102 	double y1,t1,t2,r,s,t,u,v,w;
   103 	int32_t i,j,k,yisint,n;
   104 	int32_t hx,hy,ix,iy;
   105 	u_int32_t lx,ly;
   106 
   107 	EXTRACT_WORDS(hx,lx,x);
   108     /* x==1: 1**y = 1 (even if y is NaN) */
   109 	if (hx==0x3ff00000 && lx==0) {
   110 		return x;
   111 	}
   112 	ix = hx&0x7fffffff;
   113 
   114 	EXTRACT_WORDS(hy,ly,y);
   115 	iy = hy&0x7fffffff;
   116 
   117     /* y==zero: x**0 = 1 */
   118 	if((iy|ly)==0) return one;
   119 
   120     /* +-NaN return x+y */
   121 	if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
   122 	   iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0)))
   123 		return x+y;
   124 
   125     /* determine if y is an odd int when x < 0
   126      * yisint = 0	... y is not an integer
   127      * yisint = 1	... y is an odd int
   128      * yisint = 2	... y is an even int
   129      */
   130 	yisint  = 0;
   131 	if(hx<0) {
   132 	    if(iy>=0x43400000) yisint = 2; /* even integer y */
   133 	    else if(iy>=0x3ff00000) {
   134 		k = (iy>>20)-0x3ff;	   /* exponent */
   135 		if(k>20) {
   136 		    j = ly>>(52-k);
   137 		    if((j<<(52-k))==ly) yisint = 2-(j&1);
   138 		} else if(ly==0) {
   139 		    j = iy>>(20-k);
   140 		    if((j<<(20-k))==iy) yisint = 2-(j&1);
   141 		}
   142 	    }
   143 	}
   144 
   145     /* special value of y */
   146 	if(ly==0) {
   147 	    if (iy==0x7ff00000) {       /* y is +-inf */
   148 	        if (((ix-0x3ff00000)|lx)==0)
   149 		    return one;	        /* +-1**+-inf is 1 (yes, weird rule) */
   150 	        if (ix >= 0x3ff00000)   /* (|x|>1)**+-inf = inf,0 */
   151 		    return (hy>=0) ? y : zero;
   152 	        /* (|x|<1)**-,+inf = inf,0 */
   153 		return (hy<0) ? -y : zero;
   154 	    }
   155 	    if(iy==0x3ff00000) {	/* y is  +-1 */
   156 		if(hy<0) return one/x; else return x;
   157 	    }
   158 	    if(hy==0x40000000) return x*x; /* y is  2 */
   159 	    if(hy==0x3fe00000) {	/* y is  0.5 */
   160 		if(hx>=0)	/* x >= +0 */
   161 		    return __ieee754_sqrt(x);
   162 	    }
   163 	}
   164 
   165 	ax   = fabs(x);
   166     /* special value of x */
   167 	if(lx==0) {
   168 	    if(ix==0x7ff00000||ix==0||ix==0x3ff00000){
   169 		z = ax;			/*x is +-0,+-inf,+-1*/
   170 		if(hy<0) z = one/z;	/* z = (1/|x|) */
   171 		if(hx<0) {
   172 		    if(((ix-0x3ff00000)|yisint)==0) {
   173 			z = (z-z)/(z-z); /* (-1)**non-int is NaN */
   174 		    } else if(yisint==1)
   175 			z = -z;		/* (x<0)**odd = -(|x|**odd) */
   176 		}
   177 		return z;
   178 	    }
   179 	}
   180 
   181     /* (x<0)**(non-int) is NaN */
   182 	if(((((u_int32_t)hx>>31)-1)|yisint)==0) return (x-x)/(x-x);
   183 
   184     /* |y| is huge */
   185 	if(iy>0x41e00000) { /* if |y| > 2**31 */
   186 	    if(iy>0x43f00000){	/* if |y| > 2**64, must o/uflow */
   187 		if(ix<=0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
   188 		if(ix>=0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
   189 	    }
   190 	/* over/underflow if x is not close to one */
   191 	    if(ix<0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
   192 	    if(ix>0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
   193 	/* now |1-x| is tiny <= 2**-20, suffice to compute
   194 	   log(x) by x-x^2/2+x^3/3-x^4/4 */
   195 	    t = x-1;		/* t has 20 trailing zeros */
   196 	    w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
   197 	    u = ivln2_h*t;	/* ivln2_h has 21 sig. bits */
   198 	    v = t*ivln2_l-w*ivln2;
   199 	    t1 = u+v;
   200 	    SET_LOW_WORD(t1,0);
   201 	    t2 = v-(t1-u);
   202 	} else {
   203 	    double s2,s_h,s_l,t_h,t_l;
   204 	    n = 0;
   205 	/* take care subnormal number */
   206 	    if(ix<0x00100000)
   207 		{ax *= two53; n -= 53; GET_HIGH_WORD(ix,ax); }
   208 	    n  += ((ix)>>20)-0x3ff;
   209 	    j  = ix&0x000fffff;
   210 	/* determine interval */
   211 	    ix = j|0x3ff00000;		/* normalize ix */
   212 	    if(j<=0x3988E) k=0;		/* |x|<sqrt(3/2) */
   213 	    else if(j<0xBB67A) k=1;	/* |x|<sqrt(3)   */
   214 	    else {k=0;n+=1;ix -= 0x00100000;}
   215 	    SET_HIGH_WORD(ax,ix);
   216 
   217 	/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
   218 	    u = ax-bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
   219 	    v = one/(ax+bp[k]);
   220 	    s = u*v;
   221 	    s_h = s;
   222 	    SET_LOW_WORD(s_h,0);
   223 	/* t_h=ax+bp[k] High */
   224 	    t_h = zero;
   225 	    SET_HIGH_WORD(t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18));
   226 	    t_l = ax - (t_h-bp[k]);
   227 	    s_l = v*((u-s_h*t_h)-s_h*t_l);
   228 	/* compute log(ax) */
   229 	    s2 = s*s;
   230 	    r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
   231 	    r += s_l*(s_h+s);
   232 	    s2  = s_h*s_h;
   233 	    t_h = 3.0+s2+r;
   234 	    SET_LOW_WORD(t_h,0);
   235 	    t_l = r-((t_h-3.0)-s2);
   236 	/* u+v = s*(1+...) */
   237 	    u = s_h*t_h;
   238 	    v = s_l*t_h+t_l*s;
   239 	/* 2/(3log2)*(s+...) */
   240 	    p_h = u+v;
   241 	    SET_LOW_WORD(p_h,0);
   242 	    p_l = v-(p_h-u);
   243 	    z_h = cp_h*p_h;		/* cp_h+cp_l = 2/(3*log2) */
   244 	    z_l = cp_l*p_h+p_l*cp+dp_l[k];
   245 	/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
   246 	    t = (double)n;
   247 	    t1 = (((z_h+z_l)+dp_h[k])+t);
   248 	    SET_LOW_WORD(t1,0);
   249 	    t2 = z_l-(((t1-t)-dp_h[k])-z_h);
   250 	}
   251 
   252 	s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
   253 	if(((((u_int32_t)hx>>31)-1)|(yisint-1))==0)
   254 	    s = -one;/* (-ve)**(odd int) */
   255 
   256     /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
   257 	y1  = y;
   258 	SET_LOW_WORD(y1,0);
   259 	p_l = (y-y1)*t1+y*t2;
   260 	p_h = y1*t1;
   261 	z = p_l+p_h;
   262 	EXTRACT_WORDS(j,i,z);
   263 	if (j>=0x40900000) {				/* z >= 1024 */
   264 	    if(((j-0x40900000)|i)!=0)			/* if z > 1024 */
   265 		return s*huge*huge;			/* overflow */
   266 	    else {
   267 		if(p_l+ovt>z-p_h) return s*huge*huge;	/* overflow */
   268 	    }
   269 	} else if((j&0x7fffffff)>=0x4090cc00 ) {	/* z <= -1075 */
   270 	    if(((j-0xc090cc00)|i)!=0) 		/* z < -1075 */
   271 		return s*tiny*tiny;		/* underflow */
   272 	    else {
   273 		if(p_l<=z-p_h) return s*tiny*tiny;	/* underflow */
   274 	    }
   275 	}
   276     /*
   277      * compute 2**(p_h+p_l)
   278      */
   279 	i = j&0x7fffffff;
   280 	k = (i>>20)-0x3ff;
   281 	n = 0;
   282 	if(i>0x3fe00000) {		/* if |z| > 0.5, set n = [z+0.5] */
   283 	    n = j+(0x00100000>>(k+1));
   284 	    k = ((n&0x7fffffff)>>20)-0x3ff;	/* new k for n */
   285 	    t = zero;
   286 	    SET_HIGH_WORD(t,n&~(0x000fffff>>k));
   287 	    n = ((n&0x000fffff)|0x00100000)>>(20-k);
   288 	    if(j<0) n = -n;
   289 	    p_h -= t;
   290 	}
   291 	t = p_l+p_h;
   292 	SET_LOW_WORD(t,0);
   293 	u = t*lg2_h;
   294 	v = (p_l-(t-p_h))*lg2+t*lg2_l;
   295 	z = u+v;
   296 	w = v-(z-u);
   297 	t  = z*z;
   298 	t1  = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
   299 	r  = (z*t1)/(t1-two)-(w+z*w);
   300 	z  = one-(r-z);
   301 	GET_HIGH_WORD(j,z);
   302 	j += (n<<20);
   303 	if((j>>20)<=0) z = scalbn(z,n);	/* subnormal output */
   304 	else SET_HIGH_WORD(z,j);
   305 	return s*z;
   306 }
   307 
   308 /*
   309  * wrapper pow(x,y) return x**y
   310  */
   311 #ifndef _IEEE_LIBM
   312 double pow(double x, double y)
   313 {
   314 	double z = __ieee754_pow(x, y);
   315 	if (_LIB_VERSION == _IEEE_|| isnan(y))
   316 		return z;
   317 	if (isnan(x)) {
   318 		if (y == 0.0)
   319 			return __kernel_standard(x, y, 42); /* pow(NaN,0.0) */
   320 		return z;
   321 	}
   322 	if (x == 0.0) {
   323 		if (y == 0.0)
   324 	    		return __kernel_standard(x, y, 20); /* pow(0.0,0.0) */
   325 		if (isfinite(y) && y < 0.0)
   326 			return __kernel_standard(x,y,23); /* pow(0.0,negative) */
   327 		return z;
   328 	}
   329 	if (!isfinite(z)) {
   330 		if (isfinite(x) && isfinite(y)) {
   331 			if (isnan(z))
   332 				return __kernel_standard(x, y, 24); /* pow neg**non-int */
   333 			return __kernel_standard(x, y, 21); /* pow overflow */
   334 		}
   335 	}
   336 	if (z == 0.0 && isfinite(x) && isfinite(y))
   337 		return __kernel_standard(x, y, 22); /* pow underflow */
   338 	return z;
   339 }
   340 #else
   341 strong_alias(__ieee754_pow, pow)
   342 #endif
   343 libm_hidden_def(pow)