Expanded the libm support and put it into a separate directory.
authorSam Lantinga <slouken@libsdl.org>
Mon, 15 Sep 2008 06:33:23 +0000
changeset 2756a98604b691c8
parent 2755 2a3ec308d995
child 2757 0581f49c9294
Expanded the libm support and put it into a separate directory.
src/libm/e_log.c
src/libm/e_pow.c
src/libm/e_rem_pio2.c
src/libm/e_sqrt.c
src/libm/k_cos.c
src/libm/k_sin.c
src/libm/math.h
src/libm/math_private.h
src/libm/s_copysign.c
src/libm/s_cos.c
src/libm/s_fabs.c
src/libm/s_scalbn.c
src/libm/s_sin.c
src/video/SDL_gamma.c
src/video/e_log.h
src/video/e_pow.h
src/video/e_sqrt.h
src/video/math_private.h
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/libm/e_log.c	Mon Sep 15 06:33:23 2008 +0000
     1.3 @@ -0,0 +1,166 @@
     1.4 +/* @(#)e_log.c 5.1 93/09/24 */
     1.5 +/*
     1.6 + * ====================================================
     1.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     1.8 + *
     1.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
    1.10 + * Permission to use, copy, modify, and distribute this
    1.11 + * software is freely granted, provided that this notice
    1.12 + * is preserved.
    1.13 + * ====================================================
    1.14 + */
    1.15 +
    1.16 +#if defined(LIBM_SCCS) && !defined(lint)
    1.17 +static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $";
    1.18 +#endif
    1.19 +
    1.20 +/* __ieee754_log(x)
    1.21 + * Return the logrithm of x
    1.22 + *
    1.23 + * Method :
    1.24 + *   1. Argument Reduction: find k and f such that
    1.25 + *			x = 2^k * (1+f),
    1.26 + *	   where  sqrt(2)/2 < 1+f < sqrt(2) .
    1.27 + *
    1.28 + *   2. Approximation of log(1+f).
    1.29 + *	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
    1.30 + *		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
    1.31 + *	     	 = 2s + s*R
    1.32 + *      We use a special Reme algorithm on [0,0.1716] to generate
    1.33 + * 	a polynomial of degree 14 to approximate R The maximum error
    1.34 + *	of this polynomial approximation is bounded by 2**-58.45. In
    1.35 + *	other words,
    1.36 + *		        2      4      6      8      10      12      14
    1.37 + *	    R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
    1.38 + *  	(the values of Lg1 to Lg7 are listed in the program)
    1.39 + *	and
    1.40 + *	    |      2          14          |     -58.45
    1.41 + *	    | Lg1*s +...+Lg7*s    -  R(z) | <= 2
    1.42 + *	    |                             |
    1.43 + *	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
    1.44 + *	In order to guarantee error in log below 1ulp, we compute log
    1.45 + *	by
    1.46 + *		log(1+f) = f - s*(f - R)	(if f is not too large)
    1.47 + *		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
    1.48 + *
    1.49 + *	3. Finally,  log(x) = k*ln2 + log(1+f).
    1.50 + *			    = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
    1.51 + *	   Here ln2 is split into two floating point number:
    1.52 + *			ln2_hi + ln2_lo,
    1.53 + *	   where n*ln2_hi is always exact for |n| < 2000.
    1.54 + *
    1.55 + * Special cases:
    1.56 + *	log(x) is NaN with signal if x < 0 (including -INF) ;
    1.57 + *	log(+INF) is +INF; log(0) is -INF with signal;
    1.58 + *	log(NaN) is that NaN with no signal.
    1.59 + *
    1.60 + * Accuracy:
    1.61 + *	according to an error analysis, the error is always less than
    1.62 + *	1 ulp (unit in the last place).
    1.63 + *
    1.64 + * Constants:
    1.65 + * The hexadecimal values are the intended ones for the following
    1.66 + * constants. The decimal values may be used, provided that the
    1.67 + * compiler will convert from decimal to binary accurately enough
    1.68 + * to produce the hexadecimal values shown.
    1.69 + */
    1.70 +
    1.71 +#include "math.h"
    1.72 +#include "math_private.h"
    1.73 +
    1.74 +#ifdef __STDC__
    1.75 +static const double
    1.76 +#else
    1.77 +static double
    1.78 +#endif
    1.79 +  ln2_hi = 6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */
    1.80 +    ln2_lo = 1.90821492927058770002e-10,        /* 3dea39ef 35793c76 */
    1.81 +    two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
    1.82 +    Lg1 = 6.666666666666735130e-01,     /* 3FE55555 55555593 */
    1.83 +    Lg2 = 3.999999999940941908e-01,     /* 3FD99999 9997FA04 */
    1.84 +    Lg3 = 2.857142874366239149e-01,     /* 3FD24924 94229359 */
    1.85 +    Lg4 = 2.222219843214978396e-01,     /* 3FCC71C5 1D8E78AF */
    1.86 +    Lg5 = 1.818357216161805012e-01,     /* 3FC74664 96CB03DE */
    1.87 +    Lg6 = 1.531383769920937332e-01,     /* 3FC39A09 D078C69F */
    1.88 +    Lg7 = 1.479819860511658591e-01;     /* 3FC2F112 DF3E5244 */
    1.89 +
    1.90 +#ifdef __STDC__
    1.91 +static const double zero = 0.0;
    1.92 +#else
    1.93 +static double zero = 0.0;
    1.94 +#endif
    1.95 +
    1.96 +#ifdef __STDC__
    1.97 +double attribute_hidden
    1.98 +__ieee754_log(double x)
    1.99 +#else
   1.100 +double attribute_hidden
   1.101 +__ieee754_log(x)
   1.102 +     double x;
   1.103 +#endif
   1.104 +{
   1.105 +    double hfsq, f, s, z, R, w, t1, t2, dk;
   1.106 +    int32_t k, hx, i, j;
   1.107 +    u_int32_t lx;
   1.108 +
   1.109 +    EXTRACT_WORDS(hx, lx, x);
   1.110 +
   1.111 +    k = 0;
   1.112 +    if (hx < 0x00100000) {      /* x < 2**-1022  */
   1.113 +        if (((hx & 0x7fffffff) | lx) == 0)
   1.114 +            return -two54 / zero;       /* log(+-0)=-inf */
   1.115 +        if (hx < 0)
   1.116 +            return (x - x) / zero;      /* log(-#) = NaN */
   1.117 +        k -= 54;
   1.118 +        x *= two54;             /* subnormal number, scale up x */
   1.119 +        GET_HIGH_WORD(hx, x);
   1.120 +    }
   1.121 +    if (hx >= 0x7ff00000)
   1.122 +        return x + x;
   1.123 +    k += (hx >> 20) - 1023;
   1.124 +    hx &= 0x000fffff;
   1.125 +    i = (hx + 0x95f64) & 0x100000;
   1.126 +    SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000));    /* normalize x or x/2 */
   1.127 +    k += (i >> 20);
   1.128 +    f = x - 1.0;
   1.129 +    if ((0x000fffff & (2 + hx)) < 3) {  /* |f| < 2**-20 */
   1.130 +        if (f == zero) {
   1.131 +            if (k == 0)
   1.132 +                return zero;
   1.133 +            else {
   1.134 +                dk = (double) k;
   1.135 +                return dk * ln2_hi + dk * ln2_lo;
   1.136 +            }
   1.137 +        }
   1.138 +        R = f * f * (0.5 - 0.33333333333333333 * f);
   1.139 +        if (k == 0)
   1.140 +            return f - R;
   1.141 +        else {
   1.142 +            dk = (double) k;
   1.143 +            return dk * ln2_hi - ((R - dk * ln2_lo) - f);
   1.144 +        }
   1.145 +    }
   1.146 +    s = f / (2.0 + f);
   1.147 +    dk = (double) k;
   1.148 +    z = s * s;
   1.149 +    i = hx - 0x6147a;
   1.150 +    w = z * z;
   1.151 +    j = 0x6b851 - hx;
   1.152 +    t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
   1.153 +    t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
   1.154 +    i |= j;
   1.155 +    R = t2 + t1;
   1.156 +    if (i > 0) {
   1.157 +        hfsq = 0.5 * f * f;
   1.158 +        if (k == 0)
   1.159 +            return f - (hfsq - s * (hfsq + R));
   1.160 +        else
   1.161 +            return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) -
   1.162 +                                  f);
   1.163 +    } else {
   1.164 +        if (k == 0)
   1.165 +            return f - s * (f - R);
   1.166 +        else
   1.167 +            return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
   1.168 +    }
   1.169 +}
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/src/libm/e_pow.c	Mon Sep 15 06:33:23 2008 +0000
     2.3 @@ -0,0 +1,342 @@
     2.4 +/* @(#)e_pow.c 5.1 93/09/24 */
     2.5 +/*
     2.6 + * ====================================================
     2.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     2.8 + *
     2.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
    2.10 + * Permission to use, copy, modify, and distribute this
    2.11 + * software is freely granted, provided that this notice
    2.12 + * is preserved.
    2.13 + * ====================================================
    2.14 + */
    2.15 +
    2.16 +#if defined(LIBM_SCCS) && !defined(lint)
    2.17 +static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
    2.18 +#endif
    2.19 +
    2.20 +/* __ieee754_pow(x,y) return x**y
    2.21 + *
    2.22 + *		      n
    2.23 + * Method:  Let x =  2   * (1+f)
    2.24 + *	1. Compute and return log2(x) in two pieces:
    2.25 + *		log2(x) = w1 + w2,
    2.26 + *	   where w1 has 53-24 = 29 bit trailing zeros.
    2.27 + *	2. Perform y*log2(x) = n+y' by simulating muti-precision
    2.28 + *	   arithmetic, where |y'|<=0.5.
    2.29 + *	3. Return x**y = 2**n*exp(y'*log2)
    2.30 + *
    2.31 + * Special cases:
    2.32 + *	1.  (anything) ** 0  is 1
    2.33 + *	2.  (anything) ** 1  is itself
    2.34 + *	3.  (anything) ** NAN is NAN
    2.35 + *	4.  NAN ** (anything except 0) is NAN
    2.36 + *	5.  +-(|x| > 1) **  +INF is +INF
    2.37 + *	6.  +-(|x| > 1) **  -INF is +0
    2.38 + *	7.  +-(|x| < 1) **  +INF is +0
    2.39 + *	8.  +-(|x| < 1) **  -INF is +INF
    2.40 + *	9.  +-1         ** +-INF is NAN
    2.41 + *	10. +0 ** (+anything except 0, NAN)               is +0
    2.42 + *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
    2.43 + *	12. +0 ** (-anything except 0, NAN)               is +INF
    2.44 + *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
    2.45 + *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
    2.46 + *	15. +INF ** (+anything except 0,NAN) is +INF
    2.47 + *	16. +INF ** (-anything except 0,NAN) is +0
    2.48 + *	17. -INF ** (anything)  = -0 ** (-anything)
    2.49 + *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
    2.50 + *	19. (-anything except 0 and inf) ** (non-integer) is NAN
    2.51 + *
    2.52 + * Accuracy:
    2.53 + *	pow(x,y) returns x**y nearly rounded. In particular
    2.54 + *			pow(integer,integer)
    2.55 + *	always returns the correct integer provided it is
    2.56 + *	representable.
    2.57 + *
    2.58 + * Constants :
    2.59 + * The hexadecimal values are the intended ones for the following
    2.60 + * constants. The decimal values may be used, provided that the
    2.61 + * compiler will convert from decimal to binary accurately enough
    2.62 + * to produce the hexadecimal values shown.
    2.63 + */
    2.64 +
    2.65 +#include "math.h"
    2.66 +#include "math_private.h"
    2.67 +
    2.68 +libm_hidden_proto(scalbn)
    2.69 +    libm_hidden_proto(fabs)
    2.70 +#ifdef __STDC__
    2.71 +     static const double
    2.72 +#else
    2.73 +     static double
    2.74 +#endif
    2.75 +       bp[] = { 1.0, 1.5, }, dp_h[] = {
    2.76 +     0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
    2.77 +
    2.78 +         dp_l[] = {
    2.79 +     0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
    2.80 +
    2.81 +         zero = 0.0, one = 1.0, two = 2.0, two53 = 9007199254740992.0,  /* 0x43400000, 0x00000000 */
    2.82 +         huge = 1.0e300, tiny = 1.0e-300,
    2.83 +         /* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
    2.84 +         L1 = 5.99999999999994648725e-01,       /* 0x3FE33333, 0x33333303 */
    2.85 +         L2 = 4.28571428578550184252e-01,       /* 0x3FDB6DB6, 0xDB6FABFF */
    2.86 +         L3 = 3.33333329818377432918e-01,       /* 0x3FD55555, 0x518F264D */
    2.87 +         L4 = 2.72728123808534006489e-01,       /* 0x3FD17460, 0xA91D4101 */
    2.88 +         L5 = 2.30660745775561754067e-01,       /* 0x3FCD864A, 0x93C9DB65 */
    2.89 +         L6 = 2.06975017800338417784e-01,       /* 0x3FCA7E28, 0x4A454EEF */
    2.90 +         P1 = 1.66666666666666019037e-01,       /* 0x3FC55555, 0x5555553E */
    2.91 +         P2 = -2.77777777770155933842e-03,      /* 0xBF66C16C, 0x16BEBD93 */
    2.92 +         P3 = 6.61375632143793436117e-05,       /* 0x3F11566A, 0xAF25DE2C */
    2.93 +         P4 = -1.65339022054652515390e-06,      /* 0xBEBBBD41, 0xC5D26BF1 */
    2.94 +         P5 = 4.13813679705723846039e-08,       /* 0x3E663769, 0x72BEA4D0 */
    2.95 +         lg2 = 6.93147180559945286227e-01,      /* 0x3FE62E42, 0xFEFA39EF */
    2.96 +         lg2_h = 6.93147182464599609375e-01,    /* 0x3FE62E43, 0x00000000 */
    2.97 +         lg2_l = -1.90465429995776804525e-09,   /* 0xBE205C61, 0x0CA86C39 */
    2.98 +         ovt = 8.0085662595372944372e-0017,     /* -(1024-log2(ovfl+.5ulp)) */
    2.99 +         cp = 9.61796693925975554329e-01,       /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
   2.100 +         cp_h = 9.61796700954437255859e-01,     /* 0x3FEEC709, 0xE0000000 =(float)cp */
   2.101 +         cp_l = -7.02846165095275826516e-09,    /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h */
   2.102 +         ivln2 = 1.44269504088896338700e+00,    /* 0x3FF71547, 0x652B82FE =1/ln2 */
   2.103 +         ivln2_h = 1.44269502162933349609e+00,  /* 0x3FF71547, 0x60000000 =24b 1/ln2 */
   2.104 +         ivln2_l = 1.92596299112661746887e-08;  /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail */
   2.105 +
   2.106 +#ifdef __STDC__
   2.107 +     double attribute_hidden __ieee754_pow(double x, double y)
   2.108 +#else
   2.109 +     double attribute_hidden __ieee754_pow(x, y)
   2.110 +     double x, y;
   2.111 +#endif
   2.112 +     {
   2.113 +         double z, ax, z_h, z_l, p_h, p_l;
   2.114 +         double y1, t1, t2, r, s, t, u, v, w;
   2.115 +         int32_t i, j, k, yisint, n;
   2.116 +         int32_t hx, hy, ix, iy;
   2.117 +         u_int32_t lx, ly;
   2.118 +
   2.119 +         EXTRACT_WORDS(hx, lx, x);
   2.120 +         EXTRACT_WORDS(hy, ly, y);
   2.121 +         ix = hx & 0x7fffffff;
   2.122 +         iy = hy & 0x7fffffff;
   2.123 +
   2.124 +         /* y==zero: x**0 = 1 */
   2.125 +         if ((iy | ly) == 0)
   2.126 +             return one;
   2.127 +
   2.128 +         /* +-NaN return x+y */
   2.129 +         if (ix > 0x7ff00000 || ((ix == 0x7ff00000) && (lx != 0)) ||
   2.130 +             iy > 0x7ff00000 || ((iy == 0x7ff00000) && (ly != 0)))
   2.131 +             return x + y;
   2.132 +
   2.133 +         /* determine if y is an odd int when x < 0
   2.134 +          * yisint = 0       ... y is not an integer
   2.135 +          * yisint = 1       ... y is an odd int
   2.136 +          * yisint = 2       ... y is an even int
   2.137 +          */
   2.138 +         yisint = 0;
   2.139 +         if (hx < 0) {
   2.140 +             if (iy >= 0x43400000)
   2.141 +                 yisint = 2;    /* even integer y */
   2.142 +             else if (iy >= 0x3ff00000) {
   2.143 +                 k = (iy >> 20) - 0x3ff;        /* exponent */
   2.144 +                 if (k > 20) {
   2.145 +                     j = ly >> (52 - k);
   2.146 +                     if ((j << (52 - k)) == ly)
   2.147 +                         yisint = 2 - (j & 1);
   2.148 +                 } else if (ly == 0) {
   2.149 +                     j = iy >> (20 - k);
   2.150 +                     if ((j << (20 - k)) == iy)
   2.151 +                         yisint = 2 - (j & 1);
   2.152 +                 }
   2.153 +             }
   2.154 +         }
   2.155 +
   2.156 +         /* special value of y */
   2.157 +         if (ly == 0) {
   2.158 +             if (iy == 0x7ff00000) {    /* y is +-inf */
   2.159 +                 if (((ix - 0x3ff00000) | lx) == 0)
   2.160 +                     return y - y;      /* inf**+-1 is NaN */
   2.161 +                 else if (ix >= 0x3ff00000)     /* (|x|>1)**+-inf = inf,0 */
   2.162 +                     return (hy >= 0) ? y : zero;
   2.163 +                 else           /* (|x|<1)**-,+inf = inf,0 */
   2.164 +                     return (hy < 0) ? -y : zero;
   2.165 +             }
   2.166 +             if (iy == 0x3ff00000) {    /* y is  +-1 */
   2.167 +                 if (hy < 0)
   2.168 +                     return one / x;
   2.169 +                 else
   2.170 +                     return x;
   2.171 +             }
   2.172 +             if (hy == 0x40000000)
   2.173 +                 return x * x;  /* y is  2 */
   2.174 +             if (hy == 0x3fe00000) {    /* y is  0.5 */
   2.175 +                 if (hx >= 0)   /* x >= +0 */
   2.176 +                     return __ieee754_sqrt(x);
   2.177 +             }
   2.178 +         }
   2.179 +
   2.180 +         ax = fabs(x);
   2.181 +         /* special value of x */
   2.182 +         if (lx == 0) {
   2.183 +             if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000) {
   2.184 +                 z = ax;        /*x is +-0,+-inf,+-1 */
   2.185 +                 if (hy < 0)
   2.186 +                     z = one / z;       /* z = (1/|x|) */
   2.187 +                 if (hx < 0) {
   2.188 +                     if (((ix - 0x3ff00000) | yisint) == 0) {
   2.189 +                         z = (z - z) / (z - z); /* (-1)**non-int is NaN */
   2.190 +                     } else if (yisint == 1)
   2.191 +                         z = -z;        /* (x<0)**odd = -(|x|**odd) */
   2.192 +                 }
   2.193 +                 return z;
   2.194 +             }
   2.195 +         }
   2.196 +
   2.197 +         /* (x<0)**(non-int) is NaN */
   2.198 +         if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
   2.199 +             return (x - x) / (x - x);
   2.200 +
   2.201 +         /* |y| is huge */
   2.202 +         if (iy > 0x41e00000) { /* if |y| > 2**31 */
   2.203 +             if (iy > 0x43f00000) {     /* if |y| > 2**64, must o/uflow */
   2.204 +                 if (ix <= 0x3fefffff)
   2.205 +                     return (hy < 0) ? huge * huge : tiny * tiny;
   2.206 +                 if (ix >= 0x3ff00000)
   2.207 +                     return (hy > 0) ? huge * huge : tiny * tiny;
   2.208 +             }
   2.209 +             /* over/underflow if x is not close to one */
   2.210 +             if (ix < 0x3fefffff)
   2.211 +                 return (hy < 0) ? huge * huge : tiny * tiny;
   2.212 +             if (ix > 0x3ff00000)
   2.213 +                 return (hy > 0) ? huge * huge : tiny * tiny;
   2.214 +             /* now |1-x| is tiny <= 2**-20, suffice to compute
   2.215 +                log(x) by x-x^2/2+x^3/3-x^4/4 */
   2.216 +             t = x - 1;         /* t has 20 trailing zeros */
   2.217 +             w = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25));
   2.218 +             u = ivln2_h * t;   /* ivln2_h has 21 sig. bits */
   2.219 +             v = t * ivln2_l - w * ivln2;
   2.220 +             t1 = u + v;
   2.221 +             SET_LOW_WORD(t1, 0);
   2.222 +             t2 = v - (t1 - u);
   2.223 +         } else {
   2.224 +             double s2, s_h, s_l, t_h, t_l;
   2.225 +             n = 0;
   2.226 +             /* take care subnormal number */
   2.227 +             if (ix < 0x00100000) {
   2.228 +                 ax *= two53;
   2.229 +                 n -= 53;
   2.230 +                 GET_HIGH_WORD(ix, ax);
   2.231 +             }
   2.232 +             n += ((ix) >> 20) - 0x3ff;
   2.233 +             j = ix & 0x000fffff;
   2.234 +             /* determine interval */
   2.235 +             ix = j | 0x3ff00000;       /* normalize ix */
   2.236 +             if (j <= 0x3988E)
   2.237 +                 k = 0;         /* |x|<sqrt(3/2) */
   2.238 +             else if (j < 0xBB67A)
   2.239 +                 k = 1;         /* |x|<sqrt(3)   */
   2.240 +             else {
   2.241 +                 k = 0;
   2.242 +                 n += 1;
   2.243 +                 ix -= 0x00100000;
   2.244 +             }
   2.245 +             SET_HIGH_WORD(ax, ix);
   2.246 +
   2.247 +             /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
   2.248 +             u = ax - bp[k];    /* bp[0]=1.0, bp[1]=1.5 */
   2.249 +             v = one / (ax + bp[k]);
   2.250 +             s = u * v;
   2.251 +             s_h = s;
   2.252 +             SET_LOW_WORD(s_h, 0);
   2.253 +             /* t_h=ax+bp[k] High */
   2.254 +             t_h = zero;
   2.255 +             SET_HIGH_WORD(t_h,
   2.256 +                           ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18));
   2.257 +             t_l = ax - (t_h - bp[k]);
   2.258 +             s_l = v * ((u - s_h * t_h) - s_h * t_l);
   2.259 +             /* compute log(ax) */
   2.260 +             s2 = s * s;
   2.261 +             r = s2 * s2 * (L1 +
   2.262 +                            s2 * (L2 +
   2.263 +                                  s2 * (L3 +
   2.264 +                                        s2 * (L4 + s2 * (L5 + s2 * L6)))));
   2.265 +             r += s_l * (s_h + s);
   2.266 +             s2 = s_h * s_h;
   2.267 +             t_h = 3.0 + s2 + r;
   2.268 +             SET_LOW_WORD(t_h, 0);
   2.269 +             t_l = r - ((t_h - 3.0) - s2);
   2.270 +             /* u+v = s*(1+...) */
   2.271 +             u = s_h * t_h;
   2.272 +             v = s_l * t_h + t_l * s;
   2.273 +             /* 2/(3log2)*(s+...) */
   2.274 +             p_h = u + v;
   2.275 +             SET_LOW_WORD(p_h, 0);
   2.276 +             p_l = v - (p_h - u);
   2.277 +             z_h = cp_h * p_h;  /* cp_h+cp_l = 2/(3*log2) */
   2.278 +             z_l = cp_l * p_h + p_l * cp + dp_l[k];
   2.279 +             /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
   2.280 +             t = (double) n;
   2.281 +             t1 = (((z_h + z_l) + dp_h[k]) + t);
   2.282 +             SET_LOW_WORD(t1, 0);
   2.283 +             t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
   2.284 +         }
   2.285 +
   2.286 +         s = one;               /* s (sign of result -ve**odd) = -1 else = 1 */
   2.287 +         if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
   2.288 +             s = -one;          /* (-ve)**(odd int) */
   2.289 +
   2.290 +         /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
   2.291 +         y1 = y;
   2.292 +         SET_LOW_WORD(y1, 0);
   2.293 +         p_l = (y - y1) * t1 + y * t2;
   2.294 +         p_h = y1 * t1;
   2.295 +         z = p_l + p_h;
   2.296 +         EXTRACT_WORDS(j, i, z);
   2.297 +         if (j >= 0x40900000) { /* z >= 1024 */
   2.298 +             if (((j - 0x40900000) | i) != 0)   /* if z > 1024 */
   2.299 +                 return s * huge * huge;        /* overflow */
   2.300 +             else {
   2.301 +                 if (p_l + ovt > z - p_h)
   2.302 +                     return s * huge * huge;    /* overflow */
   2.303 +             }
   2.304 +         } else if ((j & 0x7fffffff) >= 0x4090cc00) {   /* z <= -1075 */
   2.305 +             if (((j - 0xc090cc00) | i) != 0)   /* z < -1075 */
   2.306 +                 return s * tiny * tiny;        /* underflow */
   2.307 +             else {
   2.308 +                 if (p_l <= z - p_h)
   2.309 +                     return s * tiny * tiny;    /* underflow */
   2.310 +             }
   2.311 +         }
   2.312 +         /*
   2.313 +          * compute 2**(p_h+p_l)
   2.314 +          */
   2.315 +         i = j & 0x7fffffff;
   2.316 +         k = (i >> 20) - 0x3ff;
   2.317 +         n = 0;
   2.318 +         if (i > 0x3fe00000) {  /* if |z| > 0.5, set n = [z+0.5] */
   2.319 +             n = j + (0x00100000 >> (k + 1));
   2.320 +             k = ((n & 0x7fffffff) >> 20) - 0x3ff;      /* new k for n */
   2.321 +             t = zero;
   2.322 +             SET_HIGH_WORD(t, n & ~(0x000fffff >> k));
   2.323 +             n = ((n & 0x000fffff) | 0x00100000) >> (20 - k);
   2.324 +             if (j < 0)
   2.325 +                 n = -n;
   2.326 +             p_h -= t;
   2.327 +         }
   2.328 +         t = p_l + p_h;
   2.329 +         SET_LOW_WORD(t, 0);
   2.330 +         u = t * lg2_h;
   2.331 +         v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
   2.332 +         z = u + v;
   2.333 +         w = v - (z - u);
   2.334 +         t = z * z;
   2.335 +         t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
   2.336 +         r = (z * t1) / (t1 - two) - (w + z * w);
   2.337 +         z = one - (r - z);
   2.338 +         GET_HIGH_WORD(j, z);
   2.339 +         j += (n << 20);
   2.340 +         if ((j >> 20) <= 0)
   2.341 +             z = scalbn(z, n);  /* subnormal output */
   2.342 +         else
   2.343 +             SET_HIGH_WORD(z, j);
   2.344 +         return s * z;
   2.345 +     }
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/libm/e_rem_pio2.c	Mon Sep 15 06:33:23 2008 +0000
     3.3 @@ -0,0 +1,201 @@
     3.4 +/* @(#)e_rem_pio2.c 5.1 93/09/24 */
     3.5 +/*
     3.6 + * ====================================================
     3.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     3.8 + *
     3.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
    3.10 + * Permission to use, copy, modify, and distribute this
    3.11 + * software is freely granted, provided that this notice
    3.12 + * is preserved.
    3.13 + * ====================================================
    3.14 + */
    3.15 +
    3.16 +#if defined(LIBM_SCCS) && !defined(lint)
    3.17 +static char rcsid[] =
    3.18 +    "$NetBSD: e_rem_pio2.c,v 1.8 1995/05/10 20:46:02 jtc Exp $";
    3.19 +#endif
    3.20 +
    3.21 +/* __ieee754_rem_pio2(x,y)
    3.22 + *
    3.23 + * return the remainder of x rem pi/2 in y[0]+y[1]
    3.24 + * use __kernel_rem_pio2()
    3.25 + */
    3.26 +
    3.27 +#include "math.h"
    3.28 +#include "math_private.h"
    3.29 +
    3.30 +libm_hidden_proto(fabs)
    3.31 +
    3.32 +/*
    3.33 + * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
    3.34 + */
    3.35 +#ifdef __STDC__
    3.36 +     static const int32_t two_over_pi[] = {
    3.37 +#else
    3.38 +     static int32_t two_over_pi[] = {
    3.39 +#endif
    3.40 +         0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
    3.41 +         0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
    3.42 +         0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
    3.43 +         0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
    3.44 +         0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
    3.45 +         0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
    3.46 +         0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
    3.47 +         0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
    3.48 +         0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
    3.49 +         0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
    3.50 +         0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
    3.51 +     };
    3.52 +
    3.53 +#ifdef __STDC__
    3.54 +static const int32_t npio2_hw[] = {
    3.55 +#else
    3.56 +static int32_t npio2_hw[] = {
    3.57 +#endif
    3.58 +    0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
    3.59 +    0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
    3.60 +    0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
    3.61 +    0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
    3.62 +    0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
    3.63 +    0x404858EB, 0x404921FB,
    3.64 +};
    3.65 +
    3.66 +/*
    3.67 + * invpio2:  53 bits of 2/pi
    3.68 + * pio2_1:   first  33 bit of pi/2
    3.69 + * pio2_1t:  pi/2 - pio2_1
    3.70 + * pio2_2:   second 33 bit of pi/2
    3.71 + * pio2_2t:  pi/2 - (pio2_1+pio2_2)
    3.72 + * pio2_3:   third  33 bit of pi/2
    3.73 + * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
    3.74 + */
    3.75 +
    3.76 +#ifdef __STDC__
    3.77 +static const double
    3.78 +#else
    3.79 +static double
    3.80 +#endif
    3.81 +  zero = 0.00000000000000000000e+00,    /* 0x00000000, 0x00000000 */
    3.82 +    half = 5.00000000000000000000e-01,  /* 0x3FE00000, 0x00000000 */
    3.83 +    two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
    3.84 +    invpio2 = 6.36619772367581382433e-01,       /* 0x3FE45F30, 0x6DC9C883 */
    3.85 +    pio2_1 = 1.57079632673412561417e+00,        /* 0x3FF921FB, 0x54400000 */
    3.86 +    pio2_1t = 6.07710050650619224932e-11,       /* 0x3DD0B461, 0x1A626331 */
    3.87 +    pio2_2 = 6.07710050630396597660e-11,        /* 0x3DD0B461, 0x1A600000 */
    3.88 +    pio2_2t = 2.02226624879595063154e-21,       /* 0x3BA3198A, 0x2E037073 */
    3.89 +    pio2_3 = 2.02226624871116645580e-21,        /* 0x3BA3198A, 0x2E000000 */
    3.90 +    pio2_3t = 8.47842766036889956997e-32;       /* 0x397B839A, 0x252049C1 */
    3.91 +
    3.92 +#ifdef __STDC__
    3.93 +int32_t attribute_hidden
    3.94 +__ieee754_rem_pio2(double x, double *y)
    3.95 +#else
    3.96 +int32_t attribute_hidden
    3.97 +__ieee754_rem_pio2(x, y)
    3.98 +     double x, y[];
    3.99 +#endif
   3.100 +{
   3.101 +    double z = 0.0, w, t, r, fn;
   3.102 +    double tx[3];
   3.103 +    int32_t e0, i, j, nx, n, ix, hx;
   3.104 +    u_int32_t low;
   3.105 +
   3.106 +    GET_HIGH_WORD(hx, x);       /* high word of x */
   3.107 +    ix = hx & 0x7fffffff;
   3.108 +    if (ix <= 0x3fe921fb) {     /* |x| ~<= pi/4 , no need for reduction */
   3.109 +        y[0] = x;
   3.110 +        y[1] = 0;
   3.111 +        return 0;
   3.112 +    }
   3.113 +    if (ix < 0x4002d97c) {      /* |x| < 3pi/4, special case with n=+-1 */
   3.114 +        if (hx > 0) {
   3.115 +            z = x - pio2_1;
   3.116 +            if (ix != 0x3ff921fb) {     /* 33+53 bit pi is good enough */
   3.117 +                y[0] = z - pio2_1t;
   3.118 +                y[1] = (z - y[0]) - pio2_1t;
   3.119 +            } else {            /* near pi/2, use 33+33+53 bit pi */
   3.120 +                z -= pio2_2;
   3.121 +                y[0] = z - pio2_2t;
   3.122 +                y[1] = (z - y[0]) - pio2_2t;
   3.123 +            }
   3.124 +            return 1;
   3.125 +        } else {                /* negative x */
   3.126 +            z = x + pio2_1;
   3.127 +            if (ix != 0x3ff921fb) {     /* 33+53 bit pi is good enough */
   3.128 +                y[0] = z + pio2_1t;
   3.129 +                y[1] = (z - y[0]) + pio2_1t;
   3.130 +            } else {            /* near pi/2, use 33+33+53 bit pi */
   3.131 +                z += pio2_2;
   3.132 +                y[0] = z + pio2_2t;
   3.133 +                y[1] = (z - y[0]) + pio2_2t;
   3.134 +            }
   3.135 +            return -1;
   3.136 +        }
   3.137 +    }
   3.138 +    if (ix <= 0x413921fb) {     /* |x| ~<= 2^19*(pi/2), medium size */
   3.139 +        t = fabs(x);
   3.140 +        n = (int32_t) (t * invpio2 + half);
   3.141 +        fn = (double) n;
   3.142 +        r = t - fn * pio2_1;
   3.143 +        w = fn * pio2_1t;       /* 1st round good to 85 bit */
   3.144 +        if (n < 32 && ix != npio2_hw[n - 1]) {
   3.145 +            y[0] = r - w;       /* quick check no cancellation */
   3.146 +        } else {
   3.147 +            u_int32_t high;
   3.148 +            j = ix >> 20;
   3.149 +            y[0] = r - w;
   3.150 +            GET_HIGH_WORD(high, y[0]);
   3.151 +            i = j - ((high >> 20) & 0x7ff);
   3.152 +            if (i > 16) {       /* 2nd iteration needed, good to 118 */
   3.153 +                t = r;
   3.154 +                w = fn * pio2_2;
   3.155 +                r = t - w;
   3.156 +                w = fn * pio2_2t - ((t - r) - w);
   3.157 +                y[0] = r - w;
   3.158 +                GET_HIGH_WORD(high, y[0]);
   3.159 +                i = j - ((high >> 20) & 0x7ff);
   3.160 +                if (i > 49) {   /* 3rd iteration need, 151 bits acc */
   3.161 +                    t = r;      /* will cover all possible cases */
   3.162 +                    w = fn * pio2_3;
   3.163 +                    r = t - w;
   3.164 +                    w = fn * pio2_3t - ((t - r) - w);
   3.165 +                    y[0] = r - w;
   3.166 +                }
   3.167 +            }
   3.168 +        }
   3.169 +        y[1] = (r - y[0]) - w;
   3.170 +        if (hx < 0) {
   3.171 +            y[0] = -y[0];
   3.172 +            y[1] = -y[1];
   3.173 +            return -n;
   3.174 +        } else
   3.175 +            return n;
   3.176 +    }
   3.177 +    /*
   3.178 +     * all other (large) arguments
   3.179 +     */
   3.180 +    if (ix >= 0x7ff00000) {     /* x is inf or NaN */
   3.181 +        y[0] = y[1] = x - x;
   3.182 +        return 0;
   3.183 +    }
   3.184 +    /* set z = scalbn(|x|,ilogb(x)-23) */
   3.185 +    GET_LOW_WORD(low, x);
   3.186 +    SET_LOW_WORD(z, low);
   3.187 +    e0 = (ix >> 20) - 1046;     /* e0 = ilogb(z)-23; */
   3.188 +    SET_HIGH_WORD(z, ix - ((int32_t) (e0 << 20)));
   3.189 +    for (i = 0; i < 2; i++) {
   3.190 +        tx[i] = (double) ((int32_t) (z));
   3.191 +        z = (z - tx[i]) * two24;
   3.192 +    }
   3.193 +    tx[2] = z;
   3.194 +    nx = 3;
   3.195 +    while (tx[nx - 1] == zero)
   3.196 +        nx--;                   /* skip zero term */
   3.197 +    n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
   3.198 +    if (hx < 0) {
   3.199 +        y[0] = -y[0];
   3.200 +        y[1] = -y[1];
   3.201 +        return -n;
   3.202 +    }
   3.203 +    return n;
   3.204 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/libm/e_sqrt.c	Mon Sep 15 06:33:23 2008 +0000
     4.3 @@ -0,0 +1,463 @@
     4.4 +/* @(#)e_sqrt.c 5.1 93/09/24 */
     4.5 +/*
     4.6 + * ====================================================
     4.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     4.8 + *
     4.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
    4.10 + * Permission to use, copy, modify, and distribute this
    4.11 + * software is freely granted, provided that this notice
    4.12 + * is preserved.
    4.13 + * ====================================================
    4.14 + */
    4.15 +
    4.16 +#if defined(LIBM_SCCS) && !defined(lint)
    4.17 +static char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
    4.18 +#endif
    4.19 +
    4.20 +/* __ieee754_sqrt(x)
    4.21 + * Return correctly rounded sqrt.
    4.22 + *           ------------------------------------------
    4.23 + *	     |  Use the hardware sqrt if you have one |
    4.24 + *           ------------------------------------------
    4.25 + * Method:
    4.26 + *   Bit by bit method using integer arithmetic. (Slow, but portable)
    4.27 + *   1. Normalization
    4.28 + *	Scale x to y in [1,4) with even powers of 2:
    4.29 + *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
    4.30 + *		sqrt(x) = 2^k * sqrt(y)
    4.31 + *   2. Bit by bit computation
    4.32 + *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
    4.33 + *	     i							 0
    4.34 + *                                     i+1         2
    4.35 + *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
    4.36 + *	     i      i            i                 i
    4.37 + *
    4.38 + *	To compute q    from q , one checks whether
    4.39 + *		    i+1       i
    4.40 + *
    4.41 + *			      -(i+1) 2
    4.42 + *			(q + 2      ) <= y.			(2)
    4.43 + *     			  i
    4.44 + *							      -(i+1)
    4.45 + *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
    4.46 + *		 	       i+1   i             i+1   i
    4.47 + *
    4.48 + *	With some algebric manipulation, it is not difficult to see
    4.49 + *	that (2) is equivalent to
    4.50 + *                             -(i+1)
    4.51 + *			s  +  2       <= y			(3)
    4.52 + *			 i                i
    4.53 + *
    4.54 + *	The advantage of (3) is that s  and y  can be computed by
    4.55 + *				      i      i
    4.56 + *	the following recurrence formula:
    4.57 + *	    if (3) is false
    4.58 + *
    4.59 + *	    s     =  s  ,	y    = y   ;			(4)
    4.60 + *	     i+1      i		 i+1    i
    4.61 + *
    4.62 + *	    otherwise,
    4.63 + *                         -i                     -(i+1)
    4.64 + *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
    4.65 + *           i+1      i          i+1    i     i
    4.66 + *
    4.67 + *	One may easily use induction to prove (4) and (5).
    4.68 + *	Note. Since the left hand side of (3) contain only i+2 bits,
    4.69 + *	      it does not necessary to do a full (53-bit) comparison
    4.70 + *	      in (3).
    4.71 + *   3. Final rounding
    4.72 + *	After generating the 53 bits result, we compute one more bit.
    4.73 + *	Together with the remainder, we can decide whether the
    4.74 + *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
    4.75 + *	(it will never equal to 1/2ulp).
    4.76 + *	The rounding mode can be detected by checking whether
    4.77 + *	huge + tiny is equal to huge, and whether huge - tiny is
    4.78 + *	equal to huge for some floating point number "huge" and "tiny".
    4.79 + *
    4.80 + * Special cases:
    4.81 + *	sqrt(+-0) = +-0 	... exact
    4.82 + *	sqrt(inf) = inf
    4.83 + *	sqrt(-ve) = NaN		... with invalid signal
    4.84 + *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
    4.85 + *
    4.86 + * Other methods : see the appended file at the end of the program below.
    4.87 + *---------------
    4.88 + */
    4.89 +
    4.90 +#include "math.h"
    4.91 +#include "math_private.h"
    4.92 +
    4.93 +#ifdef __STDC__
    4.94 +static const double one = 1.0, tiny = 1.0e-300;
    4.95 +#else
    4.96 +static double one = 1.0, tiny = 1.0e-300;
    4.97 +#endif
    4.98 +
    4.99 +#ifdef __STDC__
   4.100 +double attribute_hidden
   4.101 +__ieee754_sqrt(double x)
   4.102 +#else
   4.103 +double attribute_hidden
   4.104 +__ieee754_sqrt(x)
   4.105 +     double x;
   4.106 +#endif
   4.107 +{
   4.108 +    double z;
   4.109 +    int32_t sign = (int) 0x80000000;
   4.110 +    int32_t ix0, s0, q, m, t, i;
   4.111 +    u_int32_t r, t1, s1, ix1, q1;
   4.112 +
   4.113 +    EXTRACT_WORDS(ix0, ix1, x);
   4.114 +
   4.115 +    /* take care of Inf and NaN */
   4.116 +    if ((ix0 & 0x7ff00000) == 0x7ff00000) {
   4.117 +        return x * x + x;       /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
   4.118 +                                   sqrt(-inf)=sNaN */
   4.119 +    }
   4.120 +    /* take care of zero */
   4.121 +    if (ix0 <= 0) {
   4.122 +        if (((ix0 & (~sign)) | ix1) == 0)
   4.123 +            return x;           /* sqrt(+-0) = +-0 */
   4.124 +        else if (ix0 < 0)
   4.125 +            return (x - x) / (x - x);   /* sqrt(-ve) = sNaN */
   4.126 +    }
   4.127 +    /* normalize x */
   4.128 +    m = (ix0 >> 20);
   4.129 +    if (m == 0) {               /* subnormal x */
   4.130 +        while (ix0 == 0) {
   4.131 +            m -= 21;
   4.132 +            ix0 |= (ix1 >> 11);
   4.133 +            ix1 <<= 21;
   4.134 +        }
   4.135 +        for (i = 0; (ix0 & 0x00100000) == 0; i++)
   4.136 +            ix0 <<= 1;
   4.137 +        m -= i - 1;
   4.138 +        ix0 |= (ix1 >> (32 - i));
   4.139 +        ix1 <<= i;
   4.140 +    }
   4.141 +    m -= 1023;                  /* unbias exponent */
   4.142 +    ix0 = (ix0 & 0x000fffff) | 0x00100000;
   4.143 +    if (m & 1) {                /* odd m, double x to make it even */
   4.144 +        ix0 += ix0 + ((ix1 & sign) >> 31);
   4.145 +        ix1 += ix1;
   4.146 +    }
   4.147 +    m >>= 1;                    /* m = [m/2] */
   4.148 +
   4.149 +    /* generate sqrt(x) bit by bit */
   4.150 +    ix0 += ix0 + ((ix1 & sign) >> 31);
   4.151 +    ix1 += ix1;
   4.152 +    q = q1 = s0 = s1 = 0;       /* [q,q1] = sqrt(x) */
   4.153 +    r = 0x00200000;             /* r = moving bit from right to left */
   4.154 +
   4.155 +    while (r != 0) {
   4.156 +        t = s0 + r;
   4.157 +        if (t <= ix0) {
   4.158 +            s0 = t + r;
   4.159 +            ix0 -= t;
   4.160 +            q += r;
   4.161 +        }
   4.162 +        ix0 += ix0 + ((ix1 & sign) >> 31);
   4.163 +        ix1 += ix1;
   4.164 +        r >>= 1;
   4.165 +    }
   4.166 +
   4.167 +    r = sign;
   4.168 +    while (r != 0) {
   4.169 +        t1 = s1 + r;
   4.170 +        t = s0;
   4.171 +        if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
   4.172 +            s1 = t1 + r;
   4.173 +            if (((t1 & sign) == sign) && (s1 & sign) == 0)
   4.174 +                s0 += 1;
   4.175 +            ix0 -= t;
   4.176 +            if (ix1 < t1)
   4.177 +                ix0 -= 1;
   4.178 +            ix1 -= t1;
   4.179 +            q1 += r;
   4.180 +        }
   4.181 +        ix0 += ix0 + ((ix1 & sign) >> 31);
   4.182 +        ix1 += ix1;
   4.183 +        r >>= 1;
   4.184 +    }
   4.185 +
   4.186 +    /* use floating add to find out rounding direction */
   4.187 +    if ((ix0 | ix1) != 0) {
   4.188 +        z = one - tiny;         /* trigger inexact flag */
   4.189 +        if (z >= one) {
   4.190 +            z = one + tiny;
   4.191 +            if (q1 == (u_int32_t) 0xffffffff) {
   4.192 +                q1 = 0;
   4.193 +                q += 1;
   4.194 +            } else if (z > one) {
   4.195 +                if (q1 == (u_int32_t) 0xfffffffe)
   4.196 +                    q += 1;
   4.197 +                q1 += 2;
   4.198 +            } else
   4.199 +                q1 += (q1 & 1);
   4.200 +        }
   4.201 +    }
   4.202 +    ix0 = (q >> 1) + 0x3fe00000;
   4.203 +    ix1 = q1 >> 1;
   4.204 +    if ((q & 1) == 1)
   4.205 +        ix1 |= sign;
   4.206 +    ix0 += (m << 20);
   4.207 +    INSERT_WORDS(z, ix0, ix1);
   4.208 +    return z;
   4.209 +}
   4.210 +
   4.211 +/*
   4.212 +Other methods  (use floating-point arithmetic)
   4.213 +-------------
   4.214 +(This is a copy of a drafted paper by Prof W. Kahan
   4.215 +and K.C. Ng, written in May, 1986)
   4.216 +
   4.217 +	Two algorithms are given here to implement sqrt(x)
   4.218 +	(IEEE double precision arithmetic) in software.
   4.219 +	Both supply sqrt(x) correctly rounded. The first algorithm (in
   4.220 +	Section A) uses newton iterations and involves four divisions.
   4.221 +	The second one uses reciproot iterations to avoid division, but
   4.222 +	requires more multiplications. Both algorithms need the ability
   4.223 +	to chop results of arithmetic operations instead of round them,
   4.224 +	and the INEXACT flag to indicate when an arithmetic operation
   4.225 +	is executed exactly with no roundoff error, all part of the
   4.226 +	standard (IEEE 754-1985). The ability to perform shift, add,
   4.227 +	subtract and logical AND operations upon 32-bit words is needed
   4.228 +	too, though not part of the standard.
   4.229 +
   4.230 +A.  sqrt(x) by Newton Iteration
   4.231 +
   4.232 +   (1)	Initial approximation
   4.233 +
   4.234 +	Let x0 and x1 be the leading and the trailing 32-bit words of
   4.235 +	a floating point number x (in IEEE double format) respectively
   4.236 +
   4.237 +	    1    11		     52				  ...widths
   4.238 +	   ------------------------------------------------------
   4.239 +	x: |s|	  e     |	      f				|
   4.240 +	   ------------------------------------------------------
   4.241 +	      msb    lsb  msb				      lsb ...order
   4.242 +
   4.243 +
   4.244 +	     ------------------------  	     ------------------------
   4.245 +	x0:  |s|   e    |    f1     |	 x1: |          f2           |
   4.246 +	     ------------------------  	     ------------------------
   4.247 +
   4.248 +	By performing shifts and subtracts on x0 and x1 (both regarded
   4.249 +	as integers), we obtain an 8-bit approximation of sqrt(x) as
   4.250 +	follows.
   4.251 +
   4.252 +		k  := (x0>>1) + 0x1ff80000;
   4.253 +		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
   4.254 +	Here k is a 32-bit integer and T1[] is an integer array containing
   4.255 +	correction terms. Now magically the floating value of y (y's
   4.256 +	leading 32-bit word is y0, the value of its trailing word is 0)
   4.257 +	approximates sqrt(x) to almost 8-bit.
   4.258 +
   4.259 +	Value of T1:
   4.260 +	static int T1[32]= {
   4.261 +	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
   4.262 +	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
   4.263 +	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
   4.264 +	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
   4.265 +
   4.266 +    (2)	Iterative refinement
   4.267 +
   4.268 +	Apply Heron's rule three times to y, we have y approximates
   4.269 +	sqrt(x) to within 1 ulp (Unit in the Last Place):
   4.270 +
   4.271 +		y := (y+x/y)/2		... almost 17 sig. bits
   4.272 +		y := (y+x/y)/2		... almost 35 sig. bits
   4.273 +		y := y-(y-x/y)/2	... within 1 ulp
   4.274 +
   4.275 +
   4.276 +	Remark 1.
   4.277 +	    Another way to improve y to within 1 ulp is:
   4.278 +
   4.279 +		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
   4.280 +		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
   4.281 +
   4.282 +				2
   4.283 +			    (x-y )*y
   4.284 +		y := y + 2* ----------	...within 1 ulp
   4.285 +			       2
   4.286 +			     3y  + x
   4.287 +
   4.288 +
   4.289 +	This formula has one division fewer than the one above; however,
   4.290 +	it requires more multiplications and additions. Also x must be
   4.291 +	scaled in advance to avoid spurious overflow in evaluating the
   4.292 +	expression 3y*y+x. Hence it is not recommended uless division
   4.293 +	is slow. If division is very slow, then one should use the
   4.294 +	reciproot algorithm given in section B.
   4.295 +
   4.296 +    (3) Final adjustment
   4.297 +
   4.298 +	By twiddling y's last bit it is possible to force y to be
   4.299 +	correctly rounded according to the prevailing rounding mode
   4.300 +	as follows. Let r and i be copies of the rounding mode and
   4.301 +	inexact flag before entering the square root program. Also we
   4.302 +	use the expression y+-ulp for the next representable floating
   4.303 +	numbers (up and down) of y. Note that y+-ulp = either fixed
   4.304 +	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
   4.305 +	mode.
   4.306 +
   4.307 +		I := FALSE;	... reset INEXACT flag I
   4.308 +		R := RZ;	... set rounding mode to round-toward-zero
   4.309 +		z := x/y;	... chopped quotient, possibly inexact
   4.310 +		If(not I) then {	... if the quotient is exact
   4.311 +		    if(z=y) {
   4.312 +		        I := i;	 ... restore inexact flag
   4.313 +		        R := r;  ... restore rounded mode
   4.314 +		        return sqrt(x):=y.
   4.315 +		    } else {
   4.316 +			z := z - ulp;	... special rounding
   4.317 +		    }
   4.318 +		}
   4.319 +		i := TRUE;		... sqrt(x) is inexact
   4.320 +		If (r=RN) then z=z+ulp	... rounded-to-nearest
   4.321 +		If (r=RP) then {	... round-toward-+inf
   4.322 +		    y = y+ulp; z=z+ulp;
   4.323 +		}
   4.324 +		y := y+z;		... chopped sum
   4.325 +		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
   4.326 +	        I := i;	 		... restore inexact flag
   4.327 +	        R := r;  		... restore rounded mode
   4.328 +	        return sqrt(x):=y.
   4.329 +
   4.330 +    (4)	Special cases
   4.331 +
   4.332 +	Square root of +inf, +-0, or NaN is itself;
   4.333 +	Square root of a negative number is NaN with invalid signal.
   4.334 +
   4.335 +
   4.336 +B.  sqrt(x) by Reciproot Iteration
   4.337 +
   4.338 +   (1)	Initial approximation
   4.339 +
   4.340 +	Let x0 and x1 be the leading and the trailing 32-bit words of
   4.341 +	a floating point number x (in IEEE double format) respectively
   4.342 +	(see section A). By performing shifs and subtracts on x0 and y0,
   4.343 +	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
   4.344 +
   4.345 +	    k := 0x5fe80000 - (x0>>1);
   4.346 +	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
   4.347 +
   4.348 +	Here k is a 32-bit integer and T2[] is an integer array
   4.349 +	containing correction terms. Now magically the floating
   4.350 +	value of y (y's leading 32-bit word is y0, the value of
   4.351 +	its trailing word y1 is set to zero) approximates 1/sqrt(x)
   4.352 +	to almost 7.8-bit.
   4.353 +
   4.354 +	Value of T2:
   4.355 +	static int T2[64]= {
   4.356 +	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
   4.357 +	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
   4.358 +	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
   4.359 +	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
   4.360 +	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
   4.361 +	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
   4.362 +	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
   4.363 +	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
   4.364 +
   4.365 +    (2)	Iterative refinement
   4.366 +
   4.367 +	Apply Reciproot iteration three times to y and multiply the
   4.368 +	result by x to get an approximation z that matches sqrt(x)
   4.369 +	to about 1 ulp. To be exact, we will have
   4.370 +		-1ulp < sqrt(x)-z<1.0625ulp.
   4.371 +
   4.372 +	... set rounding mode to Round-to-nearest
   4.373 +	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
   4.374 +	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
   4.375 +	... special arrangement for better accuracy
   4.376 +	   z := x*y			... 29 bits to sqrt(x), with z*y<1
   4.377 +	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
   4.378 +
   4.379 +	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
   4.380 +	(a) the term z*y in the final iteration is always less than 1;
   4.381 +	(b) the error in the final result is biased upward so that
   4.382 +		-1 ulp < sqrt(x) - z < 1.0625 ulp
   4.383 +	    instead of |sqrt(x)-z|<1.03125ulp.
   4.384 +
   4.385 +    (3)	Final adjustment
   4.386 +
   4.387 +	By twiddling y's last bit it is possible to force y to be
   4.388 +	correctly rounded according to the prevailing rounding mode
   4.389 +	as follows. Let r and i be copies of the rounding mode and
   4.390 +	inexact flag before entering the square root program. Also we
   4.391 +	use the expression y+-ulp for the next representable floating
   4.392 +	numbers (up and down) of y. Note that y+-ulp = either fixed
   4.393 +	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
   4.394 +	mode.
   4.395 +
   4.396 +	R := RZ;		... set rounding mode to round-toward-zero
   4.397 +	switch(r) {
   4.398 +	    case RN:		... round-to-nearest
   4.399 +	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
   4.400 +	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
   4.401 +	       break;
   4.402 +	    case RZ:case RM:	... round-to-zero or round-to--inf
   4.403 +	       R:=RP;		... reset rounding mod to round-to-+inf
   4.404 +	       if(x<z*z ... rounded up) z = z - ulp; else
   4.405 +	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
   4.406 +	       break;
   4.407 +	    case RP:		... round-to-+inf
   4.408 +	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
   4.409 +	       if(x>z*z ...chopped) z = z+ulp;
   4.410 +	       break;
   4.411 +	}
   4.412 +
   4.413 +	Remark 3. The above comparisons can be done in fixed point. For
   4.414 +	example, to compare x and w=z*z chopped, it suffices to compare
   4.415 +	x1 and w1 (the trailing parts of x and w), regarding them as
   4.416 +	two's complement integers.
   4.417 +
   4.418 +	...Is z an exact square root?
   4.419 +	To determine whether z is an exact square root of x, let z1 be the
   4.420 +	trailing part of z, and also let x0 and x1 be the leading and
   4.421 +	trailing parts of x.
   4.422 +
   4.423 +	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
   4.424 +	    I := 1;		... Raise Inexact flag: z is not exact
   4.425 +	else {
   4.426 +	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
   4.427 +	    k := z1 >> 26;		... get z's 25-th and 26-th
   4.428 +					    fraction bits
   4.429 +	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
   4.430 +	}
   4.431 +	R:= r		... restore rounded mode
   4.432 +	return sqrt(x):=z.
   4.433 +
   4.434 +	If multiplication is cheaper then the foregoing red tape, the
   4.435 +	Inexact flag can be evaluated by
   4.436 +
   4.437 +	    I := i;
   4.438 +	    I := (z*z!=x) or I.
   4.439 +
   4.440 +	Note that z*z can overwrite I; this value must be sensed if it is
   4.441 +	True.
   4.442 +
   4.443 +	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
   4.444 +	zero.
   4.445 +
   4.446 +		    --------------------
   4.447 +		z1: |        f2        |
   4.448 +		    --------------------
   4.449 +		bit 31		   bit 0
   4.450 +
   4.451 +	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
   4.452 +	or even of logb(x) have the following relations:
   4.453 +
   4.454 +	-------------------------------------------------
   4.455 +	bit 27,26 of z1		bit 1,0 of x1	logb(x)
   4.456 +	-------------------------------------------------
   4.457 +	00			00		odd and even
   4.458 +	01			01		even
   4.459 +	10			10		odd
   4.460 +	10			00		even
   4.461 +	11			01		even
   4.462 +	-------------------------------------------------
   4.463 +
   4.464 +    (4)	Special cases (see (4) of Section A).
   4.465 +
   4.466 + */
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/libm/k_cos.c	Mon Sep 15 06:33:23 2008 +0000
     5.3 @@ -0,0 +1,99 @@
     5.4 +/* @(#)k_cos.c 5.1 93/09/24 */
     5.5 +/*
     5.6 + * ====================================================
     5.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     5.8 + *
     5.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
    5.10 + * Permission to use, copy, modify, and distribute this
    5.11 + * software is freely granted, provided that this notice
    5.12 + * is preserved.
    5.13 + * ====================================================
    5.14 + */
    5.15 +
    5.16 +#if defined(LIBM_SCCS) && !defined(lint)
    5.17 +static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
    5.18 +#endif
    5.19 +
    5.20 +/*
    5.21 + * __kernel_cos( x,  y )
    5.22 + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
    5.23 + * Input x is assumed to be bounded by ~pi/4 in magnitude.
    5.24 + * Input y is the tail of x.
    5.25 + *
    5.26 + * Algorithm
    5.27 + *	1. Since cos(-x) = cos(x), we need only to consider positive x.
    5.28 + *	2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
    5.29 + *	3. cos(x) is approximated by a polynomial of degree 14 on
    5.30 + *	   [0,pi/4]
    5.31 + *		  	                 4            14
    5.32 + *	   	cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
    5.33 + *	   where the remez error is
    5.34 + *
    5.35 + * 	|              2     4     6     8     10    12     14 |     -58
    5.36 + * 	|cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
    5.37 + * 	|    					               |
    5.38 + *
    5.39 + * 	               4     6     8     10    12     14
    5.40 + *	4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
    5.41 + *	       cos(x) = 1 - x*x/2 + r
    5.42 + *	   since cos(x+y) ~ cos(x) - sin(x)*y
    5.43 + *			  ~ cos(x) - x*y,
    5.44 + *	   a correction term is necessary in cos(x) and hence
    5.45 + *		cos(x+y) = 1 - (x*x/2 - (r - x*y))
    5.46 + *	   For better accuracy when x > 0.3, let qx = |x|/4 with
    5.47 + *	   the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
    5.48 + *	   Then
    5.49 + *		cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
    5.50 + *	   Note that 1-qx and (x*x/2-qx) is EXACT here, and the
    5.51 + *	   magnitude of the latter is at least a quarter of x*x/2,
    5.52 + *	   thus, reducing the rounding error in the subtraction.
    5.53 + */
    5.54 +
    5.55 +#include "math.h"
    5.56 +#include "math_private.h"
    5.57 +
    5.58 +#ifdef __STDC__
    5.59 +static const double
    5.60 +#else
    5.61 +static double
    5.62 +#endif
    5.63 +  one = 1.00000000000000000000e+00,     /* 0x3FF00000, 0x00000000 */
    5.64 +    C1 = 4.16666666666666019037e-02,    /* 0x3FA55555, 0x5555554C */
    5.65 +    C2 = -1.38888888888741095749e-03,   /* 0xBF56C16C, 0x16C15177 */
    5.66 +    C3 = 2.48015872894767294178e-05,    /* 0x3EFA01A0, 0x19CB1590 */
    5.67 +    C4 = -2.75573143513906633035e-07,   /* 0xBE927E4F, 0x809C52AD */
    5.68 +    C5 = 2.08757232129817482790e-09,    /* 0x3E21EE9E, 0xBDB4B1C4 */
    5.69 +    C6 = -1.13596475577881948265e-11;   /* 0xBDA8FAE9, 0xBE8838D4 */
    5.70 +
    5.71 +#ifdef __STDC__
    5.72 +double attribute_hidden
    5.73 +__kernel_cos(double x, double y)
    5.74 +#else
    5.75 +double attribute_hidden
    5.76 +__kernel_cos(x, y)
    5.77 +     double x, y;
    5.78 +#endif
    5.79 +{
    5.80 +    double a, hz, z, r, qx;
    5.81 +    int32_t ix;
    5.82 +    GET_HIGH_WORD(ix, x);
    5.83 +    ix &= 0x7fffffff;           /* ix = |x|'s high word */
    5.84 +    if (ix < 0x3e400000) {      /* if x < 2**27 */
    5.85 +        if (((int) x) == 0)
    5.86 +            return one;         /* generate inexact */
    5.87 +    }
    5.88 +    z = x * x;
    5.89 +    r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
    5.90 +    if (ix < 0x3FD33333)        /* if |x| < 0.3 */
    5.91 +        return one - (0.5 * z - (z * r - x * y));
    5.92 +    else {
    5.93 +        if (ix > 0x3fe90000) {  /* x > 0.78125 */
    5.94 +            qx = 0.28125;
    5.95 +        } else {
    5.96 +            INSERT_WORDS(qx, ix - 0x00200000, 0);       /* x/4 */
    5.97 +        }
    5.98 +        hz = 0.5 * z - qx;
    5.99 +        a = one - qx;
   5.100 +        return a - (hz - (z * r - x * y));
   5.101 +    }
   5.102 +}
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/src/libm/k_sin.c	Mon Sep 15 06:33:23 2008 +0000
     6.3 @@ -0,0 +1,86 @@
     6.4 +/* @(#)k_sin.c 5.1 93/09/24 */
     6.5 +/*
     6.6 + * ====================================================
     6.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     6.8 + *
     6.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
    6.10 + * Permission to use, copy, modify, and distribute this
    6.11 + * software is freely granted, provided that this notice
    6.12 + * is preserved.
    6.13 + * ====================================================
    6.14 + */
    6.15 +
    6.16 +#if defined(LIBM_SCCS) && !defined(lint)
    6.17 +static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $";
    6.18 +#endif
    6.19 +
    6.20 +/* __kernel_sin( x, y, iy)
    6.21 + * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
    6.22 + * Input x is assumed to be bounded by ~pi/4 in magnitude.
    6.23 + * Input y is the tail of x.
    6.24 + * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
    6.25 + *
    6.26 + * Algorithm
    6.27 + *	1. Since sin(-x) = -sin(x), we need only to consider positive x.
    6.28 + *	2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
    6.29 + *	3. sin(x) is approximated by a polynomial of degree 13 on
    6.30 + *	   [0,pi/4]
    6.31 + *		  	         3            13
    6.32 + *	   	sin(x) ~ x + S1*x + ... + S6*x
    6.33 + *	   where
    6.34 + *
    6.35 + * 	|sin(x)         2     4     6     8     10     12  |     -58
    6.36 + * 	|----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
    6.37 + * 	|  x 					           |
    6.38 + *
    6.39 + *	4. sin(x+y) = sin(x) + sin'(x')*y
    6.40 + *		    ~ sin(x) + (1-x*x/2)*y
    6.41 + *	   For better accuracy, let
    6.42 + *		     3      2      2      2      2
    6.43 + *		r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
    6.44 + *	   then                   3    2
    6.45 + *		sin(x) = x + (S1*x + (x *(r-y/2)+y))
    6.46 + */
    6.47 +
    6.48 +#include "math.h"
    6.49 +#include "math_private.h"
    6.50 +
    6.51 +#ifdef __STDC__
    6.52 +static const double
    6.53 +#else
    6.54 +static double
    6.55 +#endif
    6.56 +  half = 5.00000000000000000000e-01,    /* 0x3FE00000, 0x00000000 */
    6.57 +    S1 = -1.66666666666666324348e-01,   /* 0xBFC55555, 0x55555549 */
    6.58 +    S2 = 8.33333333332248946124e-03,    /* 0x3F811111, 0x1110F8A6 */
    6.59 +    S3 = -1.98412698298579493134e-04,   /* 0xBF2A01A0, 0x19C161D5 */
    6.60 +    S4 = 2.75573137070700676789e-06,    /* 0x3EC71DE3, 0x57B1FE7D */
    6.61 +    S5 = -2.50507602534068634195e-08,   /* 0xBE5AE5E6, 0x8A2B9CEB */
    6.62 +    S6 = 1.58969099521155010221e-10;    /* 0x3DE5D93A, 0x5ACFD57C */
    6.63 +
    6.64 +#ifdef __STDC__
    6.65 +double attribute_hidden
    6.66 +__kernel_sin(double x, double y, int iy)
    6.67 +#else
    6.68 +double attribute_hidden
    6.69 +__kernel_sin(x, y, iy)
    6.70 +     double x, y;
    6.71 +     int iy;                    /* iy=0 if y is zero */
    6.72 +#endif
    6.73 +{
    6.74 +    double z, r, v;
    6.75 +    int32_t ix;
    6.76 +    GET_HIGH_WORD(ix, x);
    6.77 +    ix &= 0x7fffffff;           /* high word of x */
    6.78 +    if (ix < 0x3e400000) {      /* |x| < 2**-27 */
    6.79 +        if ((int) x == 0)
    6.80 +            return x;
    6.81 +    }                           /* generate inexact */
    6.82 +    z = x * x;
    6.83 +    v = z * x;
    6.84 +    r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
    6.85 +    if (iy == 0)
    6.86 +        return x + v * (S1 + z * r);
    6.87 +    else
    6.88 +        return x - ((z * (half * y - v * r) - y) - v * S1);
    6.89 +}
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/src/libm/math.h	Mon Sep 15 06:33:23 2008 +0000
     7.3 @@ -0,0 +1,44 @@
     7.4 +/*
     7.5 +    SDL - Simple DirectMedia Layer
     7.6 +    Copyright (C) 1997-2006 Sam Lantinga
     7.7 +
     7.8 +    This library is free software; you can redistribute it and/or
     7.9 +    modify it under the terms of the GNU Lesser General Public
    7.10 +    License as published by the Free Software Foundation; either
    7.11 +    version 2.1 of the License, or (at your option) any later version.
    7.12 +
    7.13 +    This library is distributed in the hope that it will be useful,
    7.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
    7.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    7.16 +    Lesser General Public License for more details.
    7.17 +
    7.18 +    You should have received a copy of the GNU Lesser General Public
    7.19 +    License along with this library; if not, write to the Free Software
    7.20 +    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    7.21 +
    7.22 +    Sam Lantinga
    7.23 +    slouken@libsdl.org
    7.24 +*/
    7.25 +#include "SDL_config.h"
    7.26 +
    7.27 +#ifdef HAVE_MATH_H
    7.28 +#include <math.h>
    7.29 +#else
    7.30 +
    7.31 +extern double __ieee754_log(double x);
    7.32 +extern double __ieee754_pow(double x, double y);
    7.33 +extern double __ieee754_sqrt(double x);
    7.34 +
    7.35 +#define log(x)      __ieee754_log(x)
    7.36 +#define pow(x, y)   __ieee754_pow(x, y)
    7.37 +#define sqrt(x)     __ieee754_sqrt(x)
    7.38 +
    7.39 +extern double copysign(double x, double y);
    7.40 +extern double cos(double x);
    7.41 +extern double fabs(double x);
    7.42 +extern double scalbn(double x, int n);
    7.43 +extern double sin(double x);
    7.44 +
    7.45 +#endif /* HAVE_MATH_H */
    7.46 +
    7.47 +/* vi: set ts=4 sw=4 expandtab: */
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/src/libm/math_private.h	Mon Sep 15 06:33:23 2008 +0000
     8.3 @@ -0,0 +1,204 @@
     8.4 +/*
     8.5 + * ====================================================
     8.6 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     8.7 + *
     8.8 + * Developed at SunPro, a Sun Microsystems, Inc. business.
     8.9 + * Permission to use, copy, modify, and distribute this
    8.10 + * software is freely granted, provided that this notice
    8.11 + * is preserved.
    8.12 + * ====================================================
    8.13 + */
    8.14 +
    8.15 +/*
    8.16 + * from: @(#)fdlibm.h 5.1 93/09/24
    8.17 + * $Id: math_private.h,v 1.3 2004/02/09 07:10:38 andersen Exp $
    8.18 + */
    8.19 +
    8.20 +#ifndef _MATH_PRIVATE_H_
    8.21 +#define _MATH_PRIVATE_H_
    8.22 +
    8.23 +/*#include <endian.h>*/
    8.24 +#include <sys/types.h>
    8.25 +
    8.26 +#define attribute_hidden
    8.27 +#define libm_hidden_proto(x)
    8.28 +#define libm_hidden_def(x)
    8.29 +
    8.30 +/* The original fdlibm code used statements like:
    8.31 +	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
    8.32 +	ix0 = *(n0+(int*)&x);			* high word of x *
    8.33 +	ix1 = *((1-n0)+(int*)&x);		* low word of x *
    8.34 +   to dig two 32 bit words out of the 64 bit IEEE floating point
    8.35 +   value.  That is non-ANSI, and, moreover, the gcc instruction
    8.36 +   scheduler gets it wrong.  We instead use the following macros.
    8.37 +   Unlike the original code, we determine the endianness at compile
    8.38 +   time, not at run time; I don't see much benefit to selecting
    8.39 +   endianness at run time.  */
    8.40 +
    8.41 +/* A union which permits us to convert between a double and two 32 bit
    8.42 +   ints.  */
    8.43 +
    8.44 +/*
    8.45 + * Math on arm is special:
    8.46 + * For FPA, float words are always big-endian.
    8.47 + * For VFP, floats words follow the memory system mode.
    8.48 + */
    8.49 +
    8.50 +#if (__BYTE_ORDER == __BIG_ENDIAN) || \
    8.51 +    (!defined(__VFP_FP__) && (defined(__arm__) || defined(__thumb__)))
    8.52 +
    8.53 +typedef union
    8.54 +{
    8.55 +    double value;
    8.56 +    struct
    8.57 +    {
    8.58 +        u_int32_t msw;
    8.59 +        u_int32_t lsw;
    8.60 +    } parts;
    8.61 +} ieee_double_shape_type;
    8.62 +
    8.63 +#else
    8.64 +
    8.65 +typedef union
    8.66 +{
    8.67 +    double value;
    8.68 +    struct
    8.69 +    {
    8.70 +        u_int32_t lsw;
    8.71 +        u_int32_t msw;
    8.72 +    } parts;
    8.73 +} ieee_double_shape_type;
    8.74 +
    8.75 +#endif
    8.76 +
    8.77 +/* Get two 32 bit ints from a double.  */
    8.78 +
    8.79 +#define EXTRACT_WORDS(ix0,ix1,d)				\
    8.80 +do {								\
    8.81 +  ieee_double_shape_type ew_u;					\
    8.82 +  ew_u.value = (d);						\
    8.83 +  (ix0) = ew_u.parts.msw;					\
    8.84 +  (ix1) = ew_u.parts.lsw;					\
    8.85 +} while (0)
    8.86 +
    8.87 +/* Get the more significant 32 bit int from a double.  */
    8.88 +
    8.89 +#define GET_HIGH_WORD(i,d)					\
    8.90 +do {								\
    8.91 +  ieee_double_shape_type gh_u;					\
    8.92 +  gh_u.value = (d);						\
    8.93 +  (i) = gh_u.parts.msw;						\
    8.94 +} while (0)
    8.95 +
    8.96 +/* Get the less significant 32 bit int from a double.  */
    8.97 +
    8.98 +#define GET_LOW_WORD(i,d)					\
    8.99 +do {								\
   8.100 +  ieee_double_shape_type gl_u;					\
   8.101 +  gl_u.value = (d);						\
   8.102 +  (i) = gl_u.parts.lsw;						\
   8.103 +} while (0)
   8.104 +
   8.105 +/* Set a double from two 32 bit ints.  */
   8.106 +
   8.107 +#define INSERT_WORDS(d,ix0,ix1)					\
   8.108 +do {								\
   8.109 +  ieee_double_shape_type iw_u;					\
   8.110 +  iw_u.parts.msw = (ix0);					\
   8.111 +  iw_u.parts.lsw = (ix1);					\
   8.112 +  (d) = iw_u.value;						\
   8.113 +} while (0)
   8.114 +
   8.115 +/* Set the more significant 32 bits of a double from an int.  */
   8.116 +
   8.117 +#define SET_HIGH_WORD(d,v)					\
   8.118 +do {								\
   8.119 +  ieee_double_shape_type sh_u;					\
   8.120 +  sh_u.value = (d);						\
   8.121 +  sh_u.parts.msw = (v);						\
   8.122 +  (d) = sh_u.value;						\
   8.123 +} while (0)
   8.124 +
   8.125 +/* Set the less significant 32 bits of a double from an int.  */
   8.126 +
   8.127 +#define SET_LOW_WORD(d,v)					\
   8.128 +do {								\
   8.129 +  ieee_double_shape_type sl_u;					\
   8.130 +  sl_u.value = (d);						\
   8.131 +  sl_u.parts.lsw = (v);						\
   8.132 +  (d) = sl_u.value;						\
   8.133 +} while (0)
   8.134 +
   8.135 +/* A union which permits us to convert between a float and a 32 bit
   8.136 +   int.  */
   8.137 +
   8.138 +typedef union
   8.139 +{
   8.140 +    float value;
   8.141 +    u_int32_t word;
   8.142 +} ieee_float_shape_type;
   8.143 +
   8.144 +/* Get a 32 bit int from a float.  */
   8.145 +
   8.146 +#define GET_FLOAT_WORD(i,d)					\
   8.147 +do {								\
   8.148 +  ieee_float_shape_type gf_u;					\
   8.149 +  gf_u.value = (d);						\
   8.150 +  (i) = gf_u.word;						\
   8.151 +} while (0)
   8.152 +
   8.153 +/* Set a float from a 32 bit int.  */
   8.154 +
   8.155 +#define SET_FLOAT_WORD(d,i)					\
   8.156 +do {								\
   8.157 +  ieee_float_shape_type sf_u;					\
   8.158 +  sf_u.word = (i);						\
   8.159 +  (d) = sf_u.value;						\
   8.160 +} while (0)
   8.161 +
   8.162 +/* ieee style elementary functions */
   8.163 +extern double
   8.164 +__ieee754_sqrt(double)
   8.165 +    attribute_hidden;
   8.166 +     extern double __ieee754_acos(double) attribute_hidden;
   8.167 +     extern double __ieee754_acosh(double) attribute_hidden;
   8.168 +     extern double __ieee754_log(double) attribute_hidden;
   8.169 +     extern double __ieee754_atanh(double) attribute_hidden;
   8.170 +     extern double __ieee754_asin(double) attribute_hidden;
   8.171 +     extern double __ieee754_atan2(double, double) attribute_hidden;
   8.172 +     extern double __ieee754_exp(double) attribute_hidden;
   8.173 +     extern double __ieee754_cosh(double) attribute_hidden;
   8.174 +     extern double __ieee754_fmod(double, double) attribute_hidden;
   8.175 +     extern double __ieee754_pow(double, double) attribute_hidden;
   8.176 +     extern double __ieee754_lgamma_r(double, int *) attribute_hidden;
   8.177 +     extern double __ieee754_gamma_r(double, int *) attribute_hidden;
   8.178 +     extern double __ieee754_lgamma(double) attribute_hidden;
   8.179 +     extern double __ieee754_gamma(double) attribute_hidden;
   8.180 +     extern double __ieee754_log10(double) attribute_hidden;
   8.181 +     extern double __ieee754_sinh(double) attribute_hidden;
   8.182 +     extern double __ieee754_hypot(double, double) attribute_hidden;
   8.183 +     extern double __ieee754_j0(double) attribute_hidden;
   8.184 +     extern double __ieee754_j1(double) attribute_hidden;
   8.185 +     extern double __ieee754_y0(double) attribute_hidden;
   8.186 +     extern double __ieee754_y1(double) attribute_hidden;
   8.187 +     extern double __ieee754_jn(int, double) attribute_hidden;
   8.188 +     extern double __ieee754_yn(int, double) attribute_hidden;
   8.189 +     extern double __ieee754_remainder(double, double) attribute_hidden;
   8.190 +     extern int __ieee754_rem_pio2(double, double *) attribute_hidden;
   8.191 +#if defined(_SCALB_INT)
   8.192 +     extern double __ieee754_scalb(double, int) attribute_hidden;
   8.193 +#else
   8.194 +     extern double __ieee754_scalb(double, double) attribute_hidden;
   8.195 +#endif
   8.196 +
   8.197 +/* fdlibm kernel function */
   8.198 +#ifndef _IEEE_LIBM
   8.199 +     extern double __kernel_standard(double, double, int) attribute_hidden;
   8.200 +#endif
   8.201 +     extern double __kernel_sin(double, double, int) attribute_hidden;
   8.202 +     extern double __kernel_cos(double, double) attribute_hidden;
   8.203 +     extern double __kernel_tan(double, double, int) attribute_hidden;
   8.204 +     extern int __kernel_rem_pio2(double *, double *, int, int, int,
   8.205 +                                  const int *) attribute_hidden;
   8.206 +
   8.207 +#endif /* _MATH_PRIVATE_H_ */
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/src/libm/s_copysign.c	Mon Sep 15 06:33:23 2008 +0000
     9.3 @@ -0,0 +1,42 @@
     9.4 +/* @(#)s_copysign.c 5.1 93/09/24 */
     9.5 +/*
     9.6 + * ====================================================
     9.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     9.8 + *
     9.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
    9.10 + * Permission to use, copy, modify, and distribute this
    9.11 + * software is freely granted, provided that this notice
    9.12 + * is preserved.
    9.13 + * ====================================================
    9.14 + */
    9.15 +
    9.16 +#if defined(LIBM_SCCS) && !defined(lint)
    9.17 +static char rcsid[] =
    9.18 +    "$NetBSD: s_copysign.c,v 1.8 1995/05/10 20:46:57 jtc Exp $";
    9.19 +#endif
    9.20 +
    9.21 +/*
    9.22 + * copysign(double x, double y)
    9.23 + * copysign(x,y) returns a value with the magnitude of x and
    9.24 + * with the sign bit of y.
    9.25 + */
    9.26 +
    9.27 +#include "math.h"
    9.28 +#include "math_private.h"
    9.29 +
    9.30 +libm_hidden_proto(copysign)
    9.31 +#ifdef __STDC__
    9.32 +     double copysign(double x, double y)
    9.33 +#else
    9.34 +     double copysign(x, y)
    9.35 +     double x, y;
    9.36 +#endif
    9.37 +{
    9.38 +    u_int32_t hx, hy;
    9.39 +    GET_HIGH_WORD(hx, x);
    9.40 +    GET_HIGH_WORD(hy, y);
    9.41 +    SET_HIGH_WORD(x, (hx & 0x7fffffff) | (hy & 0x80000000));
    9.42 +    return x;
    9.43 +}
    9.44 +
    9.45 +libm_hidden_def(copysign)
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/src/libm/s_cos.c	Mon Sep 15 06:33:23 2008 +0000
    10.3 @@ -0,0 +1,90 @@
    10.4 +/* @(#)s_cos.c 5.1 93/09/24 */
    10.5 +/*
    10.6 + * ====================================================
    10.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    10.8 + *
    10.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
   10.10 + * Permission to use, copy, modify, and distribute this
   10.11 + * software is freely granted, provided that this notice
   10.12 + * is preserved.
   10.13 + * ====================================================
   10.14 + */
   10.15 +
   10.16 +#if defined(LIBM_SCCS) && !defined(lint)
   10.17 +static char rcsid[] = "$NetBSD: s_cos.c,v 1.7 1995/05/10 20:47:02 jtc Exp $";
   10.18 +#endif
   10.19 +
   10.20 +/* cos(x)
   10.21 + * Return cosine function of x.
   10.22 + *
   10.23 + * kernel function:
   10.24 + *	__kernel_sin		... sine function on [-pi/4,pi/4]
   10.25 + *	__kernel_cos		... cosine function on [-pi/4,pi/4]
   10.26 + *	__ieee754_rem_pio2	... argument reduction routine
   10.27 + *
   10.28 + * Method.
   10.29 + *      Let S,C and T denote the sin, cos and tan respectively on
   10.30 + *	[-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   10.31 + *	in [-pi/4 , +pi/4], and let n = k mod 4.
   10.32 + *	We have
   10.33 + *
   10.34 + *          n        sin(x)      cos(x)        tan(x)
   10.35 + *     ----------------------------------------------------------
   10.36 + *	    0	       S	   C		 T
   10.37 + *	    1	       C	  -S		-1/T
   10.38 + *	    2	      -S	  -C		 T
   10.39 + *	    3	      -C	   S		-1/T
   10.40 + *     ----------------------------------------------------------
   10.41 + *
   10.42 + * Special cases:
   10.43 + *      Let trig be any of sin, cos, or tan.
   10.44 + *      trig(+-INF)  is NaN, with signals;
   10.45 + *      trig(NaN)    is that NaN;
   10.46 + *
   10.47 + * Accuracy:
   10.48 + *	TRIG(x) returns trig(x) nearly rounded
   10.49 + */
   10.50 +
   10.51 +#include "math.h"
   10.52 +#include "math_private.h"
   10.53 +
   10.54 +libm_hidden_proto(cos)
   10.55 +#ifdef __STDC__
   10.56 +     double cos(double x)
   10.57 +#else
   10.58 +     double cos(x)
   10.59 +     double x;
   10.60 +#endif
   10.61 +{
   10.62 +    double y[2], z = 0.0;
   10.63 +    int32_t n, ix;
   10.64 +
   10.65 +    /* High word of x. */
   10.66 +    GET_HIGH_WORD(ix, x);
   10.67 +
   10.68 +    /* |x| ~< pi/4 */
   10.69 +    ix &= 0x7fffffff;
   10.70 +    if (ix <= 0x3fe921fb)
   10.71 +        return __kernel_cos(x, z);
   10.72 +
   10.73 +    /* cos(Inf or NaN) is NaN */
   10.74 +    else if (ix >= 0x7ff00000)
   10.75 +        return x - x;
   10.76 +
   10.77 +    /* argument reduction needed */
   10.78 +    else {
   10.79 +        n = __ieee754_rem_pio2(x, y);
   10.80 +        switch (n & 3) {
   10.81 +        case 0:
   10.82 +            return __kernel_cos(y[0], y[1]);
   10.83 +        case 1:
   10.84 +            return -__kernel_sin(y[0], y[1], 1);
   10.85 +        case 2:
   10.86 +            return -__kernel_cos(y[0], y[1]);
   10.87 +        default:
   10.88 +            return __kernel_sin(y[0], y[1], 1);
   10.89 +        }
   10.90 +    }
   10.91 +}
   10.92 +
   10.93 +libm_hidden_def(cos)
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/libm/s_fabs.c	Mon Sep 15 06:33:23 2008 +0000
    11.3 @@ -0,0 +1,38 @@
    11.4 +/* @(#)s_fabs.c 5.1 93/09/24 */
    11.5 +/*
    11.6 + * ====================================================
    11.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    11.8 + *
    11.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
   11.10 + * Permission to use, copy, modify, and distribute this
   11.11 + * software is freely granted, provided that this notice
   11.12 + * is preserved.
   11.13 + * ====================================================
   11.14 + */
   11.15 +
   11.16 +#if defined(LIBM_SCCS) && !defined(lint)
   11.17 +static char rcsid[] = "$NetBSD: s_fabs.c,v 1.7 1995/05/10 20:47:13 jtc Exp $";
   11.18 +#endif
   11.19 +
   11.20 +/*
   11.21 + * fabs(x) returns the absolute value of x.
   11.22 + */
   11.23 +
   11.24 +#include "math.h"
   11.25 +#include "math_private.h"
   11.26 +
   11.27 +libm_hidden_proto(fabs)
   11.28 +#ifdef __STDC__
   11.29 +     double fabs(double x)
   11.30 +#else
   11.31 +     double fabs(x)
   11.32 +     double x;
   11.33 +#endif
   11.34 +{
   11.35 +    u_int32_t high;
   11.36 +    GET_HIGH_WORD(high, x);
   11.37 +    SET_HIGH_WORD(x, high & 0x7fffffff);
   11.38 +    return x;
   11.39 +}
   11.40 +
   11.41 +libm_hidden_def(fabs)
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/src/libm/s_scalbn.c	Mon Sep 15 06:33:23 2008 +0000
    12.3 @@ -0,0 +1,79 @@
    12.4 +/* @(#)s_scalbn.c 5.1 93/09/24 */
    12.5 +/*
    12.6 + * ====================================================
    12.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    12.8 + *
    12.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
   12.10 + * Permission to use, copy, modify, and distribute this
   12.11 + * software is freely granted, provided that this notice
   12.12 + * is preserved.
   12.13 + * ====================================================
   12.14 + */
   12.15 +
   12.16 +#if defined(LIBM_SCCS) && !defined(lint)
   12.17 +static char rcsid[] =
   12.18 +    "$NetBSD: s_scalbn.c,v 1.8 1995/05/10 20:48:08 jtc Exp $";
   12.19 +#endif
   12.20 +
   12.21 +/*
   12.22 + * scalbn (double x, int n)
   12.23 + * scalbn(x,n) returns x* 2**n  computed by  exponent
   12.24 + * manipulation rather than by actually performing an
   12.25 + * exponentiation or a multiplication.
   12.26 + */
   12.27 +
   12.28 +#include "math.h"
   12.29 +#include "math_private.h"
   12.30 +
   12.31 +libm_hidden_proto(copysign)
   12.32 +#ifdef __STDC__
   12.33 +     static const double
   12.34 +#else
   12.35 +     static double
   12.36 +#endif
   12.37 +       two54 = 1.80143985094819840000e+16,      /* 0x43500000, 0x00000000 */
   12.38 +         twom54 = 5.55111512312578270212e-17,   /* 0x3C900000, 0x00000000 */
   12.39 +         huge = 1.0e+300, tiny = 1.0e-300;
   12.40 +
   12.41 +libm_hidden_proto(scalbn)
   12.42 +#ifdef __STDC__
   12.43 +     double scalbn(double x, int n)
   12.44 +#else
   12.45 +     double scalbn(x, n)
   12.46 +     double x;
   12.47 +     int n;
   12.48 +#endif
   12.49 +{
   12.50 +    int32_t k, hx, lx;
   12.51 +    EXTRACT_WORDS(hx, lx, x);
   12.52 +    k = (hx & 0x7ff00000) >> 20;        /* extract exponent */
   12.53 +    if (k == 0) {               /* 0 or subnormal x */
   12.54 +        if ((lx | (hx & 0x7fffffff)) == 0)
   12.55 +            return x;           /* +-0 */
   12.56 +        x *= two54;
   12.57 +        GET_HIGH_WORD(hx, x);
   12.58 +        k = ((hx & 0x7ff00000) >> 20) - 54;
   12.59 +        if (n < -50000)
   12.60 +            return tiny * x;    /*underflow */
   12.61 +    }
   12.62 +    if (k == 0x7ff)
   12.63 +        return x + x;           /* NaN or Inf */
   12.64 +    k = k + n;
   12.65 +    if (k > 0x7fe)
   12.66 +        return huge * copysign(huge, x);        /* overflow  */
   12.67 +    if (k > 0) {                /* normal result */
   12.68 +        SET_HIGH_WORD(x, (hx & 0x800fffff) | (k << 20));
   12.69 +        return x;
   12.70 +    }
   12.71 +    if (k <= -54) {
   12.72 +        if (n > 50000)          /* in case integer overflow in n+k */
   12.73 +            return huge * copysign(huge, x);    /*overflow */
   12.74 +        else
   12.75 +            return tiny * copysign(tiny, x);    /*underflow */
   12.76 +    }
   12.77 +    k += 54;                    /* subnormal result */
   12.78 +    SET_HIGH_WORD(x, (hx & 0x800fffff) | (k << 20));
   12.79 +    return x * twom54;
   12.80 +}
   12.81 +
   12.82 +libm_hidden_def(scalbn)
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/src/libm/s_sin.c	Mon Sep 15 06:33:23 2008 +0000
    13.3 @@ -0,0 +1,90 @@
    13.4 +/* @(#)s_sin.c 5.1 93/09/24 */
    13.5 +/*
    13.6 + * ====================================================
    13.7 + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    13.8 + *
    13.9 + * Developed at SunPro, a Sun Microsystems, Inc. business.
   13.10 + * Permission to use, copy, modify, and distribute this
   13.11 + * software is freely granted, provided that this notice
   13.12 + * is preserved.
   13.13 + * ====================================================
   13.14 + */
   13.15 +
   13.16 +#if defined(LIBM_SCCS) && !defined(lint)
   13.17 +static char rcsid[] = "$NetBSD: s_sin.c,v 1.7 1995/05/10 20:48:15 jtc Exp $";
   13.18 +#endif
   13.19 +
   13.20 +/* sin(x)
   13.21 + * Return sine function of x.
   13.22 + *
   13.23 + * kernel function:
   13.24 + *	__kernel_sin		... sine function on [-pi/4,pi/4]
   13.25 + *	__kernel_cos		... cose function on [-pi/4,pi/4]
   13.26 + *	__ieee754_rem_pio2	... argument reduction routine
   13.27 + *
   13.28 + * Method.
   13.29 + *      Let S,C and T denote the sin, cos and tan respectively on
   13.30 + *	[-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   13.31 + *	in [-pi/4 , +pi/4], and let n = k mod 4.
   13.32 + *	We have
   13.33 + *
   13.34 + *          n        sin(x)      cos(x)        tan(x)
   13.35 + *     ----------------------------------------------------------
   13.36 + *	    0	       S	   C		 T
   13.37 + *	    1	       C	  -S		-1/T
   13.38 + *	    2	      -S	  -C		 T
   13.39 + *	    3	      -C	   S		-1/T
   13.40 + *     ----------------------------------------------------------
   13.41 + *
   13.42 + * Special cases:
   13.43 + *      Let trig be any of sin, cos, or tan.
   13.44 + *      trig(+-INF)  is NaN, with signals;
   13.45 + *      trig(NaN)    is that NaN;
   13.46 + *
   13.47 + * Accuracy:
   13.48 + *	TRIG(x) returns trig(x) nearly rounded
   13.49 + */
   13.50 +
   13.51 +#include "math.h"
   13.52 +#include "math_private.h"
   13.53 +
   13.54 +libm_hidden_proto(sin)
   13.55 +#ifdef __STDC__
   13.56 +     double sin(double x)
   13.57 +#else
   13.58 +     double sin(x)
   13.59 +     double x;
   13.60 +#endif
   13.61 +{
   13.62 +    double y[2], z = 0.0;
   13.63 +    int32_t n, ix;
   13.64 +
   13.65 +    /* High word of x. */
   13.66 +    GET_HIGH_WORD(ix, x);
   13.67 +
   13.68 +    /* |x| ~< pi/4 */
   13.69 +    ix &= 0x7fffffff;
   13.70 +    if (ix <= 0x3fe921fb)
   13.71 +        return __kernel_sin(x, z, 0);
   13.72 +
   13.73 +    /* sin(Inf or NaN) is NaN */
   13.74 +    else if (ix >= 0x7ff00000)
   13.75 +        return x - x;
   13.76 +
   13.77 +    /* argument reduction needed */
   13.78 +    else {
   13.79 +        n = __ieee754_rem_pio2(x, y);
   13.80 +        switch (n & 3) {
   13.81 +        case 0:
   13.82 +            return __kernel_sin(y[0], y[1], 1);
   13.83 +        case 1:
   13.84 +            return __kernel_cos(y[0], y[1]);
   13.85 +        case 2:
   13.86 +            return -__kernel_sin(y[0], y[1], 1);
   13.87 +        default:
   13.88 +            return -__kernel_cos(y[0], y[1]);
   13.89 +        }
   13.90 +    }
   13.91 +}
   13.92 +
   13.93 +libm_hidden_def(sin)
    14.1 --- a/src/video/SDL_gamma.c	Mon Sep 15 05:14:11 2008 +0000
    14.2 +++ b/src/video/SDL_gamma.c	Mon Sep 15 06:33:23 2008 +0000
    14.3 @@ -23,17 +23,7 @@
    14.4  
    14.5  /* Gamma correction support */
    14.6  
    14.7 -#ifdef HAVE_MATH_H
    14.8 -#include <math.h>               /* Used for calculating gamma ramps */
    14.9 -#else
   14.10 -/* Math routines from uClibc: http://www.uclibc.org */
   14.11 -#include "math_private.h"
   14.12 -#include "e_sqrt.h"
   14.13 -#include "e_pow.h"
   14.14 -#include "e_log.h"
   14.15 -#define pow(x, y)	__ieee754_pow(x, y)
   14.16 -#define log(x)		__ieee754_log(x)
   14.17 -#endif
   14.18 +#include "../libm/math.h"
   14.19  
   14.20  #include "SDL_sysvideo.h"
   14.21  
    15.1 --- a/src/video/e_log.h	Mon Sep 15 05:14:11 2008 +0000
    15.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.3 @@ -1,161 +0,0 @@
    15.4 -/* @(#)e_log.c 5.1 93/09/24 */
    15.5 -/*
    15.6 - * ====================================================
    15.7 - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    15.8 - *
    15.9 - * Developed at SunPro, a Sun Microsystems, Inc. business.
   15.10 - * Permission to use, copy, modify, and distribute this
   15.11 - * software is freely granted, provided that this notice
   15.12 - * is preserved.
   15.13 - * ====================================================
   15.14 - */
   15.15 -
   15.16 -#if defined(LIBM_SCCS) && !defined(lint)
   15.17 -static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $";
   15.18 -#endif
   15.19 -
   15.20 -/* __ieee754_log(x)
   15.21 - * Return the logrithm of x
   15.22 - *
   15.23 - * Method :
   15.24 - *   1. Argument Reduction: find k and f such that
   15.25 - *			x = 2^k * (1+f),
   15.26 - *	   where  sqrt(2)/2 < 1+f < sqrt(2) .
   15.27 - *
   15.28 - *   2. Approximation of log(1+f).
   15.29 - *	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
   15.30 - *		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
   15.31 - *	     	 = 2s + s*R
   15.32 - *      We use a special Reme algorithm on [0,0.1716] to generate
   15.33 - * 	a polynomial of degree 14 to approximate R The maximum error
   15.34 - *	of this polynomial approximation is bounded by 2**-58.45. In
   15.35 - *	other words,
   15.36 - *		        2      4      6      8      10      12      14
   15.37 - *	    R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
   15.38 - *  	(the values of Lg1 to Lg7 are listed in the program)
   15.39 - *	and
   15.40 - *	    |      2          14          |     -58.45
   15.41 - *	    | Lg1*s +...+Lg7*s    -  R(z) | <= 2
   15.42 - *	    |                             |
   15.43 - *	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
   15.44 - *	In order to guarantee error in log below 1ulp, we compute log
   15.45 - *	by
   15.46 - *		log(1+f) = f - s*(f - R)	(if f is not too large)
   15.47 - *		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
   15.48 - *
   15.49 - *	3. Finally,  log(x) = k*ln2 + log(1+f).
   15.50 - *			    = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
   15.51 - *	   Here ln2 is split into two floating point number:
   15.52 - *			ln2_hi + ln2_lo,
   15.53 - *	   where n*ln2_hi is always exact for |n| < 2000.
   15.54 - *
   15.55 - * Special cases:
   15.56 - *	log(x) is NaN with signal if x < 0 (including -INF) ;
   15.57 - *	log(+INF) is +INF; log(0) is -INF with signal;
   15.58 - *	log(NaN) is that NaN with no signal.
   15.59 - *
   15.60 - * Accuracy:
   15.61 - *	according to an error analysis, the error is always less than
   15.62 - *	1 ulp (unit in the last place).
   15.63 - *
   15.64 - * Constants:
   15.65 - * The hexadecimal values are the intended ones for the following
   15.66 - * constants. The decimal values may be used, provided that the
   15.67 - * compiler will convert from decimal to binary accurately enough
   15.68 - * to produce the hexadecimal values shown.
   15.69 - */
   15.70 -
   15.71 -/*#include "math.h"*/
   15.72 -#include "math_private.h"
   15.73 -
   15.74 -#ifdef __STDC__
   15.75 -static const double
   15.76 -#else
   15.77 -static double
   15.78 -#endif
   15.79 -  ln2_hi = 6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */
   15.80 -    ln2_lo = 1.90821492927058770002e-10,        /* 3dea39ef 35793c76 */
   15.81 -    Lg1 = 6.666666666666735130e-01,     /* 3FE55555 55555593 */
   15.82 -    Lg2 = 3.999999999940941908e-01,     /* 3FD99999 9997FA04 */
   15.83 -    Lg3 = 2.857142874366239149e-01,     /* 3FD24924 94229359 */
   15.84 -    Lg4 = 2.222219843214978396e-01,     /* 3FCC71C5 1D8E78AF */
   15.85 -    Lg5 = 1.818357216161805012e-01,     /* 3FC74664 96CB03DE */
   15.86 -    Lg6 = 1.531383769920937332e-01,     /* 3FC39A09 D078C69F */
   15.87 -    Lg7 = 1.479819860511658591e-01;     /* 3FC2F112 DF3E5244 */
   15.88 -
   15.89 -#ifdef __STDC__
   15.90 -double
   15.91 -__ieee754_log(double x)
   15.92 -#else
   15.93 -double
   15.94 -__ieee754_log(x)
   15.95 -     double x;
   15.96 -#endif
   15.97 -{
   15.98 -    double hfsq, f, s, z, R, w, t1, t2, dk;
   15.99 -    int32_t k, hx, i, j;
  15.100 -    u_int32_t lx;
  15.101 -
  15.102 -    EXTRACT_WORDS(hx, lx, x);
  15.103 -
  15.104 -    k = 0;
  15.105 -    if (hx < 0x00100000) {      /* x < 2**-1022  */
  15.106 -        if (((hx & 0x7fffffff) | lx) == 0)
  15.107 -            return -two54 / zero;       /* log(+-0)=-inf */
  15.108 -        if (hx < 0)
  15.109 -            return (x - x) / zero;      /* log(-#) = NaN */
  15.110 -        k -= 54;
  15.111 -        x *= two54;             /* subnormal number, scale up x */
  15.112 -        GET_HIGH_WORD(hx, x);
  15.113 -    }
  15.114 -    if (hx >= 0x7ff00000)
  15.115 -        return x + x;
  15.116 -    k += (hx >> 20) - 1023;
  15.117 -    hx &= 0x000fffff;
  15.118 -    i = (hx + 0x95f64) & 0x100000;
  15.119 -    SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000));    /* normalize x or x/2 */
  15.120 -    k += (i >> 20);
  15.121 -    f = x - 1.0;
  15.122 -    if ((0x000fffff & (2 + hx)) < 3) {  /* |f| < 2**-20 */
  15.123 -        if (f == zero) {
  15.124 -            if (k == 0)
  15.125 -                return zero;
  15.126 -            else {
  15.127 -                dk = (double) k;
  15.128 -                return dk * ln2_hi + dk * ln2_lo;
  15.129 -            }
  15.130 -        }
  15.131 -        R = f * f * (0.5 - 0.33333333333333333 * f);
  15.132 -        if (k == 0)
  15.133 -            return f - R;
  15.134 -        else {
  15.135 -            dk = (double) k;
  15.136 -            return dk * ln2_hi - ((R - dk * ln2_lo) - f);
  15.137 -        }
  15.138 -    }
  15.139 -    s = f / (2.0 + f);
  15.140 -    dk = (double) k;
  15.141 -    z = s * s;
  15.142 -    i = hx - 0x6147a;
  15.143 -    w = z * z;
  15.144 -    j = 0x6b851 - hx;
  15.145 -    t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
  15.146 -    t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
  15.147 -    i |= j;
  15.148 -    R = t2 + t1;
  15.149 -    if (i > 0) {
  15.150 -        hfsq = 0.5 * f * f;
  15.151 -        if (k == 0)
  15.152 -            return f - (hfsq - s * (hfsq + R));
  15.153 -        else
  15.154 -            return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) -
  15.155 -                                  f);
  15.156 -    } else {
  15.157 -        if (k == 0)
  15.158 -            return f - s * (f - R);
  15.159 -        else
  15.160 -            return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
  15.161 -    }
  15.162 -}
  15.163 -
  15.164 -/* vi: set ts=4 sw=4 expandtab: */
    16.1 --- a/src/video/e_pow.h	Mon Sep 15 05:14:11 2008 +0000
    16.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.3 @@ -1,340 +0,0 @@
    16.4 -/* @(#)e_pow.c 5.1 93/09/24 */
    16.5 -/*
    16.6 - * ====================================================
    16.7 - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    16.8 - *
    16.9 - * Developed at SunPro, a Sun Microsystems, Inc. business.
   16.10 - * Permission to use, copy, modify, and distribute this
   16.11 - * software is freely granted, provided that this notice
   16.12 - * is preserved.
   16.13 - * ====================================================
   16.14 - */
   16.15 -
   16.16 -#if defined(LIBM_SCCS) && !defined(lint)
   16.17 -static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
   16.18 -#endif
   16.19 -
   16.20 -/* __ieee754_pow(x,y) return x**y
   16.21 - *
   16.22 - *		      n
   16.23 - * Method:  Let x =  2   * (1+f)
   16.24 - *	1. Compute and return log2(x) in two pieces:
   16.25 - *		log2(x) = w1 + w2,
   16.26 - *	   where w1 has 53-24 = 29 bit trailing zeros.
   16.27 - *	2. Perform y*log2(x) = n+y' by simulating muti-precision
   16.28 - *	   arithmetic, where |y'|<=0.5.
   16.29 - *	3. Return x**y = 2**n*exp(y'*log2)
   16.30 - *
   16.31 - * Special cases:
   16.32 - *	1.  (anything) ** 0  is 1
   16.33 - *	2.  (anything) ** 1  is itself
   16.34 - *	3.  (anything) ** NAN is NAN
   16.35 - *	4.  NAN ** (anything except 0) is NAN
   16.36 - *	5.  +-(|x| > 1) **  +INF is +INF
   16.37 - *	6.  +-(|x| > 1) **  -INF is +0
   16.38 - *	7.  +-(|x| < 1) **  +INF is +0
   16.39 - *	8.  +-(|x| < 1) **  -INF is +INF
   16.40 - *	9.  +-1         ** +-INF is NAN
   16.41 - *	10. +0 ** (+anything except 0, NAN)               is +0
   16.42 - *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
   16.43 - *	12. +0 ** (-anything except 0, NAN)               is +INF
   16.44 - *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
   16.45 - *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
   16.46 - *	15. +INF ** (+anything except 0,NAN) is +INF
   16.47 - *	16. +INF ** (-anything except 0,NAN) is +0
   16.48 - *	17. -INF ** (anything)  = -0 ** (-anything)
   16.49 - *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
   16.50 - *	19. (-anything except 0 and inf) ** (non-integer) is NAN
   16.51 - *
   16.52 - * Accuracy:
   16.53 - *	pow(x,y) returns x**y nearly rounded. In particular
   16.54 - *			pow(integer,integer)
   16.55 - *	always returns the correct integer provided it is
   16.56 - *	representable.
   16.57 - *
   16.58 - * Constants :
   16.59 - * The hexadecimal values are the intended ones for the following
   16.60 - * constants. The decimal values may be used, provided that the
   16.61 - * compiler will convert from decimal to binary accurately enough
   16.62 - * to produce the hexadecimal values shown.
   16.63 - */
   16.64 -
   16.65 -/*#include "math.h"*/
   16.66 -#include "math_private.h"
   16.67 -
   16.68 -#ifdef __STDC__
   16.69 -static const double
   16.70 -#else
   16.71 -static double
   16.72 -#endif
   16.73 -  bp[] = { 1.0, 1.5, }, dp_h[] = {
   16.74 -0.0, 5.84962487220764160156e-01,},      /* 0x3FE2B803, 0x40000000 */
   16.75 -
   16.76 -    dp_l[] = {
   16.77 -0.0, 1.35003920212974897128e-08,},      /* 0x3E4CFDEB, 0x43CFD006 */
   16.78 -
   16.79 -    /* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
   16.80 -    L1 = 5.99999999999994648725e-01,    /* 0x3FE33333, 0x33333303 */
   16.81 -    L2 = 4.28571428578550184252e-01,    /* 0x3FDB6DB6, 0xDB6FABFF */
   16.82 -    L3 = 3.33333329818377432918e-01,    /* 0x3FD55555, 0x518F264D */
   16.83 -    L4 = 2.72728123808534006489e-01,    /* 0x3FD17460, 0xA91D4101 */
   16.84 -    L5 = 2.30660745775561754067e-01,    /* 0x3FCD864A, 0x93C9DB65 */
   16.85 -    L6 = 2.06975017800338417784e-01,    /* 0x3FCA7E28, 0x4A454EEF */
   16.86 -    P1 = 1.66666666666666019037e-01,    /* 0x3FC55555, 0x5555553E */
   16.87 -    P2 = -2.77777777770155933842e-03,   /* 0xBF66C16C, 0x16BEBD93 */
   16.88 -    P3 = 6.61375632143793436117e-05,    /* 0x3F11566A, 0xAF25DE2C */
   16.89 -    P4 = -1.65339022054652515390e-06,   /* 0xBEBBBD41, 0xC5D26BF1 */
   16.90 -    P5 = 4.13813679705723846039e-08,    /* 0x3E663769, 0x72BEA4D0 */
   16.91 -    lg2 = 6.93147180559945286227e-01,   /* 0x3FE62E42, 0xFEFA39EF */
   16.92 -    lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
   16.93 -    lg2_l = -1.90465429995776804525e-09,        /* 0xBE205C61, 0x0CA86C39 */
   16.94 -    ovt = 8.0085662595372944372e-0017,  /* -(1024-log2(ovfl+.5ulp)) */
   16.95 -    cp = 9.61796693925975554329e-01,    /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
   16.96 -    cp_h = 9.61796700954437255859e-01,  /* 0x3FEEC709, 0xE0000000 =(float)cp */
   16.97 -    cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h */
   16.98 -    ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
   16.99 -    ivln2_h = 1.44269502162933349609e+00,       /* 0x3FF71547, 0x60000000 =24b 1/ln2 */
  16.100 -    ivln2_l = 1.92596299112661746887e-08;       /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail */
  16.101 -
  16.102 -#ifdef __STDC__
  16.103 -double
  16.104 -__ieee754_pow(double x, double y)
  16.105 -#else
  16.106 -double
  16.107 -__ieee754_pow(x, y)
  16.108 -     double x, y;
  16.109 -#endif
  16.110 -{
  16.111 -    double z, ax, z_h, z_l, p_h, p_l;
  16.112 -    double y1, t1, t2, r, s, t, u, v, w;
  16.113 -    int32_t i, j, k, yisint, n;
  16.114 -    int32_t hx, hy, ix, iy;
  16.115 -    u_int32_t lx, ly;
  16.116 -
  16.117 -    EXTRACT_WORDS(hx, lx, x);
  16.118 -    EXTRACT_WORDS(hy, ly, y);
  16.119 -    ix = hx & 0x7fffffff;
  16.120 -    iy = hy & 0x7fffffff;
  16.121 -
  16.122 -    /* y==zero: x**0 = 1 */
  16.123 -    if ((iy | ly) == 0)
  16.124 -        return one;
  16.125 -
  16.126 -    /* +-NaN return x+y */
  16.127 -    if (ix > 0x7ff00000 || ((ix == 0x7ff00000) && (lx != 0)) ||
  16.128 -        iy > 0x7ff00000 || ((iy == 0x7ff00000) && (ly != 0)))
  16.129 -        return x + y;
  16.130 -
  16.131 -    /* determine if y is an odd int when x < 0
  16.132 -     * yisint = 0       ... y is not an integer
  16.133 -     * yisint = 1       ... y is an odd int
  16.134 -     * yisint = 2       ... y is an even int
  16.135 -     */
  16.136 -    yisint = 0;
  16.137 -    if (hx < 0) {
  16.138 -        if (iy >= 0x43400000)
  16.139 -            yisint = 2;         /* even integer y */
  16.140 -        else if (iy >= 0x3ff00000) {
  16.141 -            k = (iy >> 20) - 0x3ff;     /* exponent */
  16.142 -            if (k > 20) {
  16.143 -                j = ly >> (52 - k);
  16.144 -                if ((u_int32_t) (j << (52 - k)) == ly)
  16.145 -                    yisint = 2 - (j & 1);
  16.146 -            } else if (ly == 0) {
  16.147 -                j = iy >> (20 - k);
  16.148 -                if ((j << (20 - k)) == iy)
  16.149 -                    yisint = 2 - (j & 1);
  16.150 -            }
  16.151 -        }
  16.152 -    }
  16.153 -
  16.154 -    /* special value of y */
  16.155 -    if (ly == 0) {
  16.156 -        if (iy == 0x7ff00000) { /* y is +-inf */
  16.157 -            if (((ix - 0x3ff00000) | lx) == 0)
  16.158 -                return y - y;   /* inf**+-1 is NaN */
  16.159 -            else if (ix >= 0x3ff00000)  /* (|x|>1)**+-inf = inf,0 */
  16.160 -                return (hy >= 0) ? y : zero;
  16.161 -            else                /* (|x|<1)**-,+inf = inf,0 */
  16.162 -                return (hy < 0) ? -y : zero;
  16.163 -        }
  16.164 -        if (iy == 0x3ff00000) { /* y is  +-1 */
  16.165 -            if (hy < 0)
  16.166 -                return one / x;
  16.167 -            else
  16.168 -                return x;
  16.169 -        }
  16.170 -        if (hy == 0x40000000)
  16.171 -            return x * x;       /* y is  2 */
  16.172 -        if (hy == 0x3fe00000) { /* y is  0.5 */
  16.173 -            if (hx >= 0)        /* x >= +0 */
  16.174 -                return __ieee754_sqrt(x);
  16.175 -        }
  16.176 -    }
  16.177 -
  16.178 -    ax = x < 0 ? -x : x;        /*fabs(x); */
  16.179 -    /* special value of x */
  16.180 -    if (lx == 0) {
  16.181 -        if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000) {
  16.182 -            z = ax;             /*x is +-0,+-inf,+-1 */
  16.183 -            if (hy < 0)
  16.184 -                z = one / z;    /* z = (1/|x|) */
  16.185 -            if (hx < 0) {
  16.186 -                if (((ix - 0x3ff00000) | yisint) == 0) {
  16.187 -                    z = (z - z) / (z - z);      /* (-1)**non-int is NaN */
  16.188 -                } else if (yisint == 1)
  16.189 -                    z = -z;     /* (x<0)**odd = -(|x|**odd) */
  16.190 -            }
  16.191 -            return z;
  16.192 -        }
  16.193 -    }
  16.194 -
  16.195 -    /* (x<0)**(non-int) is NaN */
  16.196 -    if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
  16.197 -        return (x - x) / (x - x);
  16.198 -
  16.199 -    /* |y| is huge */
  16.200 -    if (iy > 0x41e00000) {      /* if |y| > 2**31 */
  16.201 -        if (iy > 0x43f00000) {  /* if |y| > 2**64, must o/uflow */
  16.202 -            if (ix <= 0x3fefffff)
  16.203 -                return (hy < 0) ? huge * huge : tiny * tiny;
  16.204 -            if (ix >= 0x3ff00000)
  16.205 -                return (hy > 0) ? huge * huge : tiny * tiny;
  16.206 -        }
  16.207 -        /* over/underflow if x is not close to one */
  16.208 -        if (ix < 0x3fefffff)
  16.209 -            return (hy < 0) ? huge * huge : tiny * tiny;
  16.210 -        if (ix > 0x3ff00000)
  16.211 -            return (hy > 0) ? huge * huge : tiny * tiny;
  16.212 -        /* now |1-x| is tiny <= 2**-20, suffice to compute
  16.213 -           log(x) by x-x^2/2+x^3/3-x^4/4 */
  16.214 -        t = ax - 1;             /* t has 20 trailing zeros */
  16.215 -        w = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25));
  16.216 -        u = ivln2_h * t;        /* ivln2_h has 21 sig. bits */
  16.217 -        v = t * ivln2_l - w * ivln2;
  16.218 -        t1 = u + v;
  16.219 -        SET_LOW_WORD(t1, 0);
  16.220 -        t2 = v - (t1 - u);
  16.221 -    } else {
  16.222 -        double s2, s_h, s_l, t_h, t_l;
  16.223 -        n = 0;
  16.224 -        /* take care subnormal number */
  16.225 -        if (ix < 0x00100000) {
  16.226 -            ax *= two53;
  16.227 -            n -= 53;
  16.228 -            GET_HIGH_WORD(ix, ax);
  16.229 -        }
  16.230 -        n += ((ix) >> 20) - 0x3ff;
  16.231 -        j = ix & 0x000fffff;
  16.232 -        /* determine interval */
  16.233 -        ix = j | 0x3ff00000;    /* normalize ix */
  16.234 -        if (j <= 0x3988E)
  16.235 -            k = 0;              /* |x|<sqrt(3/2) */
  16.236 -        else if (j < 0xBB67A)
  16.237 -            k = 1;              /* |x|<sqrt(3)   */
  16.238 -        else {
  16.239 -            k = 0;
  16.240 -            n += 1;
  16.241 -            ix -= 0x00100000;
  16.242 -        }
  16.243 -        SET_HIGH_WORD(ax, ix);
  16.244 -
  16.245 -        /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
  16.246 -        u = ax - bp[k];         /* bp[0]=1.0, bp[1]=1.5 */
  16.247 -        v = one / (ax + bp[k]);
  16.248 -        s = u * v;
  16.249 -        s_h = s;
  16.250 -        SET_LOW_WORD(s_h, 0);
  16.251 -        /* t_h=ax+bp[k] High */
  16.252 -        t_h = zero;
  16.253 -        SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18));
  16.254 -        t_l = ax - (t_h - bp[k]);
  16.255 -        s_l = v * ((u - s_h * t_h) - s_h * t_l);
  16.256 -        /* compute log(ax) */
  16.257 -        s2 = s * s;
  16.258 -        r = s2 * s2 * (L1 +
  16.259 -                       s2 * (L2 +
  16.260 -                             s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));
  16.261 -        r += s_l * (s_h + s);
  16.262 -        s2 = s_h * s_h;
  16.263 -        t_h = 3.0 + s2 + r;
  16.264 -        SET_LOW_WORD(t_h, 0);
  16.265 -        t_l = r - ((t_h - 3.0) - s2);
  16.266 -        /* u+v = s*(1+...) */
  16.267 -        u = s_h * t_h;
  16.268 -        v = s_l * t_h + t_l * s;
  16.269 -        /* 2/(3log2)*(s+...) */
  16.270 -        p_h = u + v;
  16.271 -        SET_LOW_WORD(p_h, 0);
  16.272 -        p_l = v - (p_h - u);
  16.273 -        z_h = cp_h * p_h;       /* cp_h+cp_l = 2/(3*log2) */
  16.274 -        z_l = cp_l * p_h + p_l * cp + dp_l[k];
  16.275 -        /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
  16.276 -        t = (double) n;
  16.277 -        t1 = (((z_h + z_l) + dp_h[k]) + t);
  16.278 -        SET_LOW_WORD(t1, 0);
  16.279 -        t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
  16.280 -    }
  16.281 -
  16.282 -    s = one;                    /* s (sign of result -ve**odd) = -1 else = 1 */
  16.283 -    if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
  16.284 -        s = -one;               /* (-ve)**(odd int) */
  16.285 -
  16.286 -    /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
  16.287 -    y1 = y;
  16.288 -    SET_LOW_WORD(y1, 0);
  16.289 -    p_l = (y - y1) * t1 + y * t2;
  16.290 -    p_h = y1 * t1;
  16.291 -    z = p_l + p_h;
  16.292 -    EXTRACT_WORDS(j, i, z);
  16.293 -    if (j >= 0x40900000) {      /* z >= 1024 */
  16.294 -        if (((j - 0x40900000) | i) != 0)        /* if z > 1024 */
  16.295 -            return s * huge * huge;     /* overflow */
  16.296 -        else {
  16.297 -            if (p_l + ovt > z - p_h)
  16.298 -                return s * huge * huge; /* overflow */
  16.299 -        }
  16.300 -    } else if ((j & 0x7fffffff) >= 0x4090cc00) {        /* z <= -1075 */
  16.301 -        if (((j - 0xc090cc00) | i) != 0)        /* z < -1075 */
  16.302 -            return s * tiny * tiny;     /* underflow */
  16.303 -        else {
  16.304 -            if (p_l <= z - p_h)
  16.305 -                return s * tiny * tiny; /* underflow */
  16.306 -        }
  16.307 -    }
  16.308 -    /*
  16.309 -     * compute 2**(p_h+p_l)
  16.310 -     */
  16.311 -    i = j & 0x7fffffff;
  16.312 -    k = (i >> 20) - 0x3ff;
  16.313 -    n = 0;
  16.314 -    if (i > 0x3fe00000) {       /* if |z| > 0.5, set n = [z+0.5] */
  16.315 -        n = j + (0x00100000 >> (k + 1));
  16.316 -        k = ((n & 0x7fffffff) >> 20) - 0x3ff;   /* new k for n */
  16.317 -        t = zero;
  16.318 -        SET_HIGH_WORD(t, n & ~(0x000fffff >> k));
  16.319 -        n = ((n & 0x000fffff) | 0x00100000) >> (20 - k);
  16.320 -        if (j < 0)
  16.321 -            n = -n;
  16.322 -        p_h -= t;
  16.323 -    }
  16.324 -    t = p_l + p_h;
  16.325 -    SET_LOW_WORD(t, 0);
  16.326 -    u = t * lg2_h;
  16.327 -    v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
  16.328 -    z = u + v;
  16.329 -    w = v - (z - u);
  16.330 -    t = z * z;
  16.331 -    t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
  16.332 -    r = (z * t1) / (t1 - two) - (w + z * w);
  16.333 -    z = one - (r - z);
  16.334 -    GET_HIGH_WORD(j, z);
  16.335 -    j += (n << 20);
  16.336 -    if ((j >> 20) <= 0)
  16.337 -        z = SDL_NAME(scalbn) (z, n);    /* subnormal output */
  16.338 -    else
  16.339 -        SET_HIGH_WORD(z, j);
  16.340 -    return s * z;
  16.341 -}
  16.342 -
  16.343 -/* vi: set ts=4 sw=4 expandtab: */
    17.1 --- a/src/video/e_sqrt.h	Mon Sep 15 05:14:11 2008 +0000
    17.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.3 @@ -1,516 +0,0 @@
    17.4 -/* @(#)e_sqrt.c 5.1 93/09/24 */
    17.5 -/*
    17.6 - * ====================================================
    17.7 - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    17.8 - *
    17.9 - * Developed at SunPro, a Sun Microsystems, Inc. business.
   17.10 - * Permission to use, copy, modify, and distribute this
   17.11 - * software is freely granted, provided that this notice
   17.12 - * is preserved.
   17.13 - * ====================================================
   17.14 - */
   17.15 -
   17.16 -#if defined(LIBM_SCCS) && !defined(lint)
   17.17 -static char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
   17.18 -#endif
   17.19 -
   17.20 -/* __ieee754_sqrt(x)
   17.21 - * Return correctly rounded sqrt.
   17.22 - *           ------------------------------------------
   17.23 - *	     |  Use the hardware sqrt if you have one |
   17.24 - *           ------------------------------------------
   17.25 - * Method:
   17.26 - *   Bit by bit method using integer arithmetic. (Slow, but portable)
   17.27 - *   1. Normalization
   17.28 - *	Scale x to y in [1,4) with even powers of 2:
   17.29 - *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
   17.30 - *		sqrt(x) = 2^k * sqrt(y)
   17.31 - *   2. Bit by bit computation
   17.32 - *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
   17.33 - *	     i							 0
   17.34 - *                                     i+1         2
   17.35 - *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
   17.36 - *	     i      i            i                 i
   17.37 - *
   17.38 - *	To compute q    from q , one checks whether
   17.39 - *		    i+1       i
   17.40 - *
   17.41 - *			      -(i+1) 2
   17.42 - *			(q + 2      ) <= y.			(2)
   17.43 - *     			  i
   17.44 - *							      -(i+1)
   17.45 - *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
   17.46 - *		 	       i+1   i             i+1   i
   17.47 - *
   17.48 - *	With some algebric manipulation, it is not difficult to see
   17.49 - *	that (2) is equivalent to
   17.50 - *                             -(i+1)
   17.51 - *			s  +  2       <= y			(3)
   17.52 - *			 i                i
   17.53 - *
   17.54 - *	The advantage of (3) is that s  and y  can be computed by
   17.55 - *				      i      i
   17.56 - *	the following recurrence formula:
   17.57 - *	    if (3) is false
   17.58 - *
   17.59 - *	    s     =  s  ,	y    = y   ;			(4)
   17.60 - *	     i+1      i		 i+1    i
   17.61 - *
   17.62 - *	    otherwise,
   17.63 - *                         -i                     -(i+1)
   17.64 - *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
   17.65 - *           i+1      i          i+1    i     i
   17.66 - *
   17.67 - *	One may easily use induction to prove (4) and (5).
   17.68 - *	Note. Since the left hand side of (3) contain only i+2 bits,
   17.69 - *	      it does not necessary to do a full (53-bit) comparison
   17.70 - *	      in (3).
   17.71 - *   3. Final rounding
   17.72 - *	After generating the 53 bits result, we compute one more bit.
   17.73 - *	Together with the remainder, we can decide whether the
   17.74 - *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
   17.75 - *	(it will never equal to 1/2ulp).
   17.76 - *	The rounding mode can be detected by checking whether
   17.77 - *	huge + tiny is equal to huge, and whether huge - tiny is
   17.78 - *	equal to huge for some floating point number "huge" and "tiny".
   17.79 - *
   17.80 - * Special cases:
   17.81 - *	sqrt(+-0) = +-0 	... exact
   17.82 - *	sqrt(inf) = inf
   17.83 - *	sqrt(-ve) = NaN		... with invalid signal
   17.84 - *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
   17.85 - *
   17.86 - * Other methods : see the appended file at the end of the program below.
   17.87 - *---------------
   17.88 - */
   17.89 -
   17.90 -/*#include "math.h"*/
   17.91 -#include "math_private.h"
   17.92 -
   17.93 -#ifdef __STDC__
   17.94 -double SDL_NAME(copysign) (double x, double y)
   17.95 -#else
   17.96 -double SDL_NAME(copysign) (x, y)
   17.97 -     double
   17.98 -         x,
   17.99 -         y;
  17.100 -#endif
  17.101 -{
  17.102 -    u_int32_t hx, hy;
  17.103 -    GET_HIGH_WORD(hx, x);
  17.104 -    GET_HIGH_WORD(hy, y);
  17.105 -    SET_HIGH_WORD(x, (hx & 0x7fffffff) | (hy & 0x80000000));
  17.106 -    return x;
  17.107 -}
  17.108 -
  17.109 -#ifdef __STDC__
  17.110 -double SDL_NAME(scalbn) (double x, int n)
  17.111 -#else
  17.112 -double SDL_NAME(scalbn) (x, n)
  17.113 -     double
  17.114 -         x;
  17.115 -     int
  17.116 -         n;
  17.117 -#endif
  17.118 -{
  17.119 -    int32_t k, hx, lx;
  17.120 -    EXTRACT_WORDS(hx, lx, x);
  17.121 -    k = (hx & 0x7ff00000) >> 20;        /* extract exponent */
  17.122 -    if (k == 0) {               /* 0 or subnormal x */
  17.123 -        if ((lx | (hx & 0x7fffffff)) == 0)
  17.124 -            return x;           /* +-0 */
  17.125 -        x *= two54;
  17.126 -        GET_HIGH_WORD(hx, x);
  17.127 -        k = ((hx & 0x7ff00000) >> 20) - 54;
  17.128 -        if (n < -50000)
  17.129 -            return tiny * x;    /*underflow */
  17.130 -    }
  17.131 -    if (k == 0x7ff)
  17.132 -        return x + x;           /* NaN or Inf */
  17.133 -    k = k + n;
  17.134 -    if (k > 0x7fe)
  17.135 -        return huge * SDL_NAME(copysign) (huge, x);     /* overflow  */
  17.136 -    if (k > 0) {                /* normal result */
  17.137 -        SET_HIGH_WORD(x, (hx & 0x800fffff) | (k << 20));
  17.138 -        return x;
  17.139 -    }
  17.140 -    if (k <= -54) {
  17.141 -        if (n > 50000)          /* in case integer overflow in n+k */
  17.142 -            return huge * SDL_NAME(copysign) (huge, x); /*overflow */
  17.143 -        else
  17.144 -            return tiny * SDL_NAME(copysign) (tiny, x); /*underflow */
  17.145 -    }
  17.146 -    k += 54;                    /* subnormal result */
  17.147 -    SET_HIGH_WORD(x, (hx & 0x800fffff) | (k << 20));
  17.148 -    return x * twom54;
  17.149 -}
  17.150 -
  17.151 -#ifdef __STDC__
  17.152 -double
  17.153 -__ieee754_sqrt(double x)
  17.154 -#else
  17.155 -double
  17.156 -__ieee754_sqrt(x)
  17.157 -     double x;
  17.158 -#endif
  17.159 -{
  17.160 -    double z;
  17.161 -    int32_t sign = (int) 0x80000000;
  17.162 -    int32_t ix0, s0, q, m, t, i;
  17.163 -    u_int32_t r, t1, s1, ix1, q1;
  17.164 -
  17.165 -    EXTRACT_WORDS(ix0, ix1, x);
  17.166 -
  17.167 -    /* take care of Inf and NaN */
  17.168 -    if ((ix0 & 0x7ff00000) == 0x7ff00000) {
  17.169 -        return x * x + x;       /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
  17.170 -                                   sqrt(-inf)=sNaN */
  17.171 -    }
  17.172 -    /* take care of zero */
  17.173 -    if (ix0 <= 0) {
  17.174 -        if (((ix0 & (~sign)) | ix1) == 0)
  17.175 -            return x;           /* sqrt(+-0) = +-0 */
  17.176 -        else if (ix0 < 0)
  17.177 -            return (x - x) / (x - x);   /* sqrt(-ve) = sNaN */
  17.178 -    }
  17.179 -    /* normalize x */
  17.180 -    m = (ix0 >> 20);
  17.181 -    if (m == 0) {               /* subnormal x */
  17.182 -        while (ix0 == 0) {
  17.183 -            m -= 21;
  17.184 -            ix0 |= (ix1 >> 11);
  17.185 -            ix1 <<= 21;
  17.186 -        }
  17.187 -        for (i = 0; (ix0 & 0x00100000) == 0; i++)
  17.188 -            ix0 <<= 1;
  17.189 -        m -= i - 1;
  17.190 -        ix0 |= (ix1 >> (32 - i));
  17.191 -        ix1 <<= i;
  17.192 -    }
  17.193 -    m -= 1023;                  /* unbias exponent */
  17.194 -    ix0 = (ix0 & 0x000fffff) | 0x00100000;
  17.195 -    if (m & 1) {                /* odd m, double x to make it even */
  17.196 -        ix0 += ix0 + ((ix1 & sign) >> 31);
  17.197 -        ix1 += ix1;
  17.198 -    }
  17.199 -    m >>= 1;                    /* m = [m/2] */
  17.200 -
  17.201 -    /* generate sqrt(x) bit by bit */
  17.202 -    ix0 += ix0 + ((ix1 & sign) >> 31);
  17.203 -    ix1 += ix1;
  17.204 -    q = q1 = s0 = s1 = 0;       /* [q,q1] = sqrt(x) */
  17.205 -    r = 0x00200000;             /* r = moving bit from right to left */
  17.206 -
  17.207 -    while (r != 0) {
  17.208 -        t = s0 + r;
  17.209 -        if (t <= ix0) {
  17.210 -            s0 = t + r;
  17.211 -            ix0 -= t;
  17.212 -            q += r;
  17.213 -        }
  17.214 -        ix0 += ix0 + ((ix1 & sign) >> 31);
  17.215 -        ix1 += ix1;
  17.216 -        r >>= 1;
  17.217 -    }
  17.218 -
  17.219 -    r = sign;
  17.220 -    while (r != 0) {
  17.221 -        t1 = s1 + r;
  17.222 -        t = s0;
  17.223 -        if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
  17.224 -            s1 = t1 + r;
  17.225 -            if (((int32_t) (t1 & sign) == sign) && (s1 & sign) == 0)
  17.226 -                s0 += 1;
  17.227 -            ix0 -= t;
  17.228 -            if (ix1 < t1)
  17.229 -                ix0 -= 1;
  17.230 -            ix1 -= t1;
  17.231 -            q1 += r;
  17.232 -        }
  17.233 -        ix0 += ix0 + ((ix1 & sign) >> 31);
  17.234 -        ix1 += ix1;
  17.235 -        r >>= 1;
  17.236 -    }
  17.237 -
  17.238 -    /* use floating add to find out rounding direction */
  17.239 -    if ((ix0 | ix1) != 0) {
  17.240 -        z = one - tiny;         /* trigger inexact flag */
  17.241 -        if (z >= one) {
  17.242 -            z = one + tiny;
  17.243 -            if (q1 == (u_int32_t) 0xffffffff) {
  17.244 -                q1 = 0;
  17.245 -                q += 1;
  17.246 -            } else if (z > one) {
  17.247 -                if (q1 == (u_int32_t) 0xfffffffe)
  17.248 -                    q += 1;
  17.249 -                q1 += 2;
  17.250 -            } else
  17.251 -                q1 += (q1 & 1);
  17.252 -        }
  17.253 -    }
  17.254 -    ix0 = (q >> 1) + 0x3fe00000;
  17.255 -    ix1 = q1 >> 1;
  17.256 -    if ((q & 1) == 1)
  17.257 -        ix1 |= sign;
  17.258 -    ix0 += (m << 20);
  17.259 -    INSERT_WORDS(z, ix0, ix1);
  17.260 -    return z;
  17.261 -}
  17.262 -
  17.263 -/*
  17.264 -Other methods  (use floating-point arithmetic)
  17.265 --------------
  17.266 -(This is a copy of a drafted paper by Prof W. Kahan
  17.267 -and K.C. Ng, written in May, 1986)
  17.268 -
  17.269 -	Two algorithms are given here to implement sqrt(x)
  17.270 -	(IEEE double precision arithmetic) in software.
  17.271 -	Both supply sqrt(x) correctly rounded. The first algorithm (in
  17.272 -	Section A) uses newton iterations and involves four divisions.
  17.273 -	The second one uses reciproot iterations to avoid division, but
  17.274 -	requires more multiplications. Both algorithms need the ability
  17.275 -	to chop results of arithmetic operations instead of round them,
  17.276 -	and the INEXACT flag to indicate when an arithmetic operation
  17.277 -	is executed exactly with no roundoff error, all part of the
  17.278 -	standard (IEEE 754-1985). The ability to perform shift, add,
  17.279 -	subtract and logical AND operations upon 32-bit words is needed
  17.280 -	too, though not part of the standard.
  17.281 -
  17.282 -A.  sqrt(x) by Newton Iteration
  17.283 -
  17.284 -   (1)	Initial approximation
  17.285 -
  17.286 -	Let x0 and x1 be the leading and the trailing 32-bit words of
  17.287 -	a floating point number x (in IEEE double format) respectively
  17.288 -
  17.289 -	    1    11		     52				  ...widths
  17.290 -	   ------------------------------------------------------
  17.291 -	x: |s|	  e     |	      f				|
  17.292 -	   ------------------------------------------------------
  17.293 -	      msb    lsb  msb				      lsb ...order
  17.294 -
  17.295 -
  17.296 -	     ------------------------  	     ------------------------
  17.297 -	x0:  |s|   e    |    f1     |	 x1: |          f2           |
  17.298 -	     ------------------------  	     ------------------------
  17.299 -
  17.300 -	By performing shifts and subtracts on x0 and x1 (both regarded
  17.301 -	as integers), we obtain an 8-bit approximation of sqrt(x) as
  17.302 -	follows.
  17.303 -
  17.304 -		k  := (x0>>1) + 0x1ff80000;
  17.305 -		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
  17.306 -	Here k is a 32-bit integer and T1[] is an integer array containing
  17.307 -	correction terms. Now magically the floating value of y (y's
  17.308 -	leading 32-bit word is y0, the value of its trailing word is 0)
  17.309 -	approximates sqrt(x) to almost 8-bit.
  17.310 -
  17.311 -	Value of T1:
  17.312 -	static int T1[32]= {
  17.313 -	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
  17.314 -	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
  17.315 -	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
  17.316 -	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
  17.317 -
  17.318 -    (2)	Iterative refinement
  17.319 -
  17.320 -	Apply Heron's rule three times to y, we have y approximates
  17.321 -	sqrt(x) to within 1 ulp (Unit in the Last Place):
  17.322 -
  17.323 -		y := (y+x/y)/2		... almost 17 sig. bits
  17.324 -		y := (y+x/y)/2		... almost 35 sig. bits
  17.325 -		y := y-(y-x/y)/2	... within 1 ulp
  17.326 -
  17.327 -
  17.328 -	Remark 1.
  17.329 -	    Another way to improve y to within 1 ulp is:
  17.330 -
  17.331 -		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
  17.332 -		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
  17.333 -
  17.334 -				2
  17.335 -			    (x-y )*y
  17.336 -		y := y + 2* ----------	...within 1 ulp
  17.337 -			       2
  17.338 -			     3y  + x
  17.339 -
  17.340 -
  17.341 -	This formula has one division fewer than the one above; however,
  17.342 -	it requires more multiplications and additions. Also x must be
  17.343 -	scaled in advance to avoid spurious overflow in evaluating the
  17.344 -	expression 3y*y+x. Hence it is not recommended uless division
  17.345 -	is slow. If division is very slow, then one should use the
  17.346 -	reciproot algorithm given in section B.
  17.347 -
  17.348 -    (3) Final adjustment
  17.349 -
  17.350 -	By twiddling y's last bit it is possible to force y to be
  17.351 -	correctly rounded according to the prevailing rounding mode
  17.352 -	as follows. Let r and i be copies of the rounding mode and
  17.353 -	inexact flag before entering the square root program. Also we
  17.354 -	use the expression y+-ulp for the next representable floating
  17.355 -	numbers (up and down) of y. Note that y+-ulp = either fixed
  17.356 -	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
  17.357 -	mode.
  17.358 -
  17.359 -		I := FALSE;	... reset INEXACT flag I
  17.360 -		R := RZ;	... set rounding mode to round-toward-zero
  17.361 -		z := x/y;	... chopped quotient, possibly inexact
  17.362 -		If(not I) then {	... if the quotient is exact
  17.363 -		    if(z=y) {
  17.364 -		        I := i;	 ... restore inexact flag
  17.365 -		        R := r;  ... restore rounded mode
  17.366 -		        return sqrt(x):=y.
  17.367 -		    } else {
  17.368 -			z := z - ulp;	... special rounding
  17.369 -		    }
  17.370 -		}
  17.371 -		i := TRUE;		... sqrt(x) is inexact
  17.372 -		If (r=RN) then z=z+ulp	... rounded-to-nearest
  17.373 -		If (r=RP) then {	... round-toward-+inf
  17.374 -		    y = y+ulp; z=z+ulp;
  17.375 -		}
  17.376 -		y := y+z;		... chopped sum
  17.377 -		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
  17.378 -	        I := i;	 		... restore inexact flag
  17.379 -	        R := r;  		... restore rounded mode
  17.380 -	        return sqrt(x):=y.
  17.381 -
  17.382 -    (4)	Special cases
  17.383 -
  17.384 -	Square root of +inf, +-0, or NaN is itself;
  17.385 -	Square root of a negative number is NaN with invalid signal.
  17.386 -
  17.387 -
  17.388 -B.  sqrt(x) by Reciproot Iteration
  17.389 -
  17.390 -   (1)	Initial approximation
  17.391 -
  17.392 -	Let x0 and x1 be the leading and the trailing 32-bit words of
  17.393 -	a floating point number x (in IEEE double format) respectively
  17.394 -	(see section A). By performing shifs and subtracts on x0 and y0,
  17.395 -	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
  17.396 -
  17.397 -	    k := 0x5fe80000 - (x0>>1);
  17.398 -	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
  17.399 -
  17.400 -	Here k is a 32-bit integer and T2[] is an integer array
  17.401 -	containing correction terms. Now magically the floating
  17.402 -	value of y (y's leading 32-bit word is y0, the value of
  17.403 -	its trailing word y1 is set to zero) approximates 1/sqrt(x)
  17.404 -	to almost 7.8-bit.
  17.405 -
  17.406 -	Value of T2:
  17.407 -	static int T2[64]= {
  17.408 -	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
  17.409 -	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
  17.410 -	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
  17.411 -	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
  17.412 -	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
  17.413 -	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
  17.414 -	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
  17.415 -	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
  17.416 -
  17.417 -    (2)	Iterative refinement
  17.418 -
  17.419 -	Apply Reciproot iteration three times to y and multiply the
  17.420 -	result by x to get an approximation z that matches sqrt(x)
  17.421 -	to about 1 ulp. To be exact, we will have
  17.422 -		-1ulp < sqrt(x)-z<1.0625ulp.
  17.423 -
  17.424 -	... set rounding mode to Round-to-nearest
  17.425 -	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
  17.426 -	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
  17.427 -	... special arrangement for better accuracy
  17.428 -	   z := x*y			... 29 bits to sqrt(x), with z*y<1
  17.429 -	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
  17.430 -
  17.431 -	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
  17.432 -	(a) the term z*y in the final iteration is always less than 1;
  17.433 -	(b) the error in the final result is biased upward so that
  17.434 -		-1 ulp < sqrt(x) - z < 1.0625 ulp
  17.435 -	    instead of |sqrt(x)-z|<1.03125ulp.
  17.436 -
  17.437 -    (3)	Final adjustment
  17.438 -
  17.439 -	By twiddling y's last bit it is possible to force y to be
  17.440 -	correctly rounded according to the prevailing rounding mode
  17.441 -	as follows. Let r and i be copies of the rounding mode and
  17.442 -	inexact flag before entering the square root program. Also we
  17.443 -	use the expression y+-ulp for the next representable floating
  17.444 -	numbers (up and down) of y. Note that y+-ulp = either fixed
  17.445 -	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
  17.446 -	mode.
  17.447 -
  17.448 -	R := RZ;		... set rounding mode to round-toward-zero
  17.449 -	switch(r) {
  17.450 -	    case RN:		... round-to-nearest
  17.451 -	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
  17.452 -	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
  17.453 -	       break;
  17.454 -	    case RZ:case RM:	... round-to-zero or round-to--inf
  17.455 -	       R:=RP;		... reset rounding mod to round-to-+inf
  17.456 -	       if(x<z*z ... rounded up) z = z - ulp; else
  17.457 -	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
  17.458 -	       break;
  17.459 -	    case RP:		... round-to-+inf
  17.460 -	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
  17.461 -	       if(x>z*z ...chopped) z = z+ulp;
  17.462 -	       break;
  17.463 -	}
  17.464 -
  17.465 -	Remark 3. The above comparisons can be done in fixed point. For
  17.466 -	example, to compare x and w=z*z chopped, it suffices to compare
  17.467 -	x1 and w1 (the trailing parts of x and w), regarding them as
  17.468 -	two's complement integers.
  17.469 -
  17.470 -	...Is z an exact square root?
  17.471 -	To determine whether z is an exact square root of x, let z1 be the
  17.472 -	trailing part of z, and also let x0 and x1 be the leading and
  17.473 -	trailing parts of x.
  17.474 -
  17.475 -	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
  17.476 -	    I := 1;		... Raise Inexact flag: z is not exact
  17.477 -	else {
  17.478 -	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
  17.479 -	    k := z1 >> 26;		... get z's 25-th and 26-th
  17.480 -					    fraction bits
  17.481 -	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
  17.482 -	}
  17.483 -	R:= r		... restore rounded mode
  17.484 -	return sqrt(x):=z.
  17.485 -
  17.486 -	If multiplication is cheaper then the foregoing red tape, the
  17.487 -	Inexact flag can be evaluated by
  17.488 -
  17.489 -	    I := i;
  17.490 -	    I := (z*z!=x) or I.
  17.491 -
  17.492 -	Note that z*z can overwrite I; this value must be sensed if it is
  17.493 -	True.
  17.494 -
  17.495 -	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
  17.496 -	zero.
  17.497 -
  17.498 -		    --------------------
  17.499 -		z1: |        f2        |
  17.500 -		    --------------------
  17.501 -		bit 31		   bit 0
  17.502 -
  17.503 -	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
  17.504 -	or even of logb(x) have the following relations:
  17.505 -
  17.506 -	-------------------------------------------------
  17.507 -	bit 27,26 of z1		bit 1,0 of x1	logb(x)
  17.508 -	-------------------------------------------------
  17.509 -	00			00		odd and even
  17.510 -	01			01		even
  17.511 -	10			10		odd
  17.512 -	10			00		even
  17.513 -	11			01		even
  17.514 -	-------------------------------------------------
  17.515 -
  17.516 -    (4)	Special cases (see (4) of Section A).
  17.517 -
  17.518 - */
  17.519 -/* vi: set ts=4 sw=4 expandtab: */
    18.1 --- a/src/video/math_private.h	Mon Sep 15 05:14:11 2008 +0000
    18.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.3 @@ -1,171 +0,0 @@
    18.4 -/*
    18.5 - * ====================================================
    18.6 - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    18.7 - *
    18.8 - * Developed at SunPro, a Sun Microsystems, Inc. business.
    18.9 - * Permission to use, copy, modify, and distribute this
   18.10 - * software is freely granted, provided that this notice
   18.11 - * is preserved.
   18.12 - * ====================================================
   18.13 - */
   18.14 -
   18.15 -/*
   18.16 - * from: @(#)fdlibm.h 5.1 93/09/24
   18.17 - * $Id$
   18.18 - */
   18.19 -
   18.20 -#ifndef _MATH_PRIVATE_H_
   18.21 -#define _MATH_PRIVATE_H_
   18.22 -
   18.23 -#include "SDL_name.h"
   18.24 -#include "SDL_endian.h"
   18.25 -
   18.26 -#define huge		really_big      /* huge is a reserved keyword in VC++ 6.0 */
   18.27 -#define u_int32_t	uint32_t
   18.28 -
   18.29 -/* The original fdlibm code used statements like:
   18.30 -	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
   18.31 -	ix0 = *(n0+(int*)&x);			* high word of x *
   18.32 -	ix1 = *((1-n0)+(int*)&x);		* low word of x *
   18.33 -   to dig two 32 bit words out of the 64 bit IEEE floating point
   18.34 -   value.  That is non-ANSI, and, moreover, the gcc instruction
   18.35 -   scheduler gets it wrong.  We instead use the following macros.
   18.36 -   Unlike the original code, we determine the endianness at compile
   18.37 -   time, not at run time; I don't see much benefit to selecting
   18.38 -   endianness at run time.  */
   18.39 -
   18.40 -/* A union which permits us to convert between a double and two 32 bit
   18.41 -   ints.  */
   18.42 -
   18.43 -/*
   18.44 - * Math on arm is special:
   18.45 - * For FPA, float words are always big-endian.
   18.46 - * For VFP, floats words follow the memory system mode.
   18.47 - */
   18.48 -
   18.49 -#if (SDL_BYTEORDER == SDL_BIG_ENDIAN) || \
   18.50 -    (!defined(__VFP_FP__) && (defined(__arm__) || defined(__thumb__)))
   18.51 -
   18.52 -typedef union
   18.53 -{
   18.54 -    double value;
   18.55 -    struct
   18.56 -    {
   18.57 -        u_int32_t msw;
   18.58 -        u_int32_t lsw;
   18.59 -    } parts;
   18.60 -} ieee_double_shape_type;
   18.61 -
   18.62 -#else
   18.63 -
   18.64 -typedef union
   18.65 -{
   18.66 -    double value;
   18.67 -    struct
   18.68 -    {
   18.69 -        u_int32_t lsw;
   18.70 -        u_int32_t msw;
   18.71 -    } parts;
   18.72 -} ieee_double_shape_type;
   18.73 -
   18.74 -#endif
   18.75 -
   18.76 -/* Get two 32 bit ints from a double.  */
   18.77 -
   18.78 -#define EXTRACT_WORDS(ix0,ix1,d)				\
   18.79 -do {								\
   18.80 -  ieee_double_shape_type ew_u;					\
   18.81 -  ew_u.value = (d);						\
   18.82 -  (ix0) = ew_u.parts.msw;					\
   18.83 -  (ix1) = ew_u.parts.lsw;					\
   18.84 -} while (0)
   18.85 -
   18.86 -/* Get the more significant 32 bit int from a double.  */
   18.87 -
   18.88 -#define GET_HIGH_WORD(i,d)					\
   18.89 -do {								\
   18.90 -  ieee_double_shape_type gh_u;					\
   18.91 -  gh_u.value = (d);						\
   18.92 -  (i) = gh_u.parts.msw;						\
   18.93 -} while (0)
   18.94 -
   18.95 -/* Get the less significant 32 bit int from a double.  */
   18.96 -
   18.97 -#define GET_LOW_WORD(i,d)					\
   18.98 -do {								\
   18.99 -  ieee_double_shape_type gl_u;					\
  18.100 -  gl_u.value = (d);						\
  18.101 -  (i) = gl_u.parts.lsw;						\
  18.102 -} while (0)
  18.103 -
  18.104 -/* Set a double from two 32 bit ints.  */
  18.105 -
  18.106 -#define INSERT_WORDS(d,ix0,ix1)					\
  18.107 -do {								\
  18.108 -  ieee_double_shape_type iw_u;					\
  18.109 -  iw_u.parts.msw = (ix0);					\
  18.110 -  iw_u.parts.lsw = (ix1);					\
  18.111 -  (d) = iw_u.value;						\
  18.112 -} while (0)
  18.113 -
  18.114 -/* Set the more significant 32 bits of a double from an int.  */
  18.115 -
  18.116 -#define SET_HIGH_WORD(d,v)					\
  18.117 -do {								\
  18.118 -  ieee_double_shape_type sh_u;					\
  18.119 -  sh_u.value = (d);						\
  18.120 -  sh_u.parts.msw = (v);						\
  18.121 -  (d) = sh_u.value;						\
  18.122 -} while (0)
  18.123 -
  18.124 -/* Set the less significant 32 bits of a double from an int.  */
  18.125 -
  18.126 -#define SET_LOW_WORD(d,v)					\
  18.127 -do {								\
  18.128 -  ieee_double_shape_type sl_u;					\
  18.129 -  sl_u.value = (d);						\
  18.130 -  sl_u.parts.lsw = (v);						\
  18.131 -  (d) = sl_u.value;						\
  18.132 -} while (0)
  18.133 -
  18.134 -/* A union which permits us to convert between a float and a 32 bit
  18.135 -   int.  */
  18.136 -
  18.137 -typedef union
  18.138 -{
  18.139 -    float value;
  18.140 -    u_int32_t word;
  18.141 -} ieee_float_shape_type;
  18.142 -
  18.143 -/* Get a 32 bit int from a float.  */
  18.144 -
  18.145 -#define GET_FLOAT_WORD(i,d)					\
  18.146 -do {								\
  18.147 -  ieee_float_shape_type gf_u;					\
  18.148 -  gf_u.value = (d);						\
  18.149 -  (i) = gf_u.word;						\
  18.150 -} while (0)
  18.151 -
  18.152 -/* Set a float from a 32 bit int.  */
  18.153 -
  18.154 -#define SET_FLOAT_WORD(d,i)					\
  18.155 -do {								\
  18.156 -  ieee_float_shape_type sf_u;					\
  18.157 -  sf_u.word = (i);						\
  18.158 -  (d) = sf_u.value;						\
  18.159 -} while (0)
  18.160 -
  18.161 -
  18.162 -#ifdef __STDC__
  18.163 -static const double
  18.164 -#else
  18.165 -static double
  18.166 -#endif
  18.167 -  zero = 0.0, one = 1.0, two = 2.0, two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */
  18.168 -    two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
  18.169 -    twom54 = 5.55111512312578270212e-17,        /* 0x3C900000, 0x00000000 */
  18.170 -    huge = 1.0e+300, tiny = 1.0e-300;
  18.171 -
  18.172 -#endif /* _MATH_PRIVATE_H_ */
  18.173 -
  18.174 -/* vi: set ts=4 sw=4 expandtab: */