src/libm/e_sqrt.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 22 Jun 2015 23:36:06 -0700
changeset 9776 952ff8a5076f
parent 6044 35448a5ea044
child 11683 48bcba563d9c
permissions -rw-r--r--
Fixed bug 3030 - SDL_RecreateWindow fails to restore title, icon, etc.

Adam M.

It loses the title and icon when window recreation fails. For instance, this may happen when trying to create an OpenGL ES window on a system that doesn't support it. But at that point, the title and icon have already been lost.
slouken@2756
     1
/* @(#)e_sqrt.c 5.1 93/09/24 */
slouken@2756
     2
/*
slouken@2756
     3
 * ====================================================
slouken@2756
     4
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
slouken@2756
     5
 *
slouken@2756
     6
 * Developed at SunPro, a Sun Microsystems, Inc. business.
slouken@2756
     7
 * Permission to use, copy, modify, and distribute this
slouken@2756
     8
 * software is freely granted, provided that this notice
slouken@2756
     9
 * is preserved.
slouken@2756
    10
 * ====================================================
slouken@2756
    11
 */
slouken@2756
    12
slouken@2756
    13
#if defined(LIBM_SCCS) && !defined(lint)
slouken@3162
    14
static const char rcsid[] =
slouken@3162
    15
    "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
slouken@2756
    16
#endif
slouken@2756
    17
slouken@2756
    18
/* __ieee754_sqrt(x)
slouken@2756
    19
 * Return correctly rounded sqrt.
slouken@2756
    20
 *           ------------------------------------------
slouken@2756
    21
 *	     |  Use the hardware sqrt if you have one |
slouken@2756
    22
 *           ------------------------------------------
slouken@2756
    23
 * Method:
slouken@2756
    24
 *   Bit by bit method using integer arithmetic. (Slow, but portable)
slouken@2756
    25
 *   1. Normalization
slouken@2756
    26
 *	Scale x to y in [1,4) with even powers of 2:
slouken@2756
    27
 *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
slouken@2756
    28
 *		sqrt(x) = 2^k * sqrt(y)
slouken@2756
    29
 *   2. Bit by bit computation
slouken@2756
    30
 *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
slouken@2756
    31
 *	     i							 0
slouken@2756
    32
 *                                     i+1         2
slouken@2756
    33
 *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
slouken@2756
    34
 *	     i      i            i                 i
slouken@2756
    35
 *
slouken@2756
    36
 *	To compute q    from q , one checks whether
slouken@2756
    37
 *		    i+1       i
slouken@2756
    38
 *
slouken@2756
    39
 *			      -(i+1) 2
slouken@2756
    40
 *			(q + 2      ) <= y.			(2)
slouken@2756
    41
 *     			  i
slouken@2756
    42
 *							      -(i+1)
slouken@2756
    43
 *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
slouken@2756
    44
 *		 	       i+1   i             i+1   i
slouken@2756
    45
 *
slouken@2756
    46
 *	With some algebric manipulation, it is not difficult to see
slouken@2756
    47
 *	that (2) is equivalent to
slouken@2756
    48
 *                             -(i+1)
slouken@2756
    49
 *			s  +  2       <= y			(3)
slouken@2756
    50
 *			 i                i
slouken@2756
    51
 *
slouken@2756
    52
 *	The advantage of (3) is that s  and y  can be computed by
slouken@2756
    53
 *				      i      i
slouken@2756
    54
 *	the following recurrence formula:
slouken@2756
    55
 *	    if (3) is false
slouken@2756
    56
 *
slouken@2756
    57
 *	    s     =  s  ,	y    = y   ;			(4)
slouken@2756
    58
 *	     i+1      i		 i+1    i
slouken@2756
    59
 *
slouken@2756
    60
 *	    otherwise,
slouken@2756
    61
 *                         -i                     -(i+1)
slouken@2756
    62
 *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
slouken@2756
    63
 *           i+1      i          i+1    i     i
slouken@2756
    64
 *
slouken@2756
    65
 *	One may easily use induction to prove (4) and (5).
slouken@2756
    66
 *	Note. Since the left hand side of (3) contain only i+2 bits,
slouken@2756
    67
 *	      it does not necessary to do a full (53-bit) comparison
slouken@2756
    68
 *	      in (3).
slouken@2756
    69
 *   3. Final rounding
slouken@2756
    70
 *	After generating the 53 bits result, we compute one more bit.
slouken@2756
    71
 *	Together with the remainder, we can decide whether the
slouken@2756
    72
 *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
slouken@2756
    73
 *	(it will never equal to 1/2ulp).
slouken@2756
    74
 *	The rounding mode can be detected by checking whether
slouken@2756
    75
 *	huge + tiny is equal to huge, and whether huge - tiny is
slouken@2756
    76
 *	equal to huge for some floating point number "huge" and "tiny".
slouken@2756
    77
 *
slouken@2756
    78
 * Special cases:
slouken@2756
    79
 *	sqrt(+-0) = +-0 	... exact
slouken@2756
    80
 *	sqrt(inf) = inf
slouken@2756
    81
 *	sqrt(-ve) = NaN		... with invalid signal
slouken@2756
    82
 *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
slouken@2756
    83
 *
slouken@2756
    84
 * Other methods : see the appended file at the end of the program below.
slouken@2756
    85
 *---------------
slouken@2756
    86
 */
slouken@2756
    87
slouken@6044
    88
#include "math_libm.h"
slouken@2756
    89
#include "math_private.h"
slouken@2756
    90
slouken@2756
    91
#ifdef __STDC__
slouken@2756
    92
static const double one = 1.0, tiny = 1.0e-300;
slouken@2756
    93
#else
slouken@2756
    94
static double one = 1.0, tiny = 1.0e-300;
slouken@2756
    95
#endif
slouken@2756
    96
slouken@2756
    97
#ifdef __STDC__
slouken@2756
    98
double attribute_hidden
slouken@2756
    99
__ieee754_sqrt(double x)
slouken@2756
   100
#else
slouken@2756
   101
double attribute_hidden
slouken@2756
   102
__ieee754_sqrt(x)
slouken@2756
   103
     double x;
slouken@2756
   104
#endif
slouken@2756
   105
{
slouken@2756
   106
    double z;
slouken@2756
   107
    int32_t sign = (int) 0x80000000;
slouken@2756
   108
    int32_t ix0, s0, q, m, t, i;
slouken@2756
   109
    u_int32_t r, t1, s1, ix1, q1;
slouken@2756
   110
slouken@2756
   111
    EXTRACT_WORDS(ix0, ix1, x);
slouken@2756
   112
slouken@2756
   113
    /* take care of Inf and NaN */
slouken@2756
   114
    if ((ix0 & 0x7ff00000) == 0x7ff00000) {
slouken@2756
   115
        return x * x + x;       /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
slouken@2756
   116
                                   sqrt(-inf)=sNaN */
slouken@2756
   117
    }
slouken@2756
   118
    /* take care of zero */
slouken@2756
   119
    if (ix0 <= 0) {
slouken@2756
   120
        if (((ix0 & (~sign)) | ix1) == 0)
slouken@2756
   121
            return x;           /* sqrt(+-0) = +-0 */
slouken@2756
   122
        else if (ix0 < 0)
slouken@2756
   123
            return (x - x) / (x - x);   /* sqrt(-ve) = sNaN */
slouken@2756
   124
    }
slouken@2756
   125
    /* normalize x */
slouken@2756
   126
    m = (ix0 >> 20);
slouken@2756
   127
    if (m == 0) {               /* subnormal x */
slouken@2756
   128
        while (ix0 == 0) {
slouken@2756
   129
            m -= 21;
slouken@2756
   130
            ix0 |= (ix1 >> 11);
slouken@2756
   131
            ix1 <<= 21;
slouken@2756
   132
        }
slouken@2756
   133
        for (i = 0; (ix0 & 0x00100000) == 0; i++)
slouken@2756
   134
            ix0 <<= 1;
slouken@2756
   135
        m -= i - 1;
slouken@2756
   136
        ix0 |= (ix1 >> (32 - i));
slouken@2756
   137
        ix1 <<= i;
slouken@2756
   138
    }
slouken@2756
   139
    m -= 1023;                  /* unbias exponent */
slouken@2756
   140
    ix0 = (ix0 & 0x000fffff) | 0x00100000;
slouken@2756
   141
    if (m & 1) {                /* odd m, double x to make it even */
slouken@2756
   142
        ix0 += ix0 + ((ix1 & sign) >> 31);
slouken@2756
   143
        ix1 += ix1;
slouken@2756
   144
    }
slouken@2756
   145
    m >>= 1;                    /* m = [m/2] */
slouken@2756
   146
slouken@2756
   147
    /* generate sqrt(x) bit by bit */
slouken@2756
   148
    ix0 += ix0 + ((ix1 & sign) >> 31);
slouken@2756
   149
    ix1 += ix1;
slouken@2756
   150
    q = q1 = s0 = s1 = 0;       /* [q,q1] = sqrt(x) */
slouken@2756
   151
    r = 0x00200000;             /* r = moving bit from right to left */
slouken@2756
   152
slouken@2756
   153
    while (r != 0) {
slouken@2756
   154
        t = s0 + r;
slouken@2756
   155
        if (t <= ix0) {
slouken@2756
   156
            s0 = t + r;
slouken@2756
   157
            ix0 -= t;
slouken@2756
   158
            q += r;
slouken@2756
   159
        }
slouken@2756
   160
        ix0 += ix0 + ((ix1 & sign) >> 31);
slouken@2756
   161
        ix1 += ix1;
slouken@2756
   162
        r >>= 1;
slouken@2756
   163
    }
slouken@2756
   164
slouken@2756
   165
    r = sign;
slouken@2756
   166
    while (r != 0) {
slouken@2756
   167
        t1 = s1 + r;
slouken@2756
   168
        t = s0;
slouken@2756
   169
        if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) {
slouken@2756
   170
            s1 = t1 + r;
slouken@2756
   171
            if (((t1 & sign) == sign) && (s1 & sign) == 0)
slouken@2756
   172
                s0 += 1;
slouken@2756
   173
            ix0 -= t;
slouken@2756
   174
            if (ix1 < t1)
slouken@2756
   175
                ix0 -= 1;
slouken@2756
   176
            ix1 -= t1;
slouken@2756
   177
            q1 += r;
slouken@2756
   178
        }
slouken@2756
   179
        ix0 += ix0 + ((ix1 & sign) >> 31);
slouken@2756
   180
        ix1 += ix1;
slouken@2756
   181
        r >>= 1;
slouken@2756
   182
    }
slouken@2756
   183
slouken@2756
   184
    /* use floating add to find out rounding direction */
slouken@2756
   185
    if ((ix0 | ix1) != 0) {
slouken@2756
   186
        z = one - tiny;         /* trigger inexact flag */
slouken@2756
   187
        if (z >= one) {
slouken@2756
   188
            z = one + tiny;
slouken@2756
   189
            if (q1 == (u_int32_t) 0xffffffff) {
slouken@2756
   190
                q1 = 0;
slouken@2756
   191
                q += 1;
slouken@2756
   192
            } else if (z > one) {
slouken@2756
   193
                if (q1 == (u_int32_t) 0xfffffffe)
slouken@2756
   194
                    q += 1;
slouken@2756
   195
                q1 += 2;
slouken@2756
   196
            } else
slouken@2756
   197
                q1 += (q1 & 1);
slouken@2756
   198
        }
slouken@2756
   199
    }
slouken@2756
   200
    ix0 = (q >> 1) + 0x3fe00000;
slouken@2756
   201
    ix1 = q1 >> 1;
slouken@2756
   202
    if ((q & 1) == 1)
slouken@2756
   203
        ix1 |= sign;
slouken@2756
   204
    ix0 += (m << 20);
slouken@2756
   205
    INSERT_WORDS(z, ix0, ix1);
slouken@2756
   206
    return z;
slouken@2756
   207
}
slouken@2756
   208
slouken@2756
   209
/*
slouken@2756
   210
Other methods  (use floating-point arithmetic)
slouken@2756
   211
-------------
slouken@2756
   212
(This is a copy of a drafted paper by Prof W. Kahan
slouken@2756
   213
and K.C. Ng, written in May, 1986)
slouken@2756
   214
slouken@2756
   215
	Two algorithms are given here to implement sqrt(x)
slouken@2756
   216
	(IEEE double precision arithmetic) in software.
slouken@2756
   217
	Both supply sqrt(x) correctly rounded. The first algorithm (in
slouken@2756
   218
	Section A) uses newton iterations and involves four divisions.
slouken@2756
   219
	The second one uses reciproot iterations to avoid division, but
slouken@2756
   220
	requires more multiplications. Both algorithms need the ability
slouken@2756
   221
	to chop results of arithmetic operations instead of round them,
slouken@2756
   222
	and the INEXACT flag to indicate when an arithmetic operation
slouken@2756
   223
	is executed exactly with no roundoff error, all part of the
slouken@2756
   224
	standard (IEEE 754-1985). The ability to perform shift, add,
slouken@2756
   225
	subtract and logical AND operations upon 32-bit words is needed
slouken@2756
   226
	too, though not part of the standard.
slouken@2756
   227
slouken@2756
   228
A.  sqrt(x) by Newton Iteration
slouken@2756
   229
slouken@2756
   230
   (1)	Initial approximation
slouken@2756
   231
slouken@2756
   232
	Let x0 and x1 be the leading and the trailing 32-bit words of
slouken@2756
   233
	a floating point number x (in IEEE double format) respectively
slouken@2756
   234
slouken@2756
   235
	    1    11		     52				  ...widths
slouken@2756
   236
	   ------------------------------------------------------
slouken@2756
   237
	x: |s|	  e     |	      f				|
slouken@2756
   238
	   ------------------------------------------------------
slouken@2756
   239
	      msb    lsb  msb				      lsb ...order
slouken@2756
   240
slouken@2756
   241
slouken@2756
   242
	     ------------------------  	     ------------------------
slouken@2756
   243
	x0:  |s|   e    |    f1     |	 x1: |          f2           |
slouken@2756
   244
	     ------------------------  	     ------------------------
slouken@2756
   245
slouken@2756
   246
	By performing shifts and subtracts on x0 and x1 (both regarded
slouken@2756
   247
	as integers), we obtain an 8-bit approximation of sqrt(x) as
slouken@2756
   248
	follows.
slouken@2756
   249
slouken@2756
   250
		k  := (x0>>1) + 0x1ff80000;
slouken@2756
   251
		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
slouken@2756
   252
	Here k is a 32-bit integer and T1[] is an integer array containing
slouken@2756
   253
	correction terms. Now magically the floating value of y (y's
slouken@2756
   254
	leading 32-bit word is y0, the value of its trailing word is 0)
slouken@2756
   255
	approximates sqrt(x) to almost 8-bit.
slouken@2756
   256
slouken@2756
   257
	Value of T1:
slouken@2756
   258
	static int T1[32]= {
slouken@2756
   259
	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
slouken@2756
   260
	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
slouken@2756
   261
	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
slouken@2756
   262
	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
slouken@2756
   263
slouken@2756
   264
    (2)	Iterative refinement
slouken@2756
   265
slouken@2756
   266
	Apply Heron's rule three times to y, we have y approximates
slouken@2756
   267
	sqrt(x) to within 1 ulp (Unit in the Last Place):
slouken@2756
   268
slouken@2756
   269
		y := (y+x/y)/2		... almost 17 sig. bits
slouken@2756
   270
		y := (y+x/y)/2		... almost 35 sig. bits
slouken@2756
   271
		y := y-(y-x/y)/2	... within 1 ulp
slouken@2756
   272
slouken@2756
   273
slouken@2756
   274
	Remark 1.
slouken@2756
   275
	    Another way to improve y to within 1 ulp is:
slouken@2756
   276
slouken@2756
   277
		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
slouken@2756
   278
		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
slouken@2756
   279
slouken@2756
   280
				2
slouken@2756
   281
			    (x-y )*y
slouken@2756
   282
		y := y + 2* ----------	...within 1 ulp
slouken@2756
   283
			       2
slouken@2756
   284
			     3y  + x
slouken@2756
   285
slouken@2756
   286
slouken@2756
   287
	This formula has one division fewer than the one above; however,
slouken@2756
   288
	it requires more multiplications and additions. Also x must be
slouken@2756
   289
	scaled in advance to avoid spurious overflow in evaluating the
slouken@2756
   290
	expression 3y*y+x. Hence it is not recommended uless division
slouken@2756
   291
	is slow. If division is very slow, then one should use the
slouken@2756
   292
	reciproot algorithm given in section B.
slouken@2756
   293
slouken@2756
   294
    (3) Final adjustment
slouken@2756
   295
slouken@2756
   296
	By twiddling y's last bit it is possible to force y to be
slouken@2756
   297
	correctly rounded according to the prevailing rounding mode
slouken@2756
   298
	as follows. Let r and i be copies of the rounding mode and
slouken@2756
   299
	inexact flag before entering the square root program. Also we
slouken@2756
   300
	use the expression y+-ulp for the next representable floating
slouken@2756
   301
	numbers (up and down) of y. Note that y+-ulp = either fixed
slouken@2756
   302
	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
slouken@2756
   303
	mode.
slouken@2756
   304
slouken@2756
   305
		I := FALSE;	... reset INEXACT flag I
slouken@2756
   306
		R := RZ;	... set rounding mode to round-toward-zero
slouken@2756
   307
		z := x/y;	... chopped quotient, possibly inexact
slouken@2756
   308
		If(not I) then {	... if the quotient is exact
slouken@2756
   309
		    if(z=y) {
slouken@2756
   310
		        I := i;	 ... restore inexact flag
slouken@2756
   311
		        R := r;  ... restore rounded mode
slouken@2756
   312
		        return sqrt(x):=y.
slouken@2756
   313
		    } else {
slouken@2756
   314
			z := z - ulp;	... special rounding
slouken@2756
   315
		    }
slouken@2756
   316
		}
slouken@2756
   317
		i := TRUE;		... sqrt(x) is inexact
slouken@2756
   318
		If (r=RN) then z=z+ulp	... rounded-to-nearest
slouken@2756
   319
		If (r=RP) then {	... round-toward-+inf
slouken@2756
   320
		    y = y+ulp; z=z+ulp;
slouken@2756
   321
		}
slouken@2756
   322
		y := y+z;		... chopped sum
slouken@2756
   323
		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
slouken@2756
   324
	        I := i;	 		... restore inexact flag
slouken@2756
   325
	        R := r;  		... restore rounded mode
slouken@2756
   326
	        return sqrt(x):=y.
slouken@2756
   327
slouken@2756
   328
    (4)	Special cases
slouken@2756
   329
slouken@2756
   330
	Square root of +inf, +-0, or NaN is itself;
slouken@2756
   331
	Square root of a negative number is NaN with invalid signal.
slouken@2756
   332
slouken@2756
   333
slouken@2756
   334
B.  sqrt(x) by Reciproot Iteration
slouken@2756
   335
slouken@2756
   336
   (1)	Initial approximation
slouken@2756
   337
slouken@2756
   338
	Let x0 and x1 be the leading and the trailing 32-bit words of
slouken@2756
   339
	a floating point number x (in IEEE double format) respectively
slouken@2756
   340
	(see section A). By performing shifs and subtracts on x0 and y0,
slouken@2756
   341
	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
slouken@2756
   342
slouken@2756
   343
	    k := 0x5fe80000 - (x0>>1);
slouken@2756
   344
	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
slouken@2756
   345
slouken@2756
   346
	Here k is a 32-bit integer and T2[] is an integer array
slouken@2756
   347
	containing correction terms. Now magically the floating
slouken@2756
   348
	value of y (y's leading 32-bit word is y0, the value of
slouken@2756
   349
	its trailing word y1 is set to zero) approximates 1/sqrt(x)
slouken@2756
   350
	to almost 7.8-bit.
slouken@2756
   351
slouken@2756
   352
	Value of T2:
slouken@2756
   353
	static int T2[64]= {
slouken@2756
   354
	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
slouken@2756
   355
	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
slouken@2756
   356
	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
slouken@2756
   357
	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
slouken@2756
   358
	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
slouken@2756
   359
	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
slouken@2756
   360
	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
slouken@2756
   361
	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
slouken@2756
   362
slouken@2756
   363
    (2)	Iterative refinement
slouken@2756
   364
slouken@2756
   365
	Apply Reciproot iteration three times to y and multiply the
slouken@2756
   366
	result by x to get an approximation z that matches sqrt(x)
slouken@2756
   367
	to about 1 ulp. To be exact, we will have
slouken@2756
   368
		-1ulp < sqrt(x)-z<1.0625ulp.
slouken@2756
   369
slouken@2756
   370
	... set rounding mode to Round-to-nearest
slouken@2756
   371
	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
slouken@2756
   372
	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
slouken@2756
   373
	... special arrangement for better accuracy
slouken@2756
   374
	   z := x*y			... 29 bits to sqrt(x), with z*y<1
slouken@2756
   375
	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
slouken@2756
   376
slouken@2756
   377
	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
slouken@2756
   378
	(a) the term z*y in the final iteration is always less than 1;
slouken@2756
   379
	(b) the error in the final result is biased upward so that
slouken@2756
   380
		-1 ulp < sqrt(x) - z < 1.0625 ulp
slouken@2756
   381
	    instead of |sqrt(x)-z|<1.03125ulp.
slouken@2756
   382
slouken@2756
   383
    (3)	Final adjustment
slouken@2756
   384
slouken@2756
   385
	By twiddling y's last bit it is possible to force y to be
slouken@2756
   386
	correctly rounded according to the prevailing rounding mode
slouken@2756
   387
	as follows. Let r and i be copies of the rounding mode and
slouken@2756
   388
	inexact flag before entering the square root program. Also we
slouken@2756
   389
	use the expression y+-ulp for the next representable floating
slouken@2756
   390
	numbers (up and down) of y. Note that y+-ulp = either fixed
slouken@2756
   391
	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
slouken@2756
   392
	mode.
slouken@2756
   393
slouken@2756
   394
	R := RZ;		... set rounding mode to round-toward-zero
slouken@2756
   395
	switch(r) {
slouken@2756
   396
	    case RN:		... round-to-nearest
slouken@2756
   397
	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
slouken@2756
   398
	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
slouken@2756
   399
	       break;
slouken@2756
   400
	    case RZ:case RM:	... round-to-zero or round-to--inf
slouken@2756
   401
	       R:=RP;		... reset rounding mod to round-to-+inf
slouken@2756
   402
	       if(x<z*z ... rounded up) z = z - ulp; else
slouken@2756
   403
	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
slouken@2756
   404
	       break;
slouken@2756
   405
	    case RP:		... round-to-+inf
slouken@2756
   406
	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
slouken@2756
   407
	       if(x>z*z ...chopped) z = z+ulp;
slouken@2756
   408
	       break;
slouken@2756
   409
	}
slouken@2756
   410
slouken@2756
   411
	Remark 3. The above comparisons can be done in fixed point. For
slouken@2756
   412
	example, to compare x and w=z*z chopped, it suffices to compare
slouken@2756
   413
	x1 and w1 (the trailing parts of x and w), regarding them as
slouken@2756
   414
	two's complement integers.
slouken@2756
   415
slouken@2756
   416
	...Is z an exact square root?
slouken@2756
   417
	To determine whether z is an exact square root of x, let z1 be the
slouken@2756
   418
	trailing part of z, and also let x0 and x1 be the leading and
slouken@2756
   419
	trailing parts of x.
slouken@2756
   420
slouken@2756
   421
	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
slouken@2756
   422
	    I := 1;		... Raise Inexact flag: z is not exact
slouken@2756
   423
	else {
slouken@2756
   424
	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
slouken@2756
   425
	    k := z1 >> 26;		... get z's 25-th and 26-th
slouken@2756
   426
					    fraction bits
slouken@2756
   427
	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
slouken@2756
   428
	}
slouken@2756
   429
	R:= r		... restore rounded mode
slouken@2756
   430
	return sqrt(x):=z.
slouken@2756
   431
slouken@2756
   432
	If multiplication is cheaper then the foregoing red tape, the
slouken@2756
   433
	Inexact flag can be evaluated by
slouken@2756
   434
slouken@2756
   435
	    I := i;
slouken@2756
   436
	    I := (z*z!=x) or I.
slouken@2756
   437
slouken@2756
   438
	Note that z*z can overwrite I; this value must be sensed if it is
slouken@2756
   439
	True.
slouken@2756
   440
slouken@2756
   441
	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
slouken@2756
   442
	zero.
slouken@2756
   443
slouken@2756
   444
		    --------------------
slouken@2756
   445
		z1: |        f2        |
slouken@2756
   446
		    --------------------
slouken@2756
   447
		bit 31		   bit 0
slouken@2756
   448
slouken@2756
   449
	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
slouken@2756
   450
	or even of logb(x) have the following relations:
slouken@2756
   451
slouken@2756
   452
	-------------------------------------------------
slouken@2756
   453
	bit 27,26 of z1		bit 1,0 of x1	logb(x)
slouken@2756
   454
	-------------------------------------------------
slouken@2756
   455
	00			00		odd and even
slouken@2756
   456
	01			01		even
slouken@2756
   457
	10			10		odd
slouken@2756
   458
	10			00		even
slouken@2756
   459
	11			01		even
slouken@2756
   460
	-------------------------------------------------
slouken@2756
   461
slouken@2756
   462
    (4)	Special cases (see (4) of Section A).
slouken@2756
   463
slouken@2756
   464
 */