src/libm/e_sqrt.c
author Ryan C. Gordon <icculus@icculus.org>
Thu, 21 Apr 2016 03:16:44 -0400
changeset 11729 d1ce8396c356
parent 11683 48bcba563d9c
permissions -rw-r--r--
Initial shot at a renderer target for Apple's Metal API.

This isn't complete, but is enough to run testsprite2. It's currently
Mac-only; with a little work to figure out how to properly glue in a Metal
layer to a UIView, this will likely work on iOS, too.

This is only wired up to the configure script right now, and disabled by
default. CMake and Xcode still need their bits filled in as appropriate.
slouken@2756
     1
/*
slouken@2756
     2
 * ====================================================
slouken@2756
     3
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
slouken@2756
     4
 *
slouken@2756
     5
 * Developed at SunPro, a Sun Microsystems, Inc. business.
slouken@2756
     6
 * Permission to use, copy, modify, and distribute this
slouken@2756
     7
 * software is freely granted, provided that this notice
slouken@2756
     8
 * is preserved.
slouken@2756
     9
 * ====================================================
slouken@2756
    10
 */
slouken@2756
    11
slouken@2756
    12
/* __ieee754_sqrt(x)
slouken@2756
    13
 * Return correctly rounded sqrt.
slouken@2756
    14
 *           ------------------------------------------
slouken@2756
    15
 *	     |  Use the hardware sqrt if you have one |
slouken@2756
    16
 *           ------------------------------------------
slouken@2756
    17
 * Method:
slouken@2756
    18
 *   Bit by bit method using integer arithmetic. (Slow, but portable)
slouken@2756
    19
 *   1. Normalization
slouken@2756
    20
 *	Scale x to y in [1,4) with even powers of 2:
slouken@2756
    21
 *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
slouken@2756
    22
 *		sqrt(x) = 2^k * sqrt(y)
slouken@2756
    23
 *   2. Bit by bit computation
slouken@2756
    24
 *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
slouken@2756
    25
 *	     i							 0
slouken@2756
    26
 *                                     i+1         2
slouken@2756
    27
 *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
slouken@2756
    28
 *	     i      i            i                 i
slouken@2756
    29
 *
slouken@2756
    30
 *	To compute q    from q , one checks whether
slouken@2756
    31
 *		    i+1       i
slouken@2756
    32
 *
slouken@2756
    33
 *			      -(i+1) 2
slouken@2756
    34
 *			(q + 2      ) <= y.			(2)
slouken@2756
    35
 *     			  i
slouken@2756
    36
 *							      -(i+1)
slouken@2756
    37
 *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
slouken@2756
    38
 *		 	       i+1   i             i+1   i
slouken@2756
    39
 *
slouken@2756
    40
 *	With some algebric manipulation, it is not difficult to see
slouken@2756
    41
 *	that (2) is equivalent to
slouken@2756
    42
 *                             -(i+1)
slouken@2756
    43
 *			s  +  2       <= y			(3)
slouken@2756
    44
 *			 i                i
slouken@2756
    45
 *
slouken@2756
    46
 *	The advantage of (3) is that s  and y  can be computed by
slouken@2756
    47
 *				      i      i
slouken@2756
    48
 *	the following recurrence formula:
slouken@2756
    49
 *	    if (3) is false
slouken@2756
    50
 *
slouken@2756
    51
 *	    s     =  s  ,	y    = y   ;			(4)
slouken@2756
    52
 *	     i+1      i		 i+1    i
slouken@2756
    53
 *
slouken@2756
    54
 *	    otherwise,
slouken@2756
    55
 *                         -i                     -(i+1)
slouken@2756
    56
 *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
slouken@2756
    57
 *           i+1      i          i+1    i     i
slouken@2756
    58
 *
slouken@2756
    59
 *	One may easily use induction to prove (4) and (5).
slouken@2756
    60
 *	Note. Since the left hand side of (3) contain only i+2 bits,
slouken@2756
    61
 *	      it does not necessary to do a full (53-bit) comparison
slouken@2756
    62
 *	      in (3).
slouken@2756
    63
 *   3. Final rounding
slouken@2756
    64
 *	After generating the 53 bits result, we compute one more bit.
slouken@2756
    65
 *	Together with the remainder, we can decide whether the
slouken@2756
    66
 *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
slouken@2756
    67
 *	(it will never equal to 1/2ulp).
slouken@2756
    68
 *	The rounding mode can be detected by checking whether
slouken@2756
    69
 *	huge + tiny is equal to huge, and whether huge - tiny is
slouken@2756
    70
 *	equal to huge for some floating point number "huge" and "tiny".
slouken@2756
    71
 *
slouken@2756
    72
 * Special cases:
slouken@2756
    73
 *	sqrt(+-0) = +-0 	... exact
slouken@2756
    74
 *	sqrt(inf) = inf
slouken@2756
    75
 *	sqrt(-ve) = NaN		... with invalid signal
slouken@2756
    76
 *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
slouken@2756
    77
 *
slouken@2756
    78
 * Other methods : see the appended file at the end of the program below.
slouken@2756
    79
 *---------------
slouken@2756
    80
 */
slouken@2756
    81
slouken@6044
    82
#include "math_libm.h"
slouken@2756
    83
#include "math_private.h"
slouken@2756
    84
slouken@2756
    85
static const double one = 1.0, tiny = 1.0e-300;
slouken@2756
    86
slouken@11683
    87
double attribute_hidden __ieee754_sqrt(double x)
slouken@2756
    88
{
slouken@11683
    89
	double z;
slouken@11683
    90
	int32_t sign = (int)0x80000000;
slouken@11683
    91
	int32_t ix0,s0,q,m,t,i;
slouken@11683
    92
	u_int32_t r,t1,s1,ix1,q1;
slouken@2756
    93
slouken@11683
    94
	EXTRACT_WORDS(ix0,ix1,x);
slouken@2756
    95
slouken@2756
    96
    /* take care of Inf and NaN */
slouken@11683
    97
	if((ix0&0x7ff00000)==0x7ff00000) {
slouken@11683
    98
	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
slouken@11683
    99
					   sqrt(-inf)=sNaN */
slouken@11683
   100
	}
slouken@2756
   101
    /* take care of zero */
slouken@11683
   102
	if(ix0<=0) {
slouken@11683
   103
	    if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
slouken@11683
   104
	    else if(ix0<0)
slouken@11683
   105
		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
slouken@11683
   106
	}
slouken@2756
   107
    /* normalize x */
slouken@11683
   108
	m = (ix0>>20);
slouken@11683
   109
	if(m==0) {				/* subnormal x */
slouken@11683
   110
	    while(ix0==0) {
slouken@11683
   111
		m -= 21;
slouken@11683
   112
		ix0 |= (ix1>>11); ix1 <<= 21;
slouken@11683
   113
	    }
slouken@11683
   114
	    for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
slouken@11683
   115
	    m -= i-1;
slouken@11683
   116
	    ix0 |= (ix1>>(32-i));
slouken@11683
   117
	    ix1 <<= i;
slouken@11683
   118
	}
slouken@11683
   119
	m -= 1023;	/* unbias exponent */
slouken@11683
   120
	ix0 = (ix0&0x000fffff)|0x00100000;
slouken@11683
   121
	if(m&1){	/* odd m, double x to make it even */
slouken@11683
   122
	    ix0 += ix0 + ((ix1&sign)>>31);
slouken@11683
   123
	    ix1 += ix1;
slouken@11683
   124
	}
slouken@11683
   125
	m >>= 1;	/* m = [m/2] */
slouken@2756
   126
slouken@2756
   127
    /* generate sqrt(x) bit by bit */
slouken@11683
   128
	ix0 += ix0 + ((ix1&sign)>>31);
slouken@11683
   129
	ix1 += ix1;
slouken@11683
   130
	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
slouken@11683
   131
	r = 0x00200000;		/* r = moving bit from right to left */
slouken@2756
   132
slouken@11683
   133
	while(r!=0) {
slouken@11683
   134
	    t = s0+r;
slouken@11683
   135
	    if(t<=ix0) {
slouken@11683
   136
		s0   = t+r;
slouken@11683
   137
		ix0 -= t;
slouken@11683
   138
		q   += r;
slouken@11683
   139
	    }
slouken@11683
   140
	    ix0 += ix0 + ((ix1&sign)>>31);
slouken@11683
   141
	    ix1 += ix1;
slouken@11683
   142
	    r>>=1;
slouken@11683
   143
	}
slouken@2756
   144
slouken@11683
   145
	r = sign;
slouken@11683
   146
	while(r!=0) {
slouken@11683
   147
	    t1 = s1+r;
slouken@11683
   148
	    t  = s0;
slouken@11683
   149
	    if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
slouken@11683
   150
		s1  = t1+r;
slouken@11683
   151
		if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
slouken@11683
   152
		ix0 -= t;
slouken@11683
   153
		if (ix1 < t1) ix0 -= 1;
slouken@11683
   154
		ix1 -= t1;
slouken@11683
   155
		q1  += r;
slouken@11683
   156
	    }
slouken@11683
   157
	    ix0 += ix0 + ((ix1&sign)>>31);
slouken@11683
   158
	    ix1 += ix1;
slouken@11683
   159
	    r>>=1;
slouken@11683
   160
	}
slouken@2756
   161
slouken@2756
   162
    /* use floating add to find out rounding direction */
slouken@11683
   163
	if((ix0|ix1)!=0) {
slouken@11683
   164
	    z = one-tiny; /* trigger inexact flag */
slouken@11683
   165
	    if (z>=one) {
slouken@11683
   166
	        z = one+tiny;
slouken@11683
   167
	        if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
slouken@11683
   168
		else if (z>one) {
slouken@11683
   169
		    if (q1==(u_int32_t)0xfffffffe) q+=1;
slouken@11683
   170
		    q1+=2;
slouken@11683
   171
		} else
slouken@11683
   172
	            q1 += (q1&1);
slouken@11683
   173
	    }
slouken@11683
   174
	}
slouken@11683
   175
	ix0 = (q>>1)+0x3fe00000;
slouken@11683
   176
	ix1 =  q1>>1;
slouken@11683
   177
	if ((q&1)==1) ix1 |= sign;
slouken@11683
   178
	ix0 += (m <<20);
slouken@11683
   179
	INSERT_WORDS(z,ix0,ix1);
slouken@11683
   180
	return z;
slouken@2756
   181
}
slouken@2756
   182
slouken@2756
   183
/*
slouken@11683
   184
 * wrapper sqrt(x)
slouken@11683
   185
 */
slouken@11683
   186
#ifndef _IEEE_LIBM
slouken@11683
   187
double sqrt(double x)
slouken@11683
   188
{
slouken@11683
   189
	double z = __ieee754_sqrt(x);
slouken@11683
   190
	if (_LIB_VERSION == _IEEE_ || isnan(x))
slouken@11683
   191
		return z;
slouken@11683
   192
	if (x < 0.0)
slouken@11683
   193
		return __kernel_standard(x, x, 26); /* sqrt(negative) */
slouken@11683
   194
	return z;
slouken@11683
   195
}
slouken@11683
   196
#else
slouken@11683
   197
strong_alias(__ieee754_sqrt, sqrt)
slouken@11683
   198
#endif
slouken@11683
   199
libm_hidden_def(sqrt)
slouken@11683
   200
slouken@11683
   201
slouken@11683
   202
/*
slouken@2756
   203
Other methods  (use floating-point arithmetic)
slouken@2756
   204
-------------
slouken@2756
   205
(This is a copy of a drafted paper by Prof W. Kahan
slouken@2756
   206
and K.C. Ng, written in May, 1986)
slouken@2756
   207
slouken@2756
   208
	Two algorithms are given here to implement sqrt(x)
slouken@2756
   209
	(IEEE double precision arithmetic) in software.
slouken@2756
   210
	Both supply sqrt(x) correctly rounded. The first algorithm (in
slouken@2756
   211
	Section A) uses newton iterations and involves four divisions.
slouken@2756
   212
	The second one uses reciproot iterations to avoid division, but
slouken@2756
   213
	requires more multiplications. Both algorithms need the ability
slouken@2756
   214
	to chop results of arithmetic operations instead of round them,
slouken@2756
   215
	and the INEXACT flag to indicate when an arithmetic operation
slouken@2756
   216
	is executed exactly with no roundoff error, all part of the
slouken@2756
   217
	standard (IEEE 754-1985). The ability to perform shift, add,
slouken@2756
   218
	subtract and logical AND operations upon 32-bit words is needed
slouken@2756
   219
	too, though not part of the standard.
slouken@2756
   220
slouken@2756
   221
A.  sqrt(x) by Newton Iteration
slouken@2756
   222
slouken@2756
   223
   (1)	Initial approximation
slouken@2756
   224
slouken@2756
   225
	Let x0 and x1 be the leading and the trailing 32-bit words of
slouken@2756
   226
	a floating point number x (in IEEE double format) respectively
slouken@2756
   227
slouken@2756
   228
	    1    11		     52				  ...widths
slouken@2756
   229
	   ------------------------------------------------------
slouken@2756
   230
	x: |s|	  e     |	      f				|
slouken@2756
   231
	   ------------------------------------------------------
slouken@2756
   232
	      msb    lsb  msb				      lsb ...order
slouken@2756
   233
slouken@2756
   234
slouken@2756
   235
	     ------------------------  	     ------------------------
slouken@2756
   236
	x0:  |s|   e    |    f1     |	 x1: |          f2           |
slouken@2756
   237
	     ------------------------  	     ------------------------
slouken@2756
   238
slouken@2756
   239
	By performing shifts and subtracts on x0 and x1 (both regarded
slouken@2756
   240
	as integers), we obtain an 8-bit approximation of sqrt(x) as
slouken@2756
   241
	follows.
slouken@2756
   242
slouken@2756
   243
		k  := (x0>>1) + 0x1ff80000;
slouken@2756
   244
		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
slouken@2756
   245
	Here k is a 32-bit integer and T1[] is an integer array containing
slouken@2756
   246
	correction terms. Now magically the floating value of y (y's
slouken@2756
   247
	leading 32-bit word is y0, the value of its trailing word is 0)
slouken@2756
   248
	approximates sqrt(x) to almost 8-bit.
slouken@2756
   249
slouken@2756
   250
	Value of T1:
slouken@2756
   251
	static int T1[32]= {
slouken@2756
   252
	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
slouken@2756
   253
	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
slouken@2756
   254
	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
slouken@2756
   255
	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
slouken@2756
   256
slouken@2756
   257
    (2)	Iterative refinement
slouken@2756
   258
slouken@2756
   259
	Apply Heron's rule three times to y, we have y approximates
slouken@2756
   260
	sqrt(x) to within 1 ulp (Unit in the Last Place):
slouken@2756
   261
slouken@2756
   262
		y := (y+x/y)/2		... almost 17 sig. bits
slouken@2756
   263
		y := (y+x/y)/2		... almost 35 sig. bits
slouken@2756
   264
		y := y-(y-x/y)/2	... within 1 ulp
slouken@2756
   265
slouken@2756
   266
slouken@2756
   267
	Remark 1.
slouken@2756
   268
	    Another way to improve y to within 1 ulp is:
slouken@2756
   269
slouken@2756
   270
		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
slouken@2756
   271
		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
slouken@2756
   272
slouken@2756
   273
				2
slouken@2756
   274
			    (x-y )*y
slouken@2756
   275
		y := y + 2* ----------	...within 1 ulp
slouken@2756
   276
			       2
slouken@2756
   277
			     3y  + x
slouken@2756
   278
slouken@2756
   279
slouken@2756
   280
	This formula has one division fewer than the one above; however,
slouken@2756
   281
	it requires more multiplications and additions. Also x must be
slouken@2756
   282
	scaled in advance to avoid spurious overflow in evaluating the
slouken@2756
   283
	expression 3y*y+x. Hence it is not recommended uless division
slouken@2756
   284
	is slow. If division is very slow, then one should use the
slouken@2756
   285
	reciproot algorithm given in section B.
slouken@2756
   286
slouken@2756
   287
    (3) Final adjustment
slouken@2756
   288
slouken@2756
   289
	By twiddling y's last bit it is possible to force y to be
slouken@2756
   290
	correctly rounded according to the prevailing rounding mode
slouken@2756
   291
	as follows. Let r and i be copies of the rounding mode and
slouken@2756
   292
	inexact flag before entering the square root program. Also we
slouken@2756
   293
	use the expression y+-ulp for the next representable floating
slouken@2756
   294
	numbers (up and down) of y. Note that y+-ulp = either fixed
slouken@2756
   295
	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
slouken@2756
   296
	mode.
slouken@2756
   297
slouken@2756
   298
		I := FALSE;	... reset INEXACT flag I
slouken@2756
   299
		R := RZ;	... set rounding mode to round-toward-zero
slouken@2756
   300
		z := x/y;	... chopped quotient, possibly inexact
slouken@2756
   301
		If(not I) then {	... if the quotient is exact
slouken@2756
   302
		    if(z=y) {
slouken@2756
   303
		        I := i;	 ... restore inexact flag
slouken@2756
   304
		        R := r;  ... restore rounded mode
slouken@2756
   305
		        return sqrt(x):=y.
slouken@2756
   306
		    } else {
slouken@2756
   307
			z := z - ulp;	... special rounding
slouken@2756
   308
		    }
slouken@2756
   309
		}
slouken@2756
   310
		i := TRUE;		... sqrt(x) is inexact
slouken@2756
   311
		If (r=RN) then z=z+ulp	... rounded-to-nearest
slouken@2756
   312
		If (r=RP) then {	... round-toward-+inf
slouken@2756
   313
		    y = y+ulp; z=z+ulp;
slouken@2756
   314
		}
slouken@2756
   315
		y := y+z;		... chopped sum
slouken@2756
   316
		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
slouken@2756
   317
	        I := i;	 		... restore inexact flag
slouken@2756
   318
	        R := r;  		... restore rounded mode
slouken@2756
   319
	        return sqrt(x):=y.
slouken@2756
   320
slouken@2756
   321
    (4)	Special cases
slouken@2756
   322
slouken@2756
   323
	Square root of +inf, +-0, or NaN is itself;
slouken@2756
   324
	Square root of a negative number is NaN with invalid signal.
slouken@2756
   325
slouken@2756
   326
slouken@2756
   327
B.  sqrt(x) by Reciproot Iteration
slouken@2756
   328
slouken@2756
   329
   (1)	Initial approximation
slouken@2756
   330
slouken@2756
   331
	Let x0 and x1 be the leading and the trailing 32-bit words of
slouken@2756
   332
	a floating point number x (in IEEE double format) respectively
slouken@2756
   333
	(see section A). By performing shifs and subtracts on x0 and y0,
slouken@2756
   334
	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
slouken@2756
   335
slouken@2756
   336
	    k := 0x5fe80000 - (x0>>1);
slouken@2756
   337
	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
slouken@2756
   338
slouken@2756
   339
	Here k is a 32-bit integer and T2[] is an integer array
slouken@2756
   340
	containing correction terms. Now magically the floating
slouken@2756
   341
	value of y (y's leading 32-bit word is y0, the value of
slouken@2756
   342
	its trailing word y1 is set to zero) approximates 1/sqrt(x)
slouken@2756
   343
	to almost 7.8-bit.
slouken@2756
   344
slouken@2756
   345
	Value of T2:
slouken@2756
   346
	static int T2[64]= {
slouken@2756
   347
	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
slouken@2756
   348
	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
slouken@2756
   349
	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
slouken@2756
   350
	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
slouken@2756
   351
	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
slouken@2756
   352
	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
slouken@2756
   353
	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
slouken@2756
   354
	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
slouken@2756
   355
slouken@2756
   356
    (2)	Iterative refinement
slouken@2756
   357
slouken@2756
   358
	Apply Reciproot iteration three times to y and multiply the
slouken@2756
   359
	result by x to get an approximation z that matches sqrt(x)
slouken@2756
   360
	to about 1 ulp. To be exact, we will have
slouken@2756
   361
		-1ulp < sqrt(x)-z<1.0625ulp.
slouken@2756
   362
slouken@2756
   363
	... set rounding mode to Round-to-nearest
slouken@2756
   364
	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
slouken@2756
   365
	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
slouken@2756
   366
	... special arrangement for better accuracy
slouken@2756
   367
	   z := x*y			... 29 bits to sqrt(x), with z*y<1
slouken@2756
   368
	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
slouken@2756
   369
slouken@2756
   370
	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
slouken@2756
   371
	(a) the term z*y in the final iteration is always less than 1;
slouken@2756
   372
	(b) the error in the final result is biased upward so that
slouken@2756
   373
		-1 ulp < sqrt(x) - z < 1.0625 ulp
slouken@2756
   374
	    instead of |sqrt(x)-z|<1.03125ulp.
slouken@2756
   375
slouken@2756
   376
    (3)	Final adjustment
slouken@2756
   377
slouken@2756
   378
	By twiddling y's last bit it is possible to force y to be
slouken@2756
   379
	correctly rounded according to the prevailing rounding mode
slouken@2756
   380
	as follows. Let r and i be copies of the rounding mode and
slouken@2756
   381
	inexact flag before entering the square root program. Also we
slouken@2756
   382
	use the expression y+-ulp for the next representable floating
slouken@2756
   383
	numbers (up and down) of y. Note that y+-ulp = either fixed
slouken@2756
   384
	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
slouken@2756
   385
	mode.
slouken@2756
   386
slouken@2756
   387
	R := RZ;		... set rounding mode to round-toward-zero
slouken@2756
   388
	switch(r) {
slouken@2756
   389
	    case RN:		... round-to-nearest
slouken@2756
   390
	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
slouken@2756
   391
	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
slouken@2756
   392
	       break;
slouken@2756
   393
	    case RZ:case RM:	... round-to-zero or round-to--inf
slouken@2756
   394
	       R:=RP;		... reset rounding mod to round-to-+inf
slouken@2756
   395
	       if(x<z*z ... rounded up) z = z - ulp; else
slouken@2756
   396
	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
slouken@2756
   397
	       break;
slouken@2756
   398
	    case RP:		... round-to-+inf
slouken@2756
   399
	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
slouken@2756
   400
	       if(x>z*z ...chopped) z = z+ulp;
slouken@2756
   401
	       break;
slouken@2756
   402
	}
slouken@2756
   403
slouken@2756
   404
	Remark 3. The above comparisons can be done in fixed point. For
slouken@2756
   405
	example, to compare x and w=z*z chopped, it suffices to compare
slouken@2756
   406
	x1 and w1 (the trailing parts of x and w), regarding them as
slouken@2756
   407
	two's complement integers.
slouken@2756
   408
slouken@2756
   409
	...Is z an exact square root?
slouken@2756
   410
	To determine whether z is an exact square root of x, let z1 be the
slouken@2756
   411
	trailing part of z, and also let x0 and x1 be the leading and
slouken@2756
   412
	trailing parts of x.
slouken@2756
   413
slouken@2756
   414
	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
slouken@2756
   415
	    I := 1;		... Raise Inexact flag: z is not exact
slouken@2756
   416
	else {
slouken@2756
   417
	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
slouken@2756
   418
	    k := z1 >> 26;		... get z's 25-th and 26-th
slouken@2756
   419
					    fraction bits
slouken@2756
   420
	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
slouken@2756
   421
	}
slouken@2756
   422
	R:= r		... restore rounded mode
slouken@2756
   423
	return sqrt(x):=z.
slouken@2756
   424
slouken@2756
   425
	If multiplication is cheaper then the foregoing red tape, the
slouken@2756
   426
	Inexact flag can be evaluated by
slouken@2756
   427
slouken@2756
   428
	    I := i;
slouken@2756
   429
	    I := (z*z!=x) or I.
slouken@2756
   430
slouken@2756
   431
	Note that z*z can overwrite I; this value must be sensed if it is
slouken@2756
   432
	True.
slouken@2756
   433
slouken@2756
   434
	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
slouken@2756
   435
	zero.
slouken@2756
   436
slouken@2756
   437
		    --------------------
slouken@2756
   438
		z1: |        f2        |
slouken@2756
   439
		    --------------------
slouken@2756
   440
		bit 31		   bit 0
slouken@2756
   441
slouken@2756
   442
	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
slouken@2756
   443
	or even of logb(x) have the following relations:
slouken@2756
   444
slouken@2756
   445
	-------------------------------------------------
slouken@2756
   446
	bit 27,26 of z1		bit 1,0 of x1	logb(x)
slouken@2756
   447
	-------------------------------------------------
slouken@2756
   448
	00			00		odd and even
slouken@2756
   449
	01			01		even
slouken@2756
   450
	10			10		odd
slouken@2756
   451
	10			00		even
slouken@2756
   452
	11			01		even
slouken@2756
   453
	-------------------------------------------------
slouken@2756
   454
slouken@2756
   455
    (4)	Special cases (see (4) of Section A).
slouken@2756
   456
slouken@2756
   457
 */