src/libm/k_rem_pio2.c
author Sam Lantinga <slouken@libsdl.org>
Wed, 03 Jun 2009 04:37:27 +0000
changeset 3162 dc1eb82ffdaa
parent 2757 0581f49c9294
child 6044 35448a5ea044
permissions -rw-r--r--
Von: Thomas Zimmermann
Betreff: [SDL] [PATCH] Make static variables const
Datum: Tue, 19 May 2009 19:45:37 +0200

Hi,

this is a set of simple changes which make some of SDL's internal static
arrays constant. The purpose is to shrink the number of write-able
static bytes and thus increase the number of memory pages shared between
SDL applications.

The patch set is against trunk@4513. Each of the attached patch files is
specific to a sub-system. The set is completed by a second mail, because
of the list's 40 KiB limit.

The files readelf-r4513.txt and readelf-const-patch.txt were made by
calling 'readelf -S libSDL.so'. They show the difference in ELF sections
without and with the patch. Some numbers measured on my x86-64:

Before

[13] .rodata PROGBITS 00000000000eaaa0 000eaaa0
0000000000008170 0000000000000000 A 0 0 32
[19] .data.rel.ro PROGBITS 00000000003045e0 001045e0
00000000000023d0 0000000000000000 WA 0 0 32
[23] .data PROGBITS 00000000003076e0 001076e0
0000000000004988 0000000000000000 WA 0 0 32

After

[13] .rodata PROGBITS 00000000000eaaa0 000eaaa0
0000000000009a50 0000000000000000 A 0 0 32
[19] .data.rel.ro PROGBITS 0000000000306040 00106040
0000000000002608 0000000000000000 WA 0 0 32
[23] .data PROGBITS 0000000000309360 00109360
0000000000002e88 0000000000000000 WA 0 0 32

The size of the write-able data section decreased considerably. Some
entries became const-after-relocation, while most of its content went
straight into the read-only data section.

Best regards, Thomas
slouken@2757
     1
/* @(#)k_rem_pio2.c 5.1 93/09/24 */
slouken@2757
     2
/*
slouken@2757
     3
 * ====================================================
slouken@2757
     4
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
slouken@2757
     5
 *
slouken@2757
     6
 * Developed at SunPro, a Sun Microsystems, Inc. business.
slouken@2757
     7
 * Permission to use, copy, modify, and distribute this
slouken@2757
     8
 * software is freely granted, provided that this notice
slouken@2757
     9
 * is preserved.
slouken@2757
    10
 * ====================================================
slouken@2757
    11
 */
slouken@2757
    12
slouken@2757
    13
#if defined(LIBM_SCCS) && !defined(lint)
/* Revision identification string; only compiled when LIBM_SCCS is defined
 * and lint is not. */
static const char rcsid[] =
    "$NetBSD: k_rem_pio2.c,v 1.7 1995/05/10 20:46:25 jtc Exp $";
#endif
slouken@2757
    17
slouken@2757
    18
/*
slouken@2757
    19
 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
slouken@2757
    20
 * double x[],y[]; int e0,nx,prec; int ipio2[];
slouken@2757
    21
 *
slouken@2757
    22
 * __kernel_rem_pio2 returns the last three digits of N with
slouken@2757
    23
 *		y = x - N*pi/2
slouken@2757
    24
 * so that |y| < pi/2.
slouken@2757
    25
 *
slouken@2757
    26
 * The method is to compute the integer (mod 8) and fraction parts of
slouken@2757
    27
 * (2/pi)*x without doing the full multiplication. In general we
slouken@2757
    28
 * skip the parts of the product that are known to be a huge integer (
slouken@2757
    29
 * more accurately, = 0 mod 8 ). Thus the number of operations are
slouken@2757
    30
 * independent of the exponent of the input.
slouken@2757
    31
 *
slouken@2757
    32
 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
slouken@2757
    33
 *
slouken@2757
    34
 * Input parameters:
slouken@2757
    35
 * 	x[]	The input value (must be positive) is broken into nx
slouken@2757
    36
 *		pieces of 24-bit integers in double precision format.
slouken@2757
    37
 *		x[i] will be the i-th 24 bit of x. The scaled exponent
slouken@2757
    38
 *		of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
slouken@2757
    39
 *		matches x up to 24 bits).
slouken@2757
    40
 *
slouken@2757
    41
 *		Example of breaking a double positive z into x[0]+x[1]+x[2]:
slouken@2757
    42
 *			e0 = ilogb(z)-23
slouken@2757
    43
 *			z  = scalbn(z,-e0)
slouken@2757
    44
 *		for i = 0,1,2
slouken@2757
    45
 *			x[i] = floor(z)
slouken@2757
    46
 *			z    = (z-x[i])*2**24
slouken@2757
    47
 *
slouken@2757
    48
 *
slouken@2757
    49
 *	y[]	output result in an array of double precision numbers.
slouken@2757
    50
 *		The dimension of y[] is:
slouken@2757
    51
 *			24-bit  precision	1
slouken@2757
    52
 *			53-bit  precision	2
slouken@2757
    53
 *			64-bit  precision	2
slouken@2757
    54
 *			113-bit precision	3
slouken@2757
    55
 *		The actual value is the sum of them. Thus for 113-bit
slouken@2757
    56
 *		precision, one may have to do something like:
slouken@2757
    57
 *
slouken@2757
    58
 *		long double t,w,r_head, r_tail;
slouken@2757
    59
 *		t = (long double)y[2] + (long double)y[1];
slouken@2757
    60
 *		w = (long double)y[0];
slouken@2757
    61
 *		r_head = t+w;
slouken@2757
    62
 *		r_tail = w - (r_head - t);
slouken@2757
    63
 *
slouken@2757
    64
 *	e0	The exponent of x[0]
slouken@2757
    65
 *
slouken@2757
    66
 *	nx	dimension of x[]
slouken@2757
    67
 *
slouken@2757
    68
 *  	prec	an integer indicating the precision:
slouken@2757
    69
 *			0	24  bits (single)
slouken@2757
    70
 *			1	53  bits (double)
slouken@2757
    71
 *			2	64  bits (extended)
slouken@2757
    72
 *			3	113 bits (quad)
slouken@2757
    73
 *
slouken@2757
    74
 *	ipio2[]
slouken@2757
    75
 *		integer array, contains the (24*i)-th to (24*i+23)-th
slouken@2757
    76
 *		bit of 2/pi after binary point. The corresponding
slouken@2757
    77
 *		floating value is
slouken@2757
    78
 *
slouken@2757
    79
 *			ipio2[i] * 2^(-24(i+1)).
slouken@2757
    80
 *
slouken@2757
    81
 * External function:
slouken@2757
    82
 *	double scalbn(), floor();
slouken@2757
    83
 *
slouken@2757
    84
 *
slouken@2757
    85
 * Here is the description of some local variables:
slouken@2757
    86
 *
slouken@2757
    87
 * 	jk	jk+1 is the initial number of terms of ipio2[] needed
slouken@2757
    88
 *		in the computation. The recommended value is 2,3,4,
slouken@2757
    89
 *		6 for single, double, extended,and quad.
slouken@2757
    90
 *
slouken@2757
    91
 * 	jz	local integer variable indicating the number of
slouken@2757
    92
 *		terms of ipio2[] used.
slouken@2757
    93
 *
slouken@2757
    94
 *	jx	nx - 1
slouken@2757
    95
 *
slouken@2757
    96
 *	jv	index for pointing to the suitable ipio2[] for the
slouken@2757
    97
 *		computation. In general, we want
slouken@2757
    98
 *			( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
slouken@2757
    99
 *		is an integer. Thus
slouken@2757
   100
 *			e0-3-24*jv >= 0 or (e0-3)/24 >= jv
slouken@2757
   101
 *		Hence jv = max(0,(e0-3)/24).
slouken@2757
   102
 *
slouken@2757
   103
 *	jp	jp+1 is the number of terms in PIo2[] needed, jp = jk.
slouken@2757
   104
 *
slouken@2757
   105
 * 	q[]	double array with integral value, representing the
slouken@2757
   106
 *		24-bits chunk of the product of x and 2/pi.
slouken@2757
   107
 *
slouken@2757
   108
 *	q0	the corresponding exponent of q[0]. Note that the
slouken@2757
   109
 *		exponent for q[i] would be q0-24*i.
slouken@2757
   110
 *
slouken@2757
   111
 *	PIo2[]	double precision array, obtained by cutting pi/2
slouken@2757
   112
 *		into 24 bits chunks.
slouken@2757
   113
 *
slouken@2757
   114
 *	f[]	ipio2[] in floating point
slouken@2757
   115
 *
slouken@2757
   116
 *	iq[]	integer array by breaking up q[] in 24-bits chunk.
slouken@2757
   117
 *
slouken@2757
   118
 *	fq[]	final product of x*(2/pi) in fq[0],..,fq[jk]
slouken@2757
   119
 *
slouken@2757
   120
 *	ih	integer. If >0 it indicates q[] is >= 0.5, hence
slouken@2757
   121
 *		it also indicates the *sign* of the result.
slouken@2757
   122
 *
slouken@2757
   123
 */
slouken@2757
   124
slouken@2757
   125
slouken@2757
   126
/*
slouken@2757
   127
 * Constants:
slouken@2757
   128
 * The hexadecimal values are the intended ones for the following
slouken@2757
   129
 * constants. The decimal values may be used, provided that the
slouken@2757
   130
 * compiler will convert from decimal to binary accurately enough
slouken@2757
   131
 * to produce the hexadecimal values shown.
slouken@2757
   132
 */
slouken@2757
   133
slouken@2757
   134
#include "math.h"
slouken@2757
   135
#include "math_private.h"
slouken@2757
   136
slouken@2757
   137
libm_hidden_proto(scalbn)
slouken@2757
   138
    libm_hidden_proto(floor)
slouken@2757
   139
/*
 * Initial value for jk, indexed by prec (0 = single, 1 = double,
 * 2 = extended, 3 = quad).  jk+1 is the initial number of terms of
 * ipio2[] used by __kernel_rem_pio2.
 */
#ifdef __STDC__
static const int init_jk[] = { 2, 3, 4, 6 };
#else
static int init_jk[] = { 2, 3, 4, 6 };
#endif
slouken@2757
   144
slouken@2757
   145
/*
 * pi/2 cut into 24-bit chunks, most significant chunk first.  The hex
 * words in the comments (high word, low word of the IEEE double) are the
 * intended values; the decimal literals must convert to exactly those.
 */
#ifdef __STDC__
static const double PIo2[] = {
#else
static double PIo2[] = {
#endif
    1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
    7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
    5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
    3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
    1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
    1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
    2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
    2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
};
slouken@2757
   159
slouken@2757
   160
/* Scalar constants shared by the reduction kernel. */
#ifdef __STDC__
static const double
#else
static double
#endif
    zero = 0.0,
    one = 1.0,
    two24 = 1.67772160000000000000e+07,     /* 2^24:  0x41700000, 0x00000000 */
    twon24 = 5.96046447753906250000e-08;    /* 2^-24: 0x3E700000, 0x00000000 */
slouken@2757
   167
slouken@2757
   168
#ifdef __STDC__
slouken@2757
   169
int attribute_hidden
slouken@2757
   170
__kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
slouken@2757
   171
                  const int32_t * ipio2)
slouken@2757
   172
#else
slouken@2757
   173
int attribute_hidden
slouken@2757
   174
__kernel_rem_pio2(x, y, e0, nx, prec, ipio2)
slouken@2757
   175
     double x[], y[];
slouken@2757
   176
     int e0, nx, prec;
slouken@2757
   177
     int32_t ipio2[];
slouken@2757
   178
#endif
slouken@2757
   179
{
slouken@2757
   180
    int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
slouken@2757
   181
    double z, fw, f[20], fq[20], q[20];
slouken@2757
   182
slouken@2757
   183
    /* initialize jk */
slouken@2757
   184
    jk = init_jk[prec];
slouken@2757
   185
    jp = jk;
slouken@2757
   186
slouken@2757
   187
    /* determine jx,jv,q0, note that 3>q0 */
slouken@2757
   188
    jx = nx - 1;
slouken@2757
   189
    jv = (e0 - 3) / 24;
slouken@2757
   190
    if (jv < 0)
slouken@2757
   191
        jv = 0;
slouken@2757
   192
    q0 = e0 - 24 * (jv + 1);
slouken@2757
   193
slouken@2757
   194
    /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
slouken@2757
   195
    j = jv - jx;
slouken@2757
   196
    m = jx + jk;
slouken@2757
   197
    for (i = 0; i <= m; i++, j++)
slouken@2757
   198
        f[i] = (j < 0) ? zero : (double) ipio2[j];
slouken@2757
   199
slouken@2757
   200
    /* compute q[0],q[1],...q[jk] */
slouken@2757
   201
    for (i = 0; i <= jk; i++) {
slouken@2757
   202
        for (j = 0, fw = 0.0; j <= jx; j++)
slouken@2757
   203
            fw += x[j] * f[jx + i - j];
slouken@2757
   204
        q[i] = fw;
slouken@2757
   205
    }
slouken@2757
   206
slouken@2757
   207
    jz = jk;
slouken@2757
   208
  recompute:
slouken@2757
   209
    /* distill q[] into iq[] reversingly */
slouken@2757
   210
    for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
slouken@2757
   211
        fw = (double) ((int32_t) (twon24 * z));
slouken@2757
   212
        iq[i] = (int32_t) (z - two24 * fw);
slouken@2757
   213
        z = q[j - 1] + fw;
slouken@2757
   214
    }
slouken@2757
   215
slouken@2757
   216
    /* compute n */
slouken@2757
   217
    z = scalbn(z, q0);          /* actual value of z */
slouken@2757
   218
    z -= 8.0 * floor(z * 0.125);        /* trim off integer >= 8 */
slouken@2757
   219
    n = (int32_t) z;
slouken@2757
   220
    z -= (double) n;
slouken@2757
   221
    ih = 0;
slouken@2757
   222
    if (q0 > 0) {               /* need iq[jz-1] to determine n */
slouken@2757
   223
        i = (iq[jz - 1] >> (24 - q0));
slouken@2757
   224
        n += i;
slouken@2757
   225
        iq[jz - 1] -= i << (24 - q0);
slouken@2757
   226
        ih = iq[jz - 1] >> (23 - q0);
slouken@2757
   227
    } else if (q0 == 0)
slouken@2757
   228
        ih = iq[jz - 1] >> 23;
slouken@2757
   229
    else if (z >= 0.5)
slouken@2757
   230
        ih = 2;
slouken@2757
   231
slouken@2757
   232
    if (ih > 0) {               /* q > 0.5 */
slouken@2757
   233
        n += 1;
slouken@2757
   234
        carry = 0;
slouken@2757
   235
        for (i = 0; i < jz; i++) {      /* compute 1-q */
slouken@2757
   236
            j = iq[i];
slouken@2757
   237
            if (carry == 0) {
slouken@2757
   238
                if (j != 0) {
slouken@2757
   239
                    carry = 1;
slouken@2757
   240
                    iq[i] = 0x1000000 - j;
slouken@2757
   241
                }
slouken@2757
   242
            } else
slouken@2757
   243
                iq[i] = 0xffffff - j;
slouken@2757
   244
        }
slouken@2757
   245
        if (q0 > 0) {           /* rare case: chance is 1 in 12 */
slouken@2757
   246
            switch (q0) {
slouken@2757
   247
            case 1:
slouken@2757
   248
                iq[jz - 1] &= 0x7fffff;
slouken@2757
   249
                break;
slouken@2757
   250
            case 2:
slouken@2757
   251
                iq[jz - 1] &= 0x3fffff;
slouken@2757
   252
                break;
slouken@2757
   253
            }
slouken@2757
   254
        }
slouken@2757
   255
        if (ih == 2) {
slouken@2757
   256
            z = one - z;
slouken@2757
   257
            if (carry != 0)
slouken@2757
   258
                z -= scalbn(one, q0);
slouken@2757
   259
        }
slouken@2757
   260
    }
slouken@2757
   261
slouken@2757
   262
    /* check if recomputation is needed */
slouken@2757
   263
    if (z == zero) {
slouken@2757
   264
        j = 0;
slouken@2757
   265
        for (i = jz - 1; i >= jk; i--)
slouken@2757
   266
            j |= iq[i];
slouken@2757
   267
        if (j == 0) {           /* need recomputation */
slouken@2757
   268
            for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
slouken@2757
   269
slouken@2757
   270
            for (i = jz + 1; i <= jz + k; i++) {        /* add q[jz+1] to q[jz+k] */
slouken@2757
   271
                f[jx + i] = (double) ipio2[jv + i];
slouken@2757
   272
                for (j = 0, fw = 0.0; j <= jx; j++)
slouken@2757
   273
                    fw += x[j] * f[jx + i - j];
slouken@2757
   274
                q[i] = fw;
slouken@2757
   275
            }
slouken@2757
   276
            jz += k;
slouken@2757
   277
            goto recompute;
slouken@2757
   278
        }
slouken@2757
   279
    }
slouken@2757
   280
slouken@2757
   281
    /* chop off zero terms */
slouken@2757
   282
    if (z == 0.0) {
slouken@2757
   283
        jz -= 1;
slouken@2757
   284
        q0 -= 24;
slouken@2757
   285
        while (iq[jz] == 0) {
slouken@2757
   286
            jz--;
slouken@2757
   287
            q0 -= 24;
slouken@2757
   288
        }
slouken@2757
   289
    } else {                    /* break z into 24-bit if necessary */
slouken@2757
   290
        z = scalbn(z, -q0);
slouken@2757
   291
        if (z >= two24) {
slouken@2757
   292
            fw = (double) ((int32_t) (twon24 * z));
slouken@2757
   293
            iq[jz] = (int32_t) (z - two24 * fw);
slouken@2757
   294
            jz += 1;
slouken@2757
   295
            q0 += 24;
slouken@2757
   296
            iq[jz] = (int32_t) fw;
slouken@2757
   297
        } else
slouken@2757
   298
            iq[jz] = (int32_t) z;
slouken@2757
   299
    }
slouken@2757
   300
slouken@2757
   301
    /* convert integer "bit" chunk to floating-point value */
slouken@2757
   302
    fw = scalbn(one, q0);
slouken@2757
   303
    for (i = jz; i >= 0; i--) {
slouken@2757
   304
        q[i] = fw * (double) iq[i];
slouken@2757
   305
        fw *= twon24;
slouken@2757
   306
    }
slouken@2757
   307
slouken@2757
   308
    /* compute PIo2[0,...,jp]*q[jz,...,0] */
slouken@2757
   309
    for (i = jz; i >= 0; i--) {
slouken@2757
   310
        for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
slouken@2757
   311
            fw += PIo2[k] * q[i + k];
slouken@2757
   312
        fq[jz - i] = fw;
slouken@2757
   313
    }
slouken@2757
   314
slouken@2757
   315
    /* compress fq[] into y[] */
slouken@2757
   316
    switch (prec) {
slouken@2757
   317
    case 0:
slouken@2757
   318
        fw = 0.0;
slouken@2757
   319
        for (i = jz; i >= 0; i--)
slouken@2757
   320
            fw += fq[i];
slouken@2757
   321
        y[0] = (ih == 0) ? fw : -fw;
slouken@2757
   322
        break;
slouken@2757
   323
    case 1:
slouken@2757
   324
    case 2:
slouken@2757
   325
        fw = 0.0;
slouken@2757
   326
        for (i = jz; i >= 0; i--)
slouken@2757
   327
            fw += fq[i];
slouken@2757
   328
        y[0] = (ih == 0) ? fw : -fw;
slouken@2757
   329
        fw = fq[0] - fw;
slouken@2757
   330
        for (i = 1; i <= jz; i++)
slouken@2757
   331
            fw += fq[i];
slouken@2757
   332
        y[1] = (ih == 0) ? fw : -fw;
slouken@2757
   333
        break;
slouken@2757
   334
    case 3:                    /* painful */
slouken@2757
   335
        for (i = jz; i > 0; i--) {
slouken@2757
   336
            fw = fq[i - 1] + fq[i];
slouken@2757
   337
            fq[i] += fq[i - 1] - fw;
slouken@2757
   338
            fq[i - 1] = fw;
slouken@2757
   339
        }
slouken@2757
   340
        for (i = jz; i > 1; i--) {
slouken@2757
   341
            fw = fq[i - 1] + fq[i];
slouken@2757
   342
            fq[i] += fq[i - 1] - fw;
slouken@2757
   343
            fq[i - 1] = fw;
slouken@2757
   344
        }
slouken@2757
   345
        for (fw = 0.0, i = jz; i >= 2; i--)
slouken@2757
   346
            fw += fq[i];
slouken@2757
   347
        if (ih == 0) {
slouken@2757
   348
            y[0] = fq[0];
slouken@2757
   349
            y[1] = fq[1];
slouken@2757
   350
            y[2] = fw;
slouken@2757
   351
        } else {
slouken@2757
   352
            y[0] = -fq[0];
slouken@2757
   353
            y[1] = -fq[1];
slouken@2757
   354
            y[2] = -fw;
slouken@2757
   355
        }
slouken@2757
   356
    }
slouken@2757
   357
    return n & 7;
slouken@2757
   358
}