src/libm/e_rem_pio2.c
author Sam Lantinga <slouken@libsdl.org>
Wed, 03 Jun 2009 04:37:27 +0000
changeset 3162 dc1eb82ffdaa
parent 2756 a98604b691c8
child 6044 35448a5ea044
permissions -rw-r--r--
Von: Thomas Zimmermann
Betreff: [SDL] [PATCH] Make static variables const
Datum: Tue, 19 May 2009 19:45:37 +0200

Hi,

this is a set of simple changes which make some of SDL's internal static
arrays constant. The purpose is to shrink the number of write-able
static bytes and thus increase the number of memory pages shared between
SDL applications.

The patch set is against trunk@4513. Each of the attached patch files is
specific to a sub-system. The set is completed by a second mail, because
of the list's 40 KiB limit.

The files readelf-r4513.txt and readelf-const-patch.txt where made by
calling 'readelf -S libSDL.so'. They show the difference in ELF sections
without and with the patch. Some numbers measured on my x86-64:

Before

[13] .rodata PROGBITS 00000000000eaaa0 000eaaa0
0000000000008170 0000000000000000 A 0 0 32
[19] .data.rel.ro PROGBITS 00000000003045e0 001045e0
00000000000023d0 0000000000000000 WA 0 0 32
[23] .data PROGBITS 00000000003076e0 001076e0
0000000000004988 0000000000000000 WA 0 0 32

After

[13] .rodata PROGBITS 00000000000eaaa0 000eaaa0
0000000000009a50 0000000000000000 A 0 0 32
[19] .data.rel.ro PROGBITS 0000000000306040 00106040
0000000000002608 0000000000000000 WA 0 0 32
[23] .data PROGBITS 0000000000309360 00109360
0000000000002e88 0000000000000000 WA 0 0 32

The size of the write-able data section decreased considerably. Some
entries became const-after-relocation, while most of its content went
straight into the read-only data section.

Best regards, Thomas
     1 /* @(#)e_rem_pio2.c 5.1 93/09/24 */
     2 /*
     3  * ====================================================
     4  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
     5  *
     6  * Developed at SunPro, a Sun Microsystems, Inc. business.
     7  * Permission to use, copy, modify, and distribute this
     8  * software is freely granted, provided that this notice
     9  * is preserved.
    10  * ====================================================
    11  */
    12 
    13 #if defined(LIBM_SCCS) && !defined(lint)
    14 static const char rcsid[] =
    15     "$NetBSD: e_rem_pio2.c,v 1.8 1995/05/10 20:46:02 jtc Exp $";
    16 #endif
    17 
    18 /* __ieee754_rem_pio2(x,y)
    19  *
    20  * return the remainder of x rem pi/2 in y[0]+y[1]
    21  * use __kernel_rem_pio2()
    22  */
    23 
    24 #include "math.h"
    25 #include "math_private.h"
    26 
    27 libm_hidden_proto(fabs)
    28 
    29 /*
    30  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
    31  */
    32 #ifdef __STDC__
    33      static const int32_t two_over_pi[] = {
    34 #else
    35      static int32_t two_over_pi[] = {
    36 #endif
    37          0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
    38          0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
    39          0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
    40          0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
    41          0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
    42          0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
    43          0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
    44          0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
    45          0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
    46          0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
    47          0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
    48      };
    49 
    50 #ifdef __STDC__
    51 static const int32_t npio2_hw[] = {
    52 #else
    53 static int32_t npio2_hw[] = {
    54 #endif
    55     0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
    56     0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
    57     0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
    58     0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
    59     0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
    60     0x404858EB, 0x404921FB,
    61 };
    62 
    63 /*
    64  * invpio2:  53 bits of 2/pi
    65  * pio2_1:   first  33 bit of pi/2
    66  * pio2_1t:  pi/2 - pio2_1
    67  * pio2_2:   second 33 bit of pi/2
    68  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
    69  * pio2_3:   third  33 bit of pi/2
    70  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
    71  */
    72 
    73 #ifdef __STDC__
    74 static const double
    75 #else
    76 static double
    77 #endif
    78   zero = 0.00000000000000000000e+00,    /* 0x00000000, 0x00000000 */
    79     half = 5.00000000000000000000e-01,  /* 0x3FE00000, 0x00000000 */
    80     two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
    81     invpio2 = 6.36619772367581382433e-01,       /* 0x3FE45F30, 0x6DC9C883 */
    82     pio2_1 = 1.57079632673412561417e+00,        /* 0x3FF921FB, 0x54400000 */
    83     pio2_1t = 6.07710050650619224932e-11,       /* 0x3DD0B461, 0x1A626331 */
    84     pio2_2 = 6.07710050630396597660e-11,        /* 0x3DD0B461, 0x1A600000 */
    85     pio2_2t = 2.02226624879595063154e-21,       /* 0x3BA3198A, 0x2E037073 */
    86     pio2_3 = 2.02226624871116645580e-21,        /* 0x3BA3198A, 0x2E000000 */
    87     pio2_3t = 8.47842766036889956997e-32;       /* 0x397B839A, 0x252049C1 */
    88 
    89 #ifdef __STDC__
    90 int32_t attribute_hidden
    91 __ieee754_rem_pio2(double x, double *y)
    92 #else
    93 int32_t attribute_hidden
    94 __ieee754_rem_pio2(x, y)
    95      double x, y[];
    96 #endif
    97 {
    98     double z = 0.0, w, t, r, fn;
    99     double tx[3];
   100     int32_t e0, i, j, nx, n, ix, hx;
   101     u_int32_t low;
   102 
   103     GET_HIGH_WORD(hx, x);       /* high word of x */
   104     ix = hx & 0x7fffffff;
   105     if (ix <= 0x3fe921fb) {     /* |x| ~<= pi/4 , no need for reduction */
   106         y[0] = x;
   107         y[1] = 0;
   108         return 0;
   109     }
   110     if (ix < 0x4002d97c) {      /* |x| < 3pi/4, special case with n=+-1 */
   111         if (hx > 0) {
   112             z = x - pio2_1;
   113             if (ix != 0x3ff921fb) {     /* 33+53 bit pi is good enough */
   114                 y[0] = z - pio2_1t;
   115                 y[1] = (z - y[0]) - pio2_1t;
   116             } else {            /* near pi/2, use 33+33+53 bit pi */
   117                 z -= pio2_2;
   118                 y[0] = z - pio2_2t;
   119                 y[1] = (z - y[0]) - pio2_2t;
   120             }
   121             return 1;
   122         } else {                /* negative x */
   123             z = x + pio2_1;
   124             if (ix != 0x3ff921fb) {     /* 33+53 bit pi is good enough */
   125                 y[0] = z + pio2_1t;
   126                 y[1] = (z - y[0]) + pio2_1t;
   127             } else {            /* near pi/2, use 33+33+53 bit pi */
   128                 z += pio2_2;
   129                 y[0] = z + pio2_2t;
   130                 y[1] = (z - y[0]) + pio2_2t;
   131             }
   132             return -1;
   133         }
   134     }
   135     if (ix <= 0x413921fb) {     /* |x| ~<= 2^19*(pi/2), medium size */
   136         t = fabs(x);
   137         n = (int32_t) (t * invpio2 + half);
   138         fn = (double) n;
   139         r = t - fn * pio2_1;
   140         w = fn * pio2_1t;       /* 1st round good to 85 bit */
   141         if (n < 32 && ix != npio2_hw[n - 1]) {
   142             y[0] = r - w;       /* quick check no cancellation */
   143         } else {
   144             u_int32_t high;
   145             j = ix >> 20;
   146             y[0] = r - w;
   147             GET_HIGH_WORD(high, y[0]);
   148             i = j - ((high >> 20) & 0x7ff);
   149             if (i > 16) {       /* 2nd iteration needed, good to 118 */
   150                 t = r;
   151                 w = fn * pio2_2;
   152                 r = t - w;
   153                 w = fn * pio2_2t - ((t - r) - w);
   154                 y[0] = r - w;
   155                 GET_HIGH_WORD(high, y[0]);
   156                 i = j - ((high >> 20) & 0x7ff);
   157                 if (i > 49) {   /* 3rd iteration need, 151 bits acc */
   158                     t = r;      /* will cover all possible cases */
   159                     w = fn * pio2_3;
   160                     r = t - w;
   161                     w = fn * pio2_3t - ((t - r) - w);
   162                     y[0] = r - w;
   163                 }
   164             }
   165         }
   166         y[1] = (r - y[0]) - w;
   167         if (hx < 0) {
   168             y[0] = -y[0];
   169             y[1] = -y[1];
   170             return -n;
   171         } else
   172             return n;
   173     }
   174     /*
   175      * all other (large) arguments
   176      */
   177     if (ix >= 0x7ff00000) {     /* x is inf or NaN */
   178         y[0] = y[1] = x - x;
   179         return 0;
   180     }
   181     /* set z = scalbn(|x|,ilogb(x)-23) */
   182     GET_LOW_WORD(low, x);
   183     SET_LOW_WORD(z, low);
   184     e0 = (ix >> 20) - 1046;     /* e0 = ilogb(z)-23; */
   185     SET_HIGH_WORD(z, ix - ((int32_t) (e0 << 20)));
   186     for (i = 0; i < 2; i++) {
   187         tx[i] = (double) ((int32_t) (z));
   188         z = (z - tx[i]) * two24;
   189     }
   190     tx[2] = z;
   191     nx = 3;
   192     while (tx[nx - 1] == zero)
   193         nx--;                   /* skip zero term */
   194     n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
   195     if (hx < 0) {
   196         y[0] = -y[0];
   197         y[1] = -y[1];
   198         return -n;
   199     }
   200     return n;
   201 }