src/stdlib/SDL_stdlib.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 06 Nov 2016 10:01:08 -0800
changeset 10587 513f0e80a7dd
parent 10550 d8a72d273dd2
child 10737 3406a0f8b041
permissions -rw-r--r--
Fixed bug 3468 - _allshr in SDL_stdlib.c is not working properly

Mark Pizzolato

On Windows with Visual Studio, when building SDL as a static library using the x86 (32bit) mode, several intrinsic operations are implemented in code in SDL_stdlib.c.

One of these, _allshr() is not properly implemented and fails for some input. As a result, some operations on 64bit data elements (long long) don't always work.

I classified this bug as a blocker since things absolutely don't work when the affected code is invoked. The affected code is only invoked when SDL is compiled in x86 mode on Visual Studio when building SDL as a static library. This build environment isn't common, and hence the bug hasn't been noticed previously.

I reopened #2537 and mentioned this problem and provided a fix. That fix is provided again here along with test code which could be added to some of the SDL test code. This test code verifies that the x86 intrinsic routines produce the same results as the native x64 instructions which these routines emulate under the Microsoft compiler. The point of the tests is to make sure that Visual Studio x86 code produces the same results as Visual Studio x64 code. Some of the arguments (or boundary conditions) may produce different results on other compiler environments, so the tests really shouldn't be run on all compilers. The test driver only actually exercises code when the compiler defines _MSC_VER, so the driver can generically be invoked without issue.
slouken@1330
     1
/*
slouken@5535
     2
  Simple DirectMedia Layer
slouken@9998
     3
  Copyright (C) 1997-2016 Sam Lantinga <slouken@libsdl.org>
slouken@1330
     4
slouken@5535
     5
  This software is provided 'as-is', without any express or implied
slouken@5535
     6
  warranty.  In no event will the authors be held liable for any damages
slouken@5535
     7
  arising from the use of this software.
slouken@1330
     8
slouken@5535
     9
  Permission is granted to anyone to use this software for any purpose,
slouken@5535
    10
  including commercial applications, and to alter it and redistribute it
slouken@5535
    11
  freely, subject to the following restrictions:
slouken@1330
    12
slouken@5535
    13
  1. The origin of this software must not be misrepresented; you must not
slouken@5535
    14
     claim that you wrote the original software. If you use this software
slouken@5535
    15
     in a product, an acknowledgment in the product documentation would be
slouken@5535
    16
     appreciated but is not required.
slouken@5535
    17
  2. Altered source versions must be plainly marked as such, and must not be
slouken@5535
    18
     misrepresented as being the original software.
slouken@5535
    19
  3. This notice may not be removed or altered from any source distribution.
slouken@1330
    20
*/
icculus@9306
    21
icculus@9306
    22
#if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
icculus@9306
    23
#define SDL_DISABLE_ANALYZE_MACROS 1
icculus@9306
    24
#endif
icculus@9306
    25
icculus@8093
    26
#include "../SDL_internal.h"
slouken@1330
    27
slouken@1330
    28
/* This file contains portable stdlib functions for SDL */
slouken@1330
    29
slouken@1354
    30
#include "SDL_stdinc.h"
slouken@7351
    31
#include "../libm/math_libm.h"
slouken@1330
    32
slouken@7351
    33
slouken@7351
    34
/* Arc tangent of x.
 * Forwards to the C library's atan() when HAVE_ATAN is defined, otherwise
 * to the bundled SDL_uclibc_atan() implementation. */
double
SDL_atan(double x)
{
#if defined(HAVE_ATAN)
    return atan(x);
#else
    return SDL_uclibc_atan(x);
#endif /* HAVE_ATAN */
}
slouken@7351
    43
slouken@7351
    44
/* Two-argument arc tangent.
 * The arguments are forwarded unchanged and in order, so `x` becomes the
 * first argument of atan2()/SDL_uclibc_atan2() and `y` the second.  Note that
 * standard C names atan2's first argument "y" (the ordinate): callers must
 * pass the ordinate first despite the parameter names used here. */
double
SDL_atan2(double x, double y)
{
#if defined(HAVE_ATAN2)
    return atan2(x, y);
#else
    return SDL_uclibc_atan2(x, y);
#endif /* HAVE_ATAN2 */
}
slouken@7351
    53
slouken@7351
    54
double
icculus@8056
    55
SDL_acos(double val)
icculus@8056
    56
{
icculus@8056
    57
#if defined(HAVE_ACOS)
icculus@8056
    58
    return acos(val);
icculus@8056
    59
#else
icculus@8056
    60
    double result;
icculus@8056
    61
    if (val == -1.0) {
icculus@8056
    62
        result = M_PI;
icculus@8056
    63
    } else {
icculus@8056
    64
        result = SDL_atan(SDL_sqrt(1.0 - val * val) / val);
icculus@8056
    65
        if (result < 0.0)
icculus@8056
    66
        {
icculus@8056
    67
            result += M_PI;
icculus@8056
    68
        }
icculus@8056
    69
    }
icculus@8056
    70
    return result;
icculus@8056
    71
#endif
icculus@8056
    72
}
icculus@8056
    73
icculus@8056
    74
double
icculus@8056
    75
SDL_asin(double val)
icculus@8056
    76
{
icculus@8056
    77
#if defined(HAVE_ASIN)
icculus@8056
    78
    return asin(val);
icculus@8056
    79
#else
icculus@8056
    80
    double result;
icculus@8056
    81
    if (val == -1.0) {
icculus@8056
    82
        result = -(M_PI / 2.0);
icculus@8056
    83
    } else {
icculus@8056
    84
        result = (M_PI / 2.0) - SDL_acos(val);
icculus@8056
    85
    }
icculus@8056
    86
    return result;
icculus@8056
    87
#endif
icculus@8056
    88
}
icculus@8056
    89
icculus@8056
    90
/* Smallest integral value not less than x.
 * Uses the C library's ceil() when HAVE_CEIL is defined.  Otherwise the
 * value is built from SDL_floor(): if x has a positive fractional part the
 * floor is bumped by one. */
double
SDL_ceil(double x)
{
#if defined(HAVE_CEIL)
    return ceil(x);
#else
    double integer = SDL_floor(x);
    double fraction = x - integer;
    if (fraction > 0.0) {
        integer += 1.0;
    }
    return integer;
#endif /* HAVE_CEIL */
}
slouken@7351
   104
slouken@7351
   105
/* Forwards (x, y) to the first available copysign implementation:
 * copysign() if HAVE_COPYSIGN, else _copysign() if HAVE__COPYSIGN, else the
 * bundled SDL_uclibc_copysign(). */
double
SDL_copysign(double x, double y)
{
#if defined(HAVE_COPYSIGN)
    return copysign(x, y);
#elif defined(HAVE__COPYSIGN)
    return _copysign(x, y);
#else
    return SDL_uclibc_copysign(x, y);
#endif /* HAVE_COPYSIGN */
}
slouken@7351
   116
slouken@7351
   117
/* Cosine of x.
 * Forwards to the C library's cos() when HAVE_COS is defined, otherwise to
 * the bundled SDL_uclibc_cos(). */
double
SDL_cos(double x)
{
#if defined(HAVE_COS)
    return cos(x);
#else
    return SDL_uclibc_cos(x);
#endif /* HAVE_COS */
}
slouken@7351
   126
slouken@7351
   127
/* Single-precision cosine of x.
 * Uses cosf() when HAVE_COSF is defined; otherwise widens to double, calls
 * SDL_cos() and narrows the result back to float. */
float
SDL_cosf(float x)
{
#if defined(HAVE_COSF)
    return cosf(x);
#else
    return (float)SDL_cos((double)x);
#endif
}
slouken@7351
   136
slouken@7351
   137
/* Absolute value of x.
 * Forwards to the C library's fabs() when HAVE_FABS is defined, otherwise to
 * the bundled SDL_uclibc_fabs(). */
double
SDL_fabs(double x)
{
#if defined(HAVE_FABS)
    return fabs(x);
#else
    return SDL_uclibc_fabs(x);
#endif /* HAVE_FABS */
}
slouken@7351
   146
slouken@7351
   147
/* Largest integral value not greater than x.
 * Forwards to the C library's floor() when HAVE_FLOOR is defined, otherwise
 * to the bundled SDL_uclibc_floor(). */
double
SDL_floor(double x)
{
#if defined(HAVE_FLOOR)
    return floor(x);
#else
    return SDL_uclibc_floor(x);
#endif /* HAVE_FLOOR */
}
slouken@7351
   156
slouken@7351
   157
/* Logarithm of x.
 * Forwards to the C library's log() when HAVE_LOG is defined, otherwise to
 * the bundled SDL_uclibc_log(). */
double
SDL_log(double x)
{
#if defined(HAVE_LOG)
    return log(x);
#else
    return SDL_uclibc_log(x);
#endif /* HAVE_LOG */
}
slouken@7351
   166
slouken@7351
   167
/* x raised to the power y.
 * Forwards to the C library's pow() when HAVE_POW is defined, otherwise to
 * the bundled SDL_uclibc_pow(). */
double
SDL_pow(double x, double y)
{
#if defined(HAVE_POW)
    return pow(x, y);
#else
    return SDL_uclibc_pow(x, y);
#endif /* HAVE_POW */
}
slouken@7351
   176
slouken@7351
   177
/* Forwards (x, n) to the first available scaling routine: scalbn() if
 * HAVE_SCALBN, else _scalb() if HAVE__SCALB, else the bundled
 * SDL_uclibc_scalbn(). */
double
SDL_scalbn(double x, int n)
{
#if defined(HAVE_SCALBN)
    return scalbn(x, n);
#elif defined(HAVE__SCALB)
    return _scalb(x, n);
#else
    return SDL_uclibc_scalbn(x, n);
#endif /* HAVE_SCALBN */
}
slouken@7351
   188
slouken@7351
   189
/* Sine of x.
 * Forwards to the C library's sin() when HAVE_SIN is defined, otherwise to
 * the bundled SDL_uclibc_sin(). */
double
SDL_sin(double x)
{
#if defined(HAVE_SIN)
    return sin(x);
#else
    return SDL_uclibc_sin(x);
#endif /* HAVE_SIN */
}
slouken@7351
   198
slouken@7351
   199
/* Single-precision sine of x.
 * Uses sinf() when HAVE_SINF is defined; otherwise widens to double, calls
 * SDL_sin() and narrows the result back to float. */
float
SDL_sinf(float x)
{
#if defined(HAVE_SINF)
    return sinf(x);
#else
    return (float)SDL_sin((double)x);
#endif /* HAVE_SINF */
}
slouken@7351
   208
slouken@7351
   209
/* Square root of x.
 * Forwards to the C library's sqrt() when HAVE_SQRT is defined, otherwise to
 * the bundled SDL_uclibc_sqrt(). */
double
SDL_sqrt(double x)
{
#if defined(HAVE_SQRT)
    return sqrt(x);
#else
    return SDL_uclibc_sqrt(x);
#endif
}
slouken@7351
   218
slouken@8840
   219
/* Single-precision square root of x.
 * Uses sqrtf() when HAVE_SQRTF is defined; otherwise widens to double, calls
 * SDL_sqrt() and narrows the result back to float. */
float
SDL_sqrtf(float x)
{
#if defined(HAVE_SQRTF)
    return sqrtf(x);
#else
    return (float)SDL_sqrt((double)x);
#endif
}
slouken@8840
   228
slouken@8840
   229
/* Tangent of x.
 * Forwards to the C library's tan() when HAVE_TAN is defined, otherwise to
 * the bundled SDL_uclibc_tan(). */
double
SDL_tan(double x)
{
#if defined(HAVE_TAN)
    return tan(x);
#else
    return SDL_uclibc_tan(x);
#endif
}
slouken@8840
   238
slouken@8840
   239
/* Single-precision tangent of x.
 * Uses tanf() when HAVE_TANF is defined; otherwise widens to double, calls
 * SDL_tan() and narrows the result back to float. */
float
SDL_tanf(float x)
{
#if defined(HAVE_TANF)
    return tanf(x);
#else
    return (float)SDL_tan((double)x);
#endif
}
slouken@8840
   248
slouken@7351
   249
/* Absolute value of an int.
 * Uses the C library's abs() when HAVE_ABS is defined, otherwise negates
 * negative inputs by hand.  Neither path guards against the most negative
 * int, whose negation is not representable. */
int SDL_abs(int x)
{
#if defined(HAVE_ABS)
    return abs(x);
#else
    return ((x) < 0 ? -(x) : (x));
#endif
}
slouken@7351
   257
slouken@10505
   258
/* Character classification / case conversion.
 * With HAVE_CTYPE_H these forward straight to <ctype.h>; otherwise they are
 * implemented for the ASCII ranges '0'-'9', 'a'-'z', 'A'-'Z' and the six
 * whitespace characters space, \t, \r, \n, \f, \v.
 * NOTE(review): the <ctype.h> functions require an argument representable as
 * unsigned char or equal to EOF; the forwarding versions pass `x` through
 * unchanged, so callers holding a plain (possibly negative) char should cast
 * to unsigned char first. */
#if defined(HAVE_CTYPE_H)
int SDL_isdigit(int x) { return isdigit(x); }
int SDL_isspace(int x) { return isspace(x); }
int SDL_toupper(int x) { return toupper(x); }
int SDL_tolower(int x) { return tolower(x); }
#else
int SDL_isdigit(int x) { return ((x) >= '0') && ((x) <= '9'); }
int SDL_isspace(int x) { return ((x) == ' ') || ((x) == '\t') || ((x) == '\r') || ((x) == '\n') || ((x) == '\f') || ((x) == '\v'); }
int SDL_toupper(int x) { return ((x) >= 'a') && ((x) <= 'z') ? ('A'+((x)-'a')) : (x); }
int SDL_tolower(int x) { return ((x) >= 'A') && ((x) <= 'Z') ? ('a'+((x)-'A')) : (x); }
#endif
icculus@7003
   269
icculus@7003
   270
slouken@1330
   271
#ifndef HAVE_LIBC
slouken@1330
   272
/* These are some C runtime intrinsics that need to be defined */
slouken@1330
   273
slouken@3255
   274
#if defined(_MSC_VER)
slouken@1330
   275
slouken@1433
   276
#ifndef __FLTUSED__
slouken@1433
   277
#define __FLTUSED__
slouken@3255
   278
__declspec(selectany) int _fltused = 1;
slouken@1433
   279
#endif
slouken@3255
   280
icculus@9716
   281
/* The optimizer on Visual Studio 2005 and later generates memcpy() calls */
#if (_MSC_VER >= 1400) && defined(_WIN64) && !defined(_DEBUG) && !(_MSC_VER >= 1900 && defined(_MT))
#include <intrin.h>

#pragma function(memcpy)
/* Replacement memcpy() for the configuration selected above.
 *
 * Copies `num` bytes from `source` to `destination` and returns
 * `destination`.  When both pointers are 16-byte aligned, whole 64-byte
 * chunks are moved with four 128-bit loads followed by four streaming
 * (_mm_stream_ps) stores; whatever remains - or the entire buffer when
 * either pointer is unaligned - is copied one byte at a time.
 *
 * NOTE(review): no store fence follows the streaming stores; confirm that
 * callers publishing the copied data to other threads do not depend on one. */
void * memcpy ( void * destination, const void * source, size_t num )
{
    const Uint8 *src = (const Uint8 *)source;
    Uint8 *dst = (Uint8 *)destination;
    size_t i;

    /* All WIN64 architectures have SSE, right? */
    if (!((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
        __m128 values[4];
        /* One iteration per complete 64-byte chunk. */
        for (i = num / 64; i--;) {
            _mm_prefetch(src, _MM_HINT_NTA);
            values[0] = *(__m128 *) (src + 0);
            values[1] = *(__m128 *) (src + 16);
            values[2] = *(__m128 *) (src + 32);
            values[3] = *(__m128 *) (src + 48);
            _mm_stream_ps((float *) (dst + 0), values[0]);
            _mm_stream_ps((float *) (dst + 16), values[1]);
            _mm_stream_ps((float *) (dst + 32), values[2]);
            _mm_stream_ps((float *) (dst + 48), values[3]);
            src += 64;
            dst += 64;
        }
        /* Only the sub-64-byte tail is left for the byte loop. */
        num &= 63;
    }

    while (num--) {
        *dst++ = *src++;
    }
    return destination;
}
#endif /* (_MSC_VER >= 1400) && defined(_WIN64) && !defined(_DEBUG) && !(_MSC_VER >= 1900 && defined(_MT)) */
slouken@5455
   317
slouken@5086
   318
#ifdef _M_IX86
slouken@1433
   319
slouken@1346
   320
/* Float to long */
slouken@3253
   321
void
slouken@3253
   322
__declspec(naked)
slouken@3253
   323
_ftol()
slouken@1345
   324
{
slouken@1895
   325
    /* *INDENT-OFF* */
slouken@1895
   326
    __asm {
slouken@1895
   327
        push        ebp
slouken@1895
   328
        mov         ebp,esp
slouken@1895
   329
        sub         esp,20h
slouken@1895
   330
        and         esp,0FFFFFFF0h
slouken@1895
   331
        fld         st(0)
slouken@1895
   332
        fst         dword ptr [esp+18h]
slouken@1895
   333
        fistp       qword ptr [esp+10h]
slouken@1895
   334
        fild        qword ptr [esp+10h]
slouken@1895
   335
        mov         edx,dword ptr [esp+18h]
slouken@1895
   336
        mov         eax,dword ptr [esp+10h]
slouken@1895
   337
        test        eax,eax
slouken@1895
   338
        je          integer_QnaN_or_zero
slouken@1346
   339
arg_is_not_integer_QnaN:
slouken@1895
   340
        fsubp       st(1),st
slouken@1895
   341
        test        edx,edx
slouken@1895
   342
        jns         positive
slouken@1895
   343
        fstp        dword ptr [esp]
slouken@1895
   344
        mov         ecx,dword ptr [esp]
slouken@1895
   345
        xor         ecx,80000000h
slouken@1895
   346
        add         ecx,7FFFFFFFh
slouken@1895
   347
        adc         eax,0
slouken@1895
   348
        mov         edx,dword ptr [esp+14h]
slouken@1895
   349
        adc         edx,0
slouken@1895
   350
        jmp         localexit
slouken@1346
   351
positive:
slouken@1895
   352
        fstp        dword ptr [esp]
slouken@1895
   353
        mov         ecx,dword ptr [esp]
slouken@1895
   354
        add         ecx,7FFFFFFFh
slouken@1895
   355
        sbb         eax,0
slouken@1895
   356
        mov         edx,dword ptr [esp+14h]
slouken@1895
   357
        sbb         edx,0
slouken@1895
   358
        jmp         localexit
slouken@1346
   359
integer_QnaN_or_zero:
slouken@1895
   360
        mov         edx,dword ptr [esp+14h]
slouken@1895
   361
        test        edx,7FFFFFFFh
slouken@1895
   362
        jne         arg_is_not_integer_QnaN
slouken@1895
   363
        fstp        dword ptr [esp+18h]
slouken@1895
   364
        fstp        dword ptr [esp+18h]
slouken@1346
   365
localexit:
slouken@1895
   366
        leave
slouken@1895
   367
        ret
slouken@1895
   368
    }
slouken@1895
   369
    /* *INDENT-ON* */
slouken@1345
   370
}
slouken@1895
   371
slouken@1895
   372
/* Alias entry point: simply calls _ftol().
 * NOTE(review): this relies on the compiler-generated wrapper leaving ST(0)
 * and EDX:EAX untouched around the call - confirm for the build settings in
 * use. */
void
_ftol2_sse()
{
    _ftol();
}
slouken@1330
   377
slouken@10587
   378
/* 64-bit math operators for 32-bit systems */
slouken@10587
   379
void
slouken@10587
   380
__declspec(naked)
slouken@10587
   381
_allmul()
slouken@10587
   382
{
slouken@10587
   383
    /* *INDENT-OFF* */
slouken@10587
   384
    __asm {
slouken@10587
   385
        mov         eax, dword ptr[esp+8]
slouken@10587
   386
        mov         ecx, dword ptr[esp+10h]
slouken@10587
   387
        or          ecx, eax
slouken@10587
   388
        mov         ecx, dword ptr[esp+0Ch]
slouken@10587
   389
        jne         hard
slouken@10587
   390
        mov         eax, dword ptr[esp+4]
slouken@10587
   391
        mul         ecx
slouken@10587
   392
        ret         10h
slouken@10587
   393
hard:
slouken@10587
   394
        push        ebx
slouken@10587
   395
        mul         ecx
slouken@10587
   396
        mov         ebx, eax
slouken@10587
   397
        mov         eax, dword ptr[esp+8]
slouken@10587
   398
        mul         dword ptr[esp+14h]
slouken@10587
   399
        add         ebx, eax
slouken@10587
   400
        mov         eax, dword ptr[esp+8]
slouken@10587
   401
        mul         ecx
slouken@10587
   402
        add         edx, ebx
slouken@10587
   403
        pop         ebx
slouken@10587
   404
        ret         10h
slouken@10587
   405
    }
slouken@10587
   406
    /* *INDENT-ON* */
slouken@1330
   407
}
slouken@2735
   408
slouken@1895
   409
void
slouken@1895
   410
__declspec(naked)
slouken@1895
   411
_alldiv()
slouken@1330
   412
{
slouken@1895
   413
    /* *INDENT-OFF* */
slouken@1895
   414
    __asm {
slouken@1895
   415
        push        edi
slouken@1895
   416
        push        esi
slouken@1895
   417
        push        ebx
slouken@1895
   418
        xor         edi,edi
slouken@1895
   419
        mov         eax,dword ptr [esp+14h]
slouken@1895
   420
        or          eax,eax
slouken@1895
   421
        jge         L1
slouken@1895
   422
        inc         edi
slouken@1895
   423
        mov         edx,dword ptr [esp+10h]
slouken@1895
   424
        neg         eax
slouken@1895
   425
        neg         edx
slouken@1895
   426
        sbb         eax,0
slouken@1895
   427
        mov         dword ptr [esp+14h],eax
slouken@1895
   428
        mov         dword ptr [esp+10h],edx
slouken@1346
   429
L1:
slouken@1895
   430
        mov         eax,dword ptr [esp+1Ch]
slouken@1895
   431
        or          eax,eax
slouken@1895
   432
        jge         L2
slouken@1895
   433
        inc         edi
slouken@1895
   434
        mov         edx,dword ptr [esp+18h]
slouken@1895
   435
        neg         eax
slouken@1895
   436
        neg         edx
slouken@1895
   437
        sbb         eax,0
slouken@1895
   438
        mov         dword ptr [esp+1Ch],eax
slouken@1895
   439
        mov         dword ptr [esp+18h],edx
slouken@1346
   440
L2:
slouken@1895
   441
        or          eax,eax
slouken@1895
   442
        jne         L3
slouken@1895
   443
        mov         ecx,dword ptr [esp+18h]
slouken@1895
   444
        mov         eax,dword ptr [esp+14h]
slouken@1895
   445
        xor         edx,edx
slouken@1895
   446
        div         ecx
slouken@1895
   447
        mov         ebx,eax
slouken@1895
   448
        mov         eax,dword ptr [esp+10h]
slouken@1895
   449
        div         ecx
slouken@1895
   450
        mov         edx,ebx
slouken@1895
   451
        jmp         L4
slouken@1346
   452
L3:
slouken@1895
   453
        mov         ebx,eax
slouken@1895
   454
        mov         ecx,dword ptr [esp+18h]
slouken@1895
   455
        mov         edx,dword ptr [esp+14h]
slouken@1895
   456
        mov         eax,dword ptr [esp+10h]
slouken@1346
   457
L5:
slouken@1895
   458
        shr         ebx,1
slouken@1895
   459
        rcr         ecx,1
slouken@1895
   460
        shr         edx,1
slouken@1895
   461
        rcr         eax,1
slouken@1895
   462
        or          ebx,ebx
slouken@1895
   463
        jne         L5
slouken@1895
   464
        div         ecx
slouken@1895
   465
        mov         esi,eax
slouken@1895
   466
        mul         dword ptr [esp+1Ch]
slouken@1895
   467
        mov         ecx,eax
slouken@1895
   468
        mov         eax,dword ptr [esp+18h]
slouken@1895
   469
        mul         esi
slouken@1895
   470
        add         edx,ecx
slouken@1895
   471
        jb          L6
slouken@1895
   472
        cmp         edx,dword ptr [esp+14h]
slouken@1895
   473
        ja          L6
slouken@1895
   474
        jb          L7
slouken@1895
   475
        cmp         eax,dword ptr [esp+10h]
slouken@1895
   476
        jbe         L7
slouken@1346
   477
L6:
slouken@1895
   478
        dec         esi
slouken@1346
   479
L7:
slouken@1895
   480
        xor         edx,edx
slouken@1895
   481
        mov         eax,esi
slouken@1346
   482
L4:
slouken@1895
   483
        dec         edi
slouken@1895
   484
        jne         L8
slouken@1895
   485
        neg         edx
slouken@1895
   486
        neg         eax
slouken@1895
   487
        sbb         edx,0
slouken@1346
   488
L8:
slouken@1895
   489
        pop         ebx
slouken@1895
   490
        pop         esi
slouken@1895
   491
        pop         edi
slouken@1895
   492
        ret         10h
slouken@1895
   493
    }
slouken@1895
   494
    /* *INDENT-ON* */
slouken@1330
   495
}
slouken@2735
   496
slouken@1895
   497
void
slouken@1895
   498
__declspec(naked)
slouken@1895
   499
_aulldiv()
slouken@1330
   500
{
slouken@1895
   501
    /* *INDENT-OFF* */
slouken@1895
   502
    __asm {
slouken@1895
   503
        push        ebx
slouken@1895
   504
        push        esi
slouken@1895
   505
        mov         eax,dword ptr [esp+18h]
slouken@1895
   506
        or          eax,eax
slouken@1895
   507
        jne         L1
slouken@1895
   508
        mov         ecx,dword ptr [esp+14h]
slouken@1895
   509
        mov         eax,dword ptr [esp+10h]
slouken@1895
   510
        xor         edx,edx
slouken@1895
   511
        div         ecx
slouken@1895
   512
        mov         ebx,eax
slouken@1895
   513
        mov         eax,dword ptr [esp+0Ch]
slouken@1895
   514
        div         ecx
slouken@1895
   515
        mov         edx,ebx
slouken@1895
   516
        jmp         L2
slouken@1346
   517
L1:
slouken@1895
   518
        mov         ecx,eax
slouken@1895
   519
        mov         ebx,dword ptr [esp+14h]
slouken@1895
   520
        mov         edx,dword ptr [esp+10h]
slouken@1895
   521
        mov         eax,dword ptr [esp+0Ch]
slouken@1346
   522
L3:
slouken@1895
   523
        shr         ecx,1
slouken@1895
   524
        rcr         ebx,1
slouken@1895
   525
        shr         edx,1
slouken@1895
   526
        rcr         eax,1
slouken@1895
   527
        or          ecx,ecx
slouken@1895
   528
        jne         L3
slouken@1895
   529
        div         ebx
slouken@1895
   530
        mov         esi,eax
slouken@1895
   531
        mul         dword ptr [esp+18h]
slouken@1895
   532
        mov         ecx,eax
slouken@1895
   533
        mov         eax,dword ptr [esp+14h]
slouken@1895
   534
        mul         esi
slouken@1895
   535
        add         edx,ecx
slouken@1895
   536
        jb          L4
slouken@1895
   537
        cmp         edx,dword ptr [esp+10h]
slouken@1895
   538
        ja          L4
slouken@1895
   539
        jb          L5
slouken@1895
   540
        cmp         eax,dword ptr [esp+0Ch]
slouken@1895
   541
        jbe         L5
slouken@1346
   542
L4:
slouken@1895
   543
        dec         esi
slouken@1346
   544
L5:
slouken@1895
   545
        xor         edx,edx
slouken@1895
   546
        mov         eax,esi
slouken@1346
   547
L2:
slouken@1895
   548
        pop         esi
slouken@1895
   549
        pop         ebx
slouken@1895
   550
        ret         10h
slouken@1895
   551
    }
slouken@1895
   552
    /* *INDENT-ON* */
slouken@1330
   553
}
slouken@2735
   554
slouken@1895
   555
void
slouken@1895
   556
__declspec(naked)
slouken@1895
   557
_allrem()
slouken@1330
   558
{
slouken@1895
   559
    /* *INDENT-OFF* */
slouken@1895
   560
    __asm {
slouken@1895
   561
        push        ebx
slouken@1895
   562
        push        edi
slouken@1895
   563
        xor         edi,edi
slouken@1895
   564
        mov         eax,dword ptr [esp+10h]
slouken@1895
   565
        or          eax,eax
slouken@1895
   566
        jge         L1
slouken@1895
   567
        inc         edi
slouken@1895
   568
        mov         edx,dword ptr [esp+0Ch]
slouken@1895
   569
        neg         eax
slouken@1895
   570
        neg         edx
slouken@1895
   571
        sbb         eax,0
slouken@1895
   572
        mov         dword ptr [esp+10h],eax
slouken@1895
   573
        mov         dword ptr [esp+0Ch],edx
slouken@1346
   574
L1:
slouken@1895
   575
        mov         eax,dword ptr [esp+18h]
slouken@1895
   576
        or          eax,eax
slouken@1895
   577
        jge         L2
slouken@1895
   578
        mov         edx,dword ptr [esp+14h]
slouken@1895
   579
        neg         eax
slouken@1895
   580
        neg         edx
slouken@1895
   581
        sbb         eax,0
slouken@1895
   582
        mov         dword ptr [esp+18h],eax
slouken@1895
   583
        mov         dword ptr [esp+14h],edx
slouken@1346
   584
L2:
slouken@1895
   585
        or          eax,eax
slouken@1895
   586
        jne         L3
slouken@1895
   587
        mov         ecx,dword ptr [esp+14h]
slouken@1895
   588
        mov         eax,dword ptr [esp+10h]
slouken@1895
   589
        xor         edx,edx
slouken@1895
   590
        div         ecx
slouken@1895
   591
        mov         eax,dword ptr [esp+0Ch]
slouken@1895
   592
        div         ecx
slouken@1895
   593
        mov         eax,edx
slouken@1895
   594
        xor         edx,edx
slouken@1895
   595
        dec         edi
slouken@1895
   596
        jns         L4
slouken@1895
   597
        jmp         L8
slouken@1346
   598
L3:
slouken@1895
   599
        mov         ebx,eax
slouken@1895
   600
        mov         ecx,dword ptr [esp+14h]
slouken@1895
   601
        mov         edx,dword ptr [esp+10h]
slouken@1895
   602
        mov         eax,dword ptr [esp+0Ch]
slouken@1346
   603
L5:
slouken@1895
   604
        shr         ebx,1
slouken@1895
   605
        rcr         ecx,1
slouken@1895
   606
        shr         edx,1
slouken@1895
   607
        rcr         eax,1
slouken@1895
   608
        or          ebx,ebx
slouken@1895
   609
        jne         L5
slouken@1895
   610
        div         ecx
slouken@1895
   611
        mov         ecx,eax
slouken@1895
   612
        mul         dword ptr [esp+18h]
slouken@1895
   613
        xchg        eax,ecx
slouken@1895
   614
        mul         dword ptr [esp+14h]
slouken@1895
   615
        add         edx,ecx
slouken@1895
   616
        jb          L6
slouken@1895
   617
        cmp         edx,dword ptr [esp+10h]
slouken@1895
   618
        ja          L6
slouken@1895
   619
        jb          L7
slouken@1895
   620
        cmp         eax,dword ptr [esp+0Ch]
slouken@1895
   621
        jbe         L7
slouken@1346
   622
L6:
slouken@1895
   623
        sub         eax,dword ptr [esp+14h]
slouken@1895
   624
        sbb         edx,dword ptr [esp+18h]
slouken@1346
   625
L7:
slouken@1895
   626
        sub         eax,dword ptr [esp+0Ch]
slouken@1895
   627
        sbb         edx,dword ptr [esp+10h]
slouken@1895
   628
        dec         edi
slouken@1895
   629
        jns         L8
slouken@1346
   630
L4:
slouken@1895
   631
        neg         edx
slouken@1895
   632
        neg         eax
slouken@1895
   633
        sbb         edx,0
slouken@1346
   634
L8:
slouken@1895
   635
        pop         edi
slouken@1895
   636
        pop         ebx
slouken@1895
   637
        ret         10h
slouken@1895
   638
    }
slouken@1895
   639
    /* *INDENT-ON* */
slouken@1330
   640
}
slouken@2735
   641
slouken@1895
   642
void
slouken@1895
   643
__declspec(naked)
slouken@1895
   644
_aullrem()
slouken@1330
   645
{
slouken@1895
   646
    /* *INDENT-OFF* */
slouken@1895
   647
    __asm {
slouken@1895
   648
        push        ebx
slouken@1895
   649
        mov         eax,dword ptr [esp+14h]
slouken@1895
   650
        or          eax,eax
slouken@1895
   651
        jne         L1
slouken@1895
   652
        mov         ecx,dword ptr [esp+10h]
slouken@1895
   653
        mov         eax,dword ptr [esp+0Ch]
slouken@1895
   654
        xor         edx,edx
slouken@1895
   655
        div         ecx
slouken@1895
   656
        mov         eax,dword ptr [esp+8]
slouken@1895
   657
        div         ecx
slouken@1895
   658
        mov         eax,edx
slouken@1895
   659
        xor         edx,edx
slouken@1895
   660
        jmp         L2
slouken@1346
   661
L1:
slouken@1895
   662
        mov         ecx,eax
slouken@1895
   663
        mov         ebx,dword ptr [esp+10h]
slouken@1895
   664
        mov         edx,dword ptr [esp+0Ch]
slouken@1895
   665
        mov         eax,dword ptr [esp+8]
slouken@1346
   666
L3:
slouken@1895
   667
        shr         ecx,1
slouken@1895
   668
        rcr         ebx,1
slouken@1895
   669
        shr         edx,1
slouken@1895
   670
        rcr         eax,1
slouken@1895
   671
        or          ecx,ecx
slouken@1895
   672
        jne         L3
slouken@1895
   673
        div         ebx
slouken@1895
   674
        mov         ecx,eax
slouken@1895
   675
        mul         dword ptr [esp+14h]
slouken@1895
   676
        xchg        eax,ecx
slouken@1895
   677
        mul         dword ptr [esp+10h]
slouken@1895
   678
        add         edx,ecx
slouken@1895
   679
        jb          L4
slouken@1895
   680
        cmp         edx,dword ptr [esp+0Ch]
slouken@1895
   681
        ja          L4
slouken@1895
   682
        jb          L5
slouken@1895
   683
        cmp         eax,dword ptr [esp+8]
slouken@1895
   684
        jbe         L5
slouken@1346
   685
L4:
slouken@1895
   686
        sub         eax,dword ptr [esp+10h]
slouken@1895
   687
        sbb         edx,dword ptr [esp+14h]
slouken@1346
   688
L5:
slouken@1895
   689
        sub         eax,dword ptr [esp+8]
slouken@1895
   690
        sbb         edx,dword ptr [esp+0Ch]
slouken@1895
   691
        neg         edx
slouken@1895
   692
        neg         eax
slouken@1895
   693
        sbb         edx,0
slouken@1346
   694
L2:
slouken@1895
   695
        pop         ebx
slouken@1895
   696
        ret         10h
slouken@1895
   697
    }
slouken@1895
   698
    /* *INDENT-ON* */
slouken@1330
   699
}
slouken@2735
   700
slouken@1895
   701
/*
 * _alldvrm: signed 64-bit division producing both quotient and remainder.
 *
 * Naked routine (no compiler-generated prologue/epilogue). After the three
 * pushes below, the operands are read from the stack as:
 *   [esp+10h] / [esp+14h]   low / high dword of the dividend
 *   [esp+18h] / [esp+1Ch]   low / high dword of the divisor
 * Both operands are made non-negative in place, divided as unsigned values,
 * and the signs are fixed up afterwards.
 *
 * On return edx:eax holds the quotient and ebx:ecx the remainder, and
 * `ret 10h` pops the 16 bytes of arguments. ebx is written without being
 * saved because it is part of the result. The quotient is negated when
 * exactly one operand was negative; the remainder is negative only when the
 * dividend was negative.
 */
void
slouken@1895
   702
__declspec(naked)
slouken@1895
   703
_alldvrm()
slouken@1330
   704
{
slouken@1895
   705
    /* *INDENT-OFF* */
slouken@1895
   706
    __asm {
slouken@1895
   707
        push        edi
slouken@1895
   708
        push        esi
slouken@1895
   709
        push        ebp
slouken@1895
   710
        xor         edi,edi /* edi: number of negative operands (quotient sign) */
slouken@1895
   711
        xor         ebp,ebp /* ebp: 1 when the dividend is negative (remainder sign) */
slouken@1895
   712
        mov         eax,dword ptr [esp+14h] /* high dword of the dividend */
slouken@1895
   713
        or          eax,eax
slouken@1895
   714
        jge         L1 /* dividend is non-negative: nothing to negate */
slouken@1895
   715
        inc         edi
slouken@1895
   716
        inc         ebp
slouken@1895
   717
        mov         edx,dword ptr [esp+10h]
slouken@1895
   718
        neg         eax /* 64-bit negate of eax:edx (neg/neg/sbb) */
slouken@1895
   719
        neg         edx
slouken@1895
   720
        sbb         eax,0
slouken@1895
   721
        mov         dword ptr [esp+14h],eax /* store |dividend| back into the argument slot */
slouken@1895
   722
        mov         dword ptr [esp+10h],edx
slouken@1346
   723
L1:
slouken@1895
   724
        mov         eax,dword ptr [esp+1Ch] /* high dword of the divisor */
slouken@1895
   725
        or          eax,eax
slouken@1895
   726
        jge         L2 /* divisor is non-negative: nothing to negate */
slouken@1895
   727
        inc         edi
slouken@1895
   728
        mov         edx,dword ptr [esp+18h]
slouken@1895
   729
        neg         eax /* 64-bit negate of eax:edx */
slouken@1895
   730
        neg         edx
slouken@1895
   731
        sbb         eax,0
slouken@1895
   732
        mov         dword ptr [esp+1Ch],eax /* store |divisor| back into the argument slot */
slouken@1895
   733
        mov         dword ptr [esp+18h],edx
slouken@1346
   734
L2:
slouken@1895
   735
        or          eax,eax /* eax = high dword of |divisor| */
slouken@1895
   736
        jne         L3 /* divisor does not fit in 32 bits: use the general path */
slouken@1895
   737
        mov         ecx,dword ptr [esp+18h]
slouken@1895
   738
        mov         eax,dword ptr [esp+14h]
slouken@1895
   739
        xor         edx,edx
slouken@1895
   740
        div         ecx /* high dword of the quotient; edx = partial remainder */
slouken@1895
   741
        mov         ebx,eax
slouken@1895
   742
        mov         eax,dword ptr [esp+10h]
slouken@1895
   743
        div         ecx /* low dword of the quotient */
slouken@1895
   744
        mov         esi,eax /* ebx:esi = quotient */
slouken@1895
   745
        mov         eax,ebx
slouken@1895
   746
        mul         dword ptr [esp+18h]
slouken@1895
   747
        mov         ecx,eax
slouken@1895
   748
        mov         eax,esi
slouken@1895
   749
        mul         dword ptr [esp+18h]
slouken@1895
   750
        add         edx,ecx /* edx:eax = quotient * divisor */
slouken@1895
   751
        jmp         L4
slouken@1346
   752
L3:
slouken@1895
   753
        mov         ebx,eax /* ebx:ecx = divisor */
slouken@1895
   754
        mov         ecx,dword ptr [esp+18h]
slouken@1895
   755
        mov         edx,dword ptr [esp+14h] /* edx:eax = dividend */
slouken@1895
   756
        mov         eax,dword ptr [esp+10h]
slouken@1346
   757
L5:
slouken@1895
   758
        shr         ebx,1 /* shift divisor and dividend right one bit ... */
slouken@1895
   759
        rcr         ecx,1
slouken@1895
   760
        shr         edx,1
slouken@1895
   761
        rcr         eax,1
slouken@1895
   762
        or          ebx,ebx
slouken@1895
   763
        jne         L5 /* ... until the divisor's high dword is zero */
slouken@1895
   764
        div         ecx /* eax = quotient estimate */
slouken@1895
   765
        mov         esi,eax
slouken@1895
   766
        mul         dword ptr [esp+1Ch]
slouken@1895
   767
        mov         ecx,eax
slouken@1895
   768
        mov         eax,dword ptr [esp+18h]
slouken@1895
   769
        mul         esi
slouken@1895
   770
        add         edx,ecx /* edx:eax = estimate * divisor (low 64 bits) */
slouken@1895
   771
        jb          L6 /* carry out of the product: estimate is too large */
slouken@1895
   772
        cmp         edx,dword ptr [esp+14h]
slouken@1895
   773
        ja          L6 /* product > dividend: estimate is too large */
slouken@1895
   774
        jb          L7
slouken@1895
   775
        cmp         eax,dword ptr [esp+10h]
slouken@1895
   776
        jbe         L7
slouken@1346
   777
L6:
slouken@1895
   778
        dec         esi /* lower the estimate by one ... */
slouken@1895
   779
        sub         eax,dword ptr [esp+18h] /* ... and remove one divisor from the product */
slouken@1895
   780
        sbb         edx,dword ptr [esp+1Ch]
slouken@1346
   781
L7:
slouken@1895
   782
        xor         ebx,ebx /* quotient high dword is zero on this path */
slouken@1346
   783
L4:
slouken@1895
   784
        sub         eax,dword ptr [esp+10h] /* edx:eax = product - dividend = -(remainder) */
slouken@1895
   785
        sbb         edx,dword ptr [esp+14h]
slouken@1895
   786
        dec         ebp
slouken@1895
   787
        jns         L9 /* dividend was negative: keep the negated remainder */
slouken@1895
   788
        neg         edx /* otherwise negate to get the non-negative remainder */
slouken@1895
   789
        neg         eax
slouken@1895
   790
        sbb         edx,0
slouken@1346
   791
L9:
slouken@1895
   792
        mov         ecx,edx /* move remainder to ebx:ecx and quotient to edx:eax */
slouken@1895
   793
        mov         edx,ebx
slouken@1895
   794
        mov         ebx,ecx
slouken@1895
   795
        mov         ecx,eax
slouken@1895
   796
        mov         eax,esi
slouken@1895
   797
        dec         edi
slouken@1895
   798
        jne         L8 /* negate the quotient only when exactly one operand was negative */
slouken@1895
   799
        neg         edx
slouken@1895
   800
        neg         eax
slouken@1895
   801
        sbb         edx,0
slouken@1346
   802
L8:
slouken@1895
   803
        pop         ebp
slouken@1895
   804
        pop         esi
slouken@1895
   805
        pop         edi
slouken@1895
   806
        ret         10h /* return and pop the 16 bytes of arguments */
slouken@1895
   807
    }
slouken@1895
   808
    /* *INDENT-ON* */
slouken@1330
   809
}
slouken@2735
   810
slouken@1895
   811
/*
 * _aulldvrm: unsigned 64-bit division producing both quotient and remainder.
 *
 * Naked routine (no compiler-generated prologue/epilogue). After the single
 * push below, the operands are read from the stack as:
 *   [esp+8]   / [esp+0Ch]   low / high dword of the dividend
 *   [esp+10h] / [esp+14h]   low / high dword of the divisor
 *
 * On return edx:eax holds the quotient and ebx:ecx the remainder, and
 * `ret 10h` pops the 16 bytes of arguments. ebx is written without being
 * saved because it is part of the result.
 */
void
slouken@1895
   812
__declspec(naked)
slouken@1895
   813
_aulldvrm()
slouken@1330
   814
{
slouken@1895
   815
    /* *INDENT-OFF* */
slouken@1895
   816
    __asm {
slouken@1895
   817
        push        esi
slouken@1895
   818
        mov         eax,dword ptr [esp+14h] /* high dword of the divisor */
slouken@1895
   819
        or          eax,eax
slouken@1895
   820
        jne         L1 /* divisor does not fit in 32 bits: use the general path */
slouken@1895
   821
        mov         ecx,dword ptr [esp+10h]
slouken@1895
   822
        mov         eax,dword ptr [esp+0Ch]
slouken@1895
   823
        xor         edx,edx
slouken@1895
   824
        div         ecx /* high dword of the quotient; edx = partial remainder */
slouken@1895
   825
        mov         ebx,eax
slouken@1895
   826
        mov         eax,dword ptr [esp+8]
slouken@1895
   827
        div         ecx /* low dword of the quotient */
slouken@1895
   828
        mov         esi,eax /* ebx:esi = quotient */
slouken@1895
   829
        mov         eax,ebx
slouken@1895
   830
        mul         dword ptr [esp+10h]
slouken@1895
   831
        mov         ecx,eax
slouken@1895
   832
        mov         eax,esi
slouken@1895
   833
        mul         dword ptr [esp+10h]
slouken@1895
   834
        add         edx,ecx /* edx:eax = quotient * divisor */
slouken@1895
   835
        jmp         L2
slouken@1346
   836
L1:
slouken@1895
   837
        mov         ecx,eax /* ecx:ebx = divisor */
slouken@1895
   838
        mov         ebx,dword ptr [esp+10h]
slouken@1895
   839
        mov         edx,dword ptr [esp+0Ch] /* edx:eax = dividend */
slouken@1895
   840
        mov         eax,dword ptr [esp+8]
slouken@1346
   841
L3:
slouken@1895
   842
        shr         ecx,1 /* shift divisor and dividend right one bit ... */
slouken@1895
   843
        rcr         ebx,1
slouken@1895
   844
        shr         edx,1
slouken@1895
   845
        rcr         eax,1
slouken@1895
   846
        or          ecx,ecx
slouken@1895
   847
        jne         L3 /* ... until the divisor's high dword is zero */
slouken@1895
   848
        div         ebx /* eax = quotient estimate */
slouken@1895
   849
        mov         esi,eax
slouken@1895
   850
        mul         dword ptr [esp+14h]
slouken@1895
   851
        mov         ecx,eax
slouken@1895
   852
        mov         eax,dword ptr [esp+10h]
slouken@1895
   853
        mul         esi
slouken@1895
   854
        add         edx,ecx /* edx:eax = estimate * divisor (low 64 bits) */
slouken@1895
   855
        jb          L4 /* carry out of the product: estimate is too large */
slouken@1895
   856
        cmp         edx,dword ptr [esp+0Ch]
slouken@1895
   857
        ja          L4 /* product > dividend: estimate is too large */
slouken@1895
   858
        jb          L5
slouken@1895
   859
        cmp         eax,dword ptr [esp+8]
slouken@1895
   860
        jbe         L5
slouken@1346
   861
L4:
slouken@1895
   862
        dec         esi /* lower the estimate by one ... */
slouken@1895
   863
        sub         eax,dword ptr [esp+10h] /* ... and remove one divisor from the product */
slouken@1895
   864
        sbb         edx,dword ptr [esp+14h]
slouken@1346
   865
L5:
slouken@1895
   866
        xor         ebx,ebx /* quotient high dword is zero on this path */
slouken@1346
   867
L2:
slouken@1895
   868
        sub         eax,dword ptr [esp+8] /* edx:eax = product - dividend = -(remainder) */
slouken@1895
   869
        sbb         edx,dword ptr [esp+0Ch]
slouken@1895
   870
        neg         edx /* negate to obtain the remainder */
slouken@1895
   871
        neg         eax
slouken@1895
   872
        sbb         edx,0
slouken@1895
   873
        mov         ecx,edx /* move remainder to ebx:ecx and quotient to edx:eax */
slouken@1895
   874
        mov         edx,ebx
slouken@1895
   875
        mov         ebx,ecx
slouken@1895
   876
        mov         ecx,eax
slouken@1895
   877
        mov         eax,esi
slouken@1895
   878
        pop         esi
slouken@1895
   879
        ret         10h /* return and pop the 16 bytes of arguments */
slouken@1895
   880
    }
slouken@1895
   881
    /* *INDENT-ON* */
slouken@1330
   882
}
slouken@2735
   883
slouken@1895
   884
/*
 * _allshl: 64-bit left shift.
 *
 * Naked routine that works purely on registers: the value is taken in
 * edx:eax (high:low), the shift count in cl, and the shifted value is
 * returned in edx:eax. Nothing is read from or popped off the stack.
 *   count >= 40h        result is zero
 *   20h <= count < 40h  low dword moves to the high dword, shifted by
 *                       (count & 1Fh); the low dword becomes zero
 *   count < 20h         shld/shl pair shifts across both dwords
 */
void
slouken@1895
   885
__declspec(naked)
slouken@1895
   886
_allshl()
slouken@1330
   887
{
slouken@1895
   888
    /* *INDENT-OFF* */
slouken@1895
   889
    __asm {
slouken@1895
   890
        cmp         cl,40h
slouken@1895
   891
        jae         RETZERO /* unsigned compare: count of 64 or more */
slouken@1895
   892
        cmp         cl,20h
slouken@1895
   893
        jae         MORE32 /* count of 32..63 */
slouken@1895
   894
        shld        edx,eax,cl /* high dword takes in the bits shifted out of eax */
slouken@1895
   895
        shl         eax,cl
slouken@1895
   896
        ret
slouken@1346
   897
MORE32:
slouken@1895
   898
        mov         edx,eax /* low dword becomes the high dword */
slouken@1895
   899
        xor         eax,eax
slouken@1895
   900
        and         cl,1Fh /* remaining shift = count - 32 */
slouken@1895
   901
        shl         edx,cl
slouken@1895
   902
        ret
slouken@1346
   903
RETZERO:
slouken@1895
   904
        xor         eax,eax
slouken@1895
   905
        xor         edx,edx
slouken@1895
   906
        ret
slouken@1895
   907
    }
slouken@1895
   908
    /* *INDENT-ON* */
slouken@1330
   909
}
slouken@2735
   910
slouken@1895
   911
/*
 * _allshr: 64-bit arithmetic (sign-propagating) right shift.
 *
 * Naked routine that works purely on registers: the value is taken in
 * edx:eax (high:low), the shift count in cl, and the shifted value is
 * returned in edx:eax. Nothing is read from or popped off the stack.
 *   count >= 3Fh        both dwords are filled with the sign bit
 *   20h <= count < 3Fh  high dword moves to the low dword, shifted by
 *                       (count & 1Fh); the high dword becomes the sign fill
 *   count < 20h         shrd/sar pair shifts across both dwords
 */
void
slouken@1895
   912
__declspec(naked)
slouken@2760
   913
_allshr()
slouken@2760
   914
{
slouken@2760
   915
    /* *INDENT-OFF* */
slouken@2760
   916
    __asm {
slouken@10587
   917
        cmp         cl,3Fh
slouken@10587
   918
        jae         RETSIGN /* unsigned compare: count of 63 or more leaves only sign bits */
slouken@2760
   919
        cmp         cl,20h
slouken@2760
   920
        jae         MORE32 /* count of 32..62 */
slouken@2760
   921
        shrd        eax,edx,cl /* low dword takes in the bits shifted out of edx */
slouken@2760
   922
        sar         edx,cl
slouken@2760
   923
        ret
slouken@2760
   924
MORE32:
slouken@2760
   925
        mov         eax,edx /* high dword becomes the low dword */
slouken@10587
   926
        sar         edx,1Fh /* high dword = sign bit replicated */
slouken@2760
   927
        and         cl,1Fh /* remaining shift = count - 32 */
slouken@2760
   928
        sar         eax,cl
slouken@2760
   929
        ret
slouken@10587
   930
RETSIGN:
slouken@10587
   931
        sar         edx,1Fh /* edx = 0 or FFFFFFFFh depending on the sign */
slouken@10587
   932
        mov         eax,edx
slouken@2760
   933
        ret
slouken@2760
   934
    }
slouken@2760
   935
    /* *INDENT-ON* */
slouken@2760
   936
}
slouken@2760
   937
slouken@2760
   938
/*
 * _aullshr: 64-bit logical (zero-filling) right shift.
 *
 * Naked routine that works purely on registers: the value is taken in
 * edx:eax (high:low), the shift count in cl, and the shifted value is
 * returned in edx:eax. Nothing is read from or popped off the stack.
 *   count >= 40h        result is zero
 *   20h <= count < 40h  high dword moves to the low dword, shifted by
 *                       (count & 1Fh); the high dword becomes zero
 *   count < 20h         shrd/shr pair shifts across both dwords
 */
void
slouken@2760
   939
__declspec(naked)
slouken@1895
   940
_aullshr()
slouken@1330
   941
{
slouken@1895
   942
    /* *INDENT-OFF* */
slouken@1895
   943
    __asm {
slouken@1895
   944
        cmp         cl,40h
slouken@1895
   945
        jae         RETZERO /* unsigned compare: count of 64 or more */
slouken@1895
   946
        cmp         cl,20h
slouken@1895
   947
        jae         MORE32 /* count of 32..63 */
slouken@1895
   948
        shrd        eax,edx,cl /* low dword takes in the bits shifted out of edx */
slouken@1895
   949
        shr         edx,cl
slouken@1895
   950
        ret
slouken@1346
   951
MORE32:
slouken@1895
   952
        mov         eax,edx /* high dword becomes the low dword */
slouken@1895
   953
        xor         edx,edx
slouken@1895
   954
        and         cl,1Fh /* remaining shift = count - 32 */
slouken@1895
   955
        shr         eax,cl
slouken@1895
   956
        ret
slouken@1346
   957
RETZERO:
slouken@1895
   958
        xor         eax,eax
slouken@1895
   959
        xor         edx,edx
slouken@1895
   960
        ret
slouken@1895
   961
    }
slouken@1895
   962
    /* *INDENT-ON* */
slouken@1330
   963
}
slouken@1330
   964
slouken@5455
   965
#endif /* _M_IX86 */
slouken@3255
   966
slouken@1330
   967
#endif /* MSC_VER */
slouken@1330
   968
slouken@1331
   969
#endif /* !HAVE_LIBC */
slouken@1895
   970
slouken@1895
   971
/* vi: set ts=4 sw=4 expandtab: */