src/cpuinfo/SDL_cpuinfo.c
author Sylvain Becker <sylvain.becker@gmail.com>
Sun, 27 Oct 2019 14:52:09 +0100
changeset 13192 8974226b4075
parent 13187 20031279f4d4
child 13243 2ec6a2476551
permissions -rw-r--r--
Android: fix compilation error of SDL_cpuinfo.c on android-r13b (bug 4365)
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #ifdef TEST_MAIN
    22 #include "SDL_config.h"
    23 #else
    24 #include "../SDL_internal.h"
    25 #endif
    26 
    27 #if defined(__WIN32__) || defined(__WINRT__)
    28 #include "../core/windows/SDL_windows.h"
    29 #endif
    30 #if defined(__OS2__)
    31 #define INCL_DOS
    32 #include <os2.h>
    33 #ifndef QSV_NUMPROCESSORS
    34 #define QSV_NUMPROCESSORS 26
    35 #endif
    36 #endif
    37 
    38 /* CPU feature detection for SDL */
    39 
    40 #include "SDL_cpuinfo.h"
    41 #include "SDL_assert.h"
    42 
    43 #ifdef HAVE_SYSCONF
    44 #include <unistd.h>
    45 #endif
    46 #ifdef HAVE_SYSCTLBYNAME
    47 #include <sys/types.h>
    48 #include <sys/sysctl.h>
    49 #endif
    50 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
    51 #include <sys/sysctl.h>         /* For AltiVec check */
    52 #elif defined(__OpenBSD__) && defined(__powerpc__)
    53 #include <sys/param.h>
    54 #include <sys/sysctl.h> /* For AltiVec check */
    55 #include <machine/cpu.h>
    56 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    57 #include <signal.h>
    58 #include <setjmp.h>
    59 #endif
    60 
    61 #if defined(__QNXNTO__)
    62 #include <sys/syspage.h>
    63 #endif
    64 
    65 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
    66 /*#include <asm/hwcap.h>*/
    67 #ifndef AT_HWCAP
    68 #define AT_HWCAP 16
    69 #endif
    70 #ifndef AT_PLATFORM
    71 #define AT_PLATFORM 15
    72 #endif
    73 /* Prevent compilation error when including elf.h would also try to define AT_* as an enum */
    74 #ifndef AT_NULL
    75 #define AT_NULL 0
    76 #endif
    77 #ifndef HWCAP_NEON
    78 #define HWCAP_NEON (1 << 12)
    79 #endif
    80 #if defined HAVE_GETAUXVAL
    81 #include <sys/auxv.h>
    82 #else
    83 #include <fcntl.h>
    84 #endif
    85 #endif
    86 
    87 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
    88 #if __ARM_ARCH < 8
    89 #include <cpu-features.h>
    90 #endif
    91 #endif
    92 
/* Bit flags stored in SDL_CPUFeatures by SDL_GetCPUFeatures(); one bit per
   detected CPU capability.  The SDL_Has*() entry points test these bits. */
#define CPU_HAS_RDTSC   (1 << 0)
#define CPU_HAS_ALTIVEC (1 << 1)
#define CPU_HAS_MMX     (1 << 2)
#define CPU_HAS_3DNOW   (1 << 3)
#define CPU_HAS_SSE     (1 << 4)
#define CPU_HAS_SSE2    (1 << 5)
#define CPU_HAS_SSE3    (1 << 6)
#define CPU_HAS_SSE41   (1 << 7)
#define CPU_HAS_SSE42   (1 << 8)
#define CPU_HAS_AVX     (1 << 9)
#define CPU_HAS_AVX2    (1 << 10)
#define CPU_HAS_NEON    (1 << 11)
#define CPU_HAS_AVX512F (1 << 12)
#define CPU_HAS_ARM_SIMD (1 << 13)
   107 
#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
/* This is the brute force way of detecting instruction sets...
   the idea is borrowed from the libmpeg2 library - thanks!
 */
/* Jump target armed with setjmp() by CPU_haveAltiVec() before it executes
   its test instructions. */
static jmp_buf jmpbuf;
/* Signal handler installed for SIGILL by CPU_haveAltiVec(): jumps back to the
   setjmp() point with the value 1, so the probe's "success" path is skipped.
   The signal number is not used. */
static void
illegal_instruction(int sig)
{
    longjmp(jmpbuf, 1);
}
#endif /* HAVE_SETJMP */
   119 
/* Report whether the CPUID instruction is available.
 *
 * Returns 1 when CPUID is usable and 0 otherwise.  On the 32-bit and GNU
 * 64-bit x86 paths this is probed by trying to toggle bit 0x200000 (the ID
 * bit) of EFLAGS; MSVC x64 builds simply report 1.  When
 * SDL_CPUINFO_DISABLED is defined, or no branch matches the compiler and
 * architecture, the result is 0.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if defined(__GNUC__) && defined(i386)
    __asm__ (
"        pushfl                      # Get original EFLAGS             \n"
"        popl    %%eax                                                 \n"
"        movl    %%eax,%%ecx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
"        popfl                       # Replace current EFLAGS value    \n"
"        pushfl                      # Get new EFLAGS                  \n"
"        popl    %%eax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif defined(__GNUC__) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUID by definition.  But it's nice to be able to prove it.  :) */
    __asm__ (
"        pushfq                      # Get original EFLAGS             \n"
"        popq    %%rax                                                 \n"
"        movq    %%rax,%%rcx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
"        popfq                       # Replace current EFLAGS value    \n"
"        pushfq                      # Get new EFLAGS                  \n"
"        popq    %%rax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    /* Same EFLAGS ID-bit toggle test, written in MSVC/Watcom inline-asm syntax. */
    __asm {
        pushfd                      ; Get original EFLAGS
        pop     eax
        mov     ecx, eax
        xor     eax, 200000h        ; Flip ID bit in EFLAGS
        push    eax                 ; Save new EFLAGS value on stack
        popfd                       ; Replace current EFLAGS value
        pushfd                      ; Get new EFLAGS
        pop     eax                 ; Store new EFLAGS in EAX
        xor     eax, ecx            ; Can not toggle ID bit,
        jz      done                ; Processor=80486
        mov     has_CPUID,1         ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe here: CPUID is simply assumed to exist on x64. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): this branch stores the result through a fixed frame
       offset, -8(%ebp), instead of an asm operand; presumably that is the
       stack slot of has_CPUID -- confirm against the compiler's frame layout. */
    __asm (
"       pushfl                 \n"
"       popl    %eax           \n"
"       movl    %eax,%ecx      \n"
"       xorl    $0x200000,%eax \n"
"       pushl   %eax           \n"
"       popfl                  \n"
"       pushfl                 \n"
"       popl    %eax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%ebp)    \n"
"1:                            \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): as in the 32-bit Sun branch, the result is written to a
       fixed frame offset, -8(%rbp); presumably has_CPUID -- confirm. */
    __asm (
"       pushfq                 \n"
"       popq    %rax           \n"
"       movq    %rax,%rcx      \n"
"       xorl    $0x200000,%eax \n"
"       pushq   %rax           \n"
"       popfq                  \n"
"       pushfq                 \n"
"       popq    %rax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%rbp)    \n"
"1:                            \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
   217 
   218 #if defined(__GNUC__) && defined(i386)
   219 #define cpuid(func, a, b, c, d) \
   220     __asm__ __volatile__ ( \
   221 "        pushl %%ebx        \n" \
   222 "        xorl %%ecx,%%ecx   \n" \
   223 "        cpuid              \n" \
   224 "        movl %%ebx, %%esi  \n" \
   225 "        popl %%ebx         \n" : \
   226             "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
   227 #elif defined(__GNUC__) && defined(__x86_64__)
   228 #define cpuid(func, a, b, c, d) \
   229     __asm__ __volatile__ ( \
   230 "        pushq %%rbx        \n" \
   231 "        xorq %%rcx,%%rcx   \n" \
   232 "        cpuid              \n" \
   233 "        movq %%rbx, %%rsi  \n" \
   234 "        popq %%rbx         \n" : \
   235             "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
   236 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
   237 #define cpuid(func, a, b, c, d) \
   238     __asm { \
   239         __asm mov eax, func \
   240         __asm xor ecx, ecx \
   241         __asm cpuid \
   242         __asm mov a, eax \
   243         __asm mov b, ebx \
   244         __asm mov c, ecx \
   245         __asm mov d, edx \
   246 }
   247 #elif defined(_MSC_VER) && defined(_M_X64)
   248 #define cpuid(func, a, b, c, d) \
   249 { \
   250     int CPUInfo[4]; \
   251     __cpuid(CPUInfo, func); \
   252     a = CPUInfo[0]; \
   253     b = CPUInfo[1]; \
   254     c = CPUInfo[2]; \
   255     d = CPUInfo[3]; \
   256 }
   257 #else
   258 #define cpuid(func, a, b, c, d) \
   259     do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
   260 #endif
   261 
   262 static int CPU_CPUIDFeatures[4];
   263 static int CPU_CPUIDMaxFunction = 0;
   264 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
   265 static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
   266 
   267 static void
   268 CPU_calcCPUIDFeatures(void)
   269 {
   270     static SDL_bool checked = SDL_FALSE;
   271     if (!checked) {
   272         checked = SDL_TRUE;
   273         if (CPU_haveCPUID()) {
   274             int a, b, c, d;
   275             cpuid(0, a, b, c, d);
   276             CPU_CPUIDMaxFunction = a;
   277             if (CPU_CPUIDMaxFunction >= 1) {
   278                 cpuid(1, a, b, c, d);
   279                 CPU_CPUIDFeatures[0] = a;
   280                 CPU_CPUIDFeatures[1] = b;
   281                 CPU_CPUIDFeatures[2] = c;
   282                 CPU_CPUIDFeatures[3] = d;
   283 
   284                 /* Check to make sure we can call xgetbv */
   285                 if (c & 0x08000000) {
   286                     /* Call xgetbv to see if YMM (etc) register state is saved */
   287 #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
   288                     __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
   289 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
   290                     a = (int)_xgetbv(0);
   291 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
   292                     __asm
   293                     {
   294                         xor ecx, ecx
   295                         _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
   296                         mov a, eax
   297                     }
   298 #endif
   299                     CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
   300                     CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
   301                 }
   302             }
   303         }
   304     }
   305 }
   306 
   307 static int
   308 CPU_haveAltiVec(void)
   309 {
   310     volatile int altivec = 0;
   311 #ifndef SDL_CPUINFO_DISABLED
   312 #if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
   313 #ifdef __OpenBSD__
   314     int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
   315 #else
   316     int selectors[2] = { CTL_HW, HW_VECTORUNIT };
   317 #endif
   318     int hasVectorUnit = 0;
   319     size_t length = sizeof(hasVectorUnit);
   320     int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
   321     if (0 == error)
   322         altivec = (hasVectorUnit != 0);
   323 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
   324     void (*handler) (int sig);
   325     handler = signal(SIGILL, illegal_instruction);
   326     if (setjmp(jmpbuf) == 0) {
   327         asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
   328         altivec = 1;
   329     }
   330     signal(SIGILL, handler);
   331 #endif
   332 #endif
   333     return altivec;
   334 }
   335 
   336 #if !defined(__ARM_ARCH)
   337 static SDL_bool CPU_haveARMSIMD(void) { return 0; }
   338 
   339 #elif defined(__linux__)
   340 #include <unistd.h>
   341 #include <sys/types.h>
   342 #include <sys/stat.h>
   343 #include <fcntl.h>
   344 #include <elf.h>
   345 
   346 static SDL_bool
   347 CPU_haveARMSIMD(void)
   348 {
   349     int arm_simd = 0;
   350     int fd;
   351 
   352     fd = open("/proc/self/auxv", O_RDONLY);
   353     if (fd >= 0)
   354     {
   355         Elf32_auxv_t aux;
   356         while (read(fd, &aux, sizeof aux) == sizeof aux)
   357         {
   358             if (aux.a_type == AT_PLATFORM)
   359             {
   360                 const char *plat = (const char *) aux.a_un.a_val;
   361                 arm_simd = strncmp(plat, "v6l", 3) == 0 ||
   362                            strncmp(plat, "v7l", 3) == 0;
   363             }
   364         }
   365         close(fd);
   366     }
   367     return arm_simd;
   368 }
   369 
   370 #else
   371 static SDL_bool
   372 CPU_haveARMSIMD(void)
   373 {
   374     #warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
   375     return 0;
   376 }
   377 #endif
   378 
   379 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
   380 static int
   381 readProcAuxvForNeon(void)
   382 {
   383     int neon = 0;
   384     int kv[2];
   385     const int fd = open("/proc/self/auxv", O_RDONLY);
   386     if (fd != -1) {
   387         while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
   388             if (kv[0] == AT_HWCAP) {
   389                 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
   390                 break;
   391             }
   392         }
   393         close(fd);
   394     }
   395     return neon;
   396 }
   397 #endif
   398 
   399 
   400 static int
   401 CPU_haveNEON(void)
   402 {
   403 /* The way you detect NEON is a privileged instruction on ARM, so you have
   404    query the OS kernel in a platform-specific way. :/ */
   405 #if defined(SDL_CPUINFO_DISABLED)
   406    return 0; /* disabled */
   407 #elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
   408 /* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
   409 /* Seems to have been removed */
   410 #  if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
   411 #    define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
   412 #  endif
   413 /* All WinRT ARM devices are required to support NEON, but just in case. */
   414     return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
   415 #elif !defined(__ARM_ARCH)
   416     return 0;  /* not an ARM CPU at all. */
   417 #elif __ARM_ARCH >= 8
   418     return 1;  /* ARMv8 always has non-optional NEON support. */
   419 #elif defined(__APPLE__) && (__ARM_ARCH >= 7)
   420     /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
   421     return 1;  /* all Apple ARMv7 chips and later have NEON. */
   422 #elif defined(__APPLE__)
   423     return 0;  /* assume anything else from Apple doesn't have NEON. */
   424 #elif defined(__QNXNTO__)
   425     return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
   426 #elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
   427     return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
   428 #elif defined(__LINUX__)
   429     return readProcAuxvForNeon();
   430 #elif defined(__ANDROID__)
   431     /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
   432     {
   433         AndroidCpuFamily cpu_family = android_getCpuFamily();
   434         if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
   435             uint64_t cpu_features = android_getCpuFeatures();
   436             if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
   437                 return 1;
   438             }
   439         }
   440         return 0;
   441     }
   442 #else
   443 #warning SDL_HasNEON is not implemented for this ARM platform. Write me.
   444     return 0;
   445 #endif
   446 }
   447 
   448 static int
   449 CPU_have3DNow(void)
   450 {
   451     if (CPU_CPUIDMaxFunction > 0) {  /* that is, do we have CPUID at all? */
   452         int a, b, c, d;
   453         cpuid(0x80000000, a, b, c, d);
   454         if (a >= 0x80000001) {
   455             cpuid(0x80000001, a, b, c, d);
   456             return (d & 0x80000000);
   457         }
   458     }
   459     return 0;
   460 }
   461 
/* Feature tests against the cached CPUID leaf 1 registers
   (CPU_CPUIDFeatures[2] = ECX, CPU_CPUIDFeatures[3] = EDX).  Each expands to
   a non-zero value when the feature bit is set.  They are only meaningful
   once CPU_calcCPUIDFeatures() has run; before that every test yields 0. */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
/* AVX additionally requires that the OS saves the YMM register state. */
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
   470 
   471 static int
   472 CPU_haveAVX2(void)
   473 {
   474     if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
   475         int a, b, c, d;
   476         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   477         cpuid(7, a, b, c, d);
   478         return (b & 0x00000020);
   479     }
   480     return 0;
   481 }
   482 
   483 static int
   484 CPU_haveAVX512F(void)
   485 {
   486     if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
   487         int a, b, c, d;
   488         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   489         cpuid(7, a, b, c, d);
   490         return (b & 0x00010000);
   491     }
   492     return 0;
   493 }
   494 
   495 static int SDL_CPUCount = 0;
   496 
   497 int
   498 SDL_GetCPUCount(void)
   499 {
   500     if (!SDL_CPUCount) {
   501 #ifndef SDL_CPUINFO_DISABLED
   502 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
   503         if (SDL_CPUCount <= 0) {
   504             SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
   505         }
   506 #endif
   507 #ifdef HAVE_SYSCTLBYNAME
   508         if (SDL_CPUCount <= 0) {
   509             size_t size = sizeof(SDL_CPUCount);
   510             sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
   511         }
   512 #endif
   513 #ifdef __WIN32__
   514         if (SDL_CPUCount <= 0) {
   515             SYSTEM_INFO info;
   516             GetSystemInfo(&info);
   517             SDL_CPUCount = info.dwNumberOfProcessors;
   518         }
   519 #endif
   520 #ifdef __OS2__
   521         if (SDL_CPUCount <= 0) {
   522             DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
   523                             &SDL_CPUCount, sizeof(SDL_CPUCount) );
   524         }
   525 #endif
   526 #endif
   527         /* There has to be at least 1, right? :) */
   528         if (SDL_CPUCount <= 0) {
   529             SDL_CPUCount = 1;
   530         }
   531     }
   532     return SDL_CPUCount;
   533 }
   534 
   535 /* Oh, such a sweet sweet trick, just not very useful. :) */
   536 static const char *
   537 SDL_GetCPUType(void)
   538 {
   539     static char SDL_CPUType[13];
   540 
   541     if (!SDL_CPUType[0]) {
   542         int i = 0;
   543 
   544         CPU_calcCPUIDFeatures();
   545         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   546             int a, b, c, d;
   547             cpuid(0x00000000, a, b, c, d);
   548             (void) a;
   549             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   550             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   551             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   552             SDL_CPUType[i++] = (char)(b & 0xff);
   553 
   554             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   555             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   556             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   557             SDL_CPUType[i++] = (char)(d & 0xff);
   558 
   559             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   560             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   561             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   562             SDL_CPUType[i++] = (char)(c & 0xff);
   563         }
   564         if (!SDL_CPUType[0]) {
   565             SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
   566         }
   567     }
   568     return SDL_CPUType;
   569 }
   570 
   571 
   572 #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
   573 static const char *
   574 SDL_GetCPUName(void)
   575 {
   576     static char SDL_CPUName[48];
   577 
   578     if (!SDL_CPUName[0]) {
   579         int i = 0;
   580         int a, b, c, d;
   581 
   582         CPU_calcCPUIDFeatures();
   583         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   584             cpuid(0x80000000, a, b, c, d);
   585             if (a >= 0x80000004) {
   586                 cpuid(0x80000002, a, b, c, d);
   587                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   588                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   589                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   590                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   591                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   592                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   593                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   594                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   595                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   596                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   597                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   598                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   599                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   600                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   601                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   602                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   603                 cpuid(0x80000003, a, b, c, d);
   604                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   605                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   606                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   607                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   608                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   609                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   610                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   611                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   612                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   613                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   614                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   615                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   616                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   617                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   618                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   619                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   620                 cpuid(0x80000004, a, b, c, d);
   621                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   622                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   623                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   624                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   625                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   626                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   627                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   628                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   629                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   630                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   631                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   632                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   633                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   634                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   635                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   636                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   637             }
   638         }
   639         if (!SDL_CPUName[0]) {
   640             SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
   641         }
   642     }
   643     return SDL_CPUName;
   644 }
   645 #endif
   646 
   647 int
   648 SDL_GetCPUCacheLineSize(void)
   649 {
   650     const char *cpuType = SDL_GetCPUType();
   651     int a, b, c, d;
   652     (void) a; (void) b; (void) c; (void) d;
   653     if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
   654         cpuid(0x00000001, a, b, c, d);
   655         return (((b >> 8) & 0xff) * 8);
   656     } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
   657         cpuid(0x80000005, a, b, c, d);
   658         return (c & 0xff);
   659     } else {
   660         /* Just make a guess here... */
   661         return SDL_CACHELINE_SIZE;
   662     }
   663 }
   664 
   665 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
   666 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
   667 
   668 static Uint32
   669 SDL_GetCPUFeatures(void)
   670 {
   671     if (SDL_CPUFeatures == 0xFFFFFFFF) {
   672         CPU_calcCPUIDFeatures();
   673         SDL_CPUFeatures = 0;
   674         SDL_SIMDAlignment = sizeof(void *);  /* a good safe base value */
   675         if (CPU_haveRDTSC()) {
   676             SDL_CPUFeatures |= CPU_HAS_RDTSC;
   677         }
   678         if (CPU_haveAltiVec()) {
   679             SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
   680             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   681         }
   682         if (CPU_haveMMX()) {
   683             SDL_CPUFeatures |= CPU_HAS_MMX;
   684             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   685         }
   686         if (CPU_have3DNow()) {
   687             SDL_CPUFeatures |= CPU_HAS_3DNOW;
   688             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   689         }
   690         if (CPU_haveSSE()) {
   691             SDL_CPUFeatures |= CPU_HAS_SSE;
   692             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   693         }
   694         if (CPU_haveSSE2()) {
   695             SDL_CPUFeatures |= CPU_HAS_SSE2;
   696             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   697         }
   698         if (CPU_haveSSE3()) {
   699             SDL_CPUFeatures |= CPU_HAS_SSE3;
   700             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   701         }
   702         if (CPU_haveSSE41()) {
   703             SDL_CPUFeatures |= CPU_HAS_SSE41;
   704             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   705         }
   706         if (CPU_haveSSE42()) {
   707             SDL_CPUFeatures |= CPU_HAS_SSE42;
   708             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   709         }
   710         if (CPU_haveAVX()) {
   711             SDL_CPUFeatures |= CPU_HAS_AVX;
   712             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   713         }
   714         if (CPU_haveAVX2()) {
   715             SDL_CPUFeatures |= CPU_HAS_AVX2;
   716             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   717         }
   718         if (CPU_haveAVX512F()) {
   719             SDL_CPUFeatures |= CPU_HAS_AVX512F;
   720             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
   721         }
   722         if (CPU_haveARMSIMD()) {
   723             SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
   724             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   725         }
   726         if (CPU_haveNEON()) {
   727             SDL_CPUFeatures |= CPU_HAS_NEON;
   728             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   729         }
   730     }
   731     return SDL_CPUFeatures;
   732 }
   733 
   734 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
   735 
   736 SDL_bool SDL_HasRDTSC(void)
   737 {
   738     return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
   739 }
   740 
   741 SDL_bool
   742 SDL_HasAltiVec(void)
   743 {
   744     return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
   745 }
   746 
   747 SDL_bool
   748 SDL_HasMMX(void)
   749 {
   750     return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
   751 }
   752 
   753 SDL_bool
   754 SDL_Has3DNow(void)
   755 {
   756     return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
   757 }
   758 
   759 SDL_bool
   760 SDL_HasSSE(void)
   761 {
   762     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
   763 }
   764 
   765 SDL_bool
   766 SDL_HasSSE2(void)
   767 {
   768     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
   769 }
   770 
   771 SDL_bool
   772 SDL_HasSSE3(void)
   773 {
   774     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
   775 }
   776 
   777 SDL_bool
   778 SDL_HasSSE41(void)
   779 {
   780     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
   781 }
   782 
   783 SDL_bool
   784 SDL_HasSSE42(void)
   785 {
   786     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
   787 }
   788 
   789 SDL_bool
   790 SDL_HasAVX(void)
   791 {
   792     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
   793 }
   794 
   795 SDL_bool
   796 SDL_HasAVX2(void)
   797 {
   798     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
   799 }
   800 
   801 SDL_bool
   802 SDL_HasAVX512F(void)
   803 {
   804     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
   805 }
   806 
   807 SDL_bool
   808 SDL_HasARMSIMD(void)
   809 {
   810     return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
   811 }
   812 
   813 SDL_bool
   814 SDL_HasNEON(void)
   815 {
   816     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
   817 }
   818 
   819 static int SDL_SystemRAM = 0;
   820 
   821 int
   822 SDL_GetSystemRAM(void)
   823 {
   824     if (!SDL_SystemRAM) {
   825 #ifndef SDL_CPUINFO_DISABLED
   826 #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
   827         if (SDL_SystemRAM <= 0) {
   828             SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
   829         }
   830 #endif
   831 #ifdef HAVE_SYSCTLBYNAME
   832         if (SDL_SystemRAM <= 0) {
   833 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
   834 #ifdef HW_REALMEM
   835             int mib[2] = {CTL_HW, HW_REALMEM};
   836 #else
   837             /* might only report up to 2 GiB */
   838             int mib[2] = {CTL_HW, HW_PHYSMEM};
   839 #endif /* HW_REALMEM */
   840 #else
   841             int mib[2] = {CTL_HW, HW_MEMSIZE};
   842 #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
   843             Uint64 memsize = 0;
   844             size_t len = sizeof(memsize);
   845             
   846             if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
   847                 SDL_SystemRAM = (int)(memsize / (1024*1024));
   848             }
   849         }
   850 #endif
   851 #ifdef __WIN32__
   852         if (SDL_SystemRAM <= 0) {
   853             MEMORYSTATUSEX stat;
   854             stat.dwLength = sizeof(stat);
   855             if (GlobalMemoryStatusEx(&stat)) {
   856                 SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
   857             }
   858         }
   859 #endif
   860 #ifdef __OS2__
   861         if (SDL_SystemRAM <= 0) {
   862             Uint32 sysram = 0;
   863             DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
   864             SDL_SystemRAM = (int) (sysram / 0x100000U);
   865         }
   866 #endif
   867 #endif
   868     }
   869     return SDL_SystemRAM;
   870 }
   871 
   872 
   873 size_t
   874 SDL_SIMDGetAlignment(void)
   875 {
   876     if (SDL_SIMDAlignment == 0xFFFFFFFF) {
   877         SDL_GetCPUFeatures();  /* make sure this has been calculated */
   878     }
   879     SDL_assert(SDL_SIMDAlignment != 0);
   880     return SDL_SIMDAlignment;
   881 }
   882 
   883 void *
   884 SDL_SIMDAlloc(const size_t len)
   885 {
   886     const size_t alignment = SDL_SIMDGetAlignment();
   887     const size_t padding = alignment - (len % alignment);
   888     const size_t padded = (padding != alignment) ? (len + padding) : len;
   889     Uint8 *retval = NULL;
   890     Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
   891     if (ptr) {
   892         /* store the actual malloc pointer right before our aligned pointer. */
   893         retval = ptr + sizeof (void *);
   894         retval += alignment - (((size_t) retval) % alignment);
   895         *(((void **) retval) - 1) = ptr;
   896     }
   897     return retval;
   898 }
   899 
   900 void
   901 SDL_SIMDFree(void *ptr)
   902 {
   903     if (ptr) {
   904         void **realptr = (void **) ptr;
   905         realptr--;
   906         SDL_free(*(((void **) ptr) - 1));
   907     }
   908 }
   909 
   910 
   911 #ifdef TEST_MAIN
   912 
   913 #include <stdio.h>
   914 
   915 int
   916 main()
   917 {
   918     printf("CPU count: %d\n", SDL_GetCPUCount());
   919     printf("CPU type: %s\n", SDL_GetCPUType());
   920     printf("CPU name: %s\n", SDL_GetCPUName());
   921     printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
   922     printf("RDTSC: %d\n", SDL_HasRDTSC());
   923     printf("Altivec: %d\n", SDL_HasAltiVec());
   924     printf("MMX: %d\n", SDL_HasMMX());
   925     printf("3DNow: %d\n", SDL_Has3DNow());
   926     printf("SSE: %d\n", SDL_HasSSE());
   927     printf("SSE2: %d\n", SDL_HasSSE2());
   928     printf("SSE3: %d\n", SDL_HasSSE3());
   929     printf("SSE4.1: %d\n", SDL_HasSSE41());
   930     printf("SSE4.2: %d\n", SDL_HasSSE42());
   931     printf("AVX: %d\n", SDL_HasAVX());
   932     printf("AVX2: %d\n", SDL_HasAVX2());
   933     printf("AVX-512F: %d\n", SDL_HasAVX512F());
   934     printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
   935     printf("NEON: %d\n", SDL_HasNEON());
   936     printf("RAM: %d MB\n", SDL_GetSystemRAM());
   937     return 0;
   938 }
   939 
   940 #endif /* TEST_MAIN */
   941 
   942 /* vi: set ts=4 sw=4 expandtab: */