src/cpuinfo/SDL_cpuinfo.c
author Ben Avison <bavison@riscosopen.org>
Thu, 24 Oct 2019 21:12:08 -0400
changeset 13173 de3493dacdaf
parent 13151 568f67c04841
child 13187 20031279f4d4
permissions -rw-r--r--
ARM: Create configure option --enable-arm-simd to govern assembly optimizations
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #ifdef TEST_MAIN
    22 #include "SDL_config.h"
    23 #else
    24 #include "../SDL_internal.h"
    25 #endif
    26 
    27 #if defined(__WIN32__) || defined(__WINRT__)
    28 #include "../core/windows/SDL_windows.h"
    29 #endif
    30 #if defined(__OS2__)
    31 #define INCL_DOS
    32 #include <os2.h>
    33 #ifndef QSV_NUMPROCESSORS
    34 #define QSV_NUMPROCESSORS 26
    35 #endif
    36 #endif
    37 
    38 /* CPU feature detection for SDL */
    39 
    40 #include "SDL_cpuinfo.h"
    41 #include "SDL_assert.h"
    42 
    43 #ifdef HAVE_SYSCONF
    44 #include <unistd.h>
    45 #endif
    46 #ifdef HAVE_SYSCTLBYNAME
    47 #include <sys/types.h>
    48 #include <sys/sysctl.h>
    49 #endif
    50 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
    51 #include <sys/sysctl.h>         /* For AltiVec check */
    52 #elif defined(__OpenBSD__) && defined(__powerpc__)
    53 #include <sys/param.h>
    54 #include <sys/sysctl.h> /* For AltiVec check */
    55 #include <machine/cpu.h>
    56 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    57 #include <signal.h>
    58 #include <setjmp.h>
    59 #endif
    60 
    61 #if defined(__QNXNTO__)
    62 #include <sys/syspage.h>
    63 #endif
    64 
    65 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
    66 /*#include <asm/hwcap.h>*/
    67 #ifndef AT_HWCAP
    68 #define AT_HWCAP 16
    69 #endif
    70 #ifndef HWCAP_NEON
    71 #define HWCAP_NEON (1 << 12)
    72 #endif
    73 #if defined HAVE_GETAUXVAL
    74 #include <sys/auxv.h>
    75 #else
    76 #include <fcntl.h>
    77 #endif
    78 #endif
    79 
    80 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
    81 #if __ARM_ARCH < 8
    82 #include <cpu-features.h>
    83 #endif
    84 #endif
    85 
    86 #define CPU_HAS_RDTSC   (1 << 0)  /* Feature bits OR'ed into SDL_CPUFeatures by SDL_GetCPUFeatures(); this one is set when CPU_haveRDTSC() is nonzero. */
    87 #define CPU_HAS_ALTIVEC (1 << 1)  /* set when CPU_haveAltiVec() is nonzero */
    88 #define CPU_HAS_MMX     (1 << 2)  /* set when CPU_haveMMX() is nonzero */
    89 #define CPU_HAS_3DNOW   (1 << 3)  /* set when CPU_have3DNow() is nonzero */
    90 #define CPU_HAS_SSE     (1 << 4)  /* set when CPU_haveSSE() is nonzero */
    91 #define CPU_HAS_SSE2    (1 << 5)  /* set when CPU_haveSSE2() is nonzero */
    92 #define CPU_HAS_SSE3    (1 << 6)  /* set when CPU_haveSSE3() is nonzero */
    93 #define CPU_HAS_SSE41   (1 << 7)  /* set when CPU_haveSSE41() is nonzero */
    94 #define CPU_HAS_SSE42   (1 << 8)  /* set when CPU_haveSSE42() is nonzero */
    95 #define CPU_HAS_AVX     (1 << 9)  /* set when CPU_haveAVX() is nonzero */
    96 #define CPU_HAS_AVX2    (1 << 10) /* set when CPU_haveAVX2() is nonzero */
    97 #define CPU_HAS_NEON    (1 << 11) /* set when CPU_haveNEON() is nonzero */
    98 #define CPU_HAS_AVX512F (1 << 12) /* set when CPU_haveAVX512F() is nonzero */
    99 #define CPU_HAS_ARM_SIMD (1 << 13) /* set when CPU_haveARMSIMD() is nonzero */
   100 
   101 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
   102 /* This is the brute force way of detecting instruction sets...
   103    the idea is borrowed from the libmpeg2 library - thanks!
   104  */
       /* Jump buffer armed with setjmp() by CPU_haveAltiVec() right before it
          executes its probe instructions. */
   105 static jmp_buf jmpbuf;
       /* Signal handler that CPU_haveAltiVec() installs for SIGILL while probing.
          `sig` is not inspected. */
   106 static void
   107 illegal_instruction(int sig)
   108 {
           /* Resume at the setjmp() site with a nonzero value, so the caller
              skips the code that would have marked the feature as present. */
   109     longjmp(jmpbuf, 1);
   110 }
   111 #endif /* HAVE_SETJMP */
   112 
   113 static int
   114 CPU_haveCPUID(void)
   115 {
           /* Report whether the CPUID instruction can be used.
              Returns 1 when a probe below succeeds, otherwise 0.  The result
              stays 0 when SDL_CPUINFO_DISABLED is defined or when none of the
              compiler/architecture branches applies.

              Every assembly variant performs the same test: read the flags
              register, flip bit 21 (mask 0x200000, labelled "ID bit" in the
              inline comments), write it back, read it again, and XOR with the
              saved copy.  A zero result means the bit could not be toggled. */
   116     int has_CPUID = 0;
   117 
   118 /* *INDENT-OFF* */
   119 #ifndef SDL_CPUINFO_DISABLED
   120 #if defined(__GNUC__) && defined(i386)
           /* GCC-style inline asm, 32-bit x86.  The result is stored through the
              "=m" memory operand; EAX and ECX are declared as clobbered. */
   121     __asm__ (
   122 "        pushfl                      # Get original EFLAGS             \n"
   123 "        popl    %%eax                                                 \n"
   124 "        movl    %%eax,%%ecx                                           \n"
   125 "        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
   126 "        pushl   %%eax               # Save new EFLAGS value on stack  \n"
   127 "        popfl                       # Replace current EFLAGS value    \n"
   128 "        pushfl                      # Get new EFLAGS                  \n"
   129 "        popl    %%eax               # Store new EFLAGS in EAX         \n"
   130 "        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
   131 "        jz      1f                  # Processor=80486                 \n"
   132 "        movl    $1,%0               # We have CPUID support           \n"
   133 "1:                                                                    \n"
   134     : "=m" (has_CPUID)
   135     :
   136     : "%eax", "%ecx"
   137     );
   138 #elif defined(__GNUC__) && defined(__x86_64__)
   139 /* Technically, if this is being compiled under __x86_64__ then it has 
   140    CPUid by definition.  But it's nice to be able to prove it.  :)      */
           /* Same probe as the 32-bit GCC branch, using the 64-bit push/pop forms. */
   141     __asm__ (
   142 "        pushfq                      # Get original EFLAGS             \n"
   143 "        popq    %%rax                                                 \n"
   144 "        movq    %%rax,%%rcx                                           \n"
   145 "        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
   146 "        pushq   %%rax               # Save new EFLAGS value on stack  \n"
   147 "        popfq                       # Replace current EFLAGS value    \n"
   148 "        pushfq                      # Get new EFLAGS                  \n"
   149 "        popq    %%rax               # Store new EFLAGS in EAX         \n"
   150 "        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
   151 "        jz      1f                  # Processor=80486                 \n"
   152 "        movl    $1,%0               # We have CPUID support           \n"
   153 "1:                                                                    \n"
   154     : "=m" (has_CPUID)
   155     :
   156     : "%rax", "%rcx"
   157     );
   158 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
           /* MSVC (32-bit) / Watcom inline asm; has_CPUID is referenced by name. */
   159     __asm {
   160         pushfd                      ; Get original EFLAGS
   161         pop     eax
   162         mov     ecx, eax
   163         xor     eax, 200000h        ; Flip ID bit in EFLAGS
   164         push    eax                 ; Save new EFLAGS value on stack
   165         popfd                       ; Replace current EFLAGS value
   166         pushfd                      ; Get new EFLAGS
   167         pop     eax                 ; Store new EFLAGS in EAX
   168         xor     eax, ecx            ; Can not toggle ID bit,
   169         jz      done                ; Processor=80486
   170         mov     has_CPUID,1         ; We have CPUID support
   171 done:
   172     }
   173 #elif defined(_MSC_VER) && defined(_M_X64)
           /* No probe on 64-bit MSVC: CPUID is simply reported as available. */
   174     has_CPUID = 1;
   175 #elif defined(__sun) && defined(__i386)
           /* NOTE(review): this variant has no operand list; it stores the result
              through a fixed frame offset (-8 from the frame pointer), which
              presumably is where has_CPUID lives - confirm for the compiler and
              flags in use before touching the locals of this function. */
   176     __asm (
   177 "       pushfl                 \n"
   178 "       popl    %eax           \n"
   179 "       movl    %eax,%ecx      \n"
   180 "       xorl    $0x200000,%eax \n"
   181 "       pushl   %eax           \n"
   182 "       popfl                  \n"
   183 "       pushfl                 \n"
   184 "       popl    %eax           \n"
   185 "       xorl    %ecx,%eax      \n"
   186 "       jz      1f             \n"
   187 "       movl    $1,-8(%ebp)    \n"
   188 "1:                            \n"
   189     );
   190 #elif defined(__sun) && defined(__amd64)
           /* NOTE(review): same fixed-frame-offset store as the 32-bit __sun
              branch, relative to %rbp here - confirm the offset matches has_CPUID. */
   191     __asm (
   192 "       pushfq                 \n"
   193 "       popq    %rax           \n"
   194 "       movq    %rax,%rcx      \n"
   195 "       xorl    $0x200000,%eax \n"
   196 "       pushq   %rax           \n"
   197 "       popfq                  \n"
   198 "       pushfq                 \n"
   199 "       popq    %rax           \n"
   200 "       xorl    %ecx,%eax      \n"
   201 "       jz      1f             \n"
   202 "       movl    $1,-8(%rbp)    \n"
   203 "1:                            \n"
   204     );
   205 #endif
   206 #endif
   207 /* *INDENT-ON* */
   208     return has_CPUID;
   209 }
   210 
   211 #if defined(__GNUC__) && defined(i386)
       /* cpuid(func, a, b, c, d)
          Statement-like macro: executes CPUID with EAX = func and stores the
          resulting EAX/EBX/ECX/EDX into the lvalues a/b/c/d.  When no branch
          matches the compiler/architecture, the fallback at the bottom just
          sets all four outputs to 0.

          GCC, 32-bit: ECX is zeroed before CPUID.  EBX is saved and restored
          around the instruction and its value is handed back through ESI
          (the "=S" output) instead of being listed as an output itself. */
   212 #define cpuid(func, a, b, c, d) \
   213     __asm__ __volatile__ ( \
   214 "        pushl %%ebx        \n" \
   215 "        xorl %%ecx,%%ecx   \n" \
   216 "        cpuid              \n" \
   217 "        movl %%ebx, %%esi  \n" \
   218 "        popl %%ebx         \n" : \
   219             "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
   220 #elif defined(__GNUC__) && defined(__x86_64__)
       /* GCC, 64-bit: same sequence with 64-bit push/pop/mov forms. */
   221 #define cpuid(func, a, b, c, d) \
   222     __asm__ __volatile__ ( \
   223 "        pushq %%rbx        \n" \
   224 "        xorq %%rcx,%%rcx   \n" \
   225 "        cpuid              \n" \
   226 "        movq %%rbx, %%rsi  \n" \
   227 "        popq %%rbx         \n" : \
   228             "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
   229 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
       /* MSVC (32-bit) / Watcom: inline asm that names the C variables directly,
          so a/b/c/d must be plain variables the assembler can address. */
   230 #define cpuid(func, a, b, c, d) \
   231     __asm { \
   232         __asm mov eax, func \
   233         __asm xor ecx, ecx \
   234         __asm cpuid \
   235         __asm mov a, eax \
   236         __asm mov b, ebx \
   237         __asm mov c, ecx \
   238         __asm mov d, edx \
   239 }
   240 #elif defined(_MSC_VER) && defined(_M_X64)
       /* MSVC, 64-bit: goes through __cpuid() and unpacks its four-int result.
          NOTE(review): unlike the asm variants, no explicit ECX clearing is
          visible here - confirm the behavior of __cpuid for leaves that take a
          sub-leaf. */
   241 #define cpuid(func, a, b, c, d) \
   242 { \
   243     int CPUInfo[4]; \
   244     __cpuid(CPUInfo, func); \
   245     a = CPUInfo[0]; \
   246     b = CPUInfo[1]; \
   247     c = CPUInfo[2]; \
   248     d = CPUInfo[3]; \
   249 }
   250 #else
       /* Fallback: zero every output; the (void) casts only silence
          unused-variable warnings. */
   251 #define cpuid(func, a, b, c, d) \
   252     do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
   253 #endif
   254 
   255 static int CPU_CPUIDFeatures[4];
   256 static int CPU_CPUIDMaxFunction = 0;
   257 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
   258 static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
   259 
   260 static void
   261 CPU_calcCPUIDFeatures(void)
   262 {
   263     static SDL_bool checked = SDL_FALSE;
   264     if (!checked) {
   265         checked = SDL_TRUE;
   266         if (CPU_haveCPUID()) {
   267             int a, b, c, d;
   268             cpuid(0, a, b, c, d);
   269             CPU_CPUIDMaxFunction = a;
   270             if (CPU_CPUIDMaxFunction >= 1) {
   271                 cpuid(1, a, b, c, d);
   272                 CPU_CPUIDFeatures[0] = a;
   273                 CPU_CPUIDFeatures[1] = b;
   274                 CPU_CPUIDFeatures[2] = c;
   275                 CPU_CPUIDFeatures[3] = d;
   276 
   277                 /* Check to make sure we can call xgetbv */
   278                 if (c & 0x08000000) {
   279                     /* Call xgetbv to see if YMM (etc) register state is saved */
   280 #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
   281                     __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
   282 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
   283                     a = (int)_xgetbv(0);
   284 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
   285                     __asm
   286                     {
   287                         xor ecx, ecx
   288                         _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
   289                         mov a, eax
   290                     }
   291 #endif
   292                     CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
   293                     CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
   294                 }
   295             }
   296         }
   297     }
   298 }
   299 
   300 static int
   301 CPU_haveAltiVec(void)
   302 {
   303     volatile int altivec = 0;
   304 #ifndef SDL_CPUINFO_DISABLED
   305 #if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
   306 #ifdef __OpenBSD__
   307     int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
   308 #else
   309     int selectors[2] = { CTL_HW, HW_VECTORUNIT };
   310 #endif
   311     int hasVectorUnit = 0;
   312     size_t length = sizeof(hasVectorUnit);
   313     int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
   314     if (0 == error)
   315         altivec = (hasVectorUnit != 0);
   316 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
   317     void (*handler) (int sig);
   318     handler = signal(SIGILL, illegal_instruction);
   319     if (setjmp(jmpbuf) == 0) {
   320         asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
   321         altivec = 1;
   322     }
   323     signal(SIGILL, handler);
   324 #endif
   325 #endif
   326     return altivec;
   327 }
   328 
   329 #ifdef __linux__
   330 
   331 #include <unistd.h>
   332 #include <sys/types.h>
   333 #include <sys/stat.h>
   334 #include <fcntl.h>
   335 #include <elf.h>
   336 
   337 static SDL_bool
   338 CPU_haveARMSIMD(void)
   339 {
   340     int arm_simd = 0;
   341     int fd;
   342 
   343     fd = open("/proc/self/auxv", O_RDONLY);
   344     if (fd >= 0)
   345     {
   346         Elf32_auxv_t aux;
   347         while (read(fd, &aux, sizeof aux) == sizeof aux)
   348         {
   349             if (aux.a_type == AT_PLATFORM)
   350             {
   351                 const char *plat = (const char *) aux.a_un.a_val;
   352                 arm_simd = strncmp(plat, "v6l", 3) == 0 ||
   353                            strncmp(plat, "v7l", 3) == 0;
   354             }
   355         }
   356         close(fd);
   357     }
   358     return arm_simd;
   359 }
   360 
   361 #else
   362 
   363 static SDL_bool
   364 CPU_haveARMSIMD(void)
   365 {
   366 #warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
   367     return 0;
   368 }
   369 
   370 #endif
   371 
   372 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
   373 static int
   374 readProcAuxvForNeon(void)
   375 {
   376     int neon = 0;
   377     int kv[2];
   378     const int fd = open("/proc/self/auxv", O_RDONLY);
   379     if (fd != -1) {
   380         while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
   381             if (kv[0] == AT_HWCAP) {
   382                 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
   383                 break;
   384             }
   385         }
   386         close(fd);
   387     }
   388     return neon;
   389 }
   390 #endif
   391 
   392 
   393 static int
   394 CPU_haveNEON(void)
   395 {
           /* Returns nonzero when NEON is considered available, 0 otherwise.
              Exactly one branch of the preprocessor ladder below is compiled;
              the order matters because earlier conditions shadow later ones. */
   396 /* The way you detect NEON is a privileged instruction on ARM, so you have
   397    to query the OS kernel in a platform-specific way. :/ */
   398 #if defined(SDL_CPUINFO_DISABLED)
   399    return 0; /* disabled */
   400 #elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
   401 /* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
   402 /* The constant below seems to have been removed from some headers; define it if missing. */
   403 #  if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
   404 #    define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
   405 #  endif
   406 /* All WinRT ARM devices are required to support NEON, but just in case. */
   407     return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
   408 #elif !defined(__ARM_ARCH)
   409     return 0;  /* not an ARM CPU at all. */
   410 #elif __ARM_ARCH >= 8
   411     return 1;  /* ARMv8 always has non-optional NEON support. */
   412 #elif defined(__APPLE__) && (__ARM_ARCH >= 7)
   413     /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
   414     return 1;  /* all Apple ARMv7 chips and later have NEON. */
   415 #elif defined(__APPLE__)
   416     return 0;  /* assume anything else from Apple doesn't have NEON. */
   417 #elif defined(__QNXNTO__)
           /* Returns the masked flag value itself (nonzero when set), not a normalized 0/1. */
   418     return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
   419 #elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
           /* Ask for the AT_HWCAP word and test the HWCAP_NEON bit. */
   420     return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
   421 #elif defined(__LINUX__)
           /* No getauxval(): fall back to parsing /proc/self/auxv by hand. */
   422     return readProcAuxvForNeon();
   423 #elif defined(__ANDROID__)
   424     /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
   425     {
   426         AndroidCpuFamily cpu_family = android_getCpuFamily();
   427         if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
   428             uint64_t cpu_features = android_getCpuFeatures();
   429             if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
   430                 return 1;
   431             }
   432         }
   433         return 0;
   434     }
   435 #else
           /* Unknown ARM platform: warn at build time and report no NEON. */
   436 #warning SDL_HasNEON is not implemented for this ARM platform. Write me.
   437     return 0;
   438 #endif
   439 }
   440 
   441 static int
   442 CPU_have3DNow(void)
   443 {
   444     if (CPU_CPUIDMaxFunction > 0) {  /* that is, do we have CPUID at all? */
   445         int a, b, c, d;
   446         cpuid(0x80000000, a, b, c, d);
   447         if (a >= 0x80000001) {
   448             cpuid(0x80000001, a, b, c, d);
   449             return (d & 0x80000000);
   450         }
   451     }
   452     return 0;
   453 }
   454 
   455 #define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010) /* These test bits cached by CPU_calcCPUIDFeatures() ([2] = ECX, [3] = EDX of CPUID function 1) and yield the masked value, not 0/1. EDX bit 4. */
   456 #define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)   /* EDX bit 23 */
   457 #define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)   /* EDX bit 25 */
   458 #define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)  /* EDX bit 26 */
   459 #define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)  /* ECX bit 0 */
   460 #define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000) /* ECX bit 19 */
   461 #define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000) /* ECX bit 20 */
   462 #define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000)) /* ECX bit 28, and only when CPU_OSSavesYMM is set */
   463 
   464 static int
   465 CPU_haveAVX2(void)
   466 {
   467     if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
   468         int a, b, c, d;
   469         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   470         cpuid(7, a, b, c, d);
   471         return (b & 0x00000020);
   472     }
   473     return 0;
   474 }
   475 
   476 static int
   477 CPU_haveAVX512F(void)
   478 {
   479     if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
   480         int a, b, c, d;
   481         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   482         cpuid(7, a, b, c, d);
   483         return (b & 0x00010000);
   484     }
   485     return 0;
   486 }
   487 
   488 static int SDL_CPUCount = 0;
   489 
   490 int
   491 SDL_GetCPUCount(void)
   492 {
   493     if (!SDL_CPUCount) {
   494 #ifndef SDL_CPUINFO_DISABLED
   495 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
   496         if (SDL_CPUCount <= 0) {
   497             SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
   498         }
   499 #endif
   500 #ifdef HAVE_SYSCTLBYNAME
   501         if (SDL_CPUCount <= 0) {
   502             size_t size = sizeof(SDL_CPUCount);
   503             sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
   504         }
   505 #endif
   506 #ifdef __WIN32__
   507         if (SDL_CPUCount <= 0) {
   508             SYSTEM_INFO info;
   509             GetSystemInfo(&info);
   510             SDL_CPUCount = info.dwNumberOfProcessors;
   511         }
   512 #endif
   513 #ifdef __OS2__
   514         if (SDL_CPUCount <= 0) {
   515             DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
   516                             &SDL_CPUCount, sizeof(SDL_CPUCount) );
   517         }
   518 #endif
   519 #endif
   520         /* There has to be at least 1, right? :) */
   521         if (SDL_CPUCount <= 0) {
   522             SDL_CPUCount = 1;
   523         }
   524     }
   525     return SDL_CPUCount;
   526 }
   527 
   528 /* Oh, such a sweet sweet trick, just not very useful. :) */
   529 static const char *
   530 SDL_GetCPUType(void)
   531 {
   532     static char SDL_CPUType[13];
   533 
   534     if (!SDL_CPUType[0]) {
   535         int i = 0;
   536 
   537         CPU_calcCPUIDFeatures();
   538         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   539             int a, b, c, d;
   540             cpuid(0x00000000, a, b, c, d);
   541             (void) a;
   542             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   543             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   544             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   545             SDL_CPUType[i++] = (char)(b & 0xff);
   546 
   547             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   548             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   549             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   550             SDL_CPUType[i++] = (char)(d & 0xff);
   551 
   552             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   553             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   554             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   555             SDL_CPUType[i++] = (char)(c & 0xff);
   556         }
   557         if (!SDL_CPUType[0]) {
   558             SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
   559         }
   560     }
   561     return SDL_CPUType;
   562 }
   563 
   564 
   565 #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
   566 static const char *
   567 SDL_GetCPUName(void)
   568 {
   569     static char SDL_CPUName[48];
   570 
   571     if (!SDL_CPUName[0]) {
   572         int i = 0;
   573         int a, b, c, d;
   574 
   575         CPU_calcCPUIDFeatures();
   576         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   577             cpuid(0x80000000, a, b, c, d);
   578             if (a >= 0x80000004) {
   579                 cpuid(0x80000002, a, b, c, d);
   580                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   581                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   582                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   583                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   584                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   585                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   586                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   587                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   588                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   589                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   590                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   591                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   592                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   593                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   594                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   595                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   596                 cpuid(0x80000003, a, b, c, d);
   597                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   598                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   599                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   600                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   601                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   602                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   603                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   604                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   605                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   606                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   607                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   608                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   609                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   610                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   611                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   612                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   613                 cpuid(0x80000004, a, b, c, d);
   614                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   615                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   616                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   617                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   618                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   619                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   620                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   621                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   622                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   623                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   624                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   625                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   626                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   627                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   628                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   629                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   630             }
   631         }
   632         if (!SDL_CPUName[0]) {
   633             SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
   634         }
   635     }
   636     return SDL_CPUName;
   637 }
   638 #endif
   639 
   640 int
   641 SDL_GetCPUCacheLineSize(void)
   642 {
   643     const char *cpuType = SDL_GetCPUType();
   644     int a, b, c, d;
   645     (void) a; (void) b; (void) c; (void) d;
   646     if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
   647         cpuid(0x00000001, a, b, c, d);
   648         return (((b >> 8) & 0xff) * 8);
   649     } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
   650         cpuid(0x80000005, a, b, c, d);
   651         return (c & 0xff);
   652     } else {
   653         /* Just make a guess here... */
   654         return SDL_CACHELINE_SIZE;
   655     }
   656 }
   657 
   658 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
   659 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
   660 
   661 static Uint32
   662 SDL_GetCPUFeatures(void)
   663 {
   664     if (SDL_CPUFeatures == 0xFFFFFFFF) {
   665         CPU_calcCPUIDFeatures();
   666         SDL_CPUFeatures = 0;
   667         SDL_SIMDAlignment = sizeof(void *);  /* a good safe base value */
   668         if (CPU_haveRDTSC()) {
   669             SDL_CPUFeatures |= CPU_HAS_RDTSC;
   670         }
   671         if (CPU_haveAltiVec()) {
   672             SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
   673             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   674         }
   675         if (CPU_haveMMX()) {
   676             SDL_CPUFeatures |= CPU_HAS_MMX;
   677             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   678         }
   679         if (CPU_have3DNow()) {
   680             SDL_CPUFeatures |= CPU_HAS_3DNOW;
   681             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   682         }
   683         if (CPU_haveSSE()) {
   684             SDL_CPUFeatures |= CPU_HAS_SSE;
   685             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   686         }
   687         if (CPU_haveSSE2()) {
   688             SDL_CPUFeatures |= CPU_HAS_SSE2;
   689             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   690         }
   691         if (CPU_haveSSE3()) {
   692             SDL_CPUFeatures |= CPU_HAS_SSE3;
   693             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   694         }
   695         if (CPU_haveSSE41()) {
   696             SDL_CPUFeatures |= CPU_HAS_SSE41;
   697             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   698         }
   699         if (CPU_haveSSE42()) {
   700             SDL_CPUFeatures |= CPU_HAS_SSE42;
   701             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   702         }
   703         if (CPU_haveAVX()) {
   704             SDL_CPUFeatures |= CPU_HAS_AVX;
   705             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   706         }
   707         if (CPU_haveAVX2()) {
   708             SDL_CPUFeatures |= CPU_HAS_AVX2;
   709             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   710         }
   711         if (CPU_haveAVX512F()) {
   712             SDL_CPUFeatures |= CPU_HAS_AVX512F;
   713             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
   714         }
   715         if (CPU_haveARMSIMD()) {
   716             SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
   717             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   718         }
   719         if (CPU_haveNEON()) {
   720             SDL_CPUFeatures |= CPU_HAS_NEON;
   721             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   722         }
   723     }
   724     return SDL_CPUFeatures;
   725 }
   726 
   727 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
   728 
   729 SDL_bool SDL_HasRDTSC(void)
   730 {
   731     return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
   732 }
   733 
   734 SDL_bool
   735 SDL_HasAltiVec(void)
   736 {
   737     return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
   738 }
   739 
   740 SDL_bool
   741 SDL_HasMMX(void)
   742 {
   743     return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
   744 }
   745 
   746 SDL_bool
   747 SDL_Has3DNow(void)
   748 {
   749     return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
   750 }
   751 
   752 SDL_bool
   753 SDL_HasSSE(void)
   754 {
   755     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
   756 }
   757 
   758 SDL_bool
   759 SDL_HasSSE2(void)
   760 {
   761     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
   762 }
   763 
   764 SDL_bool
   765 SDL_HasSSE3(void)
   766 {
   767     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
   768 }
   769 
   770 SDL_bool
   771 SDL_HasSSE41(void)
   772 {
   773     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
   774 }
   775 
   776 SDL_bool
   777 SDL_HasSSE42(void)
   778 {
   779     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
   780 }
   781 
   782 SDL_bool
   783 SDL_HasAVX(void)
   784 {
   785     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
   786 }
   787 
   788 SDL_bool
   789 SDL_HasAVX2(void)
   790 {
   791     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
   792 }
   793 
   794 SDL_bool
   795 SDL_HasAVX512F(void)
   796 {
   797     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
   798 }
   799 
   800 SDL_bool
   801 SDL_HasARMSIMD(void)
   802 {
   803     return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
   804 }
   805 
   806 SDL_bool
   807 SDL_HasNEON(void)
   808 {
   809     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
   810 }
   811 
   812 static int SDL_SystemRAM = 0;
   813 
   814 int
   815 SDL_GetSystemRAM(void)
   816 {
   817     if (!SDL_SystemRAM) {
   818 #ifndef SDL_CPUINFO_DISABLED
   819 #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
   820         if (SDL_SystemRAM <= 0) {
   821             SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
   822         }
   823 #endif
   824 #ifdef HAVE_SYSCTLBYNAME
   825         if (SDL_SystemRAM <= 0) {
   826 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
   827 #ifdef HW_REALMEM
   828             int mib[2] = {CTL_HW, HW_REALMEM};
   829 #else
   830             /* might only report up to 2 GiB */
   831             int mib[2] = {CTL_HW, HW_PHYSMEM};
   832 #endif /* HW_REALMEM */
   833 #else
   834             int mib[2] = {CTL_HW, HW_MEMSIZE};
   835 #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
   836             Uint64 memsize = 0;
   837             size_t len = sizeof(memsize);
   838             
   839             if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
   840                 SDL_SystemRAM = (int)(memsize / (1024*1024));
   841             }
   842         }
   843 #endif
   844 #ifdef __WIN32__
   845         if (SDL_SystemRAM <= 0) {
   846             MEMORYSTATUSEX stat;
   847             stat.dwLength = sizeof(stat);
   848             if (GlobalMemoryStatusEx(&stat)) {
   849                 SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
   850             }
   851         }
   852 #endif
   853 #ifdef __OS2__
   854         if (SDL_SystemRAM <= 0) {
   855             Uint32 sysram = 0;
   856             DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
   857             SDL_SystemRAM = (int) (sysram / 0x100000U);
   858         }
   859 #endif
   860 #endif
   861     }
   862     return SDL_SystemRAM;
   863 }
   864 
   865 
   866 size_t
   867 SDL_SIMDGetAlignment(void)
   868 {
   869     if (SDL_SIMDAlignment == 0xFFFFFFFF) {
   870         SDL_GetCPUFeatures();  /* make sure this has been calculated */
   871     }
   872     SDL_assert(SDL_SIMDAlignment != 0);
   873     return SDL_SIMDAlignment;
   874 }
   875 
   876 void *
   877 SDL_SIMDAlloc(const size_t len)
   878 {
   879     const size_t alignment = SDL_SIMDGetAlignment();
   880     const size_t padding = alignment - (len % alignment);
   881     const size_t padded = (padding != alignment) ? (len + padding) : len;
   882     Uint8 *retval = NULL;
   883     Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
   884     if (ptr) {
   885         /* store the actual malloc pointer right before our aligned pointer. */
   886         retval = ptr + sizeof (void *);
   887         retval += alignment - (((size_t) retval) % alignment);
   888         *(((void **) retval) - 1) = ptr;
   889     }
   890     return retval;
   891 }
   892 
   893 void
   894 SDL_SIMDFree(void *ptr)
   895 {
   896     if (ptr) {
   897         void **realptr = (void **) ptr;
   898         realptr--;
   899         SDL_free(*(((void **) ptr) - 1));
   900     }
   901 }
   902 
   903 
   904 #ifdef TEST_MAIN
   905 
   906 #include <stdio.h>
   907 
   908 int
   909 main()
   910 {
   911     printf("CPU count: %d\n", SDL_GetCPUCount());
   912     printf("CPU type: %s\n", SDL_GetCPUType());
   913     printf("CPU name: %s\n", SDL_GetCPUName());
   914     printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
   915     printf("RDTSC: %d\n", SDL_HasRDTSC());
   916     printf("Altivec: %d\n", SDL_HasAltiVec());
   917     printf("MMX: %d\n", SDL_HasMMX());
   918     printf("3DNow: %d\n", SDL_Has3DNow());
   919     printf("SSE: %d\n", SDL_HasSSE());
   920     printf("SSE2: %d\n", SDL_HasSSE2());
   921     printf("SSE3: %d\n", SDL_HasSSE3());
   922     printf("SSE4.1: %d\n", SDL_HasSSE41());
   923     printf("SSE4.2: %d\n", SDL_HasSSE42());
   924     printf("AVX: %d\n", SDL_HasAVX());
   925     printf("AVX2: %d\n", SDL_HasAVX2());
   926     printf("AVX-512F: %d\n", SDL_HasAVX512F());
   927     printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
   928     printf("NEON: %d\n", SDL_HasNEON());
   929     printf("RAM: %d MB\n", SDL_GetSystemRAM());
   930     return 0;
   931 }
   932 
   933 #endif /* TEST_MAIN */
   934 
   935 /* vi: set ts=4 sw=4 expandtab: */