src/cpuinfo/SDL_cpuinfo.c
author Ryan C. Gordon <icculus@icculus.org>
Sat, 01 Dec 2018 12:19:11 -0500
changeset 12439 3a0793e5e134
parent 12242 df7260f149f2
child 12450 1055156e99f3
permissions -rw-r--r--
android: use cpufeatures to support SDL_HasNEON() (thanks, Sylvain!).

Fixes Bugzilla #4406.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #ifdef TEST_MAIN
    22 #include "SDL_config.h"
    23 #else
    24 #include "../SDL_internal.h"
    25 #include "SDL_simd.h"
    26 #endif
    27 
    28 #if defined(__WIN32__)
    29 #include "../core/windows/SDL_windows.h"
    30 #endif
    31 #if defined(__OS2__)
    32 #define INCL_DOS
    33 #include <os2.h>
    34 #ifndef QSV_NUMPROCESSORS
    35 #define QSV_NUMPROCESSORS 26
    36 #endif
    37 #endif
    38 
    39 /* CPU feature detection for SDL */
    40 
    41 #include "SDL_cpuinfo.h"
    42 #include "SDL_assert.h"
    43 
    44 #ifdef HAVE_SYSCONF
    45 #include <unistd.h>
    46 #endif
    47 #ifdef HAVE_SYSCTLBYNAME
    48 #include <sys/types.h>
    49 #include <sys/sysctl.h>
    50 #endif
    51 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
    52 #include <sys/sysctl.h>         /* For AltiVec check */
    53 #elif defined(__OpenBSD__) && defined(__powerpc__)
    54 #include <sys/param.h>
    55 #include <sys/sysctl.h> /* For AltiVec check */
    56 #include <machine/cpu.h>
    57 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    58 #include <signal.h>
    59 #include <setjmp.h>
    60 #endif
    61 
    62 #if defined(__QNXNTO__)
    63 #include <sys/syspage.h>
    64 #endif
    65 
    66 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
    67 /*#include <asm/hwcap.h>*/
    68 #ifndef AT_HWCAP
    69 #define AT_HWCAP 16
    70 #endif
    71 #ifndef HWCAP_NEON
    72 #define HWCAP_NEON (1 << 12)
    73 #endif
    74 #if defined HAVE_GETAUXVAL
    75 #include <sys/auxv.h>
    76 #else
    77 #include <fcntl.h>
    78 #endif
    79 #endif
    80 
    81 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
    82 #if __ARM_ARCH < 8
    83 #include <cpu-features.h>
    84 #endif
    85 #endif
    86 
/* Bit flags accumulated into SDL_CPUFeatures by SDL_GetCPUFeatures();
   each SDL_Has*() query further down tests exactly one of them. */
#define CPU_HAS_RDTSC   (1 << 0)
#define CPU_HAS_ALTIVEC (1 << 1)
#define CPU_HAS_MMX     (1 << 2)
#define CPU_HAS_3DNOW   (1 << 3)
#define CPU_HAS_SSE     (1 << 4)
#define CPU_HAS_SSE2    (1 << 5)
#define CPU_HAS_SSE3    (1 << 6)
#define CPU_HAS_SSE41   (1 << 7)
#define CPU_HAS_SSE42   (1 << 8)
#define CPU_HAS_AVX     (1 << 9)
#define CPU_HAS_AVX2    (1 << 10)
#define CPU_HAS_NEON    (1 << 11)
#define CPU_HAS_AVX512F (1 << 12)
   100 
   101 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
   102 /* This is the brute force way of detecting instruction sets...
   103    the idea is borrowed from the libmpeg2 library - thanks!
   104  */
   105 static jmp_buf jmpbuf;
   106 static void
   107 illegal_instruction(int sig)
   108 {
   109     longjmp(jmpbuf, 1);
   110 }
   111 #endif /* HAVE_SETJMP */
   112 
/*
 * Report whether the CPUID instruction is usable.
 *
 * The inline-assembly variants flip bit 0x200000 (the ID bit) in EFLAGS and
 * read the register back: if the flipped bit sticks, has_CPUID is set to 1.
 * The MSVC x64 build hard-codes 1. When SDL_CPUINFO_DISABLED is defined, or
 * no compiler/architecture branch matches, the function returns 0.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if defined(__GNUC__) && defined(i386)
    __asm__ (
"        pushfl                      # Get original EFLAGS             \n"
"        popl    %%eax                                                 \n"
"        movl    %%eax,%%ecx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
"        popfl                       # Replace current EFLAGS value    \n"
"        pushfl                      # Get new EFLAGS                  \n"
"        popl    %%eax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif defined(__GNUC__) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUid by definition.  But it's nice to be able to prove it.  :)      */
    __asm__ (
"        pushfq                      # Get original EFLAGS             \n"
"        popq    %%rax                                                 \n"
"        movq    %%rax,%%rcx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
"        popfq                       # Replace current EFLAGS value    \n"
"        pushfq                      # Get new EFLAGS                  \n"
"        popq    %%rax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    __asm {
        pushfd                      ; Get original EFLAGS
        pop     eax
        mov     ecx, eax
        xor     eax, 200000h        ; Flip ID bit in EFLAGS
        push    eax                 ; Save new EFLAGS value on stack
        popfd                       ; Replace current EFLAGS value
        pushfd                      ; Get new EFLAGS
        pop     eax                 ; Store new EFLAGS in EAX
        xor     eax, ecx            ; Can not toggle ID bit,
        jz      done                ; Processor=80486
        mov     has_CPUID,1         ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe in this configuration: CPUID is simply assumed present. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): this variant stores the result through a fixed frame
       offset (-8 from the frame pointer) instead of an asm operand;
       presumably that is has_CPUID's stack slot - confirm against the
       compiler's frame layout. */
    __asm (
"       pushfl                 \n"
"       popl    %eax           \n"
"       movl    %eax,%ecx      \n"
"       xorl    $0x200000,%eax \n"
"       pushl   %eax           \n"
"       popfl                  \n"
"       pushfl                 \n"
"       popl    %eax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%ebp)    \n"
"1:                            \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): same fixed frame-offset store as the 32-bit Sun
       variant above - confirm the offset matches has_CPUID. */
    __asm (
"       pushfq                 \n"
"       popq    %rax           \n"
"       movq    %rax,%rcx      \n"
"       xorl    $0x200000,%eax \n"
"       pushq   %rax           \n"
"       popfq                  \n"
"       pushfq                 \n"
"       popq    %rax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%rbp)    \n"
"1:                            \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
   210 
/*
 * cpuid(func, a, b, c, d): run CPUID for function number `func` and store
 * the EAX, EBX, ECX and EDX results into the lvalues a, b, c and d.
 *
 *  - GCC i386/x86_64: zeroes ECX before the instruction, saves and restores
 *    EBX/RBX around it, and hands the EBX result out through ESI/RSI.
 *  - MSVC x86 / Watcom: inline asm, also zeroes ECX first.
 *  - MSVC x64: uses the __cpuid() intrinsic.
 *  - Anything else: no CPUID available, all four outputs are set to 0.
 */
#if defined(__GNUC__) && defined(i386)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushl %%ebx        \n" \
"        xorl %%ecx,%%ecx   \n" \
"        cpuid              \n" \
"        movl %%ebx, %%esi  \n" \
"        popl %%ebx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif defined(__GNUC__) && defined(__x86_64__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushq %%rbx        \n" \
"        xorq %%rcx,%%rcx   \n" \
"        cpuid              \n" \
"        movq %%rbx, %%rsi  \n" \
"        popq %%rbx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
   254 
/* Results of CPUID function 1, stored as { EAX, EBX, ECX, EDX }. */
static int CPU_CPUIDFeatures[4];
/* EAX result of CPUID function 0; stays 0 when CPUID is unavailable. */
static int CPU_CPUIDMaxFunction = 0;
/* Set from the xgetbv result: SDL_TRUE when bits 1 and 2 (mask 6) are both set. */
static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
/* SDL_TRUE when CPU_OSSavesYMM holds and xgetbv bits 5-7 (mask 0xe0) are all set. */
static SDL_bool CPU_OSSavesZMM = SDL_FALSE;

/*
 * Fill in the CPUID-derived globals above. The work is done on the first
 * call only; later calls return immediately.
 */
static void
CPU_calcCPUIDFeatures(void)
{
    static SDL_bool checked = SDL_FALSE;
    if (!checked) {
        checked = SDL_TRUE;
        if (CPU_haveCPUID()) {
            int a, b, c, d;
            cpuid(0, a, b, c, d);
            CPU_CPUIDMaxFunction = a;
            if (CPU_CPUIDMaxFunction >= 1) {
                cpuid(1, a, b, c, d);
                CPU_CPUIDFeatures[0] = a;
                CPU_CPUIDFeatures[1] = b;
                CPU_CPUIDFeatures[2] = c;
                CPU_CPUIDFeatures[3] = d;

                /* Check to make sure we can call xgetbv */
                if (c & 0x08000000) {
                    /* Call xgetbv to see if YMM (etc) register state is saved */
                    /* NOTE(review): if none of the branches below is compiled
                       in, `a` still holds EAX from cpuid(1) when it is tested
                       afterwards - confirm that cannot happen in practice. */
#if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
                    __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
                    a = (int)_xgetbv(0);
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
                    __asm
                    {
                        xor ecx, ecx
                        _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
                        mov a, eax
                    }
#endif
                    CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
                    CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
                }
            }
        }
    }
}
   299 
/*
 * Report whether AltiVec is available (non-zero) or not (0).
 *
 *  - Mac OS X on PowerPC and OpenBSD on powerpc: ask the kernel via sysctl().
 *  - Otherwise, when SDL_ALTIVEC_BLITTERS and HAVE_SETJMP are set: install a
 *    SIGILL handler and try to execute a vector instruction. If it traps, the
 *    handler longjmp()s back and altivec stays 0.
 *  - Returns 0 when SDL_CPUINFO_DISABLED is defined or no branch applies.
 */
static int
CPU_haveAltiVec(void)
{
    /* presumably volatile because of the setjmp()/longjmp() probe below */
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
#ifdef __OpenBSD__
    int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#else
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);
    int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
    /* a failed sysctl() leaves altivec at 0 */
    if (0 == error)
        altivec = (hasVectorUnit != 0);
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    void (*handler) (int sig);
    handler = signal(SIGILL, illegal_instruction);
    if (setjmp(jmpbuf) == 0) {
        /* reached only on the direct return from setjmp(); if the asm traps,
           illegal_instruction() jumps back with 1 and this body is skipped */
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
        altivec = 1;
    }
    /* put back whatever SIGILL handler was installed before the probe */
    signal(SIGILL, handler);
#endif
#endif
    return altivec;
}
   328 
   329 #if defined(__LINUX__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
   330 static int
   331 readProcAuxvForNeon(void)
   332 {
   333     int neon = 0;
   334     int kv[2];
   335     const int fd = open("/proc/self/auxv", O_RDONLY);
   336     if (fd != -1) {
   337         while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
   338             if (kv[0] == AT_HWCAP) {
   339                 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
   340                 break;
   341             }
   342         }
   343         close(fd);
   344     }
   345     return neon;
   346 }
   347 #endif
   348 
   349 
   350 static int
   351 CPU_haveNEON(void)
   352 {
   353 /* The way you detect NEON is a privileged instruction on ARM, so you have
   354    query the OS kernel in a platform-specific way. :/ */
   355 #if defined(SDL_CPUINFO_DISABLED) || !defined(__ARM_ARCH)
   356     return 0;  /* disabled or not an ARM CPU at all. */
   357 #elif __ARM_ARCH >= 8
   358     return 1;  /* ARMv8 always has non-optional NEON support. */
   359 #elif defined(__APPLE__) && (__ARM_ARCH >= 7)
   360     /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
   361     return 1;  /* all Apple ARMv7 chips and later have NEON. */
   362 #elif defined(__APPLE__)
   363     return 0;  /* assume anything else from Apple doesn't have NEON. */
   364 #elif defined(__QNXNTO__)
   365     return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
   366 #elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
   367     return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
   368 #elif defined(__LINUX__)
   369     return readProcAuxvForNeon();
   370 #elif defined(__ANDROID__)
   371     /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
   372     {
   373         AndroidCpuFamily cpu_family = android_getCpuFamily();
   374         if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
   375             uint64_t cpu_features = android_getCpuFeatures();
   376             if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
   377                 return 1;
   378             }
   379         }
   380         return 0;
   381     }
   382 #elif (defined(__WINDOWS__) || defined(__WINRT__)) && defined(_M_ARM)
   383     /* All WinRT ARM devices are required to support NEON, but just in case. */
   384     return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
   385 #else
   386 #warning SDL_HasNEON is not implemented for this ARM platform. Write me.
   387     return 0;
   388 #endif
   389 }
   390 
   391 static int
   392 CPU_have3DNow(void)
   393 {
   394     if (CPU_CPUIDMaxFunction > 0) {  /* that is, do we have CPUID at all? */
   395         int a, b, c, d;
   396         cpuid(0x80000000, a, b, c, d);
   397         if (a >= 0x80000001) {
   398             cpuid(0x80000001, a, b, c, d);
   399             return (d & 0x80000000);
   400         }
   401     }
   402     return 0;
   403 }
   404 
/* Feature-bit tests on the CPUID function 1 results cached in
   CPU_CPUIDFeatures: index 3 holds EDX, index 2 holds ECX. Each macro
   evaluates to non-zero when its bit is set. CPU_haveAVX() additionally
   requires CPU_OSSavesYMM to be set. */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
   413 
   414 static int
   415 CPU_haveAVX2(void)
   416 {
   417     if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
   418         int a, b, c, d;
   419         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   420         cpuid(7, a, b, c, d);
   421         return (b & 0x00000020);
   422     }
   423     return 0;
   424 }
   425 
   426 static int
   427 CPU_haveAVX512F(void)
   428 {
   429     if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
   430         int a, b, c, d;
   431         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   432         cpuid(7, a, b, c, d);
   433         return (b & 0x00010000);
   434     }
   435     return 0;
   436 }
   437 
   438 static int SDL_CPUCount = 0;
   439 
   440 int
   441 SDL_GetCPUCount(void)
   442 {
   443     if (!SDL_CPUCount) {
   444 #ifndef SDL_CPUINFO_DISABLED
   445 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
   446         if (SDL_CPUCount <= 0) {
   447             SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
   448         }
   449 #endif
   450 #ifdef HAVE_SYSCTLBYNAME
   451         if (SDL_CPUCount <= 0) {
   452             size_t size = sizeof(SDL_CPUCount);
   453             sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
   454         }
   455 #endif
   456 #ifdef __WIN32__
   457         if (SDL_CPUCount <= 0) {
   458             SYSTEM_INFO info;
   459             GetSystemInfo(&info);
   460             SDL_CPUCount = info.dwNumberOfProcessors;
   461         }
   462 #endif
   463 #ifdef __OS2__
   464         if (SDL_CPUCount <= 0) {
   465             DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
   466                             &SDL_CPUCount, sizeof(SDL_CPUCount) );
   467         }
   468 #endif
   469 #endif
   470         /* There has to be at least 1, right? :) */
   471         if (SDL_CPUCount <= 0) {
   472             SDL_CPUCount = 1;
   473         }
   474     }
   475     return SDL_CPUCount;
   476 }
   477 
   478 /* Oh, such a sweet sweet trick, just not very useful. :) */
   479 static const char *
   480 SDL_GetCPUType(void)
   481 {
   482     static char SDL_CPUType[13];
   483 
   484     if (!SDL_CPUType[0]) {
   485         int i = 0;
   486 
   487         CPU_calcCPUIDFeatures();
   488         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   489             int a, b, c, d;
   490             cpuid(0x00000000, a, b, c, d);
   491             (void) a;
   492             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   493             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   494             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   495             SDL_CPUType[i++] = (char)(b & 0xff);
   496 
   497             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   498             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   499             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   500             SDL_CPUType[i++] = (char)(d & 0xff);
   501 
   502             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   503             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   504             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   505             SDL_CPUType[i++] = (char)(c & 0xff);
   506         }
   507         if (!SDL_CPUType[0]) {
   508             SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
   509         }
   510     }
   511     return SDL_CPUType;
   512 }
   513 
   514 
   515 #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
   516 static const char *
   517 SDL_GetCPUName(void)
   518 {
   519     static char SDL_CPUName[48];
   520 
   521     if (!SDL_CPUName[0]) {
   522         int i = 0;
   523         int a, b, c, d;
   524 
   525         CPU_calcCPUIDFeatures();
   526         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   527             cpuid(0x80000000, a, b, c, d);
   528             if (a >= 0x80000004) {
   529                 cpuid(0x80000002, a, b, c, d);
   530                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   531                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   532                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   533                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   534                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   535                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   536                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   537                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   538                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   539                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   540                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   541                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   542                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   543                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   544                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   545                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   546                 cpuid(0x80000003, a, b, c, d);
   547                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   548                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   549                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   550                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   551                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   552                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   553                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   554                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   555                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   556                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   557                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   558                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   559                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   560                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   561                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   562                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   563                 cpuid(0x80000004, a, b, c, d);
   564                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   565                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   566                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   567                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   568                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   569                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   570                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   571                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   572                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   573                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   574                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   575                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   576                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   577                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   578                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   579                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   580             }
   581         }
   582         if (!SDL_CPUName[0]) {
   583             SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
   584         }
   585     }
   586     return SDL_CPUName;
   587 }
   588 #endif
   589 
   590 int
   591 SDL_GetCPUCacheLineSize(void)
   592 {
   593     const char *cpuType = SDL_GetCPUType();
   594     int a, b, c, d;
   595     (void) a; (void) b; (void) c; (void) d;
   596     if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
   597         cpuid(0x00000001, a, b, c, d);
   598         return (((b >> 8) & 0xff) * 8);
   599     } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0) {
   600         cpuid(0x80000005, a, b, c, d);
   601         return (c & 0xff);
   602     } else {
   603         /* Just make a guess here... */
   604         return SDL_CACHELINE_SIZE;
   605     }
   606 }
   607 
   608 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
   609 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
   610 
   611 static Uint32
   612 SDL_GetCPUFeatures(void)
   613 {
   614     if (SDL_CPUFeatures == 0xFFFFFFFF) {
   615         CPU_calcCPUIDFeatures();
   616         SDL_CPUFeatures = 0;
   617         SDL_SIMDAlignment = 4;  /* a good safe base value */
   618         if (CPU_haveRDTSC()) {
   619             SDL_CPUFeatures |= CPU_HAS_RDTSC;
   620         }
   621         if (CPU_haveAltiVec()) {
   622             SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
   623             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   624         }
   625         if (CPU_haveMMX()) {
   626             SDL_CPUFeatures |= CPU_HAS_MMX;
   627             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   628         }
   629         if (CPU_have3DNow()) {
   630             SDL_CPUFeatures |= CPU_HAS_3DNOW;
   631             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   632         }
   633         if (CPU_haveSSE()) {
   634             SDL_CPUFeatures |= CPU_HAS_SSE;
   635             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   636         }
   637         if (CPU_haveSSE2()) {
   638             SDL_CPUFeatures |= CPU_HAS_SSE2;
   639             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   640         }
   641         if (CPU_haveSSE3()) {
   642             SDL_CPUFeatures |= CPU_HAS_SSE3;
   643             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   644         }
   645         if (CPU_haveSSE41()) {
   646             SDL_CPUFeatures |= CPU_HAS_SSE41;
   647             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   648         }
   649         if (CPU_haveSSE42()) {
   650             SDL_CPUFeatures |= CPU_HAS_SSE42;
   651             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   652         }
   653         if (CPU_haveAVX()) {
   654             SDL_CPUFeatures |= CPU_HAS_AVX;
   655             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   656         }
   657         if (CPU_haveAVX2()) {
   658             SDL_CPUFeatures |= CPU_HAS_AVX2;
   659             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   660         }
   661         if (CPU_haveAVX512F()) {
   662             SDL_CPUFeatures |= CPU_HAS_AVX512F;
   663             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
   664         }
   665         if (CPU_haveNEON()) {
   666             SDL_CPUFeatures |= CPU_HAS_NEON;
   667             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   668         }
   669     }
   670     return SDL_CPUFeatures;
   671 }
   672 
   673 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
   674 
   675 SDL_bool SDL_HasRDTSC(void)
   676 {
   677     return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
   678 }
   679 
   680 SDL_bool
   681 SDL_HasAltiVec(void)
   682 {
   683     return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
   684 }
   685 
   686 SDL_bool
   687 SDL_HasMMX(void)
   688 {
   689     return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
   690 }
   691 
   692 SDL_bool
   693 SDL_Has3DNow(void)
   694 {
   695     return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
   696 }
   697 
   698 SDL_bool
   699 SDL_HasSSE(void)
   700 {
   701     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
   702 }
   703 
   704 SDL_bool
   705 SDL_HasSSE2(void)
   706 {
   707     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
   708 }
   709 
   710 SDL_bool
   711 SDL_HasSSE3(void)
   712 {
   713     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
   714 }
   715 
   716 SDL_bool
   717 SDL_HasSSE41(void)
   718 {
   719     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
   720 }
   721 
   722 SDL_bool
   723 SDL_HasSSE42(void)
   724 {
   725     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
   726 }
   727 
   728 SDL_bool
   729 SDL_HasAVX(void)
   730 {
   731     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
   732 }
   733 
   734 SDL_bool
   735 SDL_HasAVX2(void)
   736 {
   737     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
   738 }
   739 
   740 SDL_bool
   741 SDL_HasAVX512F(void)
   742 {
   743     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
   744 }
   745 
   746 SDL_bool
   747 SDL_HasNEON(void)
   748 {
   749     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
   750 }
   751 
   752 static int SDL_SystemRAM = 0;
   753 
   754 int
   755 SDL_GetSystemRAM(void)
   756 {
   757     if (!SDL_SystemRAM) {
   758 #ifndef SDL_CPUINFO_DISABLED
   759 #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
   760         if (SDL_SystemRAM <= 0) {
   761             SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
   762         }
   763 #endif
   764 #ifdef HAVE_SYSCTLBYNAME
   765         if (SDL_SystemRAM <= 0) {
   766 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
   767 #ifdef HW_REALMEM
   768             int mib[2] = {CTL_HW, HW_REALMEM};
   769 #else
   770             /* might only report up to 2 GiB */
   771             int mib[2] = {CTL_HW, HW_PHYSMEM};
   772 #endif /* HW_REALMEM */
   773 #else
   774             int mib[2] = {CTL_HW, HW_MEMSIZE};
   775 #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
   776             Uint64 memsize = 0;
   777             size_t len = sizeof(memsize);
   778             
   779             if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
   780                 SDL_SystemRAM = (int)(memsize / (1024*1024));
   781             }
   782         }
   783 #endif
   784 #ifdef __WIN32__
   785         if (SDL_SystemRAM <= 0) {
   786             MEMORYSTATUSEX stat;
   787             stat.dwLength = sizeof(stat);
   788             if (GlobalMemoryStatusEx(&stat)) {
   789                 SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
   790             }
   791         }
   792 #endif
   793 #ifdef __OS2__
   794         if (SDL_SystemRAM <= 0) {
   795             Uint32 sysram = 0;
   796             DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
   797             SDL_SystemRAM = (int) (sysram / 0x100000U);
   798         }
   799 #endif
   800 #endif
   801     }
   802     return SDL_SystemRAM;
   803 }
   804 
   805 
   806 size_t
   807 SDL_SIMDGetAlignment(void)
   808 {
   809     if (SDL_SIMDAlignment == 0xFFFFFFFF) {
   810         SDL_GetCPUFeatures();  /* make sure this has been calculated */
   811     }
   812     SDL_assert(SDL_SIMDAlignment != 0);
   813     return SDL_SIMDAlignment;
   814 }
   815 
   816 void *
   817 SDL_SIMDAlloc(const size_t len)
   818 {
   819     const size_t alignment = SDL_SIMDGetAlignment();
   820     const size_t padding = alignment - (len % alignment);
   821     const size_t padded = (padding != alignment) ? (len + padding) : len;
   822     Uint8 *retval = NULL;
   823     Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
   824     if (ptr) {
   825         /* store the actual malloc pointer right before our aligned pointer. */
   826         retval = ptr + sizeof (void *);
   827         retval += alignment - (((size_t) retval) % alignment);
   828         *(((void **) retval) - 1) = ptr;
   829     }
   830     return retval;
   831 }
   832 
   833 void
   834 SDL_SIMDFree(void *ptr)
   835 {
   836     if (ptr) {
   837         void **realptr = (void **) ptr;
   838         realptr--;
   839         SDL_free(*(((void **) ptr) - 1));
   840     }
   841 }
   842 
   843 
   844 #ifdef TEST_MAIN
   845 
   846 #include <stdio.h>
   847 
   848 int
   849 main()
   850 {
   851     printf("CPU count: %d\n", SDL_GetCPUCount());
   852     printf("CPU type: %s\n", SDL_GetCPUType());
   853     printf("CPU name: %s\n", SDL_GetCPUName());
   854     printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
   855     printf("RDTSC: %d\n", SDL_HasRDTSC());
   856     printf("Altivec: %d\n", SDL_HasAltiVec());
   857     printf("MMX: %d\n", SDL_HasMMX());
   858     printf("3DNow: %d\n", SDL_Has3DNow());
   859     printf("SSE: %d\n", SDL_HasSSE());
   860     printf("SSE2: %d\n", SDL_HasSSE2());
   861     printf("SSE3: %d\n", SDL_HasSSE3());
   862     printf("SSE4.1: %d\n", SDL_HasSSE41());
   863     printf("SSE4.2: %d\n", SDL_HasSSE42());
   864     printf("AVX: %d\n", SDL_HasAVX());
   865     printf("AVX2: %d\n", SDL_HasAVX2());
   866     printf("AVX-512F: %d\n", SDL_HasAVX512F());
   867     printf("NEON: %d\n", SDL_HasNEON());
   868     printf("RAM: %d MB\n", SDL_GetSystemRAM());
   869     return 0;
   870 }
   871 
   872 #endif /* TEST_MAIN */
   873 
   874 /* vi: set ts=4 sw=4 expandtab: */