src/cpuinfo/SDL_cpuinfo.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 04 Jan 2019 22:01:14 -0800
changeset 12503 806492103856
parent 12450 1055156e99f3
child 12665 4743da9c3eea
permissions -rw-r--r--
Updated copyright for 2019
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #ifdef TEST_MAIN
    22 #include "SDL_config.h"
    23 #else
    24 #include "../SDL_internal.h"
    25 #include "SDL_simd.h"
    26 #endif
    27 
    28 #if defined(__WIN32__)
    29 #include "../core/windows/SDL_windows.h"
    30 #endif
    31 #if defined(__OS2__)
    32 #define INCL_DOS
    33 #include <os2.h>
    34 #ifndef QSV_NUMPROCESSORS
    35 #define QSV_NUMPROCESSORS 26
    36 #endif
    37 #endif
    38 
    39 /* CPU feature detection for SDL */
    40 
    41 #include "SDL_cpuinfo.h"
    42 #include "SDL_assert.h"
    43 
    44 #ifdef HAVE_SYSCONF
    45 #include <unistd.h>
    46 #endif
    47 #ifdef HAVE_SYSCTLBYNAME
    48 #include <sys/types.h>
    49 #include <sys/sysctl.h>
    50 #endif
    51 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
    52 #include <sys/sysctl.h>         /* For AltiVec check */
    53 #elif defined(__OpenBSD__) && defined(__powerpc__)
    54 #include <sys/param.h>
    55 #include <sys/sysctl.h> /* For AltiVec check */
    56 #include <machine/cpu.h>
    57 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    58 #include <signal.h>
    59 #include <setjmp.h>
    60 #endif
    61 
    62 #if defined(__QNXNTO__)
    63 #include <sys/syspage.h>
    64 #endif
    65 
    66 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
    67 /*#include <asm/hwcap.h>*/
    68 #ifndef AT_HWCAP
    69 #define AT_HWCAP 16
    70 #endif
    71 #ifndef HWCAP_NEON
    72 #define HWCAP_NEON (1 << 12)
    73 #endif
    74 #if defined HAVE_GETAUXVAL
    75 #include <sys/auxv.h>
    76 #else
    77 #include <fcntl.h>
    78 #endif
    79 #endif
    80 
    81 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
    82 #if __ARM_ARCH < 8
    83 #include <cpu-features.h>
    84 #endif
    85 #endif
    86 
/* Bit flags for the feature mask that SDL_GetCPUFeatures() builds and
   caches in SDL_CPUFeatures. Each SDL_Has*() query tests one of them. */
#define CPU_HAS_RDTSC   (1 << 0)
#define CPU_HAS_ALTIVEC (1 << 1)
#define CPU_HAS_MMX     (1 << 2)
#define CPU_HAS_3DNOW   (1 << 3)
#define CPU_HAS_SSE     (1 << 4)
#define CPU_HAS_SSE2    (1 << 5)
#define CPU_HAS_SSE3    (1 << 6)
#define CPU_HAS_SSE41   (1 << 7)
#define CPU_HAS_SSE42   (1 << 8)
#define CPU_HAS_AVX     (1 << 9)
#define CPU_HAS_AVX2    (1 << 10)
#define CPU_HAS_NEON    (1 << 11)
#define CPU_HAS_AVX512F (1 << 12)
   100 
   101 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
   102 /* This is the brute force way of detecting instruction sets...
   103    the idea is borrowed from the libmpeg2 library - thanks!
   104  */
   105 static jmp_buf jmpbuf;
   106 static void
   107 illegal_instruction(int sig)
   108 {
   109     longjmp(jmpbuf, 1);
   110 }
   111 #endif /* HAVE_SETJMP */
   112 
/* Detect whether the CPUID instruction is available.
 *
 * Every assembly variant below performs the same test: read EFLAGS, flip
 * the ID bit (mask 0x200000), write the value back, read EFLAGS again and
 * compare with the original. If the bit could be toggled, has_CPUID is set
 * to 1.
 *
 * Returns 1 when CPUID is available, 0 when it is not, when
 * SDL_CPUINFO_DISABLED is defined, or when no compiler/architecture branch
 * below matches.
 *
 * NOTE(review): the GCC variants declare has_CPUID as a write-only "=m"
 * output although the asm only stores to it when the test succeeds; the 0
 * result therefore relies on the initializer being kept. "+m" may be the
 * more accurate constraint - confirm before changing.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if defined(__GNUC__) && defined(i386)
    /* GCC-style inline asm, 32-bit x86. */
    __asm__ (
"        pushfl                      # Get original EFLAGS             \n"
"        popl    %%eax                                                 \n"
"        movl    %%eax,%%ecx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
"        popfl                       # Replace current EFLAGS value    \n"
"        pushfl                      # Get new EFLAGS                  \n"
"        popl    %%eax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif defined(__GNUC__) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has 
   CPUid by definition.  But it's nice to be able to prove it.  :)      */
    __asm__ (
"        pushfq                      # Get original EFLAGS             \n"
"        popq    %%rax                                                 \n"
"        movq    %%rax,%%rcx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
"        popfq                       # Replace current EFLAGS value    \n"
"        pushfq                      # Get new EFLAGS                  \n"
"        popq    %%rax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    /* MSVC / Watcom inline asm, 32-bit x86; refers to has_CPUID by name. */
    __asm {
        pushfd                      ; Get original EFLAGS
        pop     eax
        mov     ecx, eax
        xor     eax, 200000h        ; Flip ID bit in EFLAGS
        push    eax                 ; Save new EFLAGS value on stack
        popfd                       ; Replace current EFLAGS value
        pushfd                      ; Get new EFLAGS
        pop     eax                 ; Store new EFLAGS in EAX
        xor     eax, ecx            ; Can not toggle ID bit,
        jz      done                ; Processor=80486
        mov     has_CPUID,1         ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe on this path: CPUID is simply reported as present. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* Sun toolchain, 32-bit. The result is stored straight to -8(%ebp).
       NOTE(review): presumably that is the stack slot of has_CPUID, which
       makes this branch sensitive to the layout of this function's locals -
       confirm before adding or reordering local variables. */
    __asm (
"       pushfl                 \n"
"       popl    %eax           \n"
"       movl    %eax,%ecx      \n"
"       xorl    $0x200000,%eax \n"
"       pushl   %eax           \n"
"       popfl                  \n"
"       pushfl                 \n"
"       popl    %eax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%ebp)    \n"
"1:                            \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* Sun toolchain, 64-bit. Same hard-coded frame offset as the 32-bit
       variant, here -8(%rbp) - NOTE(review): same layout caveat applies. */
    __asm (
"       pushfq                 \n"
"       popq    %rax           \n"
"       movq    %rax,%rcx      \n"
"       xorl    $0x200000,%eax \n"
"       pushq   %rax           \n"
"       popfq                  \n"
"       pushfq                 \n"
"       popq    %rax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%rbp)    \n"
"1:                            \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
   210 
/* cpuid(func, a, b, c, d)
 *
 * Execute the CPUID instruction for function number `func` and store the
 * resulting EAX, EBX, ECX and EDX values in a, b, c and d.
 *
 * - GCC variants: zero ECX before the instruction, save and restore
 *   EBX/RBX around it, and hand the EBX result out through ESI/RSI.
 * - 32-bit MSVC / Watcom variant: zeroes ECX and moves the registers into
 *   the named variables, so a, b, c, d must be names the inline assembler
 *   can use as operands.
 * - 64-bit MSVC variant: uses __cpuid() and copies the four array
 *   elements out.
 * - Anything else: no CPUID is executed; a, b, c and d are all set to 0.
 */
#if defined(__GNUC__) && defined(i386)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushl %%ebx        \n" \
"        xorl %%ecx,%%ecx   \n" \
"        cpuid              \n" \
"        movl %%ebx, %%esi  \n" \
"        popl %%ebx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif defined(__GNUC__) && defined(__x86_64__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushq %%rbx        \n" \
"        xorq %%rcx,%%rcx   \n" \
"        cpuid              \n" \
"        movq %%rbx, %%rsi  \n" \
"        popq %%rbx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
/* Fallback: report all-zero registers; the (void) casts only silence
   unused-variable warnings. */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
   254 
   255 static int CPU_CPUIDFeatures[4];
   256 static int CPU_CPUIDMaxFunction = 0;
   257 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
   258 static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
   259 
   260 static void
   261 CPU_calcCPUIDFeatures(void)
   262 {
   263     static SDL_bool checked = SDL_FALSE;
   264     if (!checked) {
   265         checked = SDL_TRUE;
   266         if (CPU_haveCPUID()) {
   267             int a, b, c, d;
   268             cpuid(0, a, b, c, d);
   269             CPU_CPUIDMaxFunction = a;
   270             if (CPU_CPUIDMaxFunction >= 1) {
   271                 cpuid(1, a, b, c, d);
   272                 CPU_CPUIDFeatures[0] = a;
   273                 CPU_CPUIDFeatures[1] = b;
   274                 CPU_CPUIDFeatures[2] = c;
   275                 CPU_CPUIDFeatures[3] = d;
   276 
   277                 /* Check to make sure we can call xgetbv */
   278                 if (c & 0x08000000) {
   279                     /* Call xgetbv to see if YMM (etc) register state is saved */
   280 #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
   281                     __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
   282 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
   283                     a = (int)_xgetbv(0);
   284 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
   285                     __asm
   286                     {
   287                         xor ecx, ecx
   288                         _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
   289                         mov a, eax
   290                     }
   291 #endif
   292                     CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
   293                     CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
   294                 }
   295             }
   296         }
   297     }
   298 }
   299 
   300 static int
   301 CPU_haveAltiVec(void)
   302 {
   303     volatile int altivec = 0;
   304 #ifndef SDL_CPUINFO_DISABLED
   305 #if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
   306 #ifdef __OpenBSD__
   307     int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
   308 #else
   309     int selectors[2] = { CTL_HW, HW_VECTORUNIT };
   310 #endif
   311     int hasVectorUnit = 0;
   312     size_t length = sizeof(hasVectorUnit);
   313     int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
   314     if (0 == error)
   315         altivec = (hasVectorUnit != 0);
   316 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
   317     void (*handler) (int sig);
   318     handler = signal(SIGILL, illegal_instruction);
   319     if (setjmp(jmpbuf) == 0) {
   320         asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
   321         altivec = 1;
   322     }
   323     signal(SIGILL, handler);
   324 #endif
   325 #endif
   326     return altivec;
   327 }
   328 
   329 #if defined(__LINUX__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
   330 static int
   331 readProcAuxvForNeon(void)
   332 {
   333     int neon = 0;
   334     int kv[2];
   335     const int fd = open("/proc/self/auxv", O_RDONLY);
   336     if (fd != -1) {
   337         while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
   338             if (kv[0] == AT_HWCAP) {
   339                 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
   340                 break;
   341             }
   342         }
   343         close(fd);
   344     }
   345     return neon;
   346 }
   347 #endif
   348 
   349 
   350 static int
   351 CPU_haveNEON(void)
   352 {
   353 /* The way you detect NEON is a privileged instruction on ARM, so you have
   354    query the OS kernel in a platform-specific way. :/ */
   355 #if defined(SDL_CPUINFO_DISABLED)
   356    return 0; /* disabled */
   357 #elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
   358 /* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
   359 /* Seems to have been removed */
   360 #  if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
   361 #    define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
   362 #  endif
   363 /* All WinRT ARM devices are required to support NEON, but just in case. */
   364     return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
   365 #elif !defined(__ARM_ARCH)
   366     return 0;  /* not an ARM CPU at all. */
   367 #elif __ARM_ARCH >= 8
   368     return 1;  /* ARMv8 always has non-optional NEON support. */
   369 #elif defined(__APPLE__) && (__ARM_ARCH >= 7)
   370     /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
   371     return 1;  /* all Apple ARMv7 chips and later have NEON. */
   372 #elif defined(__APPLE__)
   373     return 0;  /* assume anything else from Apple doesn't have NEON. */
   374 #elif defined(__QNXNTO__)
   375     return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
   376 #elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
   377     return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
   378 #elif defined(__LINUX__)
   379     return readProcAuxvForNeon();
   380 #elif defined(__ANDROID__)
   381     /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
   382     {
   383         AndroidCpuFamily cpu_family = android_getCpuFamily();
   384         if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
   385             uint64_t cpu_features = android_getCpuFeatures();
   386             if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
   387                 return 1;
   388             }
   389         }
   390         return 0;
   391     }
   392 #else
   393 #warning SDL_HasNEON is not implemented for this ARM platform. Write me.
   394     return 0;
   395 #endif
   396 }
   397 
   398 static int
   399 CPU_have3DNow(void)
   400 {
   401     if (CPU_CPUIDMaxFunction > 0) {  /* that is, do we have CPUID at all? */
   402         int a, b, c, d;
   403         cpuid(0x80000000, a, b, c, d);
   404         if (a >= 0x80000001) {
   405             cpuid(0x80000001, a, b, c, d);
   406             return (d & 0x80000000);
   407         }
   408     }
   409     return 0;
   410 }
   411 
/* Feature tests on the CPUID function 1 results cached by
   CPU_calcCPUIDFeatures(): CPU_CPUIDFeatures[2] holds ECX and
   CPU_CPUIDFeatures[3] holds EDX. Each macro yields non-zero when its bit
   is set; the values are only meaningful after CPU_calcCPUIDFeatures() has
   run. */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
/* AVX additionally requires that CPU_OSSavesYMM was set. */
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
   420 
   421 static int
   422 CPU_haveAVX2(void)
   423 {
   424     if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
   425         int a, b, c, d;
   426         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   427         cpuid(7, a, b, c, d);
   428         return (b & 0x00000020);
   429     }
   430     return 0;
   431 }
   432 
   433 static int
   434 CPU_haveAVX512F(void)
   435 {
   436     if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
   437         int a, b, c, d;
   438         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   439         cpuid(7, a, b, c, d);
   440         return (b & 0x00010000);
   441     }
   442     return 0;
   443 }
   444 
   445 static int SDL_CPUCount = 0;
   446 
   447 int
   448 SDL_GetCPUCount(void)
   449 {
   450     if (!SDL_CPUCount) {
   451 #ifndef SDL_CPUINFO_DISABLED
   452 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
   453         if (SDL_CPUCount <= 0) {
   454             SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
   455         }
   456 #endif
   457 #ifdef HAVE_SYSCTLBYNAME
   458         if (SDL_CPUCount <= 0) {
   459             size_t size = sizeof(SDL_CPUCount);
   460             sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
   461         }
   462 #endif
   463 #ifdef __WIN32__
   464         if (SDL_CPUCount <= 0) {
   465             SYSTEM_INFO info;
   466             GetSystemInfo(&info);
   467             SDL_CPUCount = info.dwNumberOfProcessors;
   468         }
   469 #endif
   470 #ifdef __OS2__
   471         if (SDL_CPUCount <= 0) {
   472             DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
   473                             &SDL_CPUCount, sizeof(SDL_CPUCount) );
   474         }
   475 #endif
   476 #endif
   477         /* There has to be at least 1, right? :) */
   478         if (SDL_CPUCount <= 0) {
   479             SDL_CPUCount = 1;
   480         }
   481     }
   482     return SDL_CPUCount;
   483 }
   484 
   485 /* Oh, such a sweet sweet trick, just not very useful. :) */
   486 static const char *
   487 SDL_GetCPUType(void)
   488 {
   489     static char SDL_CPUType[13];
   490 
   491     if (!SDL_CPUType[0]) {
   492         int i = 0;
   493 
   494         CPU_calcCPUIDFeatures();
   495         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   496             int a, b, c, d;
   497             cpuid(0x00000000, a, b, c, d);
   498             (void) a;
   499             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   500             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   501             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   502             SDL_CPUType[i++] = (char)(b & 0xff);
   503 
   504             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   505             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   506             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   507             SDL_CPUType[i++] = (char)(d & 0xff);
   508 
   509             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   510             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   511             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   512             SDL_CPUType[i++] = (char)(c & 0xff);
   513         }
   514         if (!SDL_CPUType[0]) {
   515             SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
   516         }
   517     }
   518     return SDL_CPUType;
   519 }
   520 
   521 
   522 #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
   523 static const char *
   524 SDL_GetCPUName(void)
   525 {
   526     static char SDL_CPUName[48];
   527 
   528     if (!SDL_CPUName[0]) {
   529         int i = 0;
   530         int a, b, c, d;
   531 
   532         CPU_calcCPUIDFeatures();
   533         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   534             cpuid(0x80000000, a, b, c, d);
   535             if (a >= 0x80000004) {
   536                 cpuid(0x80000002, a, b, c, d);
   537                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   538                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   539                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   540                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   541                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   542                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   543                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   544                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   545                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   546                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   547                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   548                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   549                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   550                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   551                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   552                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   553                 cpuid(0x80000003, a, b, c, d);
   554                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   555                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   556                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   557                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   558                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   559                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   560                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   561                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   562                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   563                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   564                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   565                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   566                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   567                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   568                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   569                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   570                 cpuid(0x80000004, a, b, c, d);
   571                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   572                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   573                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   574                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   575                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   576                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   577                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   578                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   579                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   580                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   581                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   582                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   583                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   584                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   585                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   586                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   587             }
   588         }
   589         if (!SDL_CPUName[0]) {
   590             SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
   591         }
   592     }
   593     return SDL_CPUName;
   594 }
   595 #endif
   596 
   597 int
   598 SDL_GetCPUCacheLineSize(void)
   599 {
   600     const char *cpuType = SDL_GetCPUType();
   601     int a, b, c, d;
   602     (void) a; (void) b; (void) c; (void) d;
   603     if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
   604         cpuid(0x00000001, a, b, c, d);
   605         return (((b >> 8) & 0xff) * 8);
   606     } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0) {
   607         cpuid(0x80000005, a, b, c, d);
   608         return (c & 0xff);
   609     } else {
   610         /* Just make a guess here... */
   611         return SDL_CACHELINE_SIZE;
   612     }
   613 }
   614 
   615 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
   616 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
   617 
   618 static Uint32
   619 SDL_GetCPUFeatures(void)
   620 {
   621     if (SDL_CPUFeatures == 0xFFFFFFFF) {
   622         CPU_calcCPUIDFeatures();
   623         SDL_CPUFeatures = 0;
   624         SDL_SIMDAlignment = 4;  /* a good safe base value */
   625         if (CPU_haveRDTSC()) {
   626             SDL_CPUFeatures |= CPU_HAS_RDTSC;
   627         }
   628         if (CPU_haveAltiVec()) {
   629             SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
   630             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   631         }
   632         if (CPU_haveMMX()) {
   633             SDL_CPUFeatures |= CPU_HAS_MMX;
   634             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   635         }
   636         if (CPU_have3DNow()) {
   637             SDL_CPUFeatures |= CPU_HAS_3DNOW;
   638             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   639         }
   640         if (CPU_haveSSE()) {
   641             SDL_CPUFeatures |= CPU_HAS_SSE;
   642             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   643         }
   644         if (CPU_haveSSE2()) {
   645             SDL_CPUFeatures |= CPU_HAS_SSE2;
   646             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   647         }
   648         if (CPU_haveSSE3()) {
   649             SDL_CPUFeatures |= CPU_HAS_SSE3;
   650             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   651         }
   652         if (CPU_haveSSE41()) {
   653             SDL_CPUFeatures |= CPU_HAS_SSE41;
   654             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   655         }
   656         if (CPU_haveSSE42()) {
   657             SDL_CPUFeatures |= CPU_HAS_SSE42;
   658             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   659         }
   660         if (CPU_haveAVX()) {
   661             SDL_CPUFeatures |= CPU_HAS_AVX;
   662             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   663         }
   664         if (CPU_haveAVX2()) {
   665             SDL_CPUFeatures |= CPU_HAS_AVX2;
   666             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   667         }
   668         if (CPU_haveAVX512F()) {
   669             SDL_CPUFeatures |= CPU_HAS_AVX512F;
   670             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
   671         }
   672         if (CPU_haveNEON()) {
   673             SDL_CPUFeatures |= CPU_HAS_NEON;
   674             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   675         }
   676     }
   677     return SDL_CPUFeatures;
   678 }
   679 
   680 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
   681 
   682 SDL_bool SDL_HasRDTSC(void)
   683 {
   684     return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
   685 }
   686 
   687 SDL_bool
   688 SDL_HasAltiVec(void)
   689 {
   690     return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
   691 }
   692 
   693 SDL_bool
   694 SDL_HasMMX(void)
   695 {
   696     return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
   697 }
   698 
   699 SDL_bool
   700 SDL_Has3DNow(void)
   701 {
   702     return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
   703 }
   704 
   705 SDL_bool
   706 SDL_HasSSE(void)
   707 {
   708     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
   709 }
   710 
   711 SDL_bool
   712 SDL_HasSSE2(void)
   713 {
   714     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
   715 }
   716 
   717 SDL_bool
   718 SDL_HasSSE3(void)
   719 {
   720     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
   721 }
   722 
   723 SDL_bool
   724 SDL_HasSSE41(void)
   725 {
   726     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
   727 }
   728 
   729 SDL_bool
   730 SDL_HasSSE42(void)
   731 {
   732     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
   733 }
   734 
   735 SDL_bool
   736 SDL_HasAVX(void)
   737 {
   738     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
   739 }
   740 
   741 SDL_bool
   742 SDL_HasAVX2(void)
   743 {
   744     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
   745 }
   746 
   747 SDL_bool
   748 SDL_HasAVX512F(void)
   749 {
   750     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
   751 }
   752 
   753 SDL_bool
   754 SDL_HasNEON(void)
   755 {
   756     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
   757 }
   758 
   759 static int SDL_SystemRAM = 0;
   760 
   761 int
   762 SDL_GetSystemRAM(void)
   763 {
   764     if (!SDL_SystemRAM) {
   765 #ifndef SDL_CPUINFO_DISABLED
   766 #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
   767         if (SDL_SystemRAM <= 0) {
   768             SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
   769         }
   770 #endif
   771 #ifdef HAVE_SYSCTLBYNAME
   772         if (SDL_SystemRAM <= 0) {
   773 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
   774 #ifdef HW_REALMEM
   775             int mib[2] = {CTL_HW, HW_REALMEM};
   776 #else
   777             /* might only report up to 2 GiB */
   778             int mib[2] = {CTL_HW, HW_PHYSMEM};
   779 #endif /* HW_REALMEM */
   780 #else
   781             int mib[2] = {CTL_HW, HW_MEMSIZE};
   782 #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
   783             Uint64 memsize = 0;
   784             size_t len = sizeof(memsize);
   785             
   786             if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
   787                 SDL_SystemRAM = (int)(memsize / (1024*1024));
   788             }
   789         }
   790 #endif
   791 #ifdef __WIN32__
   792         if (SDL_SystemRAM <= 0) {
   793             MEMORYSTATUSEX stat;
   794             stat.dwLength = sizeof(stat);
   795             if (GlobalMemoryStatusEx(&stat)) {
   796                 SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
   797             }
   798         }
   799 #endif
   800 #ifdef __OS2__
   801         if (SDL_SystemRAM <= 0) {
   802             Uint32 sysram = 0;
   803             DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
   804             SDL_SystemRAM = (int) (sysram / 0x100000U);
   805         }
   806 #endif
   807 #endif
   808     }
   809     return SDL_SystemRAM;
   810 }
   811 
   812 
   813 size_t
   814 SDL_SIMDGetAlignment(void)
   815 {
   816     if (SDL_SIMDAlignment == 0xFFFFFFFF) {
   817         SDL_GetCPUFeatures();  /* make sure this has been calculated */
   818     }
   819     SDL_assert(SDL_SIMDAlignment != 0);
   820     return SDL_SIMDAlignment;
   821 }
   822 
   823 void *
   824 SDL_SIMDAlloc(const size_t len)
   825 {
   826     const size_t alignment = SDL_SIMDGetAlignment();
   827     const size_t padding = alignment - (len % alignment);
   828     const size_t padded = (padding != alignment) ? (len + padding) : len;
   829     Uint8 *retval = NULL;
   830     Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
   831     if (ptr) {
   832         /* store the actual malloc pointer right before our aligned pointer. */
   833         retval = ptr + sizeof (void *);
   834         retval += alignment - (((size_t) retval) % alignment);
   835         *(((void **) retval) - 1) = ptr;
   836     }
   837     return retval;
   838 }
   839 
   840 void
   841 SDL_SIMDFree(void *ptr)
   842 {
   843     if (ptr) {
   844         void **realptr = (void **) ptr;
   845         realptr--;
   846         SDL_free(*(((void **) ptr) - 1));
   847     }
   848 }
   849 
   850 
   851 #ifdef TEST_MAIN
   852 
   853 #include <stdio.h>
   854 
   855 int
   856 main()
   857 {
   858     printf("CPU count: %d\n", SDL_GetCPUCount());
   859     printf("CPU type: %s\n", SDL_GetCPUType());
   860     printf("CPU name: %s\n", SDL_GetCPUName());
   861     printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
   862     printf("RDTSC: %d\n", SDL_HasRDTSC());
   863     printf("Altivec: %d\n", SDL_HasAltiVec());
   864     printf("MMX: %d\n", SDL_HasMMX());
   865     printf("3DNow: %d\n", SDL_Has3DNow());
   866     printf("SSE: %d\n", SDL_HasSSE());
   867     printf("SSE2: %d\n", SDL_HasSSE2());
   868     printf("SSE3: %d\n", SDL_HasSSE3());
   869     printf("SSE4.1: %d\n", SDL_HasSSE41());
   870     printf("SSE4.2: %d\n", SDL_HasSSE42());
   871     printf("AVX: %d\n", SDL_HasAVX());
   872     printf("AVX2: %d\n", SDL_HasAVX2());
   873     printf("AVX-512F: %d\n", SDL_HasAVX512F());
   874     printf("NEON: %d\n", SDL_HasNEON());
   875     printf("RAM: %d MB\n", SDL_GetSystemRAM());
   876     return 0;
   877 }
   878 
   879 #endif /* TEST_MAIN */
   880 
   881 /* vi: set ts=4 sw=4 expandtab: */