src/cpuinfo/SDL_cpuinfo.c
author Ryan C. Gordon <icculus@icculus.org>
Mon, 21 May 2018 11:35:42 -0400
changeset 11986 e307b74aa643
parent 11985 36aa0bf7312b
child 12242 df7260f149f2
permissions -rw-r--r--
cpuinfo: Added SDL_HasAVX512F().

This checks for the "foundation" AVX-512 instructions (that all AVX-512
compatible CPUs support).
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #ifdef TEST_MAIN
    22 #include "SDL_config.h"
    23 #else
    24 #include "../SDL_internal.h"
    25 #endif
    26 
    27 #if defined(__WIN32__)
    28 #include "../core/windows/SDL_windows.h"
    29 #endif
    30 #if defined(__OS2__)
    31 #define INCL_DOS
    32 #include <os2.h>
    33 #ifndef QSV_NUMPROCESSORS
    34 #define QSV_NUMPROCESSORS 26
    35 #endif
    36 #endif
    37 
    38 /* CPU feature detection for SDL */
    39 
    40 #include "SDL_cpuinfo.h"
    41 #include "SDL_assert.h"
    42 
    43 #ifdef HAVE_SYSCONF
    44 #include <unistd.h>
    45 #endif
    46 #ifdef HAVE_SYSCTLBYNAME
    47 #include <sys/types.h>
    48 #include <sys/sysctl.h>
    49 #endif
    50 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
    51 #include <sys/sysctl.h>         /* For AltiVec check */
    52 #elif defined(__OpenBSD__) && defined(__powerpc__)
    53 #include <sys/param.h>
    54 #include <sys/sysctl.h> /* For AltiVec check */
    55 #include <machine/cpu.h>
    56 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    57 #include <signal.h>
    58 #include <setjmp.h>
    59 #endif
    60 
    61 #if defined(__QNXNTO__)
    62 #include <sys/syspage.h>
    63 #endif
    64 
    65 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
    66 /*#include <asm/hwcap.h>*/
    67 #ifndef AT_HWCAP
    68 #define AT_HWCAP 16
    69 #endif
    70 #ifndef HWCAP_NEON
    71 #define HWCAP_NEON (1 << 12)
    72 #endif
    73 #if defined HAVE_GETAUXVAL
    74 #include <sys/auxv.h>
    75 #else
    76 #include <fcntl.h>
    77 #endif
    78 #endif
    79 
    80 #define CPU_HAS_RDTSC   (1 << 0)
    81 #define CPU_HAS_ALTIVEC (1 << 1)
    82 #define CPU_HAS_MMX     (1 << 2)
    83 #define CPU_HAS_3DNOW   (1 << 3)
    84 #define CPU_HAS_SSE     (1 << 4)
    85 #define CPU_HAS_SSE2    (1 << 5)
    86 #define CPU_HAS_SSE3    (1 << 6)
    87 #define CPU_HAS_SSE41   (1 << 7)
    88 #define CPU_HAS_SSE42   (1 << 8)
    89 #define CPU_HAS_AVX     (1 << 9)
    90 #define CPU_HAS_AVX2    (1 << 10)
    91 #define CPU_HAS_NEON    (1 << 11)
    92 #define CPU_HAS_AVX512F (1 << 12)
    93 
    94 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
    95 /* This is the brute force way of detecting instruction sets...
    96    the idea is borrowed from the libmpeg2 library - thanks!
    97  */
    98 static jmp_buf jmpbuf;
    99 static void
   100 illegal_instruction(int sig)
   101 {
   102     longjmp(jmpbuf, 1);
   103 }
   104 #endif /* HAVE_SETJMP */
   105 
/* Report whether the CPUID instruction can be used.
 *
 * Returns 1 when CPUID is available and 0 otherwise. The result stays 0 when
 * SDL_CPUINFO_DISABLED is defined or when none of the compiler/architecture
 * branches below is compiled in.
 *
 * Every assembly variant uses the same probe: read EFLAGS, flip the ID bit
 * (0x200000), write it back, read it again and compare with the original.
 * If the bit could not be toggled the CPU is treated as having no CPUID.
 * The MSVC x64 branch skips the probe and reports 1 unconditionally.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if defined(__GNUC__) && defined(i386)
    /* GCC-style asm, 32-bit x86: result is stored through the "=m" operand. */
    __asm__ (
"        pushfl                      # Get original EFLAGS             \n"
"        popl    %%eax                                                 \n"
"        movl    %%eax,%%ecx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
"        popfl                       # Replace current EFLAGS value    \n"
"        pushfl                      # Get new EFLAGS                  \n"
"        popl    %%eax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif defined(__GNUC__) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has 
   CPUid by definition.  But it's nice to be able to prove it.  :)      */
    __asm__ (
"        pushfq                      # Get original EFLAGS             \n"
"        popq    %%rax                                                 \n"
"        movq    %%rax,%%rcx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
"        popfq                       # Replace current EFLAGS value    \n"
"        pushfq                      # Get new EFLAGS                  \n"
"        popq    %%rax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    /* MSVC/Watcom inline asm, 32-bit x86: writes has_CPUID by name. */
    __asm {
        pushfd                      ; Get original EFLAGS
        pop     eax
        mov     ecx, eax
        xor     eax, 200000h        ; Flip ID bit in EFLAGS
        push    eax                 ; Save new EFLAGS value on stack
        popfd                       ; Replace current EFLAGS value
        pushfd                      ; Get new EFLAGS
        pop     eax                 ; Store new EFLAGS in EAX
        xor     eax, ecx            ; Can not toggle ID bit,
        jz      done                ; Processor=80486
        mov     has_CPUID,1         ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe here: CPUID is simply assumed on this target. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): the result is stored to a hard-coded frame slot
       (-8(%ebp)), presumably the location of has_CPUID; this depends on
       the compiler's stack layout - confirm. */
    __asm (
"       pushfl                 \n"
"       popl    %eax           \n"
"       movl    %eax,%ecx      \n"
"       xorl    $0x200000,%eax \n"
"       pushl   %eax           \n"
"       popfl                  \n"
"       pushfl                 \n"
"       popl    %eax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%ebp)    \n"
"1:                            \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): same hard-coded frame slot assumption as the 32-bit
       Sun branch, here -8(%rbp) - confirm. */
    __asm (
"       pushfq                 \n"
"       popq    %rax           \n"
"       movq    %rax,%rcx      \n"
"       xorl    $0x200000,%eax \n"
"       pushq   %rax           \n"
"       popfq                  \n"
"       pushfq                 \n"
"       popq    %rax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%rbp)    \n"
"1:                            \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
   203 
/* cpuid(func, a, b, c, d): run CPUID for leaf `func` and store the resulting
   EAX, EBX, ECX and EDX values in a, b, c and d respectively.
   One variant is selected per compiler/architecture; when none matches, the
   fallback at the bottom just zeroes all four outputs. */
#if defined(__GNUC__) && defined(i386)
/* Saves/restores EBX around the instruction and hands EBX back through ESI;
   ECX is cleared before CPUID executes. */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushl %%ebx        \n" \
"        xorl %%ecx,%%ecx   \n" \
"        cpuid              \n" \
"        movl %%ebx, %%esi  \n" \
"        popl %%ebx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif defined(__GNUC__) && defined(__x86_64__)
/* 64-bit flavour of the variant above (RBX saved/restored, result via RSI). */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushq %%rbx        \n" \
"        xorq %%rcx,%%rcx   \n" \
"        cpuid              \n" \
"        movq %%rbx, %%rsi  \n" \
"        popq %%rbx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
/* MSVC/Watcom inline asm: ECX is cleared, outputs are written by name. */
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
/* Uses the __cpuid intrinsic. NOTE(review): unlike the asm variants there is
   no explicit ECX clear here, so sub-leaf selection (e.g. for leaf 7) relies
   on the intrinsic itself - TODO confirm for every compiler that defines
   _MSC_VER. */
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
/* No CPUID support: report all-zero registers (the casts silence
   set-but-unused warnings). */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
   247 
   248 static int CPU_CPUIDFeatures[4];
   249 static int CPU_CPUIDMaxFunction = 0;
   250 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
   251 static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
   252 
   253 static void
   254 CPU_calcCPUIDFeatures(void)
   255 {
   256     static SDL_bool checked = SDL_FALSE;
   257     if (!checked) {
   258         checked = SDL_TRUE;
   259         if (CPU_haveCPUID()) {
   260             int a, b, c, d;
   261             cpuid(0, a, b, c, d);
   262             CPU_CPUIDMaxFunction = a;
   263             if (CPU_CPUIDMaxFunction >= 1) {
   264                 cpuid(1, a, b, c, d);
   265                 CPU_CPUIDFeatures[0] = a;
   266                 CPU_CPUIDFeatures[1] = b;
   267                 CPU_CPUIDFeatures[2] = c;
   268                 CPU_CPUIDFeatures[3] = d;
   269 
   270                 /* Check to make sure we can call xgetbv */
   271                 if (c & 0x08000000) {
   272                     /* Call xgetbv to see if YMM (etc) register state is saved */
   273 #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
   274                     __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
   275 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
   276                     a = (int)_xgetbv(0);
   277 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
   278                     __asm
   279                     {
   280                         xor ecx, ecx
   281                         _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
   282                         mov a, eax
   283                     }
   284 #endif
   285                     CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
   286                     CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
   287                 }
   288             }
   289         }
   290     }
   291 }
   292 
/* Report whether AltiVec is available.
 *
 * Returns non-zero when a platform check succeeds and 0 otherwise (including
 * when SDL_CPUINFO_DISABLED is defined or no branch applies).
 *  - Mac OS X on PowerPC and OpenBSD on powerpc ask the kernel via sysctl().
 *  - Otherwise, with SDL_ALTIVEC_BLITTERS and HAVE_SETJMP, an AltiVec
 *    instruction is executed under a temporary SIGILL handler.
 */
static int
CPU_haveAltiVec(void)
{
    /* presumably volatile because it is written between setjmp() and a
       possible longjmp() - confirm */
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
#ifdef __OpenBSD__
    int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#else
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);
    int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
    /* A failed sysctl() leaves altivec at 0. */
    if (0 == error)
        altivec = (hasVectorUnit != 0);
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    void (*handler) (int sig);
    /* Install our SIGILL handler, remembering the previous one. */
    handler = signal(SIGILL, illegal_instruction);
    if (setjmp(jmpbuf) == 0) {
        /* If this faults, illegal_instruction() longjmp()s back and the
           assignment below is skipped. */
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
        altivec = 1;
    }
    /* Put the previous SIGILL handler back. */
    signal(SIGILL, handler);
#endif
#endif
    return altivec;
}
   321 
   322 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
   323 static int
   324 readProcAuxvForNeon(void)
   325 {
   326     int neon = 0;
   327     int kv[2];
   328     const int fd = open("/proc/self/auxv", O_RDONLY);
   329     if (fd != -1) {
   330         while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
   331             if (kv[0] == AT_HWCAP) {
   332                 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
   333                 break;
   334             }
   335         }
   336         close(fd);
   337     }
   338     return neon;
   339 }
   340 #endif
   341 
   342 
/* Report whether ARM NEON is available; returns non-zero if so.
 * Exactly one of the preprocessor branches below forms the function body,
 * chosen by target platform and architecture level.
 */
static int
CPU_haveNEON(void)
{
/* The way you detect NEON is a privileged instruction on ARM, so you have
   to query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED) || !defined(__ARM_ARCH)
    return 0;  /* disabled or not an ARM CPU at all. */
#elif __ARM_ARCH >= 8
    return 1;  /* ARMv8 always has non-optional NEON support. */
#elif defined(__APPLE__) && (__ARM_ARCH >= 7)
    /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
    return 1;  /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
    return 0;  /* assume anything else from Apple doesn't have NEON. */
#elif defined(__QNXNTO__)
    /* Non-zero (the masked flag bit itself) when the syspage reports NEON. */
    return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
#elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
    return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
#elif (defined(__LINUX__) || defined(__ANDROID__))
    return readProcAuxvForNeon();   /* Android offers a static library for this, but it just parses /proc/self/auxv */
#elif (defined(__WINDOWS__) || defined(__WINRT__)) && defined(_M_ARM)
    /* All WinRT ARM devices are required to support NEON, but just in case. */
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
   371 
   372 static int
   373 CPU_have3DNow(void)
   374 {
   375     if (CPU_CPUIDMaxFunction > 0) {  /* that is, do we have CPUID at all? */
   376         int a, b, c, d;
   377         cpuid(0x80000000, a, b, c, d);
   378         if (a >= 0x80000001) {
   379             cpuid(0x80000001, a, b, c, d);
   380             return (d & 0x80000000);
   381         }
   382     }
   383     return 0;
   384 }
   385 
   386 #define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
   387 #define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
   388 #define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
   389 #define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
   390 #define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
   391 #define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
   392 #define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
   393 #define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
   394 
   395 static int
   396 CPU_haveAVX2(void)
   397 {
   398     if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
   399         int a, b, c, d;
   400         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   401         cpuid(7, a, b, c, d);
   402         return (b & 0x00000020);
   403     }
   404     return 0;
   405 }
   406 
   407 static int
   408 CPU_haveAVX512F(void)
   409 {
   410     if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
   411         int a, b, c, d;
   412         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   413         cpuid(7, a, b, c, d);
   414         return (b & 0x00010000);
   415     }
   416     return 0;
   417 }
   418 
   419 static int SDL_CPUCount = 0;
   420 
   421 int
   422 SDL_GetCPUCount(void)
   423 {
   424     if (!SDL_CPUCount) {
   425 #ifndef SDL_CPUINFO_DISABLED
   426 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
   427         if (SDL_CPUCount <= 0) {
   428             SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
   429         }
   430 #endif
   431 #ifdef HAVE_SYSCTLBYNAME
   432         if (SDL_CPUCount <= 0) {
   433             size_t size = sizeof(SDL_CPUCount);
   434             sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
   435         }
   436 #endif
   437 #ifdef __WIN32__
   438         if (SDL_CPUCount <= 0) {
   439             SYSTEM_INFO info;
   440             GetSystemInfo(&info);
   441             SDL_CPUCount = info.dwNumberOfProcessors;
   442         }
   443 #endif
   444 #ifdef __OS2__
   445         if (SDL_CPUCount <= 0) {
   446             DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
   447                             &SDL_CPUCount, sizeof(SDL_CPUCount) );
   448         }
   449 #endif
   450 #endif
   451         /* There has to be at least 1, right? :) */
   452         if (SDL_CPUCount <= 0) {
   453             SDL_CPUCount = 1;
   454         }
   455     }
   456     return SDL_CPUCount;
   457 }
   458 
   459 /* Oh, such a sweet sweet trick, just not very useful. :) */
   460 static const char *
   461 SDL_GetCPUType(void)
   462 {
   463     static char SDL_CPUType[13];
   464 
   465     if (!SDL_CPUType[0]) {
   466         int i = 0;
   467 
   468         CPU_calcCPUIDFeatures();
   469         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   470             int a, b, c, d;
   471             cpuid(0x00000000, a, b, c, d);
   472             (void) a;
   473             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   474             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   475             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   476             SDL_CPUType[i++] = (char)(b & 0xff);
   477 
   478             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   479             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   480             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   481             SDL_CPUType[i++] = (char)(d & 0xff);
   482 
   483             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   484             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   485             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   486             SDL_CPUType[i++] = (char)(c & 0xff);
   487         }
   488         if (!SDL_CPUType[0]) {
   489             SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
   490         }
   491     }
   492     return SDL_CPUType;
   493 }
   494 
   495 
   496 #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
   497 static const char *
   498 SDL_GetCPUName(void)
   499 {
   500     static char SDL_CPUName[48];
   501 
   502     if (!SDL_CPUName[0]) {
   503         int i = 0;
   504         int a, b, c, d;
   505 
   506         CPU_calcCPUIDFeatures();
   507         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   508             cpuid(0x80000000, a, b, c, d);
   509             if (a >= 0x80000004) {
   510                 cpuid(0x80000002, a, b, c, d);
   511                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   512                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   513                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   514                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   515                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   516                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   517                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   518                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   519                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   520                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   521                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   522                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   523                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   524                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   525                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   526                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   527                 cpuid(0x80000003, a, b, c, d);
   528                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   529                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   530                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   531                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   532                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   533                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   534                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   535                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   536                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   537                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   538                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   539                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   540                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   541                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   542                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   543                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   544                 cpuid(0x80000004, a, b, c, d);
   545                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   546                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   547                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   548                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   549                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   550                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   551                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   552                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   553                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   554                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   555                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   556                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   557                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   558                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   559                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   560                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   561             }
   562         }
   563         if (!SDL_CPUName[0]) {
   564             SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
   565         }
   566     }
   567     return SDL_CPUName;
   568 }
   569 #endif
   570 
   571 int
   572 SDL_GetCPUCacheLineSize(void)
   573 {
   574     const char *cpuType = SDL_GetCPUType();
   575     int a, b, c, d;
   576     (void) a; (void) b; (void) c; (void) d;
   577     if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
   578         cpuid(0x00000001, a, b, c, d);
   579         return (((b >> 8) & 0xff) * 8);
   580     } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0) {
   581         cpuid(0x80000005, a, b, c, d);
   582         return (c & 0xff);
   583     } else {
   584         /* Just make a guess here... */
   585         return SDL_CACHELINE_SIZE;
   586     }
   587 }
   588 
   589 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
   590 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
   591 
   592 static Uint32
   593 SDL_GetCPUFeatures(void)
   594 {
   595     if (SDL_CPUFeatures == 0xFFFFFFFF) {
   596         CPU_calcCPUIDFeatures();
   597         SDL_CPUFeatures = 0;
   598         SDL_SIMDAlignment = 4;  /* a good safe base value */
   599         if (CPU_haveRDTSC()) {
   600             SDL_CPUFeatures |= CPU_HAS_RDTSC;
   601         }
   602         if (CPU_haveAltiVec()) {
   603             SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
   604             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   605         }
   606         if (CPU_haveMMX()) {
   607             SDL_CPUFeatures |= CPU_HAS_MMX;
   608             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   609         }
   610         if (CPU_have3DNow()) {
   611             SDL_CPUFeatures |= CPU_HAS_3DNOW;
   612             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   613         }
   614         if (CPU_haveSSE()) {
   615             SDL_CPUFeatures |= CPU_HAS_SSE;
   616             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   617         }
   618         if (CPU_haveSSE2()) {
   619             SDL_CPUFeatures |= CPU_HAS_SSE2;
   620             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   621         }
   622         if (CPU_haveSSE3()) {
   623             SDL_CPUFeatures |= CPU_HAS_SSE3;
   624             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   625         }
   626         if (CPU_haveSSE41()) {
   627             SDL_CPUFeatures |= CPU_HAS_SSE41;
   628             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   629         }
   630         if (CPU_haveSSE42()) {
   631             SDL_CPUFeatures |= CPU_HAS_SSE42;
   632             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   633         }
   634         if (CPU_haveAVX()) {
   635             SDL_CPUFeatures |= CPU_HAS_AVX;
   636             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   637         }
   638         if (CPU_haveAVX2()) {
   639             SDL_CPUFeatures |= CPU_HAS_AVX2;
   640             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   641         }
   642         if (CPU_haveAVX512F()) {
   643             SDL_CPUFeatures |= CPU_HAS_AVX512F;
   644             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
   645         }
   646         if (CPU_haveNEON()) {
   647             SDL_CPUFeatures |= CPU_HAS_NEON;
   648             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   649         }
   650     }
   651     return SDL_CPUFeatures;
   652 }
   653 
   654 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
   655 
   656 SDL_bool SDL_HasRDTSC(void)
   657 {
   658     return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
   659 }
   660 
   661 SDL_bool
   662 SDL_HasAltiVec(void)
   663 {
   664     return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
   665 }
   666 
   667 SDL_bool
   668 SDL_HasMMX(void)
   669 {
   670     return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
   671 }
   672 
   673 SDL_bool
   674 SDL_Has3DNow(void)
   675 {
   676     return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
   677 }
   678 
   679 SDL_bool
   680 SDL_HasSSE(void)
   681 {
   682     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
   683 }
   684 
   685 SDL_bool
   686 SDL_HasSSE2(void)
   687 {
   688     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
   689 }
   690 
   691 SDL_bool
   692 SDL_HasSSE3(void)
   693 {
   694     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
   695 }
   696 
   697 SDL_bool
   698 SDL_HasSSE41(void)
   699 {
   700     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
   701 }
   702 
   703 SDL_bool
   704 SDL_HasSSE42(void)
   705 {
   706     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
   707 }
   708 
   709 SDL_bool
   710 SDL_HasAVX(void)
   711 {
   712     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
   713 }
   714 
   715 SDL_bool
   716 SDL_HasAVX2(void)
   717 {
   718     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
   719 }
   720 
   721 SDL_bool
   722 SDL_HasAVX512F(void)
   723 {
   724     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
   725 }
   726 
   727 SDL_bool
   728 SDL_HasNEON(void)
   729 {
   730     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
   731 }
   732 
   733 static int SDL_SystemRAM = 0;
   734 
   735 int
   736 SDL_GetSystemRAM(void)
   737 {
   738     if (!SDL_SystemRAM) {
   739 #ifndef SDL_CPUINFO_DISABLED
   740 #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
   741         if (SDL_SystemRAM <= 0) {
   742             SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
   743         }
   744 #endif
   745 #ifdef HAVE_SYSCTLBYNAME
   746         if (SDL_SystemRAM <= 0) {
   747 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
   748 #ifdef HW_REALMEM
   749             int mib[2] = {CTL_HW, HW_REALMEM};
   750 #else
   751             /* might only report up to 2 GiB */
   752             int mib[2] = {CTL_HW, HW_PHYSMEM};
   753 #endif /* HW_REALMEM */
   754 #else
   755             int mib[2] = {CTL_HW, HW_MEMSIZE};
   756 #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
   757             Uint64 memsize = 0;
   758             size_t len = sizeof(memsize);
   759             
   760             if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
   761                 SDL_SystemRAM = (int)(memsize / (1024*1024));
   762             }
   763         }
   764 #endif
   765 #ifdef __WIN32__
   766         if (SDL_SystemRAM <= 0) {
   767             MEMORYSTATUSEX stat;
   768             stat.dwLength = sizeof(stat);
   769             if (GlobalMemoryStatusEx(&stat)) {
   770                 SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
   771             }
   772         }
   773 #endif
   774 #ifdef __OS2__
   775         if (SDL_SystemRAM <= 0) {
   776             Uint32 sysram = 0;
   777             DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
   778             SDL_SystemRAM = (int) (sysram / 0x100000U);
   779         }
   780 #endif
   781 #endif
   782     }
   783     return SDL_SystemRAM;
   784 }
   785 
   786 
   787 size_t
   788 SDL_SIMDGetAlignment(void)
   789 {
   790     if (SDL_SIMDAlignment == 0xFFFFFFFF) {
   791         SDL_GetCPUFeatures();  /* make sure this has been calculated */
   792     }
   793     SDL_assert(SDL_SIMDAlignment != 0);
   794     return SDL_SIMDAlignment;
   795 }
   796 
   797 void *
   798 SDL_SIMDAlloc(const size_t len)
   799 {
   800     const size_t alignment = SDL_SIMDGetAlignment();
   801     const size_t padding = alignment - (len % alignment);
   802     const size_t padded = (padding != alignment) ? (len + padding) : len;
   803     Uint8 *retval = NULL;
   804     Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
   805     if (ptr) {
   806         /* store the actual malloc pointer right before our aligned pointer. */
   807         retval = ptr + sizeof (void *);
   808         retval += alignment - (((size_t) retval) % alignment);
   809         *(((void **) retval) - 1) = ptr;
   810     }
   811     return retval;
   812 }
   813 
   814 void
   815 SDL_SIMDFree(void *ptr)
   816 {
   817     if (ptr) {
   818         void **realptr = (void **) ptr;
   819         realptr--;
   820         SDL_free(*(((void **) ptr) - 1));
   821     }
   822 }
   823 
   824 
#ifdef TEST_MAIN

#include <stdio.h>

/* Stand-alone test driver, built only when TEST_MAIN is defined: prints the
   result of every query in this file, one per line, and returns 0. */
int
main()
{
    printf("CPU count: %d\n", SDL_GetCPUCount());
    printf("CPU type: %s\n", SDL_GetCPUType());
    printf("CPU name: %s\n", SDL_GetCPUName());
    printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
    /* Feature queries return SDL_bool and are printed as integers. */
    printf("RDTSC: %d\n", SDL_HasRDTSC());
    printf("Altivec: %d\n", SDL_HasAltiVec());
    printf("MMX: %d\n", SDL_HasMMX());
    printf("3DNow: %d\n", SDL_Has3DNow());
    printf("SSE: %d\n", SDL_HasSSE());
    printf("SSE2: %d\n", SDL_HasSSE2());
    printf("SSE3: %d\n", SDL_HasSSE3());
    printf("SSE4.1: %d\n", SDL_HasSSE41());
    printf("SSE4.2: %d\n", SDL_HasSSE42());
    printf("AVX: %d\n", SDL_HasAVX());
    printf("AVX2: %d\n", SDL_HasAVX2());
    printf("AVX-512F: %d\n", SDL_HasAVX512F());
    printf("NEON: %d\n", SDL_HasNEON());
    printf("RAM: %d MB\n", SDL_GetSystemRAM());
    return 0;
}

#endif /* TEST_MAIN */
   854 
   855 /* vi: set ts=4 sw=4 expandtab: */