src/cpuinfo/SDL_cpuinfo.c
author Ozkan Sezer
Sat, 11 Jul 2020 08:10:02 +0300
changeset 13945 465afae5eb7e
parent 13922 23d4515c00b4
child 14014 1b8e12692202
permissions -rw-r--r--
fix bug #5228 -- Add AltiVec detection for FreeBSD.

thanks Piotr Kubaj.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #ifdef TEST_MAIN
    22 #include "SDL_config.h"
    23 #else
    24 #include "../SDL_internal.h"
    25 #endif
    26 
    27 #if defined(__WIN32__) || defined(__WINRT__)
    28 #include "../core/windows/SDL_windows.h"
    29 #endif
    30 #if defined(__OS2__)
    31 #define INCL_DOS
    32 #include <os2.h>
    33 #ifndef QSV_NUMPROCESSORS
    34 #define QSV_NUMPROCESSORS 26
    35 #endif
    36 #endif
    37 
    38 /* CPU feature detection for SDL */
    39 
    40 #include "SDL_cpuinfo.h"
    41 #include "SDL_assert.h"
    42 
    43 #ifdef HAVE_SYSCONF
    44 #include <unistd.h>
    45 #endif
    46 #ifdef HAVE_SYSCTLBYNAME
    47 #include <sys/types.h>
    48 #include <sys/sysctl.h>
    49 #endif
#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
#include <sys/sysctl.h>         /* For AltiVec check */
#elif (defined(__OpenBSD__) || defined(__FreeBSD__)) && defined(__powerpc__)
#include <sys/param.h>
#include <sys/sysctl.h> /* For AltiVec check */
#include <machine/cpu.h>
#ifdef __FreeBSD__
#include <sys/auxv.h>   /* elf_aux_info(), for AltiVec check */
#endif
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
#include <signal.h>
#include <setjmp.h>
#endif
    60 
    61 #if defined(__QNXNTO__)
    62 #include <sys/syspage.h>
    63 #endif
    64 
    65 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
    66 /*#include <asm/hwcap.h>*/
    67 #ifndef AT_HWCAP
    68 #define AT_HWCAP 16
    69 #endif
    70 #ifndef AT_PLATFORM
    71 #define AT_PLATFORM 15
    72 #endif
    73 /* Prevent compilation error when including elf.h would also try to define AT_* as an enum */
    74 #ifndef AT_NULL
    75 #define AT_NULL 0
    76 #endif
    77 #ifndef HWCAP_NEON
    78 #define HWCAP_NEON (1 << 12)
    79 #endif
    80 #if defined HAVE_GETAUXVAL
    81 #include <sys/auxv.h>
    82 #else
    83 #include <fcntl.h>
    84 #endif
    85 #endif
    86 
    87 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
    88 #if __ARM_ARCH < 8
    89 #include <cpu-features.h>
    90 #endif
    91 #endif
    92 
    93 #ifdef __RISCOS__
    94 #include <kernel.h>
    95 #include <swis.h>
    96 #endif
    97 
/* Bit flags OR-ed into the cached SDL_CPUFeatures mask by
   SDL_GetCPUFeatures(); one bit per detected CPU capability. */
#define CPU_HAS_RDTSC   (1 << 0)
#define CPU_HAS_ALTIVEC (1 << 1)
#define CPU_HAS_MMX     (1 << 2)
#define CPU_HAS_3DNOW   (1 << 3)
#define CPU_HAS_SSE     (1 << 4)
#define CPU_HAS_SSE2    (1 << 5)
#define CPU_HAS_SSE3    (1 << 6)
#define CPU_HAS_SSE41   (1 << 7)
#define CPU_HAS_SSE42   (1 << 8)
#define CPU_HAS_AVX     (1 << 9)
#define CPU_HAS_AVX2    (1 << 10)
#define CPU_HAS_NEON    (1 << 11)
#define CPU_HAS_AVX512F (1 << 12)
#define CPU_HAS_ARM_SIMD (1 << 13)
   112 
   113 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
   114 /* This is the brute force way of detecting instruction sets...
   115    the idea is borrowed from the libmpeg2 library - thanks!
   116  */
   117 static jmp_buf jmpbuf;
   118 static void
   119 illegal_instruction(int sig)
   120 {
   121     longjmp(jmpbuf, 1);
   122 }
   123 #endif /* HAVE_SETJMP */
   124 
/* Report whether the CPUID instruction can be used.
 *
 * Returns 1 if CPUID is available, 0 otherwise.  0 is also the answer when
 * SDL_CPUINFO_DISABLED is defined or when none of the compiler/target
 * branches below applies.
 *
 * Except for MSVC x64 (which simply reports 1), every variant flips bit
 * 0x200000 (the ID bit) of the flags register, writes the flags back, reads
 * them again and checks whether the flipped bit stuck.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if defined(__GNUC__) && defined(i386)
    __asm__ (
"        pushfl                      # Get original EFLAGS             \n"
"        popl    %%eax                                                 \n"
"        movl    %%eax,%%ecx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
"        popfl                       # Replace current EFLAGS value    \n"
"        pushfl                      # Get new EFLAGS                  \n"
"        popl    %%eax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif defined(__GNUC__) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has 
   CPUid by definition.  But it's nice to be able to prove it.  :)      */
    __asm__ (
"        pushfq                      # Get original EFLAGS             \n"
"        popq    %%rax                                                 \n"
"        movq    %%rax,%%rcx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
"        popfq                       # Replace current EFLAGS value    \n"
"        pushfq                      # Get new EFLAGS                  \n"
"        popq    %%rax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    __asm {
        pushfd                      ; Get original EFLAGS
        pop     eax
        mov     ecx, eax
        xor     eax, 200000h        ; Flip ID bit in EFLAGS
        push    eax                 ; Save new EFLAGS value on stack
        popfd                       ; Replace current EFLAGS value
        pushfd                      ; Get new EFLAGS
        pop     eax                 ; Store new EFLAGS in EAX
        xor     eax, ecx            ; Can not toggle ID bit,
        jz      done                ; Processor=80486
        mov     has_CPUID,1         ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe here: the result is hard-wired to "available". */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): this variant has no output operand; it stores 1 through
       the fixed frame offset -8(%ebp), presumably the slot of has_CPUID --
       confirm against the compiler's stack layout. */
    __asm (
"       pushfl                 \n"
"       popl    %eax           \n"
"       movl    %eax,%ecx      \n"
"       xorl    $0x200000,%eax \n"
"       pushl   %eax           \n"
"       popfl                  \n"
"       pushfl                 \n"
"       popl    %eax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%ebp)    \n"
"1:                            \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): same fixed-offset store as the 32-bit __sun variant,
       here through -8(%rbp) -- confirm. */
    __asm (
"       pushfq                 \n"
"       popq    %rax           \n"
"       movq    %rax,%rcx      \n"
"       xorl    $0x200000,%eax \n"
"       pushq   %rax           \n"
"       popfq                  \n"
"       pushfq                 \n"
"       popq    %rax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%rbp)    \n"
"1:                            \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
   222 
/* cpuid(func, a, b, c, d)
 *
 * Executes the CPUID instruction for leaf `func` and stores the resulting
 * EAX, EBX, ECX and EDX values into the lvalues a, b, c and d.
 *
 * - GNU C variants: ECX is cleared before CPUID; EBX is saved and restored
 *   around the instruction and its value is handed back through ESI/RSI.
 * - MSVC x86 / Watcom variant: ECX is cleared before CPUID; the arguments
 *   are named directly inside the __asm block, so they must be plain
 *   variables whose names do not collide with register names.
 * - MSVC x64 variant: uses the __cpuid intrinsic with a scratch array named
 *   CPUInfo (NOTE(review): ECX handling is up to the intrinsic -- confirm).
 * - Any other toolchain: no CPUID is issued and all four outputs are set
 *   to 0.
 *
 * Some variants expand to a bare { ... } block rather than a single
 * expression statement, so use the macro only as a standalone statement.
 */
#if defined(__GNUC__) && defined(i386)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushl %%ebx        \n" \
"        xorl %%ecx,%%ecx   \n" \
"        cpuid              \n" \
"        movl %%ebx, %%esi  \n" \
"        popl %%ebx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif defined(__GNUC__) && defined(__x86_64__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushq %%rbx        \n" \
"        xorq %%rcx,%%rcx   \n" \
"        cpuid              \n" \
"        movq %%rbx, %%rsi  \n" \
"        popq %%rbx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
   266 
/* Results cached by CPU_calcCPUIDFeatures(). */
static int CPU_CPUIDFeatures[4];               /* EAX, EBX, ECX, EDX of CPUID leaf 1 */
static int CPU_CPUIDMaxFunction = 0;           /* EAX of CPUID leaf 0; stays 0 without CPUID */
static SDL_bool CPU_OSSavesYMM = SDL_FALSE;    /* xgetbv result has bits 1 and 2 (mask 6) set */
static SDL_bool CPU_OSSavesZMM = SDL_FALSE;    /* YMM saved and xgetbv result has mask 0xe0 set */
   271 
   272 static void
   273 CPU_calcCPUIDFeatures(void)
   274 {
   275     static SDL_bool checked = SDL_FALSE;
   276     if (!checked) {
   277         checked = SDL_TRUE;
   278         if (CPU_haveCPUID()) {
   279             int a, b, c, d;
   280             cpuid(0, a, b, c, d);
   281             CPU_CPUIDMaxFunction = a;
   282             if (CPU_CPUIDMaxFunction >= 1) {
   283                 cpuid(1, a, b, c, d);
   284                 CPU_CPUIDFeatures[0] = a;
   285                 CPU_CPUIDFeatures[1] = b;
   286                 CPU_CPUIDFeatures[2] = c;
   287                 CPU_CPUIDFeatures[3] = d;
   288 
   289                 /* Check to make sure we can call xgetbv */
   290                 if (c & 0x08000000) {
   291                     /* Call xgetbv to see if YMM (etc) register state is saved */
   292 #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
   293                     __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
   294 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
   295                     a = (int)_xgetbv(0);
   296 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
   297                     __asm
   298                     {
   299                         xor ecx, ecx
   300                         _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
   301                         mov a, eax
   302                     }
   303 #endif
   304                     CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
   305                     CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
   306                 }
   307             }
   308         }
   309     }
   310 }
   311 
   312 static int
   313 CPU_haveAltiVec(void)
   314 {
   315     volatile int altivec = 0;
   316 #ifndef SDL_CPUINFO_DISABLED
   317 #if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__)) || (defined(__FreeBSD__) && defined(__powerpc__))
   318 #ifdef __OpenBSD__
   319     int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
   320 #elif defined(__FreeBSD__)
   321     int selectors[2] = { CTL_HW, PPC_FEATURE_HAS_ALTIVEC };
   322 #else
   323     int selectors[2] = { CTL_HW, HW_VECTORUNIT };
   324 #endif
   325     int hasVectorUnit = 0;
   326     size_t length = sizeof(hasVectorUnit);
   327     int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
   328     if (0 == error)
   329         altivec = (hasVectorUnit != 0);
   330 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
   331     void (*handler) (int sig);
   332     handler = signal(SIGILL, illegal_instruction);
   333     if (setjmp(jmpbuf) == 0) {
   334         asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
   335         altivec = 1;
   336     }
   337     signal(SIGILL, handler);
   338 #endif
   339 #endif
   340     return altivec;
   341 }
   342 
   343 #if defined(__ARM_ARCH) && (__ARM_ARCH >= 6)
   344 static int
   345 CPU_haveARMSIMD(void)
   346 {
   347 	return 1;
   348 }
   349 
   350 #elif !defined(__arm__)
   351 static int
   352 CPU_haveARMSIMD(void)
   353 {
   354 	return 0;
   355 }
   356 
   357 #elif defined(__LINUX__)
   358 #include <unistd.h>
   359 #include <sys/types.h>
   360 #include <sys/stat.h>
   361 #include <fcntl.h>
   362 #include <elf.h>
   363 
   364 static int
   365 CPU_haveARMSIMD(void)
   366 {
   367     int arm_simd = 0;
   368     int fd;
   369 
   370     fd = open("/proc/self/auxv", O_RDONLY);
   371     if (fd >= 0)
   372     {
   373         Elf32_auxv_t aux;
   374         while (read(fd, &aux, sizeof aux) == sizeof aux)
   375         {
   376             if (aux.a_type == AT_PLATFORM)
   377             {
   378                 const char *plat = (const char *) aux.a_un.a_val;
   379                 if (plat) {
   380                     arm_simd = strncmp(plat, "v6l", 3) == 0 ||
   381                                strncmp(plat, "v7l", 3) == 0;
   382                 }
   383             }
   384         }
   385         close(fd);
   386     }
   387     return arm_simd;
   388 }
   389 
   390 #elif defined(__RISCOS__)
   391 
   392 static int
   393 CPU_haveARMSIMD(void)
   394 {
   395 	_kernel_swi_regs regs;
   396 	regs.r[0] = 0;
   397 	if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
   398 		return 0;
   399 
   400 	if (!(regs.r[0] & (1<<31)))
   401 		return 0;
   402 
   403 	regs.r[0] = 34;
   404 	regs.r[1] = 29;
   405 	if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
   406 		return 0;
   407 
   408 	return regs.r[0];
   409 }
   410 
   411 #else
   412 static int
   413 CPU_haveARMSIMD(void)
   414 {
   415 #warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
   416     return 0;
   417 }
   418 #endif
   419 
   420 #if defined(__LINUX__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
   421 static int
   422 readProcAuxvForNeon(void)
   423 {
   424     int neon = 0;
   425     int kv[2];
   426     const int fd = open("/proc/self/auxv", O_RDONLY);
   427     if (fd != -1) {
   428         while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
   429             if (kv[0] == AT_HWCAP) {
   430                 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
   431                 break;
   432             }
   433         }
   434         close(fd);
   435     }
   436     return neon;
   437 }
   438 #endif
   439 
   440 static int
   441 CPU_haveNEON(void)
   442 {
   443 /* The way you detect NEON is a privileged instruction on ARM, so you have
   444    query the OS kernel in a platform-specific way. :/ */
   445 #if defined(SDL_CPUINFO_DISABLED)
   446    return 0; /* disabled */
   447 #elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
   448 /* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
   449 /* Seems to have been removed */
   450 #  if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
   451 #    define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
   452 #  endif
   453 /* All WinRT ARM devices are required to support NEON, but just in case. */
   454     return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
   455 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
   456     return 1;  /* ARMv8 always has non-optional NEON support. */
   457 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
   458     /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
   459     return 1;  /* all Apple ARMv7 chips and later have NEON. */
   460 #elif defined(__APPLE__)
   461     return 0;  /* assume anything else from Apple doesn't have NEON. */
   462 #elif defined(__OpenBSD__)
   463     return 1;  /* OpenBSD only supports ARMv7 CPUs that have NEON. */
   464 #elif !defined(__arm__)
   465     return 0;  /* not an ARM CPU at all. */
   466 #elif defined(__QNXNTO__)
   467     return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
   468 #elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
   469     return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
   470 #elif defined(__LINUX__)
   471     return readProcAuxvForNeon();
   472 #elif defined(__ANDROID__)
   473     /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
   474     {
   475         AndroidCpuFamily cpu_family = android_getCpuFamily();
   476         if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
   477             uint64_t cpu_features = android_getCpuFeatures();
   478             if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
   479                 return 1;
   480             }
   481         }
   482         return 0;
   483     }
   484 #elif defined(__RISCOS__)
   485     /* Use the VFPSupport_Features SWI to access the MVFR registers */
   486     {
   487         _kernel_swi_regs regs;
   488 	regs.r[0] = 0;
   489         if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
   490             if ((regs.r[2] & 0xFFF000) == 0x111000) {
   491                 return 1;
   492             }
   493         }
   494         return 0;
   495     }
   496 #else
   497 #warning SDL_HasNEON is not implemented for this ARM platform. Write me.
   498     return 0;
   499 #endif
   500 }
   501 
   502 static int
   503 CPU_have3DNow(void)
   504 {
   505     if (CPU_CPUIDMaxFunction > 0) {  /* that is, do we have CPUID at all? */
   506         int a, b, c, d;
   507         cpuid(0x80000000, a, b, c, d);
   508         if (a >= 0x80000001) {
   509             cpuid(0x80000001, a, b, c, d);
   510             return (d & 0x80000000);
   511         }
   512     }
   513     return 0;
   514 }
   515 
/* Feature tests against the cached CPUID leaf 1 registers
   (CPU_CPUIDFeatures[2] holds ECX, CPU_CPUIDFeatures[3] holds EDX).
   Each yields non-zero when the feature bit is set; they are only
   meaningful after CPU_calcCPUIDFeatures() has filled the cache.
   CPU_haveAVX() additionally requires CPU_OSSavesYMM. */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
   524 
   525 static int
   526 CPU_haveAVX2(void)
   527 {
   528     if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
   529         int a, b, c, d;
   530         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   531         cpuid(7, a, b, c, d);
   532         return (b & 0x00000020);
   533     }
   534     return 0;
   535 }
   536 
   537 static int
   538 CPU_haveAVX512F(void)
   539 {
   540     if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
   541         int a, b, c, d;
   542         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
   543         cpuid(7, a, b, c, d);
   544         return (b & 0x00010000);
   545     }
   546     return 0;
   547 }
   548 
   549 static int SDL_CPUCount = 0;
   550 
   551 int
   552 SDL_GetCPUCount(void)
   553 {
   554     if (!SDL_CPUCount) {
   555 #ifndef SDL_CPUINFO_DISABLED
   556 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
   557         if (SDL_CPUCount <= 0) {
   558             SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
   559         }
   560 #endif
   561 #ifdef HAVE_SYSCTLBYNAME
   562         if (SDL_CPUCount <= 0) {
   563             size_t size = sizeof(SDL_CPUCount);
   564             sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
   565         }
   566 #endif
   567 #ifdef __WIN32__
   568         if (SDL_CPUCount <= 0) {
   569             SYSTEM_INFO info;
   570             GetSystemInfo(&info);
   571             SDL_CPUCount = info.dwNumberOfProcessors;
   572         }
   573 #endif
   574 #ifdef __OS2__
   575         if (SDL_CPUCount <= 0) {
   576             DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
   577                             &SDL_CPUCount, sizeof(SDL_CPUCount) );
   578         }
   579 #endif
   580 #endif
   581         /* There has to be at least 1, right? :) */
   582         if (SDL_CPUCount <= 0) {
   583             SDL_CPUCount = 1;
   584         }
   585     }
   586     return SDL_CPUCount;
   587 }
   588 
   589 /* Oh, such a sweet sweet trick, just not very useful. :) */
   590 static const char *
   591 SDL_GetCPUType(void)
   592 {
   593     static char SDL_CPUType[13];
   594 
   595     if (!SDL_CPUType[0]) {
   596         int i = 0;
   597 
   598         CPU_calcCPUIDFeatures();
   599         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   600             int a, b, c, d;
   601             cpuid(0x00000000, a, b, c, d);
   602             (void) a;
   603             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   604             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   605             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
   606             SDL_CPUType[i++] = (char)(b & 0xff);
   607 
   608             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   609             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   610             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
   611             SDL_CPUType[i++] = (char)(d & 0xff);
   612 
   613             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   614             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   615             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
   616             SDL_CPUType[i++] = (char)(c & 0xff);
   617         }
   618         if (!SDL_CPUType[0]) {
   619             SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
   620         }
   621     }
   622     return SDL_CPUType;
   623 }
   624 
   625 
   626 #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
   627 static const char *
   628 SDL_GetCPUName(void)
   629 {
   630     static char SDL_CPUName[48];
   631 
   632     if (!SDL_CPUName[0]) {
   633         int i = 0;
   634         int a, b, c, d;
   635 
   636         CPU_calcCPUIDFeatures();
   637         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
   638             cpuid(0x80000000, a, b, c, d);
   639             if (a >= 0x80000004) {
   640                 cpuid(0x80000002, a, b, c, d);
   641                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   642                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   643                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   644                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   645                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   646                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   647                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   648                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   649                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   650                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   651                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   652                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   653                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   654                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   655                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   656                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   657                 cpuid(0x80000003, a, b, c, d);
   658                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   659                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   660                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   661                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   662                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   663                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   664                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   665                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   666                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   667                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   668                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   669                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   670                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   671                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   672                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   673                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   674                 cpuid(0x80000004, a, b, c, d);
   675                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   676                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   677                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   678                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
   679                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   680                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   681                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   682                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
   683                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   684                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   685                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   686                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
   687                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   688                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   689                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   690                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
   691             }
   692         }
   693         if (!SDL_CPUName[0]) {
   694             SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
   695         }
   696     }
   697     return SDL_CPUName;
   698 }
   699 #endif
   700 
   701 int
   702 SDL_GetCPUCacheLineSize(void)
   703 {
   704     const char *cpuType = SDL_GetCPUType();
   705     int a, b, c, d;
   706     (void) a; (void) b; (void) c; (void) d;
   707     if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
   708         cpuid(0x00000001, a, b, c, d);
   709         return (((b >> 8) & 0xff) * 8);
   710     } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
   711         cpuid(0x80000005, a, b, c, d);
   712         return (c & 0xff);
   713     } else {
   714         /* Just make a guess here... */
   715         return SDL_CACHELINE_SIZE;
   716     }
   717 }
   718 
   719 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
   720 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
   721 
   722 static Uint32
   723 SDL_GetCPUFeatures(void)
   724 {
   725     if (SDL_CPUFeatures == 0xFFFFFFFF) {
   726         CPU_calcCPUIDFeatures();
   727         SDL_CPUFeatures = 0;
   728         SDL_SIMDAlignment = sizeof(void *);  /* a good safe base value */
   729         if (CPU_haveRDTSC()) {
   730             SDL_CPUFeatures |= CPU_HAS_RDTSC;
   731         }
   732         if (CPU_haveAltiVec()) {
   733             SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
   734             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   735         }
   736         if (CPU_haveMMX()) {
   737             SDL_CPUFeatures |= CPU_HAS_MMX;
   738             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   739         }
   740         if (CPU_have3DNow()) {
   741             SDL_CPUFeatures |= CPU_HAS_3DNOW;
   742             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
   743         }
   744         if (CPU_haveSSE()) {
   745             SDL_CPUFeatures |= CPU_HAS_SSE;
   746             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   747         }
   748         if (CPU_haveSSE2()) {
   749             SDL_CPUFeatures |= CPU_HAS_SSE2;
   750             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   751         }
   752         if (CPU_haveSSE3()) {
   753             SDL_CPUFeatures |= CPU_HAS_SSE3;
   754             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   755         }
   756         if (CPU_haveSSE41()) {
   757             SDL_CPUFeatures |= CPU_HAS_SSE41;
   758             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   759         }
   760         if (CPU_haveSSE42()) {
   761             SDL_CPUFeatures |= CPU_HAS_SSE42;
   762             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   763         }
   764         if (CPU_haveAVX()) {
   765             SDL_CPUFeatures |= CPU_HAS_AVX;
   766             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   767         }
   768         if (CPU_haveAVX2()) {
   769             SDL_CPUFeatures |= CPU_HAS_AVX2;
   770             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
   771         }
   772         if (CPU_haveAVX512F()) {
   773             SDL_CPUFeatures |= CPU_HAS_AVX512F;
   774             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
   775         }
   776         if (CPU_haveARMSIMD()) {
   777             SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
   778             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   779         }
   780         if (CPU_haveNEON()) {
   781             SDL_CPUFeatures |= CPU_HAS_NEON;
   782             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
   783         }
   784     }
   785     return SDL_CPUFeatures;
   786 }
   787 
   788 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
   789 
   790 SDL_bool SDL_HasRDTSC(void)
   791 {
   792     return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
   793 }
   794 
   795 SDL_bool
   796 SDL_HasAltiVec(void)
   797 {
   798     return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
   799 }
   800 
   801 SDL_bool
   802 SDL_HasMMX(void)
   803 {
   804     return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
   805 }
   806 
   807 SDL_bool
   808 SDL_Has3DNow(void)
   809 {
   810     return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
   811 }
   812 
   813 SDL_bool
   814 SDL_HasSSE(void)
   815 {
   816     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
   817 }
   818 
   819 SDL_bool
   820 SDL_HasSSE2(void)
   821 {
   822     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
   823 }
   824 
   825 SDL_bool
   826 SDL_HasSSE3(void)
   827 {
   828     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
   829 }
   830 
   831 SDL_bool
   832 SDL_HasSSE41(void)
   833 {
   834     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
   835 }
   836 
   837 SDL_bool
   838 SDL_HasSSE42(void)
   839 {
   840     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
   841 }
   842 
   843 SDL_bool
   844 SDL_HasAVX(void)
   845 {
   846     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
   847 }
   848 
   849 SDL_bool
   850 SDL_HasAVX2(void)
   851 {
   852     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
   853 }
   854 
   855 SDL_bool
   856 SDL_HasAVX512F(void)
   857 {
   858     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
   859 }
   860 
   861 SDL_bool
   862 SDL_HasARMSIMD(void)
   863 {
   864     return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
   865 }
   866 
   867 SDL_bool
   868 SDL_HasNEON(void)
   869 {
   870     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
   871 }
   872 
   873 static int SDL_SystemRAM = 0;
   874 
   875 int
   876 SDL_GetSystemRAM(void)
   877 {
   878     if (!SDL_SystemRAM) {
   879 #ifndef SDL_CPUINFO_DISABLED
   880 #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
   881         if (SDL_SystemRAM <= 0) {
   882             SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
   883         }
   884 #endif
   885 #ifdef HAVE_SYSCTLBYNAME
   886         if (SDL_SystemRAM <= 0) {
   887 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
   888 #ifdef HW_REALMEM
   889             int mib[2] = {CTL_HW, HW_REALMEM};
   890 #else
   891             /* might only report up to 2 GiB */
   892             int mib[2] = {CTL_HW, HW_PHYSMEM};
   893 #endif /* HW_REALMEM */
   894 #else
   895             int mib[2] = {CTL_HW, HW_MEMSIZE};
   896 #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
   897             Uint64 memsize = 0;
   898             size_t len = sizeof(memsize);
   899             
   900             if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
   901                 SDL_SystemRAM = (int)(memsize / (1024*1024));
   902             }
   903         }
   904 #endif
   905 #ifdef __WIN32__
   906         if (SDL_SystemRAM <= 0) {
   907             MEMORYSTATUSEX stat;
   908             stat.dwLength = sizeof(stat);
   909             if (GlobalMemoryStatusEx(&stat)) {
   910                 SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
   911             }
   912         }
   913 #endif
   914 #ifdef __OS2__
   915         if (SDL_SystemRAM <= 0) {
   916             Uint32 sysram = 0;
   917             DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
   918             SDL_SystemRAM = (int) (sysram / 0x100000U);
   919         }
   920 #endif
   921 #ifdef __RISCOS__
   922         if (SDL_SystemRAM <= 0) {
   923             _kernel_swi_regs regs;
   924             regs.r[0] = 0x108;
   925             if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
   926                 SDL_SystemRAM = (int)(regs.r[1] * regs.r[2] / (1024 * 1024));
   927             }
   928         }
   929 #endif
   930 #endif
   931     }
   932     return SDL_SystemRAM;
   933 }
   934 
   935 
   936 size_t
   937 SDL_SIMDGetAlignment(void)
   938 {
   939     if (SDL_SIMDAlignment == 0xFFFFFFFF) {
   940         SDL_GetCPUFeatures();  /* make sure this has been calculated */
   941     }
   942     SDL_assert(SDL_SIMDAlignment != 0);
   943     return SDL_SIMDAlignment;
   944 }
   945 
   946 void *
   947 SDL_SIMDAlloc(const size_t len)
   948 {
   949     const size_t alignment = SDL_SIMDGetAlignment();
   950     const size_t padding = alignment - (len % alignment);
   951     const size_t padded = (padding != alignment) ? (len + padding) : len;
   952     Uint8 *retval = NULL;
   953     Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
   954     if (ptr) {
   955         /* store the actual malloc pointer right before our aligned pointer. */
   956         retval = ptr + sizeof (void *);
   957         retval += alignment - (((size_t) retval) % alignment);
   958         *(((void **) retval) - 1) = ptr;
   959     }
   960     return retval;
   961 }
   962 
   963 void *
   964 SDL_SIMDRealloc(void *mem, const size_t len)
   965 {
   966     const size_t alignment = SDL_SIMDGetAlignment();
   967     const size_t padding = alignment - (len % alignment);
   968     const size_t padded = (padding != alignment) ? (len + padding) : len;
   969     Uint8 *retval = (Uint8*) mem;
   970     void *oldmem = mem;
   971     size_t memdiff, ptrdiff;
   972     Uint8 *ptr;
   973 
   974     if (mem) {
   975         void **realptr = (void **) mem;
   976         realptr--;
   977         mem = *(((void **) mem) - 1);
   978 
   979         /* Check the delta between the real pointer and user pointer */
   980         memdiff = ((size_t) oldmem) - ((size_t) mem);
   981     }
   982 
   983     ptr = (Uint8 *) SDL_realloc(mem, padded + alignment + sizeof (void *));
   984 
   985     if (ptr == mem) {
   986         return retval; /* Pointer didn't change, nothing to do */
   987     }
   988     if (ptr == NULL) {
   989         return NULL; /* Out of memory, bail! */
   990     }
   991 
   992     /* Store the actual malloc pointer right before our aligned pointer. */
   993     retval = ptr + sizeof (void *);
   994     retval += alignment - (((size_t) retval) % alignment);
   995 
   996     /* Make sure the delta is the same! */
   997     if (mem) {
   998         ptrdiff = ((size_t) retval) - ((size_t) ptr);
   999         if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
  1000             oldmem = (void*) (((size_t) ptr) + memdiff);
  1001 
  1002             /* Even though the data past the old `len` is undefined, this is the
  1003              * only length value we have, and it guarantees that we copy all the
  1004              * previous memory anyhow.
  1005              */
  1006             SDL_memmove(retval, oldmem, len);
  1007         }
  1008     }
  1009 
  1010     /* Actually store the malloc pointer, finally. */
  1011     *(((void **) retval) - 1) = ptr;
  1012     return retval;
  1013 }
  1014 
  1015 void
  1016 SDL_SIMDFree(void *ptr)
  1017 {
  1018     if (ptr) {
  1019         void **realptr = (void **) ptr;
  1020         realptr--;
  1021         SDL_free(*(((void **) ptr) - 1));
  1022     }
  1023 }
  1024 
  1025 
  1026 #ifdef TEST_MAIN
  1027 
  1028 #include <stdio.h>
  1029 
  1030 int
  1031 main()
  1032 {
  1033     printf("CPU count: %d\n", SDL_GetCPUCount());
  1034     printf("CPU type: %s\n", SDL_GetCPUType());
  1035     printf("CPU name: %s\n", SDL_GetCPUName());
  1036     printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
  1037     printf("RDTSC: %d\n", SDL_HasRDTSC());
  1038     printf("Altivec: %d\n", SDL_HasAltiVec());
  1039     printf("MMX: %d\n", SDL_HasMMX());
  1040     printf("3DNow: %d\n", SDL_Has3DNow());
  1041     printf("SSE: %d\n", SDL_HasSSE());
  1042     printf("SSE2: %d\n", SDL_HasSSE2());
  1043     printf("SSE3: %d\n", SDL_HasSSE3());
  1044     printf("SSE4.1: %d\n", SDL_HasSSE41());
  1045     printf("SSE4.2: %d\n", SDL_HasSSE42());
  1046     printf("AVX: %d\n", SDL_HasAVX());
  1047     printf("AVX2: %d\n", SDL_HasAVX2());
  1048     printf("AVX-512F: %d\n", SDL_HasAVX512F());
  1049     printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
  1050     printf("NEON: %d\n", SDL_HasNEON());
  1051     printf("RAM: %d MB\n", SDL_GetSystemRAM());
  1052     return 0;
  1053 }
  1054 
  1055 #endif /* TEST_MAIN */
  1056 
  1057 /* vi: set ts=4 sw=4 expandtab: */