From eb1c6044b2d5b657f9fbffff720c218db35f439f Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Fri, 11 Jul 2014 22:02:50 -0700 Subject: [PATCH] Fixed bug in AVX detection and added AVX2 detection --- include/SDL_cpuinfo.h | 5 ++ src/cpuinfo/SDL_cpuinfo.c | 98 ++++++++++++++++++++++++++----- src/dynapi/SDL_dynapi_overrides.h | 1 + src/dynapi/SDL_dynapi_procs.h | 1 + 4 files changed, 89 insertions(+), 16 deletions(-) diff --git a/include/SDL_cpuinfo.h b/include/SDL_cpuinfo.h index 1f6efd384a2db..5b2c7a4591af3 100644 --- a/include/SDL_cpuinfo.h +++ b/include/SDL_cpuinfo.h @@ -139,6 +139,11 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE42(void); */ extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX(void); +/** + * This function returns true if the CPU has AVX2 features. + */ +extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX2(void); + /** * This function returns the amount of RAM configured in the system, in MB. */ diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c index ae93a2f74c130..94a682635fe23 100644 --- a/src/cpuinfo/SDL_cpuinfo.c +++ b/src/cpuinfo/SDL_cpuinfo.c @@ -60,6 +60,7 @@ #define CPU_HAS_SSE41 0x00000100 #define CPU_HAS_SSE42 0x00000200 #define CPU_HAS_AVX 0x00000400 +#define CPU_HAS_AVX2 0x00000800 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__ /* This is the brute force way of detecting instruction sets... 
@@ -73,7 +74,7 @@ illegal_instruction(int sig) } #endif /* HAVE_SETJMP */ -static SDL_INLINE int +static int CPU_haveCPUID(void) { int has_CPUID = 0; @@ -172,6 +173,7 @@ CPU_haveCPUID(void) #define cpuid(func, a, b, c, d) \ __asm__ __volatile__ ( \ " pushl %%ebx \n" \ +" xorl %%ecx,%%ecx \n" \ " cpuid \n" \ " movl %%ebx, %%esi \n" \ " popl %%ebx \n" : \ @@ -180,6 +182,7 @@ CPU_haveCPUID(void) #define cpuid(func, a, b, c, d) \ __asm__ __volatile__ ( \ " pushq %%rbx \n" \ +" xorq %%rcx,%%rcx \n" \ " cpuid \n" \ " movq %%rbx, %%rsi \n" \ " popq %%rbx \n" : \ @@ -188,6 +191,7 @@ CPU_haveCPUID(void) #define cpuid(func, a, b, c, d) \ __asm { \ __asm mov eax, func \ + __asm xor ecx, ecx \ __asm cpuid \ __asm mov a, eax \ __asm mov b, ebx \ @@ -209,7 +213,7 @@ CPU_haveCPUID(void) a = b = c = d = 0 #endif -static SDL_INLINE int +static int CPU_getCPUIDFeatures(void) { int features = 0; @@ -223,7 +227,41 @@ CPU_getCPUIDFeatures(void) return features; } -static SDL_INLINE int +static SDL_bool +CPU_OSSavesYMM(void) +{ + int a, b, c, d; + + /* Check to make sure we can call xgetbv: CPUID leaf 1 must exist and report bit 27 of ECX */ + cpuid(0, a, b, c, d); + if (a < 1) { + return SDL_FALSE; + } + cpuid(1, a, b, c, d); + if (!(c & 0x08000000)) { + return SDL_FALSE; + } + + /* Call xgetbv with ECX = 0; YMM register state is saved only if bits 1 and 2 of the EAX result are both set */ + a = 0; +#if defined(__GNUC__) && (defined(i386) || defined(__x86_64__)) + asm(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx"); +#elif defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */ + a = (int)_xgetbv(0); +#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) + __asm + { + xor ecx, ecx + _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 + mov a, eax + } +#else + /* No xgetbv implementation for this compiler/CPU: a stays 0, so SDL_FALSE is returned instead of breaking the build */ +#endif + return ((a & 6) == 6) ? 
SDL_TRUE : SDL_FALSE; +} + +static int CPU_haveRDTSC(void) { if (CPU_haveCPUID()) { @@ -232,7 +270,7 @@ CPU_haveRDTSC(void) return 0; } -static SDL_INLINE int +static int CPU_haveAltiVec(void) { volatile int altivec = 0; @@ -259,7 +297,7 @@ CPU_haveAltiVec(void) return altivec; } -static SDL_INLINE int +static int CPU_haveMMX(void) { if (CPU_haveCPUID()) { @@ -268,7 +306,7 @@ CPU_haveMMX(void) return 0; } -static SDL_INLINE int +static int CPU_have3DNow(void) { if (CPU_haveCPUID()) { @@ -283,7 +321,7 @@ CPU_have3DNow(void) return 0; } -static SDL_INLINE int +static int CPU_haveSSE(void) { if (CPU_haveCPUID()) { @@ -292,7 +330,7 @@ CPU_haveSSE(void) return 0; } -static SDL_INLINE int +static int CPU_haveSSE2(void) { if (CPU_haveCPUID()) { @@ -301,7 +339,7 @@ CPU_haveSSE2(void) return 0; } -static SDL_INLINE int +static int CPU_haveSSE3(void) { if (CPU_haveCPUID()) { @@ -316,13 +354,13 @@ CPU_haveSSE3(void) return 0; } -static SDL_INLINE int +static int CPU_haveSSE41(void) { if (CPU_haveCPUID()) { int a, b, c, d; - cpuid(1, a, b, c, d); + cpuid(0, a, b, c, d); if (a >= 1) { cpuid(1, a, b, c, d); return (c & 0x00080000); @@ -331,13 +369,13 @@ CPU_haveSSE41(void) return 0; } -static SDL_INLINE int +static int CPU_haveSSE42(void) { if (CPU_haveCPUID()) { int a, b, c, d; - cpuid(1, a, b, c, d); + cpuid(0, a, b, c, d); if (a >= 1) { cpuid(1, a, b, c, d); return (c & 0x00100000); @@ -346,13 +384,13 @@ CPU_haveSSE42(void) return 0; } -static SDL_INLINE int +static int CPU_haveAVX(void) { - if (CPU_haveCPUID()) { + if (CPU_haveCPUID() && CPU_OSSavesYMM()) { int a, b, c, d; - cpuid(1, a, b, c, d); + cpuid(0, a, b, c, d); if (a >= 1) { cpuid(1, a, b, c, d); return (c & 0x10000000); @@ -361,6 +399,21 @@ CPU_haveAVX(void) return 0; } +static int +CPU_haveAVX2(void) +{ + if (CPU_haveCPUID() && CPU_OSSavesYMM()) { + int a, b, c, d; + + cpuid(0, a, b, c, d); + if (a >= 7) { + cpuid(7, a, b, c, d); + return (b & 0x00000020); /* CPUID leaf 7: bit 5 of EBX */ + } + } + return 0; +} + static int SDL_CPUCount = 0; int 
@@ -560,6 +613,9 @@ SDL_GetCPUFeatures(void) if (CPU_haveAVX()) { SDL_CPUFeatures |= CPU_HAS_AVX; } + if (CPU_haveAVX2()) { + SDL_CPUFeatures |= CPU_HAS_AVX2; + } } return SDL_CPUFeatures; } @@ -654,6 +710,15 @@ SDL_HasAVX(void) return SDL_FALSE; } +SDL_bool +SDL_HasAVX2(void) +{ + if (SDL_GetCPUFeatures() & CPU_HAS_AVX2) { /* flag is set in SDL_GetCPUFeatures() when CPU_haveAVX2() returns non-zero */ + return SDL_TRUE; + } + return SDL_FALSE; +} + static int SDL_SystemRAM = 0; int @@ -720,6 +785,7 @@ main() printf("SSE4.1: %d\n", SDL_HasSSE41()); printf("SSE4.2: %d\n", SDL_HasSSE42()); printf("AVX: %d\n", SDL_HasAVX()); + printf("AVX2: %d\n", SDL_HasAVX2()); printf("RAM: %d MB\n", SDL_GetSystemRAM()); return 0; } diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h index 9aac2c963357a..79d6e8ba3fbdd 100644 --- a/src/dynapi/SDL_dynapi_overrides.h +++ b/src/dynapi/SDL_dynapi_overrides.h @@ -587,3 +587,4 @@ #define SDL_CaptureMouse SDL_CaptureMouse_REAL #define SDL_SetWindowHitTest SDL_SetWindowHitTest_REAL #define SDL_GetGlobalMouseState SDL_GetGlobalMouseState_REAL +#define SDL_HasAVX2 SDL_HasAVX2_REAL diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index bdb3cc4c6cfdf..0112769407737 100644 --- a/src/dynapi/SDL_dynapi_procs.h +++ b/src/dynapi/SDL_dynapi_procs.h @@ -619,3 +619,4 @@ SDL_DYNAPI_PROC(float,SDL_tanf,(float a),(a),return) SDL_DYNAPI_PROC(int,SDL_CaptureMouse,(SDL_bool a),(a),return) SDL_DYNAPI_PROC(int,SDL_SetWindowHitTest,(SDL_Window *a, SDL_HitTest b, void *c),(a,b,c),return) SDL_DYNAPI_PROC(Uint32,SDL_GetGlobalMouseState,(int *a, int *b),(a,b),return) +SDL_DYNAPI_PROC(SDL_bool,SDL_HasAVX2,(void),(),return)