1.1 --- a/src/cpuinfo/SDL_cpuinfo.c Wed Jul 09 01:34:40 2014 -0700
1.2 +++ b/src/cpuinfo/SDL_cpuinfo.c Fri Jul 11 22:02:50 2014 -0700
1.3 @@ -60,6 +60,7 @@
1.4 #define CPU_HAS_SSE41 0x00000100
1.5 #define CPU_HAS_SSE42 0x00000200
1.6 #define CPU_HAS_AVX 0x00000400
1.7 +#define CPU_HAS_AVX2 0x00000800
1.8
1.9 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
1.10 /* This is the brute force way of detecting instruction sets...
1.11 @@ -73,7 +74,7 @@
1.12 }
1.13 #endif /* HAVE_SETJMP */
1.14
1.15 -static SDL_INLINE int
1.16 +static int
1.17 CPU_haveCPUID(void)
1.18 {
1.19 int has_CPUID = 0;
1.20 @@ -172,6 +173,7 @@
1.21 #define cpuid(func, a, b, c, d) \
1.22 __asm__ __volatile__ ( \
1.23 " pushl %%ebx \n" \
1.24 +" xorl %%ecx,%%ecx \n" \
1.25 " cpuid \n" \
1.26 " movl %%ebx, %%esi \n" \
1.27 " popl %%ebx \n" : \
1.28 @@ -180,6 +182,7 @@
1.29 #define cpuid(func, a, b, c, d) \
1.30 __asm__ __volatile__ ( \
1.31 " pushq %%rbx \n" \
1.32 +" xorq %%rcx,%%rcx \n" \
1.33 " cpuid \n" \
1.34 " movq %%rbx, %%rsi \n" \
1.35 " popq %%rbx \n" : \
1.36 @@ -188,6 +191,7 @@
1.37 #define cpuid(func, a, b, c, d) \
1.38 __asm { \
1.39 __asm mov eax, func \
1.40 + __asm xor ecx, ecx \
1.41 __asm cpuid \
1.42 __asm mov a, eax \
1.43 __asm mov b, ebx \
1.44 @@ -209,7 +213,7 @@
1.45 a = b = c = d = 0
1.46 #endif
1.47
1.48 -static SDL_INLINE int
1.49 +static int
1.50 CPU_getCPUIDFeatures(void)
1.51 {
1.52 int features = 0;
1.53 @@ -223,7 +227,41 @@
1.54 return features;
1.55 }
1.56
1.57 -static SDL_INLINE int
1.58 +/* Return SDL_TRUE only when the OS saves/restores YMM state (XCR0), which AVX/AVX2 code requires. */
1.59 +static SDL_bool
1.60 +CPU_OSSavesYMM(void)
1.61 +{
1.62 + int a, b, c, d;
1.63 +
1.64 + /* xgetbv may only be executed if CPUID leaf 1 exists and reports OSXSAVE (ECX bit 27) */
1.65 + cpuid(0, a, b, c, d);
1.66 + if (a < 1) {
1.67 + return SDL_FALSE;
1.68 + }
1.69 + cpuid(1, a, b, c, d);
1.70 + if (!(c & 0x08000000)) {
1.71 + return SDL_FALSE;
1.72 + }
1.73 +
1.74 + /* Call xgetbv (XCR0 -> edx:eax); if no branch below applies, a stays 0 and we report SDL_FALSE */
1.75 + a = 0;
1.76 +#if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
1.77 + __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
1.78 +#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
1.79 + a = (int)_xgetbv(0);
1.80 +#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
1.81 + __asm
1.82 + {
1.83 + xor ecx, ecx
1.84 + _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
1.85 + mov a, eax
1.86 + }
1.87 +#endif
1.88 + /* XCR0 bit 1 (SSE state) and bit 2 (AVX state) must both be enabled by the OS */
1.89 + return ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
1.90 +}
1.91 +
1.92 +static int
1.93 CPU_haveRDTSC(void)
1.94 {
1.95 if (CPU_haveCPUID()) {
1.96 @@ -232,7 +270,7 @@
1.97 return 0;
1.98 }
1.99
1.100 -static SDL_INLINE int
1.101 +static int
1.102 CPU_haveAltiVec(void)
1.103 {
1.104 volatile int altivec = 0;
1.105 @@ -259,7 +297,7 @@
1.106 return altivec;
1.107 }
1.108
1.109 -static SDL_INLINE int
1.110 +static int
1.111 CPU_haveMMX(void)
1.112 {
1.113 if (CPU_haveCPUID()) {
1.114 @@ -268,7 +306,7 @@
1.115 return 0;
1.116 }
1.117
1.118 -static SDL_INLINE int
1.119 +static int
1.120 CPU_have3DNow(void)
1.121 {
1.122 if (CPU_haveCPUID()) {
1.123 @@ -283,7 +321,7 @@
1.124 return 0;
1.125 }
1.126
1.127 -static SDL_INLINE int
1.128 +static int
1.129 CPU_haveSSE(void)
1.130 {
1.131 if (CPU_haveCPUID()) {
1.132 @@ -292,7 +330,7 @@
1.133 return 0;
1.134 }
1.135
1.136 -static SDL_INLINE int
1.137 +static int
1.138 CPU_haveSSE2(void)
1.139 {
1.140 if (CPU_haveCPUID()) {
1.141 @@ -301,7 +339,7 @@
1.142 return 0;
1.143 }
1.144
1.145 -static SDL_INLINE int
1.146 +static int
1.147 CPU_haveSSE3(void)
1.148 {
1.149 if (CPU_haveCPUID()) {
1.150 @@ -316,13 +354,13 @@
1.151 return 0;
1.152 }
1.153
1.154 -static SDL_INLINE int
1.155 +static int
1.156 CPU_haveSSE41(void)
1.157 {
1.158 if (CPU_haveCPUID()) {
1.159 int a, b, c, d;
1.160
1.161 - cpuid(1, a, b, c, d);
1.162 + cpuid(0, a, b, c, d);
1.163 if (a >= 1) {
1.164 cpuid(1, a, b, c, d);
1.165 return (c & 0x00080000);
1.166 @@ -331,13 +369,13 @@
1.167 return 0;
1.168 }
1.169
1.170 -static SDL_INLINE int
1.171 +static int
1.172 CPU_haveSSE42(void)
1.173 {
1.174 if (CPU_haveCPUID()) {
1.175 int a, b, c, d;
1.176
1.177 - cpuid(1, a, b, c, d);
1.178 + cpuid(0, a, b, c, d);
1.179 if (a >= 1) {
1.180 cpuid(1, a, b, c, d);
1.181 return (c & 0x00100000);
1.182 @@ -346,13 +384,13 @@
1.183 return 0;
1.184 }
1.185
1.186 -static SDL_INLINE int
1.187 +static int
1.188 CPU_haveAVX(void)
1.189 {
1.190 - if (CPU_haveCPUID()) {
1.191 + if (CPU_haveCPUID() && CPU_OSSavesYMM()) {
1.192 int a, b, c, d;
1.193
1.194 - cpuid(1, a, b, c, d);
1.195 + cpuid(0, a, b, c, d);
1.196 if (a >= 1) {
1.197 cpuid(1, a, b, c, d);
1.198 return (c & 0x10000000);
1.199 @@ -361,6 +399,21 @@
1.200 return 0;
1.201 }
1.202
1.203 +static int
1.204 +CPU_haveAVX2(void) /* nonzero when CPUID leaf 7 flags AVX2 and CPU_OSSavesYMM() is true, else 0 */
1.205 +{
1.206 + if (CPU_haveCPUID() && CPU_OSSavesYMM()) {
1.207 + int a, b, c, d;
1.208 +
1.209 + cpuid(0, a, b, c, d); /* leaf 0: a is checked below as the highest supported standard leaf */
1.210 + if (a >= 7) {
1.211 + cpuid(7, a, b, c, d);
1.212 + return (b & 0x00000020); /* leaf 7, EBX bit 5 */
1.213 + }
1.214 + }
1.215 + return 0;
1.216 +}
1.217 +
1.218 static int SDL_CPUCount = 0;
1.219
1.220 int
1.221 @@ -560,6 +613,9 @@
1.222 if (CPU_haveAVX()) {
1.223 SDL_CPUFeatures |= CPU_HAS_AVX;
1.224 }
1.225 + if (CPU_haveAVX2()) {
1.226 + SDL_CPUFeatures |= CPU_HAS_AVX2;
1.227 + }
1.228 }
1.229 return SDL_CPUFeatures;
1.230 }
1.231 @@ -654,6 +710,15 @@
1.232 return SDL_FALSE;
1.233 }
1.234
1.235 +SDL_bool
1.236 +SDL_HasAVX2(void) /* SDL_TRUE when SDL_GetCPUFeatures() has the CPU_HAS_AVX2 bit set, else SDL_FALSE */
1.237 +{
1.238 + if (SDL_GetCPUFeatures() & CPU_HAS_AVX2) {
1.239 + return SDL_TRUE;
1.240 + }
1.241 + return SDL_FALSE;
1.242 +}
1.243 +
1.244 static int SDL_SystemRAM = 0;
1.245
1.246 int
1.247 @@ -720,6 +785,7 @@
1.248 printf("SSE4.1: %d\n", SDL_HasSSE41());
1.249 printf("SSE4.2: %d\n", SDL_HasSSE42());
1.250 printf("AVX: %d\n", SDL_HasAVX());
1.251 + printf("AVX2: %d\n", SDL_HasAVX2());
1.252 printf("RAM: %d MB\n", SDL_GetSystemRAM());
1.253 return 0;
1.254 }