Fixed bug in AVX detection and added AVX2 detection
author Sam Lantinga <slouken@libsdl.org>
Fri, 11 Jul 2014 22:02:50 -0700
changeset 9003 446ed0fe0fc3
parent 9002 c1bd00f50724
child 9004 51bc44d49052
Fixed bug in AVX detection and added AVX2 detection
include/SDL_cpuinfo.h
src/cpuinfo/SDL_cpuinfo.c
src/dynapi/SDL_dynapi_overrides.h
src/dynapi/SDL_dynapi_procs.h
     1.1 --- a/include/SDL_cpuinfo.h	Wed Jul 09 01:34:40 2014 -0700
     1.2 +++ b/include/SDL_cpuinfo.h	Fri Jul 11 22:02:50 2014 -0700
     1.3 @@ -140,6 +140,11 @@
     1.4  extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX(void);
     1.5  
     1.6  /**
     1.7 + *  This function returns true if the CPU has AVX2 features.
     1.8 + */
     1.9 +extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX2(void);
    1.10 +
    1.11 +/**
    1.12   *  This function returns the amount of RAM configured in the system, in MB.
    1.13   */
    1.14  extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void);
     2.1 --- a/src/cpuinfo/SDL_cpuinfo.c	Wed Jul 09 01:34:40 2014 -0700
     2.2 +++ b/src/cpuinfo/SDL_cpuinfo.c	Fri Jul 11 22:02:50 2014 -0700
     2.3 @@ -60,6 +60,7 @@
     2.4  #define CPU_HAS_SSE41   0x00000100
     2.5  #define CPU_HAS_SSE42   0x00000200
     2.6  #define CPU_HAS_AVX     0x00000400
     2.7 +#define CPU_HAS_AVX2    0x00000800
     2.8  
     2.9  #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
    2.10  /* This is the brute force way of detecting instruction sets...
    2.11 @@ -73,7 +74,7 @@
    2.12  }
    2.13  #endif /* HAVE_SETJMP */
    2.14  
    2.15 -static SDL_INLINE int
    2.16 +static int
    2.17  CPU_haveCPUID(void)
    2.18  {
    2.19      int has_CPUID = 0;
    2.20 @@ -172,6 +173,7 @@
    2.21  #define cpuid(func, a, b, c, d) \
    2.22      __asm__ __volatile__ ( \
    2.23  "        pushl %%ebx        \n" \
    2.24 +"        xorl %%ecx,%%ecx   \n" \
    2.25  "        cpuid              \n" \
    2.26  "        movl %%ebx, %%esi  \n" \
    2.27  "        popl %%ebx         \n" : \
    2.28 @@ -180,6 +182,7 @@
    2.29  #define cpuid(func, a, b, c, d) \
    2.30      __asm__ __volatile__ ( \
    2.31  "        pushq %%rbx        \n" \
    2.32 +"        xorq %%rcx,%%rcx   \n" \
    2.33  "        cpuid              \n" \
    2.34  "        movq %%rbx, %%rsi  \n" \
    2.35  "        popq %%rbx         \n" : \
    2.36 @@ -188,6 +191,7 @@
    2.37  #define cpuid(func, a, b, c, d) \
    2.38      __asm { \
    2.39          __asm mov eax, func \
    2.40 +        __asm xor ecx, ecx \
    2.41          __asm cpuid \
    2.42          __asm mov a, eax \
    2.43          __asm mov b, ebx \
    2.44 @@ -209,7 +213,7 @@
    2.45      a = b = c = d = 0
    2.46  #endif
    2.47  
    2.48 -static SDL_INLINE int
    2.49 +static int
    2.50  CPU_getCPUIDFeatures(void)
    2.51  {
    2.52      int features = 0;
    2.53 @@ -223,7 +227,41 @@
    2.54      return features;
    2.55  }
    2.56  
    2.57 -static SDL_INLINE int
    2.58 +static SDL_bool
    2.59 +CPU_OSSavesYMM(void)
    2.60 +{
    2.61 +    int a, b, c, d;
    2.62 +
    2.63 +    /* Check to make sure we can call xgetbv */
    2.64 +    cpuid(0, a, b, c, d);
    2.65 +    if (a < 1) {
    2.66 +        return SDL_FALSE;
    2.67 +    }
    2.68 +    cpuid(1, a, b, c, d);
    2.69 +    if (!(c & 0x08000000)) {
    2.70 +        return SDL_FALSE;
    2.71 +    }
    2.72 +
    2.73 +    /* Call xgetbv to see if YMM register state is saved */
    2.74 +    a = 0;
    2.75 +#if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
    2.76 +    asm(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
    2.77 +#elif defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
    2.78 +    a = (int)_xgetbv(0);
    2.79 +#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    2.80 +    __asm
    2.81 +    {
    2.82 +        xor ecx, ecx
    2.83 +        _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
    2.84 +        mov a, xcr0
    2.85 +    }
    2.86 +#else
    2.87 +#error Need xgetbv implementation!
    2.88 +#endif
    2.89 +    return ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
    2.90 +}
    2.91 +
    2.92 +static int
    2.93  CPU_haveRDTSC(void)
    2.94  {
    2.95      if (CPU_haveCPUID()) {
    2.96 @@ -232,7 +270,7 @@
    2.97      return 0;
    2.98  }
    2.99  
   2.100 -static SDL_INLINE int
   2.101 +static int
   2.102  CPU_haveAltiVec(void)
   2.103  {
   2.104      volatile int altivec = 0;
   2.105 @@ -259,7 +297,7 @@
   2.106      return altivec;
   2.107  }
   2.108  
   2.109 -static SDL_INLINE int
   2.110 +static int
   2.111  CPU_haveMMX(void)
   2.112  {
   2.113      if (CPU_haveCPUID()) {
   2.114 @@ -268,7 +306,7 @@
   2.115      return 0;
   2.116  }
   2.117  
   2.118 -static SDL_INLINE int
   2.119 +static int
   2.120  CPU_have3DNow(void)
   2.121  {
   2.122      if (CPU_haveCPUID()) {
   2.123 @@ -283,7 +321,7 @@
   2.124      return 0;
   2.125  }
   2.126  
   2.127 -static SDL_INLINE int
   2.128 +static int
   2.129  CPU_haveSSE(void)
   2.130  {
   2.131      if (CPU_haveCPUID()) {
   2.132 @@ -292,7 +330,7 @@
   2.133      return 0;
   2.134  }
   2.135  
   2.136 -static SDL_INLINE int
   2.137 +static int
   2.138  CPU_haveSSE2(void)
   2.139  {
   2.140      if (CPU_haveCPUID()) {
   2.141 @@ -301,7 +339,7 @@
   2.142      return 0;
   2.143  }
   2.144  
   2.145 -static SDL_INLINE int
   2.146 +static int
   2.147  CPU_haveSSE3(void)
   2.148  {
   2.149      if (CPU_haveCPUID()) {
   2.150 @@ -316,13 +354,13 @@
   2.151      return 0;
   2.152  }
   2.153  
   2.154 -static SDL_INLINE int
   2.155 +static int
   2.156  CPU_haveSSE41(void)
   2.157  {
   2.158      if (CPU_haveCPUID()) {
   2.159          int a, b, c, d;
   2.160  
   2.161 -        cpuid(1, a, b, c, d);
   2.162 +        cpuid(0, a, b, c, d);
   2.163          if (a >= 1) {
   2.164              cpuid(1, a, b, c, d);
   2.165              return (c & 0x00080000);
   2.166 @@ -331,13 +369,13 @@
   2.167      return 0;
   2.168  }
   2.169  
   2.170 -static SDL_INLINE int
   2.171 +static int
   2.172  CPU_haveSSE42(void)
   2.173  {
   2.174      if (CPU_haveCPUID()) {
   2.175          int a, b, c, d;
   2.176  
   2.177 -        cpuid(1, a, b, c, d);
   2.178 +        cpuid(0, a, b, c, d);
   2.179          if (a >= 1) {
   2.180              cpuid(1, a, b, c, d);
   2.181              return (c & 0x00100000);
   2.182 @@ -346,13 +384,13 @@
   2.183      return 0;
   2.184  }
   2.185  
   2.186 -static SDL_INLINE int
   2.187 +static int
   2.188  CPU_haveAVX(void)
   2.189  {
   2.190 -    if (CPU_haveCPUID()) {
   2.191 +    if (CPU_haveCPUID() && CPU_OSSavesYMM()) {
   2.192          int a, b, c, d;
   2.193  
   2.194 -        cpuid(1, a, b, c, d);
   2.195 +        cpuid(0, a, b, c, d);
   2.196          if (a >= 1) {
   2.197              cpuid(1, a, b, c, d);
   2.198              return (c & 0x10000000);
   2.199 @@ -361,6 +399,21 @@
   2.200      return 0;
   2.201  }
   2.202  
   2.203 +static int
   2.204 +CPU_haveAVX2(void)
   2.205 +{
   2.206 +    if (CPU_haveCPUID() && CPU_OSSavesYMM()) {
   2.207 +        int a, b, c, d;
   2.208 +
   2.209 +        cpuid(0, a, b, c, d);
   2.210 +        if (a >= 7) {
   2.211 +            cpuid(7, a, b, c, d);
   2.212 +            return (b & 0x00000020);
   2.213 +        }
   2.214 +    }
   2.215 +    return 0;
   2.216 +}
   2.217 +
   2.218  static int SDL_CPUCount = 0;
   2.219  
   2.220  int
   2.221 @@ -560,6 +613,9 @@
   2.222          if (CPU_haveAVX()) {
   2.223              SDL_CPUFeatures |= CPU_HAS_AVX;
   2.224          }
   2.225 +        if (CPU_haveAVX2()) {
   2.226 +            SDL_CPUFeatures |= CPU_HAS_AVX2;
   2.227 +        }
   2.228      }
   2.229      return SDL_CPUFeatures;
   2.230  }
   2.231 @@ -654,6 +710,15 @@
   2.232      return SDL_FALSE;
   2.233  }
   2.234  
   2.235 +SDL_bool
   2.236 +SDL_HasAVX2(void)
   2.237 +{
   2.238 +    if (SDL_GetCPUFeatures() & CPU_HAS_AVX2) {
   2.239 +        return SDL_TRUE;
   2.240 +    }
   2.241 +    return SDL_FALSE;
   2.242 +}
   2.243 +
   2.244  static int SDL_SystemRAM = 0;
   2.245  
   2.246  int
   2.247 @@ -720,6 +785,7 @@
   2.248      printf("SSE4.1: %d\n", SDL_HasSSE41());
   2.249      printf("SSE4.2: %d\n", SDL_HasSSE42());
   2.250      printf("AVX: %d\n", SDL_HasAVX());
   2.251 +    printf("AVX2: %d\n", SDL_HasAVX2());
   2.252      printf("RAM: %d MB\n", SDL_GetSystemRAM());
   2.253      return 0;
   2.254  }
     3.1 --- a/src/dynapi/SDL_dynapi_overrides.h	Wed Jul 09 01:34:40 2014 -0700
     3.2 +++ b/src/dynapi/SDL_dynapi_overrides.h	Fri Jul 11 22:02:50 2014 -0700
     3.3 @@ -587,3 +587,4 @@
     3.4  #define SDL_CaptureMouse SDL_CaptureMouse_REAL
     3.5  #define SDL_SetWindowHitTest SDL_SetWindowHitTest_REAL
     3.6  #define SDL_GetGlobalMouseState SDL_GetGlobalMouseState_REAL
     3.7 +#define SDL_HasAVX2 SDL_HasAVX2_REAL
     4.1 --- a/src/dynapi/SDL_dynapi_procs.h	Wed Jul 09 01:34:40 2014 -0700
     4.2 +++ b/src/dynapi/SDL_dynapi_procs.h	Fri Jul 11 22:02:50 2014 -0700
     4.3 @@ -619,3 +619,4 @@
     4.4  SDL_DYNAPI_PROC(int,SDL_CaptureMouse,(SDL_bool a),(a),return)
     4.5  SDL_DYNAPI_PROC(int,SDL_SetWindowHitTest,(SDL_Window *a, SDL_HitTest b, void *c),(a,b,c),return)
     4.6  SDL_DYNAPI_PROC(Uint32,SDL_GetGlobalMouseState,(int *a, int *b),(a,b),return)
     4.7 +SDL_DYNAPI_PROC(SDL_bool,SDL_HasAVX2,(void),(),return)