src/cpuinfo/SDL_cpuinfo.c
changeset 9003 446ed0fe0fc3
parent 8642 3506de54b829
child 9004 51bc44d49052
     1.1 --- a/src/cpuinfo/SDL_cpuinfo.c	Wed Jul 09 01:34:40 2014 -0700
     1.2 +++ b/src/cpuinfo/SDL_cpuinfo.c	Fri Jul 11 22:02:50 2014 -0700
     1.3 @@ -60,6 +60,7 @@
     1.4  #define CPU_HAS_SSE41   0x00000100
     1.5  #define CPU_HAS_SSE42   0x00000200
     1.6  #define CPU_HAS_AVX     0x00000400
     1.7 +#define CPU_HAS_AVX2    0x00000800
     1.8  
     1.9  #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
    1.10  /* This is the brute force way of detecting instruction sets...
    1.11 @@ -73,7 +74,7 @@
    1.12  }
    1.13  #endif /* HAVE_SETJMP */
    1.14  
    1.15 -static SDL_INLINE int
    1.16 +static int
    1.17  CPU_haveCPUID(void)
    1.18  {
    1.19      int has_CPUID = 0;
    1.20 @@ -172,6 +173,7 @@
    1.21  #define cpuid(func, a, b, c, d) \
    1.22      __asm__ __volatile__ ( \
    1.23  "        pushl %%ebx        \n" \
    1.24 +"        xorl %%ecx,%%ecx   \n" \
    1.25  "        cpuid              \n" \
    1.26  "        movl %%ebx, %%esi  \n" \
    1.27  "        popl %%ebx         \n" : \
    1.28 @@ -180,6 +182,7 @@
    1.29  #define cpuid(func, a, b, c, d) \
    1.30      __asm__ __volatile__ ( \
    1.31  "        pushq %%rbx        \n" \
    1.32 +"        xorq %%rcx,%%rcx   \n" \
    1.33  "        cpuid              \n" \
    1.34  "        movq %%rbx, %%rsi  \n" \
    1.35  "        popq %%rbx         \n" : \
    1.36 @@ -188,6 +191,7 @@
    1.37  #define cpuid(func, a, b, c, d) \
    1.38      __asm { \
    1.39          __asm mov eax, func \
    1.40 +        __asm xor ecx, ecx \
    1.41          __asm cpuid \
    1.42          __asm mov a, eax \
    1.43          __asm mov b, ebx \
    1.44 @@ -209,7 +213,7 @@
    1.45      a = b = c = d = 0
    1.46  #endif
    1.47  
    1.48 -static SDL_INLINE int
    1.49 +static int
    1.50  CPU_getCPUIDFeatures(void)
    1.51  {
    1.52      int features = 0;
    1.53 @@ -223,7 +227,41 @@
    1.54      return features;
    1.55  }
    1.56  
    1.57 -static SDL_INLINE int
    1.58 +static SDL_bool
    1.59 +CPU_OSSavesYMM(void)
    1.60 +{
    1.61 +    int a, b, c, d;
    1.62 +    /* Returns SDL_TRUE only when xgetbv is usable and XCR0 has both bits of mask 6 set. */
    1.63 +    /* Check to make sure we can call xgetbv: needs CPUID leaf 1 and ECX bit 27 (OSXSAVE) */
    1.64 +    cpuid(0, a, b, c, d);
    1.65 +    if (a < 1) {
    1.66 +        return SDL_FALSE;
    1.67 +    }
    1.68 +    cpuid(1, a, b, c, d);
    1.69 +    if (!(c & 0x08000000)) {
    1.70 +        return SDL_FALSE;
    1.71 +    }
    1.72 +
    1.73 +    /* Call xgetbv (ecx = 0) to see if YMM register state is saved; a stays 0 if no branch applies */
    1.74 +    a = 0;
    1.75 +#if defined(__GNUC__) && (defined(i386) || defined(__i386__) || defined(__x86_64__))
    1.76 +    __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
    1.77 +#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
    1.78 +    a = (int)_xgetbv(0);
    1.79 +#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    1.80 +    __asm
    1.81 +    {
    1.82 +        xor ecx, ecx
    1.83 +        _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
    1.84 +        mov a, eax
    1.85 +    }
    1.86 +#else
    1.87 +    /* No xgetbv implementation for this target: a == 0, so YMM state is reported as not saved */
    1.88 +#endif
    1.89 +    return ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE; /* XCR0 bit 1 (SSE state) and bit 2 (AVX state) */
    1.90 +}
    1.91 +
    1.92 +static int
    1.93  CPU_haveRDTSC(void)
    1.94  {
    1.95      if (CPU_haveCPUID()) {
    1.96 @@ -232,7 +270,7 @@
    1.97      return 0;
    1.98  }
    1.99  
   1.100 -static SDL_INLINE int
   1.101 +static int
   1.102  CPU_haveAltiVec(void)
   1.103  {
   1.104      volatile int altivec = 0;
   1.105 @@ -259,7 +297,7 @@
   1.106      return altivec;
   1.107  }
   1.108  
   1.109 -static SDL_INLINE int
   1.110 +static int
   1.111  CPU_haveMMX(void)
   1.112  {
   1.113      if (CPU_haveCPUID()) {
   1.114 @@ -268,7 +306,7 @@
   1.115      return 0;
   1.116  }
   1.117  
   1.118 -static SDL_INLINE int
   1.119 +static int
   1.120  CPU_have3DNow(void)
   1.121  {
   1.122      if (CPU_haveCPUID()) {
   1.123 @@ -283,7 +321,7 @@
   1.124      return 0;
   1.125  }
   1.126  
   1.127 -static SDL_INLINE int
   1.128 +static int
   1.129  CPU_haveSSE(void)
   1.130  {
   1.131      if (CPU_haveCPUID()) {
   1.132 @@ -292,7 +330,7 @@
   1.133      return 0;
   1.134  }
   1.135  
   1.136 -static SDL_INLINE int
   1.137 +static int
   1.138  CPU_haveSSE2(void)
   1.139  {
   1.140      if (CPU_haveCPUID()) {
   1.141 @@ -301,7 +339,7 @@
   1.142      return 0;
   1.143  }
   1.144  
   1.145 -static SDL_INLINE int
   1.146 +static int
   1.147  CPU_haveSSE3(void)
   1.148  {
   1.149      if (CPU_haveCPUID()) {
   1.150 @@ -316,13 +354,13 @@
   1.151      return 0;
   1.152  }
   1.153  
   1.154 -static SDL_INLINE int
   1.155 +static int
   1.156  CPU_haveSSE41(void)
   1.157  {
   1.158      if (CPU_haveCPUID()) {
   1.159          int a, b, c, d;
   1.160  
   1.161 -        cpuid(1, a, b, c, d);
   1.162 +        cpuid(0, a, b, c, d);
   1.163          if (a >= 1) {
   1.164              cpuid(1, a, b, c, d);
   1.165              return (c & 0x00080000);
   1.166 @@ -331,13 +369,13 @@
   1.167      return 0;
   1.168  }
   1.169  
   1.170 -static SDL_INLINE int
   1.171 +static int
   1.172  CPU_haveSSE42(void)
   1.173  {
   1.174      if (CPU_haveCPUID()) {
   1.175          int a, b, c, d;
   1.176  
   1.177 -        cpuid(1, a, b, c, d);
   1.178 +        cpuid(0, a, b, c, d);
   1.179          if (a >= 1) {
   1.180              cpuid(1, a, b, c, d);
   1.181              return (c & 0x00100000);
   1.182 @@ -346,13 +384,13 @@
   1.183      return 0;
   1.184  }
   1.185  
   1.186 -static SDL_INLINE int
   1.187 +static int
   1.188  CPU_haveAVX(void)
   1.189  {
   1.190 -    if (CPU_haveCPUID()) {
   1.191 +    if (CPU_haveCPUID() && CPU_OSSavesYMM()) {
   1.192          int a, b, c, d;
   1.193  
   1.194 -        cpuid(1, a, b, c, d);
   1.195 +        cpuid(0, a, b, c, d);
   1.196          if (a >= 1) {
   1.197              cpuid(1, a, b, c, d);
   1.198              return (c & 0x10000000);
   1.199 @@ -361,6 +399,21 @@
   1.200      return 0;
   1.201  }
   1.202  
   1.203 +static int
   1.204 +CPU_haveAVX2(void)
   1.205 +{
   1.206 +    int a, b, c, d; /* receive the EAX..EDX results of cpuid */
   1.207 +    if (!CPU_haveCPUID() || !CPU_OSSavesYMM()) {
   1.208 +        return 0; /* no CPUID, or the OS does not save YMM state */
   1.209 +    }
   1.210 +    cpuid(0, a, b, c, d);
   1.211 +    if (a < 7) {
   1.212 +        return 0; /* leaf 7 is not available on this CPU */
   1.213 +    }
   1.214 +    cpuid(7, a, b, c, d);
   1.215 +    return (b & 0x00000020); /* nonzero when leaf 7 EBX bit 5 is set */
   1.216 +}
   1.217 +
   1.218  static int SDL_CPUCount = 0;
   1.219  
   1.220  int
   1.221 @@ -560,6 +613,9 @@
   1.222          if (CPU_haveAVX()) {
   1.223              SDL_CPUFeatures |= CPU_HAS_AVX;
   1.224          }
   1.225 +        if (CPU_haveAVX2()) {
   1.226 +            SDL_CPUFeatures |= CPU_HAS_AVX2;
   1.227 +        }
   1.228      }
   1.229      return SDL_CPUFeatures;
   1.230  }
   1.231 @@ -654,6 +710,15 @@
   1.232      return SDL_FALSE;
   1.233  }
   1.234  
   1.235 +SDL_bool
   1.236 +SDL_HasAVX2(void)
   1.237 +{
   1.238 +    /* Report whether the feature mask returned by SDL_GetCPUFeatures()
   1.239 +     * has the CPU_HAS_AVX2 bit set.
   1.240 +     */
   1.241 +    return (SDL_GetCPUFeatures() & CPU_HAS_AVX2) ? SDL_TRUE : SDL_FALSE;
   1.242 +}
   1.243 +
   1.244  static int SDL_SystemRAM = 0;
   1.245  
   1.246  int
   1.247 @@ -720,6 +785,7 @@
   1.248      printf("SSE4.1: %d\n", SDL_HasSSE41());
   1.249      printf("SSE4.2: %d\n", SDL_HasSSE42());
   1.250      printf("AVX: %d\n", SDL_HasAVX());
   1.251 +    printf("AVX2: %d\n", SDL_HasAVX2());
   1.252      printf("RAM: %d MB\n", SDL_GetSystemRAM());
   1.253      return 0;
   1.254  }