include/SDL_cpuinfo.h
author Sam Lantinga <slouken@libsdl.org>
Sat, 08 Jun 2019 14:54:37 -0700
changeset 12794 a2dc7ba484fd
parent 12719 b9da55e54f98
child 12905 a7eb3fe5cc41
permissions -rw-r--r--
Fixed bug 4557 - SDL_SIMDAlloc and *Free should be in the public interface

Martin Gerhardy

These functions are really useful and should get exposed imo.
slouken@739
     1
/*
slouken@5535
     2
  Simple DirectMedia Layer
slouken@12503
     3
  Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
slouken@739
     4
slouken@5535
     5
  This software is provided 'as-is', without any express or implied
slouken@5535
     6
  warranty.  In no event will the authors be held liable for any damages
slouken@5535
     7
  arising from the use of this software.
slouken@739
     8
slouken@5535
     9
  Permission is granted to anyone to use this software for any purpose,
slouken@5535
    10
  including commercial applications, and to alter it and redistribute it
slouken@5535
    11
  freely, subject to the following restrictions:
slouken@739
    12
slouken@5535
    13
  1. The origin of this software must not be misrepresented; you must not
slouken@5535
    14
     claim that you wrote the original software. If you use this software
slouken@5535
    15
     in a product, an acknowledgment in the product documentation would be
slouken@5535
    16
     appreciated but is not required.
slouken@5535
    17
  2. Altered source versions must be plainly marked as such, and must not be
slouken@5535
    18
     misrepresented as being the original software.
slouken@5535
    19
  3. This notice may not be removed or altered from any source distribution.
slouken@739
    20
*/
slouken@739
    21
slouken@1895
    22
/**
slouken@3407
    23
 *  \file SDL_cpuinfo.h
slouken@7191
    24
 *
slouken@3407
    25
 *  CPU feature detection for SDL.
slouken@1895
    26
 */
slouken@739
    27
slouken@10638
    28
#ifndef SDL_cpuinfo_h_
slouken@10638
    29
#define SDL_cpuinfo_h_
slouken@739
    30
slouken@1402
    31
#include "SDL_stdinc.h"
slouken@1402
    32
slouken@5389
    33
/* Need to do this here because intrin.h has C++ code in it */
slouken@5389
    34
/* Visual Studio 2005 has a bug where intrin.h conflicts with winnt.h */
dludwig@8320
    35
#if defined(_MSC_VER) && (_MSC_VER >= 1500) && (defined(_M_IX86) || defined(_M_X64))
slouken@11330
    36
#ifdef __clang__
slouken@11330
    37
/* Many of the intrinsics SDL uses are not implemented by clang with Visual Studio */
slouken@11330
    38
#undef __MMX__
slouken@11330
    39
#undef __SSE__
slouken@11330
    40
#undef __SSE2__
slouken@11330
    41
#else
slouken@5389
    42
#include <intrin.h>
slouken@5450
    43
#ifndef _WIN64
slouken@12719
    44
#ifndef __MMX__
slouken@5389
    45
#define __MMX__
slouken@12719
    46
#endif
slouken@12719
    47
#ifndef __3dNOW__
slouken@5389
    48
#define __3dNOW__
slouken@5450
    49
#endif
slouken@12719
    50
#endif
slouken@12719
    51
#ifndef __SSE__
slouken@5389
    52
#define __SSE__
slouken@12719
    53
#endif
slouken@12719
    54
#ifndef __SSE2__
slouken@5389
    55
#define __SSE2__
slouken@12719
    56
#endif
slouken@11330
    57
#endif /* __clang__ */
slouken@5389
    58
#elif defined(__MINGW64_VERSION_MAJOR)
slouken@5389
    59
#include <intrin.h>
slouken@5389
    60
#else
slouken@12423
    61
/* altivec.h redefining bool causes a number of problems, see bugs 3993 and 4392, so you need to explicitly define SDL_ENABLE_ALTIVEC_H to have it included. */
slouken@12423
    62
#if defined(HAVE_ALTIVEC_H) && defined(__ALTIVEC__) && !defined(__APPLE_ALTIVEC__) && defined(SDL_ENABLE_ALTIVEC_H)
slouken@5390
    63
#include <altivec.h>
slouken@5390
    64
#endif
sylvain@12450
    65
/* Pull in the ARM NEON intrinsics, unless the application opted out by
 * defining SDL_DISABLE_ARM_NEON_H before including this header. */
#if !defined(SDL_DISABLE_ARM_NEON_H)
#  if defined(__ARM_NEON)
/* The compiler already advertises NEON support. */
#    include <arm_neon.h>
#  elif defined(__WINDOWS__) || defined(__WINRT__)
/* Visual Studio doesn't define __ARM_ARCH, but _M_ARM (if set, always 7), and _M_ARM64 (if set, always 1). */
#    if defined(_M_ARM) || defined(_M_ARM64)
#      include <armintr.h>
#      include <arm_neon.h>
/* Set __ARM_NEON so that it can be used elsewhere, at compile time.
 * This must stay inside the ARM-only check: defining it for every Windows
 * target would make x86 builds that reach this branch look NEON-capable. */
#      define __ARM_NEON 1
#    endif
#  endif
#endif
slouken@11770
    82
#if defined(__3dNOW__) && !defined(SDL_DISABLE_MM3DNOW_H)
slouken@5389
    83
#include <mm3dnow.h>
slouken@5389
    84
#endif
slouken@11928
    85
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
slouken@11698
    86
#include <immintrin.h>
slouken@11762
    87
#else
slouken@11770
    88
#if defined(__MMX__) && !defined(SDL_DISABLE_MMINTRIN_H)
slouken@11762
    89
#include <mmintrin.h>
slouken@5389
    90
#endif
slouken@11770
    91
#if defined(__SSE__) && !defined(SDL_DISABLE_XMMINTRIN_H)
slouken@11762
    92
#include <xmmintrin.h>
slouken@11762
    93
#endif
slouken@11770
    94
#if defined(__SSE2__) && !defined(SDL_DISABLE_EMMINTRIN_H)
slouken@11762
    95
#include <emmintrin.h>
slouken@11762
    96
#endif
slouken@11770
    97
#if defined(__SSE3__) && !defined(SDL_DISABLE_PMMINTRIN_H)
slouken@11762
    98
#include <pmmintrin.h>
slouken@11762
    99
#endif
slouken@11762
   100
#endif /* HAVE_IMMINTRIN_H */
slouken@11762
   101
#endif /* compiler version */
slouken@5389
   102
slouken@739
   103
#include "begin_code.h"
slouken@739
   104
/* Set up for C function definitions, even when using C++ */
slouken@739
   105
#ifdef __cplusplus
slouken@739
   106
extern "C" {
slouken@739
   107
#endif
slouken@739
   108
slouken@5116
   109
/* This is a guess for the cacheline size used for padding.
slouken@5116
   110
 * Most x86 processors have a 64 byte cache line.
slouken@5116
   111
 * The 64-bit PowerPC processors have a 128 byte cache line.
slouken@5116
   112
 * We'll use the larger value to be generally safe.
slouken@5115
   113
 */
slouken@5116
   114
#define SDL_CACHELINE_SIZE  128
slouken@5115
   115
slouken@3407
   116
/**
slouken@3579
   117
 *  This function returns the number of CPU cores available.
slouken@3579
   118
 */
slouken@3579
   119
extern DECLSPEC int SDLCALL SDL_GetCPUCount(void);
slouken@3579
   120
slouken@3579
   121
/**
slouken@5120
   122
 *  This function returns the L1 cache line size of the CPU
slouken@5120
   123
 *
slouken@5120
   124
 *  This is useful for determining multi-threaded structure padding
slouken@5120
   125
 *  or SIMD prefetch sizes.
slouken@5120
   126
 */
slouken@5120
   127
extern DECLSPEC int SDLCALL SDL_GetCPUCacheLineSize(void);
slouken@5120
   128
slouken@5120
   129
/**
slouken@3407
   130
 *  This function returns true if the CPU has the RDTSC instruction.
slouken@745
   131
 */
slouken@1426
   132
extern DECLSPEC SDL_bool SDLCALL SDL_HasRDTSC(void);
slouken@745
   133
slouken@3407
   134
/**
slouken@5389
   135
 *  This function returns true if the CPU has AltiVec features.
slouken@5389
   136
 */
slouken@5389
   137
extern DECLSPEC SDL_bool SDLCALL SDL_HasAltiVec(void);
slouken@5389
   138
slouken@5389
   139
/**
slouken@3407
   140
 *  This function returns true if the CPU has MMX features.
slouken@739
   141
 */
slouken@1426
   142
extern DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void);
slouken@739
   143
slouken@3407
   144
/**
slouken@5389
   145
 *  This function returns true if the CPU has 3DNow! features.
slouken@5389
   146
 */
slouken@5389
   147
extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNow(void);
slouken@5389
   148
slouken@5389
   149
/**
slouken@3407
   150
 *  This function returns true if the CPU has SSE features.
slouken@739
   151
 */
slouken@1426
   152
extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE(void);
slouken@739
   153
slouken@3407
   154
/**
slouken@3407
   155
 *  This function returns true if the CPU has SSE2 features.
slouken@785
   156
 */
slouken@1426
   157
extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE2(void);
slouken@785
   158
slouken@3407
   159
/**
slouken@5259
   160
 *  This function returns true if the CPU has SSE3 features.
slouken@778
   161
 */
slouken@5259
   162
extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE3(void);
slouken@5259
   163
slouken@5259
   164
/**
slouken@5263
   165
 *  This function returns true if the CPU has SSE4.1 features.
slouken@5259
   166
 */
slouken@5263
   167
extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE41(void);
slouken@5263
   168
slouken@5263
   169
/**
slouken@5263
   170
 *  This function returns true if the CPU has SSE4.2 features.
slouken@5263
   171
 */
slouken@5263
   172
extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE42(void);
slouken@5259
   173
slouken@7821
   174
/**
slouken@8148
   175
 *  This function returns true if the CPU has AVX features.
slouken@8148
   176
 */
slouken@8148
   177
extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX(void);
slouken@8148
   178
slouken@8148
   179
/**
slouken@9003
   180
 *  This function returns true if the CPU has AVX2 features.
slouken@9003
   181
 */
slouken@9003
   182
extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX2(void);
slouken@9003
   183
slouken@9003
   184
/**
icculus@11986
   185
 *  This function returns true if the CPU has AVX-512F (foundation) features.
icculus@11986
   186
 */
icculus@11986
   187
extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX512F(void);
icculus@11986
   188
icculus@11986
   189
/**
icculus@10620
   190
 *  This function returns true if the CPU has NEON (ARM SIMD) features.
icculus@10620
   191
 */
icculus@10620
   192
extern DECLSPEC SDL_bool SDLCALL SDL_HasNEON(void);
icculus@10620
   193
icculus@10620
   194
/**
slouken@7821
   195
 *  This function returns the amount of RAM configured in the system, in MB.
slouken@7821
   196
 */
slouken@7821
   197
extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void);
slouken@7821
   198
slouken@12794
   199
/**
slouken@12794
   200
 * \brief Report the alignment this system needs for SIMD allocations.
slouken@12794
   201
 *
slouken@12794
   202
 * This will return the minimum number of bytes to which a pointer must be
slouken@12794
   203
 *  aligned to be compatible with SIMD instructions on the current machine.
slouken@12794
   204
 *  For example, if the machine supports SSE only, it will return 16, but if
slouken@12794
   205
 *  it supports AVX-512F, it'll return 64 (etc). This only reports values for
slouken@12794
   206
 *  instruction sets SDL knows about, so if your SDL build doesn't have
slouken@12794
   207
 *  SDL_HasAVX512F(), then it might return 16 for the SSE support it sees and
slouken@12794
   208
 *  not 64 for the AVX-512 instructions that exist but SDL doesn't know about.
slouken@12794
   209
 *  Plan accordingly.
slouken@12794
   210
 */
slouken@12794
   211
extern DECLSPEC size_t SDLCALL SDL_SIMDGetAlignment(void);
slouken@12794
   212
slouken@12794
   213
/**
slouken@12794
   214
 * \brief Allocate memory in a SIMD-friendly way.
slouken@12794
   215
 *
slouken@12794
   216
 * This will allocate a block of memory that is suitable for use with SIMD
slouken@12794
   217
 *  instructions. Specifically, it will be properly aligned and padded for
slouken@12794
   218
 *  the system's supported vector instructions.
slouken@12794
   219
 *
slouken@12794
   220
 * The memory returned will be padded such that it is safe to read or write
slouken@12794
   221
 *  an incomplete vector at the end of the memory block. This can be useful
slouken@12794
   222
 *  so you don't have to drop back to a scalar fallback at the end of your
slouken@12794
   223
 *  SIMD processing loop to deal with the final elements without overflowing
slouken@12794
   224
 *  the allocated buffer.
slouken@12794
   225
 *
slouken@12794
   226
 * You must free this memory with SDL_SIMDFree(), not free() or SDL_free()
slouken@12794
   227
 *  or delete[], etc.
slouken@12794
   228
 *
slouken@12794
   229
 * Note that SDL will only deal with SIMD instruction sets it is aware of;
slouken@12794
   230
 *  for example, SDL 2.0.8 knows that SSE wants 16-byte vectors
slouken@12794
   231
 *  (SDL_HasSSE()), and AVX2 wants 32 bytes (SDL_HasAVX2()), but doesn't
slouken@12794
   232
 *  know that AVX-512 wants 64. To be clear: if you can't decide to use an
slouken@12794
   233
 *  instruction set with an SDL_Has*() function, don't use that instruction
slouken@12794
   234
 *  set with memory allocated through here.
slouken@12794
   235
 *
slouken@12794
   236
 * SDL_SIMDAlloc(0) will return a non-NULL pointer, assuming the system isn't
slouken@12794
   237
 *  out of memory.
slouken@12794
   238
 *
slouken@12794
   239
 *  \param len The length, in bytes, of the block to allocate. The actual
slouken@12794
   240
 *             allocated block might be larger due to padding, etc.
slouken@12794
   241
 * \return Pointer to newly-allocated block, NULL if out of memory.
slouken@12794
   242
 *
slouken@12794
   243
 * \sa SDL_SIMDGetAlignment
slouken@12794
   244
 * \sa SDL_SIMDFree
slouken@12794
   245
 */
slouken@12794
   246
extern DECLSPEC void * SDLCALL SDL_SIMDAlloc(const size_t len);
slouken@12794
   247
slouken@12794
   248
/**
slouken@12794
   249
 * \brief Deallocate memory obtained from SDL_SIMDAlloc
slouken@12794
   250
 *
slouken@12794
   251
 * It is not valid to use this function on a pointer from anything but
slouken@12794
   252
 *  SDL_SIMDAlloc(). It can't be used on pointers from malloc, realloc,
slouken@12794
   253
 *  SDL_malloc, memalign, new[], etc.
slouken@12794
   254
 *
slouken@12794
   255
 * However, SDL_SIMDFree(NULL) is a legal no-op.
slouken@12794
   256
 *
slouken@12794
   257
 * \sa SDL_SIMDAlloc
slouken@12794
   258
 */
slouken@12794
   259
extern DECLSPEC void SDLCALL SDL_SIMDFree(void *ptr);
slouken@12794
   260
slouken@12794
   261
/* vi: set ts=4 sw=4 expandtab: */
slouken@739
   262
/* Ends C function definitions when using C++ */
slouken@739
   263
#ifdef __cplusplus
slouken@739
   264
}
slouken@739
   265
#endif
slouken@739
   266
#include "close_code.h"
slouken@739
   267
slouken@10638
   268
#endif /* SDL_cpuinfo_h_ */
slouken@1895
   269
slouken@1895
   270
/* vi: set ts=4 sw=4 expandtab: */