From 8543ad7df119bc786ae2e0ea50f967e8a3204769 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 21 May 2018 11:34:57 -0400 Subject: [PATCH] cpuinfo: Added some internal SIMD-aligned allocation functions. Fixes Bugzilla #4150 (sort of). --- include/SDL_cpuinfo.h | 1 - src/cpuinfo/SDL_cpuinfo.c | 52 +++++++++++++++++++++++ src/cpuinfo/SDL_simd.h | 88 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 src/cpuinfo/SDL_simd.h diff --git a/include/SDL_cpuinfo.h b/include/SDL_cpuinfo.h index 74d9fdf87ab1d..270ee524489b8 100644 --- a/include/SDL_cpuinfo.h +++ b/include/SDL_cpuinfo.h @@ -169,7 +169,6 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasNEON(void); */ extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void); - /* Ends C function definitions when using C++ */ #ifdef __cplusplus } diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c index 4e2c0f101362b..9c74a84bedb35 100644 --- a/src/cpuinfo/SDL_cpuinfo.c +++ b/src/cpuinfo/SDL_cpuinfo.c @@ -38,6 +38,7 @@ /* CPU feature detection for SDL */ #include "SDL_cpuinfo.h" +#include "SDL_assert.h" #ifdef HAVE_SYSCONF #include @@ -571,6 +572,7 @@ SDL_GetCPUCacheLineSize(void) } static Uint32 SDL_CPUFeatures = 0xFFFFFFFF; +static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF; static Uint32 SDL_GetCPUFeatures(void) @@ -578,41 +580,53 @@ SDL_GetCPUFeatures(void) if (SDL_CPUFeatures == 0xFFFFFFFF) { CPU_calcCPUIDFeatures(); SDL_CPUFeatures = 0; + SDL_SIMDAlignment = 4; /* a good safe base value */ if (CPU_haveRDTSC()) { SDL_CPUFeatures |= CPU_HAS_RDTSC; } if (CPU_haveAltiVec()) { SDL_CPUFeatures |= CPU_HAS_ALTIVEC; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveMMX()) { SDL_CPUFeatures |= CPU_HAS_MMX; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8); } if (CPU_have3DNow()) { SDL_CPUFeatures |= CPU_HAS_3DNOW; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8); } if (CPU_haveSSE()) { SDL_CPUFeatures |= CPU_HAS_SSE; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveSSE2()) { SDL_CPUFeatures |= CPU_HAS_SSE2; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveSSE3()) { SDL_CPUFeatures |= CPU_HAS_SSE3; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveSSE41()) { SDL_CPUFeatures |= CPU_HAS_SSE41; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveSSE42()) { SDL_CPUFeatures |= CPU_HAS_SSE42; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } if (CPU_haveAVX()) { SDL_CPUFeatures |= CPU_HAS_AVX; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32); } if (CPU_haveAVX2()) { SDL_CPUFeatures |= CPU_HAS_AVX2; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32); } if (CPU_haveNEON()) { SDL_CPUFeatures |= CPU_HAS_NEON; + SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); } } return SDL_CPUFeatures; @@ -745,6 +759,44 @@ SDL_GetSystemRAM(void) } +size_t +SDL_SIMDGetAlignment(void) +{ + if (SDL_SIMDAlignment == 0xFFFFFFFF) { + SDL_GetCPUFeatures(); /* make sure this has been calculated */ + } + SDL_assert(SDL_SIMDAlignment != 0); + return SDL_SIMDAlignment; +} + +void * +SDL_SIMDAlloc(const size_t len) +{ + const size_t alignment = SDL_SIMDGetAlignment(); + const size_t padding = alignment - (len % alignment); + const size_t padded = (padding != alignment) ? (len + padding) : len; + Uint8 *retval = NULL; + Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *)); + if (ptr) { + /* store the actual malloc pointer right before our aligned pointer. */ + retval = ptr + sizeof (void *); + retval += alignment - (((size_t) retval) % alignment); + *(((void **) retval) - 1) = ptr; + } + return retval; +} + +void +SDL_SIMDFree(void *ptr) +{ + if (ptr) { + void **realptr = (void **) ptr; + realptr--; + SDL_free(*(((void **) ptr) - 1)); + } +} + + #ifdef TEST_MAIN #include diff --git a/src/cpuinfo/SDL_simd.h b/src/cpuinfo/SDL_simd.h new file mode 100644 index 0000000000000..e2b28bc8cd722 --- /dev/null +++ b/src/cpuinfo/SDL_simd.h @@ -0,0 +1,88 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2018 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SDL.h" +#include "../SDL_internal.h" + +/** + * \brief Report the alignment this system needs for SIMD allocations. + * + * This will return the minimum number of bytes to which a pointer must be + * aligned to be compatible with SIMD instructions on the current machine. + * For example, if the machine supports SSE only, it will return 16, but if + * it supports AVX-512F, it'll return 64 (etc). This only reports values for + * instruction sets SDL knows about, so if your SDL build doesn't have + * SDL_HasAVX512F(), then it might return 16 for the SSE support it sees and + * not 64 for the AVX-512 instructions that exist but SDL doesn't know about. + * Plan accordingly. + */ +extern size_t SDL_SIMDGetAlignment(void); + +/** + * \brief Allocate memory in a SIMD-friendly way. + * + * This will allocate a block of memory that is suitable for use with SIMD + * instructions. Specifically, it will be properly aligned and padded for + * the system's supported vector instructions. + * + * The memory returned will be padded such that it is safe to read or write + * an incomplete vector at the end of the memory block. This can be useful + * so you don't have to drop back to a scalar fallback at the end of your + * SIMD processing loop to deal with the final elements without overflowing + * the allocated buffer. + * + * You must free this memory with SDL_FreeSIMD(), not free() or SDL_free() + * or delete[], etc. + * + * Note that SDL will only deal with SIMD instruction sets it is aware of; + * for example, SDL 2.0.8 knows that SSE wants 16-byte vectors + * (SDL_HasSSE()), and AVX2 wants 32 bytes (SDL_HasAVX2()), but doesn't + * know that AVX-512 wants 64. To be clear: if you can't decide to use an + * instruction set with an SDL_Has*() function, don't use that instruction + * set with memory allocated through here. + * + * SDL_AllocSIMD(0) will return a non-NULL pointer, assuming the system isn't + * out of memory. + * + * \param len The length, in bytes, of the block to allocated. The actual + * allocated block might be larger due to padding, etc. + * \return Pointer to newly-allocated block, NULL if out of memory. + * + * \sa SDL_SIMDAlignment + * \sa SDL_SIMDFree + */ +extern void * SDL_SIMDAlloc(const size_t len); + +/** + * \brief Deallocate memory obtained from SDL_SIMDAlloc + * + * It is not valid to use this function on a pointer from anything but + * SDL_SIMDAlloc(). It can't be used on pointers from malloc, realloc, + * SDL_malloc, memalign, new[], etc. + * + * However, SDL_SIMDFree(NULL) is a legal no-op. + * + * \sa SDL_SIMDAlloc + */ +extern void SDL_SIMDFree(void *ptr); + +/* vi: set ts=4 sw=4 expandtab: */ +