cpuinfo: Added some internal SIMD-aligned allocation functions.
author Ryan C. Gordon <icculus@icculus.org>
Mon, 21 May 2018 11:34:57 -0400
changeset 11984 987c5dc71309
parent 11983 3a50eb90e4b2
child 11985 36aa0bf7312b
cpuinfo: Added some internal SIMD-aligned allocation functions.

Fixes Bugzilla #4150 (sort of).
include/SDL_cpuinfo.h
src/cpuinfo/SDL_cpuinfo.c
src/cpuinfo/SDL_simd.h
     1.1 --- a/include/SDL_cpuinfo.h	Fri May 18 13:09:30 2018 -0700
     1.2 +++ b/include/SDL_cpuinfo.h	Mon May 21 11:34:57 2018 -0400
     1.3 @@ -169,7 +169,6 @@
     1.4   */
     1.5  extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void);
     1.6  
     1.7 -
     1.8  /* Ends C function definitions when using C++ */
     1.9  #ifdef __cplusplus
    1.10  }
     2.1 --- a/src/cpuinfo/SDL_cpuinfo.c	Fri May 18 13:09:30 2018 -0700
     2.2 +++ b/src/cpuinfo/SDL_cpuinfo.c	Mon May 21 11:34:57 2018 -0400
     2.3 @@ -38,6 +38,7 @@
     2.4  /* CPU feature detection for SDL */
     2.5  
     2.6  #include "SDL_cpuinfo.h"
     2.7 +#include "SDL_assert.h"
     2.8  
     2.9  #ifdef HAVE_SYSCONF
    2.10  #include <unistd.h>
    2.11 @@ -571,6 +572,7 @@
    2.12  }
    2.13  
    2.14  static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
    2.15 +static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;  /* 0xFFFFFFFF means "not calculated yet" */
    2.16  
    2.17  static Uint32
    2.18  SDL_GetCPUFeatures(void)
    2.19 @@ -578,41 +580,53 @@
    2.20      if (SDL_CPUFeatures == 0xFFFFFFFF) {
    2.21          CPU_calcCPUIDFeatures();
    2.22          SDL_CPUFeatures = 0;
    2.23 +        SDL_SIMDAlignment = 4;  /* a good safe base value */
    2.24          if (CPU_haveRDTSC()) {
    2.25              SDL_CPUFeatures |= CPU_HAS_RDTSC;
    2.26          }
    2.27          if (CPU_haveAltiVec()) {
    2.28              SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
    2.29 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    2.30          }
    2.31          if (CPU_haveMMX()) {
    2.32              SDL_CPUFeatures |= CPU_HAS_MMX;
    2.33 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
    2.34          }
    2.35          if (CPU_have3DNow()) {
    2.36              SDL_CPUFeatures |= CPU_HAS_3DNOW;
    2.37 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
    2.38          }
    2.39          if (CPU_haveSSE()) {
    2.40              SDL_CPUFeatures |= CPU_HAS_SSE;
    2.41 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    2.42          }
    2.43          if (CPU_haveSSE2()) {
    2.44              SDL_CPUFeatures |= CPU_HAS_SSE2;
    2.45 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    2.46          }
    2.47          if (CPU_haveSSE3()) {
    2.48              SDL_CPUFeatures |= CPU_HAS_SSE3;
    2.49 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    2.50          }
    2.51          if (CPU_haveSSE41()) {
    2.52              SDL_CPUFeatures |= CPU_HAS_SSE41;
    2.53 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    2.54          }
    2.55          if (CPU_haveSSE42()) {
    2.56              SDL_CPUFeatures |= CPU_HAS_SSE42;
    2.57 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    2.58          }
    2.59          if (CPU_haveAVX()) {
    2.60              SDL_CPUFeatures |= CPU_HAS_AVX;
    2.61 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
    2.62          }
    2.63          if (CPU_haveAVX2()) {
    2.64              SDL_CPUFeatures |= CPU_HAS_AVX2;
    2.65 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
    2.66          }
    2.67          if (CPU_haveNEON()) {
    2.68              SDL_CPUFeatures |= CPU_HAS_NEON;
    2.69 +            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
    2.70          }
    2.71      }
    2.72      return SDL_CPUFeatures;
    2.73 @@ -745,6 +759,44 @@
    2.74  }
    2.75  
    2.76  
    2.77 +size_t
    2.78 +SDL_SIMDGetAlignment(void)
    2.79 +{
    2.80 +    if (SDL_SIMDAlignment == 0xFFFFFFFF) {  /* sentinel: not calculated yet */
    2.81 +        (void) SDL_GetCPUFeatures();  /* the CPU probe also fills in SDL_SIMDAlignment */
    2.82 +    }
    2.83 +    SDL_assert(SDL_SIMDAlignment != 0);  /* SDL_SIMDAlloc() divides by this value */
    2.84 +    return (size_t) SDL_SIMDAlignment;
    2.85 +}
    2.86 +
    2.87 +void *
    2.88 +SDL_SIMDAlloc(const size_t len)
    2.89 +{
    2.90 +    const size_t alignment = SDL_SIMDGetAlignment();
    2.91 +    const size_t padded = len + ((alignment - (len % alignment)) % alignment);  /* len rounded up to a multiple of alignment */
    2.92 +    const size_t total = padded + alignment + sizeof (void *);  /* plus alignment slack and the saved malloc pointer */
    2.93 +    Uint8 *retval = NULL;
    2.94 +    Uint8 *ptr = (total > len) ? (Uint8 *) SDL_malloc(total) : NULL;  /* total <= len means size_t wrapped: fail like out-of-memory */
    2.95 +    if (ptr) {
    2.96 +        /* store the actual malloc pointer right before our aligned pointer. */
    2.97 +        retval = ptr + sizeof (void *);
    2.98 +        retval += alignment - (((size_t) retval) % alignment);
    2.99 +        *(((void **) retval) - 1) = ptr;
   2.100 +    }
   2.101 +    return retval;
   2.102 +}
   2.103 +
   2.104 +void
   2.105 +SDL_SIMDFree(void *ptr)
   2.106 +{
   2.107 +    if (ptr) {
   2.108 +        /* SDL_SIMDAlloc() stored the real malloc pointer in the slot just before the aligned block. */
   2.109 +        void **realptr = ((void **) ptr) - 1;
   2.110 +        SDL_free(*realptr);
   2.111 +    }
   2.112 +}
   2.113 +
   2.114 +
   2.115  #ifdef TEST_MAIN
   2.116  
   2.117  #include <stdio.h>
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/cpuinfo/SDL_simd.h	Mon May 21 11:34:57 2018 -0400
     3.3 @@ -0,0 +1,88 @@
     3.4 +/*
     3.5 +  Simple DirectMedia Layer
     3.6 +  Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
     3.7 +
     3.8 +  This software is provided 'as-is', without any express or implied
     3.9 +  warranty.  In no event will the authors be held liable for any damages
    3.10 +  arising from the use of this software.
    3.11 +
    3.12 +  Permission is granted to anyone to use this software for any purpose,
    3.13 +  including commercial applications, and to alter it and redistribute it
    3.14 +  freely, subject to the following restrictions:
    3.15 +
    3.16 +  1. The origin of this software must not be misrepresented; you must not
    3.17 +     claim that you wrote the original software. If you use this software
    3.18 +     in a product, an acknowledgment in the product documentation would be
    3.19 +     appreciated but is not required.
    3.20 +  2. Altered source versions must be plainly marked as such, and must not be
    3.21 +     misrepresented as being the original software.
    3.22 +  3. This notice may not be removed or altered from any source distribution.
    3.23 +*/
    3.24 +
    3.25 +#include "SDL.h"
    3.26 +#include "../SDL_internal.h"
    3.27 +
    3.28 +/**
    3.29 + * \brief Report the alignment this system needs for SIMD allocations.
    3.30 + *
    3.31 + * This will return the minimum number of bytes to which a pointer must be
    3.32 + *  aligned to be compatible with SIMD instructions on the current machine.
    3.33 + *  For example, if the machine supports SSE only, it will return 16, but if
    3.34 + *  it supports AVX-512F, it'll return 64 (etc). This only reports values for
    3.35 + *  instruction sets SDL knows about, so if your SDL build doesn't have
    3.36 + *  SDL_HasAVX512F(), then it might return 16 for the SSE support it sees and
    3.37 + *  not 64 for the AVX-512 instructions that exist but SDL doesn't know about.
    3.38 + *  Plan accordingly.
    3.39 + */
    3.40 +extern size_t SDL_SIMDGetAlignment(void);
    3.41 +
    3.42 +/**
    3.43 + * \brief Allocate memory in a SIMD-friendly way.
    3.44 + *
    3.45 + * This will allocate a block of memory that is suitable for use with SIMD
    3.46 + *  instructions. Specifically, it will be properly aligned and padded for
    3.47 + *  the system's supported vector instructions.
    3.48 + *
    3.49 + * The memory returned will be padded such that it is safe to read or write
    3.50 + *  an incomplete vector at the end of the memory block. This can be useful
    3.51 + *  so you don't have to drop back to a scalar fallback at the end of your
    3.52 + *  SIMD processing loop to deal with the final elements without overflowing
    3.53 + *  the allocated buffer.
    3.54 + *
    3.55 + * You must free this memory with SDL_SIMDFree(), not free() or SDL_free()
    3.56 + *  or delete[], etc.
    3.57 + *
    3.58 + * Note that SDL will only deal with SIMD instruction sets it is aware of;
    3.59 + *  for example, SDL 2.0.8 knows that SSE wants 16-byte vectors
    3.60 + *  (SDL_HasSSE()), and AVX2 wants 32 bytes (SDL_HasAVX2()), but doesn't
    3.61 + *  know that AVX-512 wants 64. To be clear: if you can't decide to use an
    3.62 + *  instruction set with an SDL_Has*() function, don't use that instruction
    3.63 + *  set with memory allocated through here.
    3.64 + *
    3.65 + * SDL_SIMDAlloc(0) will return a non-NULL pointer, assuming the system isn't
    3.66 + *  out of memory.
    3.67 + *
    3.68 + *  \param len The length, in bytes, of the block to allocate. The actual
    3.69 + *             allocated block might be larger due to padding, etc.
    3.70 + * \return Pointer to newly-allocated block, NULL if out of memory.
    3.71 + *
    3.72 + * \sa SDL_SIMDGetAlignment
    3.73 + * \sa SDL_SIMDFree
    3.74 + */
    3.75 +extern void * SDL_SIMDAlloc(const size_t len);
    3.76 +
    3.77 +/**
    3.78 + * \brief Deallocate memory obtained from SDL_SIMDAlloc
    3.79 + *
    3.80 + * It is not valid to use this function on a pointer from anything but
    3.81 + *  SDL_SIMDAlloc(). It can't be used on pointers from malloc, realloc,
    3.82 + *  SDL_malloc, memalign, new[], etc.
    3.83 + *
    3.84 + * However, SDL_SIMDFree(NULL) is a legal no-op.
    3.85 + *
    3.86 + * \sa SDL_SIMDAlloc
    3.87 + */
    3.88 +extern void SDL_SIMDFree(void *ptr);
    3.89 +
    3.90 +/* vi: set ts=4 sw=4 expandtab: */
    3.91 +