audio: Implemented SIMD support for audio data type converters.
author Ryan C. Gordon <icculus@icculus.org>
Mon, 16 Jan 2017 00:58:28 -0500
changeset 10815 71bbe3233508
parent 10814 938218064f67
child 10816 c8d44526965e
audio: Implemented SIMD support for audio data type converters.

This currently adds an SSE2 implementation (but it's #ifdef'd out for now,
until it's hooked up to the configure script and such).
src/audio/SDL_audio.c
src/audio/SDL_audio_c.h
src/audio/SDL_audiotypecvt.c
     1.1 --- a/src/audio/SDL_audio.c	Sun Jan 15 05:01:59 2017 -0500
     1.2 +++ b/src/audio/SDL_audio.c	Mon Jan 16 00:58:28 2017 -0500
     1.3 @@ -843,6 +843,8 @@
     1.4      return NULL;
     1.5  }
     1.6  
     1.7 +extern void SDL_ChooseAudioConverters(void);
     1.8 +
     1.9  int
    1.10  SDL_AudioInit(const char *driver_name)
    1.11  {
    1.12 @@ -857,6 +859,8 @@
    1.13      SDL_zero(current_audio);
    1.14      SDL_zero(open_devices);
    1.15  
    1.16 +    SDL_ChooseAudioConverters();
    1.17 +
    1.18      /* Select the proper audio driver */
    1.19      if (driver_name == NULL) {
    1.20          driver_name = SDL_getenv("SDL_AUDIODRIVER");
     2.1 --- a/src/audio/SDL_audio_c.h	Sun Jan 15 05:01:59 2017 -0500
     2.2 +++ b/src/audio/SDL_audio_c.h	Mon Jan 16 00:58:28 2017 -0500
     2.3 @@ -53,16 +53,17 @@
     2.4  /* Function to calculate the size and silence for a SDL_AudioSpec */
     2.5  extern void SDL_CalculateAudioSpec(SDL_AudioSpec * spec);
     2.6  
     2.7 -void SDLCALL SDL_Convert_S8_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format);
     2.8 -void SDLCALL SDL_Convert_U8_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format);
     2.9 -void SDLCALL SDL_Convert_S16_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format);
    2.10 -void SDLCALL SDL_Convert_U16_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format);
    2.11 -void SDLCALL SDL_Convert_S32_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format);
    2.12 -void SDLCALL SDL_Convert_F32_to_S8(SDL_AudioCVT *cvt, SDL_AudioFormat format);
    2.13 -void SDLCALL SDL_Convert_F32_to_U8(SDL_AudioCVT *cvt, SDL_AudioFormat format);
    2.14 -void SDLCALL SDL_Convert_F32_to_S16(SDL_AudioCVT *cvt, SDL_AudioFormat format);
    2.15 -void SDLCALL SDL_Convert_F32_to_U16(SDL_AudioCVT *cvt, SDL_AudioFormat format);
    2.16 -void SDLCALL SDL_Convert_F32_to_S32(SDL_AudioCVT *cvt, SDL_AudioFormat format);
    2.17 +/* These pointers get set during init to various SIMD implementations. */
    2.18 +extern SDL_AudioFilter SDL_Convert_S8_to_F32;
    2.19 +extern SDL_AudioFilter SDL_Convert_U8_to_F32;
    2.20 +extern SDL_AudioFilter SDL_Convert_S16_to_F32;
    2.21 +extern SDL_AudioFilter SDL_Convert_U16_to_F32;
    2.22 +extern SDL_AudioFilter SDL_Convert_S32_to_F32;
    2.23 +extern SDL_AudioFilter SDL_Convert_F32_to_S8;
    2.24 +extern SDL_AudioFilter SDL_Convert_F32_to_U8;
    2.25 +extern SDL_AudioFilter SDL_Convert_F32_to_S16;
    2.26 +extern SDL_AudioFilter SDL_Convert_F32_to_U16;
    2.27 +extern SDL_AudioFilter SDL_Convert_F32_to_S32;
    2.28  
    2.29  
    2.30  /* SDL_AudioStream is a new audio conversion interface. It
     3.1 --- a/src/audio/SDL_audiotypecvt.c	Sun Jan 15 05:01:59 2017 -0500
     3.2 +++ b/src/audio/SDL_audiotypecvt.c	Mon Jan 16 00:58:28 2017 -0500
     3.3 @@ -22,14 +22,55 @@
     3.4  #include "../SDL_internal.h"
     3.5  #include "SDL_audio.h"
     3.6  #include "SDL_audio_c.h"
     3.7 +#include "SDL_cpuinfo.h"
     3.8  #include "SDL_assert.h"
     3.9  
    3.10 +/* !!! FIXME: write NEON code. */
    3.11 +#define HAVE_NEON_INTRINSICS 0
    3.12 +
    3.13 +/* !!! FIXME: wire this up to the configure script, etc. */
    3.14 +#define HAVE_SSE2_INTRINSICS 0
    3.15 +
    3.16 +#if HAVE_SSE2_INTRINSICS
    3.17 +#include <emmintrin.h>
    3.18 +#endif
    3.19 +
    3.20 +#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
    3.21 +#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* x86_64 guarantees SSE2. */
    3.22 +#elif __MACOSX__ && HAVE_SSE2_INTRINSICS
    3.23 +#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* Mac OS X/Intel guarantees SSE2. */
    3.24 +#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
    3.25 +#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* ARMv8+ promise NEON. */
    3.26 +#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
    3.27 +#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* All Apple ARMv7 chips promise NEON support. */
    3.28 +#endif
    3.29 +
    3.30 +/* Set to zero if platform is guaranteed to use a SIMD codepath here. */
    3.31 +#ifndef NEED_SCALAR_CONVERTER_FALLBACKS
    3.32 +#define NEED_SCALAR_CONVERTER_FALLBACKS 1
    3.33 +#endif
    3.34 +
    3.35 +/* Function pointers set to a CPU-specific implementation. */
    3.36 +SDL_AudioFilter SDL_Convert_S8_to_F32 = NULL;
    3.37 +SDL_AudioFilter SDL_Convert_U8_to_F32 = NULL;
    3.38 +SDL_AudioFilter SDL_Convert_S16_to_F32 = NULL;
    3.39 +SDL_AudioFilter SDL_Convert_U16_to_F32 = NULL;
    3.40 +SDL_AudioFilter SDL_Convert_S32_to_F32 = NULL;
    3.41 +SDL_AudioFilter SDL_Convert_F32_to_S8 = NULL;
    3.42 +SDL_AudioFilter SDL_Convert_F32_to_U8 = NULL;
    3.43 +SDL_AudioFilter SDL_Convert_F32_to_S16 = NULL;
    3.44 +SDL_AudioFilter SDL_Convert_F32_to_U16 = NULL;
    3.45 +SDL_AudioFilter SDL_Convert_F32_to_S32 = NULL;
    3.46 +
    3.47 +
    3.48  #define DIVBY127 0.0078740157480315f
    3.49  #define DIVBY32767 3.05185094759972e-05f
    3.50  #define DIVBY2147483647 4.6566128752458e-10f
    3.51  
    3.52 -void SDLCALL
    3.53 -SDL_Convert_S8_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format)
    3.54 +
    3.55 +#if NEED_SCALAR_CONVERTER_FALLBACKS
    3.56 +static void SDLCALL
    3.57 +SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
    3.58  {
    3.59      const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
    3.60      float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
    3.61 @@ -47,8 +88,8 @@
    3.62      }
    3.63  }
    3.64  
    3.65 -void SDLCALL
    3.66 -SDL_Convert_U8_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format)
    3.67 +static void SDLCALL
    3.68 +SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
    3.69  {
    3.70      const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
    3.71      float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
    3.72 @@ -66,8 +107,8 @@
    3.73      }
    3.74  }
    3.75  
    3.76 -void SDLCALL
    3.77 -SDL_Convert_S16_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format)
    3.78 +static void SDLCALL
    3.79 +SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
    3.80  {
    3.81      const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
    3.82      float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
    3.83 @@ -85,8 +126,8 @@
    3.84      }
    3.85  }
    3.86  
    3.87 -void SDLCALL
    3.88 -SDL_Convert_U16_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format)
    3.89 +static void SDLCALL
    3.90 +SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
    3.91  {
    3.92      const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
    3.93      float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
    3.94 @@ -104,8 +145,8 @@
    3.95      }
    3.96  }
    3.97  
    3.98 -void SDLCALL
    3.99 -SDL_Convert_S32_to_F32(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.100 +static void SDLCALL
   3.101 +SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.102  {
   3.103      const Sint32 *src = (const Sint32 *) cvt->buf;
   3.104      float *dst = (float *) cvt->buf;
   3.105 @@ -122,8 +163,8 @@
   3.106      }
   3.107  }
   3.108  
   3.109 -void SDLCALL
   3.110 -SDL_Convert_F32_to_S8(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.111 +static void SDLCALL
   3.112 +SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.113  {
   3.114      const float *src = (const float *) cvt->buf;
   3.115      Sint8 *dst = (Sint8 *) cvt->buf;
   3.116 @@ -141,8 +182,8 @@
   3.117      }
   3.118  }
   3.119  
   3.120 -void SDLCALL
   3.121 -SDL_Convert_F32_to_U8(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.122 +static void SDLCALL
   3.123 +SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.124  {
   3.125      const float *src = (const float *) cvt->buf;
   3.126      Uint8 *dst = (Uint8 *) cvt->buf;
   3.127 @@ -160,8 +201,8 @@
   3.128      }
   3.129  }
   3.130  
   3.131 -void SDLCALL
   3.132 -SDL_Convert_F32_to_S16(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.133 +static void SDLCALL
   3.134 +SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.135  {
   3.136      const float *src = (const float *) cvt->buf;
   3.137      Sint16 *dst = (Sint16 *) cvt->buf;
   3.138 @@ -179,8 +220,8 @@
   3.139      }
   3.140  }
   3.141  
   3.142 -void SDLCALL
   3.143 -SDL_Convert_F32_to_U16(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.144 +static void SDLCALL
   3.145 +SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.146  {
   3.147      const float *src = (const float *) cvt->buf;
   3.148      Uint16 *dst = (Uint16 *) cvt->buf;
   3.149 @@ -198,8 +239,8 @@
   3.150      }
   3.151  }
   3.152  
   3.153 -void SDLCALL
   3.154 -SDL_Convert_F32_to_S32(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.155 +static void SDLCALL
   3.156 +SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.157  {
   3.158      const float *src = (const float *) cvt->buf;
   3.159      Sint32 *dst = (Sint32 *) cvt->buf;
   3.160 @@ -215,5 +256,532 @@
   3.161          cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
   3.162      }
   3.163  }
   3.164 +#endif
   3.165 +
   3.166 +
   3.167 +#if HAVE_SSE2_INTRINSICS
   3.168 +static void SDLCALL
   3.169 +SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.170 +{   /* Expands cvt->buf in place from Sint8 to float, working from the last sample back to the first, then runs the next filter (if any). */
   3.171 +    const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
   3.172 +    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;  /* last output slot */
   3.173 +    int i;  /* samples still to convert */
   3.174 +
   3.175 +    LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using SSE2)");
   3.176 +
   3.177 +    /* Scalar-convert from the tail until dst-15 (where the next 16-float block would start) is 16-byte aligned. The buffer grows as we walk backwards, so we don't have to worry about overreading from src. */
   3.178 +    for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
   3.179 +        *dst = (((float) *src) * DIVBY127);
   3.180 +    }
   3.181 +
   3.182 +    src -= 15; dst -= 15;  /* adjust to read SSE blocks from the start. */
   3.183 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.184 +
   3.185 +    /* Make sure src is aligned too; if it is not, everything left goes through the scalar loop at the bottom. */
   3.186 +    if ((((size_t) src) & 15) == 0) {
   3.187 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.188 +        const __m128i *mmsrc = (const __m128i *) src;
   3.189 +        const __m128i zero = _mm_setzero_si128();
   3.190 +        const __m128 divby127 = _mm_set1_ps(DIVBY127);
   3.191 +        while (i >= 16) {   /* 16 * 8-bit */
   3.192 +            const __m128i bytes = _mm_load_si128(mmsrc);  /* get 16 sint8 into an XMM register. */
   3.193 +            /* treat as int16 lanes: shift left 8 to discard each lane's high byte, then back right with sign-extend. Now sint16 holding the even-indexed samples. */
   3.194 +            const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
   3.195 +            /* right-shift-sign-extend gets us sint16 with the other (odd-indexed) set of values. */
   3.196 +            const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
   3.197 +            /* unpack against zero to make these int32, shift to make them sign-extend, convert to float, multiply. Whew! */
   3.198 +            const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby127);
   3.199 +            const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby127);
   3.200 +            const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby127);
   3.201 +            const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby127);
   3.202 +            /* Interleave even/odd samples back into correct order, store. */
   3.203 +            _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
   3.204 +            _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
   3.205 +            _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
   3.206 +            _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
   3.207 +            i -= 16; mmsrc--; dst -= 16;  /* step one 16-sample block towards the start of the buffer. */
   3.208 +        }
   3.209 +
   3.210 +        src = (const Sint8 *) mmsrc;
   3.211 +    }
   3.212 +
   3.213 +    src += 15; dst += 15;  /* adjust for any scalar finishing. */
   3.214 +
   3.215 +    /* Finish off any leftovers with scalar operations. */
   3.216 +    while (i) {
   3.217 +        *dst = (((float) *src) * DIVBY127);
   3.218 +        i--; src--; dst--;
   3.219 +    }
   3.220 +
   3.221 +    cvt->len_cvt *= 4;  /* each 1-byte sample became a 4-byte float. */
   3.222 +    if (cvt->filters[++cvt->filter_index]) {
   3.223 +        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
   3.224 +    }
   3.225 +}
   3.226 +
   3.227 +static void SDLCALL
   3.228 +SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.229 +{   /* Expands cvt->buf in place from Uint8 to float (sample * DIVBY127 - 1.0f), working from the last sample back to the first, then runs the next filter (if any). */
   3.230 +    const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
   3.231 +    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;  /* last output slot */
   3.232 +    int i;  /* samples still to convert */
   3.233 +
   3.234 +    LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using SSE2)");
   3.235 +
   3.236 +    /* Scalar-convert from the tail until dst-15 (where the next 16-float block would start) is 16-byte aligned. The buffer grows as we walk backwards, so we don't have to worry about overreading from src. */
   3.237 +    for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
   3.238 +        *dst = ((((float) *src) * DIVBY127) - 1.0f);
   3.239 +    }
   3.240 +
   3.241 +    src -= 15; dst -= 15;  /* adjust to read SSE blocks from the start. */
   3.242 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.243 +
   3.244 +    /* Make sure src is aligned too; if it is not, everything left goes through the scalar loop at the bottom. */
   3.245 +    if ((((size_t) src) & 15) == 0) {
   3.246 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.247 +        const __m128i *mmsrc = (const __m128i *) src;
   3.248 +        const __m128i zero = _mm_setzero_si128();
   3.249 +        const __m128 divby127 = _mm_set1_ps(DIVBY127);
   3.250 +        const __m128 minus1 = _mm_set1_ps(-1.0f);
   3.251 +        while (i >= 16) {   /* 16 * 8-bit */
   3.252 +            const __m128i bytes = _mm_load_si128(mmsrc);  /* get 16 uint8 into an XMM register. */
   3.253 +            /* treat as int16 lanes: shift left 8 to discard each lane's high byte, then back right with zero-extend. Now uint16 holding the even-indexed samples. */
   3.254 +            const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
   3.255 +            /* right-shift-zero-extend gets us uint16 with the other (odd-indexed) set of values. */
   3.256 +            const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
   3.257 +            /* unpack against zero to make these int32, convert to float, multiply, add. Whew! */
   3.258 +            /* Note that AVX2 can do floating point multiply+add in one instruction, fwiw. SSE2 cannot. */
   3.259 +            const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby127), minus1);
   3.260 +            const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby127), minus1);
   3.261 +            const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby127), minus1);
   3.262 +            const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby127), minus1);
   3.263 +            /* Interleave even/odd samples back into correct order, store. */
   3.264 +            _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
   3.265 +            _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
   3.266 +            _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
   3.267 +            _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
   3.268 +            i -= 16; mmsrc--; dst -= 16;  /* step one 16-sample block towards the start of the buffer. */
   3.269 +        }
   3.270 +
   3.271 +        src = (const Uint8 *) mmsrc;
   3.272 +    }
   3.273 +
   3.274 +    src += 15; dst += 15;  /* adjust for any scalar finishing. */
   3.275 +
   3.276 +    /* Finish off any leftovers with scalar operations. */
   3.277 +    while (i) {
   3.278 +        *dst = ((((float) *src) * DIVBY127) - 1.0f);
   3.279 +        i--; src--; dst--;
   3.280 +    }
   3.281 +
   3.282 +    cvt->len_cvt *= 4;  /* each 1-byte sample became a 4-byte float. */
   3.283 +    if (cvt->filters[++cvt->filter_index]) {
   3.284 +        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
   3.285 +    }
   3.286 +}
   3.287 +
   3.288 +static void SDLCALL
   3.289 +SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.290 +{   /* Expands cvt->buf in place from Sint16 to float, working from the last sample back to the first, then runs the next filter (if any). */
   3.291 +    const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
   3.292 +    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;  /* last output slot */
   3.293 +    int i;  /* samples still to convert */
   3.294 +
   3.295 +    LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using SSE2)");
   3.296 +
   3.297 +    /* Scalar-convert from the tail until dst-7 (where the next 8-float block would start) is 16-byte aligned. The buffer grows as we walk backwards, so we don't have to worry about overreading from src. */
   3.298 +    for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
   3.299 +        *dst = (((float) *src) * DIVBY32767);
   3.300 +    }
   3.301 +
   3.302 +    src -= 7; dst -= 7;  /* adjust to read SSE blocks from the start. */
   3.303 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.304 +
   3.305 +    /* Make sure src is aligned too; if it is not, everything left goes through the scalar loop at the bottom. */
   3.306 +    if ((((size_t) src) & 15) == 0) {
   3.307 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.308 +        const __m128 divby32767 = _mm_set1_ps(DIVBY32767);
   3.309 +        while (i >= 8) {   /* 8 * 16-bit */
   3.310 +            const __m128i ints = _mm_load_si128((__m128i const *) src);  /* get 8 sint16 into an XMM register. */
   3.311 +            /* treat as int32 lanes: shift left 16 to discard each lane's high sint16, then back right with sign-extend. Now sint32 holding the even-indexed samples. */
   3.312 +            const __m128i a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
   3.313 +            /* right-shift-sign-extend gets us sint32 with the other (odd-indexed) set of values. */
   3.314 +            const __m128i b = _mm_srai_epi32(ints, 16);
   3.315 +            /* Interleave these back into the right order, convert to float, multiply, store. */
   3.316 +            _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32767));
   3.317 +            _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32767));
   3.318 +            i -= 8; src -= 8; dst -= 8;  /* step one 8-sample block towards the start of the buffer. */
   3.319 +        }
   3.320 +    }
   3.321 +
   3.322 +    src += 7; dst += 7;  /* adjust for any scalar finishing. */
   3.323 +
   3.324 +    /* Finish off any leftovers with scalar operations. */
   3.325 +    while (i) {
   3.326 +        *dst = (((float) *src) * DIVBY32767);
   3.327 +        i--; src--; dst--;
   3.328 +    }
   3.329 +
   3.330 +    cvt->len_cvt *= 2;  /* each 2-byte sample became a 4-byte float. */
   3.331 +    if (cvt->filters[++cvt->filter_index]) {
   3.332 +        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
   3.333 +    }
   3.334 +}
   3.335 +
   3.336 +static void SDLCALL
   3.337 +SDL_Convert_U16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.338 +{   /* Expands cvt->buf in place from Uint16 to float (sample * DIVBY32767 - 1.0f), working from the last sample back to the first, then runs the next filter (if any). */
   3.339 +    const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
   3.340 +    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;  /* last output slot */
   3.341 +    int i;  /* samples still to convert */
   3.342 +
   3.343 +    LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using SSE2)");
   3.344 +
   3.345 +    /* Scalar-convert from the tail until dst-7 (where the next 8-float block would start) is 16-byte aligned. The buffer grows as we walk backwards, so we don't have to worry about overreading from src. */
   3.346 +    for (i = cvt->len_cvt / sizeof (Uint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
   3.347 +        *dst = ((((float) *src) * DIVBY32767) - 1.0f);
   3.348 +    }
   3.349 +
   3.350 +    src -= 7; dst -= 7;  /* adjust to read SSE blocks from the start. */
   3.351 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.352 +
   3.353 +    /* Make sure src is aligned too; if it is not, everything left goes through the scalar loop at the bottom. */
   3.354 +    if ((((size_t) src) & 15) == 0) {
   3.355 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.356 +        const __m128 divby32767 = _mm_set1_ps(DIVBY32767);
   3.357 +        const __m128 minus1 = _mm_set1_ps(-1.0f);  /* must be negative: this is added below to match the scalar path's "- 1.0f". */
   3.358 +        while (i >= 8) {   /* 8 * 16-bit */
   3.359 +            const __m128i ints = _mm_load_si128((__m128i const *) src);  /* get 8 uint16 into an XMM register. */
   3.360 +            /* treat as int32 lanes: shift left 16 to discard each lane's high uint16, then back right with zero-extend. Now non-negative int32 holding the even-indexed samples. */
   3.361 +            const __m128i a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
   3.362 +            /* right-shift-zero-extend gets us int32 with the other (odd-indexed) set of values. */
   3.363 +            const __m128i b = _mm_srli_epi32(ints, 16);
   3.364 +            /* Interleave these back into the right order, convert to float, multiply, add -1, store. */
   3.365 +            _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32767), minus1));
   3.366 +            _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32767), minus1));
   3.367 +            i -= 8; src -= 8; dst -= 8;  /* step one 8-sample block towards the start of the buffer. */
   3.368 +        }
   3.369 +    }
   3.370 +
   3.371 +    src += 7; dst += 7;  /* adjust for any scalar finishing. */
   3.372 +
   3.373 +    /* Finish off any leftovers with scalar operations. */
   3.374 +    while (i) {
   3.375 +        *dst = ((((float) *src) * DIVBY32767) - 1.0f);
   3.376 +        i--; src--; dst--;
   3.377 +    }
   3.378 +
   3.379 +    cvt->len_cvt *= 2;  /* each 2-byte sample became a 4-byte float. */
   3.380 +    if (cvt->filters[++cvt->filter_index]) {
   3.381 +        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
   3.382 +    }
   3.383 +}
   3.384 +
   3.385 +static void SDLCALL
   3.386 +SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.387 +{   /* Converts cvt->buf in place from Sint32 to float, front to back (same sample size, so len_cvt is unchanged), then runs the next filter (if any). */
   3.388 +    const Sint32 *src = (const Sint32 *) cvt->buf;
   3.389 +    float *dst = (float *) cvt->buf;
   3.390 +    int i;  /* samples still to convert */
   3.391 +
   3.392 +    LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using SSE2)");
   3.393 +
   3.394 +    /* Get dst aligned to 16 bytes */
   3.395 +    for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
   3.396 +        *dst = (float) (((double) *src) * DIVBY2147483647);
   3.397 +    }
   3.398 +
   3.399 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.400 +    SDL_assert(!i || ((((size_t) src) & 15) == 0));
   3.401 +
   3.402 +    {
   3.403 +        /* src and dst start at the same address and advance in lockstep, so src is aligned whenever dst is. Do SSE blocks as long as we have 16 bytes available. */
   3.404 +        const __m128d divby2147483647 = _mm_set1_pd(DIVBY2147483647);
   3.405 +        const __m128i *mmsrc = (const __m128i *) src;
   3.406 +        while (i >= 4) {   /* 4 * sint32 */
   3.407 +            const __m128i ints = _mm_load_si128(mmsrc);
   3.408 +            /* byte-shift the whole register over, so _mm_cvtepi32_pd can read the top ints in the bottom of the vector. (_mm_srli_si128/_mm_slli_si128 are the widely available spellings of _mm_bsrli_si128/_mm_bslli_si128.) */
   3.409 +            const __m128d doubles1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_srli_si128(ints, 8)), divby2147483647);
   3.410 +            const __m128d doubles2 = _mm_mul_pd(_mm_cvtepi32_pd(ints), divby2147483647);
   3.411 +            /* convert to float32, byte-shift/or to get these into a vector to store. */
   3.412 +            _mm_store_ps(dst, _mm_castsi128_ps(_mm_or_si128(_mm_slli_si128(_mm_castps_si128(_mm_cvtpd_ps(doubles1)), 8), _mm_castps_si128(_mm_cvtpd_ps(doubles2)))));
   3.413 +            i -= 4; mmsrc++; dst += 4;
   3.414 +        }
   3.415 +        src = (const Sint32 *) mmsrc;
   3.416 +    }
   3.417 +
   3.418 +    /* Finish off any leftovers with scalar operations. */
   3.419 +    while (i) {
   3.420 +        *dst = (float) (((double) *src) * DIVBY2147483647);
   3.421 +        i--; src++; dst++;
   3.422 +    }
   3.423 +
   3.424 +    if (cvt->filters[++cvt->filter_index]) {
   3.425 +        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
   3.426 +    }
   3.427 +}
   3.428 +
   3.429 +static void SDLCALL
   3.430 +SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.431 +{   /* Shrinks cvt->buf in place from float to Sint8 (sample * 127.0f), front to back, then runs the next filter (if any). */
   3.432 +    const float *src = (const float *) cvt->buf;
   3.433 +    Sint8 *dst = (Sint8 *) cvt->buf;
   3.434 +    int i;  /* samples still to convert */
   3.435 +
   3.436 +    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using SSE2)");
   3.437 +
   3.438 +    /* Get dst aligned to 16 bytes */
   3.439 +    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
   3.440 +        *dst = (Sint8) (*src * 127.0f);
   3.441 +    }
   3.442 +
   3.443 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.444 +
   3.445 +    /* Make sure src is aligned too; if it is not, everything left goes through the scalar loop at the bottom. */
   3.446 +    if ((((size_t) src) & 15) == 0) {
   3.447 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.448 +        /* NOTE(review): _mm_cvtps_epi32 presumably rounds per the current rounding mode while the scalar casts truncate, so the two paths may differ by one step -- confirm this is acceptable. */
   3.449 +        const __m128 mulby127 = _mm_set1_ps(127.0f);
   3.450 +        __m128i *mmdst = (__m128i *) dst;
   3.451 +        while (i >= 16) {   /* 16 * float32 */
   3.452 +            const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby127));  /* load 4 floats, scale, convert to sint32 */
   3.453 +            const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby127));  /* load 4 floats, scale, convert to sint32 */
   3.454 +            const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+8), mulby127));  /* load 4 floats, scale, convert to sint32 */
   3.455 +            const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+12), mulby127));  /* load 4 floats, scale, convert to sint32 */
   3.456 +            _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));  /* pack sint32 -> sint16 -> sint8 with signed saturation, store out. */
   3.457 +            i -= 16; src += 16; mmdst++;
   3.458 +        }
   3.459 +        dst = (Sint8 *) mmdst;
   3.460 +    }
   3.461 +
   3.462 +    /* Finish off any leftovers with scalar operations. */
   3.463 +    while (i) {
   3.464 +        *dst = (Sint8) (*src * 127.0f);
   3.465 +        i--; src++; dst++;
   3.466 +    }
   3.467 +
   3.468 +    cvt->len_cvt /= 4;  /* each 4-byte float became a 1-byte sample. */
   3.469 +    if (cvt->filters[++cvt->filter_index]) {
   3.470 +        cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
   3.471 +    }
   3.472 +}
   3.472 +
   3.473 +static void SDLCALL
   3.474 +SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.475 +{   /* Shrinks cvt->buf in place from float to Uint8 ((sample + 1.0f) * 127.0f), front to back, then runs the next filter (if any). */
   3.476 +    const float *src = (const float *) cvt->buf;
   3.477 +    Uint8 *dst = (Uint8 *) cvt->buf;
   3.478 +    int i;  /* samples still to convert */
   3.479 +
   3.480 +    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using SSE2)");
   3.481 +
   3.482 +    /* Get dst aligned to 16 bytes */
   3.483 +    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
   3.484 +        *dst = (Uint8) ((*src + 1.0f) * 127.0f);
   3.485 +    }
   3.486 +
   3.487 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.488 +
   3.489 +    /* Make sure src is aligned too; if it is not, everything left goes through the scalar loop at the bottom. */
   3.490 +    if ((((size_t) src) & 15) == 0) {
   3.491 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.492 +        const __m128 add1 = _mm_set1_ps(1.0f);
   3.493 +        const __m128 mulby127 = _mm_set1_ps(127.0f);
   3.494 +        __m128i *mmdst = (__m128i *) dst;
   3.495 +        while (i >= 16) {   /* 16 * float32 */
   3.496 +            const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src), add1), mulby127));  /* load 4 floats, bias, scale, convert to sint32 */
   3.497 +            const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+4), add1), mulby127));  /* load 4 floats, bias, scale, convert to sint32 */
   3.498 +            const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+8), add1), mulby127));  /* load 4 floats, bias, scale, convert to sint32 */
   3.499 +            const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+12), add1), mulby127));  /* load 4 floats, bias, scale, convert to sint32 */
   3.500 +            _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));  /* pack sint32 -> sint16 (signed saturation) -> uint8 (unsigned saturation), store out. */
   3.501 +            i -= 16; src += 16; mmdst++;
   3.502 +        }
   3.503 +        dst = (Uint8 *) mmdst;
   3.504 +    }
   3.505 +
   3.506 +    /* Finish off any leftovers with scalar operations. */
   3.507 +    while (i) {
   3.508 +        *dst = (Uint8) ((*src + 1.0f) * 127.0f);
   3.509 +        i--; src++; dst++;
   3.510 +    }
   3.511 +
   3.512 +    cvt->len_cvt /= 4;  /* each 4-byte float became a 1-byte sample. */
   3.513 +    if (cvt->filters[++cvt->filter_index]) {
   3.514 +        cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
   3.515 +    }
   3.516 +}
   3.517 +
   3.518 +static void SDLCALL
   3.519 +SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.520 +{   /* Shrinks cvt->buf in place from float to Sint16 (sample * 32767.0f), front to back, then runs the next filter (if any). */
   3.521 +    const float *src = (const float *) cvt->buf;
   3.522 +    Sint16 *dst = (Sint16 *) cvt->buf;
   3.523 +    int i;  /* samples still to convert */
   3.524 +
   3.525 +    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using SSE2)");
   3.526 +
   3.527 +    /* Get dst aligned to 16 bytes */
   3.528 +    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
   3.529 +        *dst = (Sint16) (*src * 32767.0f);
   3.530 +    }
   3.531 +
   3.532 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.533 +
   3.534 +    /* Make sure src is aligned too; if it is not, everything left goes through the scalar loop at the bottom. */
   3.535 +    if ((((size_t) src) & 15) == 0) {
   3.536 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.537 +        const __m128 mulby32767 = _mm_set1_ps(32767.0f);
   3.538 +        __m128i *mmdst = (__m128i *) dst;
   3.539 +        while (i >= 8) {   /* 8 * float32 */
   3.540 +            const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767));  /* load 4 floats, scale, convert to sint32 */
   3.541 +            const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767));  /* load 4 floats, scale, convert to sint32 */
   3.542 +            _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));  /* pack to sint16, store out. */
   3.543 +            i -= 8; src += 8; mmdst++;
   3.544 +        }
   3.545 +        dst = (Sint16 *) mmdst;
   3.546 +    }
   3.547 +
   3.548 +    /* Finish off any leftovers with scalar operations (same formula as the alignment loop above). */
   3.549 +    while (i) {
   3.550 +        *dst = (Sint16) (*src * 32767.0f);
   3.551 +        i--; src++; dst++;
   3.552 +    }
   3.553 +
   3.554 +    cvt->len_cvt /= 2;  /* each 4-byte float became a 2-byte sample. */
   3.555 +    if (cvt->filters[++cvt->filter_index]) {
   3.556 +        cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
   3.557 +    }
   3.558 +}
   3.559 +
   3.560 +static void SDLCALL
   3.561 +SDL_Convert_F32_to_U16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.562 +{   /* Shrinks cvt->buf in place from float to Uint16, front to back, then runs the next filter (if any). */
   3.563 +    const float *src = (const float *) cvt->buf;
   3.564 +    Uint16 *dst = (Uint16 *) cvt->buf;
   3.565 +    int i;  /* samples still to convert */
   3.566 +
   3.567 +    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using SSE2)");
   3.568 +
   3.569 +    /* Get dst aligned to 16 bytes */
   3.570 +    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
   3.571 +        *dst = (Uint16) ((*src + 1.0f) * 32767.0f);
   3.572 +    }
   3.573 +
   3.574 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.575 +
   3.576 +    /* Make sure src is aligned too; if it is not, everything left goes through the scalar loop at the bottom. */
   3.577 +    if ((((size_t) src) & 15) == 0) {
   3.578 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.579 +        /* This calculates differently than the scalar path because SSE2 can't
   3.580 +           pack int32 data down to unsigned int16. _mm_packs_epi32 does signed
   3.581 +           saturation, so that would corrupt our data. _mm_packus_epi32 exists,
   3.582 +           but not before SSE 4.1. So we convert from float to sint16, packing
   3.583 +           that down with legit signed saturation, and then xor the top bit
   3.584 +           against 1. This results in the correct unsigned 16-bit value, even
   3.585 +           though it looks like dark magic. */
   3.586 +        const __m128 mulby32767 = _mm_set1_ps(32767.0f);
   3.587 +        const __m128i topbit = _mm_set1_epi16(-32768);  /* 0x8000 in every 16-bit lane. */
   3.588 +        __m128i *mmdst = (__m128i *) dst;
   3.589 +        while (i >= 8) {   /* 8 * float32 */
   3.590 +            const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767));  /* load 4 floats, scale, convert to sint32 */
   3.591 +            const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767));  /* load 4 floats, scale, convert to sint32 */
   3.592 +            _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));  /* pack to sint16, xor top bit (same as adding 32768 to get the unsigned value), store out. */
   3.593 +            i -= 8; src += 8; mmdst++;
   3.594 +        }
   3.595 +        dst = (Uint16 *) mmdst;
   3.596 +    }
   3.597 +
   3.598 +    /* Finish off any leftovers with scalar operations. */
   3.599 +    while (i) {
   3.600 +        *dst = (Uint16) ((*src + 1.0f) * 32767.0f);
   3.601 +        i--; src++; dst++;
   3.602 +    }
   3.603 +
   3.604 +    cvt->len_cvt /= 2;  /* each 4-byte float became a 2-byte sample. */
   3.605 +    if (cvt->filters[++cvt->filter_index]) {
   3.606 +        cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
   3.607 +    }
   3.608 +}
   3.609 +
   3.610 +static void SDLCALL
   3.611 +SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
   3.612 +{
   3.613 +    const float *src = (const float *) cvt->buf;
   3.614 +    Sint32 *dst = (Sint32 *) cvt->buf;
   3.615 +    int i;
   3.616 +
   3.617 +    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using SSE2)");
   3.618 +
   3.619 +    /* Get dst aligned to 16 bytes */
   3.620 +    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
   3.621 +        *dst = (Sint32) (((double) *src) * 2147483647.0);
   3.622 +    }
   3.623 +
   3.624 +    SDL_assert(!i || ((((size_t) dst) & 15) == 0));
   3.625 +    SDL_assert(!i || ((((size_t) src) & 15) == 0));
   3.626 +
   3.627 +    {
   3.628 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
   3.629 +        const __m128d mulby2147483647 = _mm_set1_pd(2147483647.0);
   3.630 +        __m128i *mmdst = (__m128i *) dst;
   3.631 +        while (i >= 4) {   /* 4 * float32 */
   3.632 +            const __m128 floats = _mm_load_ps(src);
   3.633 +            /* bitshift the whole register over, so _mm_cvtps_pd can read the top floats in the bottom of the vector. */
   3.634 +            const __m128d doubles1 = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_bsrli_si128(_mm_castps_si128(floats), 8))), mulby2147483647);
   3.635 +            const __m128d doubles2 = _mm_mul_pd(_mm_cvtps_pd(floats), mulby2147483647);
   3.636 +            _mm_store_si128(mmdst, _mm_or_si128(_mm_bslli_si128(_mm_cvtpd_epi32(doubles1), 8), _mm_cvtpd_epi32(doubles2)));
   3.637 +            i -= 4; src += 4; mmdst++;
   3.638 +        }
   3.639 +        dst = (Sint32 *) mmdst;
   3.640 +    }
   3.641 +
   3.642 +    /* Finish off any leftovers with scalar operations. */
   3.643 +    while (i) {
   3.644 +        *dst = (Sint32) (((double) *src) * 2147483647.0);
   3.645 +        i--; src++; dst++;
   3.646 +    }
   3.647 +
   3.648 +    if (cvt->filters[++cvt->filter_index]) {
   3.649 +        cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
   3.650 +    }
   3.651 +}
   3.652 +#endif
   3.653 +
   3.654 +
   3.655 +void SDL_ChooseAudioConverters(void)
   3.656 +{
   3.657 +    static SDL_bool converters_chosen = SDL_FALSE;
   3.658 +
   3.659 +    if (converters_chosen) {
   3.660 +        return;
   3.661 +    }
   3.662 +
   3.663 +    #define SET_CONVERTER_FUNCS(fntype) \
   3.664 +        SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
   3.665 +        SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
   3.666 +        SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
   3.667 +        SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
   3.668 +        SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
   3.669 +        SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
   3.670 +        SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
   3.671 +        SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
   3.672 +        SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
   3.673 +        SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
   3.674 +        converters_chosen = SDL_TRUE
   3.675 +
   3.676 +    #if HAVE_SSE2_INTRINSICS
   3.677 +    if (SDL_HasSSE2()) {
   3.678 +        SET_CONVERTER_FUNCS(SSE2);
   3.679 +        return;
   3.680 +    }
   3.681 +    #endif
   3.682 +
   3.683 +    #if NEED_SCALAR_CONVERTER_FALLBACKS
   3.684 +    SET_CONVERTER_FUNCS(Scalar);
   3.685 +    #endif
   3.686 +
   3.687 +    #undef SET_CONVERTER_FUNCS
   3.688 +
   3.689 +    SDL_assert(converters_chosen == SDL_TRUE);
   3.690 +}
   3.691  
   3.692  /* vi: set ts=4 sw=4 expandtab: */