audio: Added SSE3 implementation of SDL_ConvertStereoToMono().
author Ryan C. Gordon <icculus@icculus.org>
Mon, 23 Jan 2017 00:57:19 -0500
changeset 10832 189266031c6f
parent 10831 fcbb4d7f2344
child 10833 86f6353f1aae
audio: Added SSE3 implementation of SDL_ConvertStereoToMono().
src/audio/SDL_audiocvt.c
     1.1 --- a/src/audio/SDL_audiocvt.c	Sun Jan 22 20:18:59 2017 -0500
     1.2 +++ b/src/audio/SDL_audiocvt.c	Mon Jan 23 00:57:19 2017 -0500
     1.3 @@ -29,6 +29,50 @@
     1.4  #include "SDL_assert.h"
     1.5  #include "../SDL_dataqueue.h"
     1.6  
     1.7 +/* !!! FIXME: wire this up to the configure script, etc. */
     1.8 +#include "SDL_cpuinfo.h"
     1.9 +#define HAVE_SSE3_INTRINSICS 0
    1.10 +
    1.11 +#if HAVE_SSE3_INTRINSICS
    1.12 +#include <pmmintrin.h>
    1.13 +#endif
    1.14 +
    1.15 +#if HAVE_SSE3_INTRINSICS
    1.16 +/* Effectively mix right and left channels into a single channel */
    1.17 +static void SDLCALL
    1.18 +SDL_ConvertStereoToMono_SSE3(SDL_AudioCVT * cvt, SDL_AudioFormat format)
    1.19 +{
    1.20 +    float *dst = (float *) cvt->buf;
    1.21 +    const float *src = dst;
    1.22 +    int i = cvt->len_cvt / 8;
    1.23 +
    1.24 +    LOG_DEBUG_CONVERT("stereo", "mono (using SSE3)");
    1.25 +    SDL_assert(format == AUDIO_F32SYS);
    1.26 +
    1.27 +    /* We can only do this if dst is aligned to 16 bytes; since src is the
    1.28 +       same pointer and it moves by 2, it can't be forcibly aligned. */
    1.29 +    if ((((size_t) dst) & 15) == 0) {
    1.30 +        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
    1.31 +        const __m128 divby2 = _mm_set1_ps(0.5f);
    1.32 +        while (i >= 4) {   /* 4 * float32 */
    1.33 +            _mm_store_ps(dst, _mm_mul_ps(_mm_hadd_ps(_mm_load_ps(src), _mm_load_ps(src+4)), divby2));
    1.34 +            i -= 4; src += 8; dst += 4;
    1.35 +        }
    1.36 +    }
    1.37 +
    1.38 +    /* Finish off any leftovers with scalar operations. */
    1.39 +    while (i) {
    1.40 +        *dst = (src[0] + src[1]) * 0.5f;
    1.41 +        dst++; i--; src += 2;
    1.42 +    }
    1.43 +
    1.44 +    cvt->len_cvt /= 2;
    1.45 +    if (cvt->filters[++cvt->filter_index]) {
    1.46 +        cvt->filters[cvt->filter_index] (cvt, format);
    1.47 +    }
    1.48 +}
    1.49 +#endif
    1.50 +
    1.51  /* Effectively mix right and left channels into a single channel */
    1.52  static void SDLCALL
    1.53  SDL_ConvertStereoToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
    1.54 @@ -579,7 +623,20 @@
    1.55           */
    1.56          while (((src_channels % 2) == 0) &&
    1.57                 ((src_channels / 2) >= dst_channels)) {
    1.58 -            cvt->filters[cvt->filter_index++] = SDL_ConvertStereoToMono;
    1.59 +            SDL_AudioFilter filter = NULL;
    1.60 +
    1.61 +            #if HAVE_SSE3_INTRINSICS
    1.62 +            if (SDL_HasSSE3()) {
    1.63 +                filter = SDL_ConvertStereoToMono_SSE3;
    1.64 +            }
    1.65 +            #endif
    1.66 +
    1.67 +            if (!filter) {
    1.68 +                filter = SDL_ConvertStereoToMono;
    1.69 +            }
    1.70 +
    1.71 +            cvt->filters[cvt->filter_index++] = filter;
    1.72 +
    1.73              src_channels /= 2;
    1.74              cvt->len_ratio /= 2;
    1.75          }