audio: Make the simple resampler operate in-place.
author Ryan C. Gordon <icculus@icculus.org>
Sun, 22 Jan 2017 23:48:15 -0500
changeset 10833 86f6353f1aae
parent 10832 189266031c6f
child 10834 336efe4fc373
audio: Make the simple resampler operate in-place.

This allows us to avoid an extra copy, allocate less memory and reduce cache
pressure. On the downside: we have to do a lot of tapdancing to resample the
buffer in reverse when the output is growing.
src/audio/SDL_audiocvt.c
     1.1 --- a/src/audio/SDL_audiocvt.c	Mon Jan 23 00:57:19 2017 -0500
     1.2 +++ b/src/audio/SDL_audiocvt.c	Sun Jan 22 23:48:15 2017 -0500
     1.3 @@ -245,27 +245,60 @@
     1.4      const int finalpos = (total * chans) - chans;
     1.5      const int dest_samples = (int)(((double)total) * rate_incr);
     1.6      const double src_incr = 1.0 / rate_incr;
     1.7 -    float *dst = outbuf;
     1.8 -    float *target = (dst + (dest_samples * chans));
     1.9 -    double idx = 0.0;
    1.10 +    float *dst;
    1.11 +    double idx;
    1.12      int i;
    1.13  
    1.14      SDL_assert((dest_samples * framelen) <= outbuflen);
    1.15      SDL_assert((inbuflen % framelen) == 0);
    1.16  
    1.17 -    while (dst < target) {
    1.18 -        const int pos = ((int)idx) * chans;
    1.19 -        const float *src = &inbuf[pos];
    1.20 -        SDL_assert(pos <= finalpos);
    1.21 -        for (i = 0; i < chans; i++) {
    1.22 -            const float val = *(src++);
    1.23 -            *(dst++) = (val + last_sample[i]) * 0.5f;
    1.24 -            last_sample[i] = val;
    1.25 +    if (rate_incr > 1.0) {
    1.26 +        float *target = (outbuf + chans);
    1.27 +        const float *earlier_sample = &inbuf[finalpos];
    1.28 +        float final_sample[8];
    1.29 +        dst = outbuf + (dest_samples * chans);
    1.30 +        idx = (double) total;
    1.31 +
    1.32 +        /* save this off so we can correctly maintain state between runs. */
    1.33 +        SDL_memcpy(final_sample, &inbuf[finalpos], framelen);
    1.34 +
    1.35 +        while (dst > target) {
    1.36 +            const int pos = ((int) idx) * chans;
    1.37 +            const float *src = &inbuf[pos];
    1.38 +            SDL_assert(pos >= 0.0);
    1.39 +            for (i = chans - 1; i >= 0; i--) {
    1.40 +                const float val = *(--src);
    1.41 +                *(--dst) = (val + earlier_sample[i]) * 0.5f;
    1.42 +            }
    1.43 +            earlier_sample = src;
    1.44 +            idx -= src_incr;
    1.45          }
    1.46 -        idx += src_incr;
    1.47 +
    1.48 +        /* do last sample, interpolated against previous run's state. */
    1.49 +        for (i = chans - 1; i >= 0; i--) {
    1.50 +            const float val = inbuf[i];
    1.51 +            *(--dst) = (val + last_sample[i]) * 0.5f;
    1.52 +        }
    1.53 +        SDL_memcpy(last_sample, final_sample, framelen);
    1.54 +        dst = (outbuf + (dest_samples * chans)) - 1;
    1.55 +    } else {
    1.56 +        float *target = (outbuf + (dest_samples * chans));
    1.57 +        dst = outbuf;
    1.58 +        idx = 0.0;
    1.59 +        while (dst < target) {
    1.60 +            const int pos = ((int) idx) * chans;
    1.61 +            const float *src = &inbuf[pos];
    1.62 +            SDL_assert(pos <= finalpos);
    1.63 +            for (i = 0; i < chans; i++) {
    1.64 +                const float val = *(src++);
    1.65 +                *(dst++) = (val + last_sample[i]) * 0.5f;
    1.66 +                last_sample[i] = val;
    1.67 +            }
    1.68 +            idx += src_incr;
    1.69 +        }
    1.70      }
    1.71  
    1.72 -    return (int) ((dst - outbuf) * (int)sizeof(float));
    1.73 +    return (int) ((dst - outbuf) * ((int) sizeof (float)));
    1.74  }
    1.75  
    1.76  
    1.77 @@ -420,8 +453,8 @@
    1.78  {
    1.79      const float *src = (const float *) cvt->buf;
    1.80      const int srclen = cvt->len_cvt;
    1.81 -    float *dst = (float *) (cvt->buf + srclen);
    1.82 -    const int dstlen = (cvt->len * cvt->len_mult) - srclen;
    1.83 +    float *dst = (float *) cvt->buf;
    1.84 +    const int dstlen = (cvt->len * cvt->len_mult);
    1.85      float state[8];
    1.86  
    1.87      SDL_assert(format == AUDIO_F32SYS);
    1.88 @@ -429,8 +462,6 @@
    1.89      SDL_memcpy(state, src, chans*sizeof(*src));
    1.90  
    1.91      cvt->len_cvt = SDL_ResampleAudioSimple(chans, cvt->rate_incr, state, src, srclen, dst, dstlen);
    1.92 -
    1.93 -    SDL_memcpy(cvt->buf, dst, cvt->len_cvt);
    1.94      if (cvt->filters[++cvt->filter_index]) {
    1.95          cvt->filters[cvt->filter_index](cvt, format);
    1.96      }
    1.97 @@ -492,10 +523,6 @@
    1.98          cvt->len_ratio /= ((double) src_rate) / ((double) dst_rate);
    1.99      }
   1.100  
   1.101 -    /* the buffer is big enough to hold the destination now, but
   1.102 -       we need it large enough to hold a separate scratch buffer. */
   1.103 -    cvt->len_mult *= 2;
   1.104 -
   1.105      return 1;               /* added a converter. */
   1.106  }
   1.107