Final merge of Google Summer of Code 2008 work...
author Sam Lantinga <slouken@libsdl.org>
Mon, 25 Aug 2008 15:08:59 +0000
changeset 2716 f8f68f47285a
parent 2715 336b604ec15b
child 2717 50bc882455e5
Final merge of Google Summer of Code 2008 work...

Audio Ideas - Resampling and Pitch Shifting
by Aaron Wishnick, mentored by Ryan C. Gordon
src/audio/SDL_audio.c
src/audio/SDL_audiocvt.c
src/audio/SDL_sysaudio.h
     1.1 --- a/src/audio/SDL_audio.c	Mon Aug 25 10:14:21 2008 +0000
     1.2 +++ b/src/audio/SDL_audio.c	Mon Aug 25 15:08:59 2008 +0000
     1.3 @@ -256,6 +256,68 @@
     1.4  #undef FILL_STUB
     1.5  }
     1.6  
     1.7 +/* Streaming functions (for when the input and output buffer sizes are different) */
     1.8 +/* Write [length] bytes from buf into the streamer */
     1.9 +void
    1.10 +SDL_StreamWrite(SDL_AudioStreamer * stream, Uint8 * buf, int length)
    1.11 +{
    1.12 +    int i;
    1.13 +
    1.14 +    for (i = 0; i < length; ++i) {
    1.15 +        stream->buffer[stream->write_pos] = buf[i];
    1.16 +        ++stream->write_pos;
    1.17 +    }
    1.18 +}
    1.19 +
    1.20 +/* Read [length] bytes out of the streamer into buf */
    1.21 +void
    1.22 +SDL_StreamRead(SDL_AudioStreamer * stream, Uint8 * buf, int length)
    1.23 +{
    1.24 +    int i;
    1.25 +
    1.26 +    for (i = 0; i < length; ++i) {
    1.27 +        buf[i] = stream->buffer[stream->read_pos];
    1.28 +        ++stream->read_pos;
    1.29 +    }
    1.30 +}
    1.31 +
    1.32 +int
    1.33 +SDL_StreamLength(SDL_AudioStreamer * stream)
    1.34 +{
    1.35 +    return (stream->write_pos - stream->read_pos) % stream->max_len;
    1.36 +}
    1.37 +
    1.38 +/* Initialize the stream by allocating the buffer and setting the read/write heads to the beginning */
    1.39 +int
    1.40 +SDL_StreamInit(SDL_AudioStreamer * stream, int max_len, Uint8 silence)
    1.41 +{
    1.42 +    int i;
    1.43 +
    1.44 +    /* First try to allocate the buffer */
    1.45 +    stream->buffer = (Uint8 *) malloc(max_len);
    1.46 +    if (stream->buffer == NULL) {
    1.47 +        return -1;
    1.48 +    }
    1.49 +
    1.50 +    stream->max_len = max_len;
    1.51 +    stream->read_pos = 0;
    1.52 +    stream->write_pos = 0;
    1.53 +
    1.54 +    /* Fill the buffer with the silence value */
    1.55 +    for (i = 0; i < max_len; ++i) {
    1.56 +        stream->buffer[i] = silence;
    1.57 +    }
    1.58 +}
    1.59 +
    1.60 +/* Deinitialize the stream simply by freeing the buffer */
    1.61 +void
    1.62 +SDL_StreamDeinit(SDL_AudioStreamer * stream)
    1.63 +{
    1.64 +    if (stream->buffer != NULL) {
    1.65 +        free(stream->buffer);
    1.66 +    }
    1.67 +}
    1.68 +
    1.69  
    1.70  /* The general mixing thread function */
    1.71  int SDLCALL
    1.72 @@ -267,6 +329,11 @@
    1.73      void *udata;
    1.74      void (SDLCALL * fill) (void *userdata, Uint8 * stream, int len);
    1.75      int silence;
    1.76 +    int stream_max_len;
    1.77 +
    1.78 +    /* For streaming when the buffer sizes don't match up */
    1.79 +    Uint8 *istream;
    1.80 +    int istream_len;
    1.81  
    1.82      /* Perform any thread setup */
    1.83      device->threadid = SDL_ThreadID();
    1.84 @@ -276,67 +343,188 @@
    1.85      fill = device->spec.callback;
    1.86      udata = device->spec.userdata;
    1.87  
    1.88 +    /* By default do not stream */
    1.89 +    device->use_streamer = 0;
    1.90 +
    1.91      if (device->convert.needed) {
    1.92          if (device->convert.src_format == AUDIO_U8) {
    1.93              silence = 0x80;
    1.94          } else {
    1.95              silence = 0;
    1.96          }
    1.97 -        stream_len = device->convert.len;
    1.98 +
    1.99 +        /* If the result of the conversion alters the length, i.e. resampling is being used, use the streamer */
   1.100 +        if (device->convert.len_mult != 1 || device->convert.len_div != 1) {
   1.101 +            /* The streamer's maximum length should be twice whichever is larger: spec.size or len_cvt */
   1.102 +            stream_max_len = 2 * device->spec.size;
   1.103 +            if (device->convert.len_mult > device->convert.len_div) {
   1.104 +                stream_max_len *= device->convert.len_mult;
   1.105 +                stream_max_len /= device->convert.len_div;
   1.106 +            }
   1.107 +            if (SDL_StreamInit(&device->streamer, stream_max_len, silence) <
   1.108 +                0)
   1.109 +                return -1;
   1.110 +            device->use_streamer = 1;
   1.111 +
   1.112 +            /* istream_len should be the length of what we grab from the callback and feed to conversion,
   1.113 +               so that we get close to spec_size. I.e. we want device.spec_size = istream_len * u / d
   1.114 +             */
   1.115 +            istream_len =
   1.116 +                device->spec.size * device->convert.len_div /
   1.117 +                device->convert.len_mult;
   1.118 +        }
   1.119 +
   1.120 +        /* stream_len = device->convert.len; */
   1.121 +        stream_len = device->spec.size;
   1.122      } else {
   1.123          silence = device->spec.silence;
   1.124          stream_len = device->spec.size;
   1.125      }
   1.126  
   1.127 -    /* Loop, filling the audio buffers */
   1.128 -    while (device->enabled) {
   1.129 +    /* Determine if the streamer is necessary here */
   1.130 +    if (device->use_streamer == 1) {
   1.131 +        /* This code is almost the same as the old code. The difference is, instead of reading
   1.132 +           directly from the callback into "stream", then converting and sending the audio off,
   1.133 +           we go: callback -> "istream" -> (conversion) -> streamer -> stream -> device.
   1.134 +           However, reading and writing with streamer are done separately:
   1.135 +           - We only call the callback and write to the streamer when the streamer does not
   1.136 +           contain enough samples to output to the device.
   1.137 +           - We only read from the streamer and tell the device to play when the streamer
   1.138 +           does have enough samples to output.
   1.139 +           This allows us to perform resampling in the conversion step, where the output of the
   1.140 +           resampling process can be any number. We will have to see what a good size for the
   1.141 +           stream's maximum length is, but I suspect 2*max(len_cvt, stream_len) is a good figure.
   1.142 +         */
   1.143 +        while (device->enabled) {
   1.144 +            /* Only read in audio if the streamer doesn't have enough already (if it does not have enough samples to output) */
   1.145 +            if (SDL_StreamLength(&device->streamer) < stream_len) {
   1.146 +                /* Set up istream */
   1.147 +                if (device->convert.needed) {
   1.148 +                    if (device->convert.buf) {
   1.149 +                        istream = device->convert.buf;
   1.150 +                    } else {
   1.151 +                        continue;
   1.152 +                    }
   1.153 +                } else {
   1.154 +                    istream = current_audio.impl.GetDeviceBuf(device);
   1.155 +                    if (istream == NULL) {
   1.156 +                        istream = device->fake_stream;
   1.157 +                    }
   1.158 +                }
   1.159  
   1.160 -        /* Fill the current buffer with sound */
   1.161 -        if (device->convert.needed) {
   1.162 -            if (device->convert.buf) {
   1.163 -                stream = device->convert.buf;
   1.164 +                /* Read from the callback into the _input_ stream */
   1.165 +                if (!device->paused) {
   1.166 +                    SDL_mutexP(device->mixer_lock);
   1.167 +                    (*fill) (udata, istream, istream_len);
   1.168 +                    SDL_mutexV(device->mixer_lock);
   1.169 +                }
   1.170 +
   1.171 +                /* Convert the audio if necessary and write to the streamer */
   1.172 +                if (device->convert.needed) {
   1.173 +                    SDL_ConvertAudio(&device->convert);
   1.174 +                    if (istream == NULL) {
   1.175 +                        istream = device->fake_stream;
   1.176 +                    }
   1.177 +                    /*SDL_memcpy(istream, device->convert.buf, device->convert.len_cvt); */
   1.178 +                    SDL_StreamWrite(&device->streamer, device->convert.buf,
   1.179 +                                    device->convert.len_cvt);
   1.180 +                } else {
   1.181 +                    SDL_StreamWrite(&device->streamer, istream, istream_len);
   1.182 +                }
   1.183 +            }
   1.184 +
   1.185 +            /* Only output audio if the streamer has enough to output */
   1.186 +            if (SDL_StreamLength(&device->streamer) >= stream_len) {
   1.187 +                /* Set up the output stream */
   1.188 +                if (device->convert.needed) {
   1.189 +                    if (device->convert.buf) {
   1.190 +                        stream = device->convert.buf;
   1.191 +                    } else {
   1.192 +                        continue;
   1.193 +                    }
   1.194 +                } else {
   1.195 +                    stream = current_audio.impl.GetDeviceBuf(device);
   1.196 +                    if (stream == NULL) {
   1.197 +                        stream = device->fake_stream;
   1.198 +                    }
   1.199 +                }
   1.200 +
   1.201 +                /* Now read from the streamer */
   1.202 +                SDL_StreamRead(&device->streamer, stream, stream_len);
   1.203 +
   1.204 +                /* Ready current buffer for play and change current buffer */
   1.205 +                if (stream != device->fake_stream) {
   1.206 +                    current_audio.impl.PlayDevice(device);
   1.207 +                }
   1.208 +
   1.209 +                /* Wait for an audio buffer to become available */
   1.210 +                if (stream == device->fake_stream) {
   1.211 +                    SDL_Delay((device->spec.samples * 1000) /
   1.212 +                              device->spec.freq);
   1.213 +                } else {
   1.214 +                    current_audio.impl.WaitDevice(device);
   1.215 +                }
   1.216 +            }
   1.217 +
   1.218 +        }
   1.219 +    } else {
   1.220 +        /* Otherwise, do not use the streamer. This is the old code. */
   1.221 +
   1.222 +        /* Loop, filling the audio buffers */
   1.223 +        while (device->enabled) {
   1.224 +
   1.225 +            /* Fill the current buffer with sound */
   1.226 +            if (device->convert.needed) {
   1.227 +                if (device->convert.buf) {
   1.228 +                    stream = device->convert.buf;
   1.229 +                } else {
   1.230 +                    continue;
   1.231 +                }
   1.232              } else {
   1.233 -                continue;
   1.234 +                stream = current_audio.impl.GetDeviceBuf(device);
   1.235 +                if (stream == NULL) {
   1.236 +                    stream = device->fake_stream;
   1.237 +                }
   1.238              }
   1.239 -        } else {
   1.240 -            stream = current_audio.impl.GetDeviceBuf(device);
   1.241 -            if (stream == NULL) {
   1.242 -                stream = device->fake_stream;
   1.243 +
   1.244 +            if (!device->paused) {
   1.245 +                SDL_mutexP(device->mixer_lock);
   1.246 +                (*fill) (udata, stream, stream_len);
   1.247 +                SDL_mutexV(device->mixer_lock);
   1.248              }
   1.249 -        }
   1.250  
   1.251 -        if (!device->paused) {
   1.252 -            SDL_mutexP(device->mixer_lock);
   1.253 -            (*fill) (udata, stream, stream_len);
   1.254 -            SDL_mutexV(device->mixer_lock);
   1.255 -        }
   1.256 +            /* Convert the audio if necessary */
   1.257 +            if (device->convert.needed) {
   1.258 +                SDL_ConvertAudio(&device->convert);
   1.259 +                stream = current_audio.impl.GetDeviceBuf(device);
   1.260 +                if (stream == NULL) {
   1.261 +                    stream = device->fake_stream;
   1.262 +                }
   1.263 +                SDL_memcpy(stream, device->convert.buf,
   1.264 +                           device->convert.len_cvt);
   1.265 +            }
   1.266  
   1.267 -        /* Convert the audio if necessary */
   1.268 -        if (device->convert.needed) {
   1.269 -            SDL_ConvertAudio(&device->convert);
   1.270 -            stream = current_audio.impl.GetDeviceBuf(device);
   1.271 -            if (stream == NULL) {
   1.272 -                stream = device->fake_stream;
   1.273 +            /* Ready current buffer for play and change current buffer */
   1.274 +            if (stream != device->fake_stream) {
   1.275 +                current_audio.impl.PlayDevice(device);
   1.276              }
   1.277 -            SDL_memcpy(stream, device->convert.buf, device->convert.len_cvt);
   1.278 -        }
   1.279  
   1.280 -        /* Ready current buffer for play and change current buffer */
   1.281 -        if (stream != device->fake_stream) {
   1.282 -            current_audio.impl.PlayDevice(device);
   1.283 -        }
   1.284 -
   1.285 -        /* Wait for an audio buffer to become available */
   1.286 -        if (stream == device->fake_stream) {
   1.287 -            SDL_Delay((device->spec.samples * 1000) / device->spec.freq);
   1.288 -        } else {
   1.289 -            current_audio.impl.WaitDevice(device);
   1.290 +            /* Wait for an audio buffer to become available */
   1.291 +            if (stream == device->fake_stream) {
   1.292 +                SDL_Delay((device->spec.samples * 1000) / device->spec.freq);
   1.293 +            } else {
   1.294 +                current_audio.impl.WaitDevice(device);
   1.295 +            }
   1.296          }
   1.297      }
   1.298  
   1.299      /* Wait for the audio to drain.. */
   1.300      current_audio.impl.WaitDone(device);
   1.301  
   1.302 +    /* If necessary, deinit the streamer */
   1.303 +    if (device->use_streamer == 1)
   1.304 +        SDL_StreamDeinit(&device->streamer);
   1.305 +
   1.306      return (0);
   1.307  }
   1.308  
     2.1 --- a/src/audio/SDL_audiocvt.c	Mon Aug 25 10:14:21 2008 +0000
     2.2 +++ b/src/audio/SDL_audiocvt.c	Mon Aug 25 15:08:59 2008 +0000
     2.3 @@ -20,12 +20,45 @@
     2.4      slouken@libsdl.org
     2.5  */
     2.6  #include "SDL_config.h"
     2.7 +#include <math.h>
     2.8  
     2.9  /* Functions for audio drivers to perform runtime conversion of audio format */
    2.10  
    2.11  #include "SDL_audio.h"
    2.12  #include "SDL_audio_c.h"
    2.13  
    2.14 +#define DEBUG_CONVERT
    2.15 +
    2.16 +/* These are fractional multiplication routines. That is, their inputs
    2.17 +   are two numbers in the range [-1, 1) and the result falls in that
    2.18 +   same range. The output is the same size as the inputs, i.e.
    2.19 +   32-bit x 32-bit = 32-bit.
    2.20 + */
    2.21 +
    2.22 +/* We hope here that the right shift includes sign extension */
    2.23 +#ifdef SDL_HAS_64BIT_Type
    2.24 +#define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff)
    2.25 +#else
    2.26 +/* If we don't have the 64-bit type, do something more complicated. See http://www.8052.com/mul16.phtml or http://www.cs.uaf.edu/~cs301/notes/Chapter5/node5.html */
    2.27 +#define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff)
    2.28 +#endif
    2.29 +#define SDL_FixMpy16(a, b) ((((Sint32)a * (Sint32)b) >> 14) & 0xffff)
    2.30 +#define SDL_FixMpy8(a, b) ((((Sint16)a * (Sint16)b) >> 7) & 0xff)
    2.31 +/* This macro just makes the floating point filtering code not have to be a special case. */
    2.32 +#define SDL_FloatMpy(a, b) (a * b)
    2.33 +
    2.34 +/* These macros take floating point numbers in the range [-1.0f, 1.0f) and
    2.35 +   represent them as fixed-point numbers in that same range. There's no
    2.36 +   checking that the floating point argument is inside the appropriate range.
    2.37 + */
    2.38 +
    2.39 +#define SDL_Make_1_7(a) (Sint8)(a * 128.0f)
    2.40 +#define SDL_Make_1_15(a) (Sint16)(a * 32768.0f)
    2.41 +#define SDL_Make_1_31(a) (Sint32)(a * 2147483648.0f)
    2.42 +#define SDL_Make_2_6(a) (Sint8)(a * 64.0f)
    2.43 +#define SDL_Make_2_14(a) (Sint16)(a * 16384.0f)
    2.44 +#define SDL_Make_2_30(a) (Sint32)(a * 1073741824.0f)
    2.45 +
    2.46  /* Effectively mix right and left channels into a single channel */
    2.47  static void SDLCALL
    2.48  SDL_ConvertMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
    2.49 @@ -1309,6 +1342,468 @@
    2.50      return 0;                   /* no conversion necessary. */
    2.51  }
    2.52  
    2.53 +/* Generate the necessary IIR lowpass coefficients for resampling.
    2.54 +   Assume that the SDL_AudioCVT struct is already set up with
    2.55 +   the correct values for len_mult and len_div, and use the
    2.56 +   type of dst_format. Also assume the buffer is allocated.
    2.57 +   Note the buffer needs to be 6 units long.
    2.58 +   For now, use RBJ's cookbook coefficients. It might be more
    2.59 +   optimal to create a Butterworth filter, but this is more difficult.
    2.60 +*/
    2.61 +int
    2.62 +SDL_BuildIIRLowpass(SDL_AudioCVT * cvt, SDL_AudioFormat format)
    2.63 +{
    2.64 +    float fc;                   /* cutoff frequency */
    2.65 +    float coeff[6];             /* floating point iir coefficients b0, b1, b2, a0, a1, a2 */
    2.66 +    float scale;
    2.67 +    float w0, alpha, cosw0;
    2.68 +    int i;
    2.69 +
    2.70 +    /* The higher Q is, the higher CUTOFF can be. Need to find a good balance to avoid aliasing */
    2.71 +    static const float Q = 5.0f;
    2.72 +    static const float CUTOFF = 0.4f;
    2.73 +
    2.74 +    fc = (cvt->len_mult >
    2.75 +          cvt->len_div) ? CUTOFF / (float) cvt->len_mult : CUTOFF /
    2.76 +        (float) cvt->len_div;
    2.77 +
    2.78 +    w0 = 2.0f * M_PI * fc;
    2.79 +    cosw0 = cosf(w0);
    2.80 +    alpha = sin(w0) / (2.0f * Q);
    2.81 +
    2.82 +    /* Compute coefficients, normalizing by a0 */
    2.83 +    scale = 1.0f / (1.0f + alpha);
    2.84 +
    2.85 +    coeff[0] = (1.0f - cosw0) / 2.0f * scale;
    2.86 +    coeff[1] = (1.0f - cosw0) * scale;
    2.87 +    coeff[2] = coeff[0];
    2.88 +
    2.89 +    coeff[3] = 1.0f;            /* a0 is normalized to 1 */
    2.90 +    coeff[4] = -2.0f * cosw0 * scale;
    2.91 +    coeff[5] = (1.0f - alpha) * scale;
    2.92 +
    2.93 +    /* Copy the coefficients to the struct. If necessary, convert coefficients to fixed point, using the range (-2.0, 2.0) */
    2.94 +#define convert_fixed(type, fix) { \
    2.95 +            type *cvt_coeff = (type *)cvt->coeff; \
    2.96 +            for(i = 0; i < 6; ++i) { \
    2.97 +                cvt_coeff[i] = fix(coeff[i]); \
    2.98 +            } \
    2.99 +        }
   2.100 +
   2.101 +    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
   2.102 +        float *cvt_coeff = (float *) cvt->coeff;
   2.103 +        for (i = 0; i < 6; ++i) {
   2.104 +            cvt_coeff[i] = coeff[i];
   2.105 +        }
   2.106 +    } else {
   2.107 +        switch (SDL_AUDIO_BITSIZE(format)) {
   2.108 +        case 8:
   2.109 +            convert_fixed(Uint8, SDL_Make_2_6);
   2.110 +            break;
   2.111 +        case 16:
   2.112 +            convert_fixed(Uint16, SDL_Make_2_14);
   2.113 +            break;
   2.114 +        case 32:
   2.115 +            convert_fixed(Uint32, SDL_Make_2_30);
   2.116 +            break;
   2.117 +        }
   2.118 +    }
   2.119 +
   2.120 +#ifdef DEBUG_CONVERT
   2.121 +#define debug_iir(type) { \
   2.122 +            type *cvt_coeff = (type *)cvt->coeff; \
   2.123 +            for(i = 0; i < 6; ++i) { \
   2.124 +                printf("coeff[%u] = %f = 0x%x\n", i, coeff[i], cvt_coeff[i]); \
   2.125 +            } \
   2.126 +        }
   2.127 +    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
   2.128 +        float *cvt_coeff = (float *) cvt->coeff;
   2.129 +        for (i = 0; i < 6; ++i) {
   2.130 +            printf("coeff[%u] = %f = %f\n", i, coeff[i], cvt_coeff[i]);
   2.131 +        }
   2.132 +    } else {
   2.133 +        switch (SDL_AUDIO_BITSIZE(format)) {
   2.134 +        case 8:
   2.135 +            debug_iir(Uint8);
   2.136 +            break;
   2.137 +        case 16:
   2.138 +            debug_iir(Uint16);
   2.139 +            break;
   2.140 +        case 32:
   2.141 +            debug_iir(Uint32);
   2.142 +            break;
   2.143 +        }
   2.144 +    }
   2.145 +#undef debug_iir
   2.146 +#endif
   2.147 +
   2.148 +    /* Initialize the state buffer to all zeroes, and set initial position */
   2.149 +    memset(cvt->state_buf, 0, 4 * SDL_AUDIO_BITSIZE(format) / 4);
   2.150 +    cvt->state_pos = 0;
   2.151 +#undef convert_fixed
   2.152 +}
   2.153 +
   2.154 +/* Apply the lowpass IIR filter to the given SDL_AudioCVT struct */
   2.155 +/* This was implemented because it would be much faster than the fir filter, 
   2.156 +   but it doesn't seem to have a steep enough cutoff so we'd need several
   2.157 +   cascaded biquads, which probably isn't a great idea. Therefore, this
   2.158 +   function can probably be discarded.
   2.159 +*/
   2.160 +static void
   2.161 +SDL_FilterIIR(SDL_AudioCVT * cvt, SDL_AudioFormat format)
   2.162 +{
   2.163 +    Uint32 i, n;
   2.164 +
   2.165 +    /* TODO: Check that n is calculated right */
   2.166 +    n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format);
   2.167 +
   2.168 +    /* Note that the coefficients are 2_x and the input is 1_x. Do we need to shift left at the end here? The right shift temp = buf[n] >> 1 needs to depend on whether the type is signed or not for sign extension. */
   2.169 +    /* cvt->state_pos = 1: state[0] = x_n-1, state[1] = x_n-2, state[2] = y_n-1, state[3] = y_n-2 */
   2.170 +#define iir_fix(type, mult) {\
   2.171 +            type *coeff = (type *)cvt->coeff; \
   2.172 +            type *state = (type *)cvt->state_buf; \
   2.173 +            type *buf = (type *)cvt->buf; \
   2.174 +            type temp; \
   2.175 +            for(i = 0; i < n; ++i) { \
   2.176 +                    temp = buf[i] >> 1; \
   2.177 +                    if(cvt->state_pos) { \
   2.178 +                        buf[i] = mult(coeff[0], temp) + mult(coeff[1], state[0]) + mult(coeff[2], state[1]) - mult(coeff[4], state[2]) - mult(coeff[5], state[3]); \
   2.179 +                        state[1] = temp; \
   2.180 +                        state[3] = buf[i]; \
   2.181 +                        cvt->state_pos = 0; \
   2.182 +                    } else { \
   2.183 +                        buf[i] = mult(coeff[0], temp) + mult(coeff[1], state[1]) + mult(coeff[2], state[0]) - mult(coeff[4], state[3]) - mult(coeff[5], state[2]); \
   2.184 +                        state[0] = temp; \
   2.185 +                        state[2] = buf[i]; \
   2.186 +                        cvt->state_pos = 1; \
   2.187 +                    } \
   2.188 +                } \
   2.189 +        }
   2.190 +/* Need to test to see if the previous method or this one is faster */
   2.191 +/*#define iir_fix(type, mult) {\
   2.192 +            type *coeff = (type *)cvt->coeff; \
   2.193 +            type *state = (type *)cvt->state_buf; \
   2.194 +            type *buf = (type *)cvt->buf; \
   2.195 +            type temp; \
   2.196 +            for(i = 0; i < n; ++i) { \
   2.197 +                    temp = buf[i] >> 1; \
   2.198 +                    buf[i] = mult(coeff[0], temp) + mult(coeff[1], state[0]) + mult(coeff[2], state[1]) - mult(coeff[4], state[2]) - mult(coeff[5], state[3]); \
   2.199 +                    state[1] = state[0]; \
   2.200 +                    state[0] = temp; \
   2.201 +                    state[3] = state[2]; \
   2.202 +                    state[2] = buf[i]; \
   2.203 +                } \
   2.204 +        }*/
   2.205 +
   2.206 +    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
   2.207 +        float *coeff = (float *) cvt->coeff;
   2.208 +        float *state = (float *) cvt->state_buf;
   2.209 +        float *buf = (float *) cvt->buf;
   2.210 +        float temp;
   2.211 +
   2.212 +        for (i = 0; i < n; ++i) {
   2.213 +            /* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2] */
   2.214 +            temp = buf[i];
   2.215 +            if (cvt->state_pos) {
   2.216 +                buf[i] =
   2.217 +                    coeff[0] * buf[n] + coeff[1] * state[0] +
   2.218 +                    coeff[2] * state[1] - coeff[4] * state[2] -
   2.219 +                    coeff[5] * state[3];
   2.220 +                state[1] = temp;
   2.221 +                state[3] = buf[i];
   2.222 +                cvt->state_pos = 0;
   2.223 +            } else {
   2.224 +                buf[i] =
   2.225 +                    coeff[0] * buf[n] + coeff[1] * state[1] +
   2.226 +                    coeff[2] * state[0] - coeff[4] * state[3] -
   2.227 +                    coeff[5] * state[2];
   2.228 +                state[0] = temp;
   2.229 +                state[2] = buf[i];
   2.230 +                cvt->state_pos = 1;
   2.231 +            }
   2.232 +        }
   2.233 +    } else {
   2.234 +        /* Treat everything as signed! */
   2.235 +        switch (SDL_AUDIO_BITSIZE(format)) {
   2.236 +        case 8:
   2.237 +            iir_fix(Sint8, SDL_FixMpy8);
   2.238 +            break;
   2.239 +        case 16:
   2.240 +            iir_fix(Sint16, SDL_FixMpy16);
   2.241 +            break;
   2.242 +        case 32:
   2.243 +            iir_fix(Sint32, SDL_FixMpy32);
   2.244 +            break;
   2.245 +        }
   2.246 +    }
   2.247 +#undef iir_fix
   2.248 +}
   2.249 +
   2.250 +/* Apply the windowed sinc FIR filter to the given SDL_AudioCVT struct.
   2.251 +*/
   2.252 +static void
   2.253 +SDL_FilterFIR(SDL_AudioCVT * cvt, SDL_AudioFormat format)
   2.254 +{
   2.255 +    int n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format);
   2.256 +    int m = cvt->len_sinc;
   2.257 +    int i, j;
   2.258 +
   2.259 +    /* 
   2.260 +       Note: We can make a big optimization here by taking advantage
   2.261 +       of the fact that the signal is zero stuffed, so we can do
   2.262 +       significantly fewer multiplications and additions. However, this
   2.263 +       depends on the zero stuffing ratio, so it may not pay off. This would
   2.264 +       basically be a polyphase filter.
   2.265 +     */
   2.266 +    /* One other way to do this fast is to look at the fir filter from a different angle:
   2.267 +       After we zero stuff, we have input of all zeroes, except for every len_mult
   2.268 +       sample. If we choose a sinc length equal to len_mult, then the fir filter becomes
   2.269 +       much more simple: we're just taking a windowed sinc, shifting it to start at each
   2.270 +       len_mult sample, and scaling it by the value of that sample. If we do this, then
   2.271 +       we don't even need to worry about the sample histories, and the inner loop here is
   2.272 +       unnecessary. This probably sacrifices some quality but could really speed things up as well.
   2.273 +     */
   2.274 +    /* We only calculate the values of samples which are 0 (mod len_div) because
   2.275 +       those are the only ones used. All the other ones are discarded in the
   2.276 +       third step of resampling. This is a huge speedup. As a warning, though,
   2.277 +       if for some reason this is used elsewhere where there are no samples discarded,
   2.278 +       the output will not be correct if len_div is not 1. To make this filter a
   2.279 +       generic FIR filter, simply remove the if statement "if(i % cvt->len_div == 0)"
   2.280 +       around the inner loop so that every sample is processed.
   2.281 +     */
   2.282 +    /* This is basically just a FIR filter. i.e. for input x_n and m coefficients,
   2.283 +       y_n = x_n*sinc_0 + x_(n-1)*sinc_1 +  x_(n-2)*sinc_2 + ... + x_(n-m+1)*sinc_(m-1)
   2.284 +     */
   2.285 +#define filter_sinc(type, mult) { \
   2.286 +            type *sinc = (type *)cvt->coeff; \
   2.287 +            type *state = (type *)cvt->state_buf; \
   2.288 +            type *buf = (type *)cvt->buf; \
   2.289 +            for(i = 0; i < n; ++i) { \
   2.290 +                state[cvt->state_pos] = buf[i]; \
   2.291 +                buf[i] = 0; \
   2.292 +                if( i % cvt->len_div == 0 ) { \
   2.293 +                    for(j = 0; j < m;  ++j) { \
   2.294 +                        buf[i] += mult(sinc[j], state[(cvt->state_pos + j) % m]); \
   2.295 +                    } \
   2.296 +                }\
   2.297 +                cvt->state_pos = (cvt->state_pos + 1) % m; \
   2.298 +            } \
   2.299 +        }
   2.300 +
   2.301 +    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
   2.302 +        filter_sinc(float, SDL_FloatMpy);
   2.303 +    } else {
   2.304 +        switch (SDL_AUDIO_BITSIZE(format)) {
   2.305 +        case 8:
   2.306 +            filter_sinc(Sint8, SDL_FixMpy8);
   2.307 +            break;
   2.308 +        case 16:
   2.309 +            filter_sinc(Sint16, SDL_FixMpy16);
   2.310 +            break;
   2.311 +        case 32:
   2.312 +            filter_sinc(Sint32, SDL_FixMpy32);
   2.313 +            break;
   2.314 +        }
   2.315 +    }
   2.316 +
   2.317 +#undef filter_sinc
   2.318 +
   2.319 +}
   2.320 +
   2.321 +/* Generate the necessary windowed sinc filter for resampling.
   2.322 +   Assume that the SDL_AudioCVT struct is already set up with
   2.323 +   the correct values for len_mult and len_div, and use the
   2.324 +   type of dst_format. Also assume the buffer is allocated.
   2.325 +   Note the buffer needs to be m+1 units long.
   2.326 +*/
   2.327 +int
   2.328 +SDL_BuildWindowedSinc(SDL_AudioCVT * cvt, SDL_AudioFormat format,
   2.329 +                      unsigned int m)
   2.330 +{
   2.331 +    float fScale;               /* scale factor for fixed point */
   2.332 +    float *fSinc;               /* floating point sinc buffer, to be converted to fixed point */
   2.333 +    float fc;                   /* cutoff frequency */
   2.334 +    float two_pi_fc, two_pi_over_m, four_pi_over_m, m_over_two;
   2.335 +    float norm_sum, norm_fact;
   2.336 +    unsigned int i;
   2.337 +
   2.338 +    /* Check that the buffer is allocated */
   2.339 +    if (cvt->coeff == NULL) {
   2.340 +        return -1;
   2.341 +    }
   2.342 +
   2.343 +    /* Set the length */
   2.344 +    cvt->len_sinc = m + 1;
   2.345 +
   2.346 +    /* Allocate the floating point windowed sinc. */
   2.347 +    fSinc = (float *) malloc((m + 1) * sizeof(float));
   2.348 +    if (fSinc == NULL) {
   2.349 +        return -1;
   2.350 +    }
   2.351 +
   2.352 +    /* Set up the filter parameters */
   2.353 +    fc = (cvt->len_mult >
   2.354 +          cvt->len_div) ? 0.5f / (float) cvt->len_mult : 0.5f /
   2.355 +        (float) cvt->len_div;
   2.356 +#ifdef DEBUG_CONVERT
   2.357 +    printf("Lowpass cutoff frequency = %f\n", fc);
   2.358 +#endif
   2.359 +    two_pi_fc = 2.0f * M_PI * fc;
   2.360 +    two_pi_over_m = 2.0f * M_PI / (float) m;
   2.361 +    four_pi_over_m = 2.0f * two_pi_over_m;
   2.362 +    m_over_two = (float) m / 2.0f;
   2.363 +    norm_sum = 0.0f;
   2.364 +
   2.365 +    for (i = 0; i <= m; ++i) {
   2.366 +        if (i == m / 2) {
   2.367 +            fSinc[i] = two_pi_fc;
   2.368 +        } else {
   2.369 +            fSinc[i] =
   2.370 +                sinf(two_pi_fc * ((float) i - m_over_two)) / ((float) i -
   2.371 +                                                              m_over_two);
   2.372 +            /* Apply blackman window */
   2.373 +            fSinc[i] *=
   2.374 +                0.42f - 0.5f * cosf(two_pi_over_m * (float) i) +
   2.375 +                0.08f * cosf(four_pi_over_m * (float) i);
   2.376 +        }
   2.377 +        norm_sum += fabs(fSinc[i]);
   2.378 +    }
   2.379 +
   2.380 +    norm_fact = 1.0f / norm_sum;
   2.381 +
   2.382 +#define convert_fixed(type, fix) { \
   2.383 +        type *dst = (type *)cvt->coeff; \
   2.384 +        for( i = 0; i <= m; ++i ) { \
   2.385 +            dst[i] = fix(fSinc[i] * norm_fact); \
   2.386 +        } \
   2.387 +    }
   2.388 +
   2.389 +    /* If we're using floating point, we only need to normalize */
   2.390 +    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
   2.391 +        float *fDest = (float *) cvt->coeff;
   2.392 +        for (i = 0; i <= m; ++i) {
   2.393 +            fDest[i] = fSinc[i] * norm_fact;
   2.394 +        }
   2.395 +    } else {
   2.396 +        switch (SDL_AUDIO_BITSIZE(format)) {
   2.397 +        case 8:
   2.398 +            convert_fixed(Uint8, SDL_Make_1_7);
   2.399 +            break;
   2.400 +        case 16:
   2.401 +            convert_fixed(Uint16, SDL_Make_1_15);
   2.402 +            break;
   2.403 +        case 32:
   2.404 +            convert_fixed(Uint32, SDL_Make_1_31);
   2.405 +            break;
   2.406 +        }
   2.407 +    }
   2.408 +
   2.409 +    /* Initialize the state buffer to all zeroes, and set initial position */
   2.410 +    memset(cvt->state_buf, 0, cvt->len_sinc * SDL_AUDIO_BITSIZE(format) / 4);
   2.411 +    cvt->state_pos = 0;
   2.412 +
   2.413 +    /* Clean up */
   2.414 +#undef convert_fixed
   2.415 +    free(fSinc);
   2.416 +}
   2.417 +
   2.418 +/* This is used to reduce the resampling ratio */
   2.419 +inline int
   2.420 +SDL_GCD(int a, int b)
   2.421 +{
   2.422 +    int temp;
   2.423 +    while (b != 0) {
   2.424 +        temp = a % b;
   2.425 +        a = b;
   2.426 +        b = temp;
   2.427 +    }
   2.428 +    return a;
   2.429 +}
   2.430 +
   2.431 +/* Perform proper resampling. This is pretty slow but it's the best-sounding method. */
   2.432 +static void SDLCALL
   2.433 +SDL_Resample(SDL_AudioCVT * cvt, SDL_AudioFormat format)
   2.434 +{
   2.435 +    int i, j;
   2.436 +
   2.437 +#ifdef DEBUG_CONVERT
   2.438 +    printf("Converting audio rate via proper resampling (mono)\n");
   2.439 +#endif
   2.440 +
   2.441 +#define zerostuff_mono(type) { \
   2.442 +        const type *src = (const type *) (cvt->buf + cvt->len_cvt); \
   2.443 +        type *dst = (type *) (cvt->buf + (cvt->len_cvt * cvt->len_mult)); \
   2.444 +        for (i = cvt->len_cvt / sizeof (type); i; --i) { \
   2.445 +            src--; \
   2.446 +            dst[-1] = src[0]; \
   2.447 +            for( j = -cvt->len_mult; j < -1; ++j ) { \
   2.448 +                dst[j] = 0; \
   2.449 +            } \
   2.450 +            dst -= cvt->len_mult; \
   2.451 +        } \
   2.452 +    }
   2.453 +
   2.454 +#define discard_mono(type) { \
   2.455 +        const type *src = (const type *) (cvt->buf); \
   2.456 +        type *dst = (type *) (cvt->buf); \
   2.457 +        for (i = 0; i < (cvt->len_cvt / sizeof(type)) / cvt->len_div; ++i) { \
   2.458 +            dst[0] = src[0]; \
   2.459 +            src += cvt->len_div; \
   2.460 +            ++dst; \
   2.461 +        } \
   2.462 +    }
   2.463 +
   2.464 +    /* Step 1: Zero stuff the conversion buffer. This upsamples by a factor of len_mult,
   2.465 +       creating aliasing at frequencies above the original nyquist frequency.
   2.466 +     */
   2.467 +#ifdef DEBUG_CONVERT
   2.468 +    printf("Zero-stuffing by a factor of %u\n", cvt->len_mult);
   2.469 +#endif
   2.470 +    switch (SDL_AUDIO_BITSIZE(format)) {
   2.471 +    case 8:
   2.472 +        zerostuff_mono(Uint8);
   2.473 +        break;
   2.474 +    case 16:
   2.475 +        zerostuff_mono(Uint16);
   2.476 +        break;
   2.477 +    case 32:
   2.478 +        zerostuff_mono(Uint32);
   2.479 +        break;
   2.480 +    }
   2.481 +
   2.482 +    cvt->len_cvt *= cvt->len_mult;
   2.483 +
   2.484 +    /* Step 2: Use a windowed sinc FIR filter (lowpass filter) to remove the alias
   2.485 +       frequencies. This is the slow part.
   2.486 +     */
   2.487 +    SDL_FilterFIR(cvt, format);
   2.488 +
   2.489 +    /* Step 3: Now downsample by discarding samples. */
   2.490 +
   2.491 +#ifdef DEBUG_CONVERT
   2.492 +    printf("Discarding samples by a factor of %u\n", cvt->len_div);
   2.493 +#endif
   2.494 +    switch (SDL_AUDIO_BITSIZE(format)) {
   2.495 +    case 8:
   2.496 +        discard_mono(Uint8);
   2.497 +        break;
   2.498 +    case 16:
   2.499 +        discard_mono(Uint16);
   2.500 +        break;
   2.501 +    case 32:
   2.502 +        discard_mono(Uint32);
   2.503 +        break;
   2.504 +    }
   2.505 +
   2.506 +#undef zerostuff_mono
   2.507 +#undef discard_mono
   2.508 +
   2.509 +    cvt->len_cvt /= cvt->len_div;
   2.510 +
   2.511 +    if (cvt->filters[++cvt->filter_index]) {
   2.512 +        cvt->filters[cvt->filter_index] (cvt, format);
   2.513 +    }
   2.514 +}
   2.515  
   2.516  
   2.517  /* Creates a set of audio filters to convert from one format to another.
   2.518 @@ -1399,6 +1894,17 @@
   2.519      }
   2.520  
   2.521      /* Do rate conversion */
   2.522 +    if (src_rate != dst_rate) {
   2.523 +        int rate_gcd;
   2.524 +        rate_gcd = SDL_GCD(src_rate, dst_rate);
   2.525 +        cvt->len_mult = dst_rate / rate_gcd;
   2.526 +        cvt->len_div = src_rate / rate_gcd;
   2.527 +        cvt->len_ratio = (double) cvt->len_mult / (double) cvt->len_div;
   2.528 +        cvt->filters[cvt->filter_index++] = SDL_Resample;
   2.529 +        SDL_BuildWindowedSinc(cvt, dst_fmt, 768);
   2.530 +    }
   2.531 +
   2.532 +/*
   2.533      cvt->rate_incr = 0.0;
   2.534      if ((src_rate / 100) != (dst_rate / 100)) {
   2.535          Uint32 hi_rate, lo_rate;
   2.536 @@ -1448,25 +1954,25 @@
   2.537              }
   2.538              len_mult = 2;
   2.539              len_ratio = 2.0;
   2.540 -        }
   2.541 -        /* If hi_rate = lo_rate*2^x then conversion is easy */
   2.542 -        while (((lo_rate * 2) / 100) <= (hi_rate / 100)) {
   2.543 -            cvt->filters[cvt->filter_index++] = rate_cvt;
   2.544 -            cvt->len_mult *= len_mult;
   2.545 -            lo_rate *= 2;
   2.546 -            cvt->len_ratio *= len_ratio;
   2.547 -        }
   2.548 -        /* We may need a slow conversion here to finish up */
   2.549 -        if ((lo_rate / 100) != (hi_rate / 100)) {
   2.550 -#if 1
   2.551 -            /* The problem with this is that if the input buffer is
   2.552 -               say 1K, and the conversion rate is say 1.1, then the
   2.553 -               output buffer is 1.1K, which may not be an acceptable
   2.554 -               buffer size for the audio driver (not a power of 2)
   2.555 -             */
   2.556 -            /* For now, punt and hope the rate distortion isn't great.
   2.557 -             */
   2.558 -#else
   2.559 +        }*/
   2.560 +    /* If hi_rate = lo_rate*2^x then conversion is easy */
   2.561 +    /*   while (((lo_rate * 2) / 100) <= (hi_rate / 100)) {
   2.562 +       cvt->filters[cvt->filter_index++] = rate_cvt;
   2.563 +       cvt->len_mult *= len_mult;
   2.564 +       lo_rate *= 2;
   2.565 +       cvt->len_ratio *= len_ratio;
   2.566 +       } */
   2.567 +    /* We may need a slow conversion here to finish up */
   2.568 +    /*    if ((lo_rate / 100) != (hi_rate / 100)) {
   2.569 +       #if 1 */
   2.570 +    /* The problem with this is that if the input buffer is
   2.571 +       say 1K, and the conversion rate is say 1.1, then the
   2.572 +       output buffer is 1.1K, which may not be an acceptable
   2.573 +       buffer size for the audio driver (not a power of 2)
   2.574 +     */
   2.575 +    /* For now, punt and hope the rate distortion isn't great.
   2.576 +     */
   2.577 +/*#else
   2.578              if (src_rate < dst_rate) {
   2.579                  cvt->rate_incr = (double) lo_rate / hi_rate;
   2.580                  cvt->len_mult *= 2;
   2.581 @@ -1478,7 +1984,7 @@
   2.582              cvt->filters[cvt->filter_index++] = SDL_RateSLOW;
   2.583  #endif
   2.584          }
   2.585 -    }
   2.586 +    }*/
   2.587  
   2.588      /* Set up the filter information */
   2.589      if (cvt->filter_index != 0) {
   2.590 @@ -1492,4 +1998,15 @@
   2.591      return (cvt->needed);
   2.592  }
   2.593  
   2.594 +#undef SDL_FixMpy8
   2.595 +#undef SDL_FixMpy16
   2.596 +#undef SDL_FixMpy32
   2.597 +#undef SDL_FloatMpy
   2.598 +#undef SDL_Make_1_7
   2.599 +#undef SDL_Make_1_15
   2.600 +#undef SDL_Make_1_31
   2.601 +#undef SDL_Make_2_6
   2.602 +#undef SDL_Make_2_14
   2.603 +#undef SDL_Make_2_30
   2.604 +
   2.605  /* vi: set ts=4 sw=4 expandtab: */
     3.1 --- a/src/audio/SDL_sysaudio.h	Mon Aug 25 10:14:21 2008 +0000
     3.2 +++ b/src/audio/SDL_sysaudio.h	Mon Aug 25 15:08:59 2008 +0000
     3.3 @@ -69,6 +69,15 @@
     3.4  } SDL_AudioDriver;
     3.5  
     3.6  
     3.7 +/* Streamer */
     3.8 +typedef struct
     3.9 +{
    3.10 +    Uint8 *buffer;
    3.11 +    int max_len;                /* the maximum length in bytes */
    3.12 +    int read_pos, write_pos;    /* the position of the write and read heads in bytes */
    3.13 +} SDL_AudioStreamer;
    3.14 +
    3.15 +
    3.16  /* Define the SDL audio driver structure */
    3.17  struct SDL_AudioDevice
    3.18  {
    3.19 @@ -81,6 +90,10 @@
    3.20      /* An audio conversion block for audio format emulation */
    3.21      SDL_AudioCVT convert;
    3.22  
    3.23 +    /* The streamer, if sample rate conversion necessitates it */
    3.24 +    int use_streamer;
    3.25 +    SDL_AudioStreamer streamer;
    3.26 +
    3.27      /* Current state flags */
    3.28      int iscapture;
    3.29      int enabled;