Made a very significant optimization to the FIR filter, which I believe I can take a little further. Right now the FIR filter size is 768, and I am getting some free() bugs that I still need to debug.
1.1 --- a/src/audio/SDL_audiocvt.c Wed Jul 02 08:04:50 2008 +0000
1.2 +++ b/src/audio/SDL_audiocvt.c Thu Jul 10 07:02:18 2008 +0000
1.3 @@ -1536,6 +1536,7 @@
1.4 significantly fewer multiplications and additions. However, this
1.5 depends on the zero stuffing ratio, so it may not pay off.
1.6 */
1.7 + /* We only calculate the values of samples which are 0 (mod len_div) because those are the only ones used */
1.8 #define filter_sinc(type, mult) { \
1.9 type *sinc = (type *)cvt->coeff; \
1.10 type *state = (type *)cvt->state_buf; \
1.11 @@ -1543,9 +1544,11 @@
1.12 for(i = 0; i < n; ++i) { \
1.13 state[cvt->state_pos] = buf[i]; \
1.14 buf[i] = 0; \
1.15 - for(j = 0; j < m; ++j) { \
1.16 - buf[i] += mult(sinc[j], state[(cvt->state_pos + j) % m]); \
1.17 - } \
1.18 + if( i % cvt->len_div == 0 ) { \
1.19 + for(j = 0; j < m; ++j) { \
1.20 + buf[i] += mult(sinc[j], state[(cvt->state_pos + j) % m]); \
1.21 + } \
1.22 + }\
1.23 cvt->state_pos = (cvt->state_pos + 1) % m; \
1.24 } \
1.25 }
1.26 @@ -1616,7 +1619,6 @@
1.27 #ifdef DEBUG_CONVERT
1.28 printf("Lowpass cutoff frequency = %f\n", fc);
1.29 #endif
1.30 -// fc = 0.02f;
1.31 two_pi_fc = 2.0f * M_PI * fc;
1.32 two_pi_over_m = 2.0f * M_PI / (float)m;
1.33 four_pi_over_m = 2.0f * two_pi_over_m;
1.34 @@ -1635,7 +1637,7 @@
1.35 }
1.36
1.37 #define convert_fixed(type, fix) { \
1.38 - norm_fact = 0.7f / norm_sum; \
1.39 + norm_fact = 0.5f / norm_sum; \
1.40 type *dst = (type *)cvt->coeff; \
1.41 for( i = 0; i <= m; ++i ) { \
1.42 dst[i] = fix(fSinc[i] * norm_fact); \
1.43 @@ -1709,7 +1711,7 @@
1.44 #define discard_mono(type) { \
1.45 const type *src = (const type *) (cvt->buf); \
1.46 type *dst = (type *) (cvt->buf); \
1.47 - for (i = 0; i < cvt->len_cvt / cvt->len_div / sizeof (type); ++i) { \
1.48 + for (i = 0; i < (cvt->len_cvt / sizeof(type)) / cvt->len_div; ++i) { \
1.49 dst[0] = src[0]; \
1.50 src += cvt->len_div; \
1.51 ++dst; \
1.52 @@ -1735,9 +1737,12 @@
1.53 cvt->len_cvt *= cvt->len_mult;
1.54
1.55 // Step 2: Use either a windowed sinc FIR filter or IIR lowpass filter to remove all alias frequencies
1.56 - SDL_FilterFIR( cvt, format );
1.57 + QSDL_FilterFIR( cvt, format );
1.58 +
1.59 + // OPTIMIZATION: we only need to calculate the non-discarded samples. This could be a big speedup!
1.60
1.61 // Step 3: Discard unnecessary samples
1.62 +
1.63 #ifdef DEBUG_CONVERT
1.64 printf("Discarding samples by a factor of %u\n", cvt->len_div);
1.65 #endif
1.66 @@ -1859,7 +1864,7 @@
1.67 cvt->len_ratio = (double)cvt->len_mult / (double)cvt->len_div;
1.68 cvt->filters[cvt->filter_index++] = SDL_Resample;
1.69 //SDL_BuildIIRLowpass(cvt, dst_fmt);
1.70 - SDL_BuildWindowedSinc(cvt, dst_fmt, 20);
1.71 + SDL_BuildWindowedSinc(cvt, dst_fmt, 768);
1.72
1.73 /*cvt->rate_incr = 0.0;
1.74 if ((src_rate / 100) != (dst_rate / 100)) {