src/audio/SDL_audiocvt.c
 branch gsoc2008_audio_resampling changeset 2663 0caed045d01b parent 2662 5470680ca587
```     1.1 --- a/src/audio/SDL_audiocvt.c	Thu Jul 10 07:02:18 2008 +0000
1.2 +++ b/src/audio/SDL_audiocvt.c	Tue Aug 12 00:24:42 2008 +0000
1.3 @@ -29,37 +29,35 @@
1.4
1.5  #define DEBUG_CONVERT
1.6
1.7 -/* Perform fractional multiplication of two 32-bit integers to produce a 32-bit result. Assumes sizeof(long) = 4 */
1.8 -/*#define SDL_FixMpy32(v1, v2, dest) { \
1.9 -			long a, b, c, d; \
1.10 -			long x, y; \
1.11 -			a = (v1 >> 16) & 0xffff; \
1.12 -			b = v1 & 0xffff; \
1.13 -			c = (v2 >> 16); \
1.14 -			d = v2 & 0xffff; \
1.15 -			x = a * d + c * b; \
1.16 -			y = (((b*d) >> 16) & 0xffff) + x; \
1.17 -			dest = ((y >> 16) & 0xffff) + (a * c); \
1.18 -		}*/
1.19 -/* TODO: Check if 64-bit type exists. If not, see http://www.8052.com/mul16.phtml or http://www.cs.uaf.edu/~cs301/notes/Chapter5/node5.html */
1.20 +/* These are fractional multiplication routines. That is, their inputs
1.21 +   are two numbers in the range [-1, 1) and the result falls in that
1.22 +   same range. The output is the same size as the inputs, i.e.
1.23 +   32-bit x 32-bit = 32-bit.
1.24 + */
1.25
1.26  /* We hope here that the right shift includes sign extension */
1.27  #ifdef SDL_HAS_64BIT_Type
1.28  #define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff)
1.29  #else
1.30 -/* need to do something more complicated here */
1.31 +/* If we don't have the 64-bit type, do something more complicated. See http://www.8052.com/mul16.phtml or http://www.cs.uaf.edu/~cs301/notes/Chapter5/node5.html */
1.32  #define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff)
1.33  #endif
1.34 -/* Confirmed that SDL_FixMpy16 works, need to check 8 and 32 */
1.35  #define SDL_FixMpy16(a, b) ((((Sint32)a * (Sint32)b) >> 14) & 0xffff)
1.36  #define SDL_FixMpy8(a, b) ((((Sint16)a * (Sint16)b) >> 7) & 0xff)
1.37 -/* Everything is signed! */
1.38 -#define SDL_Make_1_7(a) (Sint8)(a * 127.0f)
1.39 -#define SDL_Make_1_15(a) (Sint16)(a * 32767.0f)
1.40 -#define SDL_Make_1_31(a) (Sint32)(a * 2147483647.0f)
1.41 -#define SDL_Make_2_6(a) (Sint8)(a * 63.0f)
1.42 -#define SDL_Make_2_14(a) (Sint16)(a * 16383.0f)
1.43 -#define SDL_Make_2_30(a) (Sint32)(a * 1073741823.0f)
1.44 +/* This macro just makes the floating point filtering code not have to be a special case. */
1.45 +#define SDL_FloatMpy(a, b) (a * b)
1.46 +
1.47 +/* These macros take floating point numbers in the range [-1.0f, 1.0f) and
1.48 +   represent them as fixed-point numbers in that same range. There's no
1.49 +   checking that the floating point argument is inside the appropriate range.
1.50 + */
1.51 +
1.52 +#define SDL_Make_1_7(a) (Sint8)(a * 128.0f)
1.53 +#define SDL_Make_1_15(a) (Sint16)(a * 32768.0f)
1.54 +#define SDL_Make_1_31(a) (Sint32)(a * 2147483648.0f)
1.55 +#define SDL_Make_2_6(a) (Sint8)(a * 64.0f)
1.56 +#define SDL_Make_2_14(a) (Sint16)(a * 16384.0f)
1.57 +#define SDL_Make_2_30(a) (Sint32)(a * 1073741824.0f)
1.58
1.59  /* Effectively mix right and left channels into a single channel */
1.60  static void SDLCALL
1.61 @@ -1442,6 +1440,11 @@
1.62  }
1.63
1.64  /* Apply the lowpass IIR filter to the given SDL_AudioCVT struct */
1.65 +/* This was implemented because it would be much faster than the fir filter,
1.66 +   but it doesn't seem to have a steep enough cutoff so we'd need several
1.67 +   cascaded biquads, which probably isn't a great idea. Therefore, this
1.68 +   function can probably be discarded.
1.69 +*/
1.70  static void SDL_FilterIIR(SDL_AudioCVT * cvt, SDL_AudioFormat format) {
1.71  	Uint32 i, n;
1.72
1.73 @@ -1524,7 +1527,8 @@
1.74  #undef iir_fix
1.75  }
1.76
1.77 -/* Apply the windowed sinc FIR filter to the given SDL_AudioCVT struct */
1.78 +/* Apply the windowed sinc FIR filter to the given SDL_AudioCVT struct.
1.79 +*/
1.80  static void SDL_FilterFIR(SDL_AudioCVT * cvt, SDL_AudioFormat format) {
1.81  	int n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format);
1.82  	int m = cvt->len_sinc;
1.83 @@ -1534,9 +1538,28 @@
1.84  	   Note: We can make a big optimization here by taking advantage
1.85  	   of the fact that the signal is zero stuffed, so we can do
1.86  	   significantly fewer multiplications and additions. However, this
1.87 -	   depends on the zero stuffing ratio, so it may not pay off.
1.88 +	   depends on the zero stuffing ratio, so it may not pay off. This would
1.89 +	   basically be a polyphase filter.
1.90  	*/
1.91 -	/* We only calculate the values of samples which are 0 (mod len_div) because those are the only ones used */
1.92 +	/* One other way to do this fast is to look at the fir filter from a different angle:
1.93 +	   After we zero stuff, we have input of all zeroes, except for every len_mult
1.94 +	   sample. If we choose a sinc length equal to len_mult, then the fir filter becomes
1.95 +	   much more simple: we're just taking a windowed sinc, shifting it to start at each
1.96 +	   len_mult sample, and scaling it by the value of that sample. If we do this, then
1.97 +	   we don't even need to worry about the sample histories, and the inner loop here is
1.98 +	   unnecessary. This probably sacrifices some quality but could really speed things up as well.
1.99 +	*/
1.100 +	/* We only calculate the values of samples which are 0 (mod len_div) because
1.101 +	   those are the only ones used. All the other ones are discarded in the
1.102 +	   third step of resampling. This is a huge speedup. As a warning, though,
1.103 +	   if for some reason this is used elsewhere where there are no samples discarded,
1.104 +	   the output will not be corrrect if len_div is not 1. To make this filter a
1.105 +	   generic FIR filter, simply remove the if statement "if(i % cvt->len_div == 0)"
1.106 +	   around the inner loop so that every sample is processed.
1.107 +	*/
1.108 +	/* This is basically just a FIR filter. i.e. for input x_n and m coefficients,
1.109 +	   y_n = x_n*sinc_0 + x_(n-1)*sinc_1 +  x_(n-2)*sinc_2 + ... + x_(n-m+1)*sinc_(m-1)
1.110 +	*/
1.111  #define filter_sinc(type, mult) { \
1.112  			type *sinc = (type *)cvt->coeff; \
1.113  			type *state = (type *)cvt->state_buf; \
1.114 @@ -1553,20 +1576,8 @@
1.115  			} \
1.116  		}
1.117
1.118 -	/* If it's floating point, do it normally, otherwise used fixed-point code */
1.119  	if(SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
1.120 -		float *sinc = (float *)cvt->coeff;
1.121 -		float *state = (float *)cvt->state_buf;
1.122 -		float *buf = (float *)cvt->buf;
1.123 -
1.124 -		for(i = 0; i < n; ++i) {
1.125 -			state[cvt->state_pos] = buf[i];
1.126 -			buf[i] = 0.0f;
1.127 -			for(j = 0; j < m; ++j) {
1.128 -				buf[i] += sinc[j] * state[(cvt->state_pos + j) % m];
1.129 -			}
1.130 -			cvt->state_pos = (cvt->state_pos + 1) % m;
1.131 -		}
1.132 +		filter_sinc(float, SDL_FloatMpy);
1.133  	} else {
1.134  		switch (SDL_AUDIO_BITSIZE(format)) {
1.135  			case 8:
1.136 @@ -1609,7 +1620,7 @@
1.137  	cvt->len_sinc = m + 1;
1.138
1.139  	/* Allocate the floating point windowed sinc. */
1.140 -	fSinc = (float *)malloc(m * sizeof(float));
1.141 +	fSinc = (float *)malloc((m + 1) * sizeof(float));
1.142  	if( fSinc == NULL ) {
1.143  		return -1;
1.144  	}
1.145 @@ -1635,9 +1646,10 @@
1.146  		}
1.147  		norm_sum += fabs(fSinc[i]);
1.148  	}
1.149 -
1.150 +
1.151 +	norm_fact = 1.0f / norm_sum;
1.152 +
1.153  #define convert_fixed(type, fix) { \
1.154 -		norm_fact = 0.5f / norm_sum; \
1.155  		type *dst = (type *)cvt->coeff; \
1.156  		for( i = 0; i <= m; ++i ) { \
1.157  			dst[i] = fix(fSinc[i] * norm_fact); \
1.158 @@ -1647,7 +1659,6 @@
1.159  	/* If we're using floating point, we only need to normalize */
1.160  	if(SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
1.161  		float *fDest = (float *)cvt->coeff;
1.162 -		norm_fact = 1.0f / norm_sum;
1.163  		for(i = 0; i <= m; ++i) {
1.164  			fDest[i] = fSinc[i] * norm_fact;
1.165  		}
1.166 @@ -1685,7 +1696,7 @@
1.167  	return a;
1.168  }
1.169
1.170 -/* Perform proper resampling */
1.171 +/* Perform proper resampling. This is pretty slow but it's the best-sounding method. */
1.172  static void SDLCALL
1.173  SDL_Resample(SDL_AudioCVT * cvt, SDL_AudioFormat format)
1.174  {
1.175 @@ -1718,7 +1729,9 @@
1.176          } \
1.177      }
1.178
1.179 -	// Step 1: Zero stuff the conversion buffer
1.180 +	/* Step 1: Zero stuff the conversion buffer. This upsamples by a factor of len_mult,
1.181 +	   creating aliasing at frequencies above the original nyquist frequency.
1.182 +	 */
1.183  #ifdef DEBUG_CONVERT
1.184  	printf("Zero-stuffing by a factor of %u\n", cvt->len_mult);
1.185  #endif
1.186 @@ -1736,12 +1749,12 @@
1.187
1.188  	cvt->len_cvt *= cvt->len_mult;
1.189
1.190 -	// Step 2: Use either a windowed sinc FIR filter or IIR lowpass filter to remove all alias frequencies
1.191 -	QSDL_FilterFIR( cvt, format );
1.192 +	/* Step 2: Use a windowed sinc FIR filter (lowpass filter) to remove the alias
1.193 +	   frequencies. This is the slow part.
1.194 +	 */
1.195 +	SDL_FilterFIR( cvt, format );
1.196
1.197 -	// OPTIMIZATION: we only need to calculate the non-discarded samples. This could be a big speedup!
1.198 -
1.199 -	// Step 3: Discard unnecessary samples
1.200 +	/* Step 3: Now downsample by discarding samples. */
1.201
1.202  #ifdef DEBUG_CONVERT
1.203  	printf("Discarding samples by a factor of %u\n", cvt->len_div);
1.204 @@ -1855,18 +1868,20 @@
1.205              /* Uh oh.. */ ;
1.206          }
1.207      }
1.208 -
1.209 +
1.210      /* Do rate conversion */
1.211 -	int rate_gcd;
1.212 -	rate_gcd = SDL_GCD(src_rate, dst_rate);
1.213 -	cvt->len_mult = dst_rate / rate_gcd;
1.214 -	cvt->len_div = src_rate / rate_gcd;
1.215 -	cvt->len_ratio = (double)cvt->len_mult / (double)cvt->len_div;
1.216 -	cvt->filters[cvt->filter_index++] = SDL_Resample;
1.217 -	//SDL_BuildIIRLowpass(cvt, dst_fmt);
1.218 -	SDL_BuildWindowedSinc(cvt, dst_fmt, 768);
1.219 +	if( src_rate != dst_rate ) {
1.220 +		int rate_gcd;
1.221 +		rate_gcd = SDL_GCD(src_rate, dst_rate);
1.222 +		cvt->len_mult = dst_rate / rate_gcd;
1.223 +		cvt->len_div = src_rate / rate_gcd;
1.224 +		cvt->len_ratio = (double)cvt->len_mult / (double)cvt->len_div;
1.225 +		cvt->filters[cvt->filter_index++] = SDL_Resample;
1.226 +		SDL_BuildWindowedSinc(cvt, dst_fmt, 768);
1.227 +	}
1.228
1.229 -    /*cvt->rate_incr = 0.0;
1.230 +/*
1.231 +    cvt->rate_incr = 0.0;
1.232      if ((src_rate / 100) != (dst_rate / 100)) {
1.233          Uint32 hi_rate, lo_rate;
1.234          int len_mult;
1.235 @@ -1917,15 +1932,15 @@
1.236              len_ratio = 2.0;
1.237          }*/
1.238          /* If hi_rate = lo_rate*2^x then conversion is easy */
1.239 -        /*while (((lo_rate * 2) / 100) <= (hi_rate / 100)) {
1.240 +     /*   while (((lo_rate * 2) / 100) <= (hi_rate / 100)) {
1.241              cvt->filters[cvt->filter_index++] = rate_cvt;
1.242              cvt->len_mult *= len_mult;
1.243              lo_rate *= 2;
1.244              cvt->len_ratio *= len_ratio;
1.245          }*/
1.246          /* We may need a slow conversion here to finish up */
1.247 -        /*if ((lo_rate / 100) != (hi_rate / 100)) {*/
1.248 -#if 1
1.249 +    /*    if ((lo_rate / 100) != (hi_rate / 100)) {
1.250 +#if 1*/
1.251              /* The problem with this is that if the input buffer is
1.252                 say 1K, and the conversion rate is say 1.1, then the
1.253                 output buffer is 1.1K, which may not be an acceptable
1.254 @@ -1933,7 +1948,7 @@
1.255               */
1.256              /* For now, punt and hope the rate distortion isn't great.
1.257               */
1.258 -#else
1.259 +/*#else
1.260              if (src_rate < dst_rate) {
1.261                  cvt->rate_incr = (double) lo_rate / hi_rate;
1.262                  cvt->len_mult *= 2;
1.263 @@ -1944,7 +1959,7 @@
1.264              }
1.265              cvt->filters[cvt->filter_index++] = SDL_RateSLOW;
1.266  #endif
1.267 -/*        }
1.268 +        }
1.269      }*/
1.270
1.271      /* Set up the filter information */
1.272 @@ -1962,6 +1977,7 @@
1.273  #undef SDL_FixMpy8
1.274  #undef SDL_FixMpy16
1.275  #undef SDL_FixMpy32
1.276 +#undef SDL_FloatMpy
1.277  #undef SDL_Make_1_7
1.278  #undef SDL_Make_1_15
1.279  #undef SDL_Make_1_31
```