Added MMX audio mixing code for gcc (thanks Stephane!)
author Sam Lantinga <slouken@libsdl.org>
Sat, 09 Nov 2002 06:13:28 +0000
changeset 539 a9e38f3b8e4d
parent 538 d3abe873e3f7
child 540 4bcfb93e0dfe
Added MMX audio mixing code for gcc (thanks Stephane!)
docs.html
src/audio/Makefile.am
src/audio/SDL_mixer.c
src/audio/SDL_mixer_MMX.c
src/audio/SDL_mixer_MMX.h
     1.1 --- a/docs.html	Sat Nov 09 05:52:49 2002 +0000
     1.2 +++ b/docs.html	Sat Nov 09 06:13:28 2002 +0000
     1.3 @@ -20,6 +20,7 @@
     1.4  Major changes since SDL 1.0.0:
     1.5  </H2>
     1.6  <UL>
     1.7 +	<LI> 1.2.6: Added MMX audio mixing code for gcc (thanks Stephane!)
     1.8  	<LI> 1.2.6: Fixed potential dropped events under DirectInput
     1.9  	<LI> 1.2.6: Added Visual C++ 7 (.NET) projects (thanks James!)
    1.10  	<LI> 1.2.5: Added an environment variable SDL_HAS3BUTTONMOUSE for Quartz
     2.1 --- a/src/audio/Makefile.am	Sat Nov 09 05:52:49 2002 +0000
     2.2 +++ b/src/audio/Makefile.am	Sat Nov 09 06:13:28 2002 +0000
     2.3 @@ -22,7 +22,9 @@
     2.4  	SDL_mixer.c		\
     2.5  	SDL_sysaudio.h		\
     2.6  	SDL_wave.c		\
     2.7 -	SDL_wave.h
     2.8 +	SDL_wave.h              \
     2.9 +	SDL_mixer_MMX.c         \
    2.10 +	SDL_mixer_MMX.h
    2.11  
    2.12  libaudio_la_SOURCES = $(COMMON_SRCS)
    2.13  libaudio_la_LIBADD = $(DRIVERS)
     3.1 --- a/src/audio/SDL_mixer.c	Sat Nov 09 05:52:49 2002 +0000
     3.2 +++ b/src/audio/SDL_mixer.c	Sat Nov 09 06:13:28 2002 +0000
     3.3 @@ -35,7 +35,24 @@
     3.4  #include "SDL_mutex.h"
     3.5  #include "SDL_timer.h"
     3.6  #include "SDL_sysaudio.h"
     3.7 +#include "SDL_mixer_MMX.h"
     3.8  
     3.9 +/* Function to check the CPU flags */
    3.10 +#define MMX_CPU		0x800000
    3.11 +#ifdef USE_ASMBLIT
    3.12 +#define CPU_Flags()	Hermes_X86_CPU()
    3.13 +#else
    3.14 +#define CPU_Flags()	0L
    3.15 +#endif
    3.16 +
    3.17 +#ifdef USE_ASMBLIT
    3.18 +#define X86_ASSEMBLER
    3.19 +#define HermesConverterInterface	void
    3.20 +#define HermesClearInterface		void
    3.21 +#define STACKCALL
    3.22 +
    3.23 +#include "HeadX86.h"
    3.24 +#endif
    3.25  
    3.26  /* This table is used to add two sound values together and pin
    3.27   * the value to avoid overflow.  (used with permission from ARDI)
    3.28 @@ -130,6 +147,15 @@
    3.29  		break;
    3.30  
    3.31  		case AUDIO_S8: {
    3.32 +
    3.33 +#if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
    3.34 +			if (CPU_Flags() & MMX_CPU)
    3.35 +			{
    3.36 +				SDL_MixAudio_MMX_S8((char*)dst,(char*)src,(unsigned int)len,(int)volume);
    3.37 +			}
    3.38 +			else
    3.39 +#endif
    3.40 +			{
    3.41  			Sint8 *dst8, *src8;
    3.42  			Sint8 src_sample;
    3.43  			int dst_sample;
    3.44 @@ -153,10 +179,19 @@
    3.45  				++dst8;
    3.46  				++src8;
    3.47  			}
    3.48 +			}
    3.49  		}
    3.50  		break;
    3.51  
    3.52  		case AUDIO_S16LSB: {
    3.53 +#if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
    3.54 +			if (CPU_Flags() & MMX_CPU)
    3.55 +			{
    3.56 +				SDL_MixAudio_MMX_S16((char*)dst,(char*)src,(unsigned int)len,(int)volume);
    3.57 +			}
    3.58 +			else
    3.59 +#endif
    3.60 +			{
    3.61  			Sint16 src1, src2;
    3.62  			int dst_sample;
    3.63  			const int max_audioval = ((1<<(16-1))-1);
    3.64 @@ -180,6 +215,7 @@
    3.65  				dst[1] = dst_sample&0xFF;
    3.66  				dst += 2;
    3.67  			}
    3.68 +			}
    3.69  		}
    3.70  		break;
    3.71  
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/audio/SDL_mixer_MMX.c	Sat Nov 09 06:13:28 2002 +0000
     4.3 @@ -0,0 +1,185 @@
     4.4 +// MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
     4.5 +// Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr)
     4.6 +// This code is licensed under the LGPL (see COPYING for details)
     4.7 +// 
     4.8 +// Assumes buffer size in bytes is a multiple of 16
     4.9 +// Assumes SDL_MIX_MAXVOLUME = 128
    4.10 +
    4.11 +
    4.12 +////////////////////////////////////////////////
    4.13 +// Mixing for 16 bit signed buffers
    4.14 +////////////////////////////////////////////////
    4.15 +
    4.16 +#if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
     4.17 +void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume)
     4.18 +{	// Mixes src into dst in place: dst[i] = sat16(dst[i] + ((src[i] * volume) >> 7)), on signed 16-bit samples.
     4.19 +    __asm__ __volatile__ (
     4.20 +	// Only whole 16-byte blocks are processed (size >> 4 iterations); any remaining size % 16 bytes are left unmixed.
     4.21 +"	movl %0,%%edi\n"	// edi = dst
     4.22 +"	movl %1,%%esi\n"	// esi = src
     4.23 +"	movl %3,%%eax\n"	// eax = volume
     4.24 +
     4.25 +"	movl %2,%%ebx\n"	// ebx = size
     4.26 +
     4.27 +"	shrl $4,%%ebx\n"	// process 16 bytes per iteration = 8 samples
     4.28 +
     4.29 +"	jz .endS16\n"	// fewer than 16 bytes: nothing to do (no MMX register touched yet, so emms can be skipped)
     4.30 +
     4.31 +"	pxor %%mm0,%%mm0\n"	// mm0 = 0 (overwritten by the movd just below)
     4.32 +
     4.33 +"	movd %%eax,%%mm0\n"	// mm0 = 0|0|0|vol
     4.34 +"	movq %%mm0,%%mm1\n"	// mm1 = copy of vol, OR-ed back in after each shift
     4.35 +"	psllq $16,%%mm0\n"
     4.36 +"	por %%mm1,%%mm0\n"
     4.37 +"	psllq $16,%%mm0\n"
     4.38 +"	por %%mm1,%%mm0\n"
     4.39 +"	psllq $16,%%mm0\n"
     4.40 +"	por %%mm1,%%mm0\n"		// mm0 = vol|vol|vol|vol (assumes volume fits in 16 bits; the file header assumes SDL_MIX_MAXVOLUME = 128)
     4.41 +
     4.42 +".align 16\n"
     4.43 +"	.mixloopS16:\n"
     4.44 +
     4.45 +"	movq (%%esi),%%mm1\n" // mm1 = a|b|c|d
     4.46 +
     4.47 +"	movq %%mm1,%%mm2\n" // mm2 = a|b|c|d
     4.48 +
     4.49 +"	movq 8(%%esi),%%mm4\n" // mm4 = e|f|g|h
     4.50 +
     4.51 +	// preload the first 8 bytes of the dst buffer into mm7
     4.52 +"	movq (%%edi),%%mm7\n" // mm7 = dst[0]
     4.53 +
     4.54 +	// multiply by the volume (low and high 16-bit halves of each 32-bit product are computed separately)
     4.55 +"	pmullw %%mm0,%%mm1\n" // mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v)
     4.56 +
     4.57 +"	pmulhw %%mm0,%%mm2\n" // mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v)
     4.58 +"	movq %%mm4,%%mm5\n" // mm5 = e|f|g|h
     4.59 +
     4.60 +"	pmullw %%mm0,%%mm4\n" // mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v)
     4.61 +
     4.62 +"	pmulhw %%mm0,%%mm5\n" // mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v)
     4.63 +"	movq %%mm1,%%mm3\n" // mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v)
     4.64 +
     4.65 +"	punpckhwd %%mm2,%%mm1\n" // mm1 = a*v|b*v (low and high halves interleaved into full 32-bit products)
     4.66 +
     4.67 +"	movq %%mm4,%%mm6\n" // mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v)
     4.68 +"	punpcklwd %%mm2,%%mm3\n" // mm3 = c*v|d*v
     4.69 +
     4.70 +"	punpckhwd %%mm5,%%mm4\n" // mm4 = e*v|f*v
     4.71 +
     4.72 +"	punpcklwd %%mm5,%%mm6\n" // mm6 = g*v|h*v
     4.73 +
     4.74 +	// preload the second 8 bytes of the dst buffer into mm5
     4.75 +"	movq 8(%%edi),%%mm5\n" // mm5 = dst[1]
     4.76 +
     4.77 +	// divide by 128 (arithmetic shift right by 7)
     4.78 +"	psrad $7,%%mm1\n" // mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME
     4.79 +"	addl $16,%%esi\n"	// advance src to the next 16-byte block
     4.80 +
     4.81 +"	psrad $7,%%mm3\n" // mm3 = c*v/128|d*v/128
     4.82 +
     4.83 +"	psrad $7,%%mm4\n" // mm4 = e*v/128|f*v/128
     4.84 +
     4.85 +	// mm3 = the first four samples with the volume applied
     4.86 +"	packssdw %%mm1,%%mm3\n" // mm3 = s(a*v|b*v|c*v|d*v)
     4.87 +
     4.88 +"	psrad $7,%%mm6\n" // mm6= g*v/128|h*v/128
     4.89 +"	paddsw %%mm7,%%mm3\n" // mm3 = adjust_volume(src)+dst (saturating add)
     4.90 +
     4.91 +	// mm6 = the last four samples with the volume applied
     4.92 +"	packssdw %%mm4,%%mm6\n" // mm6 = s(e*v|f*v|g*v|h*v)
     4.93 +"	movq %%mm3,(%%edi)\n"	// store the first four mixed samples
     4.94 +
     4.95 +"	paddsw %%mm5,%%mm6\n" // mm6 = adjust_volume(src)+dst (saturating add)
     4.96 +
     4.97 +"	movq %%mm6,8(%%edi)\n"	// store the last four mixed samples
     4.98 +
     4.99 +"	addl $16,%%edi\n"	// advance dst to the next 16-byte block
    4.100 +
    4.101 +"	dec %%ebx\n"	// one fewer block remaining
    4.102 +
    4.103 +"	jnz .mixloopS16\n"
    4.104 +
    4.105 +"	emms\n"	// clear the MMX state before returning to code that may use the x87 FPU
    4.106 +
    4.107 +".endS16:\n"
    4.108 +	 :	// no output operands: results are written through dst, covered by the "memory" clobber
    4.109 +	 : "m" (dst), "m"(src),"m"(size),	// %0 = dst, %1 = src, %2 = size
    4.110 +	 "m"(volume)	// %3 = volume
    4.111 +	 : "eax","ebx", "esi", "edi","memory"	// NOTE(review): mm0-mm7 are modified but not listed as clobbers -- confirm this is acceptable
    4.112 +	 );
    4.113 +}
   4.114 +
   4.115 +
   4.116 +
   4.117 +////////////////////////////////////////////////
   4.118 +// Mixing for 8 bit signed buffers
   4.119 +////////////////////////////////////////////////
   4.120 +
   4.121 +void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
   4.122 +{
   4.123 +    __asm__ __volatile__ (
   4.124 +
   4.125 +"	movl %0,%%edi\n"	// edi = dst
   4.126 +"	movl %1,%%esi\n"	// esi = src
   4.127 +"	movl %3,%%eax\n"	// eax = volume
   4.128 +
   4.129 +"	movd %%ebx,%%mm0\n"
   4.130 +"	movq %%mm0,%%mm1\n"
   4.131 +"	psllq $16,%%mm0\n"
   4.132 +"	por %%mm1,%%mm0\n"
   4.133 +"	psllq $16,%%mm0\n"
   4.134 +"	por %%mm1,%%mm0\n"
   4.135 +"	psllq $16,%%mm0\n"
   4.136 +"	por %%mm1,%%mm0\n"
   4.137 +
   4.138 +"	movl %2,%%ebx\n"	// ebx = size
   4.139 +"	shr $3,%%ebx\n"	// process 8 bytes per iteration = 8 samples
   4.140 +
   4.141 +"	cmp $0,%%ebx\n"
   4.142 +"	je .endS8\n"
   4.143 +
   4.144 +".align 16\n"
   4.145 +"	.mixloopS8:\n"
   4.146 +
   4.147 +"	pxor %%mm2,%%mm2\n"		// mm2 = 0
   4.148 +"	movq (%%esi),%%mm1\n"	// mm1 = a|b|c|d|e|f|g|h
   4.149 +
   4.150 +"	movq %%mm1,%%mm3\n" 	// mm3 = a|b|c|d|e|f|g|h
   4.151 +
   4.152 +	// on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0
   4.153 +"	pcmpgtb %%mm1,%%mm2\n"	// mm2 = 11111111|00000000|00000000....
   4.154 +
   4.155 +"	punpckhbw %%mm2,%%mm1\n"	// mm1 = 0|a|0|b|0|c|0|d
   4.156 +
   4.157 +"	punpcklbw %%mm2,%%mm3\n"	// mm3 = 0|e|0|f|0|g|0|h
   4.158 +"	movq (%%edi),%%mm2\n"	// mm2 = destination
   4.159 +
   4.160 +"	pmullw %%mm0,%%mm1\n"	// mm1 = v*a|v*b|v*c|v*d
   4.161 +"	addl $8,%%esi\n"
   4.162 +
   4.163 +"	pmullw %%mm0,%%mm3\n"	// mm3 = v*e|v*f|v*g|v*h
   4.164 +"	psraw $7,%%mm1\n"		// mm1 = v*a/128|v*b/128|v*c/128|v*d/128 
   4.165 +
   4.166 +"	psraw $7,%%mm3\n"		// mm3 = v*e/128|v*f/128|v*g/128|v*h/128
   4.167 +
   4.168 +"	packsswb %%mm1,%%mm3\n"	// mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128
   4.169 +
   4.170 +"	paddsb %%mm2,%%mm3\n"	// add to destination buffer
   4.171 +
   4.172 +"	movq %%mm3,(%%edi)\n"	// store back to ram
   4.173 +"	addl $8,%%edi\n"
   4.174 +
   4.175 +"	dec %%ebx\n"
   4.176 +
   4.177 +"	jnz .mixloopS8\n"
   4.178 +
   4.179 +".endS8:\n"
   4.180 +"	emms\n"
   4.181 +	 :
   4.182 +	 : "m" (dst), "m"(src),"m"(size),
   4.183 +	 "m"(volume)
   4.184 +	 : "eax","ebx", "esi", "edi","memory"
   4.185 +	 );
   4.186 +}
   4.187 +#endif
   4.188 +
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/audio/SDL_mixer_MMX.h	Sat Nov 09 06:13:28 2002 +0000
     5.3 @@ -0,0 +1,13 @@
     5.4 +// headers for MMX assembler version of SDL_MixAudio
     5.5 +// Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr)
     5.6 +// This code is licensed under the LGPL (see COPYING for details)
     5.7 +// 
     5.8 +// Assumes buffer size in bytes is a multiple of 16
     5.9 +// Assumes SDL_MIX_MAXVOLUME = 128
    5.10 +
    5.11 +
    5.12 +#if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
    5.13 +void SDL_MixAudio_MMX_S16(char* ,char* ,unsigned int ,int ); // args: dst, src, size in bytes, volume -- mixes signed 16-bit little-endian samples into dst
    5.14 +void SDL_MixAudio_MMX_S8(char* ,char* ,unsigned int ,int ); // args: dst, src, size in bytes, volume -- mixes signed 8-bit samples into dst
    5.15 +#endif
    5.16 +