Date: Mon, 10 May 2004 10:17:46 -0400
author Sam Lantinga <slouken@libsdl.org>
Sun, 16 May 2004 17:19:48 +0000
changeset 887 b4b64bb88f2f
parent 886 05c551e5bc64
child 888 07def9d03315
Date: Mon, 10 May 2004 10:17:46 -0400
From: Mike Frysinger
Subject: Re: [SDL] gcc-3.4.0 / PIC fix

Here's a combined patch (yours and the one I mentioned earlier) that I tested
with gcc-3.4.0 and gcc-3.3.3.
src/audio/SDL_mixer_MMX.c
src/cpuinfo/SDL_cpuinfo.c
src/video/SDL_yuv_mmx.c
     1.1 --- a/src/audio/SDL_mixer_MMX.c	Thu May 06 15:55:06 2004 +0000
     1.2 +++ b/src/audio/SDL_mixer_MMX.c	Sun May 16 17:19:48 2004 +0000
     1.3 @@ -15,13 +15,11 @@
     1.4  {
     1.5      __asm__ __volatile__ (
     1.6  
     1.7 -"	movl %0,%%edi\n"	// edi = dst
     1.8 -"	movl %1,%%esi\n"	// esi = src
     1.9  "	movl %3,%%eax\n"	// eax = volume
    1.10  
    1.11 -"	movl %2,%%ebx\n"	// ebx = size
    1.12 +"	movl %2,%%edx\n"	// edx = size
    1.13  
    1.14 -"	shrl $4,%%ebx\n"	// process 16 bytes per iteration = 8 samples
    1.15 +"	shrl $4,%%edx\n"	// process 16 bytes per iteration = 8 samples
    1.16  
    1.17  "	jz .endS16\n"
    1.18  
    1.19 @@ -39,14 +37,14 @@
    1.20  ".align 16\n"
    1.21  "	.mixloopS16:\n"
    1.22  
    1.23 -"	movq (%%esi),%%mm1\n" // mm1 = a|b|c|d
    1.24 +"	movq (%1),%%mm1\n" // mm1 = a|b|c|d
    1.25  
    1.26  "	movq %%mm1,%%mm2\n" // mm2 = a|b|c|d
    1.27  
    1.28 -"	movq 8(%%esi),%%mm4\n" // mm4 = e|f|g|h
    1.29 +"	movq 8(%1),%%mm4\n" // mm4 = e|f|g|h
    1.30  
    1.31  	// pré charger le buffer dst dans mm7
    1.32 -"	movq (%%edi),%%mm7\n" // mm7 = dst[0]"
    1.33 +"	movq (%0),%%mm7\n" // mm7 = dst[0]"
    1.34  
    1.35  	// multiplier par le volume
    1.36  "	pmullw %%mm0,%%mm1\n" // mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v)
    1.37 @@ -69,11 +67,11 @@
    1.38  "	punpcklwd %%mm5,%%mm6\n" // mm6 = g*v|h*v
    1.39  
    1.40  	// pré charger le buffer dst dans mm5
    1.41 -"	movq 8(%%edi),%%mm5\n" // mm5 = dst[1]
    1.42 +"	movq 8(%0),%%mm5\n" // mm5 = dst[1]
    1.43  
    1.44  	// diviser par 128
    1.45  "	psrad $7,%%mm1\n" // mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME
    1.46 -"	addl $16,%%esi\n"
    1.47 +"	add $16,%1\n"
    1.48  
    1.49  "	psrad $7,%%mm3\n" // mm3 = c*v/128|d*v/128
    1.50  
    1.51 @@ -87,15 +85,15 @@
    1.52  
    1.53  	// mm4 = le sample avec le volume modifié
    1.54  "	packssdw %%mm4,%%mm6\n" // mm6 = s(e*v|f*v|g*v|h*v)
    1.55 -"	movq %%mm3,(%%edi)\n"
    1.56 +"	movq %%mm3,(%0)\n"
    1.57  
    1.58  "	paddsw %%mm5,%%mm6\n" // mm6 = adjust_volume(src)+dst
    1.59  
    1.60 -"	movq %%mm6,8(%%edi)\n"
    1.61 +"	movq %%mm6,8(%0)\n"
    1.62  
    1.63 -"	addl $16,%%edi\n"
    1.64 +"	add $16,%0\n"
    1.65  
    1.66 -"	dec %%ebx\n"
    1.67 +"	dec %%edx\n"
    1.68  
    1.69  "	jnz .mixloopS16\n"
    1.70  
    1.71 @@ -103,9 +101,9 @@
    1.72  
    1.73  ".endS16:\n"
    1.74  	 :
    1.75 -	 : "m" (dst), "m"(src),"m"(size),
    1.76 +	 : "r" (dst), "r"(src),"m"(size),
    1.77  	 "m"(volume)
    1.78 -	 : "eax","ebx", "esi", "edi","memory"
    1.79 +	 : "eax","edx","memory"
    1.80  	 );
    1.81  }
    1.82  
    1.83 @@ -119,11 +117,9 @@
    1.84  {
    1.85      __asm__ __volatile__ (
    1.86  
    1.87 -"	movl %0,%%edi\n"	// edi = dst
    1.88 -"	movl %1,%%esi\n"	// esi = src
    1.89  "	movl %3,%%eax\n"	// eax = volume
    1.90  
    1.91 -"	movd %%ebx,%%mm0\n"
    1.92 +"	movd %%edx,%%mm0\n"
    1.93  "	movq %%mm0,%%mm1\n"
    1.94  "	psllq $16,%%mm0\n"
    1.95  "	por %%mm1,%%mm0\n"
    1.96 @@ -132,17 +128,17 @@
    1.97  "	psllq $16,%%mm0\n"
    1.98  "	por %%mm1,%%mm0\n"
    1.99  
   1.100 -"	movl %2,%%ebx\n"	// ebx = size
   1.101 -"	shr $3,%%ebx\n"	// process 8 bytes per iteration = 8 samples
   1.102 +"	movl %2,%%edx\n"	// edx = size
   1.103 +"	shr $3,%%edx\n"	// process 8 bytes per iteration = 8 samples
   1.104  
   1.105 -"	cmp $0,%%ebx\n"
   1.106 +"	cmp $0,%%edx\n"
   1.107  "	je .endS8\n"
   1.108  
   1.109  ".align 16\n"
   1.110  "	.mixloopS8:\n"
   1.111  
   1.112  "	pxor %%mm2,%%mm2\n"		// mm2 = 0
   1.113 -"	movq (%%esi),%%mm1\n"	// mm1 = a|b|c|d|e|f|g|h
   1.114 +"	movq (%1),%%mm1\n"	// mm1 = a|b|c|d|e|f|g|h
   1.115  
   1.116  "	movq %%mm1,%%mm3\n" 	// mm3 = a|b|c|d|e|f|g|h
   1.117  
   1.118 @@ -152,10 +148,10 @@
   1.119  "	punpckhbw %%mm2,%%mm1\n"	// mm1 = 0|a|0|b|0|c|0|d
   1.120  
   1.121  "	punpcklbw %%mm2,%%mm3\n"	// mm3 = 0|e|0|f|0|g|0|h
   1.122 -"	movq (%%edi),%%mm2\n"	// mm2 = destination
   1.123 +"	movq (%0),%%mm2\n"	// mm2 = destination
   1.124  
   1.125  "	pmullw %%mm0,%%mm1\n"	// mm1 = v*a|v*b|v*c|v*d
   1.126 -"	addl $8,%%esi\n"
   1.127 +"	add $8,%1\n"
   1.128  
   1.129  "	pmullw %%mm0,%%mm3\n"	// mm3 = v*e|v*f|v*g|v*h
   1.130  "	psraw $7,%%mm1\n"		// mm1 = v*a/128|v*b/128|v*c/128|v*d/128 
   1.131 @@ -166,19 +162,19 @@
   1.132  
   1.133  "	paddsb %%mm2,%%mm3\n"	// add to destination buffer
   1.134  
   1.135 -"	movq %%mm3,(%%edi)\n"	// store back to ram
   1.136 -"	addl $8,%%edi\n"
   1.137 +"	movq %%mm3,(%0)\n"	// store back to ram
   1.138 +"	add $8,%0\n"
   1.139  
   1.140 -"	dec %%ebx\n"
   1.141 +"	dec %%edx\n"
   1.142  
   1.143  "	jnz .mixloopS8\n"
   1.144  
   1.145  ".endS8:\n"
   1.146  "	emms\n"
   1.147  	 :
   1.148 -	 : "m" (dst), "m"(src),"m"(size),
   1.149 +	 : "r" (dst), "r"(src),"m"(size),
   1.150  	 "m"(volume)
   1.151 -	 : "eax","ebx", "esi", "edi","memory"
   1.152 +	 : "eax","edx","memory"
   1.153  	 );
   1.154  }
   1.155  #endif
     2.1 --- a/src/cpuinfo/SDL_cpuinfo.c	Thu May 06 15:55:06 2004 +0000
     2.2 +++ b/src/cpuinfo/SDL_cpuinfo.c	Sun May 16 17:19:48 2004 +0000
     2.3 @@ -138,7 +138,7 @@
     2.4  "        movl    %%edi,%%ebx\n"
     2.5  	: "=m" (features)
     2.6  	:
     2.7 -	: "%eax", "%ebx", "%ecx", "%edx", "%edi"
     2.8 +	: "%eax", "%ecx", "%edx", "%edi"
     2.9  	);
    2.10  #elif defined(_MSC_VER)
    2.11  	__asm {
    2.12 @@ -173,7 +173,7 @@
    2.13  "        movl    %%edi,%%ebx\n"
    2.14  	: "=m" (features)
    2.15  	:
    2.16 -	: "%eax", "%ebx", "%ecx", "%edx", "%edi"
    2.17 +	: "%eax", "%ecx", "%edx", "%edi"
    2.18  	);
    2.19  #elif defined(_MSC_VER)
    2.20  	__asm {
     3.1 --- a/src/video/SDL_yuv_mmx.c	Thu May 06 15:55:06 2004 +0000
     3.2 +++ b/src/video/SDL_yuv_mmx.c	Sun May 16 17:19:48 2004 +0000
     3.3 @@ -120,12 +120,12 @@
     3.4  		 "movd (%2), %%mm2\n"           //    0  0  0  0 l3 l2 l1 l0
     3.5  		 "punpcklbw %%mm7,%%mm1\n" //         0  v3 0  v2 00 v1 00 v0
     3.6  		 "punpckldq %%mm1,%%mm1\n" //         00 v1 00 v0 00 v1 00 v0
     3.7 -		 "psubw _MMX_0080w,%%mm1\n"  // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 
     3.8 +		 "psubw %[_MMX_0080w],%%mm1\n"  // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 
     3.9  
    3.10  		 // create Cr_g (result in mm0)
    3.11  		 "movq %%mm1,%%mm0\n"           // r1 r1 r0 r0 r1 r1 r0 r0
    3.12 -		 "pmullw _MMX_VgrnRGB,%%mm0\n"// red*-46dec=0.7136*64
    3.13 -		 "pmullw _MMX_VredRGB,%%mm1\n"// red*89dec=1.4013*64
    3.14 +		 "pmullw %[_MMX_VgrnRGB],%%mm0\n"// red*-46dec=0.7136*64
    3.15 +		 "pmullw %[_MMX_VredRGB],%%mm1\n"// red*89dec=1.4013*64
    3.16  		 "psraw  $6, %%mm0\n"           // red=red/64
    3.17  		 "psraw  $6, %%mm1\n"           // red=red/64
    3.18  		 
    3.19 @@ -134,8 +134,8 @@
    3.20  		 "movq (%2,%4),%%mm3\n"         //    0  0  0  0 L3 L2 L1 L0
    3.21  		 "punpckldq %%mm3,%%mm2\n"      //   L3 L2 L1 L0 l3 l2 l1 l0
    3.22  		 "movq %%mm2,%%mm4\n"           //   L3 L2 L1 L0 l3 l2 l1 l0
    3.23 -		 "pand _MMX_FF00w,%%mm2\n"      //   L3 0  L1  0 l3  0 l1  0
    3.24 -		 "pand _MMX_00FFw,%%mm4\n"      //   0  L2  0 L0  0 l2  0 l0
    3.25 +		 "pand %[_MMX_FF00w],%%mm2\n"      //   L3 0  L1  0 l3  0 l1  0
    3.26 +		 "pand %[_MMX_00FFw],%%mm4\n"      //   0  L2  0 L0  0 l2  0 l0
    3.27  		 "psrlw $8,%%mm2\n"             //   0  L3  0 L1  0 l3  0 l1
    3.28  
    3.29  		 // create R (result in mm6)
    3.30 @@ -152,11 +152,11 @@
    3.31  		 "movd (%1), %%mm1\n"      //         0  0  0  0  u3 u2 u1 u0
    3.32  		 "punpcklbw %%mm7,%%mm1\n" //         0  u3 0  u2 00 u1 00 u0
    3.33  		 "punpckldq %%mm1,%%mm1\n" //         00 u1 00 u0 00 u1 00 u0
    3.34 -		 "psubw _MMX_0080w,%%mm1\n"  // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 
    3.35 +		 "psubw %[_MMX_0080w],%%mm1\n"  // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 
    3.36  		 // create Cb_g (result in mm5)
    3.37  		 "movq %%mm1,%%mm5\n"            // u1 u1 u0 u0 u1 u1 u0 u0
    3.38 -		 "pmullw _MMX_UgrnRGB,%%mm5\n"    // blue*-109dec=1.7129*64
    3.39 -		 "pmullw _MMX_UbluRGB,%%mm1\n"    // blue*114dec=1.78125*64
    3.40 +		 "pmullw %[_MMX_UgrnRGB],%%mm5\n"    // blue*-109dec=1.7129*64
    3.41 +		 "pmullw %[_MMX_UbluRGB],%%mm1\n"    // blue*114dec=1.78125*64
    3.42  		 "psraw  $6, %%mm5\n"            // blue=red/64
    3.43  		 "psraw  $6, %%mm1\n"            // blue=blue/64
    3.44  
    3.45 @@ -238,8 +238,14 @@
    3.46  		 "popl %%ebx\n"
    3.47  		 :
    3.48  		 : "m" (cr), "r"(cb),"r"(lum),
    3.49 -		 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod)
    3.50 -		 : "%ebx"
    3.51 +		 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
    3.52 +         [_MMX_0080w] "m" (*_MMX_0080w),
    3.53 +         [_MMX_00FFw] "m" (*_MMX_00FFw),
    3.54 +         [_MMX_FF00w] "m" (*_MMX_FF00w),
    3.55 +         [_MMX_VgrnRGB] "m" (*_MMX_VgrnRGB),
    3.56 +         [_MMX_VredRGB] "m" (*_MMX_VredRGB),
    3.57 +         [_MMX_UgrnRGB] "m" (*_MMX_UgrnRGB),
    3.58 +         [_MMX_UbluRGB] "m" (*_MMX_UbluRGB)
    3.59  		 );
    3.60  }
    3.61  
    3.62 @@ -413,8 +419,16 @@
    3.63  	 "popl %%ebx\n"
    3.64           :
    3.65           :"m" (cr), "r"(cb),"r"(lum),
    3.66 -	 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod)
    3.67 -	 : "%ebx"
    3.68 +	 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
    3.69 +     [_MMX_0080w] "m" (*_MMX_0080w),
    3.70 + [_MMX_Ugrn565] "m" (*_MMX_Ugrn565),
    3.71 + [_MMX_Ublu5x5] "m" (*_MMX_Ublu5x5),
    3.72 + [_MMX_00FFw] "m" (*_MMX_00FFw),
    3.73 + [_MMX_Vgrn565] "m" (*_MMX_Vgrn565),
    3.74 + [_MMX_Vred5x5] "m" (*_MMX_Vred5x5),
    3.75 + [_MMX_Ycoeff] "m" (*_MMX_Ycoeff),
    3.76 + [_MMX_red565] "m" (*_MMX_red565),
    3.77 + [_MMX_grn565] "m" (*_MMX_grn565)
    3.78           );
    3.79  }
    3.80