Ugly hack to make this work with gcc 2.x and 3.x
author Sam Lantinga <slouken@libsdl.org>
Sun, 13 Feb 2005 07:10:02 +0000
changeset 1038 29d7db09776e
parent 1037 c5dedfdb4e42
child 1039 68f2b997758e
Ugly hack to make this work with gcc 2.x and 3.x
Thanks to Stephane Marchesin for the dirty dirty work.
src/video/SDL_yuv_mmx.c
     1.1 --- a/src/video/SDL_yuv_mmx.c	Sat Feb 12 19:39:08 2005 +0000
     1.2 +++ b/src/video/SDL_yuv_mmx.c	Sun Feb 13 07:10:02 2005 +0000
     1.3 @@ -30,29 +30,42 @@
     1.4  
     1.5  #include "SDL_types.h"
     1.6  
     1.7 -static unsigned int  MMX_0080w[]    = {0x00800080, 0x00800080};
     1.8 -static unsigned int  MMX_00FFw[]    = {0x00ff00ff, 0x00ff00ff}; 
     1.9 -static unsigned int  MMX_FF00w[]    = {0xff00ff00, 0xff00ff00}; 
    1.10 +#if __GNUC__ > 2
    1.11 +#    undef GCC2_HACK
    1.12 +#else
    1.13 +#    define GCC2_HACK
    1.14 +#endif
    1.15  
    1.16 -static unsigned short MMX_Ycoeff[]  = {0x004a, 0x004a, 0x004a, 0x004a}; 
    1.17 + 
    1.18 +#if defined(GCC2_HACK) && defined (__ELF__)
    1.19 +#define ASM_VAR(X) _##X
    1.20 +#else
    1.21 +#define ASM_VAR(X) X
    1.22 +#endif
    1.23 + 
    1.24 +static volatile unsigned int  ASM_VAR(MMX_0080w)[]    = {0x00800080, 0x00800080};
    1.25 +static volatile unsigned int  ASM_VAR(MMX_00FFw)[]    = {0x00ff00ff, 0x00ff00ff}; 
    1.26 +static volatile unsigned int  ASM_VAR(MMX_FF00w)[]    = {0xff00ff00, 0xff00ff00}; 
    1.27  
    1.28 -static unsigned short MMX_UbluRGB[] = {0x0072, 0x0072, 0x0072, 0x0072};    
    1.29 -static unsigned short MMX_VredRGB[] = {0x0059, 0x0059, 0x0059, 0x0059};  
    1.30 -static unsigned short MMX_UgrnRGB[] = {0xffea, 0xffea, 0xffea, 0xffea}; 
    1.31 -static unsigned short MMX_VgrnRGB[] = {0xffd2, 0xffd2, 0xffd2, 0xffd2};  
    1.32 +static volatile unsigned short ASM_VAR(MMX_Ycoeff)[]  = {0x004a, 0x004a, 0x004a, 0x004a}; 
    1.33  
    1.34 -static unsigned short MMX_Ublu5x5[] = {0x0081, 0x0081, 0x0081, 0x0081};
    1.35 -static unsigned short MMX_Vred5x5[] = {0x0066, 0x0066, 0x0066, 0x0066};
    1.36 -static unsigned short MMX_Ugrn555[] = {0xffe7, 0xffe7, 0xffe7, 0xffe7};
    1.37 -static unsigned short MMX_Vgrn555[] = {0xffcc, 0xffcc, 0xffcc, 0xffcc};
    1.38 -static unsigned short MMX_Ugrn565[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8};
    1.39 -static unsigned short MMX_Vgrn565[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd};
    1.40 +static volatile unsigned short ASM_VAR(MMX_UbluRGB)[] = {0x0072, 0x0072, 0x0072, 0x0072};    
    1.41 +static volatile unsigned short ASM_VAR(MMX_VredRGB)[] = {0x0059, 0x0059, 0x0059, 0x0059};  
    1.42 +static volatile unsigned short ASM_VAR(MMX_UgrnRGB)[] = {0xffea, 0xffea, 0xffea, 0xffea}; 
    1.43 +static volatile unsigned short ASM_VAR(MMX_VgrnRGB)[] = {0xffd2, 0xffd2, 0xffd2, 0xffd2};  
    1.44  
    1.45 -static unsigned short MMX_red555[]  = {0x7c00, 0x7c00, 0x7c00, 0x7c00};
    1.46 -static unsigned short MMX_red565[]  = {0xf800, 0xf800, 0xf800, 0xf800};
    1.47 -static unsigned short MMX_grn555[]  = {0x03e0, 0x03e0, 0x03e0, 0x03e0};
    1.48 -static unsigned short MMX_grn565[]  = {0x07e0, 0x07e0, 0x07e0, 0x07e0};
    1.49 -static unsigned short MMX_blu5x5[]  = {0x001f, 0x001f, 0x001f, 0x001f};
    1.50 +static volatile unsigned short ASM_VAR(MMX_Ublu5x5)[] = {0x0081, 0x0081, 0x0081, 0x0081};
    1.51 +static volatile unsigned short ASM_VAR(MMX_Vred5x5)[] = {0x0066, 0x0066, 0x0066, 0x0066};
    1.52 +static volatile unsigned short ASM_VAR(MMX_Ugrn555)[] = {0xffe7, 0xffe7, 0xffe7, 0xffe7};
    1.53 +static volatile unsigned short ASM_VAR(MMX_Vgrn555)[] = {0xffcc, 0xffcc, 0xffcc, 0xffcc};
    1.54 +static volatile unsigned short ASM_VAR(MMX_Ugrn565)[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8};
    1.55 +static volatile unsigned short ASM_VAR(MMX_Vgrn565)[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd};
    1.56 +
    1.57 +static volatile unsigned short ASM_VAR(MMX_red555)[]  = {0x7c00, 0x7c00, 0x7c00, 0x7c00};
    1.58 +static volatile unsigned short ASM_VAR(MMX_red565)[]  = {0xf800, 0xf800, 0xf800, 0xf800};
    1.59 +static volatile unsigned short ASM_VAR(MMX_grn555)[]  = {0x03e0, 0x03e0, 0x03e0, 0x03e0};
    1.60 +static volatile unsigned short ASM_VAR(MMX_grn565)[]  = {0x07e0, 0x07e0, 0x07e0, 0x07e0};
    1.61 +static volatile unsigned short ASM_VAR(MMX_blu5x5)[]  = {0x001f, 0x001f, 0x001f, 0x001f};
    1.62  
    1.63  /**
    1.64     This MMX assembler is my first assembler/MMX program ever.
    1.65 @@ -114,12 +127,21 @@
    1.66  		 "movd (%2), %%mm2\n"           //    0  0  0  0 l3 l2 l1 l0
    1.67  		 "punpcklbw %%mm7,%%mm1\n" //         0  v3 0  v2 00 v1 00 v0
    1.68  		 "punpckldq %%mm1,%%mm1\n" //         00 v1 00 v0 00 v1 00 v0
    1.69 +#ifdef GCC2_HACK
    1.70 +		 "psubw _MMX_0080w,%%mm1\n"  // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 
    1.71 +#else
    1.72  		 "psubw %[_MMX_0080w],%%mm1\n"  // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 
    1.73 +#endif
    1.74  
    1.75  		 // create Cr_g (result in mm0)
    1.76  		 "movq %%mm1,%%mm0\n"           // r1 r1 r0 r0 r1 r1 r0 r0
    1.77 +#ifdef GCC2_HACK
    1.78 +		 "pmullw _MMX_VgrnRGB,%%mm0\n"// red*-46dec=0.7136*64
    1.79 +		 "pmullw _MMX_VredRGB,%%mm1\n"// red*89dec=1.4013*64
    1.80 +#else
    1.81  		 "pmullw %[_MMX_VgrnRGB],%%mm0\n"// red*-46dec=0.7136*64
    1.82  		 "pmullw %[_MMX_VredRGB],%%mm1\n"// red*89dec=1.4013*64
    1.83 +#endif
    1.84  		 "psraw  $6, %%mm0\n"           // red=red/64
    1.85  		 "psraw  $6, %%mm1\n"           // red=red/64
    1.86  		 
    1.87 @@ -128,8 +150,13 @@
    1.88  		 "movq (%2,%4),%%mm3\n"         //    0  0  0  0 L3 L2 L1 L0
    1.89  		 "punpckldq %%mm3,%%mm2\n"      //   L3 L2 L1 L0 l3 l2 l1 l0
    1.90  		 "movq %%mm2,%%mm4\n"           //   L3 L2 L1 L0 l3 l2 l1 l0
    1.91 +#ifdef GCC2_HACK
    1.92 +		 "pand _MMX_FF00w,%%mm2\n"      //   L3 0  L1  0 l3  0 l1  0
    1.93 +		 "pand _MMX_00FFw,%%mm4\n"      //   0  L2  0 L0  0 l2  0 l0
    1.94 +#else
    1.95  		 "pand %[_MMX_FF00w],%%mm2\n"      //   L3 0  L1  0 l3  0 l1  0
    1.96  		 "pand %[_MMX_00FFw],%%mm4\n"      //   0  L2  0 L0  0 l2  0 l0
    1.97 +#endif
    1.98  		 "psrlw $8,%%mm2\n"             //   0  L3  0 L1  0 l3  0 l1
    1.99  
   1.100  		 // create R (result in mm6)
   1.101 @@ -146,11 +173,20 @@
   1.102  		 "movd (%1), %%mm1\n"      //         0  0  0  0  u3 u2 u1 u0
   1.103  		 "punpcklbw %%mm7,%%mm1\n" //         0  u3 0  u2 00 u1 00 u0
   1.104  		 "punpckldq %%mm1,%%mm1\n" //         00 u1 00 u0 00 u1 00 u0
   1.105 +#ifdef GCC2_HACK
   1.106 +		 "psubw _MMX_0080w,%%mm1\n"  // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 
   1.107 +#else
   1.108  		 "psubw %[_MMX_0080w],%%mm1\n"  // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 
   1.109 +#endif
   1.110  		 // create Cb_g (result in mm5)
   1.111  		 "movq %%mm1,%%mm5\n"            // u1 u1 u0 u0 u1 u1 u0 u0
   1.112 +#ifdef GCC2_HACK
   1.113 +		 "pmullw _MMX_UgrnRGB,%%mm5\n"    // blue*-109dec=1.7129*64
   1.114 +		 "pmullw _MMX_UbluRGB,%%mm1\n"    // blue*114dec=1.78125*64
   1.115 +#else
   1.116  		 "pmullw %[_MMX_UgrnRGB],%%mm5\n"    // blue*-109dec=1.7129*64
   1.117  		 "pmullw %[_MMX_UbluRGB],%%mm1\n"    // blue*114dec=1.78125*64
   1.118 +#endif
   1.119  		 "psraw  $6, %%mm5\n"            // blue=red/64
   1.120  		 "psraw  $6, %%mm1\n"            // blue=blue/64
   1.121  
   1.122 @@ -232,15 +268,17 @@
   1.123  		 "popl %%ebx\n"
   1.124  		 :
   1.125  		 : "m" (cr), "r"(cb),"r"(lum),
   1.126 -		   "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
   1.127 -                   [_MMX_0080w] "m" (*MMX_0080w),
   1.128 -                   [_MMX_00FFw] "m" (*MMX_00FFw),
   1.129 -                   [_MMX_FF00w] "m" (*MMX_FF00w),
   1.130 -                   [_MMX_VgrnRGB] "m" (*MMX_VgrnRGB),
   1.131 -                   [_MMX_VredRGB] "m" (*MMX_VredRGB),
   1.132 -                   [_MMX_UgrnRGB] "m" (*MMX_UgrnRGB),
   1.133 -                   [_MMX_UbluRGB] "m" (*MMX_UbluRGB)
   1.134 -		 );
   1.135 +		 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod)
   1.136 +#ifndef GCC2_HACK
   1.137 +		 ,[_MMX_0080w] "m" (*MMX_0080w),
   1.138 +		 [_MMX_00FFw] "m" (*MMX_00FFw),
   1.139 +		 [_MMX_FF00w] "m" (*MMX_FF00w),
   1.140 +		 [_MMX_VgrnRGB] "m" (*MMX_VgrnRGB),
   1.141 +		 [_MMX_VredRGB] "m" (*MMX_VredRGB),
   1.142 +		 [_MMX_UgrnRGB] "m" (*MMX_UgrnRGB),
   1.143 +		 [_MMX_UbluRGB] "m" (*MMX_UbluRGB)
   1.144 +#endif
   1.145 +			 );
   1.146  }
   1.147  
   1.148  void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   1.149 @@ -269,21 +307,48 @@
   1.150           "movd           (%%ebx),                %%mm1\n" // 4 Cr                0  0  0  0 v3 v2 v1 v0
   1.151           "punpcklbw      %%mm7,                  %%mm0\n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
   1.152           "punpcklbw      %%mm7,                  %%mm1\n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
   1.153 +#ifdef GCC2_HACK
   1.154 +         "psubw          _MMX_0080w,             %%mm0\n"
   1.155 +         "psubw          _MMX_0080w,             %%mm1\n"
   1.156 +#else
   1.157           "psubw          %[_MMX_0080w],             %%mm0\n"
   1.158           "psubw          %[_MMX_0080w],             %%mm1\n"
   1.159 +#endif
   1.160           "movq           %%mm0,                  %%mm2\n" // Cb                   0 u3  0 u2  0 u1  0 u0
   1.161           "movq           %%mm1,                  %%mm3\n" // Cr
   1.162 +#ifdef GCC2_HACK
   1.163 +         "pmullw         _MMX_Ugrn565,           %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
   1.164 +#else
   1.165           "pmullw         %[_MMX_Ugrn565],           %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
   1.166 +#endif
   1.167           "movq           (%2),                   %%mm6\n" // L1      l7 L6 L5 L4 L3 L2 L1 L0
   1.168 +#ifdef GCC2_HACK
   1.169 +         "pmullw         _MMX_Ublu5x5,           %%mm0\n" // Cb2blue
   1.170 +         "pand           _MMX_00FFw,             %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
   1.171 +         "pmullw         _MMX_Vgrn565,           %%mm3\n" // Cr2green
   1.172 +#else
   1.173           "pmullw         %[_MMX_Ublu5x5],           %%mm0\n" // Cb2blue
   1.174           "pand           %[_MMX_00FFw],             %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
   1.175           "pmullw         %[_MMX_Vgrn565],           %%mm3\n" // Cr2green
   1.176 +#endif
   1.177           "movq           (%2),                   %%mm7\n" // L2
   1.178 +#ifdef GCC2_HACK
   1.179 +         "pmullw         _MMX_Vred5x5,           %%mm1\n" // Cr2red
   1.180 +#else
   1.181           "pmullw         %[_MMX_Vred5x5],           %%mm1\n" // Cr2red
   1.182 +#endif
   1.183           "psrlw          $8,                     %%mm7\n"        // L2           00 L7 00 L5 00 L3 00 L1
   1.184 +#ifdef GCC2_HACK
   1.185 +         "pmullw         _MMX_Ycoeff,            %%mm6\n" // lum1
   1.186 +#else
   1.187           "pmullw         %[_MMX_Ycoeff],            %%mm6\n" // lum1
   1.188 +#endif
   1.189           "paddw          %%mm3,                  %%mm2\n" // Cb2green + Cr2green == green
   1.190 +#ifdef GCC2_HACK
   1.191 +         "pmullw         _MMX_Ycoeff,            %%mm7\n" // lum2
   1.192 +#else
   1.193           "pmullw         %[_MMX_Ycoeff],            %%mm7\n" // lum2
   1.194 +#endif
   1.195  
   1.196           "movq           %%mm6,                  %%mm4\n" // lum1
   1.197           "paddw          %%mm0,                  %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
   1.198 @@ -301,11 +366,20 @@
   1.199           "punpcklbw      %%mm4,                  %%mm4\n"
   1.200           "punpcklbw      %%mm5,                  %%mm5\n"
   1.201  
   1.202 +#ifdef GCC2_HACK
   1.203 +         "pand           _MMX_red565,            %%mm4\n"
   1.204 +#else
   1.205           "pand           %[_MMX_red565],            %%mm4\n"
   1.206 +#endif
   1.207           "psllw          $3,                     %%mm5\n" // GREEN       1
   1.208           "punpcklbw      %%mm6,                  %%mm6\n"
   1.209 +#ifdef GCC2_HACK
   1.210 +         "pand           _MMX_grn565,            %%mm5\n"
   1.211 +         "pand           _MMX_red565,            %%mm6\n"
   1.212 +#else
   1.213           "pand           %[_MMX_grn565],            %%mm5\n"
   1.214           "pand           %[_MMX_red565],            %%mm6\n"
   1.215 +#endif
   1.216           "por            %%mm5,                  %%mm4\n" //
   1.217           "psrlw          $11,                    %%mm6\n" // BLUE        1
   1.218           "movq           %%mm3,                  %%mm5\n" // lum2
   1.219 @@ -319,23 +393,44 @@
   1.220           "packuswb       %%mm3,                  %%mm3\n"
   1.221           "packuswb       %%mm5,                  %%mm5\n"
   1.222           "packuswb       %%mm7,                  %%mm7\n"
   1.223 +#ifdef GCC2_HACK
   1.224 +         "pand           _MMX_00FFw,             %%mm6\n" // L3
   1.225 +#else
   1.226           "pand           %[_MMX_00FFw],             %%mm6\n" // L3
   1.227 +#endif
   1.228           "punpcklbw      %%mm3,                  %%mm3\n"
   1.229           "punpcklbw      %%mm5,                  %%mm5\n"
   1.230 +#ifdef GCC2_HACK
   1.231 +         "pmullw         _MMX_Ycoeff,            %%mm6\n" // lum3
   1.232 +#else
   1.233           "pmullw         %[_MMX_Ycoeff],            %%mm6\n" // lum3
   1.234 +#endif
   1.235           "punpcklbw      %%mm7,                  %%mm7\n"
   1.236           "psllw          $3,                     %%mm5\n" // GREEN 2
   1.237 +#ifdef GCC2_HACK
   1.238 +         "pand           _MMX_red565,            %%mm7\n"
   1.239 +         "pand           _MMX_red565,            %%mm3\n"
   1.240 +#else
   1.241           "pand           %[_MMX_red565],            %%mm7\n"
   1.242           "pand           %[_MMX_red565],            %%mm3\n"
   1.243 +#endif
   1.244           "psrlw          $11,                    %%mm7\n" // BLUE  2
   1.245 +#ifdef GCC2_HACK
   1.246 +         "pand           _MMX_grn565,            %%mm5\n"
   1.247 +#else
   1.248           "pand           %[_MMX_grn565],            %%mm5\n"
   1.249 +#endif
   1.250           "por            %%mm7,                  %%mm3\n"
   1.251           "movq           (%2,%4),                %%mm7\n" // L4 load lum2
   1.252           "por            %%mm5,                  %%mm3\n" //
   1.253           "psrlw          $8,                     %%mm7\n" // L4
   1.254           "movq           %%mm4,                  %%mm5\n"
   1.255           "punpcklwd      %%mm3,                  %%mm4\n"
   1.256 +#ifdef GCC2_HACK
   1.257 +         "pmullw         _MMX_Ycoeff,            %%mm7\n" // lum4
   1.258 +#else
   1.259           "pmullw         %[_MMX_Ycoeff],            %%mm7\n" // lum4
   1.260 +#endif
   1.261           "punpckhwd      %%mm3,                  %%mm5\n"
   1.262  
   1.263           "movq           %%mm4,                  (%3)\n"  // write row1
   1.264 @@ -362,11 +457,20 @@
   1.265           "punpcklbw      %%mm5,                  %%mm5\n"
   1.266           "punpcklbw      %%mm6,                  %%mm6\n"
   1.267           "psllw          $3,                     %%mm5\n" // GREEN 3
   1.268 +#ifdef GCC2_HACK
   1.269 +         "pand           _MMX_red565,            %%mm4\n"
   1.270 +#else
   1.271           "pand           %[_MMX_red565],            %%mm4\n"
   1.272 +#endif
   1.273           "psraw          $6,                     %%mm3\n" // psr 6
   1.274           "psraw          $6,                     %%mm0\n"
   1.275 +#ifdef GCC2_HACK
   1.276 +         "pand           _MMX_red565,            %%mm6\n" // BLUE
   1.277 +         "pand           _MMX_grn565,            %%mm5\n"
   1.278 +#else
   1.279           "pand           %[_MMX_red565],            %%mm6\n" // BLUE
   1.280           "pand           %[_MMX_grn565],            %%mm5\n"
   1.281 +#endif
   1.282           "psrlw          $11,                    %%mm6\n" // BLUE  3
   1.283           "por            %%mm5,                  %%mm4\n"
   1.284           "psraw          $6,                     %%mm7\n"
   1.285 @@ -377,11 +481,20 @@
   1.286           "punpcklbw      %%mm3,                  %%mm3\n"
   1.287           "punpcklbw      %%mm0,                  %%mm0\n"
   1.288           "punpcklbw      %%mm7,                  %%mm7\n"
   1.289 +#ifdef GCC2_HACK
   1.290 +         "pand           _MMX_red565,            %%mm3\n"
   1.291 +         "pand           _MMX_red565,            %%mm7\n" // BLUE
   1.292 +#else
   1.293           "pand           %[_MMX_red565],            %%mm3\n"
   1.294           "pand           %[_MMX_red565],            %%mm7\n" // BLUE
   1.295 +#endif
   1.296           "psllw          $3,                     %%mm0\n" // GREEN 4
   1.297           "psrlw          $11,                    %%mm7\n"
   1.298 +#ifdef GCC2_HACK
   1.299 +         "pand           _MMX_grn565,            %%mm0\n"
   1.300 +#else
   1.301           "pand           %[_MMX_grn565],            %%mm0\n"
   1.302 +#endif
   1.303           "por            %%mm7,                  %%mm3\n"
   1.304           "por            %%mm0,                  %%mm3\n"
   1.305  
   1.306 @@ -411,10 +524,11 @@
   1.307  	 "jl             1b\n"
   1.308           "emms\n"
   1.309  	 "popl %%ebx\n"
   1.310 -	:
   1.311 -	:"m" (cr), "r"(cb),"r"(lum),
   1.312 -	 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
   1.313 -	 [_MMX_0080w] "m" (*MMX_0080w),
   1.314 +         :
   1.315 +         :"m" (cr), "r"(cb),"r"(lum),
   1.316 +	 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod)
   1.317 +#ifndef GCC2_HACK
   1.318 +	 ,[_MMX_0080w] "m" (*MMX_0080w),
   1.319  	 [_MMX_Ugrn565] "m" (*MMX_Ugrn565),
   1.320  	 [_MMX_Ublu5x5] "m" (*MMX_Ublu5x5),
   1.321  	 [_MMX_00FFw] "m" (*MMX_00FFw),
   1.322 @@ -423,7 +537,10 @@
   1.323  	 [_MMX_Ycoeff] "m" (*MMX_Ycoeff),
   1.324  	 [_MMX_red565] "m" (*MMX_red565),
   1.325  	 [_MMX_grn565] "m" (*MMX_grn565)
   1.326 -         );
   1.327 +#endif
   1.328 +		 );
   1.329  }
   1.330  
   1.331 +#undef GCC2_HACK
   1.332 +
   1.333  #endif /* GCC i386 inline assembly */