Made it possible to create a texture of any format, even if not supported by the renderer.
author Sam Lantinga <slouken@libsdl.org>
Thu, 03 Feb 2011 00:19:40 -0800
changeset 5156 307ccc9c135e
parent 5155 f3ebd1950442
child 5157 657543cc92f9
Made it possible to create a texture of any format, even if not supported by the renderer.
This allows me to reduce the set of formats supported by the renderers to the most optimal set, for a nice speed boost.
VisualC/SDL/SDL_VS2008.vcproj
VisualC/SDL/SDL_VS2010.vcxproj
Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj
Xcode/SDL/SDL.xcodeproj/project.pbxproj
include/SDL_pixels.h
include/SDL_rect.h
include/SDL_render.h
src/SDL_compat.c
src/render/SDL_render.c
src/render/SDL_sysrender.h
src/render/SDL_yuv_mmx.c
src/render/SDL_yuv_sw.c
src/render/SDL_yuv_sw_c.h
src/render/direct3d/SDL_d3drender.c
src/render/mmx.h
src/render/opengl/SDL_renderer_gl.c
src/render/opengles/SDL_renderer_gles.c
src/render/software/SDL_renderer_sw.c
src/video/SDL_leaks.h
src/video/SDL_rect.c
src/video/SDL_yuv_mmx.c
src/video/SDL_yuv_sw.c
src/video/SDL_yuv_sw_c.h
src/video/mmx.h
     1.1 --- a/VisualC/SDL/SDL_VS2008.vcproj	Wed Feb 02 22:55:12 2011 -0800
     1.2 +++ b/VisualC/SDL/SDL_VS2008.vcproj	Thu Feb 03 00:19:40 2011 -0800
     1.3 @@ -607,7 +607,7 @@
     1.4  			>
     1.5  		</File>
     1.6  		<File
     1.7 -			RelativePath="..\..\src\video\mmx.h"
     1.8 +			RelativePath="..\..\src\render\mmx.h"
     1.9  			>
    1.10  		</File>
    1.11  		<File
    1.12 @@ -1251,15 +1251,15 @@
    1.13  			>
    1.14  		</File>
    1.15  		<File
    1.16 -			RelativePath="..\..\src\video\SDL_yuv_mmx.c"
    1.17 +			RelativePath="..\..\src\render\SDL_yuv_mmx.c"
    1.18  			>
    1.19  		</File>
    1.20  		<File
    1.21 -			RelativePath="..\..\src\video\SDL_yuv_sw.c"
    1.22 +			RelativePath="..\..\src\render\SDL_yuv_sw.c"
    1.23  			>
    1.24  		</File>
    1.25  		<File
    1.26 -			RelativePath="..\..\src\video\SDL_yuv_sw_c.h"
    1.27 +			RelativePath="..\..\src\render\SDL_yuv_sw_c.h"
    1.28  			>
    1.29  		</File>
    1.30  		<File
     2.1 --- a/VisualC/SDL/SDL_VS2010.vcxproj	Wed Feb 02 22:55:12 2011 -0800
     2.2 +++ b/VisualC/SDL/SDL_VS2010.vcxproj	Thu Feb 03 00:19:40 2011 -0800
     2.3 @@ -282,8 +282,9 @@
     2.4      <ClInclude Include="..\..\src\events\SDL_touch_c.h" />
     2.5      <ClInclude Include="..\..\src\libm\math.h" />
     2.6      <ClInclude Include="..\..\src\libm\math_private.h" />
     2.7 +    <ClInclude Include="..\..\src\render\mmx.h" />
     2.8      <ClInclude Include="..\..\src\render\SDL_sysrender.h" />
     2.9 -    <ClInclude Include="..\..\src\video\mmx.h" />
    2.10 +    <ClInclude Include="..\..\src\render\SDL_yuv_sw_c.h" />
    2.11      <ClInclude Include="..\..\src\video\SDL_alphamult.h" />
    2.12      <ClInclude Include="..\..\src\audio\SDL_audio_c.h" />
    2.13      <ClInclude Include="..\..\src\audio\SDL_audiodev_c.h" />
    2.14 @@ -339,7 +340,6 @@
    2.15      <ClInclude Include="..\..\src\video\windows\SDL_windowsvideo.h" />
    2.16      <ClInclude Include="..\..\src\video\windows\SDL_windowswindow.h" />
    2.17      <ClInclude Include="..\..\src\events\SDL_windowevents_c.h" />
    2.18 -    <ClInclude Include="..\..\src\video\SDL_yuv_sw_c.h" />
    2.19      <ClInclude Include="..\..\src\video\windows\wmmsg.h" />
    2.20    </ItemGroup>
    2.21    <ItemGroup>
    2.22 @@ -365,6 +365,8 @@
    2.23      <ClCompile Include="..\..\src\render\direct3d\SDL_d3drender.c" />
    2.24      <ClCompile Include="..\..\src\render\opengl\SDL_renderer_gl.c" />
    2.25      <ClCompile Include="..\..\src\render\SDL_render.c" />
    2.26 +    <ClCompile Include="..\..\src\render\SDL_yuv_mmx.c" />
    2.27 +    <ClCompile Include="..\..\src\render\SDL_yuv_sw.c" />
    2.28      <ClCompile Include="..\..\src\render\software\SDL_renderer_sw.c" />
    2.29      <ClCompile Include="..\..\src\SDL.c" />
    2.30      <ClCompile Include="..\..\src\video\SDL_alphamult.c" />
    2.31 @@ -452,8 +454,6 @@
    2.32      <ClCompile Include="..\..\src\video\windows\SDL_windowsvideo.c" />
    2.33      <ClCompile Include="..\..\src\video\windows\SDL_windowswindow.c" />
    2.34      <ClCompile Include="..\..\src\events\SDL_windowevents.c" />
    2.35 -    <ClCompile Include="..\..\src\video\SDL_yuv_mmx.c" />
    2.36 -    <ClCompile Include="..\..\src\video\SDL_yuv_sw.c" />
    2.37    </ItemGroup>
    2.38    <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
    2.39    <ImportGroup Label="ExtensionTargets">
     3.1 --- a/Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj	Wed Feb 02 22:55:12 2011 -0800
     3.2 +++ b/Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj	Thu Feb 03 00:19:40 2011 -0800
     3.3 @@ -73,6 +73,10 @@
     3.4  		043DD77010FD8A0000DED673 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 043DD76C10FD8A0000DED673 /* SDL_alphamult.h */; };
     3.5  		043DD77110FD8A0000DED673 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 043DD76D10FD8A0000DED673 /* SDL_blendfillrect.c */; };
     3.6  		043DD77210FD8A0000DED673 /* SDL_drawrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 043DD76E10FD8A0000DED673 /* SDL_drawrect.c */; };
     3.7 +		04409BA612FA989600FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409BA212FA989600FB9AA8 /* mmx.h */; };
     3.8 +		04409BA712FA989600FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */; };
     3.9 +		04409BA812FA989600FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */; };
    3.10 +		04409BA912FA989600FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */; };
    3.11  		04461DEE0EA76BA3006C462D /* SDL_haptic.h in Headers */ = {isa = PBXBuildFile; fileRef = 04461DED0EA76BA3006C462D /* SDL_haptic.h */; settings = {ATTRIBUTES = (Public, ); }; };
    3.12  		044E5FB511E6069F0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5FB311E6069F0076F181 /* SDL_clipboard.h */; settings = {ATTRIBUTES = (Public, ); }; };
    3.13  		044E5FB611E6069F0076F181 /* SDL_input.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5FB411E6069F0076F181 /* SDL_input.h */; settings = {ATTRIBUTES = (Public, ); }; };
    3.14 @@ -223,9 +227,6 @@
    3.15  		FDA684660DF2374E00F98A1A /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA683190DF2374E00F98A1A /* SDL_surface.c */; };
    3.16  		FDA684670DF2374E00F98A1A /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */; };
    3.17  		FDA684680DF2374E00F98A1A /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831B0DF2374E00F98A1A /* SDL_video.c */; };
    3.18 -		FDA684690DF2374E00F98A1A /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */; };
    3.19 -		FDA6846A0DF2374E00F98A1A /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */; };
    3.20 -		FDA6846B0DF2374E00F98A1A /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */; };
    3.21  		FDA685FB0DF244C800F98A1A /* SDL_nullevents.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA685F50DF244C800F98A1A /* SDL_nullevents.c */; };
    3.22  		FDA685FC0DF244C800F98A1A /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA685F60DF244C800F98A1A /* SDL_nullevents_c.h */; };
    3.23  		FDA685FF0DF244C800F98A1A /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA685F90DF244C800F98A1A /* SDL_nullvideo.c */; };
    3.24 @@ -328,6 +329,10 @@
    3.25  		043DD76C10FD8A0000DED673 /* SDL_alphamult.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_alphamult.h; sourceTree = "<group>"; };
    3.26  		043DD76D10FD8A0000DED673 /* SDL_blendfillrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_blendfillrect.c; sourceTree = "<group>"; };
    3.27  		043DD76E10FD8A0000DED673 /* SDL_drawrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_drawrect.c; sourceTree = "<group>"; };
    3.28 +		04409BA212FA989600FB9AA8 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
    3.29 +		04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
    3.30 +		04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
    3.31 +		04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
    3.32  		04461DED0EA76BA3006C462D /* SDL_haptic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_haptic.h; path = ../../include/SDL_haptic.h; sourceTree = SOURCE_ROOT; };
    3.33  		044E5FB311E6069F0076F181 /* SDL_clipboard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_clipboard.h; path = ../../include/SDL_clipboard.h; sourceTree = SOURCE_ROOT; };
    3.34  		044E5FB411E6069F0076F181 /* SDL_input.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_input.h; path = ../../include/SDL_input.h; sourceTree = SOURCE_ROOT; };
    3.35 @@ -505,9 +510,6 @@
    3.36  		FDA683190DF2374E00F98A1A /* SDL_surface.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_surface.c; sourceTree = "<group>"; };
    3.37  		FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysvideo.h; sourceTree = "<group>"; };
    3.38  		FDA6831B0DF2374E00F98A1A /* SDL_video.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_video.c; sourceTree = "<group>"; };
    3.39 -		FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
    3.40 -		FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
    3.41 -		FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
    3.42  		FDA685F50DF244C800F98A1A /* SDL_nullevents.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullevents.c; sourceTree = "<group>"; };
    3.43  		FDA685F60DF244C800F98A1A /* SDL_nullevents_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullevents_c.h; sourceTree = "<group>"; };
    3.44  		FDA685F90DF244C800F98A1A /* SDL_nullvideo.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullvideo.c; sourceTree = "<group>"; };
    3.45 @@ -659,9 +661,13 @@
    3.46  			isa = PBXGroup;
    3.47  			children = (
    3.48  				041B2CE812FA0F680087D585 /* opengles */,
    3.49 +				041B2CEC12FA0F680087D585 /* software */,
    3.50 +				04409BA212FA989600FB9AA8 /* mmx.h */,
    3.51  				041B2CEA12FA0F680087D585 /* SDL_render.c */,
    3.52  				041B2CEB12FA0F680087D585 /* SDL_sysrender.h */,
    3.53 -				041B2CEC12FA0F680087D585 /* software */,
    3.54 +				04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */,
    3.55 +				04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */,
    3.56 +				04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */,
    3.57  			);
    3.58  			name = render;
    3.59  			path = ../../src/render;
    3.60 @@ -1113,9 +1119,6 @@
    3.61  				FDA683190DF2374E00F98A1A /* SDL_surface.c */,
    3.62  				FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */,
    3.63  				FDA6831B0DF2374E00F98A1A /* SDL_video.c */,
    3.64 -				FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */,
    3.65 -				FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */,
    3.66 -				FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */,
    3.67  			);
    3.68  			name = video;
    3.69  			path = ../../src/video;
    3.70 @@ -1179,7 +1182,6 @@
    3.71  				FDA6845D0DF2374E00F98A1A /* SDL_pixels_c.h in Headers */,
    3.72  				FDA684630DF2374E00F98A1A /* SDL_RLEaccel_c.h in Headers */,
    3.73  				FDA684670DF2374E00F98A1A /* SDL_sysvideo.h in Headers */,
    3.74 -				FDA6846B0DF2374E00F98A1A /* SDL_yuv_sw_c.h in Headers */,
    3.75  				FDA685FC0DF244C800F98A1A /* SDL_nullevents_c.h in Headers */,
    3.76  				FDA686000DF244C800F98A1A /* SDL_nullvideo.h in Headers */,
    3.77  				FD5F9D300E0E08B3008E885B /* SDL_joystick_c.h in Headers */,
    3.78 @@ -1220,6 +1222,8 @@
    3.79  				04FFAB9812E23BDC00BA343D /* SDL_shape.h in Headers */,
    3.80  				041B2CD912FA0E9E0087D585 /* SDL_render.h in Headers */,
    3.81  				041B2CF212FA0F680087D585 /* SDL_sysrender.h in Headers */,
    3.82 +				04409BA612FA989600FB9AA8 /* mmx.h in Headers */,
    3.83 +				04409BA812FA989600FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
    3.84  			);
    3.85  			runOnlyForDeploymentPostprocessing = 0;
    3.86  		};
    3.87 @@ -1427,8 +1431,6 @@
    3.88  				FDA684640DF2374E00F98A1A /* SDL_stretch.c in Sources */,
    3.89  				FDA684660DF2374E00F98A1A /* SDL_surface.c in Sources */,
    3.90  				FDA684680DF2374E00F98A1A /* SDL_video.c in Sources */,
    3.91 -				FDA684690DF2374E00F98A1A /* SDL_yuv_mmx.c in Sources */,
    3.92 -				FDA6846A0DF2374E00F98A1A /* SDL_yuv_sw.c in Sources */,
    3.93  				FDA685FB0DF244C800F98A1A /* SDL_nullevents.c in Sources */,
    3.94  				FDA685FF0DF244C800F98A1A /* SDL_nullvideo.c in Sources */,
    3.95  				FD5F9D2F0E0E08B3008E885B /* SDL_joystick.c in Sources */,
    3.96 @@ -1469,6 +1471,8 @@
    3.97  				041B2CF012FA0F680087D585 /* SDL_renderer_gles.c in Sources */,
    3.98  				041B2CF112FA0F680087D585 /* SDL_render.c in Sources */,
    3.99  				041B2CF312FA0F680087D585 /* SDL_renderer_sw.c in Sources */,
   3.100 +				04409BA712FA989600FB9AA8 /* SDL_yuv_mmx.c in Sources */,
   3.101 +				04409BA912FA989600FB9AA8 /* SDL_yuv_sw.c in Sources */,
   3.102  			);
   3.103  			runOnlyForDeploymentPostprocessing = 0;
   3.104  		};
     4.1 --- a/Xcode/SDL/SDL.xcodeproj/project.pbxproj	Wed Feb 02 22:55:12 2011 -0800
     4.2 +++ b/Xcode/SDL/SDL.xcodeproj/project.pbxproj	Thu Feb 03 00:19:40 2011 -0800
     4.3 @@ -131,6 +131,14 @@
     4.4  		041B2CAB12FA0D680087D585 /* SDL_render.c in Sources */ = {isa = PBXBuildFile; fileRef = 041B2C9E12FA0D680087D585 /* SDL_render.c */; };
     4.5  		041B2CAC12FA0D680087D585 /* SDL_sysrender.h in Headers */ = {isa = PBXBuildFile; fileRef = 041B2C9F12FA0D680087D585 /* SDL_sysrender.h */; };
     4.6  		041B2CAD12FA0D680087D585 /* SDL_renderer_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 041B2CA112FA0D680087D585 /* SDL_renderer_sw.c */; };
     4.7 +		04409B9112FA97ED00FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8D12FA97ED00FB9AA8 /* mmx.h */; };
     4.8 +		04409B9212FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */; };
     4.9 +		04409B9312FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */; };
    4.10 +		04409B9412FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */; };
    4.11 +		04409B9512FA97ED00FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8D12FA97ED00FB9AA8 /* mmx.h */; };
    4.12 +		04409B9612FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */; };
    4.13 +		04409B9712FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */; };
    4.14 +		04409B9812FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */; };
    4.15  		044E5F8511E6051C0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5F8411E6051C0076F181 /* SDL_clipboard.h */; settings = {ATTRIBUTES = (Public, ); }; };
    4.16  		044E5F8611E6051C0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5F8411E6051C0076F181 /* SDL_clipboard.h */; };
    4.17  		0469A10B12EE4BF100B846D6 /* SDL_blendmode.h in Headers */ = {isa = PBXBuildFile; fileRef = 0469A10912EE4BF100B846D6 /* SDL_blendmode.h */; settings = {ATTRIBUTES = (Public, ); }; };
    4.18 @@ -244,7 +252,6 @@
    4.19  		04BD011812E6671800899322 /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */; };
    4.20  		04BD011B12E6671800899322 /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */; };
    4.21  		04BD011C12E6671800899322 /* SDL_nullvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEED12E6671800899322 /* SDL_nullvideo.h */; };
    4.22 -		04BD013212E6671800899322 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF0412E6671800899322 /* mmx.h */; };
    4.23  		04BD016F12E6671800899322 /* SDL_alphamult.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4812E6671800899322 /* SDL_alphamult.c */; };
    4.24  		04BD017012E6671800899322 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF4912E6671800899322 /* SDL_alphamult.h */; };
    4.25  		04BD017112E6671800899322 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */; };
    4.26 @@ -285,9 +292,6 @@
    4.27  		04BD019B12E6671800899322 /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7412E6671800899322 /* SDL_surface.c */; };
    4.28  		04BD019C12E6671800899322 /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7512E6671800899322 /* SDL_sysvideo.h */; };
    4.29  		04BD019D12E6671800899322 /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7612E6671800899322 /* SDL_video.c */; };
    4.30 -		04BD019E12E6671800899322 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */; };
    4.31 -		04BD019F12E6671800899322 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */; };
    4.32 -		04BD01A012E6671800899322 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */; };
    4.33  		04BD01DB12E6671800899322 /* imKStoUCS.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFB812E6671800899322 /* imKStoUCS.c */; };
    4.34  		04BD01DC12E6671800899322 /* imKStoUCS.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFFB912E6671800899322 /* imKStoUCS.h */; };
    4.35  		04BD01DD12E6671800899322 /* SDL_x11clipboard.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */; };
    4.36 @@ -457,7 +461,6 @@
    4.37  		04BD033212E6671800899322 /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */; };
    4.38  		04BD033512E6671800899322 /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */; };
    4.39  		04BD033612E6671800899322 /* SDL_nullvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEED12E6671800899322 /* SDL_nullvideo.h */; };
    4.40 -		04BD034C12E6671800899322 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF0412E6671800899322 /* mmx.h */; };
    4.41  		04BD038912E6671800899322 /* SDL_alphamult.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4812E6671800899322 /* SDL_alphamult.c */; };
    4.42  		04BD038A12E6671800899322 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF4912E6671800899322 /* SDL_alphamult.h */; };
    4.43  		04BD038B12E6671800899322 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */; };
    4.44 @@ -498,9 +501,6 @@
    4.45  		04BD03B512E6671800899322 /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7412E6671800899322 /* SDL_surface.c */; };
    4.46  		04BD03B612E6671800899322 /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7512E6671800899322 /* SDL_sysvideo.h */; };
    4.47  		04BD03B712E6671800899322 /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7612E6671800899322 /* SDL_video.c */; };
    4.48 -		04BD03B812E6671800899322 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */; };
    4.49 -		04BD03B912E6671800899322 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */; };
    4.50 -		04BD03BA12E6671800899322 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */; };
    4.51  		04BD03F312E6671800899322 /* imKStoUCS.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFB812E6671800899322 /* imKStoUCS.c */; };
    4.52  		04BD03F412E6671800899322 /* imKStoUCS.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFFB912E6671800899322 /* imKStoUCS.h */; };
    4.53  		04BD03F512E6671800899322 /* SDL_x11clipboard.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */; };
    4.54 @@ -701,6 +701,10 @@
    4.55  		041B2C9E12FA0D680087D585 /* SDL_render.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_render.c; sourceTree = "<group>"; };
    4.56  		041B2C9F12FA0D680087D585 /* SDL_sysrender.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysrender.h; sourceTree = "<group>"; };
    4.57  		041B2CA112FA0D680087D585 /* SDL_renderer_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_renderer_sw.c; sourceTree = "<group>"; };
    4.58 +		04409B8D12FA97ED00FB9AA8 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
    4.59 +		04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
    4.60 +		04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
    4.61 +		04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
    4.62  		044E5F8411E6051C0076F181 /* SDL_clipboard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_clipboard.h; path = ../../include/SDL_clipboard.h; sourceTree = SOURCE_ROOT; };
    4.63  		0469A10912EE4BF100B846D6 /* SDL_blendmode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_blendmode.h; path = ../../include/SDL_blendmode.h; sourceTree = SOURCE_ROOT; };
    4.64  		04BDFD7412E6671700899322 /* SDL_atomic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_atomic.c; sourceTree = "<group>"; };
    4.65 @@ -814,7 +818,6 @@
    4.66  		04BDFEE912E6671800899322 /* SDL_nullevents_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullevents_c.h; sourceTree = "<group>"; };
    4.67  		04BDFEEC12E6671800899322 /* SDL_nullvideo.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullvideo.c; sourceTree = "<group>"; };
    4.68  		04BDFEED12E6671800899322 /* SDL_nullvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullvideo.h; sourceTree = "<group>"; };
    4.69 -		04BDFF0412E6671800899322 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
    4.70  		04BDFF4812E6671800899322 /* SDL_alphamult.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_alphamult.c; sourceTree = "<group>"; };
    4.71  		04BDFF4912E6671800899322 /* SDL_alphamult.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_alphamult.h; sourceTree = "<group>"; };
    4.72  		04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_blendfillrect.c; sourceTree = "<group>"; };
    4.73 @@ -855,9 +858,6 @@
    4.74  		04BDFF7412E6671800899322 /* SDL_surface.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_surface.c; sourceTree = "<group>"; };
    4.75  		04BDFF7512E6671800899322 /* SDL_sysvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysvideo.h; sourceTree = "<group>"; };
    4.76  		04BDFF7612E6671800899322 /* SDL_video.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_video.c; sourceTree = "<group>"; };
    4.77 -		04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
    4.78 -		04BDFF7812E6671800899322 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
    4.79 -		04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
    4.80  		04BDFFB812E6671800899322 /* imKStoUCS.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = imKStoUCS.c; sourceTree = "<group>"; };
    4.81  		04BDFFB912E6671800899322 /* imKStoUCS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = imKStoUCS.h; sourceTree = "<group>"; };
    4.82  		04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_x11clipboard.c; sourceTree = "<group>"; };
    4.83 @@ -1188,8 +1188,12 @@
    4.84  			children = (
    4.85  				041B2C9A12FA0D680087D585 /* opengl */,
    4.86  				041B2CA012FA0D680087D585 /* software */,
    4.87 +				04409B8D12FA97ED00FB9AA8 /* mmx.h */,
    4.88  				041B2C9E12FA0D680087D585 /* SDL_render.c */,
    4.89  				041B2C9F12FA0D680087D585 /* SDL_sysrender.h */,
    4.90 +				04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */,
    4.91 +				04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */,
    4.92 +				04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */,
    4.93  			);
    4.94  			name = render;
    4.95  			path = ../../src/render;
    4.96 @@ -1483,7 +1487,6 @@
    4.97  				04BDFEE712E6671800899322 /* dummy */,
    4.98  				04BDFFB712E6671800899322 /* x11 */,
    4.99  				04BDFFD712E6671800899322 /* Xext */,
   4.100 -				04BDFF0412E6671800899322 /* mmx.h */,
   4.101  				04BDFF4812E6671800899322 /* SDL_alphamult.c */,
   4.102  				04BDFF4912E6671800899322 /* SDL_alphamult.h */,
   4.103  				04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */,
   4.104 @@ -1524,9 +1527,6 @@
   4.105  				04BDFF7412E6671800899322 /* SDL_surface.c */,
   4.106  				04BDFF7512E6671800899322 /* SDL_sysvideo.h */,
   4.107  				04BDFF7612E6671800899322 /* SDL_video.c */,
   4.108 -				04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */,
   4.109 -				04BDFF7812E6671800899322 /* SDL_yuv_sw.c */,
   4.110 -				04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */,
   4.111  			);
   4.112  			name = video;
   4.113  			path = ../../src/video;
   4.114 @@ -1893,7 +1893,6 @@
   4.115  				04BD010312E6671800899322 /* SDL_cocoawindow.h in Headers */,
   4.116  				04BD011812E6671800899322 /* SDL_nullevents_c.h in Headers */,
   4.117  				04BD011C12E6671800899322 /* SDL_nullvideo.h in Headers */,
   4.118 -				04BD013212E6671800899322 /* mmx.h in Headers */,
   4.119  				04BD017012E6671800899322 /* SDL_alphamult.h in Headers */,
   4.120  				04BD017612E6671800899322 /* SDL_blit.h in Headers */,
   4.121  				04BD017B12E6671800899322 /* SDL_blit_auto.h in Headers */,
   4.122 @@ -1907,7 +1906,6 @@
   4.123  				04BD019712E6671800899322 /* SDL_RLEaccel_c.h in Headers */,
   4.124  				04BD019912E6671800899322 /* SDL_shape_internals.h in Headers */,
   4.125  				04BD019C12E6671800899322 /* SDL_sysvideo.h in Headers */,
   4.126 -				04BD01A012E6671800899322 /* SDL_yuv_sw_c.h in Headers */,
   4.127  				04BD01DC12E6671800899322 /* imKStoUCS.h in Headers */,
   4.128  				04BD01DE12E6671800899322 /* SDL_x11clipboard.h in Headers */,
   4.129  				04BD01E012E6671800899322 /* SDL_x11dyn.h in Headers */,
   4.130 @@ -1942,6 +1940,8 @@
   4.131  				0469A10B12EE4BF100B846D6 /* SDL_blendmode.h in Headers */,
   4.132  				041B2C9512FA0D2A0087D585 /* SDL_render.h in Headers */,
   4.133  				041B2CA612FA0D680087D585 /* SDL_sysrender.h in Headers */,
   4.134 +				04409B9112FA97ED00FB9AA8 /* mmx.h in Headers */,
   4.135 +				04409B9312FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
   4.136  			);
   4.137  			runOnlyForDeploymentPostprocessing = 0;
   4.138  		};
   4.139 @@ -2016,7 +2016,6 @@
   4.140  				04BD031D12E6671800899322 /* SDL_cocoawindow.h in Headers */,
   4.141  				04BD033212E6671800899322 /* SDL_nullevents_c.h in Headers */,
   4.142  				04BD033612E6671800899322 /* SDL_nullvideo.h in Headers */,
   4.143 -				04BD034C12E6671800899322 /* mmx.h in Headers */,
   4.144  				04BD038A12E6671800899322 /* SDL_alphamult.h in Headers */,
   4.145  				04BD039012E6671800899322 /* SDL_blit.h in Headers */,
   4.146  				04BD039512E6671800899322 /* SDL_blit_auto.h in Headers */,
   4.147 @@ -2030,7 +2029,6 @@
   4.148  				04BD03B112E6671800899322 /* SDL_RLEaccel_c.h in Headers */,
   4.149  				04BD03B312E6671800899322 /* SDL_shape_internals.h in Headers */,
   4.150  				04BD03B612E6671800899322 /* SDL_sysvideo.h in Headers */,
   4.151 -				04BD03BA12E6671800899322 /* SDL_yuv_sw_c.h in Headers */,
   4.152  				04BD03F412E6671800899322 /* imKStoUCS.h in Headers */,
   4.153  				04BD03F612E6671800899322 /* SDL_x11clipboard.h in Headers */,
   4.154  				04BD03F812E6671800899322 /* SDL_x11dyn.h in Headers */,
   4.155 @@ -2065,6 +2063,8 @@
   4.156  				0469A10D12EE4BF100B846D6 /* SDL_blendmode.h in Headers */,
   4.157  				041B2C9612FA0D2A0087D585 /* SDL_render.h in Headers */,
   4.158  				041B2CAC12FA0D680087D585 /* SDL_sysrender.h in Headers */,
   4.159 +				04409B9512FA97ED00FB9AA8 /* mmx.h in Headers */,
   4.160 +				04409B9712FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
   4.161  			);
   4.162  			runOnlyForDeploymentPostprocessing = 0;
   4.163  		};
   4.164 @@ -2412,8 +2412,6 @@
   4.165  				04BD019A12E6671800899322 /* SDL_stretch.c in Sources */,
   4.166  				04BD019B12E6671800899322 /* SDL_surface.c in Sources */,
   4.167  				04BD019D12E6671800899322 /* SDL_video.c in Sources */,
   4.168 -				04BD019E12E6671800899322 /* SDL_yuv_mmx.c in Sources */,
   4.169 -				04BD019F12E6671800899322 /* SDL_yuv_sw.c in Sources */,
   4.170  				04BD01DB12E6671800899322 /* imKStoUCS.c in Sources */,
   4.171  				04BD01DD12E6671800899322 /* SDL_x11clipboard.c in Sources */,
   4.172  				04BD01DF12E6671800899322 /* SDL_x11dyn.c in Sources */,
   4.173 @@ -2443,6 +2441,8 @@
   4.174  				041B2CA312FA0D680087D585 /* SDL_renderer_gl.c in Sources */,
   4.175  				041B2CA512FA0D680087D585 /* SDL_render.c in Sources */,
   4.176  				041B2CA712FA0D680087D585 /* SDL_renderer_sw.c in Sources */,
   4.177 +				04409B9212FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */,
   4.178 +				04409B9412FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */,
   4.179  			);
   4.180  			runOnlyForDeploymentPostprocessing = 0;
   4.181  		};
   4.182 @@ -2539,8 +2539,6 @@
   4.183  				04BD03B412E6671800899322 /* SDL_stretch.c in Sources */,
   4.184  				04BD03B512E6671800899322 /* SDL_surface.c in Sources */,
   4.185  				04BD03B712E6671800899322 /* SDL_video.c in Sources */,
   4.186 -				04BD03B812E6671800899322 /* SDL_yuv_mmx.c in Sources */,
   4.187 -				04BD03B912E6671800899322 /* SDL_yuv_sw.c in Sources */,
   4.188  				04BD03F312E6671800899322 /* imKStoUCS.c in Sources */,
   4.189  				04BD03F512E6671800899322 /* SDL_x11clipboard.c in Sources */,
   4.190  				04BD03F712E6671800899322 /* SDL_x11dyn.c in Sources */,
   4.191 @@ -2570,6 +2568,8 @@
   4.192  				041B2CA912FA0D680087D585 /* SDL_renderer_gl.c in Sources */,
   4.193  				041B2CAB12FA0D680087D585 /* SDL_render.c in Sources */,
   4.194  				041B2CAD12FA0D680087D585 /* SDL_renderer_sw.c in Sources */,
   4.195 +				04409B9612FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */,
   4.196 +				04409B9812FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */,
   4.197  			);
   4.198  			runOnlyForDeploymentPostprocessing = 0;
   4.199  		};
     5.1 --- a/include/SDL_pixels.h	Wed Feb 02 22:55:12 2011 -0800
     5.2 +++ b/include/SDL_pixels.h	Thu Feb 03 00:19:40 2011 -0800
     5.3 @@ -122,18 +122,26 @@
     5.4  #define SDL_PIXELORDER(X)	(((X) >> 20) & 0x0F)
     5.5  #define SDL_PIXELLAYOUT(X)	(((X) >> 16) & 0x0F)
     5.6  #define SDL_BITSPERPIXEL(X)	(((X) >> 8) & 0xFF)
     5.7 -#define SDL_BYTESPERPIXEL(X)	(((X) >> 0) & 0xFF)
     5.8 +#define SDL_BYTESPERPIXEL(X) \
     5.9 +    (SDL_ISPIXELFORMAT_FOURCC(X) ? \
    5.10 +        ((((X) == SDL_PIXELFORMAT_YV12) || \
    5.11 +          ((X) == SDL_PIXELFORMAT_IYUV) || \
    5.12 +          ((X) == SDL_PIXELFORMAT_YUY2) || \
    5.13 +          ((X) == SDL_PIXELFORMAT_UYVY) || \
    5.14 +          ((X) == SDL_PIXELFORMAT_YVYU)) ? 2 : 1) : (((X) >> 0) & 0xFF))
    5.15  
    5.16  #define SDL_ISPIXELFORMAT_INDEXED(format)   \
    5.17 -    ((SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX1) || \
    5.18 -     (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX4) || \
    5.19 -     (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX8))
    5.20 +    (!SDL_ISPIXELFORMAT_FOURCC(format) && \
    5.21 +     ((SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX1) || \
    5.22 +      (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX4) || \
    5.23 +      (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX8)))
    5.24  
    5.25  #define SDL_ISPIXELFORMAT_ALPHA(format)   \
    5.26 -    ((SDL_PIXELORDER(format) == SDL_PACKEDORDER_ARGB) || \
    5.27 -     (SDL_PIXELORDER(format) == SDL_PACKEDORDER_RGBA) || \
    5.28 -     (SDL_PIXELORDER(format) == SDL_PACKEDORDER_ABGR) || \
    5.29 -     (SDL_PIXELORDER(format) == SDL_PACKEDORDER_BGRA))
    5.30 +    (!SDL_ISPIXELFORMAT_FOURCC(format) && \
    5.31 +     ((SDL_PIXELORDER(format) == SDL_PACKEDORDER_ARGB) || \
    5.32 +      (SDL_PIXELORDER(format) == SDL_PACKEDORDER_RGBA) || \
    5.33 +      (SDL_PIXELORDER(format) == SDL_PACKEDORDER_ABGR) || \
    5.34 +      (SDL_PIXELORDER(format) == SDL_PACKEDORDER_BGRA)))
    5.35  
    5.36  #define SDL_ISPIXELFORMAT_FOURCC(format)    \
    5.37      ((format) && !((format) & 0x80000000))
     6.1 --- a/include/SDL_rect.h	Wed Feb 02 22:55:12 2011 -0800
     6.2 +++ b/include/SDL_rect.h	Thu Feb 03 00:19:40 2011 -0800
     6.3 @@ -70,25 +70,6 @@
     6.4  } SDL_Rect;
     6.5  
     6.6  /**
     6.7 - *  \brief A structure used to track dirty rectangles
     6.8 - *  
     6.9 - *  \sa SDL_AddDirtyRect
    6.10 - *  \sa SDL_ClearDirtyRects
    6.11 - *  \sa SDL_FreeDirtyRects
    6.12 - */
    6.13 -typedef struct SDL_DirtyRect
    6.14 -{
    6.15 -    SDL_Rect rect;
    6.16 -    struct SDL_DirtyRect *next;
    6.17 -} SDL_DirtyRect;
    6.18 -
    6.19 -typedef struct SDL_DirtyRectList
    6.20 -{
    6.21 -    SDL_DirtyRect *list;
    6.22 -    SDL_DirtyRect *free;
    6.23 -} SDL_DirtyRectList;
    6.24 -
    6.25 -/**
    6.26   *  \brief Returns true if the rectangle has no area.
    6.27   */
    6.28  #define SDL_RectEmpty(X)    (((X)->w <= 0) || ((X)->h <= 0))
    6.29 @@ -143,22 +124,6 @@
    6.30                                                            int *Y1, int *X2,
    6.31                                                            int *Y2);
    6.32  
    6.33 -/**
    6.34 - *  \brief Add a rectangle to a dirty rectangle list
    6.35 - */
    6.36 -extern DECLSPEC void SDLCALL SDL_AddDirtyRect(SDL_DirtyRectList * list, const SDL_Rect * rect);
    6.37 -
    6.38 -/**
    6.39 - *  \brief Remove all rectangles associated with a dirty rectangle list
    6.40 - */
    6.41 -extern DECLSPEC void SDLCALL SDL_ClearDirtyRects(SDL_DirtyRectList * list);
    6.42 -
    6.43 -/**
    6.44 - *  \brief Free memory associated with a dirty rectangle list
    6.45 - */
    6.46 -extern DECLSPEC void SDLCALL SDL_FreeDirtyRects(SDL_DirtyRectList * list);
    6.47 -
    6.48 -
    6.49  /* Ends C function definitions when using C++ */
    6.50  #ifdef __cplusplus
    6.51  /* *INDENT-OFF* */
     7.1 --- a/include/SDL_render.h	Wed Feb 02 22:55:12 2011 -0800
     7.2 +++ b/include/SDL_render.h	Thu Feb 03 00:19:40 2011 -0800
     7.3 @@ -61,7 +61,7 @@
     7.4      const char *name;           /**< The name of the renderer */
     7.5      Uint32 flags;               /**< Supported ::SDL_RendererFlags */
     7.6      Uint32 num_texture_formats; /**< The number of available texture formats */
     7.7 -    Uint32 texture_formats[50]; /**< The available texture formats */
     7.8 +    Uint32 texture_formats[16]; /**< The available texture formats */
     7.9      int max_texture_width;      /**< The maximimum texture width */
    7.10      int max_texture_height;     /**< The maximimum texture height */
    7.11  } SDL_RendererInfo;
    7.12 @@ -204,22 +204,6 @@
    7.13                                               int *w, int *h);
    7.14  
    7.15  /**
    7.16 - *  \brief Query the pixels of a texture, if the texture does not need to be 
    7.17 - *         locked for pixel access.
    7.18 - *  
    7.19 - *  \param texture A texture to be queried, which was created with 
    7.20 - *                   ::SDL_TEXTUREACCESS_STREAMING.
    7.21 - *  \param pixels    A pointer filled with a pointer to the pixels for the 
    7.22 - *                   texture.
    7.23 - *  \param pitch     A pointer filled in with the pitch of the pixel data.
    7.24 - *  
    7.25 - *  \return 0 on success, or -1 if the texture is not valid, or must be locked 
    7.26 - *          for pixel access.
    7.27 - */
    7.28 -extern DECLSPEC int SDLCALL SDL_QueryTexturePixels(SDL_Texture * texture,
    7.29 -                                                   void **pixels, int *pitch);
    7.30 -
    7.31 -/**
    7.32   *  \brief Set an additional color value used in render copy operations.
    7.33   *  
    7.34   *  \param texture The texture to update.
    7.35 @@ -299,7 +283,7 @@
    7.36  /**
    7.37   *  \brief Get the blend mode used for texture copy operations.
    7.38   *  
    7.39 - *  \param texture The texture to query.
    7.40 + *  \param texture   The texture to query.
    7.41   *  \param blendMode A pointer filled in with the current blend mode.
    7.42   *  
    7.43   *  \return 0 on success, or -1 if the texture is not valid.
    7.44 @@ -312,7 +296,7 @@
    7.45  /**
    7.46   *  \brief Update the given texture rectangle with new pixel data.
    7.47   *  
    7.48 - *  \param texture The texture to update
    7.49 + *  \param texture   The texture to update
    7.50   *  \param rect      A pointer to the rectangle of pixels to update, or NULL to 
    7.51   *                   update the entire texture.
    7.52   *  \param pixels    The raw pixel data.
    7.53 @@ -329,51 +313,30 @@
    7.54  /**
    7.55   *  \brief Lock a portion of the texture for pixel access.
    7.56   *  
    7.57 - *  \param texture The texture to lock for access, which was created with 
    7.58 + *  \param texture   The texture to lock for access, which was created with 
    7.59   *                   ::SDL_TEXTUREACCESS_STREAMING.
    7.60   *  \param rect      A pointer to the rectangle to lock for access. If the rect 
    7.61   *                   is NULL, the entire texture will be locked.
    7.62 - *  \param markDirty If this is nonzero, the locked area will be marked dirty 
    7.63 - *                   when the texture is unlocked.
    7.64   *  \param pixels    This is filled in with a pointer to the locked pixels, 
    7.65   *                   appropriately offset by the locked area.
    7.66   *  \param pitch     This is filled in with the pitch of the locked pixels.
    7.67   *  
    7.68 - *  \return 0 on success, or -1 if the texture is not valid or was created with 
    7.69 - *          ::SDL_TEXTUREACCESS_STATIC.
    7.70 + *  \return 0 on success, or -1 if the texture is not valid or was not created with ::SDL_TEXTUREACCESS_STREAMING.
    7.71   *  
    7.72 - *  \sa SDL_DirtyTexture()
    7.73   *  \sa SDL_UnlockTexture()
    7.74   */
    7.75  extern DECLSPEC int SDLCALL SDL_LockTexture(SDL_Texture * texture,
    7.76                                              const SDL_Rect * rect,
    7.77 -                                            int markDirty, void **pixels,
    7.78 -                                            int *pitch);
    7.79 +                                            void **pixels, int *pitch);
    7.80  
    7.81  /**
    7.82 - *  \brief Unlock a texture, uploading the changes to renderer memory, if needed.
    7.83 + *  \brief Unlock a texture, uploading the changes to video memory, if needed.
    7.84   *  
    7.85   *  \sa SDL_LockTexture()
    7.86 - *  \sa SDL_DirtyTexture()
    7.87   */
    7.88  extern DECLSPEC void SDLCALL SDL_UnlockTexture(SDL_Texture * texture);
    7.89  
    7.90  /**
    7.91 - *  \brief Mark the specified rectangles of the texture as dirty.
    7.92 - *  
    7.93 - *  \param texture The texture to mark dirty, which was created with 
    7.94 - *                   ::SDL_TEXTUREACCESS_STREAMING.
    7.95 - *  \param numrects  The number of rectangles pointed to by rects.
    7.96 - *  \param rects     The pointer to an array of dirty rectangles.
    7.97 - *  
    7.98 - *  \sa SDL_LockTexture()
    7.99 - *  \sa SDL_UnlockTexture()
   7.100 - */
   7.101 -extern DECLSPEC void SDLCALL SDL_DirtyTexture(SDL_Texture * texture,
   7.102 -                                              int numrects,
   7.103 -                                              const SDL_Rect * rects);
   7.104 -
   7.105 -/**
   7.106   *  \brief Set the color used for drawing operations (Fill and Line).
   7.107   *  
   7.108   *  \param r The red value used to draw on the rendering target.
     8.1 --- a/src/SDL_compat.c	Wed Feb 02 22:55:12 2011 -0800
     8.2 +++ b/src/SDL_compat.c	Thu Feb 03 00:19:40 2011 -0800
     8.3 @@ -28,7 +28,6 @@
     8.4  
     8.5  #include "video/SDL_sysvideo.h"
     8.6  #include "video/SDL_pixels_c.h"
     8.7 -#include "video/SDL_yuv_sw_c.h"
     8.8  
     8.9  static SDL_Window *SDL_VideoWindow = NULL;
    8.10  static SDL_Renderer *SDL_VideoRenderer = NULL;
    8.11 @@ -344,13 +343,10 @@
    8.12  static SDL_Surface *
    8.13  CreateVideoSurface(SDL_Texture * texture)
    8.14  {
    8.15 -    SDL_Surface *surface;
    8.16      Uint32 format;
    8.17      int w, h;
    8.18      int bpp;
    8.19      Uint32 Rmask, Gmask, Bmask, Amask;
    8.20 -    void *pixels;
    8.21 -    int pitch;
    8.22  
    8.23      if (SDL_QueryTexture(texture, &format, NULL, &w, &h) < 0) {
    8.24          return NULL;
    8.25 @@ -362,15 +358,7 @@
    8.26          return NULL;
    8.27      }
    8.28  
    8.29 -    if (SDL_QueryTexturePixels(texture, &pixels, &pitch) == 0) {
    8.30 -        surface =
    8.31 -            SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask, Gmask,
    8.32 -                                     Bmask, Amask);
    8.33 -    } else {
    8.34 -        surface =
    8.35 -            SDL_CreateRGBSurface(0, w, h, bpp, Rmask, Gmask, Bmask, Amask);
    8.36 -    }
    8.37 -    return surface;
    8.38 +    return SDL_CreateRGBSurface(0, w, h, bpp, Rmask, Gmask, Bmask, Amask);
    8.39  }
    8.40  
    8.41  static void
    8.42 @@ -412,8 +400,6 @@
    8.43      int w, h;
    8.44      Uint32 format;
    8.45      int access;
    8.46 -    void *pixels;
    8.47 -    int pitch;
    8.48  
    8.49      /* We can't resize something we don't have... */
    8.50      if (!SDL_VideoWindow) {
    8.51 @@ -454,15 +440,10 @@
    8.52  
    8.53      SDL_VideoSurface->w = width;
    8.54      SDL_VideoSurface->h = height;
    8.55 -    if (SDL_QueryTexturePixels(SDL_VideoTexture, &pixels, &pitch) == 0) {
    8.56 -        SDL_VideoSurface->pixels = pixels;
    8.57 -        SDL_VideoSurface->pitch = pitch;
    8.58 -    } else {
    8.59 -        SDL_CalculatePitch(SDL_VideoSurface);
    8.60 -        SDL_VideoSurface->pixels =
    8.61 -            SDL_realloc(SDL_VideoSurface->pixels,
    8.62 -                        SDL_VideoSurface->h * SDL_VideoSurface->pitch);
    8.63 -    }
    8.64 +    SDL_CalculatePitch(SDL_VideoSurface);
    8.65 +    SDL_VideoSurface->pixels =
    8.66 +        SDL_realloc(SDL_VideoSurface->pixels,
    8.67 +                    SDL_VideoSurface->h * SDL_VideoSurface->pitch);
    8.68      SDL_SetClipRect(SDL_VideoSurface, NULL);
    8.69      SDL_InvalidateMap(SDL_VideoSurface->map);
    8.70  
    8.71 @@ -830,20 +811,15 @@
    8.72          screen = SDL_VideoSurface;
    8.73      }
    8.74      if (screen == SDL_VideoSurface) {
    8.75 -        if (screen->flags & SDL_PREALLOC) {
    8.76 -            /* The surface memory is maintained by the renderer */
    8.77 -            SDL_DirtyTexture(SDL_VideoTexture, numrects, rects);
    8.78 -        } else {
    8.79 -            /* The surface memory needs to be copied to texture */
    8.80 -            int pitch = screen->pitch;
    8.81 -            int psize = screen->format->BytesPerPixel;
    8.82 -            for (i = 0; i < numrects; ++i) {
    8.83 -                const SDL_Rect *rect = &rects[i];
    8.84 -                void *pixels =
    8.85 -                    (Uint8 *) screen->pixels + rect->y * pitch +
    8.86 -                    rect->x * psize;
    8.87 -                SDL_UpdateTexture(SDL_VideoTexture, rect, pixels, pitch);
    8.88 -            }
    8.89 +        /* The surface memory needs to be copied to texture */
    8.90 +        int pitch = screen->pitch;
    8.91 +        int psize = screen->format->BytesPerPixel;
    8.92 +        for (i = 0; i < numrects; ++i) {
    8.93 +            const SDL_Rect *rect = &rects[i];
    8.94 +            void *pixels =
    8.95 +                (Uint8 *) screen->pixels + rect->y * pitch +
    8.96 +                rect->x * psize;
    8.97 +            SDL_UpdateTexture(SDL_VideoTexture, rect, pixels, pitch);
    8.98          }
    8.99          rect.x = 0;
   8.100          rect.y = 0;
   8.101 @@ -1459,8 +1435,6 @@
   8.102      Uint16 pitches[3];
   8.103      Uint8 *planes[3];
   8.104  
   8.105 -    SDL_SW_YUVTexture *sw;
   8.106 -
   8.107      SDL_Texture *texture;
   8.108      Uint32 texture_format;
   8.109  };
   8.110 @@ -1545,24 +1519,6 @@
   8.111      overlay->hwdata->texture =
   8.112          SDL_CreateTexture(SDL_VideoRenderer, texture_format,
   8.113                            SDL_TEXTUREACCESS_STREAMING, w, h);
   8.114 -    if (overlay->hwdata->texture) {
   8.115 -        overlay->hwdata->sw = NULL;
   8.116 -    } else {
   8.117 -        SDL_DisplayMode current_mode;
   8.118 -
   8.119 -        overlay->hwdata->sw = SDL_SW_CreateYUVTexture(texture_format, w, h);
   8.120 -        if (!overlay->hwdata->sw) {
   8.121 -            SDL_FreeYUVOverlay(overlay);
   8.122 -            return NULL;
   8.123 -        }
   8.124 -
   8.125 -        /* Create a supported RGB format texture for display */
   8.126 -        SDL_GetCurrentDisplayMode(&current_mode);
   8.127 -        texture_format = current_mode.format;
   8.128 -        overlay->hwdata->texture =
   8.129 -            SDL_CreateTexture(SDL_VideoRenderer, texture_format,
   8.130 -                              SDL_TEXTUREACCESS_STREAMING, w, h);
   8.131 -    }
   8.132      if (!overlay->hwdata->texture) {
   8.133          SDL_FreeYUVOverlay(overlay);
   8.134          return NULL;
   8.135 @@ -1582,17 +1538,8 @@
   8.136          SDL_SetError("Passed a NULL overlay");
   8.137          return -1;
   8.138      }
   8.139 -    if (overlay->hwdata->sw) {
   8.140 -        if (SDL_SW_QueryYUVTexturePixels(overlay->hwdata->sw, &pixels, &pitch)
   8.141 -            < 0) {
   8.142 -            return -1;
   8.143 -        }
   8.144 -    } else {
   8.145 -        if (SDL_LockTexture
   8.146 -            (overlay->hwdata->texture, NULL, 1, &pixels, &pitch)
   8.147 -            < 0) {
   8.148 -            return -1;
   8.149 -        }
   8.150 +    if (SDL_LockTexture(overlay->hwdata->texture, NULL, &pixels, &pitch) < 0) {
   8.151 +        return -1;
   8.152      }
   8.153      overlay->pixels[0] = (Uint8 *) pixels;
   8.154      overlay->pitches[0] = pitch;
   8.155 @@ -1620,25 +1567,7 @@
   8.156      if (!overlay) {
   8.157          return;
   8.158      }
   8.159 -    if (overlay->hwdata->sw) {
   8.160 -        void *pixels;
   8.161 -        int pitch;
   8.162 -        if (SDL_LockTexture
   8.163 -            (overlay->hwdata->texture, NULL, 1, &pixels, &pitch) == 0) {
   8.164 -            SDL_Rect srcrect;
   8.165 -
   8.166 -            srcrect.x = 0;
   8.167 -            srcrect.y = 0;
   8.168 -            srcrect.w = overlay->w;
   8.169 -            srcrect.h = overlay->h;
   8.170 -            SDL_SW_CopyYUVToRGB(overlay->hwdata->sw, &srcrect,
   8.171 -                                overlay->hwdata->texture_format,
   8.172 -                                overlay->w, overlay->h, pixels, pitch);
   8.173 -            SDL_UnlockTexture(overlay->hwdata->texture);
   8.174 -        }
   8.175 -    } else {
   8.176 -        SDL_UnlockTexture(overlay->hwdata->texture);
   8.177 -    }
   8.178 +    SDL_UnlockTexture(overlay->hwdata->texture);
   8.179  }
   8.180  
   8.181  int
     9.1 --- a/src/render/SDL_render.c	Wed Feb 02 22:55:12 2011 -0800
     9.2 +++ b/src/render/SDL_render.c	Thu Feb 03 00:19:40 2011 -0800
     9.3 @@ -152,6 +152,34 @@
     9.4      return 0;
     9.5  }
     9.6  
     9.7 +static SDL_bool
     9.8 +IsSupportedFormat(SDL_Renderer * renderer, Uint32 format)
     9.9 +{
    9.10 +    const Uint32 count = renderer->info.num_texture_formats;
    9.11 +    Uint32 n = 0;
    9.12 +
    9.13 +    /* Step past every advertised format that is not an exact match */
    9.14 +    while (n < count && renderer->info.texture_formats[n] != format) {
    9.15 +        ++n;
    9.16 +    }
    9.17 +    return (n < count) ? SDL_TRUE : SDL_FALSE;
    9.18 +}
    9.19 +
    9.20 +static Uint32
    9.21 +GetClosestSupportedFormat(SDL_Renderer * renderer, Uint32 format)
    9.22 +{
    9.23 +    Uint32 i;
    9.24 +    SDL_bool hasAlpha = SDL_ISPIXELFORMAT_ALPHA(format);
    9.25 +    /* First non-FOURCC format with matching alpha wins; SDL_ISPIXELFORMAT_ALPHA is false for FOURCC, so a YUV entry would otherwise pass as "no alpha" */
    9.26 +    for (i = 0; i < renderer->info.num_texture_formats; ++i) {
    9.27 +        Uint32 candidate = renderer->info.texture_formats[i];
    9.28 +        if (!SDL_ISPIXELFORMAT_FOURCC(candidate) && SDL_ISPIXELFORMAT_ALPHA(candidate) == hasAlpha) {
    9.29 +            return candidate;
    9.30 +        }
    9.31 +    }
    9.32 +    return renderer->info.texture_formats[0];  /* nothing matched: fall back to the first advertised format */
    9.33 +}
    9.34 +
    9.35  SDL_Texture *
    9.36  SDL_CreateTexture(SDL_Renderer * renderer, Uint32 format, int access, int w, int h)
    9.37  {
    9.38 @@ -159,14 +187,18 @@
    9.39  
    9.40      CHECK_RENDERER_MAGIC(renderer, NULL);
    9.41  
    9.42 +    if (SDL_ISPIXELFORMAT_INDEXED(format)) {
    9.43 +        SDL_SetError("Palettized textures are not supported");
    9.44 +        return NULL;
    9.45 +    }
    9.46      if (w <= 0 || h <= 0) {
    9.47          SDL_SetError("Texture dimensions can't be 0");
    9.48 -        return 0;
    9.49 +        return NULL;
    9.50      }
    9.51      texture = (SDL_Texture *) SDL_calloc(1, sizeof(*texture));
    9.52      if (!texture) {
    9.53          SDL_OutOfMemory();
    9.54 -        return 0;
    9.55 +        return NULL;
    9.56      }
    9.57      texture->magic = &texture_magic;
    9.58      texture->format = format;
    9.59 @@ -184,9 +216,35 @@
    9.60      }
    9.61      renderer->textures = texture;
    9.62  
    9.63 -    if (renderer->CreateTexture(renderer, texture) < 0) {
    9.64 -        SDL_DestroyTexture(texture);
    9.65 -        return 0;
    9.66 +    if (IsSupportedFormat(renderer, format)) {
    9.67 +        if (renderer->CreateTexture(renderer, texture) < 0) {
    9.68 +            SDL_DestroyTexture(texture);
    9.69 +            return 0;
    9.70 +        }
    9.71 +    } else {
    9.72 +        texture->native = SDL_CreateTexture(renderer,
    9.73 +                                GetClosestSupportedFormat(renderer, format),
    9.74 +                                access, w, h);
    9.75 +        if (!texture->native) {
    9.76 +            SDL_DestroyTexture(texture);
    9.77 +            return NULL;
    9.78 +        }
    9.79 +
    9.80 +        if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
    9.81 +            texture->yuv = SDL_SW_CreateYUVTexture(format, w, h);
    9.82 +            if (!texture->yuv) {
    9.83 +                SDL_DestroyTexture(texture);
    9.84 +                return NULL;
    9.85 +            }
    9.86 +        } else if (access == SDL_TEXTUREACCESS_STREAMING) {
    9.87 +            /* The pitch is 4 byte aligned */
    9.88 +            texture->pitch = (((w * SDL_BYTESPERPIXEL(format)) + 3) & ~3);
    9.89 +            texture->pixels = SDL_malloc(texture->pitch * h);
    9.90 +            if (!texture->pixels) {
    9.91 +                SDL_DestroyTexture(texture);
    9.92 +                return NULL;
    9.93 +            }
    9.94 +        }
    9.95      }
    9.96      return texture;
    9.97  }
    9.98 @@ -501,21 +559,6 @@
    9.99  }
   9.100  
   9.101  int
   9.102 -SDL_QueryTexturePixels(SDL_Texture * texture, void **pixels, int *pitch)
   9.103 -{
   9.104 -    SDL_Renderer *renderer;
   9.105 -
   9.106 -    CHECK_TEXTURE_MAGIC(texture, -1);
   9.107 -
   9.108 -    renderer = texture->renderer;
   9.109 -    if (!renderer->QueryTexturePixels) {
   9.110 -        SDL_Unsupported();
   9.111 -        return -1;
   9.112 -    }
   9.113 -    return renderer->QueryTexturePixels(renderer, texture, pixels, pitch);
   9.114 -}
   9.115 -
   9.116 -int
   9.117  SDL_SetTextureColorMod(SDL_Texture * texture, Uint8 r, Uint8 g, Uint8 b)
   9.118  {
   9.119      SDL_Renderer *renderer;
   9.120 @@ -531,7 +574,9 @@
   9.121      texture->r = r;
   9.122      texture->g = g;
   9.123      texture->b = b;
   9.124 -    if (renderer->SetTextureColorMod) {
   9.125 +    if (texture->native) {
   9.126 +        return SDL_SetTextureColorMod(texture->native, r, g, b);
   9.127 +    } else if (renderer->SetTextureColorMod) {
   9.128          return renderer->SetTextureColorMod(renderer, texture);
   9.129      } else {
   9.130          return 0;
   9.131 @@ -573,7 +618,9 @@
   9.132          texture->modMode &= ~SDL_TEXTUREMODULATE_ALPHA;
   9.133      }
   9.134      texture->a = alpha;
   9.135 -    if (renderer->SetTextureAlphaMod) {
   9.136 +    if (texture->native) {
   9.137 +        return SDL_SetTextureAlphaMod(texture->native, alpha);
   9.138 +    } else if (renderer->SetTextureAlphaMod) {
   9.139          return renderer->SetTextureAlphaMod(renderer, texture);
   9.140      } else {
   9.141          return 0;
   9.142 @@ -600,7 +647,9 @@
   9.143  
   9.144      renderer = texture->renderer;
   9.145      texture->blendMode = blendMode;
   9.146 -    if (renderer->SetTextureBlendMode) {
   9.147 +    if (texture->native) {  /* emulated format: forward to the backing texture, as the color/alpha mod setters do */
   9.148 +        return SDL_SetTextureBlendMode(texture->native, blendMode);
   9.149 +    } else if (renderer->SetTextureBlendMode) {
   9.150          return renderer->SetTextureBlendMode(renderer, texture);
   9.151      } else {
   9.152          return 0;
   9.153 @@ -618,6 +667,91 @@
   9.154      return 0;
   9.155  }
   9.156  
   9.157 +static int
   9.158 +SDL_UpdateTextureYUV(SDL_Texture * texture, const SDL_Rect * rect,
   9.159 +                     const void *pixels, int pitch)
   9.160 +{   /* Store `pixels` in the software YUV copy, then re-convert it into the native texture; 0 on success, -1 on failure */
   9.161 +    SDL_Texture *native = texture->native;
   9.162 +    SDL_Rect full_rect;
   9.163 +    int status = 0;
   9.164 +    if (SDL_SW_UpdateYUVTexture(texture->yuv, rect, pixels, pitch) < 0) {
   9.165 +        return -1;
   9.166 +    }
   9.167 +    /* From here on the whole texture is converted, not just the caller's rect */
   9.168 +    full_rect.x = 0;
   9.169 +    full_rect.y = 0;
   9.170 +    full_rect.w = texture->w;
   9.171 +    full_rect.h = texture->h;
   9.172 +    rect = &full_rect;
   9.173 +
   9.174 +    if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
   9.175 +        /* We can lock the texture and copy to it */
   9.176 +        void *native_pixels;
   9.177 +        int native_pitch;
   9.178 +
   9.179 +        if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
   9.180 +            return -1;
   9.181 +        }
   9.182 +        SDL_SW_CopyYUVToRGB(texture->yuv, rect, native->format,
   9.183 +                            rect->w, rect->h, native_pixels, native_pitch);
   9.184 +        SDL_UnlockTexture(native);
   9.185 +    } else {
   9.186 +        /* Use a temporary buffer for updating */
   9.187 +        void *temp_pixels;
   9.188 +        int temp_pitch;
   9.189 +
   9.190 +        temp_pitch = (((rect->w * SDL_BYTESPERPIXEL(native->format)) + 3) & ~3);
   9.191 +        temp_pixels = SDL_malloc(rect->h * temp_pitch);
   9.192 +        if (!temp_pixels) {
   9.193 +            SDL_OutOfMemory();
   9.194 +            return -1;
   9.195 +        }
   9.196 +        SDL_SW_CopyYUVToRGB(texture->yuv, rect, native->format,
   9.197 +                            rect->w, rect->h, temp_pixels, temp_pitch);
   9.198 +        status = SDL_UpdateTexture(native, rect, temp_pixels, temp_pitch);  /* report a failed upload instead of dropping it */
   9.199 +        SDL_free(temp_pixels);
   9.200 +    }
   9.201 +    return status;
   9.202 +}
   9.203 +
   9.204 +static int
   9.205 +SDL_UpdateTextureNative(SDL_Texture * texture, const SDL_Rect * rect,
   9.206 +                        const void *pixels, int pitch)
   9.207 +{   /* Convert `pixels` from texture->format to the native texture's format and upload `rect`; 0 on success, -1 on failure */
   9.208 +    SDL_Texture *native = texture->native;
   9.209 +    int status = 0;
   9.210 +    if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
   9.211 +        /* We can lock the texture and copy to it */
   9.212 +        void *native_pixels;
   9.213 +        int native_pitch;
   9.214 +
   9.215 +        if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
   9.216 +            return -1;
   9.217 +        }
   9.218 +        SDL_ConvertPixels(rect->w, rect->h,
   9.219 +                          texture->format, pixels, pitch,
   9.220 +                          native->format, native_pixels, native_pitch);
   9.221 +        SDL_UnlockTexture(native);
   9.222 +    } else {
   9.223 +        /* Use a temporary buffer for updating */
   9.224 +        void *temp_pixels;
   9.225 +        int temp_pitch;
   9.226 +
   9.227 +        temp_pitch = (((rect->w * SDL_BYTESPERPIXEL(native->format)) + 3) & ~3);
   9.228 +        temp_pixels = SDL_malloc(rect->h * temp_pitch);
   9.229 +        if (!temp_pixels) {
   9.230 +            SDL_OutOfMemory();
   9.231 +            return -1;
   9.232 +        }
   9.233 +        SDL_ConvertPixels(rect->w, rect->h,
   9.234 +                          texture->format, pixels, pitch,
   9.235 +                          native->format, temp_pixels, temp_pitch);
   9.236 +        status = SDL_UpdateTexture(native, rect, temp_pixels, temp_pitch);  /* report a failed upload instead of dropping it */
   9.237 +        SDL_free(temp_pixels);
   9.238 +    }
   9.239 +    return status;
   9.240 +}
   9.241 +
   9.242  int
   9.243  SDL_UpdateTexture(SDL_Texture * texture, const SDL_Rect * rect,
   9.244                    const void *pixels, int pitch)
   9.245 @@ -627,11 +761,6 @@
   9.246  
   9.247      CHECK_TEXTURE_MAGIC(texture, -1);
   9.248  
   9.249 -    renderer = texture->renderer;
   9.250 -    if (!renderer->UpdateTexture) {
   9.251 -        SDL_Unsupported();
   9.252 -        return -1;
   9.253 -    }
   9.254      if (!rect) {
   9.255          full_rect.x = 0;
   9.256          full_rect.y = 0;
   9.257 @@ -639,11 +768,38 @@
   9.258          full_rect.h = texture->h;
   9.259          rect = &full_rect;
   9.260      }
   9.261 -    return renderer->UpdateTexture(renderer, texture, rect, pixels, pitch);
   9.262 +
   9.263 +    if (texture->yuv) {
   9.264 +        return SDL_UpdateTextureYUV(texture, rect, pixels, pitch);
   9.265 +    } else if (texture->native) {
   9.266 +        return SDL_UpdateTextureNative(texture, rect, pixels, pitch);
   9.267 +    } else {
   9.268 +        renderer = texture->renderer;
   9.269 +        return renderer->UpdateTexture(renderer, texture, rect, pixels, pitch);
   9.270 +    }
   9.271 +}
   9.272 +
   9.273 +static int
   9.274 +SDL_LockTextureYUV(SDL_Texture * texture, const SDL_Rect * rect,
   9.275 +                   void **pixels, int *pitch)
   9.276 +{   /* Locks the texture's software YUV copy (texture->yuv) and returns SDL_SW_LockYUVTexture's result unchanged */
   9.277 +    return SDL_SW_LockYUVTexture(texture->yuv, rect, pixels, pitch);
   9.278 +}
   9.279 +
   9.280 +static int
   9.281 +SDL_LockTextureNative(SDL_Texture * texture, const SDL_Rect * rect,
   9.282 +                      void **pixels, int *pitch)
   9.283 +{
   9.284 +    Uint8 *row = (Uint8 *) texture->pixels + rect->y * texture->pitch;
   9.285 +    /* Remember the locked area; SDL_UnlockTextureNative reads it back to know what to convert */
   9.286 +    texture->locked_rect = *rect;
   9.287 +    *pitch = texture->pitch;
   9.288 +    *pixels = (void *) (row + rect->x * SDL_BYTESPERPIXEL(texture->format));
   9.289 +    return 0;
   9.290 +}
   9.291  
   9.292  int
   9.293 -SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect, int markDirty,
   9.294 +SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect,
   9.295                  void **pixels, int *pitch)
   9.296  {
   9.297      SDL_Renderer *renderer;
   9.298 @@ -655,11 +811,7 @@
   9.299          SDL_SetError("SDL_LockTexture(): texture must be streaming");
   9.300          return -1;
   9.301      }
   9.302 -    renderer = texture->renderer;
   9.303 -    if (!renderer->LockTexture) {
   9.304 -        SDL_Unsupported();
   9.305 -        return -1;
   9.306 -    }
   9.307 +
   9.308      if (!rect) {
   9.309          full_rect.x = 0;
   9.310          full_rect.y = 0;
   9.311 @@ -667,8 +819,57 @@
   9.312          full_rect.h = texture->h;
   9.313          rect = &full_rect;
   9.314      }
   9.315 -    return renderer->LockTexture(renderer, texture, rect, markDirty, pixels,
   9.316 -                                 pitch);
   9.317 +
   9.318 +    if (texture->yuv) {
   9.319 +        return SDL_LockTextureYUV(texture, rect, pixels, pitch);
   9.320 +    } else if (texture->native) {
   9.321 +        return SDL_LockTextureNative(texture, rect, pixels, pitch);
   9.322 +    } else {
   9.323 +        renderer = texture->renderer;
   9.324 +        return renderer->LockTexture(renderer, texture, rect, pixels, pitch);
   9.325 +    }
   9.326 +}
   9.327 +
   9.328 +static void
   9.329 +SDL_UnlockTextureYUV(SDL_Texture * texture)
   9.330 +{
   9.331 +    SDL_Texture *native = texture->native;
   9.332 +    void *native_pixels;
   9.333 +    int native_pitch;
   9.334 +    SDL_Rect rect;
   9.335 +
   9.336 +    rect.x = 0;
   9.337 +    rect.y = 0;
   9.338 +    rect.w = texture->w;
   9.339 +    rect.h = texture->h;
   9.340 +
   9.341 +    if (SDL_LockTexture(native, &rect, &native_pixels, &native_pitch) < 0) {
   9.342 +        return;
   9.343 +    }
   9.344 +    SDL_SW_CopyYUVToRGB(texture->yuv, &rect, native->format,
   9.345 +                        rect.w, rect.h, native_pixels, native_pitch);
   9.346 +    SDL_UnlockTexture(native);
   9.347 +}
   9.348 +
   9.349 +void
   9.350 +SDL_UnlockTextureNative(SDL_Texture * texture)
   9.351 +{
   9.352 +    SDL_Texture *native = texture->native;
   9.353 +    void *native_pixels;
   9.354 +    int native_pitch;
   9.355 +    const SDL_Rect *rect = &texture->locked_rect;
   9.356 +    const void* pixels = (void *) ((Uint8 *) texture->pixels +
   9.357 +                        rect->y * texture->pitch +
   9.358 +                        rect->x * SDL_BYTESPERPIXEL(texture->format));
   9.359 +    int pitch = texture->pitch;
   9.360 +
   9.361 +    if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
   9.362 +        return;
   9.363 +    }
   9.364 +    SDL_ConvertPixels(rect->w, rect->h,
   9.365 +                      texture->format, pixels, pitch,
   9.366 +                      native->format, native_pixels, native_pitch);
   9.367 +    SDL_UnlockTexture(native);
   9.368  }
   9.369  
   9.370  void
   9.371 @@ -681,29 +882,14 @@
   9.372      if (texture->access != SDL_TEXTUREACCESS_STREAMING) {
   9.373          return;
   9.374      }
   9.375 -    renderer = texture->renderer;
   9.376 -    if (!renderer->UnlockTexture) {
   9.377 -        return;
   9.378 +    if (texture->yuv) {
   9.379 +        SDL_UnlockTextureYUV(texture);
   9.380 +    } else if (texture->native) {
   9.381 +        SDL_UnlockTextureNative(texture);
   9.382 +    } else {
   9.383 +        renderer = texture->renderer;
   9.384 +        renderer->UnlockTexture(renderer, texture);
   9.385      }
   9.386 -    renderer->UnlockTexture(renderer, texture);
   9.387 -}
   9.388 -
   9.389 -void
   9.390 -SDL_DirtyTexture(SDL_Texture * texture, int numrects,
   9.391 -                 const SDL_Rect * rects)
   9.392 -{
   9.393 -    SDL_Renderer *renderer;
   9.394 -
   9.395 -    CHECK_TEXTURE_MAGIC(texture, );
   9.396 -
   9.397 -    if (texture->access != SDL_TEXTUREACCESS_STREAMING) {
   9.398 -        return;
   9.399 -    }
   9.400 -    renderer = texture->renderer;
   9.401 -    if (!renderer->DirtyTexture) {
   9.402 -        return;
   9.403 -    }
   9.404 -    renderer->DirtyTexture(renderer, texture, numrects, rects);
   9.405  }
   9.406  
   9.407  int
   9.408 @@ -979,6 +1165,10 @@
   9.409          }
   9.410      }
   9.411  
   9.412 +    if (texture->native) {
   9.413 +        texture = texture->native;
   9.414 +    }
   9.415 +
   9.416      return renderer->RenderCopy(renderer, texture, &real_srcrect,
   9.417                                  &real_dstrect);
   9.418  }
   9.419 @@ -1087,6 +1277,16 @@
   9.420          renderer->textures = texture->next;
   9.421      }
   9.422  
   9.423 +    if (texture->native) {
   9.424 +        SDL_DestroyTexture(texture->native);
   9.425 +    }
   9.426 +    if (texture->yuv) {
   9.427 +        SDL_SW_DestroyYUVTexture(texture->yuv);
   9.428 +    }
   9.429 +    if (texture->pixels) {
   9.430 +        SDL_free(texture->pixels);
   9.431 +    }
   9.432 +
   9.433      renderer->DestroyTexture(renderer, texture);
   9.434      SDL_free(texture);
   9.435  }
    10.1 --- a/src/render/SDL_sysrender.h	Wed Feb 02 22:55:12 2011 -0800
    10.2 +++ b/src/render/SDL_sysrender.h	Thu Feb 03 00:19:40 2011 -0800
    10.3 @@ -26,6 +26,7 @@
    10.4  
    10.5  #include "SDL_render.h"
    10.6  #include "SDL_events.h"
    10.7 +#include "SDL_yuv_sw_c.h"
    10.8  
    10.9  /* The SDL 2D rendering system */
   10.10  
   10.11 @@ -45,6 +46,13 @@
   10.12  
   10.13      SDL_Renderer *renderer;
   10.14  
   10.15 +    /* Support for formats not supported directly by the renderer */
   10.16 +    SDL_Texture *native;
   10.17 +    SDL_SW_YUVTexture *yuv;
   10.18 +    void *pixels;
   10.19 +    int pitch;
   10.20 +    SDL_Rect locked_rect;
   10.21 +
   10.22      void *driverdata;           /**< Driver specific texture representation */
   10.23  
   10.24      SDL_Texture *prev;
   10.25 @@ -58,8 +66,6 @@
   10.26  
   10.27      void (*WindowEvent) (SDL_Renderer * renderer, const SDL_WindowEvent *event);
   10.28      int (*CreateTexture) (SDL_Renderer * renderer, SDL_Texture * texture);
   10.29 -    int (*QueryTexturePixels) (SDL_Renderer * renderer, SDL_Texture * texture,
   10.30 -                               void **pixels, int *pitch);
   10.31      int (*SetTextureColorMod) (SDL_Renderer * renderer,
   10.32                                 SDL_Texture * texture);
   10.33      int (*SetTextureAlphaMod) (SDL_Renderer * renderer,
   10.34 @@ -70,11 +76,8 @@
   10.35                            const SDL_Rect * rect, const void *pixels,
   10.36                            int pitch);
   10.37      int (*LockTexture) (SDL_Renderer * renderer, SDL_Texture * texture,
   10.38 -                        const SDL_Rect * rect, int markDirty, void **pixels,
   10.39 -                        int *pitch);
   10.40 +                        const SDL_Rect * rect, void **pixels, int *pitch);
   10.41      void (*UnlockTexture) (SDL_Renderer * renderer, SDL_Texture * texture);
   10.42 -    void (*DirtyTexture) (SDL_Renderer * renderer, SDL_Texture * texture,
   10.43 -                          int numrects, const SDL_Rect * rects);
   10.44      int (*RenderClear) (SDL_Renderer * renderer);
   10.45      int (*RenderDrawPoints) (SDL_Renderer * renderer, const SDL_Point * points,
   10.46                               int count);
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/render/SDL_yuv_mmx.c	Thu Feb 03 00:19:40 2011 -0800
    11.3 @@ -0,0 +1,432 @@
    11.4 +/*
    11.5 +    SDL - Simple DirectMedia Layer
    11.6 +    Copyright (C) 1997-2010 Sam Lantinga
    11.7 +
    11.8 +    This library is free software; you can redistribute it and/or
    11.9 +    modify it under the terms of the GNU Lesser General Public
   11.10 +    License as published by the Free Software Foundation; either
   11.11 +    version 2.1 of the License, or (at your option) any later version.
   11.12 +
   11.13 +    This library is distributed in the hope that it will be useful,
   11.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   11.16 +    Lesser General Public License for more details.
   11.17 +
   11.18 +    You should have received a copy of the GNU Lesser General Public
   11.19 +    License along with this library; if not, write to the Free Software
   11.20 +    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   11.21 +
   11.22 +    Sam Lantinga
   11.23 +    slouken@libsdl.org
   11.24 +*/
   11.25 +#include "SDL_config.h"
   11.26 +
   11.27 +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   11.28 +
   11.29 +#include "SDL_stdinc.h"
   11.30 +
   11.31 +#include "mmx.h"
   11.32 +
   11.33 +/* *INDENT-OFF* */
   11.34 +
   11.35 +static mmx_t MMX_0080w    = { .ud = {0x00800080, 0x00800080} };
   11.36 +static mmx_t MMX_00FFw    = { .ud = {0x00ff00ff, 0x00ff00ff} };
   11.37 +static mmx_t MMX_FF00w    = { .ud = {0xff00ff00, 0xff00ff00} };
   11.38 +
   11.39 +static mmx_t MMX_Ycoeff   = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
   11.40 +
   11.41 +static mmx_t MMX_UbluRGB  = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
   11.42 +static mmx_t MMX_VredRGB  = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
   11.43 +static mmx_t MMX_UgrnRGB  = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
   11.44 +static mmx_t MMX_VgrnRGB  = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
   11.45 +
   11.46 +static mmx_t MMX_Ublu5x5  = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
   11.47 +static mmx_t MMX_Vred5x5  = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
   11.48 +static mmx_t MMX_Ugrn565  = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
   11.49 +static mmx_t MMX_Vgrn565  = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
   11.50 +
   11.51 +static mmx_t MMX_red565   = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
   11.52 +static mmx_t MMX_grn565   = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
   11.53 +
   11.54 +/**
   11.55 +   This MMX assembler is my first assembler/MMX program ever.
   11.56 +   Thus it may be buggy.
   11.57 +   Send patches to:
   11.58 +   mvogt@rhrk.uni-kl.de
   11.59 +
   11.60 +   After it worked fine I have "obfuscated" the code a bit to have
   11.61 +   more parallelism in the MMX units. This means I moved
   11.62 +   initialisation around and delayed other instructions.
   11.63 +   Performance measurement did not show that this brought any advantage
   11.64 +   but in theory it _should_ be faster this way.
   11.65 +
   11.66 +   The overall performance gain over the C based dither was 30%-40%.
   11.67 +   The MMX routine calculates 256bit=8RGB values in each cycle
   11.68 +   (4 for row1 & 4 for row2)
   11.69 +
   11.70 +   The red/green/blue.. coefficients are taken from the mpeg_play
   11.71 +   player. They look nice, but I don't know if you can have
   11.72 +   better values, to avoid integer rounding errors.
   11.73 +   
   11.74 +
   11.75 +   IMPORTANT:
   11.76 +   ==========
   11.77 +
   11.78 +   It is a requirement that cr/cb/lum are 8-byte aligned and
   11.79 +   that out is 16-byte aligned, or you will/may get segfaults
   11.80 +
   11.81 +*/
   11.82 +
   11.83 +void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   11.84 +                              unsigned char *lum, unsigned char *cr,
   11.85 +                              unsigned char *cb, unsigned char *out,
   11.86 +                              int rows, int cols, int mod )
   11.87 +{
   11.88 +    Uint32 *row1;
   11.89 +    Uint32 *row2;
   11.90 +
   11.91 +    unsigned char* y = lum +cols*rows;    // Pointer to the end
   11.92 +    int x = 0;
   11.93 +    row1 = (Uint32 *)out;                 // 32 bit target
   11.94 +    row2 = (Uint32 *)out+cols+mod;        // start of second row
   11.95 +    mod = (mod+cols+mod)*4;               // increment for row1 in byte
   11.96 +
   11.97 +    __asm__ __volatile__ (
   11.98 +        // tap dance to workaround the inability to use %%ebx at will...
   11.99 +        //  move one thing to the stack...
  11.100 +        "pushl $0\n"  // save a slot on the stack.
  11.101 +        "pushl %%ebx\n"  // save %%ebx.
  11.102 +        "movl %0, %%ebx\n"  // put the thing in ebx.
  11.103 +        "movl %%ebx,4(%%esp)\n"  // put the thing in the stack slot.
  11.104 +        "popl %%ebx\n"  // get back %%ebx (the PIC register).
  11.105 +
  11.106 +        ".align 8\n"
  11.107 +        "1:\n"
  11.108 +
  11.109 +        // create Cr (result in mm1)
  11.110 +        "pushl %%ebx\n"
  11.111 +        "movl 4(%%esp),%%ebx\n"
  11.112 +        "movd (%%ebx),%%mm1\n"   //         0  0  0  0  v3 v2 v1 v0
  11.113 +        "popl %%ebx\n"
  11.114 +        "pxor %%mm7,%%mm7\n"      //         00 00 00 00 00 00 00 00
  11.115 +        "movd (%2), %%mm2\n"           //    0  0  0  0 l3 l2 l1 l0
  11.116 +        "punpcklbw %%mm7,%%mm1\n" //         0  v3 0  v2 00 v1 00 v0
  11.117 +        "punpckldq %%mm1,%%mm1\n" //         00 v1 00 v0 00 v1 00 v0
  11.118 +        "psubw %9,%%mm1\n"        // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
  11.119 +
  11.120 +        // create Cr_g (result in mm0)
  11.121 +        "movq %%mm1,%%mm0\n"           // r1 r1 r0 r0 r1 r1 r0 r0
  11.122 +        "pmullw %10,%%mm0\n"           // red*-46dec=0.7136*64
  11.123 +        "pmullw %11,%%mm1\n"           // red*89dec=1.4013*64
  11.124 +        "psraw  $6, %%mm0\n"           // red=red/64
  11.125 +        "psraw  $6, %%mm1\n"           // red=red/64
  11.126 +
  11.127 +        // create L1 L2 (result in mm2,mm4)
  11.128 +        // L2=lum+cols
  11.129 +        "movq (%2,%4),%%mm3\n"         //    0  0  0  0 L3 L2 L1 L0
  11.130 +        "punpckldq %%mm3,%%mm2\n"      //   L3 L2 L1 L0 l3 l2 l1 l0
  11.131 +        "movq %%mm2,%%mm4\n"           //   L3 L2 L1 L0 l3 l2 l1 l0
  11.132 +        "pand %12,%%mm2\n"             //   L3 0  L1  0 l3  0 l1  0
  11.133 +        "pand %13,%%mm4\n"             //   0  L2  0 L0  0 l2  0 l0
  11.134 +        "psrlw $8,%%mm2\n"             //   0  L3  0 L1  0 l3  0 l1
  11.135 +
  11.136 +        // create R (result in mm6)
  11.137 +        "movq %%mm2,%%mm5\n"           //   0 L3  0 L1  0 l3  0 l1
  11.138 +        "movq %%mm4,%%mm6\n"           //   0 L2  0 L0  0 l2  0 l0
  11.139 +        "paddsw  %%mm1, %%mm5\n"       // lum1+red:x R3 x R1 x r3 x r1
  11.140 +        "paddsw  %%mm1, %%mm6\n"       // lum1+red:x R2 x R0 x r2 x r0
  11.141 +        "packuswb %%mm5,%%mm5\n"       //  R3 R1 r3 r1 R3 R1 r3 r1
  11.142 +        "packuswb %%mm6,%%mm6\n"       //  R2 R0 r2 r0 R2 R0 r2 r0
  11.143 +        "pxor %%mm7,%%mm7\n"      //         00 00 00 00 00 00 00 00
  11.144 +        "punpcklbw %%mm5,%%mm6\n"      //  R3 R2 R1 R0 r3 r2 r1 r0
  11.145 +
  11.146 +        // create Cb (result in mm1)
  11.147 +        "movd (%1), %%mm1\n"      //         0  0  0  0  u3 u2 u1 u0
  11.148 +        "punpcklbw %%mm7,%%mm1\n" //         0  u3 0  u2 00 u1 00 u0
  11.149 +        "punpckldq %%mm1,%%mm1\n" //         00 u1 00 u0 00 u1 00 u0
  11.150 +        "psubw %9,%%mm1\n"        // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
  11.151 +
  11.152 +        // create Cb_g (result in mm5)
  11.153 +        "movq %%mm1,%%mm5\n"            // u1 u1 u0 u0 u1 u1 u0 u0
  11.154 +        "pmullw %14,%%mm5\n"            // blue*-109dec=1.7129*64
  11.155 +        "pmullw %15,%%mm1\n"            // blue*114dec=1.78125*64
  11.156 +        "psraw  $6, %%mm5\n"            // blue=red/64
  11.157 +        "psraw  $6, %%mm1\n"            // blue=blue/64
  11.158 +
  11.159 +        // create G (result in mm7)
  11.160 +        "movq %%mm2,%%mm3\n"      //   0  L3  0 L1  0 l3  0 l1
  11.161 +        "movq %%mm4,%%mm7\n"      //   0  L2  0 L0  0 l2  0 l1
  11.162 +        "paddsw  %%mm5, %%mm3\n"  // lum1+Cb_g:x G3t x G1t x g3t x g1t
  11.163 +        "paddsw  %%mm5, %%mm7\n"  // lum1+Cb_g:x G2t x G0t x g2t x g0t
  11.164 +        "paddsw  %%mm0, %%mm3\n"  // lum1+Cr_g:x G3  x G1  x g3  x g1
  11.165 +        "paddsw  %%mm0, %%mm7\n"  // lum1+blue:x G2  x G0  x g2  x g0
  11.166 +        "packuswb %%mm3,%%mm3\n"  // G3 G1 g3 g1 G3 G1 g3 g1
  11.167 +        "packuswb %%mm7,%%mm7\n"  // G2 G0 g2 g0 G2 G0 g2 g0
  11.168 +        "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0
  11.169 +
  11.170 +        // create B (result in mm5)
  11.171 +        "movq %%mm2,%%mm3\n"         //   0  L3  0 L1  0 l3  0 l1
  11.172 +        "movq %%mm4,%%mm5\n"         //   0  L2  0 L0  0 l2  0 l1
  11.173 +        "paddsw  %%mm1, %%mm3\n"     // lum1+blue:x B3 x B1 x b3 x b1
  11.174 +        "paddsw  %%mm1, %%mm5\n"     // lum1+blue:x B2 x B0 x b2 x b0
  11.175 +        "packuswb %%mm3,%%mm3\n"     // B3 B1 b3 b1 B3 B1 b3 b1
  11.176 +        "packuswb %%mm5,%%mm5\n"     // B2 B0 b2 b0 B2 B0 b2 b0
  11.177 +        "punpcklbw %%mm3,%%mm5\n"    // B3 B2 B1 B0 b3 b2 b1 b0
  11.178 +
  11.179 +        // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
  11.180 +
  11.181 +        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
  11.182 +        "pxor %%mm4,%%mm4\n"           //  0  0  0  0  0  0  0  0
  11.183 +        "movq %%mm6,%%mm1\n"           // R3 R2 R1 R0 r3 r2 r1 r0
  11.184 +        "movq %%mm5,%%mm3\n"           // B3 B2 B1 B0 b3 b2 b1 b0
  11.185 +
  11.186 +        // process lower lum
  11.187 +        "punpcklbw %%mm4,%%mm1\n"      //  0 r3  0 r2  0 r1  0 r0
  11.188 +        "punpcklbw %%mm4,%%mm3\n"      //  0 b3  0 b2  0 b1  0 b0
  11.189 +        "movq %%mm1,%%mm2\n"           //  0 r3  0 r2  0 r1  0 r0
  11.190 +        "movq %%mm3,%%mm0\n"           //  0 b3  0 b2  0 b1  0 b0
  11.191 +        "punpcklwd %%mm1,%%mm3\n"      //  0 r1  0 b1  0 r0  0 b0
  11.192 +        "punpckhwd %%mm2,%%mm0\n"      //  0 r3  0 b3  0 r2  0 b2
  11.193 +
  11.194 +        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
  11.195 +        "movq %%mm7,%%mm1\n"           // G3 G2 G1 G0 g3 g2 g1 g0
  11.196 +        "punpcklbw %%mm1,%%mm2\n"      // g3  0 g2  0 g1  0 g0  0
  11.197 +        "punpcklwd %%mm4,%%mm2\n"      //  0  0 g1  0  0  0 g0  0
  11.198 +        "por %%mm3, %%mm2\n"          //  0 r1 g1 b1  0 r0 g0 b0
  11.199 +        "movq %%mm2,(%3)\n"          // wrote out ! row1
  11.200 +
  11.201 +        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
  11.202 +        "punpcklbw %%mm1,%%mm4\n"      // g3  0 g2  0 g1  0 g0  0
  11.203 +        "punpckhwd %%mm2,%%mm4\n"      //  0  0 g3  0  0  0 g2  0
  11.204 +        "por %%mm0, %%mm4\n"          //  0 r3 g3 b3  0 r2 g2 b2
  11.205 +        "movq %%mm4,8(%3)\n"         // wrote out ! row1
  11.206 +
  11.207 +        // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
  11.208 +        // this can be done "destructive"
  11.209 +        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
  11.210 +        "punpckhbw %%mm2,%%mm6\n"      //  0 R3  0 R2  0 R1  0 R0
  11.211 +        "punpckhbw %%mm1,%%mm5\n"      // G3 B3 G2 B2 G1 B1 G0 B0
  11.212 +        "movq %%mm5,%%mm1\n"           // G3 B3 G2 B2 G1 B1 G0 B0
  11.213 +        "punpcklwd %%mm6,%%mm1\n"      //  0 R1 G1 B1  0 R0 G0 B0
  11.214 +        "movq %%mm1,(%5)\n"          // wrote out ! row2
  11.215 +        "punpckhwd %%mm6,%%mm5\n"      //  0 R3 G3 B3  0 R2 G2 B2
  11.216 +        "movq %%mm5,8(%5)\n"         // wrote out ! row2
  11.217 +
  11.218 +        "addl $4,%2\n"            // lum+4
  11.219 +        "leal 16(%3),%3\n"        // row1+16
  11.220 +        "leal 16(%5),%5\n"        // row2+16
  11.221 +        "addl $2,(%%esp)\n"        // cr+2
  11.222 +        "addl $2,%1\n"           // cb+2
  11.223 +
  11.224 +        "addl $4,%6\n"            // x+4
  11.225 +        "cmpl %4,%6\n"
  11.226 +
  11.227 +        "jl 1b\n"
  11.228 +        "addl %4,%2\n" // lum += cols
  11.229 +        "addl %8,%3\n" // row1+= mod
  11.230 +        "addl %8,%5\n" // row2+= mod
  11.231 +        "movl $0,%6\n" // x=0
  11.232 +        "cmpl %7,%2\n"
  11.233 +        "jl 1b\n"
  11.234 +
  11.235 +        "addl $4,%%esp\n"  // get rid of the stack slot we reserved.
  11.236 +        "emms\n"  // reset MMX registers.
  11.237 +        :
  11.238 +        : "m" (cr), "r"(cb),"r"(lum),
  11.239 +          "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
  11.240 +          "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
  11.241 +          "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),
  11.242 +          "m"(MMX_UbluRGB)
  11.243 +    );
  11.244 +}
  11.245 +
  11.246 +void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
  11.247 +                             unsigned char *lum, unsigned char *cr,
  11.248 +                             unsigned char *cb, unsigned char *out,
  11.249 +                             int rows, int cols, int mod )
  11.250 +{
  11.251 +    Uint16 *row1;
  11.252 +    Uint16 *row2;
  11.253 +
  11.254 +    unsigned char* y = lum +cols*rows;    /* Pointer to the end */
  11.255 +    int x = 0;
  11.256 +    row1 = (Uint16 *)out;                 /* 16 bit target */
  11.257 +    row2 = (Uint16 *)out+cols+mod;        /* start of second row  */
  11.258 +    mod = (mod+cols+mod)*2;               /* increment for row1 in byte */
  11.259 +
  11.260 +    __asm__ __volatile__(
  11.261 +        // tap dance to workaround the inability to use %%ebx at will...
  11.262 +        //  move one thing to the stack...
  11.263 +        "pushl $0\n"  // save a slot on the stack.
  11.264 +        "pushl %%ebx\n"  // save %%ebx.
  11.265 +        "movl %0, %%ebx\n"  // put the thing in ebx.
  11.266 +        "movl %%ebx, 4(%%esp)\n"  // put the thing in the stack slot.
  11.267 +        "popl %%ebx\n"  // get back %%ebx (the PIC register).
  11.268 +
  11.269 +        ".align 8\n"
  11.270 +        "1:\n"
  11.271 +
  11.272 +        "movd           (%1),                   %%mm0\n" // 4 Cb         0  0  0  0 u3 u2 u1 u0
  11.273 +        "pxor           %%mm7,                  %%mm7\n"
  11.274 +        "pushl %%ebx\n"
  11.275 +        "movl 4(%%esp), %%ebx\n"
  11.276 +        "movd (%%ebx), %%mm1\n"   // 4 Cr                0  0  0  0 v3 v2 v1 v0
  11.277 +        "popl %%ebx\n"
  11.278 +
  11.279 +        "punpcklbw      %%mm7,                  %%mm0\n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
  11.280 +        "punpcklbw      %%mm7,                  %%mm1\n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
  11.281 +        "psubw          %9,                     %%mm0\n"
  11.282 +        "psubw          %9,                     %%mm1\n"
  11.283 +        "movq           %%mm0,                  %%mm2\n" // Cb                   0 u3  0 u2  0 u1  0 u0
  11.284 +        "movq           %%mm1,                  %%mm3\n" // Cr
  11.285 +        "pmullw         %10,                    %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
  11.286 +        "movq           (%2),                   %%mm6\n" // L1      l7 L6 L5 L4 L3 L2 L1 L0
  11.287 +        "pmullw         %11,                    %%mm0\n" // Cb2blue
  11.288 +        "pand           %12,                    %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
  11.289 +        "pmullw         %13,                    %%mm3\n" // Cr2green
  11.290 +        "movq           (%2),                   %%mm7\n" // L2
  11.291 +        "pmullw         %14,                    %%mm1\n" // Cr2red
  11.292 +        "psrlw          $8,                     %%mm7\n"        // L2           00 L7 00 L5 00 L3 00 L1
  11.293 +        "pmullw         %15,                    %%mm6\n" // lum1
  11.294 +        "paddw          %%mm3,                  %%mm2\n" // Cb2green + Cr2green == green
  11.295 +        "pmullw         %15,                    %%mm7\n" // lum2
  11.296 +
  11.297 +        "movq           %%mm6,                  %%mm4\n" // lum1
  11.298 +        "paddw          %%mm0,                  %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
  11.299 +        "movq           %%mm4,                  %%mm5\n" // lum1
  11.300 +        "paddw          %%mm1,                  %%mm4\n" // lum1 +red  00 R6 00 R4 00 R2 00 R0
  11.301 +        "paddw          %%mm2,                  %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0
  11.302 +        "psraw          $6,                     %%mm4\n" // R1 0 .. 64
  11.303 +        "movq           %%mm7,                  %%mm3\n" // lum2                       00 L7 00 L5 00 L3 00 L1
  11.304 +        "psraw          $6,                     %%mm5\n" // G1  - .. +
  11.305 +        "paddw          %%mm0,                  %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
  11.306 +        "psraw          $6,                     %%mm6\n" // B1         0 .. 64
  11.307 +        "packuswb       %%mm4,                  %%mm4\n" // R1 R1
  11.308 +        "packuswb       %%mm5,                  %%mm5\n" // G1 G1
  11.309 +        "packuswb       %%mm6,                  %%mm6\n" // B1 B1
  11.310 +        "punpcklbw      %%mm4,                  %%mm4\n"
  11.311 +        "punpcklbw      %%mm5,                  %%mm5\n"
  11.312 +
  11.313 +        "pand           %16,                    %%mm4\n"
  11.314 +        "psllw          $3,                     %%mm5\n" // GREEN       1
  11.315 +        "punpcklbw      %%mm6,                  %%mm6\n"
  11.316 +        "pand           %17,                    %%mm5\n"
  11.317 +        "pand           %16,                    %%mm6\n"
  11.318 +        "por            %%mm5,                  %%mm4\n" //
  11.319 +        "psrlw          $11,                    %%mm6\n" // BLUE        1
  11.320 +        "movq           %%mm3,                  %%mm5\n" // lum2
  11.321 +        "paddw          %%mm1,                  %%mm3\n" // lum2 +red      00 R7 00 R5 00 R3 00 R1
  11.322 +        "paddw          %%mm2,                  %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
  11.323 +        "psraw          $6,                     %%mm3\n" // R2
  11.324 +        "por            %%mm6,                  %%mm4\n" // MM4
  11.325 +        "psraw          $6,                     %%mm5\n" // G2
  11.326 +        "movq           (%2, %4),               %%mm6\n" // L3 load lum2
  11.327 +        "psraw          $6,                     %%mm7\n"
  11.328 +        "packuswb       %%mm3,                  %%mm3\n"
  11.329 +        "packuswb       %%mm5,                  %%mm5\n"
  11.330 +        "packuswb       %%mm7,                  %%mm7\n"
  11.331 +        "pand           %12,                    %%mm6\n" // L3
  11.332 +        "punpcklbw      %%mm3,                  %%mm3\n"
  11.333 +        "punpcklbw      %%mm5,                  %%mm5\n"
  11.334 +        "pmullw         %15,                    %%mm6\n" // lum3
  11.335 +        "punpcklbw      %%mm7,                  %%mm7\n"
  11.336 +        "psllw          $3,                     %%mm5\n" // GREEN 2
  11.337 +        "pand           %16,                    %%mm7\n"
  11.338 +        "pand           %16,                    %%mm3\n"
  11.339 +        "psrlw          $11,                    %%mm7\n" // BLUE  2
  11.340 +        "pand           %17,                    %%mm5\n"
  11.341 +        "por            %%mm7,                  %%mm3\n"
  11.342 +        "movq           (%2,%4),                %%mm7\n" // L4 load lum2
  11.343 +        "por            %%mm5,                  %%mm3\n" //
  11.344 +        "psrlw          $8,                     %%mm7\n" // L4
  11.345 +        "movq           %%mm4,                  %%mm5\n"
  11.346 +        "punpcklwd      %%mm3,                  %%mm4\n"
  11.347 +        "pmullw         %15,                    %%mm7\n" // lum4
  11.348 +        "punpckhwd      %%mm3,                  %%mm5\n"
  11.349 +
  11.350 +        "movq           %%mm4,                  (%3)\n"  // write row1
  11.351 +        "movq           %%mm5,                  8(%3)\n" // write row1
  11.352 +
  11.353 +        "movq           %%mm6,                  %%mm4\n" // Lum3
  11.354 +        "paddw          %%mm0,                  %%mm6\n" // Lum3 +blue
  11.355 +
  11.356 +        "movq           %%mm4,                  %%mm5\n" // Lum3
  11.357 +        "paddw          %%mm1,                  %%mm4\n" // Lum3 +red
  11.358 +        "paddw          %%mm2,                  %%mm5\n" // Lum3 +green
  11.359 +        "psraw          $6,                     %%mm4\n"
  11.360 +        "movq           %%mm7,                  %%mm3\n" // Lum4
  11.361 +        "psraw          $6,                     %%mm5\n"
  11.362 +        "paddw          %%mm0,                  %%mm7\n" // Lum4 +blue
  11.363 +        "psraw          $6,                     %%mm6\n" // Lum3 +blue
  11.364 +        "movq           %%mm3,                  %%mm0\n" // Lum4
  11.365 +        "packuswb       %%mm4,                  %%mm4\n"
  11.366 +        "paddw          %%mm1,                  %%mm3\n" // Lum4 +red
  11.367 +        "packuswb       %%mm5,                  %%mm5\n"
  11.368 +        "paddw          %%mm2,                  %%mm0\n" // Lum4 +green
  11.369 +        "packuswb       %%mm6,                  %%mm6\n"
  11.370 +        "punpcklbw      %%mm4,                  %%mm4\n"
  11.371 +        "punpcklbw      %%mm5,                  %%mm5\n"
  11.372 +        "punpcklbw      %%mm6,                  %%mm6\n"
  11.373 +        "psllw          $3,                     %%mm5\n" // GREEN 3
  11.374 +        "pand           %16,                    %%mm4\n"
  11.375 +        "psraw          $6,                     %%mm3\n" // psr 6
  11.376 +        "psraw          $6,                     %%mm0\n"
  11.377 +        "pand           %16,                    %%mm6\n" // BLUE
  11.378 +        "pand           %17,                    %%mm5\n"
  11.379 +        "psrlw          $11,                    %%mm6\n" // BLUE  3
  11.380 +        "por            %%mm5,                  %%mm4\n"
  11.381 +        "psraw          $6,                     %%mm7\n"
  11.382 +        "por            %%mm6,                  %%mm4\n"
  11.383 +        "packuswb       %%mm3,                  %%mm3\n"
  11.384 +        "packuswb       %%mm0,                  %%mm0\n"
  11.385 +        "packuswb       %%mm7,                  %%mm7\n"
  11.386 +        "punpcklbw      %%mm3,                  %%mm3\n"
  11.387 +        "punpcklbw      %%mm0,                  %%mm0\n"
  11.388 +        "punpcklbw      %%mm7,                  %%mm7\n"
  11.389 +        "pand           %16,                    %%mm3\n"
  11.390 +        "pand           %16,                    %%mm7\n" // BLUE
  11.391 +        "psllw          $3,                     %%mm0\n" // GREEN 4
  11.392 +        "psrlw          $11,                    %%mm7\n"
  11.393 +        "pand           %17,                    %%mm0\n"
  11.394 +        "por            %%mm7,                  %%mm3\n"
  11.395 +        "por            %%mm0,                  %%mm3\n"
  11.396 +
  11.397 +        "movq           %%mm4,                  %%mm5\n"
  11.398 +
  11.399 +        "punpcklwd      %%mm3,                  %%mm4\n"
  11.400 +        "punpckhwd      %%mm3,                  %%mm5\n"
  11.401 +
  11.402 +        "movq           %%mm4,                  (%5)\n"
  11.403 +        "movq           %%mm5,                  8(%5)\n"
  11.404 +
  11.405 +        "addl           $8,                     %6\n"
  11.406 +        "addl           $8,                     %2\n"
  11.407 +        "addl           $4,                     (%%esp)\n"
  11.408 +        "addl           $4,                     %1\n"
  11.409 +        "cmpl           %4,                     %6\n"
  11.410 +        "leal           16(%3),                 %3\n"
  11.411 +        "leal           16(%5),%5\n" // row2+16
  11.412 +
  11.413 +        "jl             1b\n"
  11.414 +        "addl           %4,     %2\n" // lum += cols
  11.415 +        "addl           %8,     %3\n" // row1+= mod
  11.416 +        "addl           %8,     %5\n" // row2+= mod
  11.417 +        "movl           $0,     %6\n" // x=0
  11.418 +        "cmpl           %7,     %2\n"
  11.419 +        "jl             1b\n"
  11.420 +        "addl $4, %%esp\n"  // get rid of the stack slot we reserved.
  11.421 +        "emms\n"
  11.422 +        :
  11.423 +        : "m" (cr), "r"(cb),"r"(lum),
  11.424 +          "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
  11.425 +          "m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
  11.426 +          "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),
  11.427 +          "m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565)
  11.428 +    );
  11.429 +}
  11.430 +
  11.431 +/* *INDENT-ON* */
  11.432 +
  11.433 +#endif /* GCC3 i386 inline assembly */
  11.434 +
  11.435 +/* vi: set ts=4 sw=4 expandtab: */
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/src/render/SDL_yuv_sw.c	Thu Feb 03 00:19:40 2011 -0800
    12.3 @@ -0,0 +1,1322 @@
    12.4 +/*
    12.5 +    SDL - Simple DirectMedia Layer
    12.6 +    Copyright (C) 1997-2010 Sam Lantinga
    12.7 +
    12.8 +    This library is free software; you can redistribute it and/or
    12.9 +    modify it under the terms of the GNU Lesser General Public
   12.10 +    License as published by the Free Software Foundation; either
   12.11 +    version 2.1 of the License, or (at your option) any later version.
   12.12 +
   12.13 +    This library is distributed in the hope that it will be useful,
   12.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   12.16 +    Lesser General Public License for more details.
   12.17 +
   12.18 +    You should have received a copy of the GNU Lesser General Public
   12.19 +    License along with this library; if not, write to the Free Software
   12.20 +    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   12.21 +
   12.22 +    Sam Lantinga
   12.23 +    slouken@libsdl.org
   12.24 +*/
   12.25 +#include "SDL_config.h"
   12.26 +
   12.27 +/* This is the software implementation of the YUV texture support */
   12.28 +
   12.29 +/* This code was derived from code carrying the following copyright notices:
   12.30 +
   12.31 + * Copyright (c) 1995 The Regents of the University of California.
   12.32 + * All rights reserved.
   12.33 + * 
   12.34 + * Permission to use, copy, modify, and distribute this software and its
   12.35 + * documentation for any purpose, without fee, and without written agreement is
   12.36 + * hereby granted, provided that the above copyright notice and the following
   12.37 + * two paragraphs appear in all copies of this software.
   12.38 + * 
   12.39 + * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
   12.40 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
   12.41 + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
   12.42 + * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   12.43 + * 
   12.44 + * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
   12.45 + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
   12.46 + * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
   12.47 + * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
   12.48 + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
   12.49 +
   12.50 + * Copyright (c) 1995 Erik Corry
   12.51 + * All rights reserved.
   12.52 + * 
   12.53 + * Permission to use, copy, modify, and distribute this software and its
   12.54 + * documentation for any purpose, without fee, and without written agreement is
   12.55 + * hereby granted, provided that the above copyright notice and the following
   12.56 + * two paragraphs appear in all copies of this software.
   12.57 + * 
   12.58 + * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
   12.59 + * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
   12.60 + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
   12.61 + * OF THE POSSIBILITY OF SUCH DAMAGE.
   12.62 + * 
   12.63 + * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
   12.64 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   12.65 + * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
   12.66 + * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
   12.67 + * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
   12.68 +
   12.69 + * Portions of this software Copyright (c) 1995 Brown University.
   12.70 + * All rights reserved.
   12.71 + * 
   12.72 + * Permission to use, copy, modify, and distribute this software and its
   12.73 + * documentation for any purpose, without fee, and without written agreement
   12.74 + * is hereby granted, provided that the above copyright notice and the
   12.75 + * following two paragraphs appear in all copies of this software.
   12.76 + * 
   12.77 + * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
   12.78 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
   12.79 + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
   12.80 + * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   12.81 + * 
   12.82 + * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
   12.83 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   12.84 + * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
   12.85 + * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
   12.86 + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
   12.87 + */
   12.88 +
   12.89 +#include "SDL_video.h"
   12.90 +#include "SDL_cpuinfo.h"
   12.91 +#include "SDL_yuv_sw_c.h"
   12.92 +
   12.93 +
   12.94 +/* The colorspace conversion functions */
   12.95 +
   12.96 +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   12.97 +extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
   12.98 +                                    unsigned char *lum, unsigned char *cr,
   12.99 +                                    unsigned char *cb, unsigned char *out,
  12.100 +                                    int rows, int cols, int mod);  /* same parameter list as the C converters below */
  12.101 +extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
  12.102 +                                    unsigned char *lum, unsigned char *cr,
  12.103 +                                    unsigned char *cb, unsigned char *out,
  12.104 +                                    int rows, int cols, int mod);  /* both are defined outside this file -- presumably SDL_yuv_mmx.c; confirm */
  12.105 +#endif /* GCC3 i386 inline assembly */
  12.106 +/* 1:1 conversion to 16-bit pixels; each cr/cb sample serves a 2x2 group of lum samples. */
  12.107 +static void
  12.108 +Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
  12.109 +                       unsigned char *lum, unsigned char *cr,
  12.110 +                       unsigned char *cb, unsigned char *out,
  12.111 +                       int rows, int cols, int mod)
  12.112 +{
  12.113 +    unsigned short *row1;
  12.114 +    unsigned short *row2;
  12.115 +    unsigned char *lum2;
  12.116 +    int x, y;
  12.117 +    int cr_r;
  12.118 +    int crb_g;
  12.119 +    int cb_b;
  12.120 +    int cols_2 = cols / 2;      /* the inner loop covers two columns per pass */
  12.121 +
  12.122 +    row1 = (unsigned short *) out;
  12.123 +    row2 = row1 + cols + mod;   /* the output row below; rows lie cols + mod pixels apart */
  12.124 +    lum2 = lum + cols;          /* the lum row below */
  12.125 +
  12.126 +    mod += cols + mod;          /* cols + 2 * mod: added to the cols stepped per row, two full rows */
  12.127 +
  12.128 +    y = rows / 2;               /* every pass emits two output rows */
  12.129 +    while (y--) {
  12.130 +        x = cols_2;
  12.131 +        while (x--) {
  12.132 +            register int L;
  12.133 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.134 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.135 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.136 +                + colortab[*cb + 2 * 256];
  12.137 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.138 +            ++cr;
  12.139 +            ++cb;
  12.140 +
  12.141 +            L = *lum++;
  12.142 +            *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  12.143 +                                        rgb_2_pix[L + crb_g] |
  12.144 +                                        rgb_2_pix[L + cb_b]);
  12.145 +
  12.146 +            L = *lum++;
  12.147 +            *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  12.148 +                                        rgb_2_pix[L + crb_g] |
  12.149 +                                        rgb_2_pix[L + cb_b]);
  12.150 +
  12.151 +
  12.152 +            /* Second row of the pair: same cr/cb offsets, lum from lum2. */
  12.153 +
  12.154 +            L = *lum2++;
  12.155 +            *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  12.156 +                                        rgb_2_pix[L + crb_g] |
  12.157 +                                        rgb_2_pix[L + cb_b]);
  12.158 +
  12.159 +            L = *lum2++;
  12.160 +            *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  12.161 +                                        rgb_2_pix[L + crb_g] |
  12.162 +                                        rgb_2_pix[L + cb_b]);
  12.163 +        }
  12.164 +
  12.165 +        /*
  12.166 +         * The ++'s above moved each input pointer along one row,
  12.167 +         * but rows are read two at a time, so skip one more; mod
  12.168 +         * was set up above to do the equivalent for the output.
  12.169 +         */
  12.170 +        lum += cols;
  12.171 +        lum2 += cols;
  12.172 +        row1 += mod;
  12.173 +        row2 += mod;
  12.174 +    }
  12.175 +}
  12.176 +/* 1:1 conversion to 3-byte pixels; each cr/cb sample serves a 2x2 group of lum samples. */
  12.177 +static void
  12.178 +Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
  12.179 +                       unsigned char *lum, unsigned char *cr,
  12.180 +                       unsigned char *cb, unsigned char *out,
  12.181 +                       int rows, int cols, int mod)
  12.182 +{
  12.183 +    unsigned int value;         /* combined table result; its low three bytes are stored */
  12.184 +    unsigned char *row1;
  12.185 +    unsigned char *row2;
  12.186 +    unsigned char *lum2;
  12.187 +    int x, y;
  12.188 +    int cr_r;
  12.189 +    int crb_g;
  12.190 +    int cb_b;
  12.191 +    int cols_2 = cols / 2;      /* the inner loop covers two columns per pass */
  12.192 +
  12.193 +    row1 = out;
  12.194 +    row2 = row1 + cols * 3 + mod * 3;   /* the output row below, measured in bytes */
  12.195 +    lum2 = lum + cols;          /* the lum row below */
  12.196 +
  12.197 +    mod += cols + mod;          /* in pixels: cols + 2 * mod */
  12.198 +    mod *= 3;                   /* converted to bytes, 3 per pixel */
  12.199 +
  12.200 +    y = rows / 2;               /* every pass emits two output rows */
  12.201 +    while (y--) {
  12.202 +        x = cols_2;
  12.203 +        while (x--) {
  12.204 +            register int L;
  12.205 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.206 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.207 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.208 +                + colortab[*cb + 2 * 256];
  12.209 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.210 +            ++cr;
  12.211 +            ++cb;
  12.212 +
  12.213 +            L = *lum++;
  12.214 +            value = (rgb_2_pix[L + cr_r] |
  12.215 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.216 +            *row1++ = (value) & 0xFF;           /* lowest byte is written first */
  12.217 +            *row1++ = (value >> 8) & 0xFF;
  12.218 +            *row1++ = (value >> 16) & 0xFF;
  12.219 +
  12.220 +            L = *lum++;
  12.221 +            value = (rgb_2_pix[L + cr_r] |
  12.222 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.223 +            *row1++ = (value) & 0xFF;
  12.224 +            *row1++ = (value >> 8) & 0xFF;
  12.225 +            *row1++ = (value >> 16) & 0xFF;
  12.226 +
  12.227 +
  12.228 +            /* Second row of the pair: same cr/cb offsets, lum from lum2. */
  12.229 +
  12.230 +            L = *lum2++;
  12.231 +            value = (rgb_2_pix[L + cr_r] |
  12.232 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.233 +            *row2++ = (value) & 0xFF;
  12.234 +            *row2++ = (value >> 8) & 0xFF;
  12.235 +            *row2++ = (value >> 16) & 0xFF;
  12.236 +
  12.237 +            L = *lum2++;
  12.238 +            value = (rgb_2_pix[L + cr_r] |
  12.239 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.240 +            *row2++ = (value) & 0xFF;
  12.241 +            *row2++ = (value >> 8) & 0xFF;
  12.242 +            *row2++ = (value >> 16) & 0xFF;
  12.243 +        }
  12.244 +
  12.245 +        /*
  12.246 +         * The ++'s above moved each input pointer along one row,
  12.247 +         * but rows are read two at a time, so skip one more; mod
  12.248 +         * was set up above to do the equivalent for the output.
  12.249 +         */
  12.250 +        lum += cols;
  12.251 +        lum2 += cols;
  12.252 +        row1 += mod;
  12.253 +        row2 += mod;
  12.254 +    }
  12.255 +}
  12.256 +/* 1:1 conversion to 32-bit pixels; each cr/cb sample serves a 2x2 group of lum samples. */
  12.257 +static void
  12.258 +Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
  12.259 +                       unsigned char *lum, unsigned char *cr,
  12.260 +                       unsigned char *cb, unsigned char *out,
  12.261 +                       int rows, int cols, int mod)
  12.262 +{
  12.263 +    unsigned int *row1;
  12.264 +    unsigned int *row2;
  12.265 +    unsigned char *lum2;
  12.266 +    int x, y;
  12.267 +    int cr_r;
  12.268 +    int crb_g;
  12.269 +    int cb_b;
  12.270 +    int cols_2 = cols / 2;      /* the inner loop covers two columns per pass */
  12.271 +
  12.272 +    row1 = (unsigned int *) out;
  12.273 +    row2 = row1 + cols + mod;   /* the output row below; rows lie cols + mod pixels apart */
  12.274 +    lum2 = lum + cols;          /* the lum row below */
  12.275 +
  12.276 +    mod += cols + mod;          /* cols + 2 * mod: added to the cols stepped per row, two full rows */
  12.277 +
  12.278 +    y = rows / 2;               /* every pass emits two output rows */
  12.279 +    while (y--) {
  12.280 +        x = cols_2;
  12.281 +        while (x--) {
  12.282 +            register int L;
  12.283 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.284 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.285 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.286 +                + colortab[*cb + 2 * 256];
  12.287 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.288 +            ++cr;
  12.289 +            ++cb;
  12.290 +
  12.291 +            L = *lum++;
  12.292 +            *row1++ = (rgb_2_pix[L + cr_r] |
  12.293 +                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.294 +
  12.295 +            L = *lum++;
  12.296 +            *row1++ = (rgb_2_pix[L + cr_r] |
  12.297 +                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.298 +
  12.299 +
  12.300 +            /* Second row of the pair: same cr/cb offsets, lum from lum2. */
  12.301 +
  12.302 +            L = *lum2++;
  12.303 +            *row2++ = (rgb_2_pix[L + cr_r] |
  12.304 +                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.305 +
  12.306 +            L = *lum2++;
  12.307 +            *row2++ = (rgb_2_pix[L + cr_r] |
  12.308 +                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.309 +        }
  12.310 +
  12.311 +        /*
  12.312 +         * The ++'s above moved each input pointer along one row,
  12.313 +         * but rows are read two at a time, so skip one more; mod
  12.314 +         * was set up above to do the equivalent for the output.
  12.315 +         */
  12.316 +        lum += cols;
  12.317 +        lum2 += cols;
  12.318 +        row1 += mod;
  12.319 +        row2 += mod;
  12.320 +    }
  12.321 +}
  12.322 +
  12.323 +/*
  12.324 + * 2x-scaled 16-bit output.  The tables are expected to have the lower 16
  12.325 + * bits replicated in the upper 16, so one int store emits a pixel twice
  12.326 + * (horizontal doubling, almost free); a second store at next_row doubles rows.
  12.327 + */
  12.328 +static void
  12.329 +Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
  12.330 +                       unsigned char *lum, unsigned char *cr,
  12.331 +                       unsigned char *cb, unsigned char *out,
  12.332 +                       int rows, int cols, int mod)
  12.333 +{
  12.334 +    unsigned int *row1 = (unsigned int *) out;
  12.335 +    const int next_row = cols + (mod / 2);      /* ints per output row; presumably mod counts 16-bit pixels -- confirm */
  12.336 +    unsigned int *row2 = row1 + 2 * next_row;   /* row1 fills output rows 0-1 of a pass, row2 rows 2-3 */
  12.337 +    unsigned char *lum2;
  12.338 +    int x, y;
  12.339 +    int cr_r;
  12.340 +    int crb_g;
  12.341 +    int cb_b;
  12.342 +    int cols_2 = cols / 2;      /* the inner loop covers two input columns per pass */
  12.343 +
  12.344 +    lum2 = lum + cols;          /* the lum row below */
  12.345 +
  12.346 +    mod = (next_row * 3) + (mod / 2);   /* plus the cols ints stepped per row: 4 * next_row */
  12.347 +
  12.348 +    y = rows / 2;               /* every pass reads two input rows */
  12.349 +    while (y--) {
  12.350 +        x = cols_2;
  12.351 +        while (x--) {
  12.352 +            register int L;
  12.353 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.354 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.355 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.356 +                + colortab[*cb + 2 * 256];
  12.357 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.358 +            ++cr;
  12.359 +            ++cb;
  12.360 +
  12.361 +            L = *lum++;
  12.362 +            row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
  12.363 +                                        rgb_2_pix[L + crb_g] |
  12.364 +                                        rgb_2_pix[L + cb_b]);
  12.365 +            row1++;
  12.366 +
  12.367 +            L = *lum++;
  12.368 +            row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
  12.369 +                                        rgb_2_pix[L + crb_g] |
  12.370 +                                        rgb_2_pix[L + cb_b]);
  12.371 +            row1++;
  12.372 +
  12.373 +
  12.374 +            /* Second row of the pair: same cr/cb offsets, lum from lum2. */
  12.375 +
  12.376 +            L = *lum2++;
  12.377 +            row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
  12.378 +                                        rgb_2_pix[L + crb_g] |
  12.379 +                                        rgb_2_pix[L + cb_b]);
  12.380 +            row2++;
  12.381 +
  12.382 +            L = *lum2++;
  12.383 +            row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
  12.384 +                                        rgb_2_pix[L + crb_g] |
  12.385 +                                        rgb_2_pix[L + cb_b]);
  12.386 +            row2++;
  12.387 +        }
  12.388 +
  12.389 +        /*
  12.390 +         * The ++'s above moved each input pointer along one row,
  12.391 +         * but rows are read two at a time, so skip one more; mod
  12.392 +         * was set up above to do the equivalent for the output.
  12.393 +         */
  12.394 +        lum += cols;
  12.395 +        lum2 += cols;
  12.396 +        row1 += mod;
  12.397 +        row2 += mod;
  12.398 +    }
  12.399 +}
  12.400 +/* 2x-scaled conversion to 3-byte pixels: every lum sample becomes a 2x2 block of output pixels. */
  12.401 +static void
  12.402 +Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
  12.403 +                       unsigned char *lum, unsigned char *cr,
  12.404 +                       unsigned char *cb, unsigned char *out,
  12.405 +                       int rows, int cols, int mod)
  12.406 +{
  12.407 +    unsigned int value;         /* combined table result; its low three bytes are stored */
  12.408 +    unsigned char *row1 = out;
  12.409 +    const int next_row = (cols * 2 + mod) * 3;  /* bytes per output row */
  12.410 +    unsigned char *row2 = row1 + 2 * next_row;  /* row1 fills output rows 0-1 of a pass, row2 rows 2-3 */
  12.411 +    unsigned char *lum2;
  12.412 +    int x, y;
  12.413 +    int cr_r;
  12.414 +    int crb_g;
  12.415 +    int cb_b;
  12.416 +    int cols_2 = cols / 2;      /* the inner loop covers two input columns per pass */
  12.417 +
  12.418 +    lum2 = lum + cols;          /* the lum row below */
  12.419 +
  12.420 +    mod = next_row * 3 + mod * 3;       /* plus the cols * 6 bytes stepped per row: 4 * next_row */
  12.421 +
  12.422 +    y = rows / 2;               /* every pass reads two input rows */
  12.423 +    while (y--) {
  12.424 +        x = cols_2;
  12.425 +        while (x--) {
  12.426 +            register int L;
  12.427 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.428 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.429 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.430 +                + colortab[*cb + 2 * 256];
  12.431 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.432 +            ++cr;
  12.433 +            ++cb;
  12.434 +
  12.435 +            L = *lum++;
  12.436 +            value = (rgb_2_pix[L + cr_r] |
  12.437 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.438 +            row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =    /* same byte into all four pixels of the 2x2 block */
  12.439 +                row1[next_row + 3 + 0] = (value) & 0xFF;
  12.440 +            row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
  12.441 +                row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
  12.442 +            row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
  12.443 +                row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
  12.444 +            row1 += 2 * 3;      /* past the two 3-byte pixels just written */
  12.445 +
  12.446 +            L = *lum++;
  12.447 +            value = (rgb_2_pix[L + cr_r] |
  12.448 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.449 +            row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
  12.450 +                row1[next_row + 3 + 0] = (value) & 0xFF;
  12.451 +            row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
  12.452 +                row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
  12.453 +            row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
  12.454 +                row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
  12.455 +            row1 += 2 * 3;
  12.456 +
  12.457 +
  12.458 +            /* Second row of the pair: same cr/cb offsets, lum from lum2. */
  12.459 +
  12.460 +            L = *lum2++;
  12.461 +            value = (rgb_2_pix[L + cr_r] |
  12.462 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.463 +            row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
  12.464 +                row2[next_row + 3 + 0] = (value) & 0xFF;
  12.465 +            row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
  12.466 +                row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
  12.467 +            row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
  12.468 +                row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
  12.469 +            row2 += 2 * 3;
  12.470 +
  12.471 +            L = *lum2++;
  12.472 +            value = (rgb_2_pix[L + cr_r] |
  12.473 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.474 +            row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
  12.475 +                row2[next_row + 3 + 0] = (value) & 0xFF;
  12.476 +            row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
  12.477 +                row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
  12.478 +            row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
  12.479 +                row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
  12.480 +            row2 += 2 * 3;
  12.481 +        }
  12.482 +
  12.483 +        /*
  12.484 +         * The ++'s above moved each input pointer along one row,
  12.485 +         * but rows are read two at a time, so skip one more; mod
  12.486 +         * was set up above to do the equivalent for the output.
  12.487 +         */
  12.488 +        lum += cols;
  12.489 +        lum2 += cols;
  12.490 +        row1 += mod;
  12.491 +        row2 += mod;
  12.492 +    }
  12.493 +}
  12.494 +/* 2x-scaled conversion to 32-bit pixels: every lum sample becomes a 2x2 block of output pixels. */
  12.495 +static void
  12.496 +Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
  12.497 +                       unsigned char *lum, unsigned char *cr,
  12.498 +                       unsigned char *cb, unsigned char *out,
  12.499 +                       int rows, int cols, int mod)
  12.500 +{
  12.501 +    unsigned int *row1 = (unsigned int *) out;
  12.502 +    const int next_row = cols * 2 + mod;        /* pixels per output row */
  12.503 +    unsigned int *row2 = row1 + 2 * next_row;   /* row1 fills output rows 0-1 of a pass, row2 rows 2-3 */
  12.504 +    unsigned char *lum2;
  12.505 +    int x, y;
  12.506 +    int cr_r;
  12.507 +    int crb_g;
  12.508 +    int cb_b;
  12.509 +    int cols_2 = cols / 2;      /* the inner loop covers two input columns per pass */
  12.510 +
  12.511 +    lum2 = lum + cols;          /* the lum row below */
  12.512 +
  12.513 +    mod = (next_row * 3) + mod; /* plus the 2 * cols pixels stepped per row: 4 * next_row */
  12.514 +
  12.515 +    y = rows / 2;               /* every pass reads two input rows */
  12.516 +    while (y--) {
  12.517 +        x = cols_2;
  12.518 +        while (x--) {
  12.519 +            register int L;
  12.520 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.521 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.522 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.523 +                + colortab[*cb + 2 * 256];
  12.524 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.525 +            ++cr;
  12.526 +            ++cb;
  12.527 +
  12.528 +            L = *lum++;
  12.529 +            row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =   /* one value fills the whole 2x2 block */
  12.530 +                (rgb_2_pix[L + cr_r] |
  12.531 +                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.532 +            row1 += 2;
  12.533 +
  12.534 +            L = *lum++;
  12.535 +            row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
  12.536 +                (rgb_2_pix[L + cr_r] |
  12.537 +                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.538 +            row1 += 2;
  12.539 +
  12.540 +
  12.541 +            /* Second row of the pair: same cr/cb offsets, lum from lum2. */
  12.542 +
  12.543 +            L = *lum2++;
  12.544 +            row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
  12.545 +                (rgb_2_pix[L + cr_r] |
  12.546 +                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.547 +            row2 += 2;
  12.548 +
  12.549 +            L = *lum2++;
  12.550 +            row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
  12.551 +                (rgb_2_pix[L + cr_r] |
  12.552 +                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.553 +            row2 += 2;
  12.554 +        }
  12.555 +
  12.556 +        /*
  12.557 +         * The ++'s above moved each input pointer along one row,
  12.558 +         * but rows are read two at a time, so skip one more; mod
  12.559 +         * was set up above to do the equivalent for the output.
  12.560 +         */
  12.561 +        lum += cols;
  12.562 +        lum2 += cols;
  12.563 +        row1 += mod;
  12.564 +        row2 += mod;
  12.565 +    }
  12.566 +}
  12.567 +/* 1:1 conversion to 16-bit pixels; lum is read every 2 bytes, cr/cb every 4 (one pair per two pixels). */
  12.568 +static void
  12.569 +Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
  12.570 +                       unsigned char *lum, unsigned char *cr,
  12.571 +                       unsigned char *cb, unsigned char *out,
  12.572 +                       int rows, int cols, int mod)
  12.573 +{
  12.574 +    unsigned short *row;
  12.575 +    int x, y;
  12.576 +    int cr_r;
  12.577 +    int crb_g;
  12.578 +    int cb_b;
  12.579 +    int cols_2 = cols / 2;      /* the inner loop covers two columns per pass */
  12.580 +
  12.581 +    row = (unsigned short *) out;
  12.582 +
  12.583 +    y = rows;                   /* one output row per pass */
  12.584 +    while (y--) {
  12.585 +        x = cols_2;
  12.586 +        while (x--) {
  12.587 +            register int L;
  12.588 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.589 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.590 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.591 +                + colortab[*cb + 2 * 256];
  12.592 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.593 +            cr += 4;
  12.594 +            cb += 4;
  12.595 +
  12.596 +            L = *lum;
  12.597 +            lum += 2;
  12.598 +            *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  12.599 +                                       rgb_2_pix[L + crb_g] |
  12.600 +                                       rgb_2_pix[L + cb_b]);
  12.601 +
  12.602 +            L = *lum;
  12.603 +            lum += 2;
  12.604 +            *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  12.605 +                                       rgb_2_pix[L + crb_g] |
  12.606 +                                       rgb_2_pix[L + cb_b]);
  12.607 +
  12.608 +        }
  12.609 +        /* lum, cr and cb simply keep running; only the output steps over padding. */
  12.610 +        row += mod;
  12.611 +    }
  12.612 +}
  12.613 +/* 1:1 conversion to 3-byte pixels; lum is read every 2 bytes, cr/cb every 4 (one pair per two pixels). */
  12.614 +static void
  12.615 +Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
  12.616 +                       unsigned char *lum, unsigned char *cr,
  12.617 +                       unsigned char *cb, unsigned char *out,
  12.618 +                       int rows, int cols, int mod)
  12.619 +{
  12.620 +    unsigned int value;         /* combined table result; its low three bytes are stored */
  12.621 +    unsigned char *row;
  12.622 +    int x, y;
  12.623 +    int cr_r;
  12.624 +    int crb_g;
  12.625 +    int cb_b;
  12.626 +    int cols_2 = cols / 2;      /* the inner loop covers two columns per pass */
  12.627 +
  12.628 +    row = (unsigned char *) out;
  12.629 +    mod *= 3;                   /* padding converted from pixels to bytes */
  12.630 +    y = rows;                   /* one output row per pass */
  12.631 +    while (y--) {
  12.632 +        x = cols_2;
  12.633 +        while (x--) {
  12.634 +            register int L;
  12.635 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.636 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.637 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.638 +                + colortab[*cb + 2 * 256];
  12.639 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.640 +            cr += 4;
  12.641 +            cb += 4;
  12.642 +
  12.643 +            L = *lum;
  12.644 +            lum += 2;
  12.645 +            value = (rgb_2_pix[L + cr_r] |
  12.646 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.647 +            *row++ = (value) & 0xFF;            /* lowest byte is written first */
  12.648 +            *row++ = (value >> 8) & 0xFF;
  12.649 +            *row++ = (value >> 16) & 0xFF;
  12.650 +
  12.651 +            L = *lum;
  12.652 +            lum += 2;
  12.653 +            value = (rgb_2_pix[L + cr_r] |
  12.654 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.655 +            *row++ = (value) & 0xFF;
  12.656 +            *row++ = (value >> 8) & 0xFF;
  12.657 +            *row++ = (value >> 16) & 0xFF;
  12.658 +
  12.659 +        }
  12.660 +        row += mod;             /* lum, cr and cb keep running; only the output steps over padding */
  12.661 +    }
  12.662 +}
  12.663 +/* 1:1 conversion to 32-bit pixels; lum is read every 2 bytes, cr/cb every 4 (one pair per two pixels). */
  12.664 +static void
  12.665 +Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
  12.666 +                       unsigned char *lum, unsigned char *cr,
  12.667 +                       unsigned char *cb, unsigned char *out,
  12.668 +                       int rows, int cols, int mod)
  12.669 +{
  12.670 +    unsigned int *row;
  12.671 +    int x, y;
  12.672 +    int cr_r;
  12.673 +    int crb_g;
  12.674 +    int cb_b;
  12.675 +    int cols_2 = cols / 2;      /* the inner loop covers two columns per pass */
  12.676 +
  12.677 +    row = (unsigned int *) out;
  12.678 +    y = rows;                   /* one output row per pass */
  12.679 +    while (y--) {
  12.680 +        x = cols_2;
  12.681 +        while (x--) {
  12.682 +            register int L;
  12.683 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.684 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.685 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.686 +                + colortab[*cb + 2 * 256];
  12.687 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.688 +            cr += 4;
  12.689 +            cb += 4;
  12.690 +
  12.691 +            L = *lum;
  12.692 +            lum += 2;
  12.693 +            *row++ = (rgb_2_pix[L + cr_r] |
  12.694 +                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.695 +
  12.696 +            L = *lum;
  12.697 +            lum += 2;
  12.698 +            *row++ = (rgb_2_pix[L + cr_r] |
  12.699 +                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.700 +
  12.701 +
  12.702 +        }
  12.703 +        row += mod;             /* lum, cr and cb keep running; only the output steps over padding */
  12.704 +    }
  12.705 +}
  12.706 +
  12.707 +/*
  12.708 + * 2x-scaled 16-bit output.  The tables are expected to have the lower 16
  12.709 + * bits replicated in the upper 16, so one int store emits a pixel twice
  12.710 + * (horizontal doubling, almost free); a second store at next_row doubles rows.
  12.711 + */
  12.712 +static void
  12.713 +Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
  12.714 +                       unsigned char *lum, unsigned char *cr,
  12.715 +                       unsigned char *cb, unsigned char *out,
  12.716 +                       int rows, int cols, int mod)
  12.717 +{
  12.718 +    unsigned int *row = (unsigned int *) out;
  12.719 +    const int next_row = cols + (mod / 2);      /* ints per output row; presumably mod counts 16-bit pixels -- confirm */
  12.720 +    int x, y;
  12.721 +    int cr_r;
  12.722 +    int crb_g;
  12.723 +    int cb_b;
  12.724 +    int cols_2 = cols / 2;      /* the inner loop covers two input columns per pass */
  12.725 +
  12.726 +    y = rows;                   /* one input row (two output rows) per pass */
  12.727 +    while (y--) {
  12.728 +        x = cols_2;
  12.729 +        while (x--) {
  12.730 +            register int L;
  12.731 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.732 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.733 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.734 +                + colortab[*cb + 2 * 256];
  12.735 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.736 +            cr += 4;
  12.737 +            cb += 4;
  12.738 +
  12.739 +            L = *lum;
  12.740 +            lum += 2;
  12.741 +            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
  12.742 +                                      rgb_2_pix[L + crb_g] |
  12.743 +                                      rgb_2_pix[L + cb_b]);
  12.744 +            row++;
  12.745 +
  12.746 +            L = *lum;
  12.747 +            lum += 2;
  12.748 +            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
  12.749 +                                      rgb_2_pix[L + crb_g] |
  12.750 +                                      rgb_2_pix[L + cb_b]);
  12.751 +            row++;
  12.752 +
  12.753 +        }
  12.754 +        row += next_row;        /* NOTE(review): row has already moved cols ints, so this totals 2 * next_row only when mod / 2 == 0 -- confirm */
  12.755 +    }
  12.756 +}
  12.757 +/* 2x-scaled conversion to 3-byte pixels; lum is read every 2 bytes, cr/cb every 4; each lum sample fills a 2x2 block. */
  12.758 +static void
  12.759 +Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
  12.760 +                       unsigned char *lum, unsigned char *cr,
  12.761 +                       unsigned char *cb, unsigned char *out,
  12.762 +                       int rows, int cols, int mod)
  12.763 +{
  12.764 +    unsigned int value;         /* combined table result; its low three bytes are stored */
  12.765 +    unsigned char *row = out;
  12.766 +    const int next_row = (cols * 2 + mod) * 3;  /* bytes per output row */
  12.767 +    int x, y;
  12.768 +    int cr_r;
  12.769 +    int crb_g;
  12.770 +    int cb_b;
  12.771 +    int cols_2 = cols / 2;      /* the inner loop covers two input columns per pass */
  12.772 +    y = rows;                   /* one input row (two output rows) per pass */
  12.773 +    while (y--) {
  12.774 +        x = cols_2;
  12.775 +        while (x--) {
  12.776 +            register int L;
  12.777 +            /* Offsets into rgb_2_pix: section base (n * 768) + 256 + colortab entry. */
  12.778 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.779 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.780 +                + colortab[*cb + 2 * 256];
  12.781 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.782 +            cr += 4;
  12.783 +            cb += 4;
  12.784 +
  12.785 +            L = *lum;
  12.786 +            lum += 2;
  12.787 +            value = (rgb_2_pix[L + cr_r] |
  12.788 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.789 +            row[0 + 0] = row[3 + 0] = row[next_row + 0] =       /* same byte into all four pixels of the 2x2 block */
  12.790 +                row[next_row + 3 + 0] = (value) & 0xFF;
  12.791 +            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
  12.792 +                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
  12.793 +            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
  12.794 +                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
  12.795 +            row += 2 * 3;       /* past the two 3-byte pixels just written */
  12.796 +
  12.797 +            L = *lum;
  12.798 +            lum += 2;
  12.799 +            value = (rgb_2_pix[L + cr_r] |
  12.800 +                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.801 +            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
  12.802 +                row[next_row + 3 + 0] = (value) & 0xFF;
  12.803 +            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
  12.804 +                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
  12.805 +            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
  12.806 +                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
  12.807 +            row += 2 * 3;
  12.808 +
  12.809 +        }
  12.810 +        row += next_row;        /* NOTE(review): row has already moved cols * 6 bytes, so this totals 2 * next_row only when mod == 0 -- confirm */
  12.811 +    }
  12.812 +}
  12.813 +
  12.814 +static void
  12.815 +Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
  12.816 +                       unsigned char *lum, unsigned char *cr,
  12.817 +                       unsigned char *cb, unsigned char *out,
  12.818 +                       int rows, int cols, int mod)
  12.819 +{
  12.820 +    unsigned int *row = (unsigned int *) out;
  12.821 +    const int next_row = cols * 2 + mod;
  12.822 +    int x, y;
  12.823 +    int cr_r;
  12.824 +    int crb_g;
  12.825 +    int cb_b;
  12.826 +    int cols_2 = cols / 2;
  12.827 +    mod += mod;
  12.828 +    y = rows;
  12.829 +    while (y--) {
  12.830 +        x = cols_2;
  12.831 +        while (x--) {
  12.832 +            register int L;
  12.833 +
  12.834 +            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  12.835 +            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  12.836 +                + colortab[*cb + 2 * 256];
  12.837 +            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  12.838 +            cr += 4;
  12.839 +            cb += 4;
  12.840 +
  12.841 +            L = *lum;
  12.842 +            lum += 2;
  12.843 +            row[0] = row[1] = row[next_row] = row[next_row + 1] =
  12.844 +                (rgb_2_pix[L + cr_r] |
  12.845 +                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.846 +            row += 2;
  12.847 +
  12.848 +            L = *lum;
  12.849 +            lum += 2;
  12.850 +            row[0] = row[1] = row[next_row] = row[next_row + 1] =
  12.851 +                (rgb_2_pix[L + cr_r] |
  12.852 +                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  12.853 +            row += 2;
  12.854 +
  12.855 +
  12.856 +        }
  12.857 +
  12.858 +        row += next_row;
  12.859 +    }
  12.860 +}
  12.861 +
  12.862 +/*
  12.863 + * How many 1 bits are there in the Uint32.
  12.864 + * Low performance, do not call often.
  12.865 + */
  12.866 +static int
  12.867 +number_of_bits_set(Uint32 a)
  12.868 +{
  12.869 +    if (!a)
  12.870 +        return 0;
  12.871 +    if (a & 1)
  12.872 +        return 1 + number_of_bits_set(a >> 1);
  12.873 +    return (number_of_bits_set(a >> 1));
  12.874 +}
  12.875 +
  12.876 +/*
  12.877 + * How many 0 bits are there at least significant end of Uint32.
  12.878 + * Low performance, do not call often.
  12.879 + */
  12.880 +static int
  12.881 +free_bits_at_bottom(Uint32 a)
  12.882 +{
  12.883 +    /* assume char is 8 bits */
  12.884 +    if (!a)
  12.885 +        return sizeof(Uint32) * 8;
  12.886 +    if (((Sint32) a) & 1l)
  12.887 +        return 0;
  12.888 +    return 1 + free_bits_at_bottom(a >> 1);
  12.889 +}
  12.890 +
  12.891 +static int
  12.892 +SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
  12.893 +{
  12.894 +    Uint32 *r_2_pix_alloc;
  12.895 +    Uint32 *g_2_pix_alloc;
  12.896 +    Uint32 *b_2_pix_alloc;
  12.897 +    int i;
  12.898 +    int bpp;
  12.899 +    Uint32 Rmask, Gmask, Bmask, Amask;
  12.900 +
  12.901 +    if (!SDL_PixelFormatEnumToMasks
  12.902 +        (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
  12.903 +        SDL_SetError("Unsupported YUV destination format");
  12.904 +        return -1;
  12.905 +    }
  12.906 +
  12.907 +    swdata->target_format = target_format;
  12.908 +    r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
  12.909 +    g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
  12.910 +    b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
  12.911 +
  12.912 +    /* 
  12.913 +     * Set up entries 0-255 in rgb-to-pixel value tables.
  12.914 +     */
  12.915 +    for (i = 0; i < 256; ++i) {
  12.916 +        r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
  12.917 +        r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
  12.918 +        r_2_pix_alloc[i + 256] |= Amask;
  12.919 +        g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
  12.920 +        g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
  12.921 +        g_2_pix_alloc[i + 256] |= Amask;
  12.922 +        b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
  12.923 +        b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
  12.924 +        b_2_pix_alloc[i + 256] |= Amask;
  12.925 +    }
  12.926 +
  12.927 +    /*
  12.928 +     * If we have 16-bit output depth, then we double the value
  12.929 +     * in the top word. This means that we can write out both
  12.930 +     * pixels in the pixel doubling mode with one op. It is 
  12.931 +     * harmless in the normal case as storing a 32-bit value
  12.932 +     * through a short pointer will lose the top bits anyway.
  12.933 +     */
  12.934 +    if (SDL_BYTESPERPIXEL(target_format) == 2) {
  12.935 +        for (i = 0; i < 256; ++i) {
  12.936 +            r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
  12.937 +            g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
  12.938 +            b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
  12.939 +        }
  12.940 +    }
  12.941 +
  12.942 +    /*
  12.943 +     * Spread out the values we have to the rest of the array so that
  12.944 +     * we do not need to check for overflow.
  12.945 +     */
  12.946 +    for (i = 0; i < 256; ++i) {
  12.947 +        r_2_pix_alloc[i] = r_2_pix_alloc[256];
  12.948 +        r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
  12.949 +        g_2_pix_alloc[i] = g_2_pix_alloc[256];
  12.950 +        g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
  12.951 +        b_2_pix_alloc[i] = b_2_pix_alloc[256];
  12.952 +        b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
  12.953 +    }
  12.954 +
  12.955 +    /* You have chosen wisely... */
  12.956 +    switch (swdata->format) {
  12.957 +    case SDL_PIXELFORMAT_YV12:
  12.958 +    case SDL_PIXELFORMAT_IYUV:
  12.959 +        if (SDL_BYTESPERPIXEL(target_format) == 2) {
  12.960 +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
  12.961 +            /* inline assembly functions */
  12.962 +            if (SDL_HasMMX() && (Rmask == 0xF800) &&
  12.963 +                (Gmask == 0x07E0) && (Bmask == 0x001F)
  12.964 +                && (swdata->w & 15) == 0) {
  12.965 +/*printf("Using MMX 16-bit 565 dither\n");*/
  12.966 +                swdata->Display1X = Color565DitherYV12MMX1X;
  12.967 +            } else {
  12.968 +/*printf("Using C 16-bit dither\n");*/
  12.969 +                swdata->Display1X = Color16DitherYV12Mod1X;
  12.970 +            }
  12.971 +#else
  12.972 +            swdata->Display1X = Color16DitherYV12Mod1X;
  12.973 +#endif
  12.974 +            swdata->Display2X = Color16DitherYV12Mod2X;
  12.975 +        }
  12.976 +        if (SDL_BYTESPERPIXEL(target_format) == 3) {
  12.977 +            swdata->Display1X = Color24DitherYV12Mod1X;
  12.978 +            swdata->Display2X = Color24DitherYV12Mod2X;
  12.979 +        }
  12.980 +        if (SDL_BYTESPERPIXEL(target_format) == 4) {
  12.981 +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
  12.982 +            /* inline assembly functions */
  12.983 +            if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
  12.984 +                (Gmask == 0x0000FF00) &&
  12.985 +                (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
  12.986 +/*printf("Using MMX 32-bit dither\n");*/
  12.987 +                swdata->Display1X = ColorRGBDitherYV12MMX1X;
  12.988 +            } else {
  12.989 +/*printf("Using C 32-bit dither\n");*/
  12.990 +                swdata->Display1X = Color32DitherYV12Mod1X;
  12.991 +            }
  12.992 +#else
  12.993 +            swdata->Display1X = Color32DitherYV12Mod1X;
  12.994 +#endif
  12.995 +            swdata->Display2X = Color32DitherYV12Mod2X;
  12.996 +        }
  12.997 +        break;
  12.998 +    case SDL_PIXELFORMAT_YUY2:
  12.999 +    case SDL_PIXELFORMAT_UYVY:
 12.1000 +    case SDL_PIXELFORMAT_YVYU:
 12.1001 +        if (SDL_BYTESPERPIXEL(target_format) == 2) {
 12.1002 +            swdata->Display1X = Color16DitherYUY2Mod1X;
 12.1003 +            swdata->Display2X = Color16DitherYUY2Mod2X;
 12.1004 +        }
 12.1005 +        if (SDL_BYTESPERPIXEL(target_format) == 3) {
 12.1006 +            swdata->Display1X = Color24DitherYUY2Mod1X;
 12.1007 +            swdata->Display2X = Color24DitherYUY2Mod2X;
 12.1008 +        }
 12.1009 +        if (SDL_BYTESPERPIXEL(target_format) == 4) {
 12.1010 +            swdata->Display1X = Color32DitherYUY2Mod1X;
 12.1011 +            swdata->Display2X = Color32DitherYUY2Mod2X;
 12.1012 +        }
 12.1013 +        break;
 12.1014 +    default:
 12.1015 +        /* We should never get here (caught above) */
 12.1016 +        break;
 12.1017 +    }
 12.1018 +
 12.1019 +    if (swdata->display) {
 12.1020 +        SDL_FreeSurface(swdata->display);
 12.1021 +        swdata->display = NULL;
 12.1022 +    }
 12.1023 +    return 0;
 12.1024 +}
 12.1025 +
 12.1026 +SDL_SW_YUVTexture *
 12.1027 +SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
 12.1028 +{
 12.1029 +    SDL_SW_YUVTexture *swdata;
 12.1030 +    int *Cr_r_tab;
 12.1031 +    int *Cr_g_tab;
 12.1032 +    int *Cb_g_tab;
 12.1033 +    int *Cb_b_tab;
 12.1034 +    int i;
 12.1035 +    int CR, CB;
 12.1036 +
 12.1037 +    swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
 12.1038 +    if (!swdata) {
 12.1039 +        SDL_OutOfMemory();
 12.1040 +        return NULL;
 12.1041 +    }
 12.1042 +
 12.1043 +    switch (format) {
 12.1044 +    case SDL_PIXELFORMAT_YV12:
 12.1045 +    case SDL_PIXELFORMAT_IYUV:
 12.1046 +    case SDL_PIXELFORMAT_YUY2:
 12.1047 +    case SDL_PIXELFORMAT_UYVY:
 12.1048 +    case SDL_PIXELFORMAT_YVYU:
 12.1049 +        break;
 12.1050 +    default:
 12.1051 +        SDL_SetError("Unsupported YUV format");
 12.1052 +        return NULL;
 12.1053 +    }
 12.1054 +
 12.1055 +    swdata->format = format;
 12.1056 +    swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
 12.1057 +    swdata->w = w;
 12.1058 +    swdata->h = h;
 12.1059 +    swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
 12.1060 +    swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
 12.1061 +    swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
 12.1062 +    if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
 12.1063 +        SDL_OutOfMemory();
 12.1064 +        SDL_SW_DestroyYUVTexture(swdata);
 12.1065 +        return NULL;
 12.1066 +    }
 12.1067 +
 12.1068 +    /* Generate the tables for the display surface */
 12.1069 +    Cr_r_tab = &swdata->colortab[0 * 256];
 12.1070 +    Cr_g_tab = &swdata->colortab[1 * 256];
 12.1071 +    Cb_g_tab = &swdata->colortab[2 * 256];
 12.1072 +    Cb_b_tab = &swdata->colortab[3 * 256];
 12.1073 +    for (i = 0; i < 256; i++) {
 12.1074 +        /* Gamma correction (luminescence table) and chroma correction
 12.1075 +           would be done here.  See the Berkeley mpeg_play sources.
 12.1076 +         */
 12.1077 +        CB = CR = (i - 128);
 12.1078 +        Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
 12.1079 +        Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
 12.1080 +        Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
 12.1081 +        Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
 12.1082 +    }
 12.1083 +
 12.1084 +    /* Find the pitch and offset values for the overlay */
 12.1085 +    switch (format) {
 12.1086 +    case SDL_PIXELFORMAT_YV12:
 12.1087 +    case SDL_PIXELFORMAT_IYUV:
 12.1088 +        swdata->pitches[0] = w;
 12.1089 +        swdata->pitches[1] = swdata->pitches[0] / 2;
 12.1090 +        swdata->pitches[2] = swdata->pitches[0] / 2;
 12.1091 +        swdata->planes[0] = swdata->pixels;
 12.1092 +        swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
 12.1093 +        swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
 12.1094 +        break;
 12.1095 +    case SDL_PIXELFORMAT_YUY2:
 12.1096 +    case SDL_PIXELFORMAT_UYVY:
 12.1097 +    case SDL_PIXELFORMAT_YVYU:
 12.1098 +        swdata->pitches[0] = w * 2;
 12.1099 +        swdata->planes[0] = swdata->pixels;
 12.1100 +        break;
 12.1101 +    default:
 12.1102 +        /* We should never get here (caught above) */
 12.1103 +        break;
 12.1104 +    }
 12.1105 +
 12.1106 +    /* We're all done.. */
 12.1107 +    return (swdata);
 12.1108 +}
 12.1109 +
 12.1110 +int
 12.1111 +SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
 12.1112 +                             int *pitch)
 12.1113 +{
 12.1114 +    *pixels = swdata->planes[0];
 12.1115 +    *pitch = swdata->pitches[0];
 12.1116 +    return 0;
 12.1117 +}
 12.1118 +
 12.1119 +int
 12.1120 +SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
 12.1121 +                        const void *pixels, int pitch)
 12.1122 +{
 12.1123 +    switch (swdata->format) {
 12.1124 +    case SDL_PIXELFORMAT_YV12:
 12.1125 +    case SDL_PIXELFORMAT_IYUV:
 12.1126 +        if (rect
 12.1127 +            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
 12.1128 +                || rect->h != swdata->h)) {
 12.1129 +            SDL_SetError
 12.1130 +                ("YV12 and IYUV textures only support full surface updates");
 12.1131 +            return -1;
 12.1132 +        }
 12.1133 +        SDL_memcpy(swdata->pixels, pixels, swdata->h * swdata->w * 2);
 12.1134 +        break;
 12.1135 +    case SDL_PIXELFORMAT_YUY2:
 12.1136 +    case SDL_PIXELFORMAT_UYVY:
 12.1137 +    case SDL_PIXELFORMAT_YVYU:
 12.1138 +        {
 12.1139 +            Uint8 *src, *dst;
 12.1140 +            int row;
 12.1141 +            size_t length;
 12.1142 +
 12.1143 +            src = (Uint8 *) pixels;
 12.1144 +            dst =
 12.1145 +                swdata->planes[0] + rect->y * swdata->pitches[0] +
 12.1146 +                rect->x * 2;
 12.1147 +            length = rect->w * 2;
 12.1148 +            for (row = 0; row < rect->h; ++row) {
 12.1149 +                SDL_memcpy(dst, src, length);
 12.1150 +                src += pitch;
 12.1151 +                dst += swdata->pitches[0];
 12.1152 +            }
 12.1153 +        }
 12.1154 +        break;
 12.1155 +    }
 12.1156 +    return 0;
 12.1157 +}
 12.1158 +
 12.1159 +int
 12.1160 +SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
 12.1161 +                      void **pixels, int *pitch)
 12.1162 +{
 12.1163 +    switch (swdata->format) {
 12.1164 +    case SDL_PIXELFORMAT_YV12:
 12.1165 +    case SDL_PIXELFORMAT_IYUV:
 12.1166 +        if (rect
 12.1167 +            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
 12.1168 +                || rect->h != swdata->h)) {
 12.1169 +            SDL_SetError
 12.1170 +                ("YV12 and IYUV textures only support full surface locks");
 12.1171 +            return -1;
 12.1172 +        }
 12.1173 +        break;
 12.1174 +    }
 12.1175 +
 12.1176 +    *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
 12.1177 +    *pitch = swdata->pitches[0];
 12.1178 +    return 0;
 12.1179 +}
 12.1180 +
 12.1181 +void
 12.1182 +SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
 12.1183 +{
 12.1184 +}
 12.1185 +
 12.1186 +int
 12.1187 +SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
 12.1188 +                    Uint32 target_format, int w, int h, void *pixels,
 12.1189 +                    int pitch)
 12.1190 +{
 12.1191 +    int stretch;
 12.1192 +    int scale_2x;
 12.1193 +    Uint8 *lum, *Cr, *Cb;
 12.1194 +    int mod;
 12.1195 +
 12.1196 +    /* Make sure we're set up to display in the desired format */
 12.1197 +    if (target_format != swdata->target_format) {
 12.1198 +        if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
 12.1199 +            return -1;
 12.1200 +        }
 12.1201 +    }
 12.1202 +
 12.1203 +    stretch = 0;
 12.1204 +    scale_2x = 0;
 12.1205 +    if (srcrect->x || srcrect->y || srcrect->w < swdata->w
 12.1206 +        || srcrect->h < swdata->h) {
 12.1207 +        /* The source rectangle has been clipped.
 12.1208 +           Using a scratch surface is easier than adding clipped
 12.1209 +           source support to all the blitters, plus that would
 12.1210 +           slow them down in the general unclipped case.
 12.1211 +         */
 12.1212 +        stretch = 1;
 12.1213 +    } else if ((srcrect->w != w) || (srcrect->h != h)) {
 12.1214 +        if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
 12.1215 +            scale_2x = 1;
 12.1216 +        } else {
 12.1217 +            stretch = 1;
 12.1218 +        }
 12.1219 +    }
 12.1220 +    if (stretch) {
 12.1221 +        int bpp;
 12.1222 +        Uint32 Rmask, Gmask, Bmask, Amask;
 12.1223 +
 12.1224 +        if (swdata->display) {
 12.1225 +            swdata->display->w = w;
 12.1226 +            swdata->display->h = h;
 12.1227 +            swdata->display->pixels = pixels;
 12.1228 +            swdata->display->pitch = pitch;
 12.1229 +        } else {
 12.1230 +            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
 12.1231 +            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
 12.1232 +                                       &Bmask, &Amask);
 12.1233 +            swdata->display =
 12.1234 +                SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
 12.1235 +                                         Gmask, Bmask, Amask);
 12.1236 +            if (!swdata->display) {
 12.1237 +                return (-1);
 12.1238 +            }
 12.1239 +        }
 12.1240 +        if (!swdata->stretch) {
 12.1241 +            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
 12.1242 +            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
 12.1243 +                                       &Bmask, &Amask);
 12.1244 +            swdata->stretch =
 12.1245 +                SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
 12.1246 +                                     Gmask, Bmask, Amask);
 12.1247 +            if (!swdata->stretch) {
 12.1248 +                return (-1);
 12.1249 +            }
 12.1250 +        }
 12.1251 +        pixels = swdata->stretch->pixels;
 12.1252 +        pitch = swdata->stretch->pitch;
 12.1253 +    }
 12.1254 +    switch (swdata->format) {
 12.1255 +    case SDL_PIXELFORMAT_YV12:
 12.1256 +        lum = swdata->planes[0];
 12.1257 +        Cr = swdata->planes[1];
 12.1258 +        Cb = swdata->planes[2];
 12.1259 +        break;
 12.1260 +    case SDL_PIXELFORMAT_IYUV:
 12.1261 +        lum = swdata->planes[0];
 12.1262 +        Cr = swdata->planes[2];
 12.1263 +        Cb = swdata->planes[1];
 12.1264 +        break;
 12.1265 +    case SDL_PIXELFORMAT_YUY2:
 12.1266 +        lum = swdata->planes[0];
 12.1267 +        Cr = lum + 3;
 12.1268 +        Cb = lum + 1;
 12.1269 +        break;
 12.1270 +    case SDL_PIXELFORMAT_UYVY:
 12.1271 +        lum = swdata->planes[0] + 1;
 12.1272 +        Cr = lum + 1;
 12.1273 +        Cb = lum - 1;
 12.1274 +        break;
 12.1275 +    case SDL_PIXELFORMAT_YVYU:
 12.1276 +        lum = swdata->planes[0];
 12.1277 +        Cr = lum + 1;
 12.1278 +        Cb = lum + 3;
 12.1279 +        break;
 12.1280 +    default:
 12.1281 +        SDL_SetError("Unsupported YUV format in copy");
 12.1282 +        return (-1);
 12.1283 +    }
 12.1284 +    mod = (pitch / SDL_BYTESPERPIXEL(target_format));
 12.1285 +
 12.1286 +    if (scale_2x) {
 12.1287 +        mod -= (swdata->w * 2);
 12.1288 +        swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
 12.1289 +                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
 12.1290 +    } else {
 12.1291 +        mod -= swdata->w;
 12.1292 +        swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
 12.1293 +                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
 12.1294 +    }
 12.1295 +    if (stretch) {
 12.1296 +        SDL_Rect rect = *srcrect;
 12.1297 +        SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
 12.1298 +    }
 12.1299 +    return 0;
 12.1300 +}
 12.1301 +
 12.1302 +void
 12.1303 +SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
 12.1304 +{
 12.1305 +    if (swdata) {
 12.1306 +        if (swdata->pixels) {
 12.1307 +            SDL_free(swdata->pixels);
 12.1308 +        }
 12.1309 +        if (swdata->colortab) {
 12.1310 +            SDL_free(swdata->colortab);
 12.1311 +        }
 12.1312 +        if (swdata->rgb_2_pix) {
 12.1313 +            SDL_free(swdata->rgb_2_pix);
 12.1314 +        }
 12.1315 +        if (swdata->stretch) {
 12.1316 +            SDL_FreeSurface(swdata->stretch);
 12.1317 +        }
 12.1318 +        if (swdata->display) {
 12.1319 +            SDL_FreeSurface(swdata->display);
 12.1320 +        }
 12.1321 +        SDL_free(swdata);
 12.1322 +    }
 12.1323 +}
 12.1324 +
 12.1325 +/* vi: set ts=4 sw=4 expandtab: */
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/src/render/SDL_yuv_sw_c.h	Thu Feb 03 00:19:40 2011 -0800
    13.3 @@ -0,0 +1,69 @@
    13.4 +/*
    13.5 +    SDL - Simple DirectMedia Layer
    13.6 +    Copyright (C) 1997-2010 Sam Lantinga
    13.7 +
    13.8 +    This library is free software; you can redistribute it and/or
    13.9 +    modify it under the terms of the GNU Lesser General Public
   13.10 +    License as published by the Free Software Foundation; either
   13.11 +    version 2.1 of the License, or (at your option) any later version.
   13.12 +
   13.13 +    This library is distributed in the hope that it will be useful,
   13.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13.16 +    Lesser General Public License for more details.
   13.17 +
   13.18 +    You should have received a copy of the GNU Lesser General Public
   13.19 +    License along with this library; if not, write to the Free Software
   13.20 +    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   13.21 +
   13.22 +    Sam Lantinga
   13.23 +    slouken@libsdl.org
   13.24 +*/
   13.25 +#include "SDL_config.h"
   13.26 +
   13.27 +#include "SDL_video.h"
   13.28 +
   13.29 +/* This is the software implementation of the YUV texture support */
   13.30 +
   13.31 +struct SDL_SW_YUVTexture
   13.32 +{
   13.33 +    Uint32 format;
   13.34 +    Uint32 target_format;
   13.35 +    int w, h;
   13.36 +    Uint8 *pixels;
   13.37 +    int *colortab;
   13.38 +    Uint32 *rgb_2_pix;
   13.39 +    void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
   13.40 +                       unsigned char *lum, unsigned char *cr,
   13.41 +                       unsigned char *cb, unsigned char *out,
   13.42 +                       int rows, int cols, int mod);
   13.43 +    void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
   13.44 +                       unsigned char *lum, unsigned char *cr,
   13.45 +                       unsigned char *cb, unsigned char *out,
   13.46 +                       int rows, int cols, int mod);
   13.47 +
   13.48 +    /* These are just so we don't have to allocate them separately */
   13.49 +    Uint16 pitches[3];
   13.50 +    Uint8 *planes[3];
   13.51 +
   13.52 +    /* This is a temporary surface in case we have to stretch copy */
   13.53 +    SDL_Surface *stretch;
   13.54 +    SDL_Surface *display;
   13.55 +};
   13.56 +
   13.57 +typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture;
   13.58 +
   13.59 +SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h);
   13.60 +int SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
   13.61 +                                 int *pitch);
   13.62 +int SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
   13.63 +                            const void *pixels, int pitch);
   13.64 +int SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
   13.65 +                          void **pixels, int *pitch);
   13.66 +void SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata);
   13.67 +int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
   13.68 +                        Uint32 target_format, int w, int h, void *pixels,
   13.69 +                        int pitch);
   13.70 +void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata);
   13.71 +
   13.72 +/* vi: set ts=4 sw=4 expandtab: */
    14.1 --- a/src/render/direct3d/SDL_d3drender.c	Wed Feb 02 22:55:12 2011 -0800
    14.2 +++ b/src/render/direct3d/SDL_d3drender.c	Thu Feb 03 00:19:40 2011 -0800
    14.3 @@ -28,7 +28,6 @@
    14.4  #include "SDL_loadso.h"
    14.5  #include "SDL_syswm.h"
    14.6  #include "../SDL_sysrender.h"
    14.7 -#include "../../video/SDL_yuv_sw_c.h"
    14.8  
    14.9  #if SDL_VIDEO_RENDER_D3D
   14.10  #define D3D_DEBUG_INFO
   14.11 @@ -89,7 +88,8 @@
   14.12  
   14.13  /* Direct3D renderer implementation */
   14.14  
   14.15 -#if 1                           /* This takes more memory but you won't lose your texture data */
   14.16 +#if 1
   14.17 +/* This takes more memory but you won't lose your texture data */
   14.18  #define D3DPOOL_SDL    D3DPOOL_MANAGED
   14.19  #define SDL_MEMORY_POOL_MANAGED
   14.20  #else
   14.21 @@ -99,18 +99,12 @@
   14.22  
   14.23  static SDL_Renderer *D3D_CreateRenderer(SDL_Window * window, Uint32 flags);
   14.24  static int D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   14.25 -static int D3D_QueryTexturePixels(SDL_Renderer * renderer,
   14.26 -                                  SDL_Texture * texture, void **pixels,
   14.27 -                                  int *pitch);
   14.28  static int D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   14.29                               const SDL_Rect * rect, const void *pixels,
   14.30                               int pitch);
   14.31  static int D3D_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   14.32 -                           const SDL_Rect * rect, int markDirty,
   14.33 -                           void **pixels, int *pitch);
   14.34 +                           const SDL_Rect * rect, void **pixels, int *pitch);
   14.35  static void D3D_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   14.36 -static void D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   14.37 -                             int numrects, const SDL_Rect * rects);
   14.38  static int D3D_RenderDrawPoints(SDL_Renderer * renderer,
   14.39                                  const SDL_Point * points, int count);
   14.40  static int D3D_RenderDrawLines(SDL_Renderer * renderer,
   14.41 @@ -134,8 +128,8 @@
   14.42      {
   14.43       "d3d",
   14.44       (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
   14.45 -     0,
   14.46 -     {0},
   14.47 +     1,
   14.48 +     {SDL_PIXELFORMAT_ARGB8888},
   14.49       0,
   14.50       0}
   14.51  };
   14.52 @@ -152,7 +146,6 @@
   14.53  
   14.54  typedef struct
   14.55  {
   14.56 -    SDL_SW_YUVTexture *yuv;
   14.57      Uint32 format;
   14.58      IDirect3DTexture9 *texture;
   14.59  } D3D_TextureData;
   14.60 @@ -248,113 +241,30 @@
   14.61  PixelFormatToD3DFMT(Uint32 format)
   14.62  {
   14.63      switch (format) {
   14.64 -    case SDL_PIXELFORMAT_INDEX8:
   14.65 -        return D3DFMT_P8;
   14.66 -    case SDL_PIXELFORMAT_RGB332:
   14.67 -        return D3DFMT_R3G3B2;
   14.68 -    case SDL_PIXELFORMAT_RGB444:
   14.69 -        return D3DFMT_X4R4G4B4;
   14.70 -    case SDL_PIXELFORMAT_RGB555:
   14.71 -        return D3DFMT_X1R5G5B5;
   14.72 -    case SDL_PIXELFORMAT_ARGB4444:
   14.73 -        return D3DFMT_A4R4G4B4;
   14.74 -    case SDL_PIXELFORMAT_ARGB1555:
   14.75 -        return D3DFMT_A1R5G5B5;
   14.76      case SDL_PIXELFORMAT_RGB565:
   14.77          return D3DFMT_R5G6B5;
   14.78      case SDL_PIXELFORMAT_RGB888:
   14.79          return D3DFMT_X8R8G8B8;
   14.80      case SDL_PIXELFORMAT_ARGB8888:
   14.81          return D3DFMT_A8R8G8B8;
   14.82 -    case SDL_PIXELFORMAT_ARGB2101010:
   14.83 -        return D3DFMT_A2R10G10B10;
   14.84 -    case SDL_PIXELFORMAT_YV12:
   14.85 -        return MAKEFOURCC('Y','V','1','2');
   14.86 -    case SDL_PIXELFORMAT_IYUV:
   14.87 -        return MAKEFOURCC('I','4','2','0');
   14.88 -    case SDL_PIXELFORMAT_UYVY:
   14.89 -        return D3DFMT_UYVY;
   14.90 -    case SDL_PIXELFORMAT_YUY2:
   14.91 -        return D3DFMT_YUY2;
   14.92      default:
   14.93          return D3DFMT_UNKNOWN;
   14.94      }
   14.95  }
   14.96  
   14.97 -static SDL_bool
   14.98 -D3D_IsTextureFormatAvailable(IDirect3D9 * d3d, UINT adapter,
   14.99 -                             D3DFORMAT display_format,
  14.100 -                             D3DFORMAT texture_format)
  14.101 +static Uint32
  14.102 +D3DFMTToPixelFormat(D3DFORMAT format)
  14.103  {
  14.104 -    HRESULT result;
  14.105 -
  14.106 -    result = IDirect3D9_CheckDeviceFormat(d3d, adapter,
  14.107 -                                          D3DDEVTYPE_HAL,
  14.108 -                                          display_format,
  14.109 -                                          0,
  14.110 -                                          D3DRTYPE_TEXTURE,
  14.111 -                                          texture_format);
  14.112 -    return FAILED(result) ? SDL_FALSE : SDL_TRUE;
  14.113 -}
  14.114 -
  14.115 -static void
  14.116 -UpdateYUVTextureData(SDL_Texture * texture)
  14.117 -{
  14.118 -    D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
  14.119 -    SDL_Rect rect;
  14.120 -    RECT d3drect;
  14.121 -    D3DLOCKED_RECT locked;
  14.122 -    HRESULT result;
  14.123 -
  14.124 -    d3drect.left = 0;
  14.125 -    d3drect.right = texture->w;
  14.126 -    d3drect.top = 0;
  14.127 -    d3drect.bottom = texture->h;
  14.128 -
  14.129 -    result =
  14.130 -        IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
  14.131 -    if (FAILED(result)) {
  14.132 -        return;
  14.133 +    switch (format) {
  14.134 +    case D3DFMT_R5G6B5:
  14.135 +        return SDL_PIXELFORMAT_RGB565;
  14.136 +    case D3DFMT_X8R8G8B8:
  14.137 +        return SDL_PIXELFORMAT_RGB888;
  14.138 +    case D3DFMT_A8R8G8B8:
  14.139 +        return SDL_PIXELFORMAT_ARGB8888;
  14.140 +    default:
  14.141 +        return SDL_PIXELFORMAT_UNKNOWN;
  14.142      }
  14.143 -
  14.144 -    rect.x = 0;
  14.145 -    rect.y = 0;
  14.146 -    rect.w = texture->w;
  14.147 -    rect.h = texture->h;
  14.148 -    SDL_SW_CopyYUVToRGB(data->yuv, &rect, data->format, texture->w,
  14.149 -                        texture->h, locked.pBits, locked.Pitch);
  14.150 -
  14.151 -    IDirect3DTexture9_UnlockRect(data->texture, 0);
  14.152 -}
  14.153 -
  14.154 -static void
  14.155 -D3D_AddTextureFormats(D3D_RenderData *data, SDL_RendererInfo *info)
  14.156 -{
  14.157 -    int i;
  14.158 -    int formats[] = {
  14.159 -        SDL_PIXELFORMAT_RGB332,
  14.160 -        SDL_PIXELFORMAT_RGB444,
  14.161 -        SDL_PIXELFORMAT_RGB555,
  14.162 -        SDL_PIXELFORMAT_ARGB4444,
  14.163 -        SDL_PIXELFORMAT_ARGB1555,
  14.164 -        SDL_PIXELFORMAT_RGB565,
  14.165 -        SDL_PIXELFORMAT_RGB888,
  14.166 -        SDL_PIXELFORMAT_ARGB8888,
  14.167 -        SDL_PIXELFORMAT_ARGB2101010,
  14.168 -    };
  14.169 -
  14.170 -    info->num_texture_formats = 0;
  14.171 -    for (i = 0; i < SDL_arraysize(formats); ++i) {
  14.172 -        if (D3D_IsTextureFormatAvailable
  14.173 -            (data->d3d, data->adapter, data->pparams.BackBufferFormat, PixelFormatToD3DFMT(formats[i]))) {
  14.174 -            info->texture_formats[info->num_texture_formats++] = formats[i];
  14.175 -        }
  14.176 -    }
  14.177 -    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YV12;
  14.178 -    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_IYUV;
  14.179 -    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YUY2;
  14.180 -    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_UYVY;
  14.181 -    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YVYU;
  14.182  }
  14.183  
  14.184  SDL_Renderer *
  14.185 @@ -367,6 +277,9 @@
  14.186      D3DPRESENT_PARAMETERS pparams;
  14.187      IDirect3DSwapChain9 *chain;
  14.188      D3DCAPS9 caps;
  14.189 +    Uint32 window_flags;
  14.190 +    int w, h;
  14.191 +    SDL_DisplayMode fullscreen_mode;
  14.192  
  14.193      renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
  14.194      if (!renderer) {
  14.195 @@ -404,11 +317,9 @@
  14.196      }
  14.197  
  14.198      renderer->CreateTexture = D3D_CreateTexture;
  14.199 -    renderer->QueryTexturePixels = D3D_QueryTexturePixels;
  14.200      renderer->UpdateTexture = D3D_UpdateTexture;
  14.201      renderer->LockTexture = D3D_LockTexture;
  14.202      renderer->UnlockTexture = D3D_UnlockTexture;
  14.203 -    renderer->DirtyTexture = D3D_DirtyTexture;
  14.204      renderer->RenderDrawPoints = D3D_RenderDrawPoints;
  14.205      renderer->RenderDrawLines = D3D_RenderDrawLines;
  14.206      renderer->RenderFillRects = D3D_RenderFillRects;
  14.207 @@ -427,23 +338,27 @@
  14.208      SDL_VERSION(&windowinfo.version);
  14.209      SDL_GetWindowWMInfo(window, &windowinfo);
  14.210  
  14.211 +    window_flags = SDL_GetWindowFlags(window);
  14.212 +    SDL_GetWindowSize(window, &w, &h);
  14.213 +    SDL_GetWindowDisplayMode(window, &fullscreen_mode);
  14.214 +
  14.215      SDL_zero(pparams);
  14.216      pparams.hDeviceWindow = windowinfo.info.win.window;
  14.217 -    pparams.BackBufferWidth = window->w;
  14.218 -    pparams.BackBufferHeight = window->h;
  14.219 -    if (window->flags & SDL_WINDOW_FULLSCREEN) {
  14.220 +    pparams.BackBufferWidth = w;
  14.221 +    pparams.BackBufferHeight = h;
  14.222 +    if (window_flags & SDL_WINDOW_FULLSCREEN) {
  14.223          pparams.BackBufferFormat =
  14.224 -            PixelFormatToD3DFMT(window->fullscreen_mode.format);
  14.225 +            PixelFormatToD3DFMT(fullscreen_mode.format);
  14.226      } else {
  14.227          pparams.BackBufferFormat = D3DFMT_UNKNOWN;
  14.228      }
  14.229      pparams.BackBufferCount = 1;
  14.230      pparams.SwapEffect = D3DSWAPEFFECT_DISCARD;
  14.231  
  14.232 -    if (window->flags & SDL_WINDOW_FULLSCREEN) {
  14.233 +    if (window_flags & SDL_WINDOW_FULLSCREEN) {
  14.234          pparams.Windowed = FALSE;
  14.235          pparams.FullScreen_RefreshRateInHz =
  14.236 -            window->fullscreen_mode.refresh_rate;
  14.237 +            fullscreen_mode.refresh_rate;
  14.238      } else {
  14.239          pparams.Windowed = TRUE;
  14.240          pparams.FullScreen_RefreshRateInHz = 0;
  14.241 @@ -494,8 +409,6 @@
  14.242      }
  14.243      data->pparams = pparams;
  14.244  
  14.245 -    D3D_AddTextureFormats(data, &renderer->info);
  14.246 -
  14.247      IDirect3DDevice9_GetDeviceCaps(data->device, &caps);
  14.248      renderer->info.max_texture_width = caps.MaxTextureWidth;
  14.249      renderer->info.max_texture_height = caps.MaxTextureHeight;
  14.250 @@ -594,22 +507,7 @@
  14.251  
  14.252      texture->driverdata = data;
  14.253  
  14.254 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format) &&
  14.255 -        (texture->format != SDL_PIXELFORMAT_YUY2 ||
  14.256 -         !D3D_IsTextureFormatAvailable(renderdata->d3d, renderdata->adapter,
  14.257 -                                       display_format, PixelFormatToD3DFMT(texture->format)))
  14.258 -        && (texture->format != SDL_PIXELFORMAT_YVYU
  14.259 -            || !D3D_IsTextureFormatAvailable(renderdata->d3d, renderdata->adapter,
  14.260 -                                             display_format, PixelFormatToD3DFMT(texture->format)))) {
  14.261 -        data->yuv =
  14.262 -            SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
  14.263 -        if (!data->yuv) {
  14.264 -            return -1;
  14.265 -        }
  14.266 -        data->format = SDL_GetWindowPixelFormat(window);
  14.267 -    } else {
  14.268 -        data->format = texture->format;
  14.269 -    }
  14.270 +    data->format = texture->format;
  14.271  
  14.272      result =
  14.273          IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
  14.274 @@ -625,153 +523,118 @@
  14.275  }
  14.276  
  14.277  static int
  14.278 -D3D_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
  14.279 -                       void **pixels, int *pitch)
  14.280 -{
  14.281 -    D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
  14.282 -
  14.283 -    if (data->yuv) {
  14.284 -        return SDL_SW_QueryYUVTexturePixels(data->yuv, pixels, pitch);
  14.285 -    } else {
  14.286 -        /* D3D textures don't have their pixels hanging out */
  14.287 -        return -1;
  14.288 -    }
  14.289 -}
  14.290 -
  14.291 -static int
  14.292  D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  14.293                    const SDL_Rect * rect, const void *pixels, int pitch)
  14.294  {
  14.295      D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
  14.296      D3D_RenderData *renderdata = (D3D_RenderData *) renderer->driverdata;
  14.297  
  14.298 -    if (data->yuv) {
  14.299 -        if (SDL_SW_UpdateYUVTexture(data->yuv, rect, pixels, pitch) < 0) {
  14.300 -            return -1;
  14.301 -        }
  14.302 -        UpdateYUVTextureData(texture);
  14.303 -        return 0;
  14.304 -    } else {
  14.305  #ifdef SDL_MEMORY_POOL_DEFAULT
  14.306 -        IDirect3DTexture9 *temp;
  14.307 -        RECT d3drect;
  14.308 -        D3DLOCKED_RECT locked;
  14.309 -        const Uint8 *src;
  14.310 -        Uint8 *dst;
  14.311 -        int row, length;
  14.312 -        HRESULT result;
  14.313 +    IDirect3DTexture9 *temp;
  14.314 +    RECT d3drect;
  14.315 +    D3DLOCKED_RECT locked;
  14.316 +    const Uint8 *src;
  14.317 +    Uint8 *dst;
  14.318 +    int row, length;
  14.319 +    HRESULT result;
  14.320  
  14.321 -        result =
  14.322 -            IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
  14.323 -                                           texture->h, 1, 0,
  14.324 -                                           PixelFormatToD3DFMT(texture->
  14.325 -                                                               format),
  14.326 -                                           D3DPOOL_SYSTEMMEM, &temp, NULL);
  14.327 -        if (FAILED(result)) {
  14.328 -            D3D_SetError("CreateTexture()", result);
  14.329 -            return -1;
  14.330 -        }
  14.331 +    result =
  14.332 +        IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
  14.333 +                                       texture->h, 1, 0,
  14.334 +                                       PixelFormatToD3DFMT(texture-> format),
  14.335 +                                       D3DPOOL_SYSTEMMEM, &temp, NULL);
  14.336 +    if (FAILED(result)) {
  14.337 +        D3D_SetError("CreateTexture()", result);
  14.338 +        return -1;
  14.339 +    }
  14.340  
  14.341 -        d3drect.left = rect->x;
  14.342 -        d3drect.right = rect->x + rect->w;
  14.343 -        d3drect.top = rect->y;
  14.344 -        d3drect.bottom = rect->y + rect->h;
  14.345 +    d3drect.left = rect->x;
  14.346 +    d3drect.right = rect->x + rect->w;
  14.347 +    d3drect.top = rect->y;
  14.348 +    d3drect.bottom = rect->y + rect->h;
  14.349  
  14.350 -        result = IDirect3DTexture9_LockRect(temp, 0, &locked, &d3drect, 0);
  14.351 -        if (FAILED(result)) {
  14.352 -            IDirect3DTexture9_Release(temp);
  14.353 -            D3D_SetError("LockRect()", result);
  14.354 -            return -1;
  14.355 -        }
  14.356 +    result = IDirect3DTexture9_LockRect(temp, 0, &locked, &d3drect, 0);
  14.357 +    if (FAILED(result)) {
  14.358 +        IDirect3DTexture9_Release(temp);
  14.359 +        D3D_SetError("LockRect()", result);
  14.360 +        return -1;
  14.361 +    }
  14.362  
  14.363 -        src = pixels;
  14.364 -        dst = locked.pBits;
  14.365 -        length = rect->w * SDL_BYTESPERPIXEL(texture->format);
  14.366 -        for (row = 0; row < rect->h; ++row) {
  14.367 -            SDL_memcpy(dst, src, length);
  14.368 -            src += pitch;
  14.369 -            dst += locked.Pitch;
  14.370 -        }
  14.371 -        IDirect3DTexture9_UnlockRect(temp, 0);
  14.372 +    src = pixels;
  14.373 +    dst = locked.pBits;
  14.374 +    length = rect->w * SDL_BYTESPERPIXEL(texture->format);
  14.375 +    for (row = 0; row < rect->h; ++row) {
  14.376 +        SDL_memcpy(dst, src, length);
  14.377 +        src += pitch;
  14.378 +        dst += locked.Pitch;
  14.379 +    }
  14.380 +    IDirect3DTexture9_UnlockRect(temp, 0);
  14.381  
  14.382 -        result =
  14.383 -            IDirect3DDevice9_UpdateTexture(renderdata->device,
  14.384 -                                           (IDirect3DBaseTexture9 *) temp,
  14.385 -                                           (IDirect3DBaseTexture9 *)
  14.386 -                                           data->texture);
  14.387 -        IDirect3DTexture9_Release(temp);
  14.388 -        if (FAILED(result)) {
  14.389 -            D3D_SetError("UpdateTexture()", result);
  14.390 -            return -1;
  14.391 -        }
  14.392 +    result =
  14.393 +        IDirect3DDevice9_UpdateTexture(renderdata->device,
  14.394 +                                       (IDirect3DBaseTexture9 *) temp,
  14.395 +                                       (IDirect3DBaseTexture9 *)
  14.396 +                                       data->texture);
  14.397 +    IDirect3DTexture9_Release(temp);
  14.398 +    if (FAILED(result)) {
  14.399 +        D3D_SetError("UpdateTexture()", result);
  14.400 +        return -1;
  14.401 +    }
  14.402  #else
  14.403 -        RECT d3drect;
  14.404 -        D3DLOCKED_RECT locked;
  14.405 -        const Uint8 *src;
  14.406 -        Uint8 *dst;
  14.407 -        int row, length;
  14.408 -        HRESULT result;
  14.409 +    RECT d3drect;
  14.410 +    D3DLOCKED_RECT locked;
  14.411 +    const Uint8 *src;
  14.412 +    Uint8 *dst;
  14.413 +    int row, length;
  14.414 +    HRESULT result;
  14.415  
  14.416 -        d3drect.left = rect->x;
  14.417 -        d3drect.right = rect->x + rect->w;
  14.418 -        d3drect.top = rect->y;
  14.419 -        d3drect.bottom = rect->y + rect->h;
  14.420 +    d3drect.left = rect->x;
  14.421 +    d3drect.right = rect->x + rect->w;
  14.422 +    d3drect.top = rect->y;
  14.423 +    d3drect.bottom = rect->y + rect->h;
  14.424  
  14.425 -        result =
  14.426 -            IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect,
  14.427 -                                       0);
  14.428 -        if (FAILED(result)) {
  14.429 -            D3D_SetError("LockRect()", result);
  14.430 -            return -1;
  14.431 -        }
  14.432 +    result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
  14.433 +    if (FAILED(result)) {
  14.434 +        D3D_SetError("LockRect()", result);
  14.435 +        return -1;
  14.436 +    }
  14.437  
  14.438 -        src = pixels;
  14.439 -        dst = locked.pBits;
  14.440 -        length = rect->w * SDL_BYTESPERPIXEL(texture->format);
  14.441 -        for (row = 0; row < rect->h; ++row) {
  14.442 -            SDL_memcpy(dst, src, length);
  14.443 -            src += pitch;
  14.444 -            dst += locked.Pitch;
  14.445 -        }
  14.446 -        IDirect3DTexture9_UnlockRect(data->texture, 0);
  14.447 +    src = pixels;
  14.448 +    dst = locked.pBits;
  14.449 +    length = rect->w * SDL_BYTESPERPIXEL(texture->format);
  14.450 +    for (row = 0; row < rect->h; ++row) {
  14.451 +        SDL_memcpy(dst, src, length);
  14.452 +        src += pitch;
  14.453 +        dst += locked.Pitch;
  14.454 +    }
  14.455 +    IDirect3DTexture9_UnlockRect(data->texture, 0);
  14.456  #endif // SDL_MEMORY_POOL_DEFAULT
  14.457  
  14.458 -        return 0;
  14.459 -    }
  14.460 +    return 0;
  14.461  }
  14.462  
  14.463  static int
  14.464  D3D_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  14.465 -                const SDL_Rect * rect, int markDirty, void **pixels,
  14.466 -                int *pitch)
  14.467 +                const SDL_Rect * rect, void **pixels, int *pitch)
  14.468  {
  14.469      D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
  14.470 +    RECT d3drect;
  14.471 +    D3DLOCKED_RECT locked;
  14.472 +    HRESULT result;
  14.473  
  14.474 -    if (data->yuv) {
  14.475 -        return SDL_SW_LockYUVTexture(data->yuv, rect, markDirty, pixels,
  14.476 -                                     pitch);
  14.477 -    } else {
  14.478 -        RECT d3drect;
  14.479 -        D3DLOCKED_RECT locked;
  14.480 -        HRESULT result;
  14.481 +    d3drect.left = rect->x;
  14.482 +    d3drect.right = rect->x + rect->w;
  14.483 +    d3drect.top = rect->y;
  14.484 +    d3drect.bottom = rect->y + rect->h;
  14.485  
  14.486 -        d3drect.left = rect->x;
  14.487 -        d3drect.right = rect->x + rect->w;
  14.488 -        d3drect.top = rect->y;
  14.489 -        d3drect.bottom = rect->y + rect->h;
  14.490 -
  14.491 -        result =
  14.492 -            IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect,
  14.493 -                                       markDirty ? 0 :
  14.494 -                                       D3DLOCK_NO_DIRTY_UPDATE);
  14.495 -        if (FAILED(result)) {
  14.496 -            D3D_SetError("LockRect()", result);
  14.497 -            return -1;
  14.498 -        }
  14.499 -        *pixels = locked.pBits;
  14.500 -        *pitch = locked.Pitch;
  14.501 -        return 0;
  14.502 +    result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
  14.503 +    if (FAILED(result)) {
  14.504 +        D3D_SetError("LockRect()", result);
  14.505 +        return -1;
  14.506      }
  14.507 +    *pixels = locked.pBits;
  14.508 +    *pitch = locked.Pitch;
  14.509 +    return 0;
  14.510  }
  14.511  
  14.512  static void
  14.513 @@ -779,32 +642,7 @@
  14.514  {
  14.515      D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
  14.516  
  14.517 -    if (data->yuv) {
  14.518 -        SDL_SW_UnlockYUVTexture(data->yuv);
  14.519 -        UpdateYUVTextureData(texture);
  14.520 -    } else {
  14.521 -        IDirect3DTexture9_UnlockRect(data->texture, 0);
  14.522 -    }
  14.523 -}
  14.524 -
  14.525 -static void
  14.526 -D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, int numrects,
  14.527 -                 const SDL_Rect * rects)
  14.528 -{
  14.529 -    D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
  14.530 -    RECT d3drect;
  14.531 -    int i;
  14.532 -
  14.533 -    for (i = 0; i < numrects; ++i) {
  14.534 -        const SDL_Rect *rect = &rects[i];
  14.535 -
  14.536 -        d3drect.left = rect->x;
  14.537 -        d3drect.right = rect->x + rect->w;
  14.538 -        d3drect.top = rect->y;
  14.539 -        d3drect.bottom = rect->y + rect->h;
  14.540 -
  14.541 -        IDirect3DTexture9_AddDirtyRect(data->texture, &d3drect);
  14.542 -    }
  14.543 +    IDirect3DTexture9_UnlockRect(data->texture, 0);
  14.544  }
  14.545  
  14.546  static void
  14.547 @@ -1123,8 +961,6 @@
  14.548                       Uint32 format, void * pixels, int pitch)
  14.549  {
  14.550      D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
  14.551 -    SDL_Window *window = renderer->window;
  14.552 -    SDL_VideoDisplay *display = window->display;
  14.553      D3DSURFACE_DESC desc;
  14.554      LPDIRECT3DSURFACE9 backBuffer;
  14.555      LPDIRECT3DSURFACE9 surface;
  14.556 @@ -1174,7 +1010,7 @@
  14.557      }
  14.558  
  14.559      SDL_ConvertPixels(rect->w, rect->h,
  14.560 -                      display->current_mode.format, locked.pBits, locked.Pitch,
  14.561 +                      D3DFMTToPixelFormat(desc.Format), locked.pBits, locked.Pitch,
  14.562                        format, pixels, pitch);
  14.563  
  14.564      IDirect3DSurface9_UnlockRect(surface);
  14.565 @@ -1227,9 +1063,6 @@
  14.566      if (!data) {
  14.567          return;
  14.568      }
  14.569 -    if (data->yuv) {
  14.570 -        SDL_SW_DestroyYUVTexture(data->yuv);
  14.571 -    }
  14.572      if (data->texture) {
  14.573          IDirect3DTexture9_Release(data->texture);
  14.574      }
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/src/render/mmx.h	Thu Feb 03 00:19:40 2011 -0800
    15.3 @@ -0,0 +1,642 @@
    15.4 +/*	mmx.h
    15.5 +
    15.6 +	MultiMedia eXtensions GCC interface library for IA32.
    15.7 +
    15.8 +	To use this library, simply include this header file
    15.9 +	and compile with GCC.  You MUST have inlining enabled
   15.10 +	in order for mmx_ok() to work; this can be done by
   15.11 +	simply using -O on the GCC command line.
   15.12 +
   15.13 +	Compiling with -DMMX_TRACE will cause detailed trace
   15.14 +	output to be sent to stderr for each mmx operation.
   15.15 +	This adds lots of code, and obviously slows execution to
   15.16 +	a crawl, but can be very useful for debugging.
   15.17 +
   15.18 +	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
   15.19 +	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
   15.20 +	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
   15.21 +	AND FITNESS FOR ANY PARTICULAR PURPOSE.
   15.22 +
   15.23 +	1997-99 by H. Dietz and R. Fisher
   15.24 +
   15.25 + Notes:
   15.26 +	It appears that the latest gas has the pand problem fixed, therefore
   15.27 +	  I'll undefine BROKEN_PAND by default.
   15.28 +*/
   15.29 +
   15.30 +#ifndef _MMX_H
   15.31 +#define _MMX_H
   15.32 +
   15.33 +
   15.34 +/*	Warning:  at this writing, the version of GAS packaged
   15.35 +	with most Linux distributions does not handle the
   15.36 +	parallel AND operation mnemonic correctly.  If the
   15.37 +	symbol BROKEN_PAND is defined, a slower alternative
   15.38 +	coding will be used.  If execution of mmxtest results
   15.39 +	in an illegal instruction fault, define this symbol.
   15.40 +*/
   15.41 +#undef	BROKEN_PAND
   15.42 +
   15.43 +
   15.44 +/*	The type of a value that fits in an MMX register
   15.45 +	(note that long long constant values MUST be suffixed
   15.46 +	 by LL and unsigned long long values by ULL, lest
   15.47 +	 they be truncated by the compiler)
   15.48 +*/
   15.49 +typedef union
   15.50 +{
   15.51 +    long long q;                /* Quadword (64-bit) value */
   15.52 +    unsigned long long uq;      /* Unsigned Quadword */
   15.53 +    int d[2];                   /* 2 Doubleword (32-bit) values */
   15.54 +    unsigned int ud[2];         /* 2 Unsigned Doubleword */
   15.55 +    short w[4];                 /* 4 Word (16-bit) values */
   15.56 +    unsigned short uw[4];       /* 4 Unsigned Word */
   15.57 +    char b[8];                  /* 8 Byte (8-bit) values */
   15.58 +    unsigned char ub[8];        /* 8 Unsigned Byte */
   15.59 +    float s[2];                 /* 2 Single-precision (32-bit) values */
   15.60 +} __attribute__ ((aligned(8))) mmx_t;   /* On an 8-byte (64-bit) boundary */
   15.61 +
   15.62 +
   15.63 +#if 0
   15.64 +/*	Function to test if multimedia instructions are supported...
   15.65 +*/
   15.66 +inline extern int
   15.67 +mm_support(void)
   15.68 +{
   15.69 +    /* Returns 1 if MMX instructions are supported,
   15.70 +       3 if Cyrix MMX and Extended MMX instructions are supported
   15.71 +       5 if AMD MMX and 3DNow! instructions are supported
   15.72 +       0 if hardware does not support any of these
   15.73 +     */
   15.74 +    register int rval = 0;
   15.75 +
   15.76 +    __asm__ __volatile__(
   15.77 +                            /* See if CPUID instruction is supported ... */
   15.78 +                            /* ... Get copies of EFLAGS into eax and ecx */
   15.79 +                            "pushf\n\t"
   15.80 +                            "popl %%eax\n\t" "movl %%eax, %%ecx\n\t"
   15.81 +                            /* ... Toggle the ID bit in one copy and store */
   15.82 +                            /*     to the EFLAGS reg */
   15.83 +                            "xorl $0x200000, %%eax\n\t"
   15.84 +                            "push %%eax\n\t" "popf\n\t"
   15.85 +                            /* ... Get the (hopefully modified) EFLAGS */
   15.86 +                            "pushf\n\t" "popl %%eax\n\t"
   15.87 +                            /* ... Compare and test result */
   15.88 +                            "xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */
   15.89 +                            /* Get standard CPUID information, and
   15.90 +                               go to a specific vendor section */
   15.91 +                            "movl $0, %%eax\n\t" "cpuid\n\t"
   15.92 +                            /* Check for Intel */
   15.93 +                            "cmpl $0x756e6547, %%ebx\n\t"
   15.94 +                            "jne TryAMD\n\t"
   15.95 +                            "cmpl $0x49656e69, %%edx\n\t"
   15.96 +                            "jne TryAMD\n\t"
   15.97 +                            "cmpl $0x6c65746e, %%ecx\n"
   15.98 +                            "jne TryAMD\n\t" "jmp Intel\n\t"
   15.99 +                            /* Check for AMD */
  15.100 +                            "\nTryAMD:\n\t"
  15.101 +                            "cmpl $0x68747541, %%ebx\n\t"
  15.102 +                            "jne TryCyrix\n\t"
  15.103 +                            "cmpl $0x69746e65, %%edx\n\t"
  15.104 +                            "jne TryCyrix\n\t"
  15.105 +                            "cmpl $0x444d4163, %%ecx\n"
  15.106 +                            "jne TryCyrix\n\t" "jmp AMD\n\t"
  15.107 +                            /* Check for Cyrix */
  15.108 +                            "\nTryCyrix:\n\t"
  15.109 +                            "cmpl $0x69727943, %%ebx\n\t"
  15.110 +                            "jne NotSupported2\n\t"
  15.111 +                            "cmpl $0x736e4978, %%edx\n\t"
  15.112 +                            "jne NotSupported3\n\t"
  15.113 +                            "cmpl $0x64616574, %%ecx\n\t"
  15.114 +                            "jne NotSupported4\n\t"
  15.115 +                            /* Drop through to Cyrix... */
  15.116 +                            /* Cyrix Section */
  15.117 +                            /* See if extended CPUID level 80000001 is supported */
  15.118 +                            /* The value of CPUID/80000001 for the 6x86MX is undefined
  15.119 +                               according to the Cyrix CPU Detection Guide (Preliminary
  15.120 +                               Rev. 1.01 table 1), so we'll check the value of eax for
  15.121 +                               CPUID/0 to see if standard CPUID level 2 is supported.
  15.122 +                               According to the table, the only CPU which supports level
  15.123 +                               2 is also the only one which supports extended CPUID levels.
  15.124 +                             */
  15.125 +                            "cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t"    /* Use standard CPUID instead */
  15.126 +                            /* Extended CPUID supported (in theory), so get extended
  15.127 +                               features */
  15.128 +                            "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t"    /* Test for MMX */
  15.129 +                            "jz NotSupported5\n\t"      /* MMX not supported */
  15.130 +                            "testl $0x01000000, %%eax\n\t"      /* Test for Ext'd MMX */
  15.131 +                            "jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t"        /* MMX Supported */
  15.132 +                            "jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t"  /* EMMX and MMX Supported */
  15.133 +                            "jmp Return\n\t"
  15.134 +                            /* AMD Section */
  15.135 +                            "AMD:\n\t"
  15.136 +                            /* See if extended CPUID is supported */
  15.137 +                            "movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t"    /* Use standard CPUID instead */
  15.138 +                            /* Extended CPUID supported, so get extended features */
  15.139 +                            "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t"    /* Test for MMX */
  15.140 +                            "jz NotSupported6\n\t"      /* MMX not supported */
  15.141 +                            "testl $0x80000000, %%edx\n\t"      /* Test for 3DNow! */
  15.142 +                            "jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t"   /* MMX Supported */
  15.143 +                            "jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t"     /* 3DNow! and MMX Supported */
  15.144 +                            "jmp Return\n\t"
  15.145 +                            /* Intel Section */
  15.146 +                            "Intel:\n\t"
  15.147 +                            /* Check for MMX */
  15.148 +                            "MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t"      /* Test for MMX */
  15.149 +                            "jz NotSupported7\n\t"      /* MMX Not supported */
  15.150 +                            "movl $1, %0:\n\n\t"        /* MMX Supported */
  15.151 +                            "jmp Return\n\t"
  15.152 +                            /* Nothing supported */
  15.153 +                            "\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval):     /* no input */
  15.154 +                            :"eax", "ebx", "ecx", "edx");
  15.155 +
  15.156 +    /* Return */
  15.157 +    return (rval);
  15.158 +}
  15.159 +
  15.160 +/*	Function to test if mmx instructions are supported...
  15.161 +*/
  15.162 +inline extern int
  15.163 +mmx_ok(void)
  15.164 +{
  15.165 +    /* Returns 1 if MMX instructions are supported, 0 otherwise */
  15.166 +    return (mm_support() & 0x1);
  15.167 +}
  15.168 +#endif
  15.169 +
  15.170 +/*	Helper functions for the instruction macros that follow...
  15.171 +	(note that memory-to-register, m2r, instructions are nearly
  15.172 +	 as efficient as register-to-register, r2r, instructions;
  15.173 +	 however, memory-to-memory instructions are really simulated
  15.174 +	 as a convenience, and are only 1/3 as efficient)
  15.175 +*/
  15.176 +#ifdef	MMX_TRACE
  15.177 +
  15.178 +/*	Include the stuff for printing a trace to stderr...
  15.179 +*/
  15.180 +
  15.181 +#define	mmx_i2r(op, imm, reg) \
  15.182 +	{ \
  15.183 +		mmx_t mmx_trace; \
  15.184 +		mmx_trace.uq = (imm); \
  15.185 +		printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
  15.186 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.187 +		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  15.188 +				      : "=X" (mmx_trace) \
  15.189 +				      : /* nothing */ ); \
  15.190 +		printf(#reg "=0x%08x%08x) => ", \
  15.191 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.192 +		__asm__ __volatile__ (#op " %0, %%" #reg \
  15.193 +				      : /* nothing */ \
  15.194 +				      : "X" (imm)); \
  15.195 +		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  15.196 +				      : "=X" (mmx_trace) \
  15.197 +				      : /* nothing */ ); \
  15.198 +		printf(#reg "=0x%08x%08x\n", \
  15.199 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.200 +	}
  15.201 +
  15.202 +#define	mmx_m2r(op, mem, reg) \
  15.203 +	{ \
  15.204 +		mmx_t mmx_trace; \
  15.205 +		mmx_trace = (mem); \
  15.206 +		printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
  15.207 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.208 +		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  15.209 +				      : "=X" (mmx_trace) \
  15.210 +				      : /* nothing */ ); \
  15.211 +		printf(#reg "=0x%08x%08x) => ", \
  15.212 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.213 +		__asm__ __volatile__ (#op " %0, %%" #reg \
  15.214 +				      : /* nothing */ \
  15.215 +				      : "X" (mem)); \
  15.216 +		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  15.217 +				      : "=X" (mmx_trace) \
  15.218 +				      : /* nothing */ ); \
  15.219 +		printf(#reg "=0x%08x%08x\n", \
  15.220 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.221 +	}
  15.222 +
  15.223 +#define	mmx_r2m(op, reg, mem) \
  15.224 +	{ \
  15.225 +		mmx_t mmx_trace; \
  15.226 +		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  15.227 +				      : "=X" (mmx_trace) \
  15.228 +				      : /* nothing */ ); \
  15.229 +		printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
  15.230 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.231 +		mmx_trace = (mem); \
  15.232 +		printf(#mem "=0x%08x%08x) => ", \
  15.233 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.234 +		__asm__ __volatile__ (#op " %%" #reg ", %0" \
  15.235 +				      : "=X" (mem) \
  15.236 +				      : /* nothing */ ); \
  15.237 +		mmx_trace = (mem); \
  15.238 +		printf(#mem "=0x%08x%08x\n", \
  15.239 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.240 +	}
  15.241 +
  15.242 +#define	mmx_r2r(op, regs, regd) \
  15.243 +	{ \
  15.244 +		mmx_t mmx_trace; \
  15.245 +		__asm__ __volatile__ ("movq %%" #regs ", %0" \
  15.246 +				      : "=X" (mmx_trace) \
  15.247 +				      : /* nothing */ ); \
  15.248 +		printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
  15.249 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.250 +		__asm__ __volatile__ ("movq %%" #regd ", %0" \
  15.251 +				      : "=X" (mmx_trace) \
  15.252 +				      : /* nothing */ ); \
  15.253 +		printf(#regd "=0x%08x%08x) => ", \
  15.254 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.255 +		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
  15.256 +		__asm__ __volatile__ ("movq %%" #regd ", %0" \
  15.257 +				      : "=X" (mmx_trace) \
  15.258 +				      : /* nothing */ ); \
  15.259 +		printf(#regd "=0x%08x%08x\n", \
  15.260 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.261 +	}
  15.262 +
  15.263 +#define	mmx_m2m(op, mems, memd) /* traced: memd = memd <op> mems; overwrites mm0 */ \
  15.264 +	{ \
  15.265 +		mmx_t mmx_trace; \
  15.266 +		mmx_trace = (mems); \
  15.267 +		printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
  15.268 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.269 +		mmx_trace = (memd); \
  15.270 +		printf(#memd "=0x%08x%08x) => ", \
  15.271 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.272 +		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
  15.273 +				      #op " %1, %%mm0\n\t" \
  15.274 +				      "movq %%mm0, %0" \
  15.275 +				      : "+m" (memd) /* read by the first movq, written by the last */ \
  15.276 +				      : "m" (mems)); \
  15.277 +		mmx_trace = (memd); \
  15.278 +		printf(#memd "=0x%08x%08x\n", \
  15.279 +			mmx_trace.d[1], mmx_trace.d[0]); \
  15.280 +	}
  15.281 +
  15.282 +#else
  15.283 +
  15.284 +/*	These macros are a lot simpler without the tracing...
  15.285 +*/
  15.286 +
  15.287 +#define	mmx_i2r(op, imm, reg) \
  15.288 +	__asm__ __volatile__ (#op " %0, %%" #reg \
  15.289 +			      : /* nothing */ \
  15.290 +			      : "X" (imm) )
  15.291 +
  15.292 +#define	mmx_m2r(op, mem, reg) \
  15.293 +	__asm__ __volatile__ (#op " %0, %%" #reg \
  15.294 +			      : /* nothing */ \
  15.295 +			      : "m" (mem))
  15.296 +
  15.297 +#define	mmx_r2m(op, reg, mem) \
  15.298 +	__asm__ __volatile__ (#op " %%" #reg ", %0" \
  15.299 +			      : "=m" (mem) \
  15.300 +			      : /* nothing */ )
  15.301 +
  15.302 +#define	mmx_r2r(op, regs, regd) \
  15.303 +	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
  15.304 +
  15.305 +#define	mmx_m2m(op, mems, memd) /* memd = memd <op> mems; overwrites mm0 */ \
  15.306 +	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
  15.307 +			      #op " %1, %%mm0\n\t" \
  15.308 +			      "movq %%mm0, %0" \
  15.309 +			      : "+m" (memd) /* read by the first movq, written by the last */ \
  15.310 +			      : "m" (mems))
  15.311 +
  15.312 +#endif
  15.313 +
  15.314 +
  15.315 +/*	1x64 MOVe Quadword
  15.316 +	(this is both a load and a store...
  15.317 +	 in fact, it is the only way to store)
  15.318 +*/
  15.319 +#define	movq_m2r(var, reg)	mmx_m2r(movq, var, reg)
  15.320 +#define	movq_r2m(reg, var)	mmx_r2m(movq, reg, var)
  15.321 +#define	movq_r2r(regs, regd)	mmx_r2r(movq, regs, regd)
  15.322 +#define	movq(vars, vard) /* vard = vars (64-bit copy); overwrites mm0 */ \
  15.323 +	__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
  15.324 +			      "movq %%mm0, %0" \
  15.325 +			      : "=m" (vard) /* memory operand, matching movq_r2m */ \
  15.326 +			      : "m" (vars))
  15.327 +
  15.328 +
  15.329 +/*	1x32 MOVe Doubleword
  15.330 +	(like movq, this is both load and store...
  15.331 +	 but is most useful for moving things between
  15.332 +	 mmx registers and ordinary registers)
  15.333 +*/
  15.334 +#define	movd_m2r(var, reg)	mmx_m2r(movd, var, reg)
  15.335 +#define	movd_r2m(reg, var)	mmx_r2m(movd, reg, var)
  15.336 +#define	movd_r2r(regs, regd)	mmx_r2r(movd, regs, regd)
  15.337 +#define	movd(vars, vard) \
  15.338 +	__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
  15.339 +			      "movd %%mm0, %0" \
  15.340 +			      : "=X" (vard) \
  15.341 +			      : "X" (vars))
  15.342 +
  15.343 +
  15.344 +/*	2x32, 4x16, and 8x8 Parallel ADDs
  15.345 +*/
  15.346 +#define	paddd_m2r(var, reg)	mmx_m2r(paddd, var, reg)
  15.347 +#define	paddd_r2r(regs, regd)	mmx_r2r(paddd, regs, regd)
  15.348 +#define	paddd(vars, vard)	mmx_m2m(paddd, vars, vard)
  15.349 +
  15.350 +#define	paddw_m2r(var, reg)	mmx_m2r(paddw, var, reg)
  15.351 +#define	paddw_r2r(regs, regd)	mmx_r2r(paddw, regs, regd)
  15.352 +#define	paddw(vars, vard)	mmx_m2m(paddw, vars, vard)
  15.353 +
  15.354 +#define	paddb_m2r(var, reg)	mmx_m2r(paddb, var, reg)
  15.355 +#define	paddb_r2r(regs, regd)	mmx_r2r(paddb, regs, regd)
  15.356 +#define	paddb(vars, vard)	mmx_m2m(paddb, vars, vard)
  15.357 +
  15.358 +
  15.359 +/*	4x16 and 8x8 Parallel ADDs using Saturation arithmetic
  15.360 +*/
  15.361 +#define	paddsw_m2r(var, reg)	mmx_m2r(paddsw, var, reg)
  15.362 +#define	paddsw_r2r(regs, regd)	mmx_r2r(paddsw, regs, regd)
  15.363 +#define	paddsw(vars, vard)	mmx_m2m(paddsw, vars, vard)
  15.364 +
  15.365 +#define	paddsb_m2r(var, reg)	mmx_m2r(paddsb, var, reg)
  15.366 +#define	paddsb_r2r(regs, regd)	mmx_r2r(paddsb, regs, regd)
  15.367 +#define	paddsb(vars, vard)	mmx_m2m(paddsb, vars, vard)
  15.368 +
  15.369 +
  15.370 +/*	4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
  15.371 +*/
  15.372 +#define	paddusw_m2r(var, reg)	mmx_m2r(paddusw, var, reg)
  15.373 +#define	paddusw_r2r(regs, regd)	mmx_r2r(paddusw, regs, regd)
  15.374 +#define	paddusw(vars, vard)	mmx_m2m(paddusw, vars, vard)
  15.375 +
  15.376 +#define	paddusb_m2r(var, reg)	mmx_m2r(paddusb, var, reg)
  15.377 +#define	paddusb_r2r(regs, regd)	mmx_r2r(paddusb, regs, regd)
  15.378 +#define	paddusb(vars, vard)	mmx_m2m(paddusb, vars, vard)
  15.379 +
  15.380 +
  15.381 +/*	2x32, 4x16, and 8x8 Parallel SUBs
  15.382 +*/
  15.383 +#define	psubd_m2r(var, reg)	mmx_m2r(psubd, var, reg)
  15.384 +#define	psubd_r2r(regs, regd)	mmx_r2r(psubd, regs, regd)
  15.385 +#define	psubd(vars, vard)	mmx_m2m(psubd, vars, vard)
  15.386 +
  15.387 +#define	psubw_m2r(var, reg)	mmx_m2r(psubw, var, reg)
  15.388 +#define	psubw_r2r(regs, regd)	mmx_r2r(psubw, regs, regd)
  15.389 +#define	psubw(vars, vard)	mmx_m2m(psubw, vars, vard)
  15.390 +
  15.391 +#define	psubb_m2r(var, reg)	mmx_m2r(psubb, var, reg)
  15.392 +#define	psubb_r2r(regs, regd)	mmx_r2r(psubb, regs, regd)
  15.393 +#define	psubb(vars, vard)	mmx_m2m(psubb, vars, vard)
  15.394 +
  15.395 +
  15.396 +/*	4x16 and 8x8 Parallel SUBs using Saturation arithmetic
  15.397 +*/
  15.398 +#define	psubsw_m2r(var, reg)	mmx_m2r(psubsw, var, reg)
  15.399 +#define	psubsw_r2r(regs, regd)	mmx_r2r(psubsw, regs, regd)
  15.400 +#define	psubsw(vars, vard)	mmx_m2m(psubsw, vars, vard)
  15.401 +
  15.402 +#define	psubsb_m2r(var, reg)	mmx_m2r(psubsb, var, reg)
  15.403 +#define	psubsb_r2r(regs, regd)	mmx_r2r(psubsb, regs, regd)
  15.404 +#define	psubsb(vars, vard)	mmx_m2m(psubsb, vars, vard)
  15.405 +
  15.406 +
  15.407 +/*	4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
  15.408 +*/
  15.409 +#define	psubusw_m2r(var, reg)	mmx_m2r(psubusw, var, reg)
  15.410 +#define	psubusw_r2r(regs, regd)	mmx_r2r(psubusw, regs, regd)
  15.411 +#define	psubusw(vars, vard)	mmx_m2m(psubusw, vars, vard)
  15.412 +
  15.413 +#define	psubusb_m2r(var, reg)	mmx_m2r(psubusb, var, reg)
  15.414 +#define	psubusb_r2r(regs, regd)	mmx_r2r(psubusb, regs, regd)
  15.415 +#define	psubusb(vars, vard)	mmx_m2m(psubusb, vars, vard)
  15.416 +
  15.417 +
  15.418 +/*	4x16 Parallel MULs giving Low 4x16 portions of results
  15.419 +*/
  15.420 +#define	pmullw_m2r(var, reg)	mmx_m2r(pmullw, var, reg)
  15.421 +#define	pmullw_r2r(regs, regd)	mmx_r2r(pmullw, regs, regd)
  15.422 +#define	pmullw(vars, vard)	mmx_m2m(pmullw, vars, vard)
  15.423 +
  15.424 +
  15.425 +/*	4x16 Parallel MULs giving High 4x16 portions of results
  15.426 +*/
  15.427 +#define	pmulhw_m2r(var, reg)	mmx_m2r(pmulhw, var, reg)
  15.428 +#define	pmulhw_r2r(regs, regd)	mmx_r2r(pmulhw, regs, regd)
  15.429 +#define	pmulhw(vars, vard)	mmx_m2m(pmulhw, vars, vard)
  15.430 +
  15.431 +
  15.432 +/*	4x16->2x32 Parallel Mul-ADD
  15.433 +	(muls like pmullw, then adds adjacent 16-bit fields
  15.434 +	 in the multiply result to make the final 2x32 result)
  15.435 +*/
  15.436 +#define	pmaddwd_m2r(var, reg)	mmx_m2r(pmaddwd, var, reg)
  15.437 +#define	pmaddwd_r2r(regs, regd)	mmx_r2r(pmaddwd, regs, regd)
  15.438 +#define	pmaddwd(vars, vard)	mmx_m2m(pmaddwd, vars, vard)
  15.439 +
  15.440 +
  15.441 +/*	1x64 bitwise AND
  15.442 +*/
  15.443 +#ifdef	BROKEN_PAND
  15.444 +#define	pand_m2r(var, reg) \
  15.445 +	{ \
  15.446 +		mmx_m2r(pandn, (mmx_t) -1LL, reg); \
  15.447 +		mmx_m2r(pandn, var, reg); \
  15.448 +	}
  15.449 +#define	pand_r2r(regs, regd) /* BROKEN_PAND workaround: regd &= regs using two pandn ops */ \
  15.450 +	{ \
  15.451 +		mmx_m2r(pandn, (mmx_t) -1LL, regd); /* regd = ~regd & all-ones = ~regd */ \
  15.452 +		mmx_r2r(pandn, regs, regd); /* regd = ~(~regd) & regs; ';' needed: non-trace mmx_r2r is a bare asm expression */ \
  15.453 +	}
  15.454 +#define	pand(vars, vard) \
  15.455 +	{ \
  15.456 +		movq_m2r(vard, mm0); \
  15.457 +		mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
  15.458 +		mmx_m2r(pandn, vars, mm0); \
  15.459 +		movq_r2m(mm0, vard); \
  15.460 +	}
  15.461 +#else
  15.462 +#define	pand_m2r(var, reg)	mmx_m2r(pand, var, reg)
  15.463 +#define	pand_r2r(regs, regd)	mmx_r2r(pand, regs, regd)
  15.464 +#define	pand(vars, vard)	mmx_m2m(pand, vars, vard)
  15.465 +#endif
  15.466 +
  15.467 +
  15.468 +/*	1x64 bitwise AND with Not the destination
  15.469 +*/
  15.470 +#define	pandn_m2r(var, reg)	mmx_m2r(pandn, var, reg)
  15.471 +#define	pandn_r2r(regs, regd)	mmx_r2r(pandn, regs, regd)
  15.472 +#define	pandn(vars, vard)	mmx_m2m(pandn, vars, vard)
  15.473 +
  15.474 +
  15.475 +/*	1x64 bitwise OR
  15.476 +*/
  15.477 +#define	por_m2r(var, reg)	mmx_m2r(por, var, reg)
  15.478 +#define	por_r2r(regs, regd)	mmx_r2r(por, regs, regd)
  15.479 +#define	por(vars, vard)	mmx_m2m(por, vars, vard)
  15.480 +
  15.481 +
  15.482 +/*	1x64 bitwise eXclusive OR
  15.483 +*/
  15.484 +#define	pxor_m2r(var, reg)	mmx_m2r(pxor, var, reg)
  15.485 +#define	pxor_r2r(regs, regd)	mmx_r2r(pxor, regs, regd)
  15.486 +#define	pxor(vars, vard)	mmx_m2m(pxor, vars, vard)
  15.487 +
  15.488 +
  15.489 +/*	2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
  15.490 +	(resulting fields are either 0 or -1)
  15.491 +*/
  15.492 +#define	pcmpeqd_m2r(var, reg)	mmx_m2r(pcmpeqd, var, reg)
  15.493 +#define	pcmpeqd_r2r(regs, regd)	mmx_r2r(pcmpeqd, regs, regd)
  15.494 +#define	pcmpeqd(vars, vard)	mmx_m2m(pcmpeqd, vars, vard)
  15.495 +
  15.496 +#define	pcmpeqw_m2r(var, reg)	mmx_m2r(pcmpeqw, var, reg)
  15.497 +#define	pcmpeqw_r2r(regs, regd)	mmx_r2r(pcmpeqw, regs, regd)
  15.498 +#define	pcmpeqw(vars, vard)	mmx_m2m(pcmpeqw, vars, vard)
  15.499 +
  15.500 +#define	pcmpeqb_m2r(var, reg)	mmx_m2r(pcmpeqb, var, reg)
  15.501 +#define	pcmpeqb_r2r(regs, regd)	mmx_r2r(pcmpeqb, regs, regd)
  15.502 +#define	pcmpeqb(vars, vard)	mmx_m2m(pcmpeqb, vars, vard)
  15.503 +
  15.504 +
  15.505 +/*	2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
  15.506 +	(resulting fields are either 0 or -1)
  15.507 +*/
  15.508 +#define	pcmpgtd_m2r(var, reg)	mmx_m2r(pcmpgtd, var, reg)
  15.509 +#define	pcmpgtd_r2r(regs, regd)	mmx_r2r(pcmpgtd, regs, regd)
  15.510 +#define	pcmpgtd(vars, vard)	mmx_m2m(pcmpgtd, vars, vard)
  15.511 +
  15.512 +#define	pcmpgtw_m2r(var, reg)	mmx_m2r(pcmpgtw, var, reg)
  15.513 +#define	pcmpgtw_r2r(regs, regd)	mmx_r2r(pcmpgtw, regs, regd)
  15.514 +#define	pcmpgtw(vars, vard)	mmx_m2m(pcmpgtw, vars, vard)
  15.515 +
  15.516 +#define	pcmpgtb_m2r(var, reg)	mmx_m2r(pcmpgtb, var, reg)
  15.517 +#define	pcmpgtb_r2r(regs, regd)	mmx_r2r(pcmpgtb, regs, regd)
  15.518 +#define	pcmpgtb(vars, vard)	mmx_m2m(pcmpgtb, vars, vard)
  15.519 +
  15.520 +
  15.521 +/*	1x64, 2x32, and 4x16 Parallel Shift Left Logical
  15.522 +*/
  15.523 +#define	psllq_i2r(imm, reg)	mmx_i2r(psllq, imm, reg)
  15.524 +#define	psllq_m2r(var, reg)	mmx_m2r(psllq, var, reg)
  15.525 +#define	psllq_r2r(regs, regd)	mmx_r2r(psllq, regs, regd)
  15.526 +#define	psllq(vars, vard)	mmx_m2m(psllq, vars, vard)
  15.527 +
  15.528 +#define	pslld_i2r(imm, reg)	mmx_i2r(pslld, imm, reg)
  15.529 +#define	pslld_m2r(var, reg)	mmx_m2r(pslld, var, reg)
  15.530 +#define	pslld_r2r(regs, regd)	mmx_r2r(pslld, regs, regd)
  15.531 +#define	pslld(vars, vard)	mmx_m2m(pslld, vars, vard)
  15.532 +
  15.533 +#define	psllw_i2r(imm, reg)	mmx_i2r(psllw, imm, reg)
  15.534 +#define	psllw_m2r(var, reg)	mmx_m2r(psllw, var, reg)
  15.535 +#define	psllw_r2r(regs, regd)	mmx_r2r(psllw, regs, regd)
  15.536 +#define	psllw(vars, vard)	mmx_m2m(psllw, vars, vard)
  15.537 +
  15.538 +
  15.539 +/*	1x64, 2x32, and 4x16 Parallel Shift Right Logical
  15.540 +*/
  15.541 +#define	psrlq_i2r(imm, reg)	mmx_i2r(psrlq, imm, reg)
  15.542 +#define	psrlq_m2r(var, reg)	mmx_m2r(psrlq, var, reg)
  15.543 +#define	psrlq_r2r(regs, regd)	mmx_r2r(psrlq, regs, regd)
  15.544 +#define	psrlq(vars, vard)	mmx_m2m(psrlq, vars, vard)
  15.545 +
  15.546 +#define	psrld_i2r(imm, reg)	mmx_i2r(psrld, imm, reg)
  15.547 +#define	psrld_m2r(var, reg)	mmx_m2r(psrld, var, reg)
  15.548 +#define	psrld_r2r(regs, regd)	mmx_r2r(psrld, regs, regd)
  15.549 +#define	psrld(vars, vard)	mmx_m2m(psrld, vars, vard)
  15.550 +
  15.551 +#define	psrlw_i2r(imm, reg)	mmx_i2r(psrlw, imm, reg)
  15.552 +#define	psrlw_m2r(var, reg)	mmx_m2r(psrlw, var, reg)
  15.553 +#define	psrlw_r2r(regs, regd)	mmx_r2r(psrlw, regs, regd)
  15.554 +#define	psrlw(vars, vard)	mmx_m2m(psrlw, vars, vard)
  15.555 +
  15.556 +
  15.557 +/*	2x32 and 4x16 Parallel Shift Right Arithmetic
  15.558 +*/
  15.559 +#define	psrad_i2r(imm, reg)	mmx_i2r(psrad, imm, reg)
  15.560 +#define	psrad_m2r(var, reg)	mmx_m2r(psrad, var, reg)
  15.561 +#define	psrad_r2r(regs, regd)	mmx_r2r(psrad, regs, regd)
  15.562 +#define	psrad(vars, vard)	mmx_m2m(psrad, vars, vard)
  15.563 +
  15.564 +#define	psraw_i2r(imm, reg)	mmx_i2r(psraw, imm, reg)
  15.565 +#define	psraw_m2r(var, reg)	mmx_m2r(psraw, var, reg)
  15.566 +#define	psraw_r2r(regs, regd)	mmx_r2r(psraw, regs, regd)
  15.567 +#define	psraw(vars, vard)	mmx_m2m(psraw, vars, vard)
  15.568 +
  15.569 +
  15.570 +/*	2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
  15.571 +	(packs source and dest fields into dest in that order)
  15.572 +*/
  15.573 +#define	packssdw_m2r(var, reg)	mmx_m2r(packssdw, var, reg)
  15.574 +#define	packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
  15.575 +#define	packssdw(vars, vard)	mmx_m2m(packssdw, vars, vard)
  15.576 +
  15.577 +#define	packsswb_m2r(var, reg)	mmx_m2r(packsswb, var, reg)
  15.578 +#define	packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
  15.579 +#define	packsswb(vars, vard)	mmx_m2m(packsswb, vars, vard)
  15.580 +
  15.581 +
  15.582 +/*	4x16->8x8 PACK and Unsigned Saturate
  15.583 +	(packs source and dest fields into dest in that order)
  15.584 +*/
  15.585 +#define	packuswb_m2r(var, reg)	mmx_m2r(packuswb, var, reg)
  15.586 +#define	packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
  15.587 +#define	packuswb(vars, vard)	mmx_m2m(packuswb, vars, vard)
  15.588 +
  15.589 +
  15.590 +/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
  15.591 +	(interleaves low half of dest with low half of source
  15.592 +	 as padding in each result field)
  15.593 +*/
  15.594 +#define	punpckldq_m2r(var, reg)	mmx_m2r(punpckldq, var, reg)
  15.595 +#define	punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
  15.596 +#define	punpckldq(vars, vard)	mmx_m2m(punpckldq, vars, vard)
  15.597 +
  15.598 +#define	punpcklwd_m2r(var, reg)	mmx_m2r(punpcklwd, var, reg)
  15.599 +#define	punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
  15.600 +#define	punpcklwd(vars, vard)	mmx_m2m(punpcklwd, vars, vard)
  15.601 +
  15.602 +#define	punpcklbw_m2r(var, reg)	mmx_m2r(punpcklbw, var, reg)
  15.603 +#define	punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
  15.604 +#define	punpcklbw(vars, vard)	mmx_m2m(punpcklbw, vars, vard)
  15.605 +
  15.606 +
  15.607 +/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
  15.608 +	(interleaves high half of dest with high half of source
  15.609 +	 as padding in each result field)
  15.610 +*/
  15.611 +#define	punpckhdq_m2r(var, reg)	mmx_m2r(punpckhdq, var, reg)
  15.612 +#define	punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
  15.613 +#define	punpckhdq(vars, vard)	mmx_m2m(punpckhdq, vars, vard)
  15.614 +
  15.615 +#define	punpckhwd_m2r(var, reg)	mmx_m2r(punpckhwd, var, reg)
  15.616 +#define	punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
  15.617 +#define	punpckhwd(vars, vard)	mmx_m2m(punpckhwd, vars, vard)
  15.618 +
  15.619 +#define	punpckhbw_m2r(var, reg)	mmx_m2r(punpckhbw, var, reg)
  15.620 +#define	punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
  15.621 +#define	punpckhbw(vars, vard)	mmx_m2m(punpckhbw, vars, vard)
  15.622 +
  15.623 +
  15.624 +/*	Empty MMx State
  15.625 +	(used to clean-up when going from mmx to float use
  15.626 +	 of the registers that are shared by both; note that
  15.627 +	 there is no float-to-mmx operation needed, because
  15.628 +	 only the float tag word info is corruptible)
  15.629 +*/
  15.630 +#ifdef	MMX_TRACE
  15.631 +
  15.632 +#define	emms() \
  15.633 +	{ \
  15.634 +		printf("emms()\n"); \
  15.635 +		__asm__ __volatile__ ("emms"); \
  15.636 +	}
  15.637 +
  15.638 +#else
  15.639 +
  15.640 +#define	emms()			__asm__ __volatile__ ("emms")
  15.641 +
  15.642 +#endif
  15.643 +
  15.644 +#endif
  15.645 +/* vi: set ts=4 sw=4 expandtab: */
    16.1 --- a/src/render/opengl/SDL_renderer_gl.c	Wed Feb 02 22:55:12 2011 -0800
    16.2 +++ b/src/render/opengl/SDL_renderer_gl.c	Thu Feb 03 00:19:40 2011 -0800
    16.3 @@ -37,27 +37,6 @@
    16.4     http://developer.apple.com/documentation/GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_texturedata/chapter_10_section_2.html
    16.5  */
    16.6  
    16.7 -/* !!! FIXME: this should go in a higher level than the GL renderer. */
    16.8 -static __inline__ int
    16.9 -bytes_per_pixel(const Uint32 format)
   16.10 -{
   16.11 -    if (!SDL_ISPIXELFORMAT_FOURCC(format)) {
   16.12 -        return SDL_BYTESPERPIXEL(format);
   16.13 -    }
   16.14 -
   16.15 -    /* FOURCC format */
   16.16 -    switch (format) {
   16.17 -    case SDL_PIXELFORMAT_YV12:
   16.18 -    case SDL_PIXELFORMAT_IYUV:
   16.19 -    case SDL_PIXELFORMAT_YUY2:
   16.20 -    case SDL_PIXELFORMAT_UYVY:
   16.21 -    case SDL_PIXELFORMAT_YVYU:
   16.22 -        return 2;
   16.23 -    default:
   16.24 -        return 1;               /* shouldn't ever hit this. */
   16.25 -    }
   16.26 -}
   16.27 -
   16.28  /* Used to re-create the window with OpenGL capability */
   16.29  extern int SDL_RecreateWindow(SDL_Window * window, Uint32 flags);
   16.30  
   16.31 @@ -67,18 +46,12 @@
   16.32  static void GL_WindowEvent(SDL_Renderer * renderer,
   16.33                             const SDL_WindowEvent *event);
   16.34  static int GL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   16.35 -static int GL_QueryTexturePixels(SDL_Renderer * renderer,
   16.36 -                                 SDL_Texture * texture, void **pixels,
   16.37 -                                 int *pitch);
   16.38  static int GL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   16.39                              const SDL_Rect * rect, const void *pixels,
   16.40                              int pitch);
   16.41  static int GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   16.42 -                          const SDL_Rect * rect, int markDirty, void **pixels,
   16.43 -                          int *pitch);
   16.44 +                          const SDL_Rect * rect, void **pixels, int *pitch);
   16.45  static void GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   16.46 -static void GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   16.47 -                            int numrects, const SDL_Rect * rects);
   16.48  static int GL_RenderClear(SDL_Renderer * renderer);
   16.49  static int GL_RenderDrawPoints(SDL_Renderer * renderer,
   16.50                                 const SDL_Point * points, int count);
   16.51 @@ -102,21 +75,8 @@
   16.52      {
   16.53       "opengl",
   16.54       (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
   16.55 -     13,
   16.56 -     {
   16.57 -      SDL_PIXELFORMAT_RGB332,
   16.58 -      SDL_PIXELFORMAT_RGB444,
   16.59 -      SDL_PIXELFORMAT_RGB555,
   16.60 -      SDL_PIXELFORMAT_ARGB4444,
   16.61 -      SDL_PIXELFORMAT_ARGB1555,
   16.62 -      SDL_PIXELFORMAT_RGB565,
   16.63 -      SDL_PIXELFORMAT_RGB24,
   16.64 -      SDL_PIXELFORMAT_BGR24,
   16.65 -      SDL_PIXELFORMAT_RGB888,
   16.66 -      SDL_PIXELFORMAT_BGR888,
   16.67 -      SDL_PIXELFORMAT_ARGB8888,
   16.68 -      SDL_PIXELFORMAT_ABGR8888,
   16.69 -      SDL_PIXELFORMAT_ARGB2101010},
   16.70 +     1,
   16.71 +     {SDL_PIXELFORMAT_ARGB8888},
   16.72       0,
   16.73       0}
   16.74  };
   16.75 @@ -126,10 +86,6 @@
   16.76      SDL_GLContext context;
   16.77      SDL_bool updateSize;
   16.78      SDL_bool GL_ARB_texture_rectangle_supported;
   16.79 -    SDL_bool GL_EXT_paletted_texture_supported;
   16.80 -    SDL_bool GL_APPLE_ycbcr_422_supported;
   16.81 -    SDL_bool GL_MESA_ycbcr_texture_supported;
   16.82 -    SDL_bool GL_ARB_fragment_program_supported;
   16.83      int blendMode;
   16.84  
   16.85      /* OpenGL functions */
   16.86 @@ -139,33 +95,18 @@
   16.87  
   16.88      void (*glTextureRangeAPPLE) (GLenum target, GLsizei length,
   16.89                                   const GLvoid * pointer);
   16.90 -
   16.91 -    PFNGLGETPROGRAMIVARBPROC glGetProgramivARB;
   16.92 -    PFNGLGETPROGRAMSTRINGARBPROC glGetProgramStringARB;
   16.93 -    PFNGLPROGRAMLOCALPARAMETER4FVARBPROC glProgramLocalParameter4fvARB;
   16.94 -    PFNGLDELETEPROGRAMSARBPROC glDeleteProgramsARB;
   16.95 -    PFNGLGENPROGRAMSARBPROC glGenProgramsARB;
   16.96 -    PFNGLBINDPROGRAMARBPROC glBindProgramARB;
   16.97 -    PFNGLPROGRAMSTRINGARBPROC glProgramStringARB;
   16.98 -
   16.99 -    /* (optional) fragment programs */
  16.100 -    GLuint fragment_program_UYVY;
  16.101  } GL_RenderData;
  16.102  
  16.103  typedef struct
  16.104  {
  16.105      GLuint texture;
  16.106 -    GLuint shader;
  16.107      GLenum type;
  16.108      GLfloat texw;
  16.109      GLfloat texh;
  16.110      GLenum format;
  16.111      GLenum formattype;
  16.112 -    Uint8 *palette;
  16.113      void *pixels;
  16.114      int pitch;
  16.115 -    SDL_DirtyRectList dirty;
  16.116 -    int HACK_RYAN_FIXME;
  16.117  } GL_TextureData;
  16.118  
  16.119  
  16.120 @@ -257,11 +198,9 @@
  16.121  
  16.122      renderer->WindowEvent = GL_WindowEvent;
  16.123      renderer->CreateTexture = GL_CreateTexture;
  16.124 -    renderer->QueryTexturePixels = GL_QueryTexturePixels;
  16.125      renderer->UpdateTexture = GL_UpdateTexture;
  16.126      renderer->LockTexture = GL_LockTexture;
  16.127      renderer->UnlockTexture = GL_UnlockTexture;
  16.128 -    renderer->DirtyTexture = GL_DirtyTexture;
  16.129      renderer->RenderClear = GL_RenderClear;
  16.130      renderer->RenderDrawPoints = GL_RenderDrawPoints;
  16.131      renderer->RenderDrawLines = GL_RenderDrawLines;
  16.132 @@ -317,40 +256,12 @@
  16.133          || SDL_GL_ExtensionSupported("GL_EXT_texture_rectangle")) {
  16.134          data->GL_ARB_texture_rectangle_supported = SDL_TRUE;
  16.135      }
  16.136 -    if (SDL_GL_ExtensionSupported("GL_APPLE_ycbcr_422")) {
  16.137 -        data->GL_APPLE_ycbcr_422_supported = SDL_TRUE;
  16.138 -    }
  16.139 -    if (SDL_GL_ExtensionSupported("GL_MESA_ycbcr_texture")) {
  16.140 -        data->GL_MESA_ycbcr_texture_supported = SDL_TRUE;
  16.141 -    }
  16.142      if (SDL_GL_ExtensionSupported("GL_APPLE_texture_range")) {
  16.143          data->glTextureRangeAPPLE =
  16.144              (void (*)(GLenum, GLsizei, const GLvoid *))
  16.145              SDL_GL_GetProcAddress("glTextureRangeAPPLE");
  16.146      }
  16.147  
  16.148 -    /* we might use fragment programs for YUV data, etc. */
  16.149 -    if (SDL_GL_ExtensionSupported("GL_ARB_fragment_program")) {
  16.150 -        /* !!! FIXME: this doesn't check for errors. */
  16.151 -        /* !!! FIXME: this should really reuse the glfuncs.h stuff. */
  16.152 -        data->glGetProgramivARB = (PFNGLGETPROGRAMIVARBPROC)
  16.153 -            SDL_GL_GetProcAddress("glGetProgramivARB");
  16.154 -        data->glGetProgramStringARB = (PFNGLGETPROGRAMSTRINGARBPROC)
  16.155 -            SDL_GL_GetProcAddress("glGetProgramStringARB");
  16.156 -        data->glProgramLocalParameter4fvARB =
  16.157 -            (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC)
  16.158 -            SDL_GL_GetProcAddress("glProgramLocalParameter4fvARB");
  16.159 -        data->glDeleteProgramsARB = (PFNGLDELETEPROGRAMSARBPROC)
  16.160 -            SDL_GL_GetProcAddress("glDeleteProgramsARB");
  16.161 -        data->glGenProgramsARB = (PFNGLGENPROGRAMSARBPROC)
  16.162 -            SDL_GL_GetProcAddress("glGenProgramsARB");
  16.163 -        data->glBindProgramARB = (PFNGLBINDPROGRAMARBPROC)
  16.164 -            SDL_GL_GetProcAddress("glBindProgramARB");
  16.165 -        data->glProgramStringARB = (PFNGLPROGRAMSTRINGARBPROC)
  16.166 -            SDL_GL_GetProcAddress("glProgramStringARB");
  16.167 -        data->GL_ARB_fragment_program_supported = SDL_TRUE;
  16.168 -    }
  16.169 -
  16.170      /* Set up parameters for rendering */
  16.171      data->blendMode = -1;
  16.172      data->glDisable(GL_DEPTH_TEST);
  16.173 @@ -419,240 +330,16 @@
  16.174      return value;
  16.175  }
  16.176  
  16.177 -
  16.178 -//#define DEBUG_PROGRAM_COMPILE 1
  16.179 -
  16.180 -static void
  16.181 -set_shader_error(GL_RenderData * data, const char *prefix)
  16.182 -{
  16.183 -    GLint pos = 0;
  16.184 -    const GLubyte *errstr;
  16.185 -    data->glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
  16.186 -    errstr = data->glGetString(GL_PROGRAM_ERROR_STRING_ARB);
  16.187 -    SDL_SetError("%s: shader compile error at position %d: %s",
  16.188 -           prefix, (int) pos, (const char *) errstr);
  16.189 -}
  16.190 -
  16.191 -static GLuint
  16.192 -compile_shader(GL_RenderData * data, GLenum shader_type, const char *_code)
  16.193 -{
  16.194 -    const int have_texture_rects = data->GL_ARB_texture_rectangle_supported;
  16.195 -    const char *replacement = have_texture_rects ? "RECT" : "2D";
  16.196 -    const size_t replacementlen = SDL_strlen(replacement);
  16.197 -    const char *token = "%TEXTURETARGET%";
  16.198 -    const size_t tokenlen = SDL_strlen(token);
  16.199 -    char *code = NULL;
  16.200 -    char *ptr = NULL;
  16.201 -    GLuint program = 0;
  16.202 -
  16.203 -    /*
  16.204 -     * The TEX instruction needs a different target depending on what we use.
  16.205 -     *  To handle this, we use "%TEXTURETARGET%" and replace the string before
  16.206 -     *  compiling the shader.
  16.207 -     */
  16.208 -    code = SDL_strdup(_code);
  16.209 -    if (code == NULL)
  16.210 -        return 0;
  16.211 -
  16.212 -    for (ptr = SDL_strstr(code, token); ptr; ptr = SDL_strstr(ptr + 1, token)) {
  16.213 -        SDL_memcpy(ptr, replacement, replacementlen);
  16.214 -        SDL_memmove(ptr + replacementlen, ptr + tokenlen,
  16.215 -                    SDL_strlen(ptr + tokenlen) + 1);
  16.216 -    }
  16.217 -
  16.218 -#if DEBUG_PROGRAM_COMPILE
  16.219 -    printf("compiling shader:\n%s\n\n", code);
  16.220 -#endif
  16.221 -
  16.222 -    data->glGetError();         /* flush any existing error state. */
  16.223 -    data->glGenProgramsARB(1, &program);
  16.224 -    data->glBindProgramARB(shader_type, program);
  16.225 -    data->glProgramStringARB(shader_type, GL_PROGRAM_FORMAT_ASCII_ARB,
  16.226 -                             (GLsizei)SDL_strlen(code), code);
  16.227 -
  16.228 -    SDL_free(code);
  16.229 -
  16.230 -    if (data->glGetError() == GL_INVALID_OPERATION) {
  16.231 -#if DEBUG_PROGRAM_COMPILE
  16.232 -        GLint pos = 0;
  16.233 -        const GLubyte *errstr;
  16.234 -        data->glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
  16.235 -        errstr = data->glGetString(GL_PROGRAM_ERROR_STRING_ARB);
  16.236 -        printf("program compile error at position %d: %s\n\n",
  16.237 -               (int) pos, (const char *) errstr);
  16.238 -#endif
  16.239 -        data->glBindProgramARB(shader_type, 0);
  16.240 -        data->glDeleteProgramsARB(1, &program);
  16.241 -        return 0;
  16.242 -    }
  16.243 -
  16.244 -    return program;
  16.245 -}
  16.246 -
  16.247 -
  16.248 -/*
  16.249 - * Fragment program that renders from UYVY textures.
  16.250 - * The UYVY to RGB equasion is:
  16.251 - *   R = 1.164(Y-16) + 1.596(Cr-128)
  16.252 - *   G = 1.164(Y-16) - 0.813(Cr-128) - 0.391(Cb-128)
  16.253 - *   B = 1.164(Y-16) + 2.018(Cb-128)
  16.254 - * Byte layout is Cb, Y1, Cr, Y2, stored in the R, G, B, A channels.
  16.255 - * 4 bytes == 2 pixels: Y1/Cb/Cr, Y2/Cb/Cr
  16.256 - *
  16.257 - * !!! FIXME: this ignores blendmodes, etc.
  16.258 - * !!! FIXME: this could be more efficient...use a dot product for green, etc.
  16.259 - */
  16.260 -static const char *fragment_program_UYVY_source_code = "!!ARBfp1.0\n"
  16.261 -    /* outputs... */
  16.262 -    "OUTPUT outcolor = result.color;\n"
  16.263 -    /* scratch registers... */
  16.264 -    "TEMP uyvy;\n" "TEMP luminance;\n" "TEMP work;\n"
  16.265 -    /* Halve the coordinates to grab the correct 32 bits for the fragment. */
  16.266 -    "MUL work, fragment.texcoord, { 0.5, 1.0, 1.0, 1.0 };\n"
  16.267 -    /* Sample the YUV texture. Cb, Y1, Cr, Y2, are stored in x, y, z, w. */
  16.268 -    "TEX uyvy, work, texture[0], %TEXTURETARGET%;\n"
  16.269 -    /* Do subtractions (128/255, 16/255, 128/255, 16/255) */
  16.270 -    "SUB uyvy, uyvy, { 0.501960784313726, 0.06274509803922, 0.501960784313726, 0.06274509803922 };\n"
  16.271 -    /* Choose the luminance component by texcoord. */
  16.272 -    /* !!! FIXME: laziness wins out for now... just average Y1 and Y2. */
  16.273 -    "ADD luminance, uyvy.yyyy, uyvy.wwww;\n"
  16.274 -    "MUL luminance, luminance, { 0.5, 0.5, 0.5, 0.5 };\n"
  16.275 -    /* Multiply luminance by its magic value. */
  16.276 -    "MUL luminance, luminance, { 1.164, 1.164, 1.164, 1.164 };\n"
  16.277 -    /* uyvy.xyzw becomes Cr/Cr/Cb/Cb, with multiplications. */
  16.278 -    "MUL uyvy, uyvy.zzxx, { 1.596, -0.813, 2.018, -0.391 };\n"
  16.279 -    /* Add luminance to Cr and Cb, store to RGB channels. */
  16.280 -    "ADD work.rgb, luminance, uyvy;\n"
  16.281 -    /* Do final addition for Green channel.  (!!! FIXME: this should be a DPH?) */
  16.282 -    "ADD work.g, work.g, uyvy.w;\n"
  16.283 -    /* Make sure alpha channel is fully opaque.  (!!! FIXME: blend modes!) */
  16.284 -    "MOV work.a, { 1.0 };\n"
  16.285 -    /* Store out the final fragment color... */
  16.286 -    "MOV outcolor, work;\n"
  16.287 -    /* ...and we're done! */
  16.288 -    "END\n";
  16.289 -
  16.290  static __inline__ SDL_bool
  16.291  convert_format(GL_RenderData *renderdata, Uint32 pixel_format,
  16.292                 GLint* internalFormat, GLenum* format, GLenum* type)
  16.293  {
  16.294      switch (pixel_format) {
  16.295 -    case SDL_PIXELFORMAT_RGB332:
  16.296 -        *internalFormat = GL_R3_G3_B2;
  16.297 -        *format = GL_RGB;
  16.298 -        *type = GL_UNSIGNED_BYTE_3_3_2;
  16.299 -        break;
  16.300 -    case SDL_PIXELFORMAT_RGB444:
  16.301 -        *internalFormat = GL_RGB4;
  16.302 -        *format = GL_RGB;
  16.303 -        *type = GL_UNSIGNED_SHORT_4_4_4_4;
  16.304 -        break;
  16.305 -    case SDL_PIXELFORMAT_RGB555:
  16.306 -        *internalFormat = GL_RGB5;
  16.307 -        *format = GL_RGB;
  16.308 -        *type = GL_UNSIGNED_SHORT_5_5_5_1;
  16.309 -        break;
  16.310 -    case SDL_PIXELFORMAT_ARGB4444:
  16.311 -        *internalFormat = GL_RGBA4;
  16.312 -        *format = GL_BGRA;
  16.313 -        *type = GL_UNSIGNED_SHORT_4_4_4_4_REV;
  16.314 -        break;
  16.315 -    case SDL_PIXELFORMAT_ARGB1555:
  16.316 -        *internalFormat = GL_RGB5_A1;
  16.317 -        *format = GL_BGRA;
  16.318 -        *type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
  16.319 -        break;
  16.320 -    case SDL_PIXELFORMAT_RGB565:
  16.321 -        *internalFormat = GL_RGB8;
  16.322 -        *format = GL_RGB;
  16.323 -        *type = GL_UNSIGNED_SHORT_5_6_5;
  16.324 -        break;
  16.325 -    case SDL_PIXELFORMAT_RGB24:
  16.326 -        *internalFormat = GL_RGB8;
  16.327 -        *format = GL_RGB;
  16.328 -        *type = GL_UNSIGNED_BYTE;
  16.329 -        break;
  16.330      case SDL_PIXELFORMAT_RGB888:
  16.331 -        *internalFormat = GL_RGB8;
  16.332 -        *format = GL_BGRA;
  16.333 -        *type = GL_UNSIGNED_BYTE;
  16.334 -        break;
  16.335 -    case SDL_PIXELFORMAT_BGR24:
  16.336 -        *internalFormat = GL_RGB8;
  16.337 -        *format = GL_BGR;
  16.338 -        *type = GL_UNSIGNED_BYTE;
  16.339 -        break;
  16.340 -    case SDL_PIXELFORMAT_BGR888:
  16.341 -        *internalFormat = GL_RGB8;
  16.342 -        *format = GL_RGBA;
  16.343 -        *type = GL_UNSIGNED_BYTE;
  16.344 -        break;
  16.345      case SDL_PIXELFORMAT_ARGB8888:
  16.346 -#ifdef __MACOSX__
  16.347 -        *internalFormat = GL_RGBA;
  16.348 +        *internalFormat = GL_RGBA8;
  16.349          *format = GL_BGRA;
  16.350          *type = GL_UNSIGNED_INT_8_8_8_8_REV;
  16.351 -#else
  16.352 -        *internalFormat = GL_RGBA8;
  16.353 -        *format = GL_BGRA;
  16.354 -        *type = GL_UNSIGNED_BYTE;
  16.355 -#endif
  16.356 -        break;
  16.357 -    case SDL_PIXELFORMAT_ABGR8888:
  16.358 -        *internalFormat = GL_RGBA8;
  16.359 -        *format = GL_RGBA;
  16.360 -        *type = GL_UNSIGNED_BYTE;
  16.361 -        break;
  16.362 -    case SDL_PIXELFORMAT_ARGB2101010:
  16.363 -        *internalFormat = GL_RGB10_A2;
  16.364 -        *format = GL_BGRA;
  16.365 -        *type = GL_UNSIGNED_INT_2_10_10_10_REV;
  16.366 -        break;
  16.367 -    case SDL_PIXELFORMAT_UYVY:
  16.368 -        if (renderdata->GL_APPLE_ycbcr_422_supported) {
  16.369 -            *internalFormat = GL_RGB;
  16.370 -            *format = GL_YCBCR_422_APPLE;
  16.371 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
  16.372 -            *type = GL_UNSIGNED_SHORT_8_8_APPLE;
  16.373 -#else
  16.374 -            *type = GL_UNSIGNED_SHORT_8_8_REV_APPLE;
  16.375 -#endif
  16.376 -        } else if (renderdata->GL_MESA_ycbcr_texture_supported) {
  16.377 -            *internalFormat = GL_YCBCR_MESA;
  16.378 -            *format = GL_YCBCR_MESA;
  16.379 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
  16.380 -            *type = GL_UNSIGNED_SHORT_8_8_MESA;
  16.381 -#else
  16.382 -            *type = GL_UNSIGNED_SHORT_8_8_REV_MESA;
  16.383 -#endif
  16.384 -        } else if (renderdata->GL_ARB_fragment_program_supported) {
  16.385 -            *internalFormat = GL_RGBA;
  16.386 -            *format = GL_RGBA;
  16.387 -            *type = GL_UNSIGNED_BYTE;
  16.388 -        } else {
  16.389 -            return SDL_FALSE;
  16.390 -        }
  16.391 -        break;
  16.392 -    case SDL_PIXELFORMAT_YUY2:
  16.393 -        if (renderdata->GL_APPLE_ycbcr_422_supported) {
  16.394 -            *internalFormat = GL_RGB;
  16.395 -            *format = GL_YCBCR_422_APPLE;
  16.396 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
  16.397 -            *type = GL_UNSIGNED_SHORT_8_8_REV_APPLE;
  16.398 -#else
  16.399 -            *type = GL_UNSIGNED_SHORT_8_8_APPLE;
  16.400 -#endif
  16.401 -        } else if (renderdata->GL_MESA_ycbcr_texture_supported) {
  16.402 -            *internalFormat = GL_YCBCR_MESA;
  16.403 -            *format = GL_YCBCR_MESA;
  16.404 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
  16.405 -            *type = GL_UNSIGNED_SHORT_8_8_REV_MESA;
  16.406 -#else
  16.407 -            *type = GL_UNSIGNED_SHORT_8_8_MESA;
  16.408 -#endif
  16.409 -        } else {
  16.410 -            return SDL_FALSE;
  16.411 -        }
  16.412          break;
  16.413      default:
  16.414          return SDL_FALSE;
  16.415 @@ -668,7 +355,6 @@
  16.416      GLint internalFormat;
  16.417      GLenum format, type;
  16.418      int texture_w, texture_h;
  16.419 -    GLuint shader = 0;
  16.420      GLenum result;
  16.421  
  16.422      GL_ActivateRenderer(renderer);
  16.423 @@ -679,21 +365,6 @@
  16.424                       SDL_GetPixelFormatName(texture->format));
  16.425          return -1;
  16.426      }
  16.427 -    if (texture->format == SDL_PIXELFORMAT_UYVY &&
  16.428 -        !renderdata->GL_APPLE_ycbcr_422_supported &&
  16.429 -        !renderdata->GL_MESA_ycbcr_texture_supported &&
  16.430 -        renderdata->GL_ARB_fragment_program_supported) {
  16.431 -        if (renderdata->fragment_program_UYVY == 0) {
  16.432 -            renderdata->fragment_program_UYVY =
  16.433 -                compile_shader(renderdata, GL_FRAGMENT_PROGRAM_ARB,
  16.434 -                               fragment_program_UYVY_source_code);
  16.435 -            if (renderdata->fragment_program_UYVY == 0) {
  16.436 -                set_shader_error(renderdata, "UYVY");
  16.437 -                return -1;
  16.438 -            }
  16.439 -        }
  16.440 -        shader = renderdata->fragment_program_UYVY;
  16.441 -    }
  16.442  
  16.443      data = (GL_TextureData *) SDL_calloc(1, sizeof(*data));
  16.444      if (!data) {
  16.445 @@ -701,10 +372,8 @@
  16.446          return -1;
  16.447      }
  16.448  
  16.449 -    data->shader = shader;
  16.450 -
  16.451      if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
  16.452 -        data->pitch = texture->w * bytes_per_pixel(texture->format);
  16.453 +        data->pitch = texture->w * SDL_BYTESPERPIXEL(texture->format);
  16.454          data->pixels = SDL_malloc(texture->h * data->pitch);
  16.455          if (!data->pixels) {
  16.456              SDL_OutOfMemory();
  16.457 @@ -731,17 +400,6 @@
  16.458          data->texh = (GLfloat) texture->h / texture_h;
  16.459      }
  16.460  
  16.461 -    /* YUV formats use RGBA but are really two bytes per pixel */
  16.462 -    if (internalFormat == GL_RGBA && bytes_per_pixel(texture->format) < 4) {
  16.463 -        texture_w /= 2;
  16.464 -        if (data->type == GL_TEXTURE_2D) {
  16.465 -            data->texw *= 2.0f;
  16.466 -        }
  16.467 -        data->HACK_RYAN_FIXME = 2;
  16.468 -    } else {
  16.469 -        data->HACK_RYAN_FIXME = 1;
  16.470 -    }
  16.471 -
  16.472      data->format = format;
  16.473      data->formattype = type;
  16.474      renderdata->glEnable(data->type);
  16.475 @@ -771,22 +429,13 @@
  16.476          renderdata->glTexParameteri(data->type, GL_TEXTURE_STORAGE_HINT_APPLE,
  16.477                                      GL_STORAGE_CACHED_APPLE);
  16.478      }
  16.479 -/* This causes a crash in testoverlay for some reason.  Apple bug? */
  16.480 -#if 0
  16.481      if (texture->access == SDL_TEXTUREACCESS_STREAMING
  16.482          && texture->format == SDL_PIXELFORMAT_ARGB8888) {
  16.483 -        /*
  16.484 -           if (renderdata->glTextureRangeAPPLE) {
  16.485 -           renderdata->glTextureRangeAPPLE(data->type,
  16.486 -           texture->h * data->pitch,
  16.487 -           data->pixels);
  16.488 -           }
  16.489 -         */
  16.490          renderdata->glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
  16.491          renderdata->glTexImage2D(data->type, 0, internalFormat, texture_w,
  16.492                                   texture_h, 0, format, type, data->pixels);
  16.493 -    } else
  16.494 -#endif
  16.495 +    }
  16.496 +    else
  16.497  #endif
  16.498      {
  16.499          renderdata->glTexImage2D(data->type, 0, internalFormat, texture_w,
  16.500 @@ -801,26 +450,13 @@
  16.501      return 0;
  16.502  }
  16.503  
  16.504 -static int
  16.505 -GL_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
  16.506 -                      void **pixels, int *pitch)
  16.507 -{
  16.508 -    GL_TextureData *data = (GL_TextureData *) texture->driverdata;
  16.509 -
  16.510 -    *pixels = data->pixels;
  16.511 -    *pitch = data->pitch;
  16.512 -    return 0;
  16.513 -}
  16.514 -
  16.515  static void
  16.516  SetupTextureUpdate(GL_RenderData * renderdata, SDL_Texture * texture,
  16.517                     int pitch)
  16.518  {
  16.519      renderdata->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
  16.520      renderdata->glPixelStorei(GL_UNPACK_ROW_LENGTH,
  16.521 -                              (pitch / bytes_per_pixel(texture->format)) /
  16.522 -                              ((GL_TextureData *) texture->driverdata)->
  16.523 -                              HACK_RYAN_FIXME);
  16.524 +                              (pitch / SDL_BYTESPERPIXEL(texture->format)));
  16.525  }
  16.526  
  16.527  static int
  16.528 @@ -851,18 +487,13 @@
  16.529  
  16.530  static int
  16.531  GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  16.532 -               const SDL_Rect * rect, int markDirty, void **pixels,
  16.533 -               int *pitch)
  16.534 +               const SDL_Rect * rect, void **pixels, int *pitch)
  16.535  {
  16.536      GL_TextureData *data = (GL_TextureData *) texture->driverdata;
  16.537  
  16.538 -    if (markDirty) {
  16.539 -        SDL_AddDirtyRect(&data->dirty, rect);
  16.540 -    }
  16.541 -
  16.542      *pixels =
  16.543          (void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
  16.544 -                  rect->x * bytes_per_pixel(texture->format));
  16.545 +                  rect->x * SDL_BYTESPERPIXEL(texture->format));
  16.546      *pitch = data->pitch;
  16.547      return 0;
  16.548  }
  16.549 @@ -870,18 +501,17 @@
  16.550  static void
  16.551  GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
  16.552  {
  16.553 -}
  16.554 +    GL_RenderData *renderdata = (GL_RenderData *) renderer->driverdata;
  16.555 +    GL_TextureData *data = (GL_TextureData *) texture->driverdata;
  16.556  
  16.557 -static void
  16.558 -GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, int numrects,
  16.559 -                const SDL_Rect * rects)
  16.560 -{
  16.561 -    GL_TextureData *data = (GL_TextureData *) texture->driverdata;
  16.562 -    int i;
  16.563 +    GL_ActivateRenderer(renderer);
  16.564  
  16.565 -    for (i = 0; i < numrects; ++i) {
  16.566 -        SDL_AddDirtyRect(&data->dirty, &rects[i]);
  16.567 -    }
  16.568 +    SetupTextureUpdate(renderdata, texture, data->pitch);
  16.569 +    renderdata->glEnable(data->type);
  16.570 +    renderdata->glBindTexture(data->type, data->texture);
  16.571 +    renderdata->glTexSubImage2D(data->type, 0, 0, 0, texture->w, texture->h,
  16.572 +                                data->format, data->formattype, data->pixels);
  16.573 +    renderdata->glDisable(data->type);
  16.574  }
  16.575  
  16.576  static void
  16.577 @@ -1056,28 +686,6 @@
  16.578  
  16.579      GL_ActivateRenderer(renderer);
  16.580  
  16.581 -    if (texturedata->dirty.list) {
  16.582 -        SDL_DirtyRect *dirty;
  16.583 -        void *pixels;
  16.584 -        int bpp = bytes_per_pixel(texture->format);
  16.585 -        int pitch = texturedata->pitch;
  16.586 -
  16.587 -        SetupTextureUpdate(data, texture, pitch);
  16.588 -        data->glEnable(texturedata->type);
  16.589 -        data->glBindTexture(texturedata->type, texturedata->texture);
  16.590 -        for (dirty = texturedata->dirty.list; dirty; dirty = dirty->next) {
  16.591 -            SDL_Rect *rect = &dirty->rect;
  16.592 -            pixels =
  16.593 -                (void *) ((Uint8 *) texturedata->pixels + rect->y * pitch +
  16.594 -                          rect->x * bpp);
  16.595 -            data->glTexSubImage2D(texturedata->type, 0, rect->x, rect->y,
  16.596 -                                  rect->w / texturedata->HACK_RYAN_FIXME,
  16.597 -                                  rect->h, texturedata->format,
  16.598 -                                  texturedata->formattype, pixels);
  16.599 -        }
  16.600 -        SDL_ClearDirtyRects(&texturedata->dirty);
  16.601 -    }
  16.602 -
  16.603      minx = dstrect->x;
  16.604      miny = dstrect->y;
  16.605      maxx = dstrect->x + dstrect->w;
  16.606 @@ -1106,12 +714,6 @@
  16.607  
  16.608      GL_SetBlendMode(data, texture->blendMode);
  16.609  
  16.610 -    /* Set up the shader for the copy, if any */
  16.611 -    if (texturedata->shader) {
  16.612 -        data->glEnable(GL_FRAGMENT_PROGRAM_ARB);
  16.613 -        data->glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, texturedata->shader);
  16.614 -    }
  16.615 -
  16.616      data->glBegin(GL_TRIANGLE_STRIP);
  16.617      data->glTexCoord2f(minu, minv);
  16.618      data->glVertex2f((GLfloat) minx, (GLfloat) miny);
  16.619 @@ -1123,10 +725,6 @@
  16.620      data->glVertex2f((GLfloat) maxx, (GLfloat) maxy);
  16.621      data->glEnd();
  16.622  
  16.623 -    if (texturedata->shader) {
  16.624 -        data->glDisable(GL_FRAGMENT_PROGRAM_ARB);
  16.625 -    }
  16.626 -
  16.627      data->glDisable(texturedata->type);
  16.628  
  16.629      return 0;
  16.630 @@ -1155,13 +753,13 @@
  16.631  
  16.632      data->glPixelStorei(GL_PACK_ALIGNMENT, 1);
  16.633      data->glPixelStorei(GL_PACK_ROW_LENGTH,
  16.634 -                        (pitch / bytes_per_pixel(pixel_format)));
  16.635 +                        (pitch / SDL_BYTESPERPIXEL(pixel_format)));
  16.636  
  16.637      data->glReadPixels(rect->x, (h-rect->y)-rect->h, rect->w, rect->h,
  16.638                         format, type, pixels);
  16.639  
  16.640      /* Flip the rows to be top-down */
  16.641 -    length = rect->w * bytes_per_pixel(pixel_format);
  16.642 +    length = rect->w * SDL_BYTESPERPIXEL(pixel_format);
  16.643      src = (Uint8*)pixels + (rect->h-1)*pitch;
  16.644      dst = (Uint8*)pixels;
  16.645      tmp = SDL_stack_alloc(Uint8, length);
  16.646 @@ -1201,7 +799,7 @@
  16.647  
  16.648      data->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
  16.649      data->glPixelStorei(GL_UNPACK_ROW_LENGTH,
  16.650 -                        (pitch / bytes_per_pixel(pixel_format)));
  16.651 +                        (pitch / SDL_BYTESPERPIXEL(pixel_format)));
  16.652  
  16.653      /* Flip the rows to be bottom-up */
  16.654      length = rect->h * rect->w * pitch;
  16.655 @@ -1244,13 +842,9 @@
  16.656      if (data->texture) {
  16.657          renderdata->glDeleteTextures(1, &data->texture);
  16.658      }
  16.659 -    if (data->palette) {
  16.660 -        SDL_free(data->palette);
  16.661 -    }
  16.662      if (data->pixels) {
  16.663          SDL_free(data->pixels);
  16.664      }
  16.665 -    SDL_FreeDirtyRects(&data->dirty);
  16.666      SDL_free(data);
  16.667      texture->driverdata = NULL;
  16.668  }
  16.669 @@ -1262,16 +856,6 @@
  16.670  
  16.671      if (data) {
  16.672          if (data->context) {
  16.673 -            if (data->GL_ARB_fragment_program_supported) {
  16.674 -                data->glDisable(GL_FRAGMENT_PROGRAM_ARB);
  16.675 -                data->glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
  16.676 -                if (data->fragment_program_UYVY &&
  16.677 -                    data->fragment_program_UYVY != ~0) {
  16.678 -                    data->glDeleteProgramsARB(1,
  16.679 -                                              &data->fragment_program_UYVY);
  16.680 -                }
  16.681 -            }
  16.682 -
  16.683              /* SDL_GL_MakeCurrent(0, NULL); *//* doesn't do anything */
  16.684              SDL_GL_DeleteContext(data->context);
  16.685          }
    17.1 --- a/src/render/opengles/SDL_renderer_gles.c	Wed Feb 02 22:55:12 2011 -0800
    17.2 +++ b/src/render/opengles/SDL_renderer_gles.c	Thu Feb 03 00:19:40 2011 -0800
    17.3 @@ -49,19 +49,13 @@
    17.4  static void GLES_WindowEvent(SDL_Renderer * renderer,
    17.5                               const SDL_WindowEvent *event);
    17.6  static int GLES_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
    17.7 -static int GLES_QueryTexturePixels(SDL_Renderer * renderer,
    17.8 -                                   SDL_Texture * texture, void **pixels,
    17.9 -                                   int *pitch);
   17.10  static int GLES_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   17.11                                const SDL_Rect * rect, const void *pixels,
   17.12                                int pitch);
   17.13  static int GLES_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   17.14 -                            const SDL_Rect * rect, int markDirty,
   17.15 -                            void **pixels, int *pitch);
   17.16 +                            const SDL_Rect * rect, void **pixels, int *pitch);
   17.17  static void GLES_UnlockTexture(SDL_Renderer * renderer,
   17.18                                 SDL_Texture * texture);
   17.19 -static void GLES_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   17.20 -                              int numrects, const SDL_Rect * rects);
   17.21  static int GLES_RenderDrawPoints(SDL_Renderer * renderer,
   17.22                                   const SDL_Point * points, int count);
   17.23  static int GLES_RenderDrawLines(SDL_Renderer * renderer,
   17.24 @@ -82,15 +76,8 @@
   17.25      {
   17.26       "opengl_es",
   17.27       (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
   17.28 -     6,
   17.29 -     {
   17.30 -      /* OpenGL ES 1.x supported formats list */
   17.31 -      SDL_PIXELFORMAT_RGBA4444,
   17.32 -      SDL_PIXELFORMAT_RGBA5551,
   17.33 -      SDL_PIXELFORMAT_RGB565,
   17.34 -      SDL_PIXELFORMAT_RGB24,
   17.35 -      SDL_PIXELFORMAT_BGR888,
   17.36 -      SDL_PIXELFORMAT_ABGR8888},
   17.37 +     1,
   17.38 +     {SDL_PIXELFORMAT_ABGR8888},
   17.39       0,
   17.40       0}
   17.41  };
   17.42 @@ -125,7 +112,6 @@
   17.43      GLenum formattype;
   17.44      void *pixels;
   17.45      int pitch;
   17.46 -    SDL_DirtyRectList dirty;
   17.47  } GLES_TextureData;
   17.48  
   17.49  static void
   17.50 @@ -205,11 +191,9 @@
   17.51  
   17.52      renderer->WindowEvent = GLES_WindowEvent;
   17.53      renderer->CreateTexture = GLES_CreateTexture;
   17.54 -    renderer->QueryTexturePixels = GLES_QueryTexturePixels;
   17.55      renderer->UpdateTexture = GLES_UpdateTexture;
   17.56      renderer->LockTexture = GLES_LockTexture;
   17.57      renderer->UnlockTexture = GLES_UnlockTexture;
   17.58 -    renderer->DirtyTexture = GLES_DirtyTexture;
   17.59      renderer->RenderDrawPoints = GLES_RenderDrawPoints;
   17.60      renderer->RenderDrawLines = GLES_RenderDrawLines;
   17.61      renderer->RenderFillRects = GLES_RenderFillRects;
   17.62 @@ -343,32 +327,11 @@
   17.63      GLES_ActivateRenderer(renderer);
   17.64  
   17.65      switch (texture->format) {
   17.66 -    case SDL_PIXELFORMAT_RGB24:
   17.67 -        internalFormat = GL_RGB;
   17.68 -        format = GL_RGB;
   17.69 -        type = GL_UNSIGNED_BYTE;
   17.70 -        break;
   17.71 -    case SDL_PIXELFORMAT_BGR888:
   17.72      case SDL_PIXELFORMAT_ABGR8888:
   17.73          internalFormat = GL_RGBA;
   17.74          format = GL_RGBA;
   17.75          type = GL_UNSIGNED_BYTE;
   17.76          break;
   17.77 -    case SDL_PIXELFORMAT_RGB565:
   17.78 -        internalFormat = GL_RGB;
   17.79 -        format = GL_RGB;
   17.80 -        type = GL_UNSIGNED_SHORT_5_6_5;
   17.81 -        break;
   17.82 -    case SDL_PIXELFORMAT_RGBA5551:
   17.83 -        internalFormat = GL_RGBA;
   17.84 -        format = GL_RGBA;
   17.85 -        type = GL_UNSIGNED_SHORT_5_5_5_1;
   17.86 -        break;
   17.87 -    case SDL_PIXELFORMAT_RGBA4444:
   17.88 -        internalFormat = GL_RGBA;
   17.89 -        format = GL_RGBA;
   17.90 -        type = GL_UNSIGNED_SHORT_4_4_4_4;
   17.91 -        break;
   17.92      default:
   17.93          SDL_SetError("Texture format %s not supported by OpenGL ES",
   17.94                       SDL_GetPixelFormatName(texture->format));
   17.95 @@ -428,23 +391,10 @@
   17.96      return 0;
   17.97  }
   17.98  
   17.99 -static int
  17.100 -GLES_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
  17.101 -                        void **pixels, int *pitch)
  17.102 -{
  17.103 -    GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
  17.104 -
  17.105 -    *pixels = data->pixels;
  17.106 -    *pitch = data->pitch;
  17.107 -    return 0;
  17.108 -}
  17.109 -
  17.110  static void
  17.111  SetupTextureUpdate(GLES_RenderData * renderdata, SDL_Texture * texture,
  17.112                     int pitch)
  17.113  {
  17.114 -    GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
  17.115 -    renderdata->glBindTexture(data->type, data->texture);
  17.116      renderdata->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
  17.117  }
  17.118  
  17.119 @@ -463,8 +413,9 @@
  17.120      GLES_ActivateRenderer(renderer);
  17.121  
  17.122      renderdata->glGetError();
  17.123 +    SetupTextureUpdate(renderdata, texture, pitch);
  17.124      renderdata->glEnable(data->type);
  17.125 -    SetupTextureUpdate(renderdata, texture, pitch);
  17.126 +    renderdata->glBindTexture(data->type, data->texture);
  17.127  
  17.128      if( rect->w * bpp == pitch ) {
  17.129           temp_buffer = (void *)pixels; /* No need to reformat */
  17.130 @@ -498,15 +449,10 @@
  17.131  
  17.132  static int
  17.133  GLES_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  17.134 -                 const SDL_Rect * rect, int markDirty, void **pixels,
  17.135 -                 int *pitch)
  17.136 +                 const SDL_Rect * rect, void **pixels, int *pitch)
  17.137  {
  17.138      GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
  17.139  
  17.140 -    if (markDirty) {
  17.141 -        SDL_AddDirtyRect(&data->dirty, rect);
  17.142 -    }
  17.143 -
  17.144      *pixels =
  17.145          (void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
  17.146                    rect->x * SDL_BYTESPERPIXEL(texture->format));
  17.147 @@ -517,18 +463,18 @@
  17.148  static void
  17.149  GLES_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
  17.150  {
  17.151 -}
  17.152 +    GLES_RenderData *renderdata = (GLES_RenderData *) renderer->driverdata;
  17.153 +    GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
  17.154  
  17.155 -static void
  17.156 -GLES_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  17.157 -                  int numrects, const SDL_Rect * rects)
  17.158 -{
  17.159 -    GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
  17.160 -    int i;
  17.161 +    GLES_ActivateRenderer(renderer);
  17.162  
  17.163 -    for (i = 0; i < numrects; ++i) {
  17.164 -        SDL_AddDirtyRect(&data->dirty, &rects[i]);
  17.165 -    }
  17.166 +    SetupTextureUpdate(renderdata, texture, data->pitch);
  17.167 +    renderdata->glEnable(data->type);
  17.168 +    renderdata->glBindTexture(data->type, data->texture);
  17.169 +    renderdata->glTexSubImage2D(data->type, 0, 0, 0, texture->w,
  17.170 +                                texture->h, data->format, data->formattype,
  17.171 +                                data->pixels);
  17.172 +    renderdata->glDisable(data->type);
  17.173  }
  17.174  
  17.175  static void
  17.176 @@ -676,49 +622,6 @@
  17.177  
  17.178      data->glEnable(GL_TEXTURE_2D);
  17.179  
  17.180 -    if (texturedata->dirty.list) {
  17.181 -        SDL_DirtyRect *dirty;
  17.182 -        void *pixels;
  17.183 -        int bpp = SDL_BYTESPERPIXEL(texture->format);
  17.184 -        int pitch = texturedata->pitch;
  17.185 -
  17.186 -        SetupTextureUpdate(data, texture, pitch);
  17.187 -
  17.188 -        data->glBindTexture(texturedata->type, texturedata->texture);
  17.189 -        for (dirty = texturedata->dirty.list; dirty; dirty = dirty->next) {
  17.190 -            SDL_Rect *rect = &dirty->rect;
  17.191 -            pixels =
  17.192 -                (void *) ((Uint8 *) texturedata->pixels + rect->y * pitch +
  17.193 -                          rect->x * bpp);
  17.194 -            /*      There is no GL_UNPACK_ROW_LENGTH in OpenGLES 
  17.195 -               we must do this reformatting ourselves(!)
  17.196 -
  17.197 -               maybe it'd be a good idea to keep a temp buffer around
  17.198 -               for this purpose rather than allocating it each time
  17.199 -             */
  17.200 -            if( rect->x == 0 && rect->w * bpp == pitch ) {
  17.201 -                temp_buffer = pixels; /* Updating whole texture, no need to reformat */
  17.202 -            } else {
  17.203 -                temp_buffer = SDL_malloc(rect->w * rect->h * bpp);
  17.204 -                temp_ptr = temp_buffer;
  17.205 -                for (i = 0; i < rect->h; i++) {
  17.206 -                    SDL_memcpy(temp_ptr, pixels, rect->w * bpp);
  17.207 -                    temp_ptr += rect->w * bpp;
  17.208 -                    pixels += pitch;
  17.209 -                }
  17.210 -            }
  17.211 -
  17.212 -            data->glTexSubImage2D(texturedata->type, 0, rect->x, rect->y,
  17.213 -                                  rect->w, rect->h, texturedata->format,
  17.214 -                                  texturedata->formattype, temp_buffer);
  17.215 -
  17.216 -            if( temp_buffer != pixels ) {
  17.217 -                SDL_free(temp_buffer);
  17.218 -            }
  17.219 -        }
  17.220 -        SDL_ClearDirtyRects(&texturedata->dirty);
  17.221 -    }
  17.222 -
  17.223      data->glBindTexture(texturedata->type, texturedata->texture);
  17.224  
  17.225      if (texture->modMode) {
  17.226 @@ -818,7 +721,6 @@
  17.227      if (data->pixels) {
  17.228          SDL_free(data->pixels);
  17.229      }
  17.230 -    SDL_FreeDirtyRects(&data->dirty);
  17.231      SDL_free(data);
  17.232      texture->driverdata = NULL;
  17.233  }
    18.1 --- a/src/render/software/SDL_renderer_sw.c	Wed Feb 02 22:55:12 2011 -0800
    18.2 +++ b/src/render/software/SDL_renderer_sw.c	Thu Feb 03 00:19:40 2011 -0800
    18.3 @@ -23,7 +23,6 @@
    18.4  
    18.5  #include "../SDL_sysrender.h"
    18.6  #include "../../video/SDL_pixels_c.h"
    18.7 -#include "../../video/SDL_yuv_sw_c.h"
    18.8  
    18.9  
   18.10  /* SDL surface based renderer implementation */
   18.11 @@ -32,9 +31,6 @@
   18.12  static void SW_WindowEvent(SDL_Renderer * renderer,
   18.13                             const SDL_WindowEvent *event);
   18.14  static int SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   18.15 -static int SW_QueryTexturePixels(SDL_Renderer * renderer,
   18.16 -                                 SDL_Texture * texture, void **pixels,
   18.17 -                                 int *pitch);
   18.18  static int SW_SetTextureColorMod(SDL_Renderer * renderer,
   18.19                                   SDL_Texture * texture);
   18.20  static int SW_SetTextureAlphaMod(SDL_Renderer * renderer,
   18.21 @@ -45,8 +41,7 @@
   18.22                              const SDL_Rect * rect, const void *pixels,
   18.23                              int pitch);
   18.24  static int SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
   18.25 -                          const SDL_Rect * rect, int markDirty, void **pixels,
   18.26 -                          int *pitch);
   18.27 +                          const SDL_Rect * rect, void **pixels, int *pitch);
   18.28  static void SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   18.29  static int SW_RenderDrawPoints(SDL_Renderer * renderer,
   18.30                                 const SDL_Point * points, int count);
   18.31 @@ -70,7 +65,7 @@
   18.32      {
   18.33       "software",
   18.34       (SDL_RENDERER_PRESENTVSYNC),
   18.35 -     13,
   18.36 +     8,
   18.37       {
   18.38        SDL_PIXELFORMAT_RGB555,
   18.39        SDL_PIXELFORMAT_RGB565,
   18.40 @@ -79,12 +74,8 @@
   18.41        SDL_PIXELFORMAT_ARGB8888,
   18.42        SDL_PIXELFORMAT_RGBA8888,
   18.43        SDL_PIXELFORMAT_ABGR8888,
   18.44 -      SDL_PIXELFORMAT_BGRA8888,
   18.45 -      SDL_PIXELFORMAT_YV12,
   18.46 -      SDL_PIXELFORMAT_IYUV,
   18.47 -      SDL_PIXELFORMAT_YUY2,
   18.48 -      SDL_PIXELFORMAT_UYVY,
   18.49 -      SDL_PIXELFORMAT_YVYU},
   18.50 +      SDL_PIXELFORMAT_BGRA8888
   18.51 +     },
   18.52       0,
   18.53       0}
   18.54  };
   18.55 @@ -96,7 +87,6 @@
   18.56      SDL_Texture *texture;
   18.57      SDL_Surface surface;
   18.58      SDL_Renderer *renderer;
   18.59 -    SDL_DirtyRectList dirty;
   18.60  } SW_RenderData;
   18.61  
   18.62  static SDL_Texture *
   18.63 @@ -136,6 +126,7 @@
   18.64      SDL_Renderer *renderer;
   18.65      SW_RenderData *data;
   18.66      int i;
   18.67 +    int w, h;
   18.68      Uint32 format;
   18.69      int bpp;
   18.70      Uint32 Rmask, Gmask, Bmask, Amask;
   18.71 @@ -163,7 +154,6 @@
   18.72      }
   18.73      renderer->WindowEvent = SW_WindowEvent;
   18.74      renderer->CreateTexture = SW_CreateTexture;
   18.75 -    renderer->QueryTexturePixels = SW_QueryTexturePixels;
   18.76      renderer->SetTextureColorMod = SW_SetTextureColorMod;
   18.77      renderer->SetTextureAlphaMod = SW_SetTextureAlphaMod;
   18.78      renderer->SetTextureBlendMode = SW_SetTextureBlendMode;
   18.79 @@ -217,8 +207,8 @@
   18.80      }
   18.81  
   18.82      /* Create the textures we'll use for display */
   18.83 -    data->texture =
   18.84 -        CreateTexture(data->renderer, data->format, window->w, window->h);
   18.85 +    SDL_GetWindowSize(window, &w, &h);
   18.86 +    data->texture = CreateTexture(data->renderer, data->format, w, h);
   18.87      if (!data->texture) {
   18.88          SW_DestroyRenderer(renderer);
   18.89          return NULL;
   18.90 @@ -243,11 +233,12 @@
   18.91  
   18.92      if (data->updateSize) {
   18.93          /* Recreate the textures for the new window size */
   18.94 +        int w, h;
   18.95          if (data->texture) {
   18.96              DestroyTexture(data->renderer, data->texture);
   18.97          }
   18.98 -        data->texture = CreateTexture(data->renderer, data->format,
   18.99 -                                      window->w, window->h);
  18.100 +        SDL_GetWindowSize(window, &w, &h);
  18.101 +        data->texture = CreateTexture(data->renderer, data->format, w, h);
  18.102          if (data->texture) {
  18.103              data->updateSize = SDL_FALSE;
  18.104          }
  18.105 @@ -268,30 +259,25 @@
  18.106  static int
  18.107  SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
  18.108  {
  18.109 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  18.110 -        texture->driverdata =
  18.111 -            SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
  18.112 -    } else {
  18.113 -        int bpp;
  18.114 -        Uint32 Rmask, Gmask, Bmask, Amask;
  18.115 +    int bpp;
  18.116 +    Uint32 Rmask, Gmask, Bmask, Amask;
  18.117  
  18.118 -        if (!SDL_PixelFormatEnumToMasks
  18.119 -            (texture->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
  18.120 -            SDL_SetError("Unknown texture format");
  18.121 -            return -1;
  18.122 -        }
  18.123 +    if (!SDL_PixelFormatEnumToMasks
  18.124 +        (texture->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
  18.125 +        SDL_SetError("Unknown texture format");
  18.126 +        return -1;
  18.127 +    }
  18.128  
  18.129 -        texture->driverdata =
  18.130 -            SDL_CreateRGBSurface(0, texture->w, texture->h, bpp, Rmask, Gmask,
  18.131 -                                 Bmask, Amask);
  18.132 -        SDL_SetSurfaceColorMod(texture->driverdata, texture->r, texture->g,
  18.133 -                               texture->b);
  18.134 -        SDL_SetSurfaceAlphaMod(texture->driverdata, texture->a);
  18.135 -        SDL_SetSurfaceBlendMode(texture->driverdata, texture->blendMode);
  18.136 +    texture->driverdata =
  18.137 +        SDL_CreateRGBSurface(0, texture->w, texture->h, bpp, Rmask, Gmask,
  18.138 +                             Bmask, Amask);
  18.139 +    SDL_SetSurfaceColorMod(texture->driverdata, texture->r, texture->g,
  18.140 +                           texture->b);
  18.141 +    SDL_SetSurfaceAlphaMod(texture->driverdata, texture->a);
  18.142 +    SDL_SetSurfaceBlendMode(texture->driverdata, texture->blendMode);
  18.143  
  18.144 -        if (texture->access == SDL_TEXTUREACCESS_STATIC) {
  18.145 -            SDL_SetSurfaceRLE(texture->driverdata, 1);
  18.146 -        }
  18.147 +    if (texture->access == SDL_TEXTUREACCESS_STATIC) {
  18.148 +        SDL_SetSurfaceRLE(texture->driverdata, 1);
  18.149      }
  18.150  
  18.151      if (!texture->driverdata) {
  18.152 @@ -301,23 +287,6 @@
  18.153  }
  18.154  
  18.155  static int
  18.156 -SW_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
  18.157 -                      void **pixels, int *pitch)
  18.158 -{
  18.159 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  18.160 -        return SDL_SW_QueryYUVTexturePixels((SDL_SW_YUVTexture *)
  18.161 -                                            texture->driverdata, pixels,
  18.162 -                                            pitch);
  18.163 -    } else {
  18.164 -        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.165 -
  18.166 -        *pixels = surface->pixels;
  18.167 -        *pitch = surface->pitch;
  18.168 -        return 0;
  18.169 -    }
  18.170 -}
  18.171 -
  18.172 -static int
  18.173  SW_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture)
  18.174  {
  18.175      SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.176 @@ -343,56 +312,40 @@
  18.177  SW_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  18.178                   const SDL_Rect * rect, const void *pixels, int pitch)
  18.179  {
  18.180 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  18.181 -        return SDL_SW_UpdateYUVTexture((SDL_SW_YUVTexture *)
  18.182 -                                       texture->driverdata, rect, pixels,
  18.183 -                                       pitch);
  18.184 -    } else {
  18.185 -        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.186 -        Uint8 *src, *dst;
  18.187 -        int row;
  18.188 -        size_t length;
  18.189 +    SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.190 +    Uint8 *src, *dst;
  18.191 +    int row;
  18.192 +    size_t length;
  18.193  
  18.194 -        src = (Uint8 *) pixels;
  18.195 -        dst =
  18.196 -            (Uint8 *) surface->pixels + rect->y * surface->pitch +
  18.197 -            rect->x * surface->format->BytesPerPixel;
  18.198 -        length = rect->w * surface->format->BytesPerPixel;
  18.199 -        for (row = 0; row < rect->h; ++row) {
  18.200 -            SDL_memcpy(dst, src, length);
  18.201 -            src += pitch;
  18.202 -            dst += surface->pitch;
  18.203 -        }
  18.204 -        return 0;
  18.205 +    src = (Uint8 *) pixels;
  18.206 +    dst = (Uint8 *) surface->pixels +
  18.207 +                        rect->y * surface->pitch +
  18.208 +                        rect->x * surface->format->BytesPerPixel;
  18.209 +    length = rect->w * surface->format->BytesPerPixel;
  18.210 +    for (row = 0; row < rect->h; ++row) {
  18.211 +        SDL_memcpy(dst, src, length);
  18.212 +        src += pitch;
  18.213 +        dst += surface->pitch;
  18.214      }
  18.215 +    return 0;
  18.216  }
  18.217  
  18.218  static int
  18.219  SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  18.220 -               const SDL_Rect * rect, int markDirty, void **pixels,
  18.221 -               int *pitch)
  18.222 +               const SDL_Rect * rect, void **pixels, int *pitch)
  18.223  {
  18.224 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  18.225 -        return SDL_SW_LockYUVTexture((SDL_SW_YUVTexture *)
  18.226 -                                     texture->driverdata, rect, markDirty,
  18.227 -                                     pixels, pitch);
  18.228 -    } else {
  18.229 -        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.230 +    SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.231  
  18.232 -        *pixels =
  18.233 -            (void *) ((Uint8 *) surface->pixels + rect->y * surface->pitch +
  18.234 -                      rect->x * surface->format->BytesPerPixel);
  18.235 -        *pitch = surface->pitch;
  18.236 -        return 0;
  18.237 -    }
  18.238 +    *pixels =
  18.239 +        (void *) ((Uint8 *) surface->pixels + rect->y * surface->pitch +
  18.240 +                  rect->x * surface->format->BytesPerPixel);
  18.241 +    *pitch = surface->pitch;
  18.242 +    return 0;
  18.243  }
  18.244  
  18.245  static void
  18.246  SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
  18.247  {
  18.248 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  18.249 -        SDL_SW_UnlockYUVTexture((SDL_SW_YUVTexture *) texture->driverdata);
  18.250 -    }
  18.251  }
  18.252  
  18.253  static int
  18.254 @@ -420,7 +373,7 @@
  18.255          return 0;
  18.256      }
  18.257  
  18.258 -    if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
  18.259 +    if (data->renderer->LockTexture(data->renderer, texture, &rect,
  18.260                                      &data->surface.pixels,
  18.261                                      &data->surface.pitch) < 0) {
  18.262          return -1;
  18.263 @@ -484,7 +437,7 @@
  18.264          return 0;
  18.265      }
  18.266  
  18.267 -    if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
  18.268 +    if (data->renderer->LockTexture(data->renderer, texture, &rect,
  18.269                                      &data->surface.pixels,
  18.270                                      &data->surface.pitch) < 0) {
  18.271          return -1;
  18.272 @@ -558,7 +511,7 @@
  18.273              continue;
  18.274          }
  18.275  
  18.276 -        if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
  18.277 +        if (data->renderer->LockTexture(data->renderer, texture, &rect,
  18.278                                          &data->surface.pixels,
  18.279                                          &data->surface.pitch) < 0) {
  18.280              return -1;
  18.281 @@ -586,38 +539,31 @@
  18.282                const SDL_Rect * srcrect, const SDL_Rect * dstrect)
  18.283  {
  18.284      SW_RenderData *data = (SW_RenderData *) renderer->driverdata;
  18.285 +    SDL_Surface *surface;
  18.286 +    SDL_Rect real_srcrect;
  18.287 +    SDL_Rect real_dstrect;
  18.288      int status;
  18.289  
  18.290      if (!SW_ActivateRenderer(renderer)) {
  18.291          return -1;
  18.292      }
  18.293  
  18.294 -    if (data->renderer->LockTexture(data->renderer, data->texture,
  18.295 -                                    dstrect, 1, &data->surface.pixels,
  18.296 +    if (data->renderer->LockTexture(data->renderer, data->texture, dstrect,
  18.297 +                                    &data->surface.pixels,
  18.298                                      &data->surface.pitch) < 0) {
  18.299          return -1;
  18.300      }
  18.301  
  18.302 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  18.303 -        status =
  18.304 -            SDL_SW_CopyYUVToRGB((SDL_SW_YUVTexture *) texture->driverdata,
  18.305 -                                srcrect, data->format, dstrect->w, dstrect->h,
  18.306 -                                data->surface.pixels, data->surface.pitch);
  18.307 -    } else {
  18.308 -        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.309 -        SDL_Rect real_srcrect = *srcrect;
  18.310 -        SDL_Rect real_dstrect;
  18.311 +    surface = (SDL_Surface *) texture->driverdata;
  18.312 +    real_srcrect = *srcrect;
  18.313  
  18.314 -        data->surface.w = dstrect->w;
  18.315 -        data->surface.h = dstrect->h;
  18.316 -        data->surface.clip_rect.w = dstrect->w;
  18.317 -        data->surface.clip_rect.h = dstrect->h;
  18.318 -        real_dstrect = data->surface.clip_rect;
  18.319 +    data->surface.w = dstrect->w;
  18.320 +    data->surface.h = dstrect->h;
  18.321 +    data->surface.clip_rect.w = dstrect->w;
  18.322 +    data->surface.clip_rect.h = dstrect->h;
  18.323 +    real_dstrect = data->surface.clip_rect;
  18.324  
  18.325 -        status =
  18.326 -            SDL_LowerBlit(surface, &real_srcrect, &data->surface,
  18.327 -                          &real_dstrect);
  18.328 -    }
  18.329 +    status = SDL_LowerBlit(surface, &real_srcrect, &data->surface, &real_dstrect);
  18.330      data->renderer->UnlockTexture(data->renderer, data->texture);
  18.331      return status;
  18.332  }
  18.333 @@ -632,8 +578,8 @@
  18.334          return -1;
  18.335      }
  18.336  
  18.337 -    if (data->renderer->LockTexture(data->renderer, data->texture,
  18.338 -                                    rect, 0, &data->surface.pixels,
  18.339 +    if (data->renderer->LockTexture(data->renderer, data->texture, rect,
  18.340 +                                    &data->surface.pixels,
  18.341                                      &data->surface.pitch) < 0) {
  18.342          return -1;
  18.343      }
  18.344 @@ -656,8 +602,8 @@
  18.345          return -1;
  18.346      }
  18.347  
  18.348 -    if (data->renderer->LockTexture(data->renderer, data->texture,
  18.349 -                                    rect, 1, &data->surface.pixels,
  18.350 +    if (data->renderer->LockTexture(data->renderer, data->texture, rect,
  18.351 +                                    &data->surface.pixels,
  18.352                                      &data->surface.pitch) < 0) {
  18.353          return -1;
  18.354      }
  18.355 @@ -692,13 +638,9 @@
  18.356  static void
  18.357  SW_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
  18.358  {
  18.359 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  18.360 -        SDL_SW_DestroyYUVTexture((SDL_SW_YUVTexture *) texture->driverdata);
  18.361 -    } else {
  18.362 -        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.363 +    SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
  18.364  
  18.365 -        SDL_FreeSurface(surface);
  18.366 -    }
  18.367 +    SDL_FreeSurface(surface);
  18.368  }
  18.369  
  18.370  static void
  18.371 @@ -717,7 +659,6 @@
  18.372          if (data->renderer) {
  18.373              data->renderer->DestroyRenderer(data->renderer);
  18.374          }
  18.375 -        SDL_FreeDirtyRects(&data->dirty);
  18.376          SDL_free(data);
  18.377      }
  18.378      SDL_free(renderer);
    19.1 --- a/src/video/SDL_leaks.h	Wed Feb 02 22:55:12 2011 -0800
    19.2 +++ b/src/video/SDL_leaks.h	Thu Feb 03 00:19:40 2011 -0800
    19.3 @@ -29,4 +29,5 @@
    19.4  #ifdef CHECK_LEAKS
    19.5  extern int surfaces_allocated;
    19.6  #endif
    19.7 +
    19.8  /* vi: set ts=4 sw=4 expandtab: */
    20.1 --- a/src/video/SDL_rect.c	Wed Feb 02 22:55:12 2011 -0800
    20.2 +++ b/src/video/SDL_rect.c	Thu Feb 03 00:19:40 2011 -0800
    20.3 @@ -339,66 +339,4 @@
    20.4      return SDL_TRUE;
    20.5  }
    20.6  
    20.7 -void
    20.8 -SDL_AddDirtyRect(SDL_DirtyRectList * list, const SDL_Rect * rect)
    20.9 -{
   20.10 -    SDL_DirtyRect *dirty;
   20.11 -
   20.12 -    /* FIXME: At what point is this optimization too expensive? */
   20.13 -    for (dirty = list->list; dirty; dirty = dirty->next) {
   20.14 -        if (SDL_HasIntersection(&dirty->rect, rect)) {
   20.15 -            SDL_UnionRect(&dirty->rect, rect, &dirty->rect);
   20.16 -            return;
   20.17 -        }
   20.18 -    }
   20.19 -
   20.20 -    if (list->free) {
   20.21 -        dirty = list->free;
   20.22 -        list->free = dirty->next;
   20.23 -    } else {
   20.24 -        dirty = (SDL_DirtyRect *) SDL_malloc(sizeof(*dirty));
   20.25 -        if (!dirty) {
   20.26 -            return;
   20.27 -        }
   20.28 -    }
   20.29 -    dirty->rect = *rect;
   20.30 -    dirty->next = list->list;
   20.31 -    list->list = dirty;
   20.32 -}
   20.33 -
   20.34 -void
   20.35 -SDL_ClearDirtyRects(SDL_DirtyRectList * list)
   20.36 -{
   20.37 -    SDL_DirtyRect *prev, *curr;
   20.38 -
   20.39 -    /* Skip to the end of the free list */
   20.40 -    prev = NULL;
   20.41 -    for (curr = list->free; curr; curr = curr->next) {
   20.42 -        prev = curr;
   20.43 -    }
   20.44 -
   20.45 -    /* Add the list entries to the end */
   20.46 -    if (prev) {
   20.47 -        prev->next = list->list;
   20.48 -    } else {
   20.49 -        list->free = list->list;
   20.50 -    }
   20.51 -    list->list = NULL;
   20.52 -}
   20.53 -
   20.54 -void
   20.55 -SDL_FreeDirtyRects(SDL_DirtyRectList * list)
   20.56 -{
   20.57 -    while (list->list) {
   20.58 -        SDL_DirtyRect *elem = list->list;
   20.59 -        list->list = elem->next;
   20.60 -        SDL_free(elem);
   20.61 -    }
   20.62 -    while (list->free) {
   20.63 -        SDL_DirtyRect *elem = list->free;
   20.64 -        list->free = elem->next;
   20.65 -        SDL_free(elem);
   20.66 -    }
   20.67 -}
   20.68 -
   20.69  /* vi: set ts=4 sw=4 expandtab: */
    21.1 --- a/src/video/SDL_yuv_mmx.c	Wed Feb 02 22:55:12 2011 -0800
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,432 +0,0 @@
    21.4 -/*
    21.5 -    SDL - Simple DirectMedia Layer
    21.6 -    Copyright (C) 1997-2010 Sam Lantinga
    21.7 -
    21.8 -    This library is free software; you can redistribute it and/or
    21.9 -    modify it under the terms of the GNU Lesser General Public
   21.10 -    License as published by the Free Software Foundation; either
   21.11 -    version 2.1 of the License, or (at your option) any later version.
   21.12 -
   21.13 -    This library is distributed in the hope that it will be useful,
   21.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   21.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   21.16 -    Lesser General Public License for more details.
   21.17 -
   21.18 -    You should have received a copy of the GNU Lesser General Public
   21.19 -    License along with this library; if not, write to the Free Software
   21.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   21.21 -
   21.22 -    Sam Lantinga
   21.23 -    slouken@libsdl.org
   21.24 -*/
   21.25 -#include "SDL_config.h"
   21.26 -
   21.27 -#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   21.28 -
   21.29 -#include "SDL_stdinc.h"
   21.30 -
   21.31 -#include "mmx.h"
   21.32 -
   21.33 -/* *INDENT-OFF* */
   21.34 -
   21.35 -static mmx_t MMX_0080w    = { .ud = {0x00800080, 0x00800080} };
   21.36 -static mmx_t MMX_00FFw    = { .ud = {0x00ff00ff, 0x00ff00ff} };
   21.37 -static mmx_t MMX_FF00w    = { .ud = {0xff00ff00, 0xff00ff00} };
   21.38 -
   21.39 -static mmx_t MMX_Ycoeff   = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
   21.40 -
   21.41 -static mmx_t MMX_UbluRGB  = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
   21.42 -static mmx_t MMX_VredRGB  = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
   21.43 -static mmx_t MMX_UgrnRGB  = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
   21.44 -static mmx_t MMX_VgrnRGB  = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
   21.45 -
   21.46 -static mmx_t MMX_Ublu5x5  = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
   21.47 -static mmx_t MMX_Vred5x5  = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
   21.48 -static mmx_t MMX_Ugrn565  = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
   21.49 -static mmx_t MMX_Vgrn565  = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
   21.50 -
   21.51 -static mmx_t MMX_red565   = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
   21.52 -static mmx_t MMX_grn565   = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
   21.53 -
   21.54 -/**
   21.55 -   This MMX assembler is my first assembler/MMX program ever.
   21.56 -   Thus it may be buggy.
   21.57 -   Send patches to:
   21.58 -   mvogt@rhrk.uni-kl.de
   21.59 -
   21.60 -   After it worked fine I have "obfuscated" the code a bit to have
   21.61 -   more parallelism in the MMX units. This means I moved
   21.62 -   initialisation around and delayed other instructions.
   21.63 -   Performance measurement did not show that this brought any advantage
   21.64 -   but in theory it _should_ be faster this way.
   21.65 -
   21.66 -   The overall performance gain over the C based dither was 30%-40%.
   21.67 -   The MMX routine calculates 256bit=8RGB values in each cycle
   21.68 -   (4 for row1 & 4 for row2)
   21.69 -
   21.70 -   The red/green/blue coefficients are taken from the mpeg_play
   21.71 -   player. They look nice, but I don't know if you can have
   21.72 -   better values, to avoid integer rounding errors.
   21.73 -   
   21.74 -
   21.75 -   IMPORTANT:
   21.76 -   ==========
   21.77 -
   21.78 -   It is a requirement that the cr/cb/lum are 8 byte aligned and
   21.79 -   the out are 16byte aligned or you will/may get segfaults
   21.80 -
   21.81 -*/
   21.82 -
   21.83 -void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   21.84 -                              unsigned char *lum, unsigned char *cr,
   21.85 -                              unsigned char *cb, unsigned char *out,
   21.86 -                              int rows, int cols, int mod )
   21.87 -{
   21.88 -    Uint32 *row1;
   21.89 -    Uint32 *row2;
   21.90 -
   21.91 -    unsigned char* y = lum +cols*rows;    // Pointer to the end
   21.92 -    int x = 0;
   21.93 -    row1 = (Uint32 *)out;                 // 32 bit target
   21.94 -    row2 = (Uint32 *)out+cols+mod;        // start of second row
   21.95 -    mod = (mod+cols+mod)*4;               // increment for row1 in byte
   21.96 -
   21.97 -    __asm__ __volatile__ (
   21.98 -        // tap dance to workaround the inability to use %%ebx at will...
   21.99 -        //  move one thing to the stack...
  21.100 -        "pushl $0\n"  // save a slot on the stack.
  21.101 -        "pushl %%ebx\n"  // save %%ebx.
  21.102 -        "movl %0, %%ebx\n"  // put the thing in ebx.
  21.103 -        "movl %%ebx,4(%%esp)\n"  // put the thing in the stack slot.
  21.104 -        "popl %%ebx\n"  // get back %%ebx (the PIC register).
  21.105 -
  21.106 -        ".align 8\n"
  21.107 -        "1:\n"
  21.108 -
  21.109 -        // create Cr (result in mm1)
  21.110 -        "pushl %%ebx\n"
  21.111 -        "movl 4(%%esp),%%ebx\n"
  21.112 -        "movd (%%ebx),%%mm1\n"   //         0  0  0  0  v3 v2 v1 v0
  21.113 -        "popl %%ebx\n"
  21.114 -        "pxor %%mm7,%%mm7\n"      //         00 00 00 00 00 00 00 00
  21.115 -        "movd (%2), %%mm2\n"           //    0  0  0  0 l3 l2 l1 l0
  21.116 -        "punpcklbw %%mm7,%%mm1\n" //         0  v3 0  v2 00 v1 00 v0
  21.117 -        "punpckldq %%mm1,%%mm1\n" //         00 v1 00 v0 00 v1 00 v0
  21.118 -        "psubw %9,%%mm1\n"        // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
  21.119 -
  21.120 -        // create Cr_g (result in mm0)
  21.121 -        "movq %%mm1,%%mm0\n"           // r1 r1 r0 r0 r1 r1 r0 r0
  21.122 -        "pmullw %10,%%mm0\n"           // red*-46dec=0.7136*64
  21.123 -        "pmullw %11,%%mm1\n"           // red*89dec=1.4013*64
  21.124 -        "psraw  $6, %%mm0\n"           // red=red/64
  21.125 -        "psraw  $6, %%mm1\n"           // red=red/64
  21.126 -
  21.127 -        // create L1 L2 (result in mm2,mm4)
  21.128 -        // L2=lum+cols
  21.129 -        "movq (%2,%4),%%mm3\n"         //    0  0  0  0 L3 L2 L1 L0
  21.130 -        "punpckldq %%mm3,%%mm2\n"      //   L3 L2 L1 L0 l3 l2 l1 l0
  21.131 -        "movq %%mm2,%%mm4\n"           //   L3 L2 L1 L0 l3 l2 l1 l0
  21.132 -        "pand %12,%%mm2\n"             //   L3 0  L1  0 l3  0 l1  0
  21.133 -        "pand %13,%%mm4\n"             //   0  L2  0 L0  0 l2  0 l0
  21.134 -        "psrlw $8,%%mm2\n"             //   0  L3  0 L1  0 l3  0 l1
  21.135 -
  21.136 -        // create R (result in mm6)
  21.137 -        "movq %%mm2,%%mm5\n"           //   0 L3  0 L1  0 l3  0 l1
  21.138 -        "movq %%mm4,%%mm6\n"           //   0 L2  0 L0  0 l2  0 l0
  21.139 -        "paddsw  %%mm1, %%mm5\n"       // lum1+red:x R3 x R1 x r3 x r1
  21.140 -        "paddsw  %%mm1, %%mm6\n"       // lum1+red:x R2 x R0 x r2 x r0
  21.141 -        "packuswb %%mm5,%%mm5\n"       //  R3 R1 r3 r1 R3 R1 r3 r1
  21.142 -        "packuswb %%mm6,%%mm6\n"       //  R2 R0 r2 r0 R2 R0 r2 r0
  21.143 -        "pxor %%mm7,%%mm7\n"      //         00 00 00 00 00 00 00 00
  21.144 -        "punpcklbw %%mm5,%%mm6\n"      //  R3 R2 R1 R0 r3 r2 r1 r0
  21.145 -
  21.146 -        // create Cb (result in mm1)
  21.147 -        "movd (%1), %%mm1\n"      //         0  0  0  0  u3 u2 u1 u0
  21.148 -        "punpcklbw %%mm7,%%mm1\n" //         0  u3 0  u2 00 u1 00 u0
  21.149 -        "punpckldq %%mm1,%%mm1\n" //         00 u1 00 u0 00 u1 00 u0
  21.150 -        "psubw %9,%%mm1\n"        // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
  21.151 -
  21.152 -        // create Cb_g (result in mm5)
  21.153 -        "movq %%mm1,%%mm5\n"            // u1 u1 u0 u0 u1 u1 u0 u0
  21.154 -        "pmullw %14,%%mm5\n"            // blue*-109dec=1.7129*64
  21.155 -        "pmullw %15,%%mm1\n"            // blue*114dec=1.78125*64
  21.156 -        "psraw  $6, %%mm5\n"            // blue=red/64
  21.157 -        "psraw  $6, %%mm1\n"            // blue=blue/64
  21.158 -
  21.159 -        // create G (result in mm7)
  21.160 -        "movq %%mm2,%%mm3\n"      //   0  L3  0 L1  0 l3  0 l1
  21.161 -        "movq %%mm4,%%mm7\n"      //   0  L2  0 L0  0 l2  0 l1
  21.162 -        "paddsw  %%mm5, %%mm3\n"  // lum1+Cb_g:x G3t x G1t x g3t x g1t
  21.163 -        "paddsw  %%mm5, %%mm7\n"  // lum1+Cb_g:x G2t x G0t x g2t x g0t
  21.164 -        "paddsw  %%mm0, %%mm3\n"  // lum1+Cr_g:x G3  x G1  x g3  x g1
  21.165 -        "paddsw  %%mm0, %%mm7\n"  // lum1+blue:x G2  x G0  x g2  x g0
  21.166 -        "packuswb %%mm3,%%mm3\n"  // G3 G1 g3 g1 G3 G1 g3 g1
  21.167 -        "packuswb %%mm7,%%mm7\n"  // G2 G0 g2 g0 G2 G0 g2 g0
  21.168 -        "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0
  21.169 -
  21.170 -        // create B (result in mm5)
  21.171 -        "movq %%mm2,%%mm3\n"         //   0  L3  0 L1  0 l3  0 l1
  21.172 -        "movq %%mm4,%%mm5\n"         //   0  L2  0 L0  0 l2  0 l1
  21.173 -        "paddsw  %%mm1, %%mm3\n"     // lum1+blue:x B3 x B1 x b3 x b1
  21.174 -        "paddsw  %%mm1, %%mm5\n"     // lum1+blue:x B2 x B0 x b2 x b0
  21.175 -        "packuswb %%mm3,%%mm3\n"     // B3 B1 b3 b1 B3 B1 b3 b1
  21.176 -        "packuswb %%mm5,%%mm5\n"     // B2 B0 b2 b0 B2 B0 b2 b0
  21.177 -        "punpcklbw %%mm3,%%mm5\n"    // B3 B2 B1 B0 b3 b2 b1 b0
  21.178 -
  21.179 -        // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
  21.180 -
  21.181 -        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
  21.182 -        "pxor %%mm4,%%mm4\n"           //  0  0  0  0  0  0  0  0
  21.183 -        "movq %%mm6,%%mm1\n"           // R3 R2 R1 R0 r3 r2 r1 r0
  21.184 -        "movq %%mm5,%%mm3\n"           // B3 B2 B1 B0 b3 b2 b1 b0
  21.185 -
  21.186 -        // process lower lum
  21.187 -        "punpcklbw %%mm4,%%mm1\n"      //  0 r3  0 r2  0 r1  0 r0
  21.188 -        "punpcklbw %%mm4,%%mm3\n"      //  0 b3  0 b2  0 b1  0 b0
  21.189 -        "movq %%mm1,%%mm2\n"           //  0 r3  0 r2  0 r1  0 r0
  21.190 -        "movq %%mm3,%%mm0\n"           //  0 b3  0 b2  0 b1  0 b0
  21.191 -        "punpcklwd %%mm1,%%mm3\n"      //  0 r1  0 b1  0 r0  0 b0
  21.192 -        "punpckhwd %%mm2,%%mm0\n"      //  0 r3  0 b3  0 r2  0 b2
  21.193 -
  21.194 -        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
  21.195 -        "movq %%mm7,%%mm1\n"           // G3 G2 G1 G0 g3 g2 g1 g0
  21.196 -        "punpcklbw %%mm1,%%mm2\n"      // g3  0 g2  0 g1  0 g0  0
  21.197 -        "punpcklwd %%mm4,%%mm2\n"      //  0  0 g1  0  0  0 g0  0
  21.198 -        "por %%mm3, %%mm2\n"          //  0 r1 g1 b1  0 r0 g0 b0
  21.199 -        "movq %%mm2,(%3)\n"          // wrote out ! row1
  21.200 -
  21.201 -        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
  21.202 -        "punpcklbw %%mm1,%%mm4\n"      // g3  0 g2  0 g1  0 g0  0
  21.203 -        "punpckhwd %%mm2,%%mm4\n"      //  0  0 g3  0  0  0 g2  0
  21.204 -        "por %%mm0, %%mm4\n"          //  0 r3 g3 b3  0 r2 g2 b2
  21.205 -        "movq %%mm4,8(%3)\n"         // wrote out ! row1
  21.206 -
  21.207 -        // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
  21.208 -        // this can be done "destructive"
  21.209 -        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
  21.210 -        "punpckhbw %%mm2,%%mm6\n"      //  0 R3  0 R2  0 R1  0 R0
  21.211 -        "punpckhbw %%mm1,%%mm5\n"      // G3 B3 G2 B2 G1 B1 G0 B0
  21.212 -        "movq %%mm5,%%mm1\n"           // G3 B3 G2 B2 G1 B1 G0 B0
  21.213 -        "punpcklwd %%mm6,%%mm1\n"      //  0 R1 G1 B1  0 R0 G0 B0
  21.214 -        "movq %%mm1,(%5)\n"          // wrote out ! row2
  21.215 -        "punpckhwd %%mm6,%%mm5\n"      //  0 R3 G3 B3  0 R2 G2 B2
  21.216 -        "movq %%mm5,8(%5)\n"         // wrote out ! row2
  21.217 -
  21.218 -        "addl $4,%2\n"            // lum+4
  21.219 -        "leal 16(%3),%3\n"        // row1+16
  21.220 -        "leal 16(%5),%5\n"        // row2+16
  21.221 -        "addl $2,(%%esp)\n"        // cr+2
  21.222 -        "addl $2,%1\n"           // cb+2
  21.223 -
  21.224 -        "addl $4,%6\n"            // x+4
  21.225 -        "cmpl %4,%6\n"
  21.226 -
  21.227 -        "jl 1b\n"
  21.228 -        "addl %4,%2\n" // lum += cols
  21.229 -        "addl %8,%3\n" // row1+= mod
  21.230 -        "addl %8,%5\n" // row2+= mod
  21.231 -        "movl $0,%6\n" // x=0
  21.232 -        "cmpl %7,%2\n"
  21.233 -        "jl 1b\n"
  21.234 -
  21.235 -        "addl $4,%%esp\n"  // get rid of the stack slot we reserved.
  21.236 -        "emms\n"  // reset MMX registers.
  21.237 -        :
  21.238 -        : "m" (cr), "r"(cb),"r"(lum),
  21.239 -          "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
  21.240 -          "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
  21.241 -          "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),
  21.242 -          "m"(MMX_UbluRGB)
  21.243 -    );
  21.244 -}
  21.245 -
  21.246 -void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
  21.247 -                             unsigned char *lum, unsigned char *cr,
  21.248 -                             unsigned char *cb, unsigned char *out,
  21.249 -                             int rows, int cols, int mod )
  21.250 -{
  21.251 -    Uint16 *row1;
  21.252 -    Uint16 *row2;
  21.253 -
  21.254 -    unsigned char* y = lum +cols*rows;    /* Pointer to the end */
  21.255 -    int x = 0;
  21.256 -    row1 = (Uint16 *)out;                 /* 16 bit target */
  21.257 -    row2 = (Uint16 *)out+cols+mod;        /* start of second row  */
  21.258 -    mod = (mod+cols+mod)*2;               /* increment for row1 in byte */
  21.259 -
  21.260 -    __asm__ __volatile__(
  21.261 -        // tap dance to workaround the inability to use %%ebx at will...
  21.262 -        //  move one thing to the stack...
  21.263 -        "pushl $0\n"  // save a slot on the stack.
  21.264 -        "pushl %%ebx\n"  // save %%ebx.
  21.265 -        "movl %0, %%ebx\n"  // put the thing in ebx.
  21.266 -        "movl %%ebx, 4(%%esp)\n"  // put the thing in the stack slot.
  21.267 -        "popl %%ebx\n"  // get back %%ebx (the PIC register).
  21.268 -
  21.269 -        ".align 8\n"
  21.270 -        "1:\n"
  21.271 -
  21.272 -        "movd           (%1),                   %%mm0\n" // 4 Cb         0  0  0  0 u3 u2 u1 u0
  21.273 -        "pxor           %%mm7,                  %%mm7\n"
  21.274 -        "pushl %%ebx\n"
  21.275 -        "movl 4(%%esp), %%ebx\n"
  21.276 -        "movd (%%ebx), %%mm1\n"   // 4 Cr                0  0  0  0 v3 v2 v1 v0
  21.277 -        "popl %%ebx\n"
  21.278 -
  21.279 -        "punpcklbw      %%mm7,                  %%mm0\n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
  21.280 -        "punpcklbw      %%mm7,                  %%mm1\n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
  21.281 -        "psubw          %9,                     %%mm0\n"
  21.282 -        "psubw          %9,                     %%mm1\n"
  21.283 -        "movq           %%mm0,                  %%mm2\n" // Cb                   0 u3  0 u2  0 u1  0 u0
  21.284 -        "movq           %%mm1,                  %%mm3\n" // Cr
  21.285 -        "pmullw         %10,                    %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
  21.286 -        "movq           (%2),                   %%mm6\n" // L1      l7 L6 L5 L4 L3 L2 L1 L0
  21.287 -        "pmullw         %11,                    %%mm0\n" // Cb2blue
  21.288 -        "pand           %12,                    %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
  21.289 -        "pmullw         %13,                    %%mm3\n" // Cr2green
  21.290 -        "movq           (%2),                   %%mm7\n" // L2
  21.291 -        "pmullw         %14,                    %%mm1\n" // Cr2red
  21.292 -        "psrlw          $8,                     %%mm7\n"        // L2           00 L7 00 L5 00 L3 00 L1
  21.293 -        "pmullw         %15,                    %%mm6\n" // lum1
  21.294 -        "paddw          %%mm3,                  %%mm2\n" // Cb2green + Cr2green == green
  21.295 -        "pmullw         %15,                    %%mm7\n" // lum2
  21.296 -
  21.297 -        "movq           %%mm6,                  %%mm4\n" // lum1
  21.298 -        "paddw          %%mm0,                  %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
  21.299 -        "movq           %%mm4,                  %%mm5\n" // lum1
  21.300 -        "paddw          %%mm1,                  %%mm4\n" // lum1 +red  00 R6 00 R4 00 R2 00 R0
  21.301 -        "paddw          %%mm2,                  %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0
  21.302 -        "psraw          $6,                     %%mm4\n" // R1 0 .. 64
  21.303 -        "movq           %%mm7,                  %%mm3\n" // lum2                       00 L7 00 L5 00 L3 00 L1
  21.304 -        "psraw          $6,                     %%mm5\n" // G1  - .. +
  21.305 -        "paddw          %%mm0,                  %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
  21.306 -        "psraw          $6,                     %%mm6\n" // B1         0 .. 64
  21.307 -        "packuswb       %%mm4,                  %%mm4\n" // R1 R1
  21.308 -        "packuswb       %%mm5,                  %%mm5\n" // G1 G1
  21.309 -        "packuswb       %%mm6,                  %%mm6\n" // B1 B1
  21.310 -        "punpcklbw      %%mm4,                  %%mm4\n"
  21.311 -        "punpcklbw      %%mm5,                  %%mm5\n"
  21.312 -
  21.313 -        "pand           %16,                    %%mm4\n"
  21.314 -        "psllw          $3,                     %%mm5\n" // GREEN       1
  21.315 -        "punpcklbw      %%mm6,                  %%mm6\n"
  21.316 -        "pand           %17,                    %%mm5\n"
  21.317 -        "pand           %16,                    %%mm6\n"
  21.318 -        "por            %%mm5,                  %%mm4\n" //
  21.319 -        "psrlw          $11,                    %%mm6\n" // BLUE        1
  21.320 -        "movq           %%mm3,                  %%mm5\n" // lum2
  21.321 -        "paddw          %%mm1,                  %%mm3\n" // lum2 +red      00 R7 00 R5 00 R3 00 R1
  21.322 -        "paddw          %%mm2,                  %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
  21.323 -        "psraw          $6,                     %%mm3\n" // R2
  21.324 -        "por            %%mm6,                  %%mm4\n" // MM4
  21.325 -        "psraw          $6,                     %%mm5\n" // G2
  21.326 -        "movq           (%2, %4),               %%mm6\n" // L3 load lum2
  21.327 -        "psraw          $6,                     %%mm7\n"
  21.328 -        "packuswb       %%mm3,                  %%mm3\n"
  21.329 -        "packuswb       %%mm5,                  %%mm5\n"
  21.330 -        "packuswb       %%mm7,                  %%mm7\n"
  21.331 -        "pand           %12,                    %%mm6\n" // L3
  21.332 -        "punpcklbw      %%mm3,                  %%mm3\n"
  21.333 -        "punpcklbw      %%mm5,                  %%mm5\n"
  21.334 -        "pmullw         %15,                    %%mm6\n" // lum3
  21.335 -        "punpcklbw      %%mm7,                  %%mm7\n"
  21.336 -        "psllw          $3,                     %%mm5\n" // GREEN 2
  21.337 -        "pand           %16,                    %%mm7\n"
  21.338 -        "pand           %16,                    %%mm3\n"
  21.339 -        "psrlw          $11,                    %%mm7\n" // BLUE  2
  21.340 -        "pand           %17,                    %%mm5\n"
  21.341 -        "por            %%mm7,                  %%mm3\n"
  21.342 -        "movq           (%2,%4),                %%mm7\n" // L4 load lum2
  21.343 -        "por            %%mm5,                  %%mm3\n" //
  21.344 -        "psrlw          $8,                     %%mm7\n" // L4
  21.345 -        "movq           %%mm4,                  %%mm5\n"
  21.346 -        "punpcklwd      %%mm3,                  %%mm4\n"
  21.347 -        "pmullw         %15,                    %%mm7\n" // lum4
  21.348 -        "punpckhwd      %%mm3,                  %%mm5\n"
  21.349 -
  21.350 -        "movq           %%mm4,                  (%3)\n"  // write row1
  21.351 -        "movq           %%mm5,                  8(%3)\n" // write row1
  21.352 -
  21.353 -        "movq           %%mm6,                  %%mm4\n" // Lum3
  21.354 -        "paddw          %%mm0,                  %%mm6\n" // Lum3 +blue
  21.355 -
  21.356 -        "movq           %%mm4,                  %%mm5\n" // Lum3
  21.357 -        "paddw          %%mm1,                  %%mm4\n" // Lum3 +red
  21.358 -        "paddw          %%mm2,                  %%mm5\n" // Lum3 +green
  21.359 -        "psraw          $6,                     %%mm4\n"
  21.360 -        "movq           %%mm7,                  %%mm3\n" // Lum4
  21.361 -        "psraw          $6,                     %%mm5\n"
  21.362 -        "paddw          %%mm0,                  %%mm7\n" // Lum4 +blue
  21.363 -        "psraw          $6,                     %%mm6\n" // Lum3 +blue
  21.364 -        "movq           %%mm3,                  %%mm0\n" // Lum4
  21.365 -        "packuswb       %%mm4,                  %%mm4\n"
  21.366 -        "paddw          %%mm1,                  %%mm3\n" // Lum4 +red
  21.367 -        "packuswb       %%mm5,                  %%mm5\n"
  21.368 -        "paddw          %%mm2,                  %%mm0\n" // Lum4 +green
  21.369 -        "packuswb       %%mm6,                  %%mm6\n"
  21.370 -        "punpcklbw      %%mm4,                  %%mm4\n"
  21.371 -        "punpcklbw      %%mm5,                  %%mm5\n"
  21.372 -        "punpcklbw      %%mm6,                  %%mm6\n"
  21.373 -        "psllw          $3,                     %%mm5\n" // GREEN 3
  21.374 -        "pand           %16,                    %%mm4\n"
  21.375 -        "psraw          $6,                     %%mm3\n" // psr 6
  21.376 -        "psraw          $6,                     %%mm0\n"
  21.377 -        "pand           %16,                    %%mm6\n" // BLUE
  21.378 -        "pand           %17,                    %%mm5\n"
  21.379 -        "psrlw          $11,                    %%mm6\n" // BLUE  3
  21.380 -        "por            %%mm5,                  %%mm4\n"
  21.381 -        "psraw          $6,                     %%mm7\n"
  21.382 -        "por            %%mm6,                  %%mm4\n"
  21.383 -        "packuswb       %%mm3,                  %%mm3\n"
  21.384 -        "packuswb       %%mm0,                  %%mm0\n"
  21.385 -        "packuswb       %%mm7,                  %%mm7\n"
  21.386 -        "punpcklbw      %%mm3,                  %%mm3\n"
  21.387 -        "punpcklbw      %%mm0,                  %%mm0\n"
  21.388 -        "punpcklbw      %%mm7,                  %%mm7\n"
  21.389 -        "pand           %16,                    %%mm3\n"
  21.390 -        "pand           %16,                    %%mm7\n" // BLUE
  21.391 -        "psllw          $3,                     %%mm0\n" // GREEN 4
  21.392 -        "psrlw          $11,                    %%mm7\n"
  21.393 -        "pand           %17,                    %%mm0\n"
  21.394 -        "por            %%mm7,                  %%mm3\n"
  21.395 -        "por            %%mm0,                  %%mm3\n"
  21.396 -
  21.397 -        "movq           %%mm4,                  %%mm5\n"
  21.398 -
  21.399 -        "punpcklwd      %%mm3,                  %%mm4\n"
  21.400 -        "punpckhwd      %%mm3,                  %%mm5\n"
  21.401 -
  21.402 -        "movq           %%mm4,                  (%5)\n"
  21.403 -        "movq           %%mm5,                  8(%5)\n"
  21.404 -
  21.405 -        "addl           $8,                     %6\n"
  21.406 -        "addl           $8,                     %2\n"
  21.407 -        "addl           $4,                     (%%esp)\n"
  21.408 -        "addl           $4,                     %1\n"
  21.409 -        "cmpl           %4,                     %6\n"
  21.410 -        "leal           16(%3),                 %3\n"
  21.411 -        "leal           16(%5),%5\n" // row2+16
  21.412 -
  21.413 -        "jl             1b\n"
  21.414 -        "addl           %4,     %2\n" // lum += cols
  21.415 -        "addl           %8,     %3\n" // row1+= mod
  21.416 -        "addl           %8,     %5\n" // row2+= mod
  21.417 -        "movl           $0,     %6\n" // x=0
  21.418 -        "cmpl           %7,     %2\n"
  21.419 -        "jl             1b\n"
  21.420 -        "addl $4, %%esp\n"  // get rid of the stack slot we reserved.
  21.421 -        "emms\n"
  21.422 -        :
  21.423 -        : "m" (cr), "r"(cb),"r"(lum),
  21.424 -          "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
  21.425 -          "m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
  21.426 -          "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),
  21.427 -          "m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565)
  21.428 -    );
  21.429 -}
  21.430 -
  21.431 -/* *INDENT-ON* */
  21.432 -
  21.433 -#endif /* GCC3 i386 inline assembly */
  21.434 -
  21.435 -/* vi: set ts=4 sw=4 expandtab: */
    22.1 --- a/src/video/SDL_yuv_sw.c	Wed Feb 02 22:55:12 2011 -0800
    22.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.3 @@ -1,1322 +0,0 @@
    22.4 -/*
    22.5 -    SDL - Simple DirectMedia Layer
    22.6 -    Copyright (C) 1997-2010 Sam Lantinga
    22.7 -
    22.8 -    This library is free software; you can redistribute it and/or
    22.9 -    modify it under the terms of the GNU Lesser General Public
   22.10 -    License as published by the Free Software Foundation; either
   22.11 -    version 2.1 of the License, or (at your option) any later version.
   22.12 -
   22.13 -    This library is distributed in the hope that it will be useful,
   22.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   22.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   22.16 -    Lesser General Public License for more details.
   22.17 -
   22.18 -    You should have received a copy of the GNU Lesser General Public
   22.19 -    License along with this library; if not, write to the Free Software
   22.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   22.21 -
   22.22 -    Sam Lantinga
   22.23 -    slouken@libsdl.org
   22.24 -*/
   22.25 -#include "SDL_config.h"
   22.26 -
   22.27 -/* This is the software implementation of the YUV texture support */
   22.28 -
   22.29 -/* This code was derived from code carrying the following copyright notices:
   22.30 -
   22.31 - * Copyright (c) 1995 The Regents of the University of California.
   22.32 - * All rights reserved.
   22.33 - * 
   22.34 - * Permission to use, copy, modify, and distribute this software and its
   22.35 - * documentation for any purpose, without fee, and without written agreement is
   22.36 - * hereby granted, provided that the above copyright notice and the following
   22.37 - * two paragraphs appear in all copies of this software.
   22.38 - * 
   22.39 - * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
   22.40 - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
   22.41 - * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
   22.42 - * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   22.43 - * 
   22.44 - * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
   22.45 - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
   22.46 - * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
   22.47 - * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
   22.48 - * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
   22.49 -
   22.50 - * Copyright (c) 1995 Erik Corry
   22.51 - * All rights reserved.
   22.52 - * 
   22.53 - * Permission to use, copy, modify, and distribute this software and its
   22.54 - * documentation for any purpose, without fee, and without written agreement is
   22.55 - * hereby granted, provided that the above copyright notice and the following
   22.56 - * two paragraphs appear in all copies of this software.
   22.57 - * 
   22.58 - * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
   22.59 - * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
   22.60 - * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
   22.61 - * OF THE POSSIBILITY OF SUCH DAMAGE.
   22.62 - * 
   22.63 - * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
   22.64 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   22.65 - * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
   22.66 - * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
   22.67 - * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
   22.68 -
   22.69 - * Portions of this software Copyright (c) 1995 Brown University.
   22.70 - * All rights reserved.
   22.71 - * 
   22.72 - * Permission to use, copy, modify, and distribute this software and its
   22.73 - * documentation for any purpose, without fee, and without written agreement
   22.74 - * is hereby granted, provided that the above copyright notice and the
   22.75 - * following two paragraphs appear in all copies of this software.
   22.76 - * 
   22.77 - * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
   22.78 - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
   22.79 - * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
   22.80 - * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   22.81 - * 
   22.82 - * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
   22.83 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   22.84 - * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
   22.85 - * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
   22.86 - * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
   22.87 - */
   22.88 -
   22.89 -#include "SDL_video.h"
   22.90 -#include "SDL_cpuinfo.h"
   22.91 -#include "SDL_yuv_sw_c.h"
   22.92 -
   22.93 -
   22.94 -/* The colorspace conversion functions */
   22.95 -
   22.96 -#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   22.97 -extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
   22.98 -                                    unsigned char *lum, unsigned char *cr,
   22.99 -                                    unsigned char *cb, unsigned char *out,
  22.100 -                                    int rows, int cols, int mod);
  22.101 -extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
  22.102 -                                    unsigned char *lum, unsigned char *cr,
  22.103 -                                    unsigned char *cb, unsigned char *out,
  22.104 -                                    int rows, int cols, int mod);
  22.105 -#endif
  22.106 -
  22.107 -static void
  22.108 -Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
  22.109 -                       unsigned char *lum, unsigned char *cr,
  22.110 -                       unsigned char *cb, unsigned char *out,
  22.111 -                       int rows, int cols, int mod)
  22.112 -{
  22.113 -    unsigned short *row1;
  22.114 -    unsigned short *row2;
  22.115 -    unsigned char *lum2;
  22.116 -    int x, y;
  22.117 -    int cr_r;
  22.118 -    int crb_g;
  22.119 -    int cb_b;
  22.120 -    int cols_2 = cols / 2;
  22.121 -
  22.122 -    row1 = (unsigned short *) out;
  22.123 -    row2 = row1 + cols + mod;
  22.124 -    lum2 = lum + cols;
  22.125 -
  22.126 -    mod += cols + mod;
  22.127 -
  22.128 -    y = rows / 2;
  22.129 -    while (y--) {
  22.130 -        x = cols_2;
  22.131 -        while (x--) {
  22.132 -            register int L;
  22.133 -
  22.134 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.135 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.136 -                + colortab[*cb + 2 * 256];
  22.137 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.138 -            ++cr;
  22.139 -            ++cb;
  22.140 -
  22.141 -            L = *lum++;
  22.142 -            *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  22.143 -                                        rgb_2_pix[L + crb_g] |
  22.144 -                                        rgb_2_pix[L + cb_b]);
  22.145 -
  22.146 -            L = *lum++;
  22.147 -            *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  22.148 -                                        rgb_2_pix[L + crb_g] |
  22.149 -                                        rgb_2_pix[L + cb_b]);
  22.150 -
  22.151 -
  22.152 -            /* Now, do second row.  */
  22.153 -
  22.154 -            L = *lum2++;
  22.155 -            *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  22.156 -                                        rgb_2_pix[L + crb_g] |
  22.157 -                                        rgb_2_pix[L + cb_b]);
  22.158 -
  22.159 -            L = *lum2++;
  22.160 -            *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  22.161 -                                        rgb_2_pix[L + crb_g] |
  22.162 -                                        rgb_2_pix[L + cb_b]);
  22.163 -        }
  22.164 -
  22.165 -        /*
  22.166 -         * These values are at the start of the next line, (due
  22.167 -         * to the ++'s above),but they need to be at the start
  22.168 -         * of the line after that.
  22.169 -         */
  22.170 -        lum += cols;
  22.171 -        lum2 += cols;
  22.172 -        row1 += mod;
  22.173 -        row2 += mod;
  22.174 -    }
  22.175 -}
  22.176 -
  22.177 -static void
  22.178 -Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
  22.179 -                       unsigned char *lum, unsigned char *cr,
  22.180 -                       unsigned char *cb, unsigned char *out,
  22.181 -                       int rows, int cols, int mod)
  22.182 -{
  22.183 -    unsigned int value;
  22.184 -    unsigned char *row1;
  22.185 -    unsigned char *row2;
  22.186 -    unsigned char *lum2;
  22.187 -    int x, y;
  22.188 -    int cr_r;
  22.189 -    int crb_g;
  22.190 -    int cb_b;
  22.191 -    int cols_2 = cols / 2;
  22.192 -
  22.193 -    row1 = out;
  22.194 -    row2 = row1 + cols * 3 + mod * 3;
  22.195 -    lum2 = lum + cols;
  22.196 -
  22.197 -    mod += cols + mod;
  22.198 -    mod *= 3;
  22.199 -
  22.200 -    y = rows / 2;
  22.201 -    while (y--) {
  22.202 -        x = cols_2;
  22.203 -        while (x--) {
  22.204 -            register int L;
  22.205 -
  22.206 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.207 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.208 -                + colortab[*cb + 2 * 256];
  22.209 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.210 -            ++cr;
  22.211 -            ++cb;
  22.212 -
  22.213 -            L = *lum++;
  22.214 -            value = (rgb_2_pix[L + cr_r] |
  22.215 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.216 -            *row1++ = (value) & 0xFF;
  22.217 -            *row1++ = (value >> 8) & 0xFF;
  22.218 -            *row1++ = (value >> 16) & 0xFF;
  22.219 -
  22.220 -            L = *lum++;
  22.221 -            value = (rgb_2_pix[L + cr_r] |
  22.222 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.223 -            *row1++ = (value) & 0xFF;
  22.224 -            *row1++ = (value >> 8) & 0xFF;
  22.225 -            *row1++ = (value >> 16) & 0xFF;
  22.226 -
  22.227 -
  22.228 -            /* Now, do second row.  */
  22.229 -
  22.230 -            L = *lum2++;
  22.231 -            value = (rgb_2_pix[L + cr_r] |
  22.232 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.233 -            *row2++ = (value) & 0xFF;
  22.234 -            *row2++ = (value >> 8) & 0xFF;
  22.235 -            *row2++ = (value >> 16) & 0xFF;
  22.236 -
  22.237 -            L = *lum2++;
  22.238 -            value = (rgb_2_pix[L + cr_r] |
  22.239 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.240 -            *row2++ = (value) & 0xFF;
  22.241 -            *row2++ = (value >> 8) & 0xFF;
  22.242 -            *row2++ = (value >> 16) & 0xFF;
  22.243 -        }
  22.244 -
  22.245 -        /*
  22.246 -         * These values are at the start of the next line, (due
  22.247 -         * to the ++'s above),but they need to be at the start
  22.248 -         * of the line after that.
  22.249 -         */
  22.250 -        lum += cols;
  22.251 -        lum2 += cols;
  22.252 -        row1 += mod;
  22.253 -        row2 += mod;
  22.254 -    }
  22.255 -}
  22.256 -
  22.257 -static void
  22.258 -Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
  22.259 -                       unsigned char *lum, unsigned char *cr,
  22.260 -                       unsigned char *cb, unsigned char *out,
  22.261 -                       int rows, int cols, int mod)
  22.262 -{
  22.263 -    unsigned int *row1;
  22.264 -    unsigned int *row2;
  22.265 -    unsigned char *lum2;
  22.266 -    int x, y;
  22.267 -    int cr_r;
  22.268 -    int crb_g;
  22.269 -    int cb_b;
  22.270 -    int cols_2 = cols / 2;
  22.271 -
  22.272 -    row1 = (unsigned int *) out;
  22.273 -    row2 = row1 + cols + mod;
  22.274 -    lum2 = lum + cols;
  22.275 -
  22.276 -    mod += cols + mod;
  22.277 -
  22.278 -    y = rows / 2;
  22.279 -    while (y--) {
  22.280 -        x = cols_2;
  22.281 -        while (x--) {
  22.282 -            register int L;
  22.283 -
  22.284 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.285 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.286 -                + colortab[*cb + 2 * 256];
  22.287 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.288 -            ++cr;
  22.289 -            ++cb;
  22.290 -
  22.291 -            L = *lum++;
  22.292 -            *row1++ = (rgb_2_pix[L + cr_r] |
  22.293 -                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.294 -
  22.295 -            L = *lum++;
  22.296 -            *row1++ = (rgb_2_pix[L + cr_r] |
  22.297 -                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.298 -
  22.299 -
  22.300 -            /* Now, do second row.  */
  22.301 -
  22.302 -            L = *lum2++;
  22.303 -            *row2++ = (rgb_2_pix[L + cr_r] |
  22.304 -                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.305 -
  22.306 -            L = *lum2++;
  22.307 -            *row2++ = (rgb_2_pix[L + cr_r] |
  22.308 -                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.309 -        }
  22.310 -
  22.311 -        /*
  22.312 -         * These values are at the start of the next line, (due
  22.313 -         * to the ++'s above),but they need to be at the start
  22.314 -         * of the line after that.
  22.315 -         */
  22.316 -        lum += cols;
  22.317 -        lum2 += cols;
  22.318 -        row1 += mod;
  22.319 -        row2 += mod;
  22.320 -    }
  22.321 -}
  22.322 -
  22.323 -/*
  22.324 - * In this function I make use of a nasty trick. The tables have the lower
  22.325 - * 16 bits replicated in the upper 16. This means I can write ints and get
  22.326 - * the horisontal doubling for free (almost).
  22.327 - */
  22.328 -static void
  22.329 -Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
  22.330 -                       unsigned char *lum, unsigned char *cr,
  22.331 -                       unsigned char *cb, unsigned char *out,
  22.332 -                       int rows, int cols, int mod)
  22.333 -{
  22.334 -    unsigned int *row1 = (unsigned int *) out;
  22.335 -    const int next_row = cols + (mod / 2);
  22.336 -    unsigned int *row2 = row1 + 2 * next_row;
  22.337 -    unsigned char *lum2;
  22.338 -    int x, y;
  22.339 -    int cr_r;
  22.340 -    int crb_g;
  22.341 -    int cb_b;
  22.342 -    int cols_2 = cols / 2;
  22.343 -
  22.344 -    lum2 = lum + cols;
  22.345 -
  22.346 -    mod = (next_row * 3) + (mod / 2);
  22.347 -
  22.348 -    y = rows / 2;
  22.349 -    while (y--) {
  22.350 -        x = cols_2;
  22.351 -        while (x--) {
  22.352 -            register int L;
  22.353 -
  22.354 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.355 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.356 -                + colortab[*cb + 2 * 256];
  22.357 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.358 -            ++cr;
  22.359 -            ++cb;
  22.360 -
  22.361 -            L = *lum++;
  22.362 -            row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
  22.363 -                                        rgb_2_pix[L + crb_g] |
  22.364 -                                        rgb_2_pix[L + cb_b]);
  22.365 -            row1++;
  22.366 -
  22.367 -            L = *lum++;
  22.368 -            row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
  22.369 -                                        rgb_2_pix[L + crb_g] |
  22.370 -                                        rgb_2_pix[L + cb_b]);
  22.371 -            row1++;
  22.372 -
  22.373 -
  22.374 -            /* Now, do second row. */
  22.375 -
  22.376 -            L = *lum2++;
  22.377 -            row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
  22.378 -                                        rgb_2_pix[L + crb_g] |
  22.379 -                                        rgb_2_pix[L + cb_b]);
  22.380 -            row2++;
  22.381 -
  22.382 -            L = *lum2++;
  22.383 -            row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
  22.384 -                                        rgb_2_pix[L + crb_g] |
  22.385 -                                        rgb_2_pix[L + cb_b]);
  22.386 -            row2++;
  22.387 -        }
  22.388 -
  22.389 -        /*
  22.390 -         * These values are at the start of the next line, (due
  22.391 -         * to the ++'s above),but they need to be at the start
  22.392 -         * of the line after that.
  22.393 -         */
  22.394 -        lum += cols;
  22.395 -        lum2 += cols;
  22.396 -        row1 += mod;
  22.397 -        row2 += mod;
  22.398 -    }
  22.399 -}
  22.400 -
  22.401 -static void
  22.402 -Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
  22.403 -                       unsigned char *lum, unsigned char *cr,
  22.404 -                       unsigned char *cb, unsigned char *out,
  22.405 -                       int rows, int cols, int mod)
  22.406 -{
  22.407 -    unsigned int value;
  22.408 -    unsigned char *row1 = out;
  22.409 -    const int next_row = (cols * 2 + mod) * 3;
  22.410 -    unsigned char *row2 = row1 + 2 * next_row;
  22.411 -    unsigned char *lum2;
  22.412 -    int x, y;
  22.413 -    int cr_r;
  22.414 -    int crb_g;
  22.415 -    int cb_b;
  22.416 -    int cols_2 = cols / 2;
  22.417 -
  22.418 -    lum2 = lum + cols;
  22.419 -
  22.420 -    mod = next_row * 3 + mod * 3;
  22.421 -
  22.422 -    y = rows / 2;
  22.423 -    while (y--) {
  22.424 -        x = cols_2;
  22.425 -        while (x--) {
  22.426 -            register int L;
  22.427 -
  22.428 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.429 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.430 -                + colortab[*cb + 2 * 256];
  22.431 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.432 -            ++cr;
  22.433 -            ++cb;
  22.434 -
  22.435 -            L = *lum++;
  22.436 -            value = (rgb_2_pix[L + cr_r] |
  22.437 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.438 -            row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
  22.439 -                row1[next_row + 3 + 0] = (value) & 0xFF;
  22.440 -            row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
  22.441 -                row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
  22.442 -            row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
  22.443 -                row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
  22.444 -            row1 += 2 * 3;
  22.445 -
  22.446 -            L = *lum++;
  22.447 -            value = (rgb_2_pix[L + cr_r] |
  22.448 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.449 -            row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
  22.450 -                row1[next_row + 3 + 0] = (value) & 0xFF;
  22.451 -            row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
  22.452 -                row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
  22.453 -            row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
  22.454 -                row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
  22.455 -            row1 += 2 * 3;
  22.456 -
  22.457 -
  22.458 -            /* Now, do second row. */
  22.459 -
  22.460 -            L = *lum2++;
  22.461 -            value = (rgb_2_pix[L + cr_r] |
  22.462 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.463 -            row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
  22.464 -                row2[next_row + 3 + 0] = (value) & 0xFF;
  22.465 -            row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
  22.466 -                row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
  22.467 -            row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
  22.468 -                row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
  22.469 -            row2 += 2 * 3;
  22.470 -
  22.471 -            L = *lum2++;
  22.472 -            value = (rgb_2_pix[L + cr_r] |
  22.473 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.474 -            row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
  22.475 -                row2[next_row + 3 + 0] = (value) & 0xFF;
  22.476 -            row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
  22.477 -                row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
  22.478 -            row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
  22.479 -                row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
  22.480 -            row2 += 2 * 3;
  22.481 -        }
  22.482 -
  22.483 -        /*
  22.484 -         * These values are at the start of the next line, (due
  22.485 -         * to the ++'s above),but they need to be at the start
  22.486 -         * of the line after that.
  22.487 -         */
  22.488 -        lum += cols;
  22.489 -        lum2 += cols;
  22.490 -        row1 += mod;
  22.491 -        row2 += mod;
  22.492 -    }
  22.493 -}
  22.494 -
  22.495 -static void
  22.496 -Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
  22.497 -                       unsigned char *lum, unsigned char *cr,
  22.498 -                       unsigned char *cb, unsigned char *out,
  22.499 -                       int rows, int cols, int mod)
  22.500 -{
  22.501 -    unsigned int *row1 = (unsigned int *) out;
  22.502 -    const int next_row = cols * 2 + mod;
  22.503 -    unsigned int *row2 = row1 + 2 * next_row;
  22.504 -    unsigned char *lum2;
  22.505 -    int x, y;
  22.506 -    int cr_r;
  22.507 -    int crb_g;
  22.508 -    int cb_b;
  22.509 -    int cols_2 = cols / 2;
  22.510 -
  22.511 -    lum2 = lum + cols;
  22.512 -
  22.513 -    mod = (next_row * 3) + mod;
  22.514 -
  22.515 -    y = rows / 2;
  22.516 -    while (y--) {
  22.517 -        x = cols_2;
  22.518 -        while (x--) {
  22.519 -            register int L;
  22.520 -
  22.521 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.522 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.523 -                + colortab[*cb + 2 * 256];
  22.524 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.525 -            ++cr;
  22.526 -            ++cb;
  22.527 -
  22.528 -            L = *lum++;
  22.529 -            row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
  22.530 -                (rgb_2_pix[L + cr_r] |
  22.531 -                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.532 -            row1 += 2;
  22.533 -
  22.534 -            L = *lum++;
  22.535 -            row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
  22.536 -                (rgb_2_pix[L + cr_r] |
  22.537 -                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.538 -            row1 += 2;
  22.539 -
  22.540 -
  22.541 -            /* Now, do second row. */
  22.542 -
  22.543 -            L = *lum2++;
  22.544 -            row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
  22.545 -                (rgb_2_pix[L + cr_r] |
  22.546 -                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.547 -            row2 += 2;
  22.548 -
  22.549 -            L = *lum2++;
  22.550 -            row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
  22.551 -                (rgb_2_pix[L + cr_r] |
  22.552 -                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.553 -            row2 += 2;
  22.554 -        }
  22.555 -
  22.556 -        /*
  22.557 -         * These values are at the start of the next line, (due
  22.558 -         * to the ++'s above),but they need to be at the start
  22.559 -         * of the line after that.
  22.560 -         */
  22.561 -        lum += cols;
  22.562 -        lum2 += cols;
  22.563 -        row1 += mod;
  22.564 -        row2 += mod;
  22.565 -    }
  22.566 -}
  22.567 -
  22.568 -static void
  22.569 -Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
  22.570 -                       unsigned char *lum, unsigned char *cr,
  22.571 -                       unsigned char *cb, unsigned char *out,
  22.572 -                       int rows, int cols, int mod)
  22.573 -{
  22.574 -    unsigned short *row;
  22.575 -    int x, y;
  22.576 -    int cr_r;
  22.577 -    int crb_g;
  22.578 -    int cb_b;
  22.579 -    int cols_2 = cols / 2;
  22.580 -
  22.581 -    row = (unsigned short *) out;
  22.582 -
  22.583 -    y = rows;
  22.584 -    while (y--) {
  22.585 -        x = cols_2;
  22.586 -        while (x--) {
  22.587 -            register int L;
  22.588 -
  22.589 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.590 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.591 -                + colortab[*cb + 2 * 256];
  22.592 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.593 -            cr += 4;
  22.594 -            cb += 4;
  22.595 -
  22.596 -            L = *lum;
  22.597 -            lum += 2;
  22.598 -            *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  22.599 -                                       rgb_2_pix[L + crb_g] |
  22.600 -                                       rgb_2_pix[L + cb_b]);
  22.601 -
  22.602 -            L = *lum;
  22.603 -            lum += 2;
  22.604 -            *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
  22.605 -                                       rgb_2_pix[L + crb_g] |
  22.606 -                                       rgb_2_pix[L + cb_b]);
  22.607 -
  22.608 -        }
  22.609 -
  22.610 -        row += mod;
  22.611 -    }
  22.612 -}
  22.613 -
  22.614 -static void
  22.615 -Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
  22.616 -                       unsigned char *lum, unsigned char *cr,
  22.617 -                       unsigned char *cb, unsigned char *out,
  22.618 -                       int rows, int cols, int mod)
  22.619 -{
  22.620 -    unsigned int value;
  22.621 -    unsigned char *row;
  22.622 -    int x, y;
  22.623 -    int cr_r;
  22.624 -    int crb_g;
  22.625 -    int cb_b;
  22.626 -    int cols_2 = cols / 2;
  22.627 -
  22.628 -    row = (unsigned char *) out;
  22.629 -    mod *= 3;
  22.630 -    y = rows;
  22.631 -    while (y--) {
  22.632 -        x = cols_2;
  22.633 -        while (x--) {
  22.634 -            register int L;
  22.635 -
  22.636 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.637 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.638 -                + colortab[*cb + 2 * 256];
  22.639 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.640 -            cr += 4;
  22.641 -            cb += 4;
  22.642 -
  22.643 -            L = *lum;
  22.644 -            lum += 2;
  22.645 -            value = (rgb_2_pix[L + cr_r] |
  22.646 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.647 -            *row++ = (value) & 0xFF;
  22.648 -            *row++ = (value >> 8) & 0xFF;
  22.649 -            *row++ = (value >> 16) & 0xFF;
  22.650 -
  22.651 -            L = *lum;
  22.652 -            lum += 2;
  22.653 -            value = (rgb_2_pix[L + cr_r] |
  22.654 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.655 -            *row++ = (value) & 0xFF;
  22.656 -            *row++ = (value >> 8) & 0xFF;
  22.657 -            *row++ = (value >> 16) & 0xFF;
  22.658 -
  22.659 -        }
  22.660 -        row += mod;
  22.661 -    }
  22.662 -}
  22.663 -
  22.664 -static void
  22.665 -Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
  22.666 -                       unsigned char *lum, unsigned char *cr,
  22.667 -                       unsigned char *cb, unsigned char *out,
  22.668 -                       int rows, int cols, int mod)
  22.669 -{
  22.670 -    unsigned int *row;
  22.671 -    int x, y;
  22.672 -    int cr_r;
  22.673 -    int crb_g;
  22.674 -    int cb_b;
  22.675 -    int cols_2 = cols / 2;
  22.676 -
  22.677 -    row = (unsigned int *) out;
  22.678 -    y = rows;
  22.679 -    while (y--) {
  22.680 -        x = cols_2;
  22.681 -        while (x--) {
  22.682 -            register int L;
  22.683 -
  22.684 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.685 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.686 -                + colortab[*cb + 2 * 256];
  22.687 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.688 -            cr += 4;
  22.689 -            cb += 4;
  22.690 -
  22.691 -            L = *lum;
  22.692 -            lum += 2;
  22.693 -            *row++ = (rgb_2_pix[L + cr_r] |
  22.694 -                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.695 -
  22.696 -            L = *lum;
  22.697 -            lum += 2;
  22.698 -            *row++ = (rgb_2_pix[L + cr_r] |
  22.699 -                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.700 -
  22.701 -
  22.702 -        }
  22.703 -        row += mod;
  22.704 -    }
  22.705 -}
  22.706 -
  22.707 -/*
  22.708 - * In this function I make use of a nasty trick. The tables have the lower
  22.709 - * 16 bits replicated in the upper 16. This means I can write ints and get
  22.710 - * the horisontal doubling for free (almost).
  22.711 - */
  22.712 -static void
  22.713 -Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
  22.714 -                       unsigned char *lum, unsigned char *cr,
  22.715 -                       unsigned char *cb, unsigned char *out,
  22.716 -                       int rows, int cols, int mod)
  22.717 -{
  22.718 -    unsigned int *row = (unsigned int *) out;
  22.719 -    const int next_row = cols + (mod / 2);
  22.720 -    int x, y;
  22.721 -    int cr_r;
  22.722 -    int crb_g;
  22.723 -    int cb_b;
  22.724 -    int cols_2 = cols / 2;
  22.725 -
  22.726 -    y = rows;
  22.727 -    while (y--) {
  22.728 -        x = cols_2;
  22.729 -        while (x--) {
  22.730 -            register int L;
  22.731 -
  22.732 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.733 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.734 -                + colortab[*cb + 2 * 256];
  22.735 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.736 -            cr += 4;
  22.737 -            cb += 4;
  22.738 -
  22.739 -            L = *lum;
  22.740 -            lum += 2;
  22.741 -            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
  22.742 -                                      rgb_2_pix[L + crb_g] |
  22.743 -                                      rgb_2_pix[L + cb_b]);
  22.744 -            row++;
  22.745 -
  22.746 -            L = *lum;
  22.747 -            lum += 2;
  22.748 -            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
  22.749 -                                      rgb_2_pix[L + crb_g] |
  22.750 -                                      rgb_2_pix[L + cb_b]);
  22.751 -            row++;
  22.752 -
  22.753 -        }
  22.754 -        row += next_row;
  22.755 -    }
  22.756 -}
  22.757 -
  22.758 -static void
  22.759 -Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
  22.760 -                       unsigned char *lum, unsigned char *cr,
  22.761 -                       unsigned char *cb, unsigned char *out,
  22.762 -                       int rows, int cols, int mod)
  22.763 -{
  22.764 -    unsigned int value;
  22.765 -    unsigned char *row = out;
  22.766 -    const int next_row = (cols * 2 + mod) * 3;
  22.767 -    int x, y;
  22.768 -    int cr_r;
  22.769 -    int crb_g;
  22.770 -    int cb_b;
  22.771 -    int cols_2 = cols / 2;
  22.772 -    y = rows;
  22.773 -    while (y--) {
  22.774 -        x = cols_2;
  22.775 -        while (x--) {
  22.776 -            register int L;
  22.777 -
  22.778 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.779 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.780 -                + colortab[*cb + 2 * 256];
  22.781 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.782 -            cr += 4;
  22.783 -            cb += 4;
  22.784 -
  22.785 -            L = *lum;
  22.786 -            lum += 2;
  22.787 -            value = (rgb_2_pix[L + cr_r] |
  22.788 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.789 -            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
  22.790 -                row[next_row + 3 + 0] = (value) & 0xFF;
  22.791 -            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
  22.792 -                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
  22.793 -            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
  22.794 -                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
  22.795 -            row += 2 * 3;
  22.796 -
  22.797 -            L = *lum;
  22.798 -            lum += 2;
  22.799 -            value = (rgb_2_pix[L + cr_r] |
  22.800 -                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.801 -            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
  22.802 -                row[next_row + 3 + 0] = (value) & 0xFF;
  22.803 -            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
  22.804 -                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
  22.805 -            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
  22.806 -                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
  22.807 -            row += 2 * 3;
  22.808 -
  22.809 -        }
  22.810 -        row += next_row;
  22.811 -    }
  22.812 -}
  22.813 -
  22.814 -static void
  22.815 -Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
  22.816 -                       unsigned char *lum, unsigned char *cr,
  22.817 -                       unsigned char *cb, unsigned char *out,
  22.818 -                       int rows, int cols, int mod)
  22.819 -{
  22.820 -    unsigned int *row = (unsigned int *) out;
  22.821 -    const int next_row = cols * 2 + mod;
  22.822 -    int x, y;
  22.823 -    int cr_r;
  22.824 -    int crb_g;
  22.825 -    int cb_b;
  22.826 -    int cols_2 = cols / 2;
  22.827 -    mod += mod;
  22.828 -    y = rows;
  22.829 -    while (y--) {
  22.830 -        x = cols_2;
  22.831 -        while (x--) {
  22.832 -            register int L;
  22.833 -
  22.834 -            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
  22.835 -            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
  22.836 -                + colortab[*cb + 2 * 256];
  22.837 -            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
  22.838 -            cr += 4;
  22.839 -            cb += 4;
  22.840 -
  22.841 -            L = *lum;
  22.842 -            lum += 2;
  22.843 -            row[0] = row[1] = row[next_row] = row[next_row + 1] =
  22.844 -                (rgb_2_pix[L + cr_r] |
  22.845 -                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.846 -            row += 2;
  22.847 -
  22.848 -            L = *lum;
  22.849 -            lum += 2;
  22.850 -            row[0] = row[1] = row[next_row] = row[next_row + 1] =
  22.851 -                (rgb_2_pix[L + cr_r] |
  22.852 -                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
  22.853 -            row += 2;
  22.854 -
  22.855 -
  22.856 -        }
  22.857 -
  22.858 -        row += next_row;
  22.859 -    }
  22.860 -}
  22.861 -
  22.862 -/*
  22.863 - * How many 1 bits are there in the Uint32.
  22.864 - * Low performance, do not call often.
  22.865 - */
  22.866 -static int
  22.867 -number_of_bits_set(Uint32 a)
  22.868 -{
  22.869 -    if (!a)
  22.870 -        return 0;
  22.871 -    if (a & 1)
  22.872 -        return 1 + number_of_bits_set(a >> 1);
  22.873 -    return (number_of_bits_set(a >> 1));
  22.874 -}
  22.875 -
  22.876 -/*
  22.877 - * How many 0 bits are there at least significant end of Uint32.
  22.878 - * Low performance, do not call often.
  22.879 - */
  22.880 -static int
  22.881 -free_bits_at_bottom(Uint32 a)
  22.882 -{
  22.883 -    /* assume char is 8 bits */
  22.884 -    if (!a)
  22.885 -        return sizeof(Uint32) * 8;
  22.886 -    if (((Sint32) a) & 1l)
  22.887 -        return 0;
  22.888 -    return 1 + free_bits_at_bottom(a >> 1);
  22.889 -}
  22.890 -
  22.891 -static int
  22.892 -SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
  22.893 -{
  22.894 -    Uint32 *r_2_pix_alloc;
  22.895 -    Uint32 *g_2_pix_alloc;
  22.896 -    Uint32 *b_2_pix_alloc;
  22.897 -    int i;
  22.898 -    int bpp;
  22.899 -    Uint32 Rmask, Gmask, Bmask, Amask;
  22.900 -
  22.901 -    if (!SDL_PixelFormatEnumToMasks
  22.902 -        (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
  22.903 -        SDL_SetError("Unsupported YUV destination format");
  22.904 -        return -1;
  22.905 -    }
  22.906 -
  22.907 -    swdata->target_format = target_format;
  22.908 -    r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
  22.909 -    g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
  22.910 -    b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
  22.911 -
  22.912 -    /* 
  22.913 -     * Set up entries 0-255 in rgb-to-pixel value tables.
  22.914 -     */
  22.915 -    for (i = 0; i < 256; ++i) {
  22.916 -        r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
  22.917 -        r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
  22.918 -        r_2_pix_alloc[i + 256] |= Amask;
  22.919 -        g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
  22.920 -        g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
  22.921 -        g_2_pix_alloc[i + 256] |= Amask;
  22.922 -        b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
  22.923 -        b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
  22.924 -        b_2_pix_alloc[i + 256] |= Amask;
  22.925 -    }
  22.926 -
  22.927 -    /*
  22.928 -     * If we have 16-bit output depth, then we double the value
  22.929 -     * in the top word. This means that we can write out both
  22.930 -     * pixels in the pixel doubling mode with one op. It is 
  22.931 -     * harmless in the normal case as storing a 32-bit value
  22.932 -     * through a short pointer will lose the top bits anyway.
  22.933 -     */
  22.934 -    if (SDL_BYTESPERPIXEL(target_format) == 2) {
  22.935 -        for (i = 0; i < 256; ++i) {
  22.936 -            r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
  22.937 -            g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
  22.938 -            b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
  22.939 -        }
  22.940 -    }
  22.941 -
  22.942 -    /*
  22.943 -     * Spread out the values we have to the rest of the array so that
  22.944 -     * we do not need to check for overflow.
  22.945 -     */
  22.946 -    for (i = 0; i < 256; ++i) {
  22.947 -        r_2_pix_alloc[i] = r_2_pix_alloc[256];
  22.948 -        r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
  22.949 -        g_2_pix_alloc[i] = g_2_pix_alloc[256];
  22.950 -        g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
  22.951 -        b_2_pix_alloc[i] = b_2_pix_alloc[256];
  22.952 -        b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
  22.953 -    }
  22.954 -
  22.955 -    /* You have chosen wisely... */
  22.956 -    switch (swdata->format) {
  22.957 -    case SDL_PIXELFORMAT_YV12:
  22.958 -    case SDL_PIXELFORMAT_IYUV:
  22.959 -        if (SDL_BYTESPERPIXEL(target_format) == 2) {
  22.960 -#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
  22.961 -            /* inline assembly functions */
  22.962 -            if (SDL_HasMMX() && (Rmask == 0xF800) &&
  22.963 -                (Gmask == 0x07E0) && (Bmask == 0x001F)
  22.964 -                && (swdata->w & 15) == 0) {
  22.965 -/*printf("Using MMX 16-bit 565 dither\n");*/
  22.966 -                swdata->Display1X = Color565DitherYV12MMX1X;
  22.967 -            } else {
  22.968 -/*printf("Using C 16-bit dither\n");*/
  22.969 -                swdata->Display1X = Color16DitherYV12Mod1X;
  22.970 -            }
  22.971 -#else
  22.972 -            swdata->Display1X = Color16DitherYV12Mod1X;
  22.973 -#endif
  22.974 -            swdata->Display2X = Color16DitherYV12Mod2X;
  22.975 -        }
  22.976 -        if (SDL_BYTESPERPIXEL(target_format) == 3) {
  22.977 -            swdata->Display1X = Color24DitherYV12Mod1X;
  22.978 -            swdata->Display2X = Color24DitherYV12Mod2X;
  22.979 -        }
  22.980 -        if (SDL_BYTESPERPIXEL(target_format) == 4) {
  22.981 -#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
  22.982 -            /* inline assembly functions */
  22.983 -            if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
  22.984 -                (Gmask == 0x0000FF00) &&
  22.985 -                (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
  22.986 -/*printf("Using MMX 32-bit dither\n");*/
  22.987 -                swdata->Display1X = ColorRGBDitherYV12MMX1X;
  22.988 -            } else {
  22.989 -/*printf("Using C 32-bit dither\n");*/
  22.990 -                swdata->Display1X = Color32DitherYV12Mod1X;
  22.991 -            }
  22.992 -#else
  22.993 -            swdata->Display1X = Color32DitherYV12Mod1X;
  22.994 -#endif
  22.995 -            swdata->Display2X = Color32DitherYV12Mod2X;
  22.996 -        }
  22.997 -        break;
  22.998 -    case SDL_PIXELFORMAT_YUY2:
  22.999 -    case SDL_PIXELFORMAT_UYVY:
 22.1000 -    case SDL_PIXELFORMAT_YVYU:
 22.1001 -        if (SDL_BYTESPERPIXEL(target_format) == 2) {
 22.1002 -            swdata->Display1X = Color16DitherYUY2Mod1X;
 22.1003 -            swdata->Display2X = Color16DitherYUY2Mod2X;
 22.1004 -        }
 22.1005 -        if (SDL_BYTESPERPIXEL(target_format) == 3) {
 22.1006 -            swdata->Display1X = Color24DitherYUY2Mod1X;
 22.1007 -            swdata->Display2X = Color24DitherYUY2Mod2X;
 22.1008 -        }
 22.1009 -        if (SDL_BYTESPERPIXEL(target_format) == 4) {
 22.1010 -            swdata->Display1X = Color32DitherYUY2Mod1X;
 22.1011 -            swdata->Display2X = Color32DitherYUY2Mod2X;
 22.1012 -        }
 22.1013 -        break;
 22.1014 -    default:
 22.1015 -        /* We should never get here (caught above) */
 22.1016 -        break;
 22.1017 -    }
 22.1018 -
 22.1019 -    if (swdata->display) {
 22.1020 -        SDL_FreeSurface(swdata->display);
 22.1021 -        swdata->display = NULL;
 22.1022 -    }
 22.1023 -    return 0;
 22.1024 -}
 22.1025 -
 22.1026 -SDL_SW_YUVTexture *
 22.1027 -SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
 22.1028 -{
 22.1029 -    SDL_SW_YUVTexture *swdata;
 22.1030 -    int *Cr_r_tab;
 22.1031 -    int *Cr_g_tab;
 22.1032 -    int *Cb_g_tab;
 22.1033 -    int *Cb_b_tab;
 22.1034 -    int i;
 22.1035 -    int CR, CB;
 22.1036 -
 22.1037 -    swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
 22.1038 -    if (!swdata) {
 22.1039 -        SDL_OutOfMemory();
 22.1040 -        return NULL;
 22.1041 -    }
 22.1042 -
 22.1043 -    switch (format) {
 22.1044 -    case SDL_PIXELFORMAT_YV12:
 22.1045 -    case SDL_PIXELFORMAT_IYUV:
 22.1046 -    case SDL_PIXELFORMAT_YUY2:
 22.1047 -    case SDL_PIXELFORMAT_UYVY:
 22.1048 -    case SDL_PIXELFORMAT_YVYU:
 22.1049 -        break;
 22.1050 -    default:
 22.1051 -        SDL_SetError("Unsupported YUV format");
 22.1052 -        return NULL;
 22.1053 -    }
 22.1054 -
 22.1055 -    swdata->format = format;
 22.1056 -    swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
 22.1057 -    swdata->w = w;
 22.1058 -    swdata->h = h;
 22.1059 -    swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
 22.1060 -    swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
 22.1061 -    swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
 22.1062 -    if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
 22.1063 -        SDL_OutOfMemory();
 22.1064 -        SDL_SW_DestroyYUVTexture(swdata);
 22.1065 -        return NULL;
 22.1066 -    }
 22.1067 -
 22.1068 -    /* Generate the tables for the display surface */
 22.1069 -    Cr_r_tab = &swdata->colortab[0 * 256];
 22.1070 -    Cr_g_tab = &swdata->colortab[1 * 256];
 22.1071 -    Cb_g_tab = &swdata->colortab[2 * 256];
 22.1072 -    Cb_b_tab = &swdata->colortab[3 * 256];
 22.1073 -    for (i = 0; i < 256; i++) {
 22.1074 -        /* Gamma correction (luminescence table) and chroma correction
 22.1075 -           would be done here.  See the Berkeley mpeg_play sources.
 22.1076 -         */
 22.1077 -        CB = CR = (i - 128);
 22.1078 -        Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
 22.1079 -        Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
 22.1080 -        Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
 22.1081 -        Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
 22.1082 -    }
 22.1083 -
 22.1084 -    /* Find the pitch and offset values for the overlay */
 22.1085 -    switch (format) {
 22.1086 -    case SDL_PIXELFORMAT_YV12:
 22.1087 -    case SDL_PIXELFORMAT_IYUV:
 22.1088 -        swdata->pitches[0] = w;
 22.1089 -        swdata->pitches[1] = swdata->pitches[0] / 2;
 22.1090 -        swdata->pitches[2] = swdata->pitches[0] / 2;
 22.1091 -        swdata->planes[0] = swdata->pixels;
 22.1092 -        swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
 22.1093 -        swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
 22.1094 -        break;
 22.1095 -    case SDL_PIXELFORMAT_YUY2:
 22.1096 -    case SDL_PIXELFORMAT_UYVY:
 22.1097 -    case SDL_PIXELFORMAT_YVYU:
 22.1098 -        swdata->pitches[0] = w * 2;
 22.1099 -        swdata->planes[0] = swdata->pixels;
 22.1100 -        break;
 22.1101 -    default:
 22.1102 -        /* We should never get here (caught above) */
 22.1103 -        break;
 22.1104 -    }
 22.1105 -
 22.1106 -    /* We're all done.. */
 22.1107 -    return (swdata);
 22.1108 -}
 22.1109 -
 22.1110 -int
 22.1111 -SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
 22.1112 -                             int *pitch)
 22.1113 -{
 22.1114 -    *pixels = swdata->planes[0];
 22.1115 -    *pitch = swdata->pitches[0];
 22.1116 -    return 0;
 22.1117 -}
 22.1118 -
 22.1119 -int
 22.1120 -SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
 22.1121 -                        const void *pixels, int pitch)
 22.1122 -{
 22.1123 -    switch (swdata->format) {
 22.1124 -    case SDL_PIXELFORMAT_YV12:
 22.1125 -    case SDL_PIXELFORMAT_IYUV:
 22.1126 -        if (rect
 22.1127 -            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
 22.1128 -                || rect->h != swdata->h)) {
 22.1129 -            SDL_SetError
 22.1130 -                ("YV12 and IYUV textures only support full surface updates");
 22.1131 -            return -1;
 22.1132 -        }
 22.1133 -        SDL_memcpy(swdata->pixels, pixels, swdata->h * swdata->w * 2);
 22.1134 -        break;
 22.1135 -    case SDL_PIXELFORMAT_YUY2:
 22.1136 -    case SDL_PIXELFORMAT_UYVY:
 22.1137 -    case SDL_PIXELFORMAT_YVYU:
 22.1138 -        {
 22.1139 -            Uint8 *src, *dst;
 22.1140 -            int row;
 22.1141 -            size_t length;
 22.1142 -
 22.1143 -            src = (Uint8 *) pixels;
 22.1144 -            dst =
 22.1145 -                swdata->planes[0] + rect->y * swdata->pitches[0] +
 22.1146 -                rect->x * 2;
 22.1147 -            length = rect->w * 2;
 22.1148 -            for (row = 0; row < rect->h; ++row) {
 22.1149 -                SDL_memcpy(dst, src, length);
 22.1150 -                src += pitch;
 22.1151 -                dst += swdata->pitches[0];
 22.1152 -            }
 22.1153 -        }
 22.1154 -        break;
 22.1155 -    }
 22.1156 -    return 0;
 22.1157 -}
 22.1158 -
 22.1159 -int
 22.1160 -SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
 22.1161 -                      int markDirty, void **pixels, int *pitch)
 22.1162 -{
 22.1163 -    switch (swdata->format) {
 22.1164 -    case SDL_PIXELFORMAT_YV12:
 22.1165 -    case SDL_PIXELFORMAT_IYUV:
 22.1166 -        if (rect
 22.1167 -            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
 22.1168 -                || rect->h != swdata->h)) {
 22.1169 -            SDL_SetError
 22.1170 -                ("YV12 and IYUV textures only support full surface locks");
 22.1171 -            return -1;
 22.1172 -        }
 22.1173 -        break;
 22.1174 -    }
 22.1175 -
 22.1176 -    *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
 22.1177 -    *pitch = swdata->pitches[0];
 22.1178 -    return 0;
 22.1179 -}
 22.1180 -
 22.1181 -void
 22.1182 -SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
 22.1183 -{
 22.1184 -}
 22.1185 -
 22.1186 -int
 22.1187 -SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
 22.1188 -                    Uint32 target_format, int w, int h, void *pixels,
 22.1189 -                    int pitch)
 22.1190 -{
 22.1191 -    int stretch;
 22.1192 -    int scale_2x;
 22.1193 -    Uint8 *lum, *Cr, *Cb;
 22.1194 -    int mod;
 22.1195 -
 22.1196 -    /* Make sure we're set up to display in the desired format */
 22.1197 -    if (target_format != swdata->target_format) {
 22.1198 -        if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
 22.1199 -            return -1;
 22.1200 -        }
 22.1201 -    }
 22.1202 -
 22.1203 -    stretch = 0;
 22.1204 -    scale_2x = 0;
 22.1205 -    if (srcrect->x || srcrect->y || srcrect->w < swdata->w
 22.1206 -        || srcrect->h < swdata->h) {
 22.1207 -        /* The source rectangle has been clipped.
 22.1208 -           Using a scratch surface is easier than adding clipped
 22.1209 -           source support to all the blitters, plus that would
 22.1210 -           slow them down in the general unclipped case.
 22.1211 -         */
 22.1212 -        stretch = 1;
 22.1213 -    } else if ((srcrect->w != w) || (srcrect->h != h)) {
 22.1214 -        if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
 22.1215 -            scale_2x = 1;
 22.1216 -        } else {
 22.1217 -            stretch = 1;
 22.1218 -        }
 22.1219 -    }
 22.1220 -    if (stretch) {
 22.1221 -        int bpp;
 22.1222 -        Uint32 Rmask, Gmask, Bmask, Amask;
 22.1223 -
 22.1224 -        if (swdata->display) {
 22.1225 -            swdata->display->w = w;
 22.1226 -            swdata->display->h = h;
 22.1227 -            swdata->display->pixels = pixels;
 22.1228 -            swdata->display->pitch = pitch;
 22.1229 -        } else {
 22.1230 -            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
 22.1231 -            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
 22.1232 -                                       &Bmask, &Amask);
 22.1233 -            swdata->display =
 22.1234 -                SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
 22.1235 -                                         Gmask, Bmask, Amask);
 22.1236 -            if (!swdata->display) {
 22.1237 -                return (-1);
 22.1238 -            }
 22.1239 -        }
 22.1240 -        if (!swdata->stretch) {
 22.1241 -            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
 22.1242 -            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
 22.1243 -                                       &Bmask, &Amask);
 22.1244 -            swdata->stretch =
 22.1245 -                SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
 22.1246 -                                     Gmask, Bmask, Amask);
 22.1247 -            if (!swdata->stretch) {
 22.1248 -                return (-1);
 22.1249 -            }
 22.1250 -        }
 22.1251 -        pixels = swdata->stretch->pixels;
 22.1252 -        pitch = swdata->stretch->pitch;
 22.1253 -    }
 22.1254 -    switch (swdata->format) {
 22.1255 -    case SDL_PIXELFORMAT_YV12:
 22.1256 -        lum = swdata->planes[0];
 22.1257 -        Cr = swdata->planes[1];
 22.1258 -        Cb = swdata->planes[2];
 22.1259 -        break;
 22.1260 -    case SDL_PIXELFORMAT_IYUV:
 22.1261 -        lum = swdata->planes[0];
 22.1262 -        Cr = swdata->planes[2];
 22.1263 -        Cb = swdata->planes[1];
 22.1264 -        break;
 22.1265 -    case SDL_PIXELFORMAT_YUY2:
 22.1266 -        lum = swdata->planes[0];
 22.1267 -        Cr = lum + 3;
 22.1268 -        Cb = lum + 1;
 22.1269 -        break;
 22.1270 -    case SDL_PIXELFORMAT_UYVY:
 22.1271 -        lum = swdata->planes[0] + 1;
 22.1272 -        Cr = lum + 1;
 22.1273 -        Cb = lum - 1;
 22.1274 -        break;
 22.1275 -    case SDL_PIXELFORMAT_YVYU:
 22.1276 -        lum = swdata->planes[0];
 22.1277 -        Cr = lum + 1;
 22.1278 -        Cb = lum + 3;
 22.1279 -        break;
 22.1280 -    default:
 22.1281 -        SDL_SetError("Unsupported YUV format in copy");
 22.1282 -        return (-1);
 22.1283 -    }
 22.1284 -    mod = (pitch / SDL_BYTESPERPIXEL(target_format));
 22.1285 -
 22.1286 -    if (scale_2x) {
 22.1287 -        mod -= (swdata->w * 2);
 22.1288 -        swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
 22.1289 -                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
 22.1290 -    } else {
 22.1291 -        mod -= swdata->w;
 22.1292 -        swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
 22.1293 -                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
 22.1294 -    }
 22.1295 -    if (stretch) {
 22.1296 -        SDL_Rect rect = *srcrect;
 22.1297 -        SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
 22.1298 -    }
 22.1299 -    return 0;
 22.1300 -}
 22.1301 -
 22.1302 -void
 22.1303 -SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
 22.1304 -{
 22.1305 -    if (swdata) {
 22.1306 -        if (swdata->pixels) {
 22.1307 -            SDL_free(swdata->pixels);
 22.1308 -        }
 22.1309 -        if (swdata->colortab) {
 22.1310 -            SDL_free(swdata->colortab);
 22.1311 -        }
 22.1312 -        if (swdata->rgb_2_pix) {
 22.1313 -            SDL_free(swdata->rgb_2_pix);
 22.1314 -        }
 22.1315 -        if (swdata->stretch) {
 22.1316 -            SDL_FreeSurface(swdata->stretch);
 22.1317 -        }
 22.1318 -        if (swdata->display) {
 22.1319 -            SDL_FreeSurface(swdata->display);
 22.1320 -        }
 22.1321 -        SDL_free(swdata);
 22.1322 -    }
 22.1323 -}
 22.1324 -
 22.1325 -/* vi: set ts=4 sw=4 expandtab: */
    23.1 --- a/src/video/SDL_yuv_sw_c.h	Wed Feb 02 22:55:12 2011 -0800
    23.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.3 @@ -1,70 +0,0 @@
    23.4 -/*
    23.5 -    SDL - Simple DirectMedia Layer
    23.6 -    Copyright (C) 1997-2010 Sam Lantinga
    23.7 -
    23.8 -    This library is free software; you can redistribute it and/or
    23.9 -    modify it under the terms of the GNU Lesser General Public
   23.10 -    License as published by the Free Software Foundation; either
   23.11 -    version 2.1 of the License, or (at your option) any later version.
   23.12 -
   23.13 -    This library is distributed in the hope that it will be useful,
   23.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   23.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   23.16 -    Lesser General Public License for more details.
   23.17 -
   23.18 -    You should have received a copy of the GNU Lesser General Public
   23.19 -    License along with this library; if not, write to the Free Software
   23.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   23.21 -
   23.22 -    Sam Lantinga
   23.23 -    slouken@libsdl.org
   23.24 -*/
   23.25 -#include "SDL_config.h"
   23.26 -
   23.27 -#include "SDL_video.h"
   23.28 -#include "SDL_sysvideo.h"
   23.29 -
   23.30 -/* This is the software implementation of the YUV texture support */
   23.31 -
   23.32 -struct SDL_SW_YUVTexture
   23.33 -{
   23.34 -    Uint32 format;
   23.35 -    Uint32 target_format;
   23.36 -    int w, h;
   23.37 -    Uint8 *pixels;
   23.38 -    int *colortab;
   23.39 -    Uint32 *rgb_2_pix;
   23.40 -    void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
   23.41 -                       unsigned char *lum, unsigned char *cr,
   23.42 -                       unsigned char *cb, unsigned char *out,
   23.43 -                       int rows, int cols, int mod);
   23.44 -    void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
   23.45 -                       unsigned char *lum, unsigned char *cr,
   23.46 -                       unsigned char *cb, unsigned char *out,
   23.47 -                       int rows, int cols, int mod);
   23.48 -
   23.49 -    /* These are just so we don't have to allocate them separately */
   23.50 -    Uint16 pitches[3];
   23.51 -    Uint8 *planes[3];
   23.52 -
   23.53 -    /* This is a temporary surface in case we have to stretch copy */
   23.54 -    SDL_Surface *stretch;
   23.55 -    SDL_Surface *display;
   23.56 -};
   23.57 -
   23.58 -typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture;
   23.59 -
   23.60 -SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h);
   23.61 -int SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
   23.62 -                                 int *pitch);
   23.63 -int SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
   23.64 -                            const void *pixels, int pitch);
   23.65 -int SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
   23.66 -                          int markDirty, void **pixels, int *pitch);
   23.67 -void SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata);
   23.68 -int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
   23.69 -                        Uint32 target_format, int w, int h, void *pixels,
   23.70 -                        int pitch);
   23.71 -void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata);
   23.72 -
   23.73 -/* vi: set ts=4 sw=4 expandtab: */
    24.1 --- a/src/video/mmx.h	Wed Feb 02 22:55:12 2011 -0800
    24.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.3 @@ -1,642 +0,0 @@
    24.4 -/*	mmx.h
    24.5 -
    24.6 -	MultiMedia eXtensions GCC interface library for IA32.
    24.7 -
    24.8 -	To use this library, simply include this header file
    24.9 -	and compile with GCC.  You MUST have inlining enabled
   24.10 -	in order for mmx_ok() to work; this can be done by
   24.11 -	simply using -O on the GCC command line.
   24.12 -
   24.13 -	Compiling with -DMMX_TRACE will cause detailed trace
   24.14 -	output to be sent to stderr for each mmx operation.
   24.15 -	This adds lots of code, and obviously slows execution to
   24.16 -	a crawl, but can be very useful for debugging.
   24.17 -
   24.18 -	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
   24.19 -	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
   24.20 -	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
   24.21 -	AND FITNESS FOR ANY PARTICULAR PURPOSE.
   24.22 -
   24.23 -	1997-99 by H. Dietz and R. Fisher
   24.24 -
   24.25 - Notes:
   24.26 -	It appears that the latest gas has the pand problem fixed, therefore
   24.27 -	  I'll undefine BROKEN_PAND by default.
   24.28 -*/
   24.29 -
   24.30 -#ifndef _MMX_H
   24.31 -#define _MMX_H
   24.32 -
   24.33 -
   24.34 -/*	Warning:  at this writing, the version of GAS packaged
   24.35 -	with most Linux distributions does not handle the
   24.36 -	parallel AND operation mnemonic correctly.  If the
   24.37 -	symbol BROKEN_PAND is defined, a slower alternative
   24.38 -	coding will be used.  If execution of mmxtest results
   24.39 -	in an illegal instruction fault, define this symbol.
   24.40 -*/
   24.41 -#undef	BROKEN_PAND
   24.42 -
   24.43 -
   24.44 -/*	The type of an value that fits in an MMX register
   24.45 -	(note that long long constant values MUST be suffixed
   24.46 -	 by LL and unsigned long long values by ULL, lest
   24.47 -	 they be truncated by the compiler)
   24.48 -*/
   24.49 -typedef union
   24.50 -{
   24.51 -    long long q;                /* Quadword (64-bit) value */
   24.52 -    unsigned long long uq;      /* Unsigned Quadword */
   24.53 -    int d[2];                   /* 2 Doubleword (32-bit) values */
   24.54 -    unsigned int ud[2];         /* 2 Unsigned Doubleword */
   24.55 -    short w[4];                 /* 4 Word (16-bit) values */
   24.56 -    unsigned short uw[4];       /* 4 Unsigned Word */
   24.57 -    char b[8];                  /* 8 Byte (8-bit) values */
   24.58 -    unsigned char ub[8];        /* 8 Unsigned Byte */
   24.59 -    float s[2];                 /* Single-precision (32-bit) value */
   24.60 -} __attribute__ ((aligned(8))) mmx_t;   /* On an 8-byte (64-bit) boundary */
   24.61 -
   24.62 -
   24.63 -#if 0
   24.64 -/*	Function to test if multimedia instructions are supported...
   24.65 -*/
   24.66 -inline extern int
   24.67 -mm_support(void)
   24.68 -{
   24.69 -    /* Returns 1 if MMX instructions are supported,
   24.70 -       3 if Cyrix MMX and Extended MMX instructions are supported
   24.71 -       5 if AMD MMX and 3DNow! instructions are supported
   24.72 -       0 if hardware does not support any of these
   24.73 -     */
   24.74 -    register int rval = 0;
   24.75 -
   24.76 -    __asm__ __volatile__(
   24.77 -                            /* See if CPUID instruction is supported ... */
   24.78 -                            /* ... Get copies of EFLAGS into eax and ecx */
   24.79 -                            "pushf\n\t"
   24.80 -                            "popl %%eax\n\t" "movl %%eax, %%ecx\n\t"
   24.81 -                            /* ... Toggle the ID bit in one copy and store */
   24.82 -                            /*     to the EFLAGS reg */
   24.83 -                            "xorl $0x200000, %%eax\n\t"
   24.84 -                            "push %%eax\n\t" "popf\n\t"
   24.85 -                            /* ... Get the (hopefully modified) EFLAGS */
   24.86 -                            "pushf\n\t" "popl %%eax\n\t"
   24.87 -                            /* ... Compare and test result */
   24.88 -                            "xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */
   24.89 -                            /* Get standard CPUID information, and
   24.90 -                               go to a specific vendor section */
   24.91 -                            "movl $0, %%eax\n\t" "cpuid\n\t"
   24.92 -                            /* Check for Intel */
   24.93 -                            "cmpl $0x756e6547, %%ebx\n\t"
   24.94 -                            "jne TryAMD\n\t"
   24.95 -                            "cmpl $0x49656e69, %%edx\n\t"
   24.96 -                            "jne TryAMD\n\t"
   24.97 -                            "cmpl $0x6c65746e, %%ecx\n"
   24.98 -                            "jne TryAMD\n\t" "jmp Intel\n\t"
   24.99 -                            /* Check for AMD */
  24.100 -                            "\nTryAMD:\n\t"
  24.101 -                            "cmpl $0x68747541, %%ebx\n\t"
  24.102 -                            "jne TryCyrix\n\t"
  24.103 -                            "cmpl $0x69746e65, %%edx\n\t"
  24.104 -                            "jne TryCyrix\n\t"
  24.105 -                            "cmpl $0x444d4163, %%ecx\n"
  24.106 -                            "jne TryCyrix\n\t" "jmp AMD\n\t"
  24.107 -                            /* Check for Cyrix */
  24.108 -                            "\nTryCyrix:\n\t"
  24.109 -                            "cmpl $0x69727943, %%ebx\n\t"
  24.110 -                            "jne NotSupported2\n\t"
  24.111 -                            "cmpl $0x736e4978, %%edx\n\t"
  24.112 -                            "jne NotSupported3\n\t"
  24.113 -                            "cmpl $0x64616574, %%ecx\n\t"
  24.114 -                            "jne NotSupported4\n\t"
  24.115 -                            /* Drop through to Cyrix... */
  24.116 -                            /* Cyrix Section */
  24.117 -                            /* See if extended CPUID level 80000001 is supported */
  24.118 -                            /* The value of CPUID/80000001 for the 6x86MX is undefined
  24.119 -                               according to the Cyrix CPU Detection Guide (Preliminary
  24.120 -                               Rev. 1.01 table 1), so we'll check the value of eax for
  24.121 -                               CPUID/0 to see if standard CPUID level 2 is supported.
  24.122 -                               According to the table, the only CPU which supports level
  24.123 -                               2 is also the only one which supports extended CPUID levels.
  24.124 -                             */
  24.125 -                            "cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t"    /* Use standard CPUID instead */
  24.126 -                            /* Extended CPUID supported (in theory), so get extended
  24.127 -                               features */
  24.128 -                            "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t"    /* Test for MMX */
  24.129 -                            "jz NotSupported5\n\t"      /* MMX not supported */
  24.130 -                            "testl $0x01000000, %%eax\n\t"      /* Test for Ext'd MMX */
  24.131 -                            "jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t"        /* MMX Supported */
  24.132 -                            "jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t"  /* EMMX and MMX Supported */
  24.133 -                            "jmp Return\n\t"
  24.134 -                            /* AMD Section */
  24.135 -                            "AMD:\n\t"
  24.136 -                            /* See if extended CPUID is supported */
  24.137 -                            "movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t"    /* Use standard CPUID instead */
  24.138 -                            /* Extended CPUID supported, so get extended features */
  24.139 -                            "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t"    /* Test for MMX */
  24.140 -                            "jz NotSupported6\n\t"      /* MMX not supported */
  24.141 -                            "testl $0x80000000, %%edx\n\t"      /* Test for 3DNow! */
  24.142 -                            "jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t"   /* MMX Supported */
  24.143 -                            "jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t"     /* 3DNow! and MMX Supported */
  24.144 -                            "jmp Return\n\t"
  24.145 -                            /* Intel Section */
  24.146 -                            "Intel:\n\t"
  24.147 -                            /* Check for MMX */
  24.148 -                            "MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t"      /* Test for MMX */
  24.149 -                            "jz NotSupported7\n\t"      /* MMX Not supported */
  24.150 -                            "movl $1, %0:\n\n\t"        /* MMX Supported */
  24.151 -                            "jmp Return\n\t"
  24.152 -                            /* Nothing supported */
  24.153 -                            "\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval):     /* no input */
  24.154 -                            :"eax", "ebx", "ecx", "edx");
  24.155 -
  24.156 -    /* Return */
  24.157 -    return (rval);
  24.158 -}
  24.159 -
  24.160 -/*	Function to test if mmx instructions are supported...
  24.161 -*/
  24.162 -inline extern int
  24.163 -mmx_ok(void)
  24.164 -{
  24.165 -    /* Returns 1 if MMX instructions are supported, 0 otherwise */
  24.166 -    return (mm_support() & 0x1);
  24.167 -}
  24.168 -#endif
  24.169 -
  24.170 -/*	Helper functions for the instruction macros that follow...
  24.171 -	(note that memory-to-register, m2r, instructions are nearly
  24.172 -	 as efficient as register-to-register, r2r, instructions;
  24.173 -	 however, memory-to-memory instructions are really simulated
  24.174 -	 as a convenience, and are only 1/3 as efficient)
  24.175 -*/
  24.176 -#ifdef	MMX_TRACE
  24.177 -
  24.178 -/*	Include the stuff for printing a trace to stderr...
  24.179 -*/
  24.180 -
  24.181 -#define	mmx_i2r(op, imm, reg) \
  24.182 -	{ \
  24.183 -		mmx_t mmx_trace; \
  24.184 -		mmx_trace.uq = (imm); \
  24.185 -		printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
  24.186 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.187 -		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  24.188 -				      : "=X" (mmx_trace) \
  24.189 -				      : /* nothing */ ); \
  24.190 -		printf(#reg "=0x%08x%08x) => ", \
  24.191 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.192 -		__asm__ __volatile__ (#op " %0, %%" #reg \
  24.193 -				      : /* nothing */ \
  24.194 -				      : "X" (imm)); \
  24.195 -		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  24.196 -				      : "=X" (mmx_trace) \
  24.197 -				      : /* nothing */ ); \
  24.198 -		printf(#reg "=0x%08x%08x\n", \
  24.199 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.200 -	}
  24.201 -
  24.202 -#define	mmx_m2r(op, mem, reg) \
  24.203 -	{ \
  24.204 -		mmx_t mmx_trace; \
  24.205 -		mmx_trace = (mem); \
  24.206 -		printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
  24.207 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.208 -		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  24.209 -				      : "=X" (mmx_trace) \
  24.210 -				      : /* nothing */ ); \
  24.211 -		printf(#reg "=0x%08x%08x) => ", \
  24.212 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.213 -		__asm__ __volatile__ (#op " %0, %%" #reg \
  24.214 -				      : /* nothing */ \
  24.215 -				      : "X" (mem)); \
  24.216 -		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  24.217 -				      : "=X" (mmx_trace) \
  24.218 -				      : /* nothing */ ); \
  24.219 -		printf(#reg "=0x%08x%08x\n", \
  24.220 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.221 -	}
  24.222 -
  24.223 -#define	mmx_r2m(op, reg, mem) \
  24.224 -	{ \
  24.225 -		mmx_t mmx_trace; \
  24.226 -		__asm__ __volatile__ ("movq %%" #reg ", %0" \
  24.227 -				      : "=X" (mmx_trace) \
  24.228 -				      : /* nothing */ ); \
  24.229 -		printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
  24.230 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.231 -		mmx_trace = (mem); \
  24.232 -		printf(#mem "=0x%08x%08x) => ", \
  24.233 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.234 -		__asm__ __volatile__ (#op " %%" #reg ", %0" \
  24.235 -				      : "=X" (mem) \
  24.236 -				      : /* nothing */ ); \
  24.237 -		mmx_trace = (mem); \
  24.238 -		printf(#mem "=0x%08x%08x\n", \
  24.239 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.240 -	}
  24.241 -
  24.242 -#define	mmx_r2r(op, regs, regd) \
  24.243 -	{ \
  24.244 -		mmx_t mmx_trace; \
  24.245 -		__asm__ __volatile__ ("movq %%" #regs ", %0" \
  24.246 -				      : "=X" (mmx_trace) \
  24.247 -				      : /* nothing */ ); \
  24.248 -		printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
  24.249 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.250 -		__asm__ __volatile__ ("movq %%" #regd ", %0" \
  24.251 -				      : "=X" (mmx_trace) \
  24.252 -				      : /* nothing */ ); \
  24.253 -		printf(#regd "=0x%08x%08x) => ", \
  24.254 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.255 -		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
  24.256 -		__asm__ __volatile__ ("movq %%" #regd ", %0" \
  24.257 -				      : "=X" (mmx_trace) \
  24.258 -				      : /* nothing */ ); \
  24.259 -		printf(#regd "=0x%08x%08x\n", \
  24.260 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.261 -	}
  24.262 -
  24.263 -#define	mmx_m2m(op, mems, memd) \
  24.264 -	{ \
  24.265 -		mmx_t mmx_trace; \
  24.266 -		mmx_trace = (mems); \
  24.267 -		printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
  24.268 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.269 -		mmx_trace = (memd); \
  24.270 -		printf(#memd "=0x%08x%08x) => ", \
  24.271 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.272 -		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
  24.273 -				      #op " %1, %%mm0\n\t" \
  24.274 -				      "movq %%mm0, %0" \
  24.275 -				      : "=X" (memd) \
  24.276 -				      : "X" (mems)); \
  24.277 -		mmx_trace = (memd); \
  24.278 -		printf(#memd "=0x%08x%08x\n", \
  24.279 -			mmx_trace.d[1], mmx_trace.d[0]); \
  24.280 -	}
  24.281 -
  24.282 -#else
  24.283 -
  24.284 -/*	These macros are a lot simpler without the tracing...
  24.285 -*/
  24.286 -
  24.287 -#define	mmx_i2r(op, imm, reg) \
  24.288 -	__asm__ __volatile__ (#op " %0, %%" #reg \
  24.289 -			      : /* nothing */ \
  24.290 -			      : "X" (imm) )
  24.291 -
  24.292 -#define	mmx_m2r(op, mem, reg) \
  24.293 -	__asm__ __volatile__ (#op " %0, %%" #reg \
  24.294 -			      : /* nothing */ \
  24.295 -			      : "m" (mem))