From 1946a4e0577d9451c4b5b20d1f345e2764f168a1 Mon Sep 17 00:00:00 2001
From: Sam Lantinga <slouken@libsdl.org>
Date: Thu, 16 Aug 2007 21:43:19 +0000
Subject: [PATCH] Added notes on the next steps for SDL 1.3 Moved fill and copy
 routines to their own files.

---
 NOTES                                     |  10 +
 src/video/SDL_blit.c                      |   2 +-
 src/video/{SDL_blit_copy.c => SDL_copy.c} |  56 ++--
 src/video/{SDL_blit_copy.h => SDL_copy.h} |   0
 src/video/SDL_fill.c                      | 365 ++++++++++++++++++++++
 src/video/SDL_surface.c                   | 336 --------------------
 6 files changed, 405 insertions(+), 364 deletions(-)
 rename src/video/{SDL_blit_copy.c => SDL_copy.c} (97%)
 rename src/video/{SDL_blit_copy.h => SDL_copy.h} (100%)
 create mode 100644 src/video/SDL_fill.c

diff --git a/NOTES b/NOTES
index e0c512605..49dd2d058 100644
--- a/NOTES
+++ b/NOTES
@@ -157,6 +157,16 @@ Change textures to static/streaming.  Static textures are not lockable,
 streaming textures are lockable and may have system memory pixels available.
 SDL_compat will use a streaming video texture, and will never be HWSURFACE,
 but may be PREALLOC, if system memory pixels are available.
+*** DONE Thu Aug 16 14:18:42 PDT 2007
 
 The software renderer will be abstracted so the surface management can be
 used by any renderer that provides functions to copy surfaces to the window.
+
+Blitters...
+----
+Copy blit and fill rect are optimized with MMX and SSE now.
+
+Here are the pieces we still need:
+- Merging SDL texture capabilities into the SDL surface system
+- Generic fallback blitter architecture
+- Custom fast path blitters
diff --git a/src/video/SDL_blit.c b/src/video/SDL_blit.c
index cbc7e3f1f..d3384a761 100644
--- a/src/video/SDL_blit.c
+++ b/src/video/SDL_blit.c
@@ -24,7 +24,7 @@
 #include "SDL_video.h"
 #include "SDL_sysvideo.h"
 #include "SDL_blit.h"
-#include "SDL_blit_copy.h"
+#include "SDL_copy.h"
 #include "SDL_RLEaccel_c.h"
 #include "SDL_pixels_c.h"
 
diff --git a/src/video/SDL_blit_copy.c b/src/video/SDL_copy.c
similarity index 97%
rename from src/video/SDL_blit_copy.c
rename to src/video/SDL_copy.c
index 01be9ecc5..7fd902ca9 100644
--- a/src/video/SDL_blit_copy.c
+++ b/src/video/SDL_copy.c
@@ -23,10 +23,38 @@
 
 #include "SDL_video.h"
 #include "SDL_blit.h"
-#include "SDL_blit_copy.h"
+#include "SDL_copy.h"
 
 
+#ifdef __SSE__
+/* This assumes 16-byte aligned src and dst */
+static __inline__ void
+SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
+{
+    int i;
+
+    __m128 values[4];
+    for (i = len / 64; i--;) {
+        _mm_prefetch(src, _MM_HINT_NTA);
+        values[0] = *(__m128 *) (src + 0);
+        values[1] = *(__m128 *) (src + 16);
+        values[2] = *(__m128 *) (src + 32);
+        values[3] = *(__m128 *) (src + 48);
+        _mm_stream_ps((float *) (dst + 0), values[0]);
+        _mm_stream_ps((float *) (dst + 16), values[1]);
+        _mm_stream_ps((float *) (dst + 32), values[2]);
+        _mm_stream_ps((float *) (dst + 48), values[3]);
+        src += 64;
+        dst += 64;
+    }
+
+    if (len & 63)
+        SDL_memcpy(dst, src, len & 63);
+}
+#endif /* __SSE__ */
+
 #ifdef __MMX__
+/* This assumes 8-byte aligned src and dst */
 static __inline__ void
 SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
 {
@@ -60,32 +88,6 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
 }
 #endif /* __MMX__ */
 
-#ifdef __SSE__
-static __inline__ void
-SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
-{
-    int i;
-
-    __m128 values[4];
-    for (i = len / 64; i--;) {
-        _mm_prefetch(src, _MM_HINT_NTA);
-        values[0] = *(__m128 *) (src + 0);
-        values[1] = *(__m128 *) (src + 16);
-        values[2] = *(__m128 *) (src + 32);
-        values[3] = *(__m128 *) (src + 48);
-        _mm_stream_ps((float *) (dst + 0), values[0]);
-        _mm_stream_ps((float *) (dst + 16), values[1]);
-        _mm_stream_ps((float *) (dst + 32), values[2]);
-        _mm_stream_ps((float *) (dst + 48), values[3]);
-        src += 64;
-        dst += 64;
-    }
-
-    if (len & 63)
-        SDL_memcpy(dst, src, len & 63);
-}
-#endif /* __SSE__ */
-
 void
 SDL_BlitCopy(SDL_BlitInfo * info)
 {
diff --git a/src/video/SDL_blit_copy.h b/src/video/SDL_copy.h
similarity index 100%
rename from src/video/SDL_blit_copy.h
rename to src/video/SDL_copy.h
diff --git a/src/video/SDL_fill.c b/src/video/SDL_fill.c
new file mode 100644
index 000000000..ce74e286d
--- /dev/null
+++ b/src/video/SDL_fill.c
@@ -0,0 +1,365 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#include "SDL_video.h"
+#include "SDL_blit.h"
+
+
+#ifdef __SSE__
+/* *INDENT-OFF* */
+
+#ifdef _MSC_VER
+#define SSE_BEGIN \
+    __m128 c128; \
+	c128.m128_u32[0] = color; \
+	c128.m128_u32[1] = color; \
+	c128.m128_u32[2] = color; \
+	c128.m128_u32[3] = color;
+#else
+#define SSE_BEGIN \
+    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
+    cccc[0] = color; \
+    cccc[1] = color; \
+    cccc[2] = color; \
+    cccc[3] = color; \
+    __m128 c128 = *(__m128 *)cccc;
+#endif
+
+#define SSE_WORK \
+    for (i = n / 64; i--;) { \
+        _mm_stream_ps((float *)(p+0), c128); \
+        _mm_stream_ps((float *)(p+16), c128); \
+        _mm_stream_ps((float *)(p+32), c128); \
+        _mm_stream_ps((float *)(p+48), c128); \
+        p += 64; \
+    }
+
+#define SSE_END
+
+#define DEFINE_SSE_FILLRECT(bpp, type) \
+static void \
+SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+    SSE_BEGIN; \
+ \
+    while (h--) { \
+        int i, n = w * bpp; \
+        Uint8 *p = pixels; \
+ \
+        if (n > 15) { \
+            int adjust = 16 - ((uintptr_t)p & 15); \
+            if (adjust < 16) { \
+                n -= adjust; \
+                adjust /= bpp; \
+                while(adjust--) { \
+                    *((type *)p) = (type)color; \
+                    p += bpp; \
+                } \
+            } \
+            SSE_WORK; \
+        } \
+        if (n & 63) { \
+            int remainder = (n & 63); \
+            remainder /= bpp; \
+            while(remainder--) { \
+                *((type *)p) = (type)color; \
+                p += bpp; \
+            } \
+        } \
+        pixels += pitch; \
+    } \
+ \
+    SSE_END; \
+}
+
+DEFINE_SSE_FILLRECT(1, Uint8)
+DEFINE_SSE_FILLRECT(2, Uint16)
+DEFINE_SSE_FILLRECT(4, Uint32)
+
+/* *INDENT-ON* */
+#endif /* __SSE__ */
+
+#ifdef __MMX__
+/* *INDENT-OFF* */
+
+#define MMX_BEGIN \
+    __m64 c64 = _mm_set_pi32(color, color)
+
+#define MMX_WORK \
+    for (i = n / 64; i--;) { \
+        _mm_stream_pi((__m64 *)(p+0), c64); \
+        _mm_stream_pi((__m64 *)(p+8), c64); \
+        _mm_stream_pi((__m64 *)(p+16), c64); \
+        _mm_stream_pi((__m64 *)(p+24), c64); \
+        _mm_stream_pi((__m64 *)(p+32), c64); \
+        _mm_stream_pi((__m64 *)(p+40), c64); \
+        _mm_stream_pi((__m64 *)(p+48), c64); \
+        _mm_stream_pi((__m64 *)(p+56), c64); \
+        p += 64; \
+    }
+
+#define MMX_END \
+    _mm_empty()
+
+#define DEFINE_MMX_FILLRECT(bpp, type) \
+static void \
+SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+    MMX_BEGIN; \
+ \
+    while (h--) { \
+        int i, n = w * bpp; \
+        Uint8 *p = pixels; \
+ \
+        if (n > 7) { \
+            int adjust = 8 - ((uintptr_t)p & 7); \
+            if (adjust < 8) { \
+                n -= adjust; \
+                adjust /= bpp; \
+                while(adjust--) { \
+                    *((type *)p) = (type)color; \
+                    p += bpp; \
+                } \
+            } \
+            MMX_WORK; \
+        } \
+        if (n & 63) { \
+            int remainder = (n & 63); \
+            remainder /= bpp; \
+            while(remainder--) { \
+                *((type *)p) = (type)color; \
+                p += bpp; \
+            } \
+        } \
+        pixels += pitch; \
+    } \
+ \
+    MMX_END; \
+}
+
+DEFINE_MMX_FILLRECT(1, Uint8)
+DEFINE_MMX_FILLRECT(2, Uint16)
+DEFINE_MMX_FILLRECT(4, Uint32)
+
+/* *INDENT-ON* */
+#endif /* __MMX__ */
+
+static void
+SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    while (h--) {
+        int n = w;
+        Uint8 *p = pixels;
+
+        if (n > 3) {
+            switch ((uintptr_t) p & 3) {
+            case 1:
+                *p++ = (Uint8) color;
+                --n;
+            case 2:
+                *p++ = (Uint8) color;
+                --n;
+            case 3:
+                *p++ = (Uint8) color;
+                --n;
+            }
+            SDL_memset4(p, color, (n >> 2));
+        }
+        if (n & 3) {
+            p += (n & ~3);
+            switch (n & 3) {
+            case 3:
+                *p++ = (Uint8) color;
+            case 2:
+                *p++ = (Uint8) color;
+            case 1:
+                *p++ = (Uint8) color;
+            }
+        }
+        pixels += pitch;
+    }
+}
+
+static void
+SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    while (h--) {
+        int n = w;
+        Uint16 *p = (Uint16 *) pixels;
+
+        if (n > 1) {
+            if ((uintptr_t) p & 2) {
+                *p++ = (Uint16) color;
+                --n;
+            }
+            SDL_memset4(p, color, (n >> 1));
+        }
+        if (n & 1) {
+            p[n - 1] = (Uint16) color;
+        }
+        pixels += pitch;
+    }
+}
+
+static void
+SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    Uint8 r = (Uint8) ((color >> 16) & 0xFF);
+    Uint8 g = (Uint8) ((color >> 8) & 0xFF);
+    Uint8 b = (Uint8) (color & 0xFF);
+
+    while (h--) {
+        int n = w;
+        Uint8 *p = pixels;
+
+        while (n--) {
+            *p++ = r;
+            *p++ = g;
+            *p++ = b;
+        }
+        pixels += pitch;
+    }
+}
+
+static void
+SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
+{
+    while (h--) {
+        SDL_memset4(pixels, color, w);
+        pixels += pitch;
+    }
+}
+
+/* 
+ * This function performs a fast fill of the given rectangle with 'color'
+ */
+int
+SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
+{
+    Uint8 *pixels;
+
+    /* This function doesn't work on surfaces < 8 bpp */
+    if (dst->format->BitsPerPixel < 8) {
+        SDL_SetError("SDL_FillRect(): Unsupported surface format");
+        return (-1);
+    }
+
+    /* If 'dstrect' == NULL, then fill the whole surface */
+    if (dstrect) {
+        /* Perform clipping */
+        if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
+            return (0);
+        }
+    } else {
+        dstrect = &dst->clip_rect;
+    }
+
+    /* Perform software fill */
+    if (!dst->pixels) {
+        SDL_SetError("SDL_FillRect(): You must lock the surface");
+        return (-1);
+    }
+
+    pixels =
+        (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
+        dstrect->x * dst->format->BytesPerPixel;
+
+    switch (dst->format->BytesPerPixel) {
+    case 1:
+        {
+            color |= (color << 8);
+            color |= (color << 16);
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 2:
+        {
+            color |= (color << 16);
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 3:
+        /* 24-bit RGB is a slow path, at least for now. */
+        {
+            SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+
+    case 4:
+        {
+#ifdef __SSE__
+            if (SDL_HasSSE()) {
+                SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+#ifdef __MMX__
+            if (SDL_HasMMX()) {
+                SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
+                                 dstrect->h);
+                break;
+            }
+#endif
+            SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
+            break;
+        }
+    }
+
+    SDL_UnlockSurface(dst);
+
+    /* We're done! */
+    return (0);
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
diff --git a/src/video/SDL_surface.c b/src/video/SDL_surface.c
index fa1daaf0e..e3614cd70 100644
--- a/src/video/SDL_surface.c
+++ b/src/video/SDL_surface.c
@@ -509,342 +509,6 @@ SDL_UpperBlit(SDL_Surface * src, SDL_Rect * srcrect,
     return 0;
 }
 
-#ifdef __SSE__
-/* *INDENT-OFF* */
-
-#ifdef _MSC_VER
-#define SSE_BEGIN \
-    __m128 c128; \
-	c128.m128_u32[0] = color; \
-	c128.m128_u32[1] = color; \
-	c128.m128_u32[2] = color; \
-	c128.m128_u32[3] = color;
-#else
-#define SSE_BEGIN \
-    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
-    cccc[0] = color; \
-    cccc[1] = color; \
-    cccc[2] = color; \
-    cccc[3] = color; \
-    __m128 c128 = *(__m128 *)cccc;
-#endif
-
-#define SSE_WORK \
-    for (i = n / 64; i--;) { \
-        _mm_stream_ps((float *)(p+0), c128); \
-        _mm_stream_ps((float *)(p+16), c128); \
-        _mm_stream_ps((float *)(p+32), c128); \
-        _mm_stream_ps((float *)(p+48), c128); \
-        p += 64; \
-    }
-
-#define SSE_END
-
-#define DEFINE_SSE_FILLRECT(bpp, type) \
-static void \
-SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
-{ \
-    SSE_BEGIN; \
- \
-    while (h--) { \
-        int i, n = w * bpp; \
-        Uint8 *p = pixels; \
- \
-        if (n > 15) { \
-            int adjust = 16 - ((uintptr_t)p & 15); \
-            if (adjust < 16) { \
-                n -= adjust; \
-                adjust /= bpp; \
-                while(adjust--) { \
-                    *((type *)p) = (type)color; \
-                    p += bpp; \
-                } \
-            } \
-            SSE_WORK; \
-        } \
-        if (n & 63) { \
-            int remainder = (n & 63); \
-            remainder /= bpp; \
-            while(remainder--) { \
-                *((type *)p) = (type)color; \
-                p += bpp; \
-            } \
-        } \
-        pixels += pitch; \
-    } \
- \
-    SSE_END; \
-}
-
-DEFINE_SSE_FILLRECT(1, Uint8)
-DEFINE_SSE_FILLRECT(2, Uint16)
-DEFINE_SSE_FILLRECT(4, Uint32)
-
-/* *INDENT-ON* */
-#endif /* __SSE__ */
-
-#ifdef __MMX__
-/* *INDENT-OFF* */
-
-#define MMX_BEGIN \
-    __m64 c64 = _mm_set_pi32(color, color)
-
-#define MMX_WORK \
-    for (i = n / 64; i--;) { \
-        _mm_stream_pi((__m64 *)(p+0), c64); \
-        _mm_stream_pi((__m64 *)(p+8), c64); \
-        _mm_stream_pi((__m64 *)(p+16), c64); \
-        _mm_stream_pi((__m64 *)(p+24), c64); \
-        _mm_stream_pi((__m64 *)(p+32), c64); \
-        _mm_stream_pi((__m64 *)(p+40), c64); \
-        _mm_stream_pi((__m64 *)(p+48), c64); \
-        _mm_stream_pi((__m64 *)(p+56), c64); \
-        p += 64; \
-    }
-
-#define MMX_END \
-    _mm_empty()
-
-#define DEFINE_MMX_FILLRECT(bpp, type) \
-static void \
-SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
-{ \
-    MMX_BEGIN; \
- \
-    while (h--) { \
-        int i, n = w * bpp; \
-        Uint8 *p = pixels; \
- \
-        if (n > 7) { \
-            int adjust = 8 - ((uintptr_t)p & 7); \
-            if (adjust < 8) { \
-                n -= adjust; \
-                adjust /= bpp; \
-                while(adjust--) { \
-                    *((type *)p) = (type)color; \
-                    p += bpp; \
-                } \
-            } \
-            MMX_WORK; \
-        } \
-        if (n & 63) { \
-            int remainder = (n & 63); \
-            remainder /= bpp; \
-            while(remainder--) { \
-                *((type *)p) = (type)color; \
-                p += bpp; \
-            } \
-        } \
-        pixels += pitch; \
-    } \
- \
-    MMX_END; \
-}
-
-DEFINE_MMX_FILLRECT(1, Uint8)
-DEFINE_MMX_FILLRECT(2, Uint16)
-DEFINE_MMX_FILLRECT(4, Uint32)
-
-/* *INDENT-ON* */
-#endif /* __MMX__ */
-
-static void
-SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
-    while (h--) {
-        int n = w;
-        Uint8 *p = pixels;
-
-        if (n > 3) {
-            switch ((uintptr_t) p & 3) {
-            case 1:
-                *p++ = (Uint8) color;
-                --n;
-            case 2:
-                *p++ = (Uint8) color;
-                --n;
-            case 3:
-                *p++ = (Uint8) color;
-                --n;
-            }
-            SDL_memset4(p, color, (n >> 2));
-        }
-        if (n & 3) {
-            p += (n & ~3);
-            switch (n & 3) {
-            case 3:
-                *p++ = (Uint8) color;
-            case 2:
-                *p++ = (Uint8) color;
-            case 1:
-                *p++ = (Uint8) color;
-            }
-        }
-        pixels += pitch;
-    }
-}
-
-static void
-SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
-    while (h--) {
-        int n = w;
-        Uint16 *p = (Uint16 *) pixels;
-
-        if (n > 1) {
-            if ((uintptr_t) p & 2) {
-                *p++ = (Uint16) color;
-                --n;
-            }
-            SDL_memset4(p, color, (n >> 1));
-        }
-        if (n & 1) {
-            p[n - 1] = (Uint16) color;
-        }
-        pixels += pitch;
-    }
-}
-
-static void
-SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
-    Uint8 r = (Uint8) (color & 0xFF);
-    Uint8 g = (Uint8) ((color >> 8) & 0xFF);
-    Uint8 b = (Uint8) ((color >> 16) & 0xFF);
-
-    while (h--) {
-        int n = w;
-        Uint8 *p = pixels;
-
-        while (n--) {
-            *p++ = r;
-            *p++ = g;
-            *p++ = b;
-        }
-        pixels += pitch;
-    }
-}
-
-static void
-SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
-{
-    while (h--) {
-        SDL_memset4(pixels, color, w);
-        pixels += pitch;
-    }
-}
-
-/* 
- * This function performs a fast fill of the given rectangle with 'color'
- */
-int
-SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
-{
-    Uint8 *pixels;
-
-    /* This function doesn't work on surfaces < 8 bpp */
-    if (dst->format->BitsPerPixel < 8) {
-        SDL_SetError("Fill rect on unsupported surface format");
-        return (-1);
-    }
-
-    /* If 'dstrect' == NULL, then fill the whole surface */
-    if (dstrect) {
-        /* Perform clipping */
-        if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
-            return (0);
-        }
-    } else {
-        dstrect = &dst->clip_rect;
-    }
-
-    /* Perform software fill */
-    if (SDL_LockSurface(dst) != 0) {
-        return (-1);
-    }
-
-    pixels =
-        (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
-        dstrect->x * dst->format->BytesPerPixel;
-
-    switch (dst->format->BytesPerPixel) {
-    case 1:
-        {
-            color |= (color << 8);
-            color |= (color << 16);
-#ifdef __SSE__
-            if (SDL_HasSSE()) {
-                SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-#ifdef __MMX__
-            if (SDL_HasMMX()) {
-                SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-            SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
-            break;
-        }
-
-    case 2:
-        {
-            color |= (color << 16);
-#ifdef __SSE__
-            if (SDL_HasSSE()) {
-                SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-#ifdef __MMX__
-            if (SDL_HasMMX()) {
-                SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-            SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
-            break;
-        }
-
-    case 3:
-        /* 24-bit RGB is a slow path, at least for now. */
-        {
-            SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
-            break;
-        }
-
-    case 4:
-        {
-#ifdef __SSE__
-            if (SDL_HasSSE()) {
-                SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-#ifdef __MMX__
-            if (SDL_HasMMX()) {
-                SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
-                                 dstrect->h);
-                break;
-            }
-#endif
-            SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
-            break;
-        }
-    }
-
-    SDL_UnlockSurface(dst);
-
-    /* We're done! */
-    return (0);
-}
-
 /*
  * Lock a surface to directly access the pixels
  */