From 15ecc894fe904e37b0e419c0bcbd85089c8113eb Mon Sep 17 00:00:00 2001
From: Sam Lantinga <slouken@libsdl.org>
Date: Fri, 11 Feb 2011 14:51:04 -0800
Subject: [PATCH] Updated CPU detection code for SSE3 and SSE4 and removed
 obsolete 3DNow! and Altivec support.

---
 configure.in                | 103 ----
 include/SDL_config.h.in     |   2 -
 include/SDL_config_macosx.h |   3 -
 include/SDL_cpuinfo.h       |  25 +-
 src/cpuinfo/SDL_cpuinfo.c   | 157 ++----
 src/video/SDL_blit.c        |  34 --
 src/video/SDL_blit.h        |   6 -
 src/video/SDL_blit_A.c      | 926 +-----------------------------------
 src/video/SDL_blit_N.c      | 869 ---------------------------------
 test/testplatform.c         |   7 +-
 10 files changed, 57 insertions(+), 2075 deletions(-)

diff --git a/configure.in b/configure.in
index 454c3db8a..3fa9a8178 100644
--- a/configure.in
+++ b/configure.in
@@ -501,33 +501,6 @@ AC_HELP_STRING([--enable-mmx], [use MMX assembly routines [[default=yes]]]),
         fi
     fi
 
-    AC_ARG_ENABLE(3dnow,
-AC_HELP_STRING([--enable-3dnow], [use MMX assembly routines [[default=yes]]]),
-                  , enable_3dnow=yes)
-    if test x$enable_3dnow = xyes; then
-        save_CFLAGS="$CFLAGS"
-        have_gcc_3dnow=no
-        AC_MSG_CHECKING(for GCC -m3dnow option)
-        amd3dnow_CFLAGS="-m3dnow"
-        CFLAGS="$save_CFLAGS $amd3dnow_CFLAGS"
-
-        AC_TRY_COMPILE([
-        #include <mm3dnow.h>
-        #ifndef __3dNOW__
-        #error Assembler CPP flag not enabled
-        #endif
-        ],[
-        ],[
-        have_gcc_3dnow=yes
-        ])
-        AC_MSG_RESULT($have_gcc_3dnow)
-        CFLAGS="$save_CFLAGS"
-
-        if test x$have_gcc_3dnow = xyes; then
-            EXTRA_CFLAGS="$EXTRA_CFLAGS $amd3dnow_CFLAGS"
-        fi
-    fi
-
     AC_ARG_ENABLE(sse,
 AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]),
                   , enable_sse=yes)
@@ -599,82 +572,6 @@ AC_HELP_STRING([--enable-sse2], [use SSE2 assembly routines [[default=no]]]),
             EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
         fi
     fi
-
-    AC_ARG_ENABLE(altivec,
-AC_HELP_STRING([--enable-altivec], [use Altivec assembly routines [[default=yes]]]),
-                  , enable_altivec=yes)
-    if test x$enable_altivec = xyes; then
-        save_CFLAGS="$CFLAGS"
-        have_gcc_altivec=no
-        have_altivec_h_hdr=no
-        altivec_CFLAGS="-maltivec"
-        CFLAGS="$save_CFLAGS $altivec_CFLAGS"
-
-        AC_MSG_CHECKING(for Altivec with GCC altivec.h and -maltivec option)
-        AC_TRY_COMPILE([
-        #include <altivec.h>
-        vector unsigned int vzero() {
-            return vec_splat_u32(0);
-        }
-        ],[
-        ],[
-        have_gcc_altivec=yes
-        have_altivec_h_hdr=yes
-        ])
-        AC_MSG_RESULT($have_gcc_altivec)
-
-        if test x$have_gcc_altivec = xno; then
-            AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
-            AC_TRY_COMPILE([
-            vector unsigned int vzero() {
-                return vec_splat_u32(0);
-            }
-            ],[
-            ],[
-            have_gcc_altivec=yes
-            ])
-            AC_MSG_RESULT($have_gcc_altivec)
-        fi
-
-        if test x$have_gcc_altivec = xno; then
-            AC_MSG_CHECKING(for Altivec with GCC altivec.h and -faltivec option)
-            altivec_CFLAGS="-faltivec"
-            CFLAGS="$save_CFLAGS $altivec_CFLAGS"
-            AC_TRY_COMPILE([
-            #include <altivec.h>
-            vector unsigned int vzero() {
-                return vec_splat_u32(0);
-            }
-            ],[
-            ],[
-            have_gcc_altivec=yes
-            have_altivec_h_hdr=yes
-            ])
-            AC_MSG_RESULT($have_gcc_altivec)
-        fi
-
-        if test x$have_gcc_altivec = xno; then
-            AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
-            AC_TRY_COMPILE([
-            vector unsigned int vzero() {
-                return vec_splat_u32(0);
-            }
-            ],[
-            ],[
-            have_gcc_altivec=yes
-            ])
-            AC_MSG_RESULT($have_gcc_altivec)
-        fi
-        CFLAGS="$save_CFLAGS"
-
-        if test x$have_gcc_altivec = xyes; then
-            AC_DEFINE(SDL_ALTIVEC_BLITTERS)
-            if test x$have_altivec_h_hdr = xyes; then
-              AC_DEFINE(HAVE_ALTIVEC_H)
-            fi
-            EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
-        fi
-    fi
 fi
 
 dnl See if the OSS audio interface is supported
diff --git a/include/SDL_config.h.in b/include/SDL_config.h.in
index a9526f694..b4c4cd834 100644
--- a/include/SDL_config.h.in
+++ b/include/SDL_config.h.in
@@ -82,7 +82,6 @@
 #undef HAVE_MATH_H
 #undef HAVE_ICONV_H
 #undef HAVE_SIGNAL_H
-#undef HAVE_ALTIVEC_H
 
 /* C library functions */
 #undef HAVE_MALLOC
@@ -303,6 +302,5 @@
 
 /* Enable assembly routines */
 #undef SDL_ASSEMBLY_ROUTINES
-#undef SDL_ALTIVEC_BLITTERS
 
 #endif /* _SDL_config_h */
diff --git a/include/SDL_config_macosx.h b/include/SDL_config_macosx.h
index e01fcf985..38b106852 100644
--- a/include/SDL_config_macosx.h
+++ b/include/SDL_config_macosx.h
@@ -168,8 +168,5 @@
 
 /* Enable assembly routines */
 #define SDL_ASSEMBLY_ROUTINES	1
-#ifdef __ppc__
-#define SDL_ALTIVEC_BLITTERS	1
-#endif
 
 #endif /* _SDL_config_macosx_h */
diff --git a/include/SDL_cpuinfo.h b/include/SDL_cpuinfo.h
index b15393834..0ab895738 100644
--- a/include/SDL_cpuinfo.h
+++ b/include/SDL_cpuinfo.h
@@ -69,21 +69,6 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasRDTSC(void);
  */
 extern DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void);
 
-/**
- *  This function returns true if the CPU has MMX Ext.\ features.
- */
-extern DECLSPEC SDL_bool SDLCALL SDL_HasMMXExt(void);
-
-/**
- *  This function returns true if the CPU has 3DNow!\ features.
- */
-extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNow(void);
-
-/**
- *  This function returns true if the CPU has 3DNow!\ Ext.\ features.
- */
-extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNowExt(void);
-
 /**
  *  This function returns true if the CPU has SSE features.
  */
@@ -95,9 +80,15 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE(void);
 extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE2(void);
 
 /**
- *  This function returns true if the CPU has AltiVec features.
+ *  This function returns true if the CPU has SSE3 features.
+ */
+extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE3(void);
+
+/**
+ *  This function returns true if the CPU has SSE4 features.
  */
-extern DECLSPEC SDL_bool SDLCALL SDL_HasAltiVec(void);
+extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE4(void);
+
 
 /* Ends C function definitions when using C++ */
 #ifdef __cplusplus
diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c
index e365ad76f..7e73b8308 100644
--- a/src/cpuinfo/SDL_cpuinfo.c
+++ b/src/cpuinfo/SDL_cpuinfo.c
@@ -32,36 +32,17 @@
 #include <sys/types.h>
 #include <sys/sysctl.h>
 #endif
-#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
-#include <sys/sysctl.h>         /* For AltiVec check */
-#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
-#include <signal.h>
-#include <setjmp.h>
-#endif
 #ifdef __WIN32__
 #include "../core/windows/SDL_windows.h"
 #endif
 
 #define CPU_HAS_RDTSC   0x00000001
 #define CPU_HAS_MMX     0x00000002
-#define CPU_HAS_MMXEXT  0x00000004
-#define CPU_HAS_3DNOW   0x00000010
-#define CPU_HAS_3DNOWEXT 0x00000020
-#define CPU_HAS_SSE     0x00000040
-#define CPU_HAS_SSE2    0x00000080
-#define CPU_HAS_ALTIVEC 0x00000100
-
-#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__
-/* This is the brute force way of detecting instruction sets...
-   the idea is borrowed from the libmpeg2 library - thanks!
- */
-static jmp_buf jmpbuf;
-static void
-illegal_instruction(int sig)
-{
-    longjmp(jmpbuf, 1);
-}
-#endif /* HAVE_SETJMP */
+#define CPU_HAS_SSE     0x00000010
+#define CPU_HAS_SSE2    0x00000020
+#define CPU_HAS_SSE3    0x00000040
+#define CPU_HAS_SSE4    0x00000080
+
 
 static __inline__ int
 CPU_haveCPUID(void)
@@ -201,20 +182,6 @@ CPU_getCPUIDFeatures(void)
     return features;
 }
 
-static __inline__ int
-CPU_getCPUIDFeaturesExt(void)
-{
-    int features = 0;
-    int a, b, c, d;
-
-    cpuid(0x80000000, a, b, c, d);
-    if (a >= 0x80000001) {
-        cpuid(0x80000001, a, b, c, d);
-        features = d;
-    }
-    return features;
-}
-
 static __inline__ int
 CPU_haveRDTSC(void)
 {
@@ -234,71 +201,51 @@ CPU_haveMMX(void)
 }
 
 static __inline__ int
-CPU_haveMMXExt(void)
+CPU_haveSSE(void)
 {
     if (CPU_haveCPUID()) {
-        return (CPU_getCPUIDFeaturesExt() & 0x00400000);
+        return (CPU_getCPUIDFeatures() & 0x02000000);
     }
     return 0;
 }
 
 static __inline__ int
-CPU_have3DNow(void)
+CPU_haveSSE2(void)
 {
     if (CPU_haveCPUID()) {
-        return (CPU_getCPUIDFeaturesExt() & 0x80000000);
+        return (CPU_getCPUIDFeatures() & 0x04000000);
     }
     return 0;
 }
 
 static __inline__ int
-CPU_have3DNowExt(void)
+CPU_haveSSE3(void)
 {
     if (CPU_haveCPUID()) {
-        return (CPU_getCPUIDFeaturesExt() & 0x40000000);
-    }
-    return 0;
-}
+        int a, b, c, d;
 
-static __inline__ int
-CPU_haveSSE(void)
-{
-    if (CPU_haveCPUID()) {
-        return (CPU_getCPUIDFeatures() & 0x02000000);
+        cpuid(0, a, b, c, d);    /* leaf 0: a = highest supported standard leaf */
+        if (a >= 1) {
+            cpuid(1, a, b, c, d);    /* leaf 1: feature flags */
+            return (c & 0x00000001);    /* bit 0 of ECX = SSE3 per Intel SDM */
+        }
     }
     return 0;
 }
 
 static __inline__ int
-CPU_haveSSE2(void)
+CPU_haveSSE4(void)
 {
     if (CPU_haveCPUID()) {
-        return (CPU_getCPUIDFeatures() & 0x04000000);
-    }
-    return 0;
-}
+        int a, b, c, d;
 
-static __inline__ int
-CPU_haveAltiVec(void)
-{
-    volatile int altivec = 0;
-#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
-    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
-    int hasVectorUnit = 0;
-    size_t length = sizeof(hasVectorUnit);
-    int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
-    if (0 == error)
-        altivec = (hasVectorUnit != 0);
-#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
-    void (*handler) (int sig);
-    handler = signal(SIGILL, illegal_instruction);
-    if (setjmp(jmpbuf) == 0) {
-        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
-        altivec = 1;
+        cpuid(0, a, b, c, d);    /* leaf 0: a = highest supported standard leaf */
+        if (a >= 1) {
+            cpuid(1, a, b, c, d);    /* leaf 1: feature flags */
+            return (c & 0x00080000);    /* ECX bit 19 = SSE4.1 (0x100 is bit 8, TM2) */
+        }
     }
-    signal(SIGILL, handler);
-#endif
-    return altivec;
+    return 0;
 }
 
 static int SDL_CPUCount = 0;
@@ -471,23 +418,17 @@ SDL_GetCPUFeatures(void)
         if (CPU_haveMMX()) {
             SDL_CPUFeatures |= CPU_HAS_MMX;
         }
-        if (CPU_haveMMXExt()) {
-            SDL_CPUFeatures |= CPU_HAS_MMXEXT;
-        }
-        if (CPU_have3DNow()) {
-            SDL_CPUFeatures |= CPU_HAS_3DNOW;
-        }
-        if (CPU_have3DNowExt()) {
-            SDL_CPUFeatures |= CPU_HAS_3DNOWEXT;
-        }
         if (CPU_haveSSE()) {
             SDL_CPUFeatures |= CPU_HAS_SSE;
         }
         if (CPU_haveSSE2()) {
             SDL_CPUFeatures |= CPU_HAS_SSE2;
         }
-        if (CPU_haveAltiVec()) {
-            SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
+        if (CPU_haveSSE3()) {
+            SDL_CPUFeatures |= CPU_HAS_SSE3;
+        }
+        if (CPU_haveSSE4()) {
+            SDL_CPUFeatures |= CPU_HAS_SSE4;
         }
     }
     return SDL_CPUFeatures;
@@ -512,54 +453,36 @@ SDL_HasMMX(void)
 }
 
 SDL_bool
-SDL_HasMMXExt(void)
-{
-    if (SDL_GetCPUFeatures() & CPU_HAS_MMXEXT) {
-        return SDL_TRUE;
-    }
-    return SDL_FALSE;
-}
-
-SDL_bool
-SDL_Has3DNow(void)
-{
-    if (SDL_GetCPUFeatures() & CPU_HAS_3DNOW) {
-        return SDL_TRUE;
-    }
-    return SDL_FALSE;
-}
-
-SDL_bool
-SDL_Has3DNowExt(void)
+SDL_HasSSE(void)
 {
-    if (SDL_GetCPUFeatures() & CPU_HAS_3DNOWEXT) {
+    if (SDL_GetCPUFeatures() & CPU_HAS_SSE) {
         return SDL_TRUE;
     }
     return SDL_FALSE;
 }
 
 SDL_bool
-SDL_HasSSE(void)
+SDL_HasSSE2(void)
 {
-    if (SDL_GetCPUFeatures() & CPU_HAS_SSE) {
+    if (SDL_GetCPUFeatures() & CPU_HAS_SSE2) {
         return SDL_TRUE;
     }
     return SDL_FALSE;
 }
 
 SDL_bool
-SDL_HasSSE2(void)
+SDL_HasSSE3(void)
 {
-    if (SDL_GetCPUFeatures() & CPU_HAS_SSE2) {
+    if (SDL_GetCPUFeatures() & CPU_HAS_SSE3) {
         return SDL_TRUE;
     }
     return SDL_FALSE;
 }
 
 SDL_bool
-SDL_HasAltiVec(void)
+SDL_HasSSE4(void)
 {
-    if (SDL_GetCPUFeatures() & CPU_HAS_ALTIVEC) {
+    if (SDL_GetCPUFeatures() & CPU_HAS_SSE4) {
         return SDL_TRUE;
     }
     return SDL_FALSE;
@@ -578,12 +501,10 @@ main()
     printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
     printf("RDTSC: %d\n", SDL_HasRDTSC());
     printf("MMX: %d\n", SDL_HasMMX());
-    printf("MMXExt: %d\n", SDL_HasMMXExt());
-    printf("3DNow: %d\n", SDL_Has3DNow());
-    printf("3DNowExt: %d\n", SDL_Has3DNowExt());
     printf("SSE: %d\n", SDL_HasSSE());
     printf("SSE2: %d\n", SDL_HasSSE2());
-    printf("AltiVec: %d\n", SDL_HasAltiVec());
+    printf("SSE3: %d\n", SDL_HasSSE3());
+    printf("SSE4: %d\n", SDL_HasSSE4());
     return 0;
 }
 
diff --git a/src/video/SDL_blit.c b/src/video/SDL_blit.c
index 01b5b7938..171586369 100644
--- a/src/video/SDL_blit.c
+++ b/src/video/SDL_blit.c
@@ -100,30 +100,6 @@ SDL_SoftBlit(SDL_Surface * src, SDL_Rect * srcrect,
     return (okay ? 0 : -1);
 }
 
-#ifdef __MACOSX__
-#include <sys/sysctl.h>
-
-static SDL_bool
-SDL_UseAltivecPrefetch()
-{
-    const char key[] = "hw.l3cachesize";
-    u_int64_t result = 0;
-    size_t typeSize = sizeof(result);
-
-    if (sysctlbyname(key, &result, &typeSize, NULL, 0) == 0 && result > 0) {
-        return SDL_TRUE;
-    } else {
-        return SDL_FALSE;
-    }
-}
-#else
-static SDL_bool
-SDL_UseAltivecPrefetch()
-{
-    /* Just guess G4 */
-    return SDL_TRUE;
-}
-#endif /* __MACOSX__ */
 
 static SDL_BlitFunc
 SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
@@ -145,22 +121,12 @@ SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
             if (SDL_HasMMX()) {
                 features |= SDL_CPU_MMX;
             }
-            if (SDL_Has3DNow()) {
-                features |= SDL_CPU_3DNOW;
-            }
             if (SDL_HasSSE()) {
                 features |= SDL_CPU_SSE;
             }
             if (SDL_HasSSE2()) {
                 features |= SDL_CPU_SSE2;
             }
-            if (SDL_HasAltiVec()) {
-                if (SDL_UseAltivecPrefetch()) {
-                    features |= SDL_CPU_ALTIVEC_PREFETCH;
-                } else {
-                    features |= SDL_CPU_ALTIVEC_NOPREFETCH;
-                }
-            }
         }
     }
 
diff --git a/src/video/SDL_blit.h b/src/video/SDL_blit.h
index b831c368e..c29bb98b3 100644
--- a/src/video/SDL_blit.h
+++ b/src/video/SDL_blit.h
@@ -34,9 +34,6 @@
 #ifdef __MMX__
 #include <mmintrin.h>
 #endif
-#ifdef __3dNOW__
-#include <mm3dnow.h>
-#endif
 #ifdef __SSE__
 #include <xmmintrin.h>
 #endif
@@ -65,11 +62,8 @@
 /* SDL blit CPU flags */
 #define SDL_CPU_ANY                 0x00000000
 #define SDL_CPU_MMX                 0x00000001
-#define SDL_CPU_3DNOW               0x00000002
 #define SDL_CPU_SSE                 0x00000004
 #define SDL_CPU_SSE2                0x00000008
-#define SDL_CPU_ALTIVEC_PREFETCH    0x00000010
-#define SDL_CPU_ALTIVEC_NOPREFETCH  0x00000020
 
 typedef struct
 {
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 50f5545b8..959013f92 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -419,806 +419,6 @@ BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
 
 #endif /* __MMX__ */
 
-#if SDL_ALTIVEC_BLITTERS
-#if __MWERKS__
-#pragma altivec_model on
-#endif
-#if HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-#include <assert.h>
-
-#if (defined(__MACOSX__) && (__GNUC__ < 4))
-#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
-        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
-#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
-        (vector unsigned short) ( a,b,c,d,e,f,g,h )
-#else
-#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
-        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
-#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
-        (vector unsigned short) { a,b,c,d,e,f,g,h }
-#endif
-
-#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
-#define VECPRINT(msg, v) do { \
-    vector unsigned int tmpvec = (vector unsigned int)(v); \
-    unsigned int *vp = (unsigned int *)&tmpvec; \
-    printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
-} while (0)
-
-/* the permuation vector that takes the high bytes out of all the appropriate shorts 
-    (vector unsigned char)(
-        0x00, 0x10, 0x02, 0x12,
-        0x04, 0x14, 0x06, 0x16,
-        0x08, 0x18, 0x0A, 0x1A,
-        0x0C, 0x1C, 0x0E, 0x1E );
-*/
-#define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))
-#define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12)))
-#define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
-#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
-    ? vec_lvsl(0, src) \
-    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
-
-
-#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
-    /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
-    vector unsigned short vtemp1 = vec_mule(vs, valpha); \
-    /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
-    vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
-    /* valpha2 is 255-alpha */ \
-    vector unsigned char valpha2 = vec_nor(valpha, valpha); \
-    /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \
-    vector unsigned short vtemp3 = vec_mule(vd, valpha2); \
-    /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \
-    vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \
-    /* add source and dest */ \
-    vtemp1 = vec_add(vtemp1, vtemp3); \
-    vtemp2 = vec_add(vtemp2, vtemp4); \
-    /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \
-    vtemp1 = vec_add(vtemp1, v1_16); \
-    vtemp3 = vec_sr(vtemp1, v8_16); \
-    vtemp1 = vec_add(vtemp1, vtemp3); \
-    /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \
-    vtemp2 = vec_add(vtemp2, v1_16); \
-    vtemp4 = vec_sr(vtemp2, v8_16); \
-    vtemp2 = vec_add(vtemp2, vtemp4); \
-    /* (>>8) and get ARGBARGBARGBARGB */ \
-    vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
-} while (0)
-
-/* Calculate the permute vector used for 32->32 swizzling */
-static vector unsigned char
-calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
-{
-    /*
-     * We have to assume that the bits that aren't used by other
-     *  colors is alpha, and it's one complete byte, since some formats
-     *  leave alpha with a zero mask, but we should still swizzle the bits.
-     */
-    /* ARGB */
-    const static struct SDL_PixelFormat default_pixel_format = {
-        NULL, 0, 0,
-        0, 0, 0, 0,
-        16, 8, 0, 24,
-        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
-    };
-    if (!srcfmt) {
-        srcfmt = &default_pixel_format;
-    }
-    if (!dstfmt) {
-        dstfmt = &default_pixel_format;
-    }
-    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
-                                                       0x04, 0x04, 0x04, 0x04,
-                                                       0x08, 0x08, 0x08, 0x08,
-                                                       0x0C, 0x0C, 0x0C,
-                                                       0x0C);
-    vector unsigned char vswiz;
-    vector unsigned int srcvec;
-#define RESHIFT(X) (3 - ((X) >> 3))
-    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
-    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
-    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
-    Uint32 amask;
-    /* Use zero for alpha if either surface doesn't have alpha */
-    if (dstfmt->Amask) {
-        amask =
-            ((srcfmt->Amask) ? RESHIFT(srcfmt->
-                                       Ashift) : 0x10) << (dstfmt->Ashift);
-    } else {
-        amask =
-            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
-                          0xFFFFFFFF);
-    }
-#undef RESHIFT
-    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
-    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
-    return (vswiz);
-}
-
-static void
-Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint8 *src = (Uint8 *) info->src;
-    int srcskip = info->src_skip;
-    Uint8 *dst = (Uint8 *) info->dst;
-    int dstskip = info->dst_skip;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-
-    vector unsigned char v0 = vec_splat_u8(0);
-    vector unsigned short v8_16 = vec_splat_u16(8);
-    vector unsigned short v1_16 = vec_splat_u16(1);
-    vector unsigned short v2_16 = vec_splat_u16(2);
-    vector unsigned short v3_16 = vec_splat_u16(3);
-    vector unsigned int v8_32 = vec_splat_u32(8);
-    vector unsigned int v16_32 = vec_add(v8_32, v8_32);
-    vector unsigned short v3f =
-        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
-                          0x003f, 0x003f, 0x003f, 0x003f);
-    vector unsigned short vfc =
-        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
-                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
-
-    /* 
-       0x10 - 0x1f is the alpha
-       0x00 - 0x0e evens are the red
-       0x01 - 0x0f odds are zero
-     */
-    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
-                                                       0x10, 0x02, 0x01, 0x01,
-                                                       0x10, 0x04, 0x01, 0x01,
-                                                       0x10, 0x06, 0x01,
-                                                       0x01);
-    vector unsigned char vredalpha2 =
-        (vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
-                                        vec_sl(v8_32, v16_32))
-        );
-    /*
-       0x00 - 0x0f is ARxx ARxx ARxx ARxx
-       0x11 - 0x0f odds are blue
-     */
-    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
-                                                   0x04, 0x05, 0x06, 0x13,
-                                                   0x08, 0x09, 0x0a, 0x15,
-                                                   0x0c, 0x0d, 0x0e, 0x17);
-    vector unsigned char vblue2 =
-        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32)
-        );
-    /*
-       0x00 - 0x0f is ARxB ARxB ARxB ARxB
-       0x10 - 0x0e evens are green
-     */
-    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
-                                                    0x04, 0x05, 0x12, 0x07,
-                                                    0x08, 0x09, 0x14, 0x0b,
-                                                    0x0c, 0x0d, 0x16, 0x0f);
-    vector unsigned char vgreen2 =
-        (vector unsigned
-         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32))
-        );
-    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
-                                                    0x00, 0x0a, 0x00, 0x0e,
-                                                    0x00, 0x12, 0x00, 0x16,
-                                                    0x00, 0x1a, 0x00, 0x1e);
-    vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
-    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
-    vector unsigned char valphaPermute =
-        vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
-
-    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
-    vf800 = vec_sl(vf800, vec_splat_u16(8));
-
-    while (height--) {
-        int extrawidth;
-        vector unsigned char valigner;
-        vector unsigned char vsrc;
-        vector unsigned char voverflow;
-        int width = info->dst_w;
-
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while (condition) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, dR, dG, dB, sA; \
-            DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
-            if(sA) { \
-                unsigned short dstpixel = *((unsigned short *)dst); \
-                dR = (dstpixel >> 8) & 0xf8; \
-                dG = (dstpixel >> 3) & 0xfc; \
-                dB = (dstpixel << 3) & 0xf8; \
-                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
-                *((unsigned short *)dst) = ( \
-                    ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \
-                ); \
-            } \
-            src += 4; \
-            dst += 2; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
-        extrawidth = (width % 8);
-        valigner = VEC_ALIGNER(src);
-        vsrc = (vector unsigned char) vec_ld(0, src);
-        width -= extrawidth;
-        while (width) {
-            vector unsigned char valpha;
-            vector unsigned char vsrc1, vsrc2;
-            vector unsigned char vdst1, vdst2;
-            vector unsigned short vR, vG, vB;
-            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
-
-            /* Load 8 pixels from src as ARGB */
-            voverflow = (vector unsigned char) vec_ld(15, src);
-            vsrc = vec_perm(vsrc, voverflow, valigner);
-            vsrc1 = vec_perm(vsrc, vsrc, vpermute);
-            src += 16;
-            vsrc = (vector unsigned char) vec_ld(15, src);
-            voverflow = vec_perm(voverflow, vsrc, valigner);
-            vsrc2 = vec_perm(voverflow, voverflow, vpermute);
-            src += 16;
-
-            /* Load 8 pixels from dst as XRGB */
-            voverflow = vec_ld(0, dst);
-            vR = vec_and((vector unsigned short) voverflow, vf800);
-            vB = vec_sl((vector unsigned short) voverflow, v3_16);
-            vG = vec_sl(vB, v2_16);
-            vdst1 =
-                (vector unsigned char) vec_perm((vector unsigned char) vR,
-                                                (vector unsigned char) vR,
-                                                vredalpha1);
-            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
-            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
-            vdst2 =
-                (vector unsigned char) vec_perm((vector unsigned char) vR,
-                                                (vector unsigned char) vR,
-                                                vredalpha2);
-            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
-            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
-
-            /* Alpha blend 8 pixels as ARGB */
-            valpha = vec_perm(vsrc1, v0, valphaPermute);
-            VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16,
-                               v8_16);
-            valpha = vec_perm(vsrc2, v0, valphaPermute);
-            VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16,
-                               v8_16);
-
-            /* Convert 8 pixels to 565 */
-            vpixel = (vector unsigned short) vec_packpx((vector unsigned int)
-                                                        vdst1,
-                                                        (vector unsigned int)
-                                                        vdst2);
-            vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge);
-            vgpixel = vec_and(vgpixel, vfc);
-            vgpixel = vec_sl(vgpixel, v3_16);
-            vrpixel = vec_sl(vpixel, v1_16);
-            vrpixel = vec_and(vrpixel, vf800);
-            vbpixel = vec_and(vpixel, v3f);
-            vdst1 =
-                vec_or((vector unsigned char) vrpixel,
-                       (vector unsigned char) vgpixel);
-            vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel);
-
-            /* Store 8 pixels */
-            vec_st(vdst1, 0, dst);
-
-            width -= 8;
-            dst += 16;
-        }
-        ONE_PIXEL_BLEND((extrawidth), extrawidth);
-#undef ONE_PIXEL_BLEND
-        src += srcskip;
-        dst += dstskip;
-    }
-}
-
-static void
-Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    unsigned sA = info->a;
-    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
-    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
-    Uint32 ckey = info->colorkey;
-    vector unsigned char mergePermute;
-    vector unsigned char vsrcPermute;
-    vector unsigned char vdstPermute;
-    vector unsigned char vsdstPermute;
-    vector unsigned char valpha;
-    vector unsigned char valphamask;
-    vector unsigned char vbits;
-    vector unsigned char v0;
-    vector unsigned short v1;
-    vector unsigned short v8;
-    vector unsigned int vckey;
-    vector unsigned int vrgbmask;
-
-    mergePermute = VEC_MERGE_PERMUTE();
-    v0 = vec_splat_u8(0);
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-
-    /* set the alpha to 255 on the destination surf */
-    valphamask = VEC_ALPHA_MASK();
-
-    vsrcPermute = calc_swizzle32(srcfmt, NULL);
-    vdstPermute = calc_swizzle32(NULL, dstfmt);
-    vsdstPermute = calc_swizzle32(dstfmt, NULL);
-
-    /* set a vector full of alpha and 255-alpha */
-    ((unsigned char *) &valpha)[0] = sA;
-    valpha = vec_splat(valpha, 0);
-    vbits = (vector unsigned char) vec_splat_s8(-1);
-
-    ckey &= rgbmask;
-    ((unsigned int *) (char *) &vckey)[0] = ckey;
-    vckey = vec_splat(vckey, 0);
-    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
-    vrgbmask = vec_splat(vrgbmask, 0);
-
-    while (height--) {
-        int width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while (condition) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, dR, dG, dB; \
-            RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \
-            if(sA && Pixel != ckey) { \
-                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
-                DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
-                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
-                ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
-            } \
-            dstp++; \
-            srcp++; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char vsel;
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-                vector unsigned char vd_orig;
-
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-
-                /* vsel is set for items that match the key */
-                vsel =
-                    (vector unsigned char) vec_and((vector unsigned int) vs,
-                                                   vrgbmask);
-                vsel = (vector unsigned char) vec_cmpeq((vector unsigned int)
-                                                        vsel, vckey);
-
-                /* permute to source format */
-                vs = vec_perm(vs, valpha, vsrcPermute);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-                vd_orig = vd = vec_perm(vd, v0, vsdstPermute);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha channel to full on */
-                vd = vec_or(vd, valphamask);
-
-                /* mask out color key */
-                vd = vec_sel(vd, vd_orig, vsel);
-
-                /* permute to dest format */
-                vd = vec_perm(vd, vbits, vdstPermute);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-#undef ONE_PIXEL_BLEND
-
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-}
-
-
-static void
-Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
-{
-    int width = info->dst_w;
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    vector unsigned char mergePermute;
-    vector unsigned char valphaPermute;
-    vector unsigned char vsrcPermute;
-    vector unsigned char vdstPermute;
-    vector unsigned char vsdstPermute;
-    vector unsigned char valphamask;
-    vector unsigned char vpixelmask;
-    vector unsigned char v0;
-    vector unsigned short v1;
-    vector unsigned short v8;
-
-    v0 = vec_splat_u8(0);
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-    mergePermute = VEC_MERGE_PERMUTE();
-    valphamask = VEC_ALPHA_MASK();
-    valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
-    vpixelmask = vec_nor(valphamask, v0);
-    vsrcPermute = calc_swizzle32(srcfmt, NULL);
-    vdstPermute = calc_swizzle32(NULL, dstfmt);
-    vsdstPermute = calc_swizzle32(dstfmt, NULL);
-
-    while (height--) {
-        width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
-            DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
-            if(sA) { \
-              DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \
-              ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
-              ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \
-            } \
-            ++srcp; \
-            ++dstp; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            /* vsrcPermute */
-            /* vdstPermute */
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-                vector unsigned char valpha;
-                vector unsigned char vdstalpha;
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-                vs = vec_perm(vs, v0, vsrcPermute);
-
-                valpha = vec_perm(vs, v0, valphaPermute);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-                vd = vec_perm(vd, v0, vsdstPermute);
-                vdstalpha = vec_and(vd, valphamask);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha to the dest alpha */
-                vd = vec_and(vd, vpixelmask);
-                vd = vec_or(vd, vdstalpha);
-                vd = vec_perm(vd, v0, vdstPermute);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-        srcp += srcskip;
-        dstp += dstskip;
-#undef ONE_PIXEL_BLEND
-    }
-}
-
-/* fast ARGB888->(A)RGB888 blending with pixel alpha */
-static void
-BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
-{
-    int width = info->dst_w;
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    vector unsigned char mergePermute;
-    vector unsigned char valphaPermute;
-    vector unsigned char valphamask;
-    vector unsigned char vpixelmask;
-    vector unsigned char v0;
-    vector unsigned short v1;
-    vector unsigned short v8;
-    v0 = vec_splat_u8(0);
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-    mergePermute = VEC_MERGE_PERMUTE();
-    valphamask = VEC_ALPHA_MASK();
-    valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
-
-
-    vpixelmask = vec_nor(valphamask, v0);
-    while (height--) {
-        width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while ((condition)) { \
-            Uint32 dalpha; \
-            Uint32 d; \
-            Uint32 s1; \
-            Uint32 d1; \
-            Uint32 s = *srcp; \
-            Uint32 alpha = s >> 24; \
-            if(alpha) { \
-              if(alpha == SDL_ALPHA_OPAQUE) { \
-                *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \
-              } else { \
-                d = *dstp; \
-                dalpha = d & 0xff000000; \
-                s1 = s & 0xff00ff; \
-                d1 = d & 0xff00ff; \
-                d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
-                s &= 0xff00; \
-                d &= 0xff00; \
-                d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
-                *dstp = d1 | d | dalpha; \
-              } \
-            } \
-            ++srcp; \
-            ++dstp; \
-            widthvar--; \
-	    }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-                vector unsigned char valpha;
-                vector unsigned char vdstalpha;
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-
-                valpha = vec_perm(vs, v0, valphaPermute);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-                vdstalpha = vec_and(vd, valphamask);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha to the dest alpha */
-                vd = vec_and(vd, vpixelmask);
-                vd = vec_or(vd, vdstalpha);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-#undef ONE_PIXEL_BLEND
-}
-
-static void
-Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
-{
-    /* XXX : 6 */
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    unsigned sA = info->a;
-    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
-    vector unsigned char mergePermute;
-    vector unsigned char vsrcPermute;
-    vector unsigned char vdstPermute;
-    vector unsigned char vsdstPermute;
-    vector unsigned char valpha;
-    vector unsigned char valphamask;
-    vector unsigned char vbits;
-    vector unsigned short v1;
-    vector unsigned short v8;
-
-    mergePermute = VEC_MERGE_PERMUTE();
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-
-    /* set the alpha to 255 on the destination surf */
-    valphamask = VEC_ALPHA_MASK();
-
-    vsrcPermute = calc_swizzle32(srcfmt, NULL);
-    vdstPermute = calc_swizzle32(NULL, dstfmt);
-    vsdstPermute = calc_swizzle32(dstfmt, NULL);
-
-    /* set a vector full of alpha and 255-alpha */
-    ((unsigned char *) &valpha)[0] = sA;
-    valpha = vec_splat(valpha, 0);
-    vbits = (vector unsigned char) vec_splat_s8(-1);
-
-    while (height--) {
-        int width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, dR, dG, dB; \
-            DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
-            DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
-            ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
-            ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
-            ++srcp; \
-            ++dstp; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-                vs = vec_perm(vs, valpha, vsrcPermute);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-                vd = vec_perm(vd, vd, vsdstPermute);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha channel to full on */
-                vd = vec_or(vd, valphamask);
-                vd = vec_perm(vd, vbits, vdstPermute);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-#undef ONE_PIXEL_BLEND
-
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-
-}
-
-
-/* fast RGB888->(A)RGB888 blending */
-static void
-BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
-{
-    unsigned alpha = info->a;
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    vector unsigned char mergePermute;
-    vector unsigned char valpha;
-    vector unsigned char valphamask;
-    vector unsigned short v1;
-    vector unsigned short v8;
-
-    mergePermute = VEC_MERGE_PERMUTE();
-    v1 = vec_splat_u16(1);
-    v8 = vec_splat_u16(8);
-
-    /* set the alpha to 255 on the destination surf */
-    valphamask = VEC_ALPHA_MASK();
-
-    /* set a vector full of alpha and 255-alpha */
-    ((unsigned char *) &valpha)[0] = alpha;
-    valpha = vec_splat(valpha, 0);
-
-    while (height--) {
-        int width = info->dst_w;
-#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
-            Uint32 s = *srcp; \
-            Uint32 d = *dstp; \
-            Uint32 s1 = s & 0xff00ff; \
-            Uint32 d1 = d & 0xff00ff; \
-            d1 = (d1 + ((s1 - d1) * alpha >> 8)) \
-                 & 0xff00ff; \
-            s &= 0xff00; \
-            d &= 0xff00; \
-            d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
-            *dstp = d1 | d | 0xff000000; \
-            ++srcp; \
-            ++dstp; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
-            width -= extrawidth;
-            while (width) {
-                vector unsigned char voverflow;
-                vector unsigned char vd;
-
-                /* s = *srcp */
-                voverflow = (vector unsigned char) vec_ld(15, srcp);
-                vs = vec_perm(vs, voverflow, valigner);
-
-                /* d = *dstp */
-                vd = (vector unsigned char) vec_ld(0, dstp);
-
-                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
-
-                /* set the alpha channel to full on */
-                vd = vec_or(vd, valphamask);
-
-                /* *dstp = res */
-                vec_st((vector unsigned int) vd, 0, dstp);
-
-                srcp += 4;
-                dstp += 4;
-                width -= 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-        }
-#undef ONE_PIXEL_BLEND
-
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-}
-
-#if __MWERKS__
-#pragma altivec_model off
-#endif
-#endif /* SDL_ALTIVEC_BLITTERS */
-
 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
 static void
 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
@@ -1338,79 +538,6 @@ BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info)
     }
 }
 
-#ifdef __3dNOW__
-/* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
-static void
-BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
-{
-    int width = info->dst_w;
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip >> 2;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip >> 2;
-    SDL_PixelFormat *sf = info->src_fmt;
-    Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
-    Uint32 amask = sf->Amask;
-    Uint32 ashift = sf->Ashift;
-    Uint64 multmask;
-
-    __m64 src1, dst1, mm_alpha, mm_zero, dmask;
-
-    mm_zero = _mm_setzero_si64();       /* 0 -> mm_zero */
-    multmask = 0xFFFF;
-    multmask <<= (ashift * 2);
-    multmask = ~multmask;
-    dmask = *(__m64 *) & multmask;      /* dst alpha mask -> dmask */
-
-    while (height--) {
-	    /* *INDENT-OFF* */
-	    DUFFS_LOOP4({
-		Uint32 alpha;
-
-		_m_prefetch(srcp + 16);
-		_m_prefetch(dstp + 16);
-
-		alpha = *srcp & amask;
-		if (alpha == 0) {
-			/* do nothing */
-		} else if (alpha == amask) {
-			/* copy RGB, keep dst alpha */
-			*dstp = (*srcp & chanmask) | (*dstp & ~chanmask);
-		} else {
-			src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
-			src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
-
-			dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
-			dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
-
-			mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
-			mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
-			mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
-			mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
-			mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */
-
-			/* blend */		    
-			src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */
-			src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */
-			src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */
-			dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */
-			dst1 = _mm_packs_pu16(dst1, mm_zero);  /* 0000ARGB -> dst1 */
-			
-			*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
-		}
-		++srcp;
-		++dstp;
-	    }, width);
-	    /* *INDENT-ON* */
-        srcp += srcskip;
-        dstp += dstskip;
-    }
-    _mm_empty();
-}
-
-#endif /* __MMX__ */
-
 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
 
 /* blend a single 16 bit pixel at 50% */
@@ -2130,17 +1257,10 @@ SDL_CalculateBlitA(SDL_Surface * surface)
             return BlitNto1PixelAlpha;
 
         case 2:
-#if SDL_ALTIVEC_BLITTERS
-            if (sf->BytesPerPixel == 4
-                && df->Gmask == 0x7e0 && df->Bmask == 0x1f
-                && SDL_HasAltiVec())
-                return Blit32to565PixelAlphaAltivec;
-            else
-#endif
-                if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
-                    && sf->Gmask == 0xff00
-                    && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
-                        || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
+            if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
+                && sf->Gmask == 0xff00
+                && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
+                    || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
                 if (df->Gmask == 0x7e0)
                     return BlitARGBto565PixelAlpha;
                 else if (df->Gmask == 0x3e0)
@@ -2152,35 +1272,20 @@ SDL_CalculateBlitA(SDL_Surface * surface)
             if (sf->Rmask == df->Rmask
                 && sf->Gmask == df->Gmask
                 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
-#if defined(__MMX__) || defined(__3dNOW__)
+#if defined(__MMX__)
                 if (sf->Rshift % 8 == 0
                     && sf->Gshift % 8 == 0
                     && sf->Bshift % 8 == 0
                     && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
-#ifdef __3dNOW__
-                    if (SDL_Has3DNow())
-                        return BlitRGBtoRGBPixelAlphaMMX3DNOW;
-#endif
-#ifdef __MMX__
                     if (SDL_HasMMX())
                         return BlitRGBtoRGBPixelAlphaMMX;
-#endif
                 }
-#endif /* __MMX__ || __3dNOW__ */
+#endif /* __MMX__ */
                 if (sf->Amask == 0xff000000) {
-#if SDL_ALTIVEC_BLITTERS
-                    if (SDL_HasAltiVec())
-                        return BlitRGBtoRGBPixelAlphaAltivec;
-#endif
                     return BlitRGBtoRGBPixelAlpha;
                 }
             }
-#if SDL_ALTIVEC_BLITTERS
-            if (sf->Amask && sf->BytesPerPixel == 4 && SDL_HasAltiVec())
-                return Blit32to32PixelAlphaAltivec;
-            else
-#endif
-                return BlitNtoNPixelAlpha;
+            return BlitNtoNPixelAlpha;
 
         case 3:
         default:
@@ -2226,19 +1331,10 @@ SDL_CalculateBlitA(SDL_Surface * surface)
                         return BlitRGBtoRGBSurfaceAlphaMMX;
 #endif
                     if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
-#if SDL_ALTIVEC_BLITTERS
-                        if (SDL_HasAltiVec())
-                            return BlitRGBtoRGBSurfaceAlphaAltivec;
-#endif
                         return BlitRGBtoRGBSurfaceAlpha;
                     }
                 }
-#if SDL_ALTIVEC_BLITTERS
-                if ((sf->BytesPerPixel == 4) && SDL_HasAltiVec())
-                    return Blit32to32SurfaceAlphaAltivec;
-                else
-#endif
-                    return BlitNtoNSurfaceAlpha;
+                return BlitNtoNSurfaceAlpha;
 
             case 3:
             default:
@@ -2252,12 +1348,6 @@ SDL_CalculateBlitA(SDL_Surface * surface)
             if (df->BytesPerPixel == 1)
                 return BlitNto1SurfaceAlphaKey;
             else
-#if SDL_ALTIVEC_BLITTERS
-            if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
-                    SDL_HasAltiVec())
-                return Blit32to32SurfaceAlphaKeyAltivec;
-            else
-#endif
                 return BlitNtoNSurfaceAlphaKey;
         }
         break;
diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index 600153f6c..a01a387ff 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -28,846 +28,8 @@
 
 /* Functions to blit from N-bit surfaces to other surfaces */
 
-#if SDL_ALTIVEC_BLITTERS
-#if __MWERKS__
-#pragma altivec_model on
-#endif
-#ifdef HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-#define assert(X)
-#ifdef __MACOSX__
-#include <sys/sysctl.h>
-static size_t
-GetL3CacheSize(void)
-{
-    const char key[] = "hw.l3cachesize";
-    u_int64_t result = 0;
-    size_t typeSize = sizeof(result);
-
-
-    int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
-    if (0 != err)
-        return 0;
-
-    return result;
-}
-#else
-static size_t
-GetL3CacheSize(void)
-{
-    /* XXX: Just guess G4 */
-    return 2097152;
-}
-#endif /* __MACOSX__ */
-
-#if (defined(__MACOSX__) && (__GNUC__ < 4))
-#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
-        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
-#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
-        (vector unsigned short) ( a,b,c,d,e,f,g,h )
-#else
-#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
-        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
-#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
-        (vector unsigned short) { a,b,c,d,e,f,g,h }
-#endif
-
-#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
-#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
-                               ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
-                                 0x04+a, 0x04+b, 0x04+c, 0x04+d, \
-                                 0x08+a, 0x08+b, 0x08+c, 0x08+d, \
-                                 0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
-
-#define MAKE8888(dstfmt, r, g, b, a)  \
-    ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
-      ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
-      ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
-      ((a<<dstfmt->Ashift)&dstfmt->Amask) )
-
-/*
- * Data Stream Touch...Altivec cache prefetching.
- *
- *  Don't use this on a G5...however, the speed boost is very significant
- *   on a G4.
- */
-#define DST_CHAN_SRC 1
-#define DST_CHAN_DEST 2
-
-/* macro to set DST control word value... */
-#define DST_CTRL(size, count, stride) \
-    (((size) << 24) | ((count) << 16) | (stride))
-
-#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
-    ? vec_lvsl(0, src) \
-    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
-
-/* Calculate the permute vector used for 32->32 swizzling */
-static vector unsigned char
-calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
-{
-    /*
-     * We have to assume that the bits that aren't used by other
-     *  colors is alpha, and it's one complete byte, since some formats
-     *  leave alpha with a zero mask, but we should still swizzle the bits.
-     */
-    /* ARGB */
-    const static const struct SDL_PixelFormat default_pixel_format = {
-        NULL, 32, 4,
-        0, 0, 0, 0,
-        16, 8, 0, 24,
-        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
-    };
-    if (!srcfmt) {
-        srcfmt = &default_pixel_format;
-    }
-    if (!dstfmt) {
-        dstfmt = &default_pixel_format;
-    }
-    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
-                                                       0x04, 0x04, 0x04, 0x04,
-                                                       0x08, 0x08, 0x08, 0x08,
-                                                       0x0C, 0x0C, 0x0C,
-                                                       0x0C);
-    vector unsigned char vswiz;
-    vector unsigned int srcvec;
-#define RESHIFT(X) (3 - ((X) >> 3))
-    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
-    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
-    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
-    Uint32 amask;
-    /* Use zero for alpha if either surface doesn't have alpha */
-    if (dstfmt->Amask) {
-        amask =
-            ((srcfmt->Amask) ? RESHIFT(srcfmt->
-                                       Ashift) : 0x10) << (dstfmt->Ashift);
-    } else {
-        amask =
-            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
-                          0xFFFFFFFF);
-    }
-#undef RESHIFT
-    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
-    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
-    return (vswiz);
-}
-
-static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
-static void
-Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint8 *src = (Uint8 *) info->src;
-    int srcskip = info->src_skip;
-    Uint8 *dst = (Uint8 *) info->dst;
-    int dstskip = info->dst_skip;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    vector unsigned char valpha = vec_splat_u8(0);
-    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
-    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
-                                                    0x00, 0x0a, 0x00, 0x0e,
-                                                    0x00, 0x12, 0x00, 0x16,
-                                                    0x00, 0x1a, 0x00, 0x1e);
-    vector unsigned short v1 = vec_splat_u16(1);
-    vector unsigned short v3 = vec_splat_u16(3);
-    vector unsigned short v3f =
-        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
-                          0x003f, 0x003f, 0x003f, 0x003f);
-    vector unsigned short vfc =
-        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
-                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
-    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
-    vf800 = vec_sl(vf800, vec_splat_u16(8));
-
-    while (height--) {
-        vector unsigned char valigner;
-        vector unsigned char voverflow;
-        vector unsigned char vsrc;
-
-        int width = info->dst_w;
-        int extrawidth;
-
-        /* do scalar until we can align... */
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while (condition) { \
-            Uint32 Pixel; \
-            unsigned sR, sG, sB, sA; \
-            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
-                          sR, sG, sB, sA); \
-            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
-                                ((sG << 3) & 0x000007E0) | \
-                                ((sB >> 3) & 0x0000001F)); \
-            dst += 2; \
-            src += 4; \
-            widthvar--; \
-        }
-
-        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
-
-        /* After all that work, here's the vector part! */
-        extrawidth = (width % 8);       /* trailing unaligned stores */
-        width -= extrawidth;
-        vsrc = vec_ld(0, src);
-        valigner = VEC_ALIGNER(src);
-
-        while (width) {
-            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
-            vector unsigned int vsrc1, vsrc2;
-            vector unsigned char vdst;
-
-            voverflow = vec_ld(15, src);
-            vsrc = vec_perm(vsrc, voverflow, valigner);
-            vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
-            src += 16;
-            vsrc = voverflow;
-            voverflow = vec_ld(15, src);
-            vsrc = vec_perm(vsrc, voverflow, valigner);
-            vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
-            /* 1555 */
-            vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
-            vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
-            vgpixel = vec_and(vgpixel, vfc);
-            vgpixel = vec_sl(vgpixel, v3);
-            vrpixel = vec_sl(vpixel, v1);
-            vrpixel = vec_and(vrpixel, vf800);
-            vbpixel = vec_and(vpixel, v3f);
-            vdst =
-                vec_or((vector unsigned char) vrpixel,
-                       (vector unsigned char) vgpixel);
-            /* 565 */
-            vdst = vec_or(vdst, (vector unsigned char) vbpixel);
-            vec_st(vdst, 0, dst);
-
-            width -= 8;
-            src += 16;
-            dst += 16;
-            vsrc = voverflow;
-        }
-
-        assert(width == 0);
-
-        /* do scalar until we can align... */
-        ONE_PIXEL_BLEND((extrawidth), extrawidth);
-#undef ONE_PIXEL_BLEND
-
-        src += srcskip;         /* move to next row, accounting for pitch. */
-        dst += dstskip;
-    }
-
-
-}
-
-static void
-Blit_RGB565_32Altivec(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint8 *src = (Uint8 *) info->src;
-    int srcskip = info->src_skip;
-    Uint8 *dst = (Uint8 *) info->dst;
-    int dstskip = info->dst_skip;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    unsigned alpha;
-    vector unsigned char valpha;
-    vector unsigned char vpermute;
-    vector unsigned short vf800;
-    vector unsigned int v8 = vec_splat_u32(8);
-    vector unsigned int v16 = vec_add(v8, v8);
-    vector unsigned short v2 = vec_splat_u16(2);
-    vector unsigned short v3 = vec_splat_u16(3);
-    /* 
-       0x10 - 0x1f is the alpha
-       0x00 - 0x0e evens are the red
-       0x01 - 0x0f odds are zero
-     */
-    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
-                                                       0x10, 0x02, 0x01, 0x01,
-                                                       0x10, 0x04, 0x01, 0x01,
-                                                       0x10, 0x06, 0x01,
-                                                       0x01);
-    vector unsigned char vredalpha2 =
-        (vector unsigned
-         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
-        );
-    /*
-       0x00 - 0x0f is ARxx ARxx ARxx ARxx
-       0x11 - 0x0f odds are blue
-     */
-    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
-                                                   0x04, 0x05, 0x06, 0x13,
-                                                   0x08, 0x09, 0x0a, 0x15,
-                                                   0x0c, 0x0d, 0x0e, 0x17);
-    vector unsigned char vblue2 =
-        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
-        );
-    /*
-       0x00 - 0x0f is ARxB ARxB ARxB ARxB
-       0x10 - 0x0e evens are green
-     */
-    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
-                                                    0x04, 0x05, 0x12, 0x07,
-                                                    0x08, 0x09, 0x14, 0x0b,
-                                                    0x0c, 0x0d, 0x16, 0x0f);
-    vector unsigned char vgreen2 =
-        (vector unsigned
-         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
-        );
-
-
-    assert(srcfmt->BytesPerPixel == 2);
-    assert(dstfmt->BytesPerPixel == 4);
-
-    vf800 = (vector unsigned short) vec_splat_u8(-7);
-    vf800 = vec_sl(vf800, vec_splat_u16(8));
-
-    if (dstfmt->Amask && info->a) {
-        ((unsigned char *) &valpha)[0] = alpha = info->a;
-        valpha = vec_splat(valpha, 0);
-    } else {
-        alpha = 0;
-        valpha = vec_splat_u8(0);
-    }
-
-    vpermute = calc_swizzle32(NULL, dstfmt);
-    while (height--) {
-        vector unsigned char valigner;
-        vector unsigned char voverflow;
-        vector unsigned char vsrc;
-
-        int width = info->dst_w;
-        int extrawidth;
-
-        /* do scalar until we can align... */
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while (condition) { \
-            unsigned sR, sG, sB; \
-            unsigned short Pixel = *((unsigned short *)src); \
-            sR = (Pixel >> 8) & 0xf8; \
-            sG = (Pixel >> 3) & 0xfc; \
-            sB = (Pixel << 3) & 0xf8; \
-            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
-            src += 2; \
-            dst += 4; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
-
-        /* After all that work, here's the vector part! */
-        extrawidth = (width % 8);       /* trailing unaligned stores */
-        width -= extrawidth;
-        vsrc = vec_ld(0, src);
-        valigner = VEC_ALIGNER(src);
-
-        while (width) {
-            vector unsigned short vR, vG, vB;
-            vector unsigned char vdst1, vdst2;
-
-            voverflow = vec_ld(15, src);
-            vsrc = vec_perm(vsrc, voverflow, valigner);
-
-            vR = vec_and((vector unsigned short) vsrc, vf800);
-            vB = vec_sl((vector unsigned short) vsrc, v3);
-            vG = vec_sl(vB, v2);
-
-            vdst1 =
-                (vector unsigned char) vec_perm((vector unsigned char) vR,
-                                                valpha, vredalpha1);
-            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
-            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
-            vdst1 = vec_perm(vdst1, valpha, vpermute);
-            vec_st(vdst1, 0, dst);
-
-            vdst2 =
-                (vector unsigned char) vec_perm((vector unsigned char) vR,
-                                                valpha, vredalpha2);
-            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
-            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
-            vdst2 = vec_perm(vdst2, valpha, vpermute);
-            vec_st(vdst2, 16, dst);
-
-            width -= 8;
-            dst += 32;
-            src += 16;
-            vsrc = voverflow;
-        }
-
-        assert(width == 0);
-
-
-        /* do scalar until we can align... */
-        ONE_PIXEL_BLEND((extrawidth), extrawidth);
-#undef ONE_PIXEL_BLEND
-
-        src += srcskip;         /* move to next row, accounting for pitch. */
-        dst += dstskip;
-    }
-
-}
-
-
-static void
-Blit_RGB555_32Altivec(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint8 *src = (Uint8 *) info->src;
-    int srcskip = info->src_skip;
-    Uint8 *dst = (Uint8 *) info->dst;
-    int dstskip = info->dst_skip;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    unsigned alpha;
-    vector unsigned char valpha;
-    vector unsigned char vpermute;
-    vector unsigned short vf800;
-    vector unsigned int v8 = vec_splat_u32(8);
-    vector unsigned int v16 = vec_add(v8, v8);
-    vector unsigned short v1 = vec_splat_u16(1);
-    vector unsigned short v3 = vec_splat_u16(3);
-    /* 
-       0x10 - 0x1f is the alpha
-       0x00 - 0x0e evens are the red
-       0x01 - 0x0f odds are zero
-     */
-    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
-                                                       0x10, 0x02, 0x01, 0x01,
-                                                       0x10, 0x04, 0x01, 0x01,
-                                                       0x10, 0x06, 0x01,
-                                                       0x01);
-    vector unsigned char vredalpha2 =
-        (vector unsigned
-         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
-        );
-    /*
-       0x00 - 0x0f is ARxx ARxx ARxx ARxx
-       0x11 - 0x0f odds are blue
-     */
-    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
-                                                   0x04, 0x05, 0x06, 0x13,
-                                                   0x08, 0x09, 0x0a, 0x15,
-                                                   0x0c, 0x0d, 0x0e, 0x17);
-    vector unsigned char vblue2 =
-        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
-        );
-    /*
-       0x00 - 0x0f is ARxB ARxB ARxB ARxB
-       0x10 - 0x0e evens are green
-     */
-    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
-                                                    0x04, 0x05, 0x12, 0x07,
-                                                    0x08, 0x09, 0x14, 0x0b,
-                                                    0x0c, 0x0d, 0x16, 0x0f);
-    vector unsigned char vgreen2 =
-        (vector unsigned
-         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
-        );
-
-
-    assert(srcfmt->BytesPerPixel == 2);
-    assert(dstfmt->BytesPerPixel == 4);
-
-    vf800 = (vector unsigned short) vec_splat_u8(-7);
-    vf800 = vec_sl(vf800, vec_splat_u16(8));
-
-    if (dstfmt->Amask && info->a) {
-        ((unsigned char *) &valpha)[0] = alpha = info->a;
-        valpha = vec_splat(valpha, 0);
-    } else {
-        alpha = 0;
-        valpha = vec_splat_u8(0);
-    }
-
-    vpermute = calc_swizzle32(NULL, dstfmt);
-    while (height--) {
-        vector unsigned char valigner;
-        vector unsigned char voverflow;
-        vector unsigned char vsrc;
-
-        int width = info->dst_w;
-        int extrawidth;
-
-        /* do scalar until we can align... */
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        while (condition) { \
-            unsigned sR, sG, sB; \
-            unsigned short Pixel = *((unsigned short *)src); \
-            sR = (Pixel >> 7) & 0xf8; \
-            sG = (Pixel >> 2) & 0xf8; \
-            sB = (Pixel << 3) & 0xf8; \
-            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
-            src += 2; \
-            dst += 4; \
-            widthvar--; \
-        }
-        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
-
-        /* After all that work, here's the vector part! */
-        extrawidth = (width % 8);       /* trailing unaligned stores */
-        width -= extrawidth;
-        vsrc = vec_ld(0, src);
-        valigner = VEC_ALIGNER(src);
-
-        while (width) {
-            vector unsigned short vR, vG, vB;
-            vector unsigned char vdst1, vdst2;
-
-            voverflow = vec_ld(15, src);
-            vsrc = vec_perm(vsrc, voverflow, valigner);
-
-            vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
-            vB = vec_sl((vector unsigned short) vsrc, v3);
-            vG = vec_sl(vB, v3);
-
-            vdst1 =
-                (vector unsigned char) vec_perm((vector unsigned char) vR,
-                                                valpha, vredalpha1);
-            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
-            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
-            vdst1 = vec_perm(vdst1, valpha, vpermute);
-            vec_st(vdst1, 0, dst);
-
-            vdst2 =
-                (vector unsigned char) vec_perm((vector unsigned char) vR,
-                                                valpha, vredalpha2);
-            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
-            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
-            vdst2 = vec_perm(vdst2, valpha, vpermute);
-            vec_st(vdst2, 16, dst);
-
-            width -= 8;
-            dst += 32;
-            src += 16;
-            vsrc = voverflow;
-        }
-
-        assert(width == 0);
-
-
-        /* do scalar until we can align... */
-        ONE_PIXEL_BLEND((extrawidth), extrawidth);
-#undef ONE_PIXEL_BLEND
-
-        src += srcskip;         /* move to next row, accounting for pitch. */
-        dst += dstskip;
-    }
-
-}
-
-static void BlitNtoNKey(SDL_BlitInfo * info);
-static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
-static void
-Blit32to32KeyAltivec(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint32 *srcp = (Uint32 *) info->src;
-    int srcskip = info->src_skip / 4;
-    Uint32 *dstp = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip / 4;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    int srcbpp = srcfmt->BytesPerPixel;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    int dstbpp = dstfmt->BytesPerPixel;
-    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
-    unsigned alpha = dstfmt->Amask ? info->a : 0;
-    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
-    Uint32 ckey = info->colorkey;
-    vector unsigned int valpha;
-    vector unsigned char vpermute;
-    vector unsigned char vzero;
-    vector unsigned int vckey;
-    vector unsigned int vrgbmask;
-    vpermute = calc_swizzle32(srcfmt, dstfmt);
-    if (info->dst_w < 16) {
-        if (copy_alpha) {
-            BlitNtoNKeyCopyAlpha(info);
-        } else {
-            BlitNtoNKey(info);
-        }
-        return;
-    }
-    vzero = vec_splat_u8(0);
-    if (alpha) {
-        ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
-        valpha =
-            (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
-    } else {
-        valpha = (vector unsigned int) vzero;
-    }
-    ckey &= rgbmask;
-    ((unsigned int *) (char *) &vckey)[0] = ckey;
-    vckey = vec_splat(vckey, 0);
-    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
-    vrgbmask = vec_splat(vrgbmask, 0);
-
-    while (height--) {
-#define ONE_PIXEL_BLEND(condition, widthvar) \
-        if (copy_alpha) { \
-            while (condition) { \
-                Uint32 Pixel; \
-                unsigned sR, sG, sB, sA; \
-                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
-                          sR, sG, sB, sA); \
-                if ( (Pixel & rgbmask) != ckey ) { \
-                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
-                            sR, sG, sB, sA); \
-                } \
-                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
-                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
-                widthvar--; \
-            } \
-        } else { \
-            while (condition) { \
-                Uint32 Pixel; \
-                unsigned sR, sG, sB; \
-                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
-                if ( Pixel != ckey ) { \
-                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
-                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
-                              sR, sG, sB, alpha); \
-                } \
-                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
-                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
-                widthvar--; \
-            } \
-        }
-        int width = info->dst_w;
-        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
-        assert(width > 0);
-        if (width > 0) {
-            int extrawidth = (width % 4);
-            vector unsigned char valigner = VEC_ALIGNER(srcp);
-            vector unsigned int vs = vec_ld(0, srcp);
-            width -= extrawidth;
-            assert(width >= 4);
-            while (width) {
-                vector unsigned char vsel;
-                vector unsigned int vd;
-                vector unsigned int voverflow = vec_ld(15, srcp);
-                /* load the source vec */
-                vs = vec_perm(vs, voverflow, valigner);
-                /* vsel is set for items that match the key */
-                vsel = (vector unsigned char) vec_and(vs, vrgbmask);
-                vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
-                /* permute the src vec to the dest format */
-                vs = vec_perm(vs, valpha, vpermute);
-                /* load the destination vec */
-                vd = vec_ld(0, dstp);
-                /* select the source and dest into vs */
-                vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
-                                                   (vector unsigned char) vd,
-                                                   vsel);
-
-                vec_st(vd, 0, dstp);
-                srcp += 4;
-                width -= 4;
-                dstp += 4;
-                vs = voverflow;
-            }
-            ONE_PIXEL_BLEND((extrawidth), extrawidth);
-#undef ONE_PIXEL_BLEND
-            srcp += srcskip;
-            dstp += dstskip;
-        }
-    }
-}
-
-/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
-/* Use this on a G5 */
-static void
-ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
-{
-    int height = info->dst_h;
-    Uint32 *src = (Uint32 *) info->src;
-    int srcskip = info->src_skip / 4;
-    Uint32 *dst = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip / 4;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    vector unsigned int vzero = vec_splat_u32(0);
-    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
-    if (dstfmt->Amask && !srcfmt->Amask) {
-        if (info->a) {
-            vector unsigned char valpha;
-            ((unsigned char *) &valpha)[0] = info->a;
-            vzero = (vector unsigned int) vec_splat(valpha, 0);
-        }
-    }
-
-    assert(srcfmt->BytesPerPixel == 4);
-    assert(dstfmt->BytesPerPixel == 4);
-
-    while (height--) {
-        vector unsigned char valigner;
-        vector unsigned int vbits;
-        vector unsigned int voverflow;
-        Uint32 bits;
-        Uint8 r, g, b, a;
-
-        int width = info->dst_w;
-        int extrawidth;
-
-        /* do scalar until we can align... */
-        while ((UNALIGNED_PTR(dst)) && (width)) {
-            bits = *(src++);
-            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
-            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
-            width--;
-        }
-
-        /* After all that work, here's the vector part! */
-        extrawidth = (width % 4);
-        width -= extrawidth;
-        valigner = VEC_ALIGNER(src);
-        vbits = vec_ld(0, src);
-
-        while (width) {
-            voverflow = vec_ld(15, src);
-            src += 4;
-            width -= 4;
-            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
-            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
-            vec_st(vbits, 0, dst);      /* store it back out. */
-            dst += 4;
-            vbits = voverflow;
-        }
-
-        assert(width == 0);
-
-        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
-        while (extrawidth) {
-            bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
-            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
-            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
-            extrawidth--;
-        }
-
-        src += srcskip;
-        dst += dstskip;
-    }
-
-}
-
-/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
-/* Use this on a G4 */
-static void
-ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
-{
-    const int scalar_dst_lead = sizeof(Uint32) * 4;
-    const int vector_dst_lead = sizeof(Uint32) * 16;
-
-    int height = info->dst_h;
-    Uint32 *src = (Uint32 *) info->src;
-    int srcskip = info->src_skip / 4;
-    Uint32 *dst = (Uint32 *) info->dst;
-    int dstskip = info->dst_skip / 4;
-    SDL_PixelFormat *srcfmt = info->src_fmt;
-    SDL_PixelFormat *dstfmt = info->dst_fmt;
-    vector unsigned int vzero = vec_splat_u32(0);
-    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
-    if (dstfmt->Amask && !srcfmt->Amask) {
-        if (info->a) {
-            vector unsigned char valpha;
-            ((unsigned char *) &valpha)[0] = info->a;
-            vzero = (vector unsigned int) vec_splat(valpha, 0);
-        }
-    }
-
-    assert(srcfmt->BytesPerPixel == 4);
-    assert(dstfmt->BytesPerPixel == 4);
-
-    while (height--) {
-        vector unsigned char valigner;
-        vector unsigned int vbits;
-        vector unsigned int voverflow;
-        Uint32 bits;
-        Uint8 r, g, b, a;
-
-        int width = info->dst_w;
-        int extrawidth;
-
-        /* do scalar until we can align... */
-        while ((UNALIGNED_PTR(dst)) && (width)) {
-            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
-                     DST_CHAN_SRC);
-            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
-                      DST_CHAN_DEST);
-            bits = *(src++);
-            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
-            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
-            width--;
-        }
-
-        /* After all that work, here's the vector part! */
-        extrawidth = (width % 4);
-        width -= extrawidth;
-        valigner = VEC_ALIGNER(src);
-        vbits = vec_ld(0, src);
-
-        while (width) {
-            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
-                     DST_CHAN_SRC);
-            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
-                      DST_CHAN_DEST);
-            voverflow = vec_ld(15, src);
-            src += 4;
-            width -= 4;
-            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
-            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
-            vec_st(vbits, 0, dst);      /* store it back out. */
-            dst += 4;
-            vbits = voverflow;
-        }
-
-        assert(width == 0);
-
-        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
-        while (extrawidth) {
-            bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
-            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
-            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
-            extrawidth--;
-        }
-
-        src += srcskip;
-        dst += dstskip;
-    }
-
-    vec_dss(DST_CHAN_SRC);
-    vec_dss(DST_CHAN_DEST);
-}
-
-static Uint32
-GetBlitFeatures(void)
-{
-    static Uint32 features = 0xffffffff;
-    if (features == 0xffffffff) {
-        /* Provide an override for testing .. */
-        char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
-        if (override) {
-            features = 0;
-            SDL_sscanf(override, "%u", &features);
-        } else {
-            features = (0
-                        /* Feature 1 is has-MMX */
-                        | ((SDL_HasMMX())? 1 : 0)
-                        /* Feature 2 is has-AltiVec */
-                        | ((SDL_HasAltiVec())? 2 : 0)
-                        /* Feature 4 is dont-use-prefetch */
-                        /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
-                        | ((GetL3CacheSize() == 0) ? 4 : 0)
-                );
-        }
-    }
-    return features;
-}
-
-#if __MWERKS__
-#pragma altivec_model off
-#endif
-#else
 /* Feature 1 is has-MMX */
 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
-#endif
 
 /* This is now endian dependent */
 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
@@ -2346,15 +1508,6 @@ static const struct blit_table normal_blit_1[] = {
 };
 
 static const struct blit_table normal_blit_2[] = {
-#if SDL_ALTIVEC_BLITTERS
-    /* has-altivec */
-    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
-     0x00000000,
-     2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
-    {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
-     0x00000000,
-     2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
-#endif
     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
      0x000000FF,
      0, Blit_RGB565_ARGB8888, SET_ALPHA},
@@ -2378,22 +1531,6 @@ static const struct blit_table normal_blit_3[] = {
 };
 
 static const struct blit_table normal_blit_4[] = {
-#if SDL_ALTIVEC_BLITTERS
-    /* has-altivec | dont-use-prefetch */
-    {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
-     0x00000000,
-     6, ConvertAltivec32to32_noprefetch,
-     NO_ALPHA | COPY_ALPHA | SET_ALPHA},
-    /* has-altivec */
-    {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
-     0x00000000,
-     2, ConvertAltivec32to32_prefetch,
-     NO_ALPHA | COPY_ALPHA | SET_ALPHA},
-    /* has-altivec */
-    {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
-     0x0000001F,
-     2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
-#endif
     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
      0x0000001F,
      0, Blit_RGB888_RGB565, NO_ALPHA},
@@ -2491,12 +1628,6 @@ SDL_CalculateBlitN(SDL_Surface * surface)
         else if (dstfmt->BytesPerPixel == 1)
             return BlitNto1Key;
         else {
-#if SDL_ALTIVEC_BLITTERS
-            if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
-                && SDL_HasAltiVec()) {
-                return Blit32to32KeyAltivec;
-            } else
-#endif
             if (srcfmt->Amask && dstfmt->Amask) {
                 return BlitNtoNKeyCopyAlpha;
             } else {
diff --git a/test/testplatform.c b/test/testplatform.c
index 75e538ec8..3badbafe9 100644
--- a/test/testplatform.c
+++ b/test/testplatform.c
@@ -143,13 +143,10 @@ TestCPUInfo(SDL_bool verbose)
 	printf("CPU cache line size: %d\n", SDL_GetCPUCacheLineSize());
         printf("RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected");
         printf("MMX %s\n", SDL_HasMMX()? "detected" : "not detected");
-        printf("MMX Ext %s\n", SDL_HasMMXExt()? "detected" : "not detected");
-        printf("3DNow %s\n", SDL_Has3DNow()? "detected" : "not detected");
-        printf("3DNow Ext %s\n",
-               SDL_Has3DNowExt()? "detected" : "not detected");
         printf("SSE %s\n", SDL_HasSSE()? "detected" : "not detected");
         printf("SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected");
-        printf("AltiVec %s\n", SDL_HasAltiVec()? "detected" : "not detected");
+        printf("SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected");
+        printf("SSE4 %s\n", SDL_HasSSE4()? "detected" : "not detected");
     }
     return (0);
 }