Updated CPU detection code for SSE3 and SSE4 and removed obsolete 3DNow! and Altivec support.
author Sam Lantinga <slouken@libsdl.org>
Fri, 11 Feb 2011 14:51:04 -0800
changeset 5259 6a65c1fc07af
parent 5258 f26314c20071
child 5260 62d2bc792002
Updated CPU detection code for SSE3 and SSE4 and removed obsolete 3DNow! and Altivec support.
configure.in
include/SDL_config.h.in
include/SDL_config_macosx.h
include/SDL_cpuinfo.h
src/cpuinfo/SDL_cpuinfo.c
src/video/SDL_blit.c
src/video/SDL_blit.h
src/video/SDL_blit_A.c
src/video/SDL_blit_N.c
test/testplatform.c
     1.1 --- a/configure.in	Fri Feb 11 14:42:58 2011 -0800
     1.2 +++ b/configure.in	Fri Feb 11 14:51:04 2011 -0800
     1.3 @@ -501,33 +501,6 @@
     1.4          fi
     1.5      fi
     1.6  
     1.7 -    AC_ARG_ENABLE(3dnow,
     1.8 -AC_HELP_STRING([--enable-3dnow], [use MMX assembly routines [[default=yes]]]),
     1.9 -                  , enable_3dnow=yes)
    1.10 -    if test x$enable_3dnow = xyes; then
    1.11 -        save_CFLAGS="$CFLAGS"
    1.12 -        have_gcc_3dnow=no
    1.13 -        AC_MSG_CHECKING(for GCC -m3dnow option)
    1.14 -        amd3dnow_CFLAGS="-m3dnow"
    1.15 -        CFLAGS="$save_CFLAGS $amd3dnow_CFLAGS"
    1.16 -
    1.17 -        AC_TRY_COMPILE([
    1.18 -        #include <mm3dnow.h>
    1.19 -        #ifndef __3dNOW__
    1.20 -        #error Assembler CPP flag not enabled
    1.21 -        #endif
    1.22 -        ],[
    1.23 -        ],[
    1.24 -        have_gcc_3dnow=yes
    1.25 -        ])
    1.26 -        AC_MSG_RESULT($have_gcc_3dnow)
    1.27 -        CFLAGS="$save_CFLAGS"
    1.28 -
    1.29 -        if test x$have_gcc_3dnow = xyes; then
    1.30 -            EXTRA_CFLAGS="$EXTRA_CFLAGS $amd3dnow_CFLAGS"
    1.31 -        fi
    1.32 -    fi
    1.33 -
    1.34      AC_ARG_ENABLE(sse,
    1.35  AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]),
    1.36                    , enable_sse=yes)
    1.37 @@ -599,82 +572,6 @@
    1.38              EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
    1.39          fi
    1.40      fi
    1.41 -
    1.42 -    AC_ARG_ENABLE(altivec,
    1.43 -AC_HELP_STRING([--enable-altivec], [use Altivec assembly routines [[default=yes]]]),
    1.44 -                  , enable_altivec=yes)
    1.45 -    if test x$enable_altivec = xyes; then
    1.46 -        save_CFLAGS="$CFLAGS"
    1.47 -        have_gcc_altivec=no
    1.48 -        have_altivec_h_hdr=no
    1.49 -        altivec_CFLAGS="-maltivec"
    1.50 -        CFLAGS="$save_CFLAGS $altivec_CFLAGS"
    1.51 -
    1.52 -        AC_MSG_CHECKING(for Altivec with GCC altivec.h and -maltivec option)
    1.53 -        AC_TRY_COMPILE([
    1.54 -        #include <altivec.h>
    1.55 -        vector unsigned int vzero() {
    1.56 -            return vec_splat_u32(0);
    1.57 -        }
    1.58 -        ],[
    1.59 -        ],[
    1.60 -        have_gcc_altivec=yes
    1.61 -        have_altivec_h_hdr=yes
    1.62 -        ])
    1.63 -        AC_MSG_RESULT($have_gcc_altivec)
    1.64 -
    1.65 -        if test x$have_gcc_altivec = xno; then
    1.66 -            AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
    1.67 -            AC_TRY_COMPILE([
    1.68 -            vector unsigned int vzero() {
    1.69 -                return vec_splat_u32(0);
    1.70 -            }
    1.71 -            ],[
    1.72 -            ],[
    1.73 -            have_gcc_altivec=yes
    1.74 -            ])
    1.75 -            AC_MSG_RESULT($have_gcc_altivec)
    1.76 -        fi
    1.77 -
    1.78 -        if test x$have_gcc_altivec = xno; then
    1.79 -            AC_MSG_CHECKING(for Altivec with GCC altivec.h and -faltivec option)
    1.80 -            altivec_CFLAGS="-faltivec"
    1.81 -            CFLAGS="$save_CFLAGS $altivec_CFLAGS"
    1.82 -            AC_TRY_COMPILE([
    1.83 -            #include <altivec.h>
    1.84 -            vector unsigned int vzero() {
    1.85 -                return vec_splat_u32(0);
    1.86 -            }
    1.87 -            ],[
    1.88 -            ],[
    1.89 -            have_gcc_altivec=yes
    1.90 -            have_altivec_h_hdr=yes
    1.91 -            ])
    1.92 -            AC_MSG_RESULT($have_gcc_altivec)
    1.93 -        fi
    1.94 -
    1.95 -        if test x$have_gcc_altivec = xno; then
    1.96 -            AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
    1.97 -            AC_TRY_COMPILE([
    1.98 -            vector unsigned int vzero() {
    1.99 -                return vec_splat_u32(0);
   1.100 -            }
   1.101 -            ],[
   1.102 -            ],[
   1.103 -            have_gcc_altivec=yes
   1.104 -            ])
   1.105 -            AC_MSG_RESULT($have_gcc_altivec)
   1.106 -        fi
   1.107 -        CFLAGS="$save_CFLAGS"
   1.108 -
   1.109 -        if test x$have_gcc_altivec = xyes; then
   1.110 -            AC_DEFINE(SDL_ALTIVEC_BLITTERS)
   1.111 -            if test x$have_altivec_h_hdr = xyes; then
   1.112 -              AC_DEFINE(HAVE_ALTIVEC_H)
   1.113 -            fi
   1.114 -            EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
   1.115 -        fi
   1.116 -    fi
   1.117  fi
   1.118  
   1.119  dnl See if the OSS audio interface is supported
     2.1 --- a/include/SDL_config.h.in	Fri Feb 11 14:42:58 2011 -0800
     2.2 +++ b/include/SDL_config.h.in	Fri Feb 11 14:51:04 2011 -0800
     2.3 @@ -82,7 +82,6 @@
     2.4  #undef HAVE_MATH_H
     2.5  #undef HAVE_ICONV_H
     2.6  #undef HAVE_SIGNAL_H
     2.7 -#undef HAVE_ALTIVEC_H
     2.8  
     2.9  /* C library functions */
    2.10  #undef HAVE_MALLOC
    2.11 @@ -303,6 +302,5 @@
    2.12  
    2.13  /* Enable assembly routines */
    2.14  #undef SDL_ASSEMBLY_ROUTINES
    2.15 -#undef SDL_ALTIVEC_BLITTERS
    2.16  
    2.17  #endif /* _SDL_config_h */
     3.1 --- a/include/SDL_config_macosx.h	Fri Feb 11 14:42:58 2011 -0800
     3.2 +++ b/include/SDL_config_macosx.h	Fri Feb 11 14:51:04 2011 -0800
     3.3 @@ -168,8 +168,5 @@
     3.4  
     3.5  /* Enable assembly routines */
     3.6  #define SDL_ASSEMBLY_ROUTINES	1
     3.7 -#ifdef __ppc__
     3.8 -#define SDL_ALTIVEC_BLITTERS	1
     3.9 -#endif
    3.10  
    3.11  #endif /* _SDL_config_macosx_h */
     4.1 --- a/include/SDL_cpuinfo.h	Fri Feb 11 14:42:58 2011 -0800
     4.2 +++ b/include/SDL_cpuinfo.h	Fri Feb 11 14:51:04 2011 -0800
     4.3 @@ -70,21 +70,6 @@
     4.4  extern DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void);
     4.5  
     4.6  /**
     4.7 - *  This function returns true if the CPU has MMX Ext.\ features.
     4.8 - */
     4.9 -extern DECLSPEC SDL_bool SDLCALL SDL_HasMMXExt(void);
    4.10 -
    4.11 -/**
    4.12 - *  This function returns true if the CPU has 3DNow!\ features.
    4.13 - */
    4.14 -extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNow(void);
    4.15 -
    4.16 -/**
    4.17 - *  This function returns true if the CPU has 3DNow!\ Ext.\ features.
    4.18 - */
    4.19 -extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNowExt(void);
    4.20 -
    4.21 -/**
    4.22   *  This function returns true if the CPU has SSE features.
    4.23   */
    4.24  extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE(void);
    4.25 @@ -95,9 +80,15 @@
    4.26  extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE2(void);
    4.27  
    4.28  /**
    4.29 - *  This function returns true if the CPU has AltiVec features.
    4.30 + *  This function returns true if the CPU has SSE3 features.
    4.31   */
    4.32 -extern DECLSPEC SDL_bool SDLCALL SDL_HasAltiVec(void);
    4.33 +extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE3(void);
    4.34 +
    4.35 +/**
    4.36 + *  This function returns true if the CPU has SSE4 features.
    4.37 + */
    4.38 +extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE4(void);
    4.39 +
    4.40  
    4.41  /* Ends C function definitions when using C++ */
    4.42  #ifdef __cplusplus
     5.1 --- a/src/cpuinfo/SDL_cpuinfo.c	Fri Feb 11 14:42:58 2011 -0800
     5.2 +++ b/src/cpuinfo/SDL_cpuinfo.c	Fri Feb 11 14:51:04 2011 -0800
     5.3 @@ -32,36 +32,17 @@
     5.4  #include <sys/types.h>
     5.5  #include <sys/sysctl.h>
     5.6  #endif
     5.7 -#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
     5.8 -#include <sys/sysctl.h>         /* For AltiVec check */
     5.9 -#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    5.10 -#include <signal.h>
    5.11 -#include <setjmp.h>
    5.12 -#endif
    5.13  #ifdef __WIN32__
    5.14  #include "../core/windows/SDL_windows.h"
    5.15  #endif
    5.16  
    5.17  #define CPU_HAS_RDTSC   0x00000001
    5.18  #define CPU_HAS_MMX     0x00000002
    5.19 -#define CPU_HAS_MMXEXT  0x00000004
    5.20 -#define CPU_HAS_3DNOW   0x00000010
    5.21 -#define CPU_HAS_3DNOWEXT 0x00000020
    5.22 -#define CPU_HAS_SSE     0x00000040
    5.23 -#define CPU_HAS_SSE2    0x00000080
    5.24 -#define CPU_HAS_ALTIVEC 0x00000100
    5.25 +#define CPU_HAS_SSE     0x00000010
    5.26 +#define CPU_HAS_SSE2    0x00000020
    5.27 +#define CPU_HAS_SSE3    0x00000040
    5.28 +#define CPU_HAS_SSE4    0x00000080
    5.29  
    5.30 -#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__
    5.31 -/* This is the brute force way of detecting instruction sets...
    5.32 -   the idea is borrowed from the libmpeg2 library - thanks!
    5.33 - */
    5.34 -static jmp_buf jmpbuf;
    5.35 -static void
    5.36 -illegal_instruction(int sig)
    5.37 -{
    5.38 -    longjmp(jmpbuf, 1);
    5.39 -}
    5.40 -#endif /* HAVE_SETJMP */
    5.41  
    5.42  static __inline__ int
    5.43  CPU_haveCPUID(void)
    5.44 @@ -202,20 +183,6 @@
    5.45  }
    5.46  
    5.47  static __inline__ int
    5.48 -CPU_getCPUIDFeaturesExt(void)
    5.49 -{
    5.50 -    int features = 0;
    5.51 -    int a, b, c, d;
    5.52 -
    5.53 -    cpuid(0x80000000, a, b, c, d);
    5.54 -    if (a >= 0x80000001) {
    5.55 -        cpuid(0x80000001, a, b, c, d);
    5.56 -        features = d;
    5.57 -    }
    5.58 -    return features;
    5.59 -}
    5.60 -
    5.61 -static __inline__ int
    5.62  CPU_haveRDTSC(void)
    5.63  {
    5.64      if (CPU_haveCPUID()) {
    5.65 @@ -234,33 +201,6 @@
    5.66  }
    5.67  
    5.68  static __inline__ int
    5.69 -CPU_haveMMXExt(void)
    5.70 -{
    5.71 -    if (CPU_haveCPUID()) {
    5.72 -        return (CPU_getCPUIDFeaturesExt() & 0x00400000);
    5.73 -    }
    5.74 -    return 0;
    5.75 -}
    5.76 -
    5.77 -static __inline__ int
    5.78 -CPU_have3DNow(void)
    5.79 -{
    5.80 -    if (CPU_haveCPUID()) {
    5.81 -        return (CPU_getCPUIDFeaturesExt() & 0x80000000);
    5.82 -    }
    5.83 -    return 0;
    5.84 -}
    5.85 -
    5.86 -static __inline__ int
    5.87 -CPU_have3DNowExt(void)
    5.88 -{
    5.89 -    if (CPU_haveCPUID()) {
    5.90 -        return (CPU_getCPUIDFeaturesExt() & 0x40000000);
    5.91 -    }
    5.92 -    return 0;
    5.93 -}
    5.94 -
    5.95 -static __inline__ int
    5.96  CPU_haveSSE(void)
    5.97  {
    5.98      if (CPU_haveCPUID()) {
    5.99 @@ -279,26 +219,33 @@
   5.100  }
   5.101  
   5.102  static __inline__ int
   5.103 -CPU_haveAltiVec(void)
   5.104 +CPU_haveSSE3(void)
   5.105  {
   5.106 -    volatile int altivec = 0;
   5.107 -#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
   5.108 -    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
   5.109 -    int hasVectorUnit = 0;
   5.110 -    size_t length = sizeof(hasVectorUnit);
   5.111 -    int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
   5.112 -    if (0 == error)
   5.113 -        altivec = (hasVectorUnit != 0);
   5.114 -#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
   5.115 -    void (*handler) (int sig);
   5.116 -    handler = signal(SIGILL, illegal_instruction);
   5.117 -    if (setjmp(jmpbuf) == 0) {
   5.118 -        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
   5.119 -        altivec = 1;
   5.120 +    if (CPU_haveCPUID()) {
   5.121 +        int a, b, c, d;
   5.122 +
   5.123 +        cpuid(0, a, b, c, d);
   5.124 +        if (a >= 1) {
   5.125 +            cpuid(1, a, b, c, d);
   5.126 +            return (c & 0x00000001);
   5.127 +        }
   5.128      }
   5.129 -    signal(SIGILL, handler);
   5.130 -#endif
   5.131 -    return altivec;
   5.132 +    return 0;
   5.133 +}
   5.134 +
   5.135 +static __inline__ int
   5.136 +CPU_haveSSE4(void)
   5.137 +{
   5.138 +    if (CPU_haveCPUID()) {
   5.139 +        int a, b, c, d;
   5.140 +
   5.141 +        cpuid(0, a, b, c, d);
   5.142 +        if (a >= 1) {
   5.143 +            cpuid(1, a, b, c, d);
   5.144 +            return (c & 0x00000100);
   5.145 +        }
   5.146 +    }
   5.147 +    return 0;
   5.148  }
   5.149  
   5.150  static int SDL_CPUCount = 0;
   5.151 @@ -471,23 +418,17 @@
   5.152          if (CPU_haveMMX()) {
   5.153              SDL_CPUFeatures |= CPU_HAS_MMX;
   5.154          }
   5.155 -        if (CPU_haveMMXExt()) {
   5.156 -            SDL_CPUFeatures |= CPU_HAS_MMXEXT;
   5.157 -        }
   5.158 -        if (CPU_have3DNow()) {
   5.159 -            SDL_CPUFeatures |= CPU_HAS_3DNOW;
   5.160 -        }
   5.161 -        if (CPU_have3DNowExt()) {
   5.162 -            SDL_CPUFeatures |= CPU_HAS_3DNOWEXT;
   5.163 -        }
   5.164          if (CPU_haveSSE()) {
   5.165              SDL_CPUFeatures |= CPU_HAS_SSE;
   5.166          }
   5.167          if (CPU_haveSSE2()) {
   5.168              SDL_CPUFeatures |= CPU_HAS_SSE2;
   5.169          }
   5.170 -        if (CPU_haveAltiVec()) {
   5.171 -            SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
   5.172 +        if (CPU_haveSSE3()) {
   5.173 +            SDL_CPUFeatures |= CPU_HAS_SSE3;
   5.174 +        }
   5.175 +        if (CPU_haveSSE4()) {
   5.176 +            SDL_CPUFeatures |= CPU_HAS_SSE4;
   5.177          }
   5.178      }
   5.179      return SDL_CPUFeatures;
   5.180 @@ -512,33 +453,6 @@
   5.181  }
   5.182  
   5.183  SDL_bool
   5.184 -SDL_HasMMXExt(void)
   5.185 -{
   5.186 -    if (SDL_GetCPUFeatures() & CPU_HAS_MMXEXT) {
   5.187 -        return SDL_TRUE;
   5.188 -    }
   5.189 -    return SDL_FALSE;
   5.190 -}
   5.191 -
   5.192 -SDL_bool
   5.193 -SDL_Has3DNow(void)
   5.194 -{
   5.195 -    if (SDL_GetCPUFeatures() & CPU_HAS_3DNOW) {
   5.196 -        return SDL_TRUE;
   5.197 -    }
   5.198 -    return SDL_FALSE;
   5.199 -}
   5.200 -
   5.201 -SDL_bool
   5.202 -SDL_Has3DNowExt(void)
   5.203 -{
   5.204 -    if (SDL_GetCPUFeatures() & CPU_HAS_3DNOWEXT) {
   5.205 -        return SDL_TRUE;
   5.206 -    }
   5.207 -    return SDL_FALSE;
   5.208 -}
   5.209 -
   5.210 -SDL_bool
   5.211  SDL_HasSSE(void)
   5.212  {
   5.213      if (SDL_GetCPUFeatures() & CPU_HAS_SSE) {
   5.214 @@ -557,9 +471,18 @@
   5.215  }
   5.216  
   5.217  SDL_bool
   5.218 -SDL_HasAltiVec(void)
   5.219 +SDL_HasSSE3(void)
   5.220  {
   5.221 -    if (SDL_GetCPUFeatures() & CPU_HAS_ALTIVEC) {
   5.222 +    if (SDL_GetCPUFeatures() & CPU_HAS_SSE3) {
   5.223 +        return SDL_TRUE;
   5.224 +    }
   5.225 +    return SDL_FALSE;
   5.226 +}
   5.227 +
   5.228 +SDL_bool
   5.229 +SDL_HasSSE4(void)
   5.230 +{
   5.231 +    if (SDL_GetCPUFeatures() & CPU_HAS_SSE4) {
   5.232          return SDL_TRUE;
   5.233      }
   5.234      return SDL_FALSE;
   5.235 @@ -578,12 +501,10 @@
   5.236      printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
   5.237      printf("RDTSC: %d\n", SDL_HasRDTSC());
   5.238      printf("MMX: %d\n", SDL_HasMMX());
   5.239 -    printf("MMXExt: %d\n", SDL_HasMMXExt());
   5.240 -    printf("3DNow: %d\n", SDL_Has3DNow());
   5.241 -    printf("3DNowExt: %d\n", SDL_Has3DNowExt());
   5.242      printf("SSE: %d\n", SDL_HasSSE());
   5.243      printf("SSE2: %d\n", SDL_HasSSE2());
   5.244 -    printf("AltiVec: %d\n", SDL_HasAltiVec());
   5.245 +    printf("SSE3: %d\n", SDL_HasSSE3());
   5.246 +    printf("SSE4: %d\n", SDL_HasSSE4());
   5.247      return 0;
   5.248  }
   5.249  
     6.1 --- a/src/video/SDL_blit.c	Fri Feb 11 14:42:58 2011 -0800
     6.2 +++ b/src/video/SDL_blit.c	Fri Feb 11 14:51:04 2011 -0800
     6.3 @@ -100,30 +100,6 @@
     6.4      return (okay ? 0 : -1);
     6.5  }
     6.6  
     6.7 -#ifdef __MACOSX__
     6.8 -#include <sys/sysctl.h>
     6.9 -
    6.10 -static SDL_bool
    6.11 -SDL_UseAltivecPrefetch()
    6.12 -{
    6.13 -    const char key[] = "hw.l3cachesize";
    6.14 -    u_int64_t result = 0;
    6.15 -    size_t typeSize = sizeof(result);
    6.16 -
    6.17 -    if (sysctlbyname(key, &result, &typeSize, NULL, 0) == 0 && result > 0) {
    6.18 -        return SDL_TRUE;
    6.19 -    } else {
    6.20 -        return SDL_FALSE;
    6.21 -    }
    6.22 -}
    6.23 -#else
    6.24 -static SDL_bool
    6.25 -SDL_UseAltivecPrefetch()
    6.26 -{
    6.27 -    /* Just guess G4 */
    6.28 -    return SDL_TRUE;
    6.29 -}
    6.30 -#endif /* __MACOSX__ */
    6.31  
    6.32  static SDL_BlitFunc
    6.33  SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
    6.34 @@ -145,22 +121,12 @@
    6.35              if (SDL_HasMMX()) {
    6.36                  features |= SDL_CPU_MMX;
    6.37              }
    6.38 -            if (SDL_Has3DNow()) {
    6.39 -                features |= SDL_CPU_3DNOW;
    6.40 -            }
    6.41              if (SDL_HasSSE()) {
    6.42                  features |= SDL_CPU_SSE;
    6.43              }
    6.44              if (SDL_HasSSE2()) {
    6.45                  features |= SDL_CPU_SSE2;
    6.46              }
    6.47 -            if (SDL_HasAltiVec()) {
    6.48 -                if (SDL_UseAltivecPrefetch()) {
    6.49 -                    features |= SDL_CPU_ALTIVEC_PREFETCH;
    6.50 -                } else {
    6.51 -                    features |= SDL_CPU_ALTIVEC_NOPREFETCH;
    6.52 -                }
    6.53 -            }
    6.54          }
    6.55      }
    6.56  
     7.1 --- a/src/video/SDL_blit.h	Fri Feb 11 14:42:58 2011 -0800
     7.2 +++ b/src/video/SDL_blit.h	Fri Feb 11 14:51:04 2011 -0800
     7.3 @@ -34,9 +34,6 @@
     7.4  #ifdef __MMX__
     7.5  #include <mmintrin.h>
     7.6  #endif
     7.7 -#ifdef __3dNOW__
     7.8 -#include <mm3dnow.h>
     7.9 -#endif
    7.10  #ifdef __SSE__
    7.11  #include <xmmintrin.h>
    7.12  #endif
    7.13 @@ -65,11 +62,8 @@
    7.14  /* SDL blit CPU flags */
    7.15  #define SDL_CPU_ANY                 0x00000000
    7.16  #define SDL_CPU_MMX                 0x00000001
    7.17 -#define SDL_CPU_3DNOW               0x00000002
    7.18  #define SDL_CPU_SSE                 0x00000004
    7.19  #define SDL_CPU_SSE2                0x00000008
    7.20 -#define SDL_CPU_ALTIVEC_PREFETCH    0x00000010
    7.21 -#define SDL_CPU_ALTIVEC_NOPREFETCH  0x00000020
    7.22  
    7.23  typedef struct
    7.24  {
     8.1 --- a/src/video/SDL_blit_A.c	Fri Feb 11 14:42:58 2011 -0800
     8.2 +++ b/src/video/SDL_blit_A.c	Fri Feb 11 14:51:04 2011 -0800
     8.3 @@ -419,806 +419,6 @@
     8.4  
     8.5  #endif /* __MMX__ */
     8.6  
     8.7 -#if SDL_ALTIVEC_BLITTERS
     8.8 -#if __MWERKS__
     8.9 -#pragma altivec_model on
    8.10 -#endif
    8.11 -#if HAVE_ALTIVEC_H
    8.12 -#include <altivec.h>
    8.13 -#endif
    8.14 -#include <assert.h>
    8.15 -
    8.16 -#if (defined(__MACOSX__) && (__GNUC__ < 4))
    8.17 -#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    8.18 -        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    8.19 -#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    8.20 -        (vector unsigned short) ( a,b,c,d,e,f,g,h )
    8.21 -#else
    8.22 -#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    8.23 -        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    8.24 -#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    8.25 -        (vector unsigned short) { a,b,c,d,e,f,g,h }
    8.26 -#endif
    8.27 -
    8.28 -#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    8.29 -#define VECPRINT(msg, v) do { \
    8.30 -    vector unsigned int tmpvec = (vector unsigned int)(v); \
    8.31 -    unsigned int *vp = (unsigned int *)&tmpvec; \
    8.32 -    printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
    8.33 -} while (0)
    8.34 -
    8.35 -/* the permuation vector that takes the high bytes out of all the appropriate shorts 
    8.36 -    (vector unsigned char)(
    8.37 -        0x00, 0x10, 0x02, 0x12,
    8.38 -        0x04, 0x14, 0x06, 0x16,
    8.39 -        0x08, 0x18, 0x0A, 0x1A,
    8.40 -        0x0C, 0x1C, 0x0E, 0x1E );
    8.41 -*/
    8.42 -#define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))
    8.43 -#define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12)))
    8.44 -#define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
    8.45 -#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    8.46 -    ? vec_lvsl(0, src) \
    8.47 -    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    8.48 -
    8.49 -
    8.50 -#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
    8.51 -    /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
    8.52 -    vector unsigned short vtemp1 = vec_mule(vs, valpha); \
    8.53 -    /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
    8.54 -    vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
    8.55 -    /* valpha2 is 255-alpha */ \
    8.56 -    vector unsigned char valpha2 = vec_nor(valpha, valpha); \
    8.57 -    /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \
    8.58 -    vector unsigned short vtemp3 = vec_mule(vd, valpha2); \
    8.59 -    /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \
    8.60 -    vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \
    8.61 -    /* add source and dest */ \
    8.62 -    vtemp1 = vec_add(vtemp1, vtemp3); \
    8.63 -    vtemp2 = vec_add(vtemp2, vtemp4); \
    8.64 -    /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \
    8.65 -    vtemp1 = vec_add(vtemp1, v1_16); \
    8.66 -    vtemp3 = vec_sr(vtemp1, v8_16); \
    8.67 -    vtemp1 = vec_add(vtemp1, vtemp3); \
    8.68 -    /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \
    8.69 -    vtemp2 = vec_add(vtemp2, v1_16); \
    8.70 -    vtemp4 = vec_sr(vtemp2, v8_16); \
    8.71 -    vtemp2 = vec_add(vtemp2, vtemp4); \
    8.72 -    /* (>>8) and get ARGBARGBARGBARGB */ \
    8.73 -    vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
    8.74 -} while (0)
    8.75 -
    8.76 -/* Calculate the permute vector used for 32->32 swizzling */
    8.77 -static vector unsigned char
    8.78 -calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
    8.79 -{
    8.80 -    /*
    8.81 -     * We have to assume that the bits that aren't used by other
    8.82 -     *  colors is alpha, and it's one complete byte, since some formats
    8.83 -     *  leave alpha with a zero mask, but we should still swizzle the bits.
    8.84 -     */
    8.85 -    /* ARGB */
    8.86 -    const static struct SDL_PixelFormat default_pixel_format = {
    8.87 -        NULL, 0, 0,
    8.88 -        0, 0, 0, 0,
    8.89 -        16, 8, 0, 24,
    8.90 -        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
    8.91 -    };
    8.92 -    if (!srcfmt) {
    8.93 -        srcfmt = &default_pixel_format;
    8.94 -    }
    8.95 -    if (!dstfmt) {
    8.96 -        dstfmt = &default_pixel_format;
    8.97 -    }
    8.98 -    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
    8.99 -                                                       0x04, 0x04, 0x04, 0x04,
   8.100 -                                                       0x08, 0x08, 0x08, 0x08,
   8.101 -                                                       0x0C, 0x0C, 0x0C,
   8.102 -                                                       0x0C);
   8.103 -    vector unsigned char vswiz;
   8.104 -    vector unsigned int srcvec;
   8.105 -#define RESHIFT(X) (3 - ((X) >> 3))
   8.106 -    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   8.107 -    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   8.108 -    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   8.109 -    Uint32 amask;
   8.110 -    /* Use zero for alpha if either surface doesn't have alpha */
   8.111 -    if (dstfmt->Amask) {
   8.112 -        amask =
   8.113 -            ((srcfmt->Amask) ? RESHIFT(srcfmt->
   8.114 -                                       Ashift) : 0x10) << (dstfmt->Ashift);
   8.115 -    } else {
   8.116 -        amask =
   8.117 -            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   8.118 -                          0xFFFFFFFF);
   8.119 -    }
   8.120 -#undef RESHIFT
   8.121 -    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   8.122 -    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   8.123 -    return (vswiz);
   8.124 -}
   8.125 -
   8.126 -static void
   8.127 -Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
   8.128 -{
   8.129 -    int height = info->dst_h;
   8.130 -    Uint8 *src = (Uint8 *) info->src;
   8.131 -    int srcskip = info->src_skip;
   8.132 -    Uint8 *dst = (Uint8 *) info->dst;
   8.133 -    int dstskip = info->dst_skip;
   8.134 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   8.135 -
   8.136 -    vector unsigned char v0 = vec_splat_u8(0);
   8.137 -    vector unsigned short v8_16 = vec_splat_u16(8);
   8.138 -    vector unsigned short v1_16 = vec_splat_u16(1);
   8.139 -    vector unsigned short v2_16 = vec_splat_u16(2);
   8.140 -    vector unsigned short v3_16 = vec_splat_u16(3);
   8.141 -    vector unsigned int v8_32 = vec_splat_u32(8);
   8.142 -    vector unsigned int v16_32 = vec_add(v8_32, v8_32);
   8.143 -    vector unsigned short v3f =
   8.144 -        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
   8.145 -                          0x003f, 0x003f, 0x003f, 0x003f);
   8.146 -    vector unsigned short vfc =
   8.147 -        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   8.148 -                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
   8.149 -
   8.150 -    /* 
   8.151 -       0x10 - 0x1f is the alpha
   8.152 -       0x00 - 0x0e evens are the red
   8.153 -       0x01 - 0x0f odds are zero
   8.154 -     */
   8.155 -    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   8.156 -                                                       0x10, 0x02, 0x01, 0x01,
   8.157 -                                                       0x10, 0x04, 0x01, 0x01,
   8.158 -                                                       0x10, 0x06, 0x01,
   8.159 -                                                       0x01);
   8.160 -    vector unsigned char vredalpha2 =
   8.161 -        (vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
   8.162 -                                        vec_sl(v8_32, v16_32))
   8.163 -        );
   8.164 -    /*
   8.165 -       0x00 - 0x0f is ARxx ARxx ARxx ARxx
   8.166 -       0x11 - 0x0f odds are blue
   8.167 -     */
   8.168 -    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   8.169 -                                                   0x04, 0x05, 0x06, 0x13,
   8.170 -                                                   0x08, 0x09, 0x0a, 0x15,
   8.171 -                                                   0x0c, 0x0d, 0x0e, 0x17);
   8.172 -    vector unsigned char vblue2 =
   8.173 -        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32)
   8.174 -        );
   8.175 -    /*
   8.176 -       0x00 - 0x0f is ARxB ARxB ARxB ARxB
   8.177 -       0x10 - 0x0e evens are green
   8.178 -     */
   8.179 -    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   8.180 -                                                    0x04, 0x05, 0x12, 0x07,
   8.181 -                                                    0x08, 0x09, 0x14, 0x0b,
   8.182 -                                                    0x0c, 0x0d, 0x16, 0x0f);
   8.183 -    vector unsigned char vgreen2 =
   8.184 -        (vector unsigned
   8.185 -         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32))
   8.186 -        );
   8.187 -    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   8.188 -                                                    0x00, 0x0a, 0x00, 0x0e,
   8.189 -                                                    0x00, 0x12, 0x00, 0x16,
   8.190 -                                                    0x00, 0x1a, 0x00, 0x1e);
   8.191 -    vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
   8.192 -    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   8.193 -    vector unsigned char valphaPermute =
   8.194 -        vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
   8.195 -
   8.196 -    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
   8.197 -    vf800 = vec_sl(vf800, vec_splat_u16(8));
   8.198 -
   8.199 -    while (height--) {
   8.200 -        int extrawidth;
   8.201 -        vector unsigned char valigner;
   8.202 -        vector unsigned char vsrc;
   8.203 -        vector unsigned char voverflow;
   8.204 -        int width = info->dst_w;
   8.205 -
   8.206 -#define ONE_PIXEL_BLEND(condition, widthvar) \
   8.207 -        while (condition) { \
   8.208 -            Uint32 Pixel; \
   8.209 -            unsigned sR, sG, sB, dR, dG, dB, sA; \
   8.210 -            DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
   8.211 -            if(sA) { \
   8.212 -                unsigned short dstpixel = *((unsigned short *)dst); \
   8.213 -                dR = (dstpixel >> 8) & 0xf8; \
   8.214 -                dG = (dstpixel >> 3) & 0xfc; \
   8.215 -                dB = (dstpixel << 3) & 0xf8; \
   8.216 -                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
   8.217 -                *((unsigned short *)dst) = ( \
   8.218 -                    ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \
   8.219 -                ); \
   8.220 -            } \
   8.221 -            src += 4; \
   8.222 -            dst += 2; \
   8.223 -            widthvar--; \
   8.224 -        }
   8.225 -        ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
   8.226 -        extrawidth = (width % 8);
   8.227 -        valigner = VEC_ALIGNER(src);
   8.228 -        vsrc = (vector unsigned char) vec_ld(0, src);
   8.229 -        width -= extrawidth;
   8.230 -        while (width) {
   8.231 -            vector unsigned char valpha;
   8.232 -            vector unsigned char vsrc1, vsrc2;
   8.233 -            vector unsigned char vdst1, vdst2;
   8.234 -            vector unsigned short vR, vG, vB;
   8.235 -            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   8.236 -
   8.237 -            /* Load 8 pixels from src as ARGB */
   8.238 -            voverflow = (vector unsigned char) vec_ld(15, src);
   8.239 -            vsrc = vec_perm(vsrc, voverflow, valigner);
   8.240 -            vsrc1 = vec_perm(vsrc, vsrc, vpermute);
   8.241 -            src += 16;
   8.242 -            vsrc = (vector unsigned char) vec_ld(15, src);
   8.243 -            voverflow = vec_perm(voverflow, vsrc, valigner);
   8.244 -            vsrc2 = vec_perm(voverflow, voverflow, vpermute);
   8.245 -            src += 16;
   8.246 -
   8.247 -            /* Load 8 pixels from dst as XRGB */
   8.248 -            voverflow = vec_ld(0, dst);
   8.249 -            vR = vec_and((vector unsigned short) voverflow, vf800);
   8.250 -            vB = vec_sl((vector unsigned short) voverflow, v3_16);
   8.251 -            vG = vec_sl(vB, v2_16);
   8.252 -            vdst1 =
   8.253 -                (vector unsigned char) vec_perm((vector unsigned char) vR,
   8.254 -                                                (vector unsigned char) vR,
   8.255 -                                                vredalpha1);
   8.256 -            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   8.257 -            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   8.258 -            vdst2 =
   8.259 -                (vector unsigned char) vec_perm((vector unsigned char) vR,
   8.260 -                                                (vector unsigned char) vR,
   8.261 -                                                vredalpha2);
   8.262 -            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   8.263 -            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   8.264 -
   8.265 -            /* Alpha blend 8 pixels as ARGB */
   8.266 -            valpha = vec_perm(vsrc1, v0, valphaPermute);
   8.267 -            VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16,
   8.268 -                               v8_16);
   8.269 -            valpha = vec_perm(vsrc2, v0, valphaPermute);
   8.270 -            VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16,
   8.271 -                               v8_16);
   8.272 -
   8.273 -            /* Convert 8 pixels to 565 */
   8.274 -            vpixel = (vector unsigned short) vec_packpx((vector unsigned int)
   8.275 -                                                        vdst1,
   8.276 -                                                        (vector unsigned int)
   8.277 -                                                        vdst2);
   8.278 -            vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge);
   8.279 -            vgpixel = vec_and(vgpixel, vfc);
   8.280 -            vgpixel = vec_sl(vgpixel, v3_16);
   8.281 -            vrpixel = vec_sl(vpixel, v1_16);
   8.282 -            vrpixel = vec_and(vrpixel, vf800);
   8.283 -            vbpixel = vec_and(vpixel, v3f);
   8.284 -            vdst1 =
   8.285 -                vec_or((vector unsigned char) vrpixel,
   8.286 -                       (vector unsigned char) vgpixel);
   8.287 -            vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel);
   8.288 -
   8.289 -            /* Store 8 pixels */
   8.290 -            vec_st(vdst1, 0, dst);
   8.291 -
   8.292 -            width -= 8;
   8.293 -            dst += 16;
   8.294 -        }
   8.295 -        ONE_PIXEL_BLEND((extrawidth), extrawidth);
   8.296 -#undef ONE_PIXEL_BLEND
   8.297 -        src += srcskip;
   8.298 -        dst += dstskip;
   8.299 -    }
   8.300 -}
   8.301 -
   8.302 -static void
   8.303 -Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
   8.304 -{
   8.305 -    int height = info->dst_h;
   8.306 -    Uint32 *srcp = (Uint32 *) info->src;
   8.307 -    int srcskip = info->src_skip >> 2;
   8.308 -    Uint32 *dstp = (Uint32 *) info->dst;
   8.309 -    int dstskip = info->dst_skip >> 2;
   8.310 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   8.311 -    SDL_PixelFormat *dstfmt = info->dst_fmt;
   8.312 -    unsigned sA = info->a;
   8.313 -    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
   8.314 -    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   8.315 -    Uint32 ckey = info->colorkey;
   8.316 -    vector unsigned char mergePermute;
   8.317 -    vector unsigned char vsrcPermute;
   8.318 -    vector unsigned char vdstPermute;
   8.319 -    vector unsigned char vsdstPermute;
   8.320 -    vector unsigned char valpha;
   8.321 -    vector unsigned char valphamask;
   8.322 -    vector unsigned char vbits;
   8.323 -    vector unsigned char v0;
   8.324 -    vector unsigned short v1;
   8.325 -    vector unsigned short v8;
   8.326 -    vector unsigned int vckey;
   8.327 -    vector unsigned int vrgbmask;
   8.328 -
   8.329 -    mergePermute = VEC_MERGE_PERMUTE();
   8.330 -    v0 = vec_splat_u8(0);
   8.331 -    v1 = vec_splat_u16(1);
   8.332 -    v8 = vec_splat_u16(8);
   8.333 -
   8.334 -    /* set the alpha to 255 on the destination surf */
   8.335 -    valphamask = VEC_ALPHA_MASK();
   8.336 -
   8.337 -    vsrcPermute = calc_swizzle32(srcfmt, NULL);
   8.338 -    vdstPermute = calc_swizzle32(NULL, dstfmt);
   8.339 -    vsdstPermute = calc_swizzle32(dstfmt, NULL);
   8.340 -
   8.341 -    /* set a vector full of alpha and 255-alpha */
   8.342 -    ((unsigned char *) &valpha)[0] = sA;
   8.343 -    valpha = vec_splat(valpha, 0);
   8.344 -    vbits = (vector unsigned char) vec_splat_s8(-1);
   8.345 -
   8.346 -    ckey &= rgbmask;
   8.347 -    ((unsigned int *) (char *) &vckey)[0] = ckey;
   8.348 -    vckey = vec_splat(vckey, 0);
   8.349 -    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   8.350 -    vrgbmask = vec_splat(vrgbmask, 0);
   8.351 -
   8.352 -    while (height--) {
   8.353 -        int width = info->dst_w;
   8.354 -#define ONE_PIXEL_BLEND(condition, widthvar) \
   8.355 -        while (condition) { \
   8.356 -            Uint32 Pixel; \
   8.357 -            unsigned sR, sG, sB, dR, dG, dB; \
   8.358 -            RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \
   8.359 -            if(sA && Pixel != ckey) { \
   8.360 -                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   8.361 -                DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
   8.362 -                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
   8.363 -                ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
   8.364 -            } \
   8.365 -            dstp++; \
   8.366 -            srcp++; \
   8.367 -            widthvar--; \
   8.368 -        }
   8.369 -        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   8.370 -        if (width > 0) {
   8.371 -            int extrawidth = (width % 4);
   8.372 -            vector unsigned char valigner = VEC_ALIGNER(srcp);
   8.373 -            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
   8.374 -            width -= extrawidth;
   8.375 -            while (width) {
   8.376 -                vector unsigned char vsel;
   8.377 -                vector unsigned char voverflow;
   8.378 -                vector unsigned char vd;
   8.379 -                vector unsigned char vd_orig;
   8.380 -
   8.381 -                /* s = *srcp */
   8.382 -                voverflow = (vector unsigned char) vec_ld(15, srcp);
   8.383 -                vs = vec_perm(vs, voverflow, valigner);
   8.384 -
   8.385 -                /* vsel is set for items that match the key */
   8.386 -                vsel =
   8.387 -                    (vector unsigned char) vec_and((vector unsigned int) vs,
   8.388 -                                                   vrgbmask);
   8.389 -                vsel = (vector unsigned char) vec_cmpeq((vector unsigned int)
   8.390 -                                                        vsel, vckey);
   8.391 -
   8.392 -                /* permute to source format */
   8.393 -                vs = vec_perm(vs, valpha, vsrcPermute);
   8.394 -
   8.395 -                /* d = *dstp */
   8.396 -                vd = (vector unsigned char) vec_ld(0, dstp);
   8.397 -                vd_orig = vd = vec_perm(vd, v0, vsdstPermute);
   8.398 -
   8.399 -                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
   8.400 -
   8.401 -                /* set the alpha channel to full on */
   8.402 -                vd = vec_or(vd, valphamask);
   8.403 -
   8.404 -                /* mask out color key */
   8.405 -                vd = vec_sel(vd, vd_orig, vsel);
   8.406 -
   8.407 -                /* permute to dest format */
   8.408 -                vd = vec_perm(vd, vbits, vdstPermute);
   8.409 -
   8.410 -                /* *dstp = res */
   8.411 -                vec_st((vector unsigned int) vd, 0, dstp);
   8.412 -
   8.413 -                srcp += 4;
   8.414 -                dstp += 4;
   8.415 -                width -= 4;
   8.416 -                vs = voverflow;
   8.417 -            }
   8.418 -            ONE_PIXEL_BLEND((extrawidth), extrawidth);
   8.419 -        }
   8.420 -#undef ONE_PIXEL_BLEND
   8.421 -
   8.422 -        srcp += srcskip;
   8.423 -        dstp += dstskip;
   8.424 -    }
   8.425 -}
   8.426 -
   8.427 -
   8.428 -static void
   8.429 -Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
   8.430 -{
   8.431 -    int width = info->dst_w;
   8.432 -    int height = info->dst_h;
   8.433 -    Uint32 *srcp = (Uint32 *) info->src;
   8.434 -    int srcskip = info->src_skip >> 2;
   8.435 -    Uint32 *dstp = (Uint32 *) info->dst;
   8.436 -    int dstskip = info->dst_skip >> 2;
   8.437 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   8.438 -    SDL_PixelFormat *dstfmt = info->dst_fmt;
   8.439 -    vector unsigned char mergePermute;
   8.440 -    vector unsigned char valphaPermute;
   8.441 -    vector unsigned char vsrcPermute;
   8.442 -    vector unsigned char vdstPermute;
   8.443 -    vector unsigned char vsdstPermute;
   8.444 -    vector unsigned char valphamask;
   8.445 -    vector unsigned char vpixelmask;
   8.446 -    vector unsigned char v0;
   8.447 -    vector unsigned short v1;
   8.448 -    vector unsigned short v8;
   8.449 -
   8.450 -    v0 = vec_splat_u8(0);
   8.451 -    v1 = vec_splat_u16(1);
   8.452 -    v8 = vec_splat_u16(8);
   8.453 -    mergePermute = VEC_MERGE_PERMUTE();
   8.454 -    valphamask = VEC_ALPHA_MASK();
   8.455 -    valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
   8.456 -    vpixelmask = vec_nor(valphamask, v0);
   8.457 -    vsrcPermute = calc_swizzle32(srcfmt, NULL);
   8.458 -    vdstPermute = calc_swizzle32(NULL, dstfmt);
   8.459 -    vsdstPermute = calc_swizzle32(dstfmt, NULL);
   8.460 -
   8.461 -    while (height--) {
   8.462 -        width = info->dst_w;
   8.463 -#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
   8.464 -            Uint32 Pixel; \
   8.465 -            unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
   8.466 -            DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
   8.467 -            if(sA) { \
   8.468 -              DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \
   8.469 -              ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
   8.470 -              ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \
   8.471 -            } \
   8.472 -            ++srcp; \
   8.473 -            ++dstp; \
   8.474 -            widthvar--; \
   8.475 -        }
   8.476 -        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   8.477 -        if (width > 0) {
   8.478 -            /* vsrcPermute */
   8.479 -            /* vdstPermute */
   8.480 -            int extrawidth = (width % 4);
   8.481 -            vector unsigned char valigner = VEC_ALIGNER(srcp);
   8.482 -            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
   8.483 -            width -= extrawidth;
   8.484 -            while (width) {
   8.485 -                vector unsigned char voverflow;
   8.486 -                vector unsigned char vd;
   8.487 -                vector unsigned char valpha;
   8.488 -                vector unsigned char vdstalpha;
   8.489 -                /* s = *srcp */
   8.490 -                voverflow = (vector unsigned char) vec_ld(15, srcp);
   8.491 -                vs = vec_perm(vs, voverflow, valigner);
   8.492 -                vs = vec_perm(vs, v0, vsrcPermute);
   8.493 -
   8.494 -                valpha = vec_perm(vs, v0, valphaPermute);
   8.495 -
   8.496 -                /* d = *dstp */
   8.497 -                vd = (vector unsigned char) vec_ld(0, dstp);
   8.498 -                vd = vec_perm(vd, v0, vsdstPermute);
   8.499 -                vdstalpha = vec_and(vd, valphamask);
   8.500 -
   8.501 -                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
   8.502 -
   8.503 -                /* set the alpha to the dest alpha */
   8.504 -                vd = vec_and(vd, vpixelmask);
   8.505 -                vd = vec_or(vd, vdstalpha);
   8.506 -                vd = vec_perm(vd, v0, vdstPermute);
   8.507 -
   8.508 -                /* *dstp = res */
   8.509 -                vec_st((vector unsigned int) vd, 0, dstp);
   8.510 -
   8.511 -                srcp += 4;
   8.512 -                dstp += 4;
   8.513 -                width -= 4;
   8.514 -                vs = voverflow;
   8.515 -
   8.516 -            }
   8.517 -            ONE_PIXEL_BLEND((extrawidth), extrawidth);
   8.518 -        }
   8.519 -        srcp += srcskip;
   8.520 -        dstp += dstskip;
   8.521 -#undef ONE_PIXEL_BLEND
   8.522 -    }
   8.523 -}
   8.524 -
   8.525 -/* fast ARGB888->(A)RGB888 blending with pixel alpha */
   8.526 -static void
   8.527 -BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
   8.528 -{
   8.529 -    int width = info->dst_w;
   8.530 -    int height = info->dst_h;
   8.531 -    Uint32 *srcp = (Uint32 *) info->src;
   8.532 -    int srcskip = info->src_skip >> 2;
   8.533 -    Uint32 *dstp = (Uint32 *) info->dst;
   8.534 -    int dstskip = info->dst_skip >> 2;
   8.535 -    vector unsigned char mergePermute;
   8.536 -    vector unsigned char valphaPermute;
   8.537 -    vector unsigned char valphamask;
   8.538 -    vector unsigned char vpixelmask;
   8.539 -    vector unsigned char v0;
   8.540 -    vector unsigned short v1;
   8.541 -    vector unsigned short v8;
   8.542 -    v0 = vec_splat_u8(0);
   8.543 -    v1 = vec_splat_u16(1);
   8.544 -    v8 = vec_splat_u16(8);
   8.545 -    mergePermute = VEC_MERGE_PERMUTE();
   8.546 -    valphamask = VEC_ALPHA_MASK();
   8.547 -    valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
   8.548 -
   8.549 -
   8.550 -    vpixelmask = vec_nor(valphamask, v0);
   8.551 -    while (height--) {
   8.552 -        width = info->dst_w;
   8.553 -#define ONE_PIXEL_BLEND(condition, widthvar) \
   8.554 -        while ((condition)) { \
   8.555 -            Uint32 dalpha; \
   8.556 -            Uint32 d; \
   8.557 -            Uint32 s1; \
   8.558 -            Uint32 d1; \
   8.559 -            Uint32 s = *srcp; \
   8.560 -            Uint32 alpha = s >> 24; \
   8.561 -            if(alpha) { \
   8.562 -              if(alpha == SDL_ALPHA_OPAQUE) { \
   8.563 -                *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \
   8.564 -              } else { \
   8.565 -                d = *dstp; \
   8.566 -                dalpha = d & 0xff000000; \
   8.567 -                s1 = s & 0xff00ff; \
   8.568 -                d1 = d & 0xff00ff; \
   8.569 -                d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
   8.570 -                s &= 0xff00; \
   8.571 -                d &= 0xff00; \
   8.572 -                d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
   8.573 -                *dstp = d1 | d | dalpha; \
   8.574 -              } \
   8.575 -            } \
   8.576 -            ++srcp; \
   8.577 -            ++dstp; \
   8.578 -            widthvar--; \
   8.579 -	    }
   8.580 -        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   8.581 -        if (width > 0) {
   8.582 -            int extrawidth = (width % 4);
   8.583 -            vector unsigned char valigner = VEC_ALIGNER(srcp);
   8.584 -            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
   8.585 -            width -= extrawidth;
   8.586 -            while (width) {
   8.587 -                vector unsigned char voverflow;
   8.588 -                vector unsigned char vd;
   8.589 -                vector unsigned char valpha;
   8.590 -                vector unsigned char vdstalpha;
   8.591 -                /* s = *srcp */
   8.592 -                voverflow = (vector unsigned char) vec_ld(15, srcp);
   8.593 -                vs = vec_perm(vs, voverflow, valigner);
   8.594 -
   8.595 -                valpha = vec_perm(vs, v0, valphaPermute);
   8.596 -
   8.597 -                /* d = *dstp */
   8.598 -                vd = (vector unsigned char) vec_ld(0, dstp);
   8.599 -                vdstalpha = vec_and(vd, valphamask);
   8.600 -
   8.601 -                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
   8.602 -
   8.603 -                /* set the alpha to the dest alpha */
   8.604 -                vd = vec_and(vd, vpixelmask);
   8.605 -                vd = vec_or(vd, vdstalpha);
   8.606 -
   8.607 -                /* *dstp = res */
   8.608 -                vec_st((vector unsigned int) vd, 0, dstp);
   8.609 -
   8.610 -                srcp += 4;
   8.611 -                dstp += 4;
   8.612 -                width -= 4;
   8.613 -                vs = voverflow;
   8.614 -            }
   8.615 -            ONE_PIXEL_BLEND((extrawidth), extrawidth);
   8.616 -        }
   8.617 -        srcp += srcskip;
   8.618 -        dstp += dstskip;
   8.619 -    }
   8.620 -#undef ONE_PIXEL_BLEND
   8.621 -}
   8.622 -
   8.623 -static void
   8.624 -Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
   8.625 -{
   8.626 -    /* XXX : 6 */
   8.627 -    int height = info->dst_h;
   8.628 -    Uint32 *srcp = (Uint32 *) info->src;
   8.629 -    int srcskip = info->src_skip >> 2;
   8.630 -    Uint32 *dstp = (Uint32 *) info->dst;
   8.631 -    int dstskip = info->dst_skip >> 2;
   8.632 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   8.633 -    SDL_PixelFormat *dstfmt = info->dst_fmt;
   8.634 -    unsigned sA = info->a;
   8.635 -    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
   8.636 -    vector unsigned char mergePermute;
   8.637 -    vector unsigned char vsrcPermute;
   8.638 -    vector unsigned char vdstPermute;
   8.639 -    vector unsigned char vsdstPermute;
   8.640 -    vector unsigned char valpha;
   8.641 -    vector unsigned char valphamask;
   8.642 -    vector unsigned char vbits;
   8.643 -    vector unsigned short v1;
   8.644 -    vector unsigned short v8;
   8.645 -
   8.646 -    mergePermute = VEC_MERGE_PERMUTE();
   8.647 -    v1 = vec_splat_u16(1);
   8.648 -    v8 = vec_splat_u16(8);
   8.649 -
   8.650 -    /* set the alpha to 255 on the destination surf */
   8.651 -    valphamask = VEC_ALPHA_MASK();
   8.652 -
   8.653 -    vsrcPermute = calc_swizzle32(srcfmt, NULL);
   8.654 -    vdstPermute = calc_swizzle32(NULL, dstfmt);
   8.655 -    vsdstPermute = calc_swizzle32(dstfmt, NULL);
   8.656 -
   8.657 -    /* set a vector full of alpha and 255-alpha */
   8.658 -    ((unsigned char *) &valpha)[0] = sA;
   8.659 -    valpha = vec_splat(valpha, 0);
   8.660 -    vbits = (vector unsigned char) vec_splat_s8(-1);
   8.661 -
   8.662 -    while (height--) {
   8.663 -        int width = info->dst_w;
   8.664 -#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
   8.665 -            Uint32 Pixel; \
   8.666 -            unsigned sR, sG, sB, dR, dG, dB; \
   8.667 -            DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
   8.668 -            DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
   8.669 -            ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
   8.670 -            ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
   8.671 -            ++srcp; \
   8.672 -            ++dstp; \
   8.673 -            widthvar--; \
   8.674 -        }
   8.675 -        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   8.676 -        if (width > 0) {
   8.677 -            int extrawidth = (width % 4);
   8.678 -            vector unsigned char valigner = VEC_ALIGNER(srcp);
   8.679 -            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
   8.680 -            width -= extrawidth;
   8.681 -            while (width) {
   8.682 -                vector unsigned char voverflow;
   8.683 -                vector unsigned char vd;
   8.684 -
   8.685 -                /* s = *srcp */
   8.686 -                voverflow = (vector unsigned char) vec_ld(15, srcp);
   8.687 -                vs = vec_perm(vs, voverflow, valigner);
   8.688 -                vs = vec_perm(vs, valpha, vsrcPermute);
   8.689 -
   8.690 -                /* d = *dstp */
   8.691 -                vd = (vector unsigned char) vec_ld(0, dstp);
   8.692 -                vd = vec_perm(vd, vd, vsdstPermute);
   8.693 -
   8.694 -                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
   8.695 -
   8.696 -                /* set the alpha channel to full on */
   8.697 -                vd = vec_or(vd, valphamask);
   8.698 -                vd = vec_perm(vd, vbits, vdstPermute);
   8.699 -
   8.700 -                /* *dstp = res */
   8.701 -                vec_st((vector unsigned int) vd, 0, dstp);
   8.702 -
   8.703 -                srcp += 4;
   8.704 -                dstp += 4;
   8.705 -                width -= 4;
   8.706 -                vs = voverflow;
   8.707 -            }
   8.708 -            ONE_PIXEL_BLEND((extrawidth), extrawidth);
   8.709 -        }
   8.710 -#undef ONE_PIXEL_BLEND
   8.711 -
   8.712 -        srcp += srcskip;
   8.713 -        dstp += dstskip;
   8.714 -    }
   8.715 -
   8.716 -}
   8.717 -
   8.718 -
   8.719 -/* fast RGB888->(A)RGB888 blending */
   8.720 -static void
   8.721 -BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
   8.722 -{
   8.723 -    unsigned alpha = info->a;
   8.724 -    int height = info->dst_h;
   8.725 -    Uint32 *srcp = (Uint32 *) info->src;
   8.726 -    int srcskip = info->src_skip >> 2;
   8.727 -    Uint32 *dstp = (Uint32 *) info->dst;
   8.728 -    int dstskip = info->dst_skip >> 2;
   8.729 -    vector unsigned char mergePermute;
   8.730 -    vector unsigned char valpha;
   8.731 -    vector unsigned char valphamask;
   8.732 -    vector unsigned short v1;
   8.733 -    vector unsigned short v8;
   8.734 -
   8.735 -    mergePermute = VEC_MERGE_PERMUTE();
   8.736 -    v1 = vec_splat_u16(1);
   8.737 -    v8 = vec_splat_u16(8);
   8.738 -
   8.739 -    /* set the alpha to 255 on the destination surf */
   8.740 -    valphamask = VEC_ALPHA_MASK();
   8.741 -
   8.742 -    /* set a vector full of alpha and 255-alpha */
   8.743 -    ((unsigned char *) &valpha)[0] = alpha;
   8.744 -    valpha = vec_splat(valpha, 0);
   8.745 -
   8.746 -    while (height--) {
   8.747 -        int width = info->dst_w;
   8.748 -#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
   8.749 -            Uint32 s = *srcp; \
   8.750 -            Uint32 d = *dstp; \
   8.751 -            Uint32 s1 = s & 0xff00ff; \
   8.752 -            Uint32 d1 = d & 0xff00ff; \
   8.753 -            d1 = (d1 + ((s1 - d1) * alpha >> 8)) \
   8.754 -                 & 0xff00ff; \
   8.755 -            s &= 0xff00; \
   8.756 -            d &= 0xff00; \
   8.757 -            d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
   8.758 -            *dstp = d1 | d | 0xff000000; \
   8.759 -            ++srcp; \
   8.760 -            ++dstp; \
   8.761 -            widthvar--; \
   8.762 -        }
   8.763 -        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   8.764 -        if (width > 0) {
   8.765 -            int extrawidth = (width % 4);
   8.766 -            vector unsigned char valigner = VEC_ALIGNER(srcp);
   8.767 -            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
   8.768 -            width -= extrawidth;
   8.769 -            while (width) {
   8.770 -                vector unsigned char voverflow;
   8.771 -                vector unsigned char vd;
   8.772 -
   8.773 -                /* s = *srcp */
   8.774 -                voverflow = (vector unsigned char) vec_ld(15, srcp);
   8.775 -                vs = vec_perm(vs, voverflow, valigner);
   8.776 -
   8.777 -                /* d = *dstp */
   8.778 -                vd = (vector unsigned char) vec_ld(0, dstp);
   8.779 -
   8.780 -                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
   8.781 -
   8.782 -                /* set the alpha channel to full on */
   8.783 -                vd = vec_or(vd, valphamask);
   8.784 -
   8.785 -                /* *dstp = res */
   8.786 -                vec_st((vector unsigned int) vd, 0, dstp);
   8.787 -
   8.788 -                srcp += 4;
   8.789 -                dstp += 4;
   8.790 -                width -= 4;
   8.791 -                vs = voverflow;
   8.792 -            }
   8.793 -            ONE_PIXEL_BLEND((extrawidth), extrawidth);
   8.794 -        }
   8.795 -#undef ONE_PIXEL_BLEND
   8.796 -
   8.797 -        srcp += srcskip;
   8.798 -        dstp += dstskip;
   8.799 -    }
   8.800 -}
   8.801 -
   8.802 -#if __MWERKS__
   8.803 -#pragma altivec_model off
   8.804 -#endif
   8.805 -#endif /* SDL_ALTIVEC_BLITTERS */
   8.806 -
   8.807  /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
   8.808  static void
   8.809  BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
   8.810 @@ -1338,79 +538,6 @@
   8.811      }
   8.812  }
   8.813  
   8.814 -#ifdef __3dNOW__
   8.815 -/* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
   8.816 -static void
   8.817 -BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
   8.818 -{
   8.819 -    int width = info->dst_w;
   8.820 -    int height = info->dst_h;
   8.821 -    Uint32 *srcp = (Uint32 *) info->src;
   8.822 -    int srcskip = info->src_skip >> 2;
   8.823 -    Uint32 *dstp = (Uint32 *) info->dst;
   8.824 -    int dstskip = info->dst_skip >> 2;
   8.825 -    SDL_PixelFormat *sf = info->src_fmt;
   8.826 -    Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
   8.827 -    Uint32 amask = sf->Amask;
   8.828 -    Uint32 ashift = sf->Ashift;
   8.829 -    Uint64 multmask;
   8.830 -
   8.831 -    __m64 src1, dst1, mm_alpha, mm_zero, dmask;
   8.832 -
   8.833 -    mm_zero = _mm_setzero_si64();       /* 0 -> mm_zero */
   8.834 -    multmask = 0xFFFF;
   8.835 -    multmask <<= (ashift * 2);
   8.836 -    multmask = ~multmask;
   8.837 -    dmask = *(__m64 *) & multmask;      /* dst alpha mask -> dmask */
   8.838 -
   8.839 -    while (height--) {
   8.840 -	    /* *INDENT-OFF* */
   8.841 -	    DUFFS_LOOP4({
   8.842 -		Uint32 alpha;
   8.843 -
   8.844 -		_m_prefetch(srcp + 16);
   8.845 -		_m_prefetch(dstp + 16);
   8.846 -
   8.847 -		alpha = *srcp & amask;
   8.848 -		if (alpha == 0) {
   8.849 -			/* do nothing */
   8.850 -		} else if (alpha == amask) {
   8.851 -			/* copy RGB, keep dst alpha */
   8.852 -			*dstp = (*srcp & chanmask) | (*dstp & ~chanmask);
   8.853 -		} else {
   8.854 -			src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
   8.855 -			src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
   8.856 -
   8.857 -			dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
   8.858 -			dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
   8.859 -
   8.860 -			mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
   8.861 -			mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
   8.862 -			mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
   8.863 -			mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
   8.864 -			mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */
   8.865 -
   8.866 -			/* blend */		    
   8.867 -			src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */
   8.868 -			src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */
   8.869 -			src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */
   8.870 -			dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */
   8.871 -			dst1 = _mm_packs_pu16(dst1, mm_zero);  /* 0000ARGB -> dst1 */
   8.872 -			
   8.873 -			*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
   8.874 -		}
   8.875 -		++srcp;
   8.876 -		++dstp;
   8.877 -	    }, width);
   8.878 -	    /* *INDENT-ON* */
   8.879 -        srcp += srcskip;
   8.880 -        dstp += dstskip;
   8.881 -    }
   8.882 -    _mm_empty();
   8.883 -}
   8.884 -
   8.885 -#endif /* __MMX__ */
   8.886 -
   8.887  /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
   8.888  
   8.889  /* blend a single 16 bit pixel at 50% */
   8.890 @@ -2130,17 +1257,10 @@
   8.891              return BlitNto1PixelAlpha;
   8.892  
   8.893          case 2:
   8.894 -#if SDL_ALTIVEC_BLITTERS
   8.895 -            if (sf->BytesPerPixel == 4
   8.896 -                && df->Gmask == 0x7e0 && df->Bmask == 0x1f
   8.897 -                && SDL_HasAltiVec())
   8.898 -                return Blit32to565PixelAlphaAltivec;
   8.899 -            else
   8.900 -#endif
   8.901 -                if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
   8.902 -                    && sf->Gmask == 0xff00
   8.903 -                    && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
   8.904 -                        || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
   8.905 +            if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
   8.906 +                && sf->Gmask == 0xff00
   8.907 +                && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
   8.908 +                    || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
   8.909                  if (df->Gmask == 0x7e0)
   8.910                      return BlitARGBto565PixelAlpha;
   8.911                  else if (df->Gmask == 0x3e0)
   8.912 @@ -2152,35 +1272,20 @@
   8.913              if (sf->Rmask == df->Rmask
   8.914                  && sf->Gmask == df->Gmask
   8.915                  && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
   8.916 -#if defined(__MMX__) || defined(__3dNOW__)
   8.917 +#if defined(__MMX__)
   8.918                  if (sf->Rshift % 8 == 0
   8.919                      && sf->Gshift % 8 == 0
   8.920                      && sf->Bshift % 8 == 0
   8.921                      && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
   8.922 -#ifdef __3dNOW__
   8.923 -                    if (SDL_Has3DNow())
   8.924 -                        return BlitRGBtoRGBPixelAlphaMMX3DNOW;
   8.925 -#endif
   8.926 -#ifdef __MMX__
   8.927                      if (SDL_HasMMX())
   8.928                          return BlitRGBtoRGBPixelAlphaMMX;
   8.929 -#endif
   8.930                  }
   8.931 -#endif /* __MMX__ || __3dNOW__ */
   8.932 +#endif /* __MMX__ */
   8.933                  if (sf->Amask == 0xff000000) {
   8.934 -#if SDL_ALTIVEC_BLITTERS
   8.935 -                    if (SDL_HasAltiVec())
   8.936 -                        return BlitRGBtoRGBPixelAlphaAltivec;
   8.937 -#endif
   8.938                      return BlitRGBtoRGBPixelAlpha;
   8.939                  }
   8.940              }
   8.941 -#if SDL_ALTIVEC_BLITTERS
   8.942 -            if (sf->Amask && sf->BytesPerPixel == 4 && SDL_HasAltiVec())
   8.943 -                return Blit32to32PixelAlphaAltivec;
   8.944 -            else
   8.945 -#endif
   8.946 -                return BlitNtoNPixelAlpha;
   8.947 +            return BlitNtoNPixelAlpha;
   8.948  
   8.949          case 3:
   8.950          default:
   8.951 @@ -2226,19 +1331,10 @@
   8.952                          return BlitRGBtoRGBSurfaceAlphaMMX;
   8.953  #endif
   8.954                      if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
   8.955 -#if SDL_ALTIVEC_BLITTERS
   8.956 -                        if (SDL_HasAltiVec())
   8.957 -                            return BlitRGBtoRGBSurfaceAlphaAltivec;
   8.958 -#endif
   8.959                          return BlitRGBtoRGBSurfaceAlpha;
   8.960                      }
   8.961                  }
   8.962 -#if SDL_ALTIVEC_BLITTERS
   8.963 -                if ((sf->BytesPerPixel == 4) && SDL_HasAltiVec())
   8.964 -                    return Blit32to32SurfaceAlphaAltivec;
   8.965 -                else
   8.966 -#endif
   8.967 -                    return BlitNtoNSurfaceAlpha;
   8.968 +                return BlitNtoNSurfaceAlpha;
   8.969  
   8.970              case 3:
   8.971              default:
   8.972 @@ -2252,12 +1348,6 @@
   8.973              if (df->BytesPerPixel == 1)
   8.974                  return BlitNto1SurfaceAlphaKey;
   8.975              else
   8.976 -#if SDL_ALTIVEC_BLITTERS
   8.977 -            if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
   8.978 -                    SDL_HasAltiVec())
   8.979 -                return Blit32to32SurfaceAlphaKeyAltivec;
   8.980 -            else
   8.981 -#endif
   8.982                  return BlitNtoNSurfaceAlphaKey;
   8.983          }
   8.984          break;
     9.1 --- a/src/video/SDL_blit_N.c	Fri Feb 11 14:42:58 2011 -0800
     9.2 +++ b/src/video/SDL_blit_N.c	Fri Feb 11 14:51:04 2011 -0800
     9.3 @@ -28,846 +28,8 @@
     9.4  
     9.5  /* Functions to blit from N-bit surfaces to other surfaces */
     9.6  
     9.7 -#if SDL_ALTIVEC_BLITTERS
     9.8 -#if __MWERKS__
     9.9 -#pragma altivec_model on
    9.10 -#endif
    9.11 -#ifdef HAVE_ALTIVEC_H
    9.12 -#include <altivec.h>
    9.13 -#endif
    9.14 -#define assert(X)
    9.15 -#ifdef __MACOSX__
    9.16 -#include <sys/sysctl.h>
    9.17 -static size_t
    9.18 -GetL3CacheSize(void)
    9.19 -{
    9.20 -    const char key[] = "hw.l3cachesize";
    9.21 -    u_int64_t result = 0;
    9.22 -    size_t typeSize = sizeof(result);
    9.23 -
    9.24 -
    9.25 -    int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
    9.26 -    if (0 != err)
    9.27 -        return 0;
    9.28 -
    9.29 -    return result;
    9.30 -}
    9.31 -#else
    9.32 -static size_t
    9.33 -GetL3CacheSize(void)
    9.34 -{
    9.35 -    /* XXX: Just guess G4 */
    9.36 -    return 2097152;
    9.37 -}
    9.38 -#endif /* __MACOSX__ */
    9.39 -
    9.40 -#if (defined(__MACOSX__) && (__GNUC__ < 4))
    9.41 -#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    9.42 -        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    9.43 -#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    9.44 -        (vector unsigned short) ( a,b,c,d,e,f,g,h )
    9.45 -#else
    9.46 -#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    9.47 -        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    9.48 -#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    9.49 -        (vector unsigned short) { a,b,c,d,e,f,g,h }
    9.50 -#endif
    9.51 -
    9.52 -#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    9.53 -#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    9.54 -                               ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    9.55 -                                 0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    9.56 -                                 0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    9.57 -                                 0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    9.58 -
    9.59 -#define MAKE8888(dstfmt, r, g, b, a)  \
    9.60 -    ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    9.61 -      ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    9.62 -      ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    9.63 -      ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    9.64 -
    9.65 -/*
    9.66 - * Data Stream Touch...Altivec cache prefetching.
    9.67 - *
    9.68 - *  Don't use this on a G5...however, the speed boost is very significant
    9.69 - *   on a G4.
    9.70 - */
    9.71 -#define DST_CHAN_SRC 1
    9.72 -#define DST_CHAN_DEST 2
    9.73 -
    9.74 -/* macro to set DST control word value... */
    9.75 -#define DST_CTRL(size, count, stride) \
    9.76 -    (((size) << 24) | ((count) << 16) | (stride))
    9.77 -
    9.78 -#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    9.79 -    ? vec_lvsl(0, src) \
    9.80 -    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    9.81 -
    9.82 -/* Calculate the permute vector used for 32->32 swizzling */
    9.83 -static vector unsigned char
    9.84 -calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
    9.85 -{
    9.86 -    /*
    9.87 -     * We have to assume that the bits that aren't used by other
    9.88 -     *  colors is alpha, and it's one complete byte, since some formats
    9.89 -     *  leave alpha with a zero mask, but we should still swizzle the bits.
    9.90 -     */
    9.91 -    /* ARGB */
    9.92 -    const static const struct SDL_PixelFormat default_pixel_format = {
    9.93 -        NULL, 32, 4,
    9.94 -        0, 0, 0, 0,
    9.95 -        16, 8, 0, 24,
    9.96 -        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
    9.97 -    };
    9.98 -    if (!srcfmt) {
    9.99 -        srcfmt = &default_pixel_format;
   9.100 -    }
   9.101 -    if (!dstfmt) {
   9.102 -        dstfmt = &default_pixel_format;
   9.103 -    }
   9.104 -    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   9.105 -                                                       0x04, 0x04, 0x04, 0x04,
   9.106 -                                                       0x08, 0x08, 0x08, 0x08,
   9.107 -                                                       0x0C, 0x0C, 0x0C,
   9.108 -                                                       0x0C);
   9.109 -    vector unsigned char vswiz;
   9.110 -    vector unsigned int srcvec;
   9.111 -#define RESHIFT(X) (3 - ((X) >> 3))
   9.112 -    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   9.113 -    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   9.114 -    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   9.115 -    Uint32 amask;
   9.116 -    /* Use zero for alpha if either surface doesn't have alpha */
   9.117 -    if (dstfmt->Amask) {
   9.118 -        amask =
   9.119 -            ((srcfmt->Amask) ? RESHIFT(srcfmt->
   9.120 -                                       Ashift) : 0x10) << (dstfmt->Ashift);
   9.121 -    } else {
   9.122 -        amask =
   9.123 -            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   9.124 -                          0xFFFFFFFF);
   9.125 -    }
   9.126 -#undef RESHIFT
   9.127 -    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   9.128 -    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   9.129 -    return (vswiz);
   9.130 -}
   9.131 -
   9.132 -static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
   9.133 -static void
   9.134 -Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   9.135 -{
   9.136 -    int height = info->dst_h;
   9.137 -    Uint8 *src = (Uint8 *) info->src;
   9.138 -    int srcskip = info->src_skip;
   9.139 -    Uint8 *dst = (Uint8 *) info->dst;
   9.140 -    int dstskip = info->dst_skip;
   9.141 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   9.142 -    vector unsigned char valpha = vec_splat_u8(0);
   9.143 -    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   9.144 -    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   9.145 -                                                    0x00, 0x0a, 0x00, 0x0e,
   9.146 -                                                    0x00, 0x12, 0x00, 0x16,
   9.147 -                                                    0x00, 0x1a, 0x00, 0x1e);
   9.148 -    vector unsigned short v1 = vec_splat_u16(1);
   9.149 -    vector unsigned short v3 = vec_splat_u16(3);
   9.150 -    vector unsigned short v3f =
   9.151 -        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
   9.152 -                          0x003f, 0x003f, 0x003f, 0x003f);
   9.153 -    vector unsigned short vfc =
   9.154 -        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   9.155 -                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
   9.156 -    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
   9.157 -    vf800 = vec_sl(vf800, vec_splat_u16(8));
   9.158 -
   9.159 -    while (height--) {
   9.160 -        vector unsigned char valigner;
   9.161 -        vector unsigned char voverflow;
   9.162 -        vector unsigned char vsrc;
   9.163 -
   9.164 -        int width = info->dst_w;
   9.165 -        int extrawidth;
   9.166 -
   9.167 -        /* do scalar until we can align... */
   9.168 -#define ONE_PIXEL_BLEND(condition, widthvar) \
   9.169 -        while (condition) { \
   9.170 -            Uint32 Pixel; \
   9.171 -            unsigned sR, sG, sB, sA; \
   9.172 -            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   9.173 -                          sR, sG, sB, sA); \
   9.174 -            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   9.175 -                                ((sG << 3) & 0x000007E0) | \
   9.176 -                                ((sB >> 3) & 0x0000001F)); \
   9.177 -            dst += 2; \
   9.178 -            src += 4; \
   9.179 -            widthvar--; \
   9.180 -        }
   9.181 -
   9.182 -        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   9.183 -
   9.184 -        /* After all that work, here's the vector part! */
   9.185 -        extrawidth = (width % 8);       /* trailing unaligned stores */
   9.186 -        width -= extrawidth;
   9.187 -        vsrc = vec_ld(0, src);
   9.188 -        valigner = VEC_ALIGNER(src);
   9.189 -
   9.190 -        while (width) {
   9.191 -            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   9.192 -            vector unsigned int vsrc1, vsrc2;
   9.193 -            vector unsigned char vdst;
   9.194 -
   9.195 -            voverflow = vec_ld(15, src);
   9.196 -            vsrc = vec_perm(vsrc, voverflow, valigner);
   9.197 -            vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   9.198 -            src += 16;
   9.199 -            vsrc = voverflow;
   9.200 -            voverflow = vec_ld(15, src);
   9.201 -            vsrc = vec_perm(vsrc, voverflow, valigner);
   9.202 -            vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   9.203 -            /* 1555 */
   9.204 -            vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
   9.205 -            vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   9.206 -            vgpixel = vec_and(vgpixel, vfc);
   9.207 -            vgpixel = vec_sl(vgpixel, v3);
   9.208 -            vrpixel = vec_sl(vpixel, v1);
   9.209 -            vrpixel = vec_and(vrpixel, vf800);
   9.210 -            vbpixel = vec_and(vpixel, v3f);
   9.211 -            vdst =
   9.212 -                vec_or((vector unsigned char) vrpixel,
   9.213 -                       (vector unsigned char) vgpixel);
   9.214 -            /* 565 */
   9.215 -            vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   9.216 -            vec_st(vdst, 0, dst);
   9.217 -
   9.218 -            width -= 8;
   9.219 -            src += 16;
   9.220 -            dst += 16;
   9.221 -            vsrc = voverflow;
   9.222 -        }
   9.223 -
   9.224 -        assert(width == 0);
   9.225 -
   9.226 -        /* do scalar until we can align... */
   9.227 -        ONE_PIXEL_BLEND((extrawidth), extrawidth);
   9.228 -#undef ONE_PIXEL_BLEND
   9.229 -
   9.230 -        src += srcskip;         /* move to next row, accounting for pitch. */
   9.231 -        dst += dstskip;
   9.232 -    }
   9.233 -
   9.234 -
   9.235 -}
   9.236 -
   9.237 -static void
   9.238 -Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   9.239 -{
   9.240 -    int height = info->dst_h;
   9.241 -    Uint8 *src = (Uint8 *) info->src;
   9.242 -    int srcskip = info->src_skip;
   9.243 -    Uint8 *dst = (Uint8 *) info->dst;
   9.244 -    int dstskip = info->dst_skip;
   9.245 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   9.246 -    SDL_PixelFormat *dstfmt = info->dst_fmt;
   9.247 -    unsigned alpha;
   9.248 -    vector unsigned char valpha;
   9.249 -    vector unsigned char vpermute;
   9.250 -    vector unsigned short vf800;
   9.251 -    vector unsigned int v8 = vec_splat_u32(8);
   9.252 -    vector unsigned int v16 = vec_add(v8, v8);
   9.253 -    vector unsigned short v2 = vec_splat_u16(2);
   9.254 -    vector unsigned short v3 = vec_splat_u16(3);
   9.255 -    /* 
   9.256 -       0x10 - 0x1f is the alpha
   9.257 -       0x00 - 0x0e evens are the red
   9.258 -       0x01 - 0x0f odds are zero
   9.259 -     */
   9.260 -    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   9.261 -                                                       0x10, 0x02, 0x01, 0x01,
   9.262 -                                                       0x10, 0x04, 0x01, 0x01,
   9.263 -                                                       0x10, 0x06, 0x01,
   9.264 -                                                       0x01);
   9.265 -    vector unsigned char vredalpha2 =
   9.266 -        (vector unsigned
   9.267 -         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   9.268 -        );
   9.269 -    /*
   9.270 -       0x00 - 0x0f is ARxx ARxx ARxx ARxx
   9.271 -       0x11 - 0x0f odds are blue
   9.272 -     */
   9.273 -    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   9.274 -                                                   0x04, 0x05, 0x06, 0x13,
   9.275 -                                                   0x08, 0x09, 0x0a, 0x15,
   9.276 -                                                   0x0c, 0x0d, 0x0e, 0x17);
   9.277 -    vector unsigned char vblue2 =
   9.278 -        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   9.279 -        );
   9.280 -    /*
   9.281 -       0x00 - 0x0f is ARxB ARxB ARxB ARxB
   9.282 -       0x10 - 0x0e evens are green
   9.283 -     */
   9.284 -    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   9.285 -                                                    0x04, 0x05, 0x12, 0x07,
   9.286 -                                                    0x08, 0x09, 0x14, 0x0b,
   9.287 -                                                    0x0c, 0x0d, 0x16, 0x0f);
   9.288 -    vector unsigned char vgreen2 =
   9.289 -        (vector unsigned
   9.290 -         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   9.291 -        );
   9.292 -
   9.293 -
   9.294 -    assert(srcfmt->BytesPerPixel == 2);
   9.295 -    assert(dstfmt->BytesPerPixel == 4);
   9.296 -
   9.297 -    vf800 = (vector unsigned short) vec_splat_u8(-7);
   9.298 -    vf800 = vec_sl(vf800, vec_splat_u16(8));
   9.299 -
   9.300 -    if (dstfmt->Amask && info->a) {
   9.301 -        ((unsigned char *) &valpha)[0] = alpha = info->a;
   9.302 -        valpha = vec_splat(valpha, 0);
   9.303 -    } else {
   9.304 -        alpha = 0;
   9.305 -        valpha = vec_splat_u8(0);
   9.306 -    }
   9.307 -
   9.308 -    vpermute = calc_swizzle32(NULL, dstfmt);
   9.309 -    while (height--) {
   9.310 -        vector unsigned char valigner;
   9.311 -        vector unsigned char voverflow;
   9.312 -        vector unsigned char vsrc;
   9.313 -
   9.314 -        int width = info->dst_w;
   9.315 -        int extrawidth;
   9.316 -
   9.317 -        /* do scalar until we can align... */
   9.318 -#define ONE_PIXEL_BLEND(condition, widthvar) \
   9.319 -        while (condition) { \
   9.320 -            unsigned sR, sG, sB; \
   9.321 -            unsigned short Pixel = *((unsigned short *)src); \
   9.322 -            sR = (Pixel >> 8) & 0xf8; \
   9.323 -            sG = (Pixel >> 3) & 0xfc; \
   9.324 -            sB = (Pixel << 3) & 0xf8; \
   9.325 -            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   9.326 -            src += 2; \
   9.327 -            dst += 4; \
   9.328 -            widthvar--; \
   9.329 -        }
   9.330 -        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   9.331 -
   9.332 -        /* After all that work, here's the vector part! */
   9.333 -        extrawidth = (width % 8);       /* trailing unaligned stores */
   9.334 -        width -= extrawidth;
   9.335 -        vsrc = vec_ld(0, src);
   9.336 -        valigner = VEC_ALIGNER(src);
   9.337 -
   9.338 -        while (width) {
   9.339 -            vector unsigned short vR, vG, vB;
   9.340 -            vector unsigned char vdst1, vdst2;
   9.341 -
   9.342 -            voverflow = vec_ld(15, src);
   9.343 -            vsrc = vec_perm(vsrc, voverflow, valigner);
   9.344 -
   9.345 -            vR = vec_and((vector unsigned short) vsrc, vf800);
   9.346 -            vB = vec_sl((vector unsigned short) vsrc, v3);
   9.347 -            vG = vec_sl(vB, v2);
   9.348 -
   9.349 -            vdst1 =
   9.350 -                (vector unsigned char) vec_perm((vector unsigned char) vR,
   9.351 -                                                valpha, vredalpha1);
   9.352 -            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   9.353 -            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   9.354 -            vdst1 = vec_perm(vdst1, valpha, vpermute);
   9.355 -            vec_st(vdst1, 0, dst);
   9.356 -
   9.357 -            vdst2 =
   9.358 -                (vector unsigned char) vec_perm((vector unsigned char) vR,
   9.359 -                                                valpha, vredalpha2);
   9.360 -            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   9.361 -            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   9.362 -            vdst2 = vec_perm(vdst2, valpha, vpermute);
   9.363 -            vec_st(vdst2, 16, dst);
   9.364 -
   9.365 -            width -= 8;
   9.366 -            dst += 32;
   9.367 -            src += 16;
   9.368 -            vsrc = voverflow;
   9.369 -        }
   9.370 -
   9.371 -        assert(width == 0);
   9.372 -
   9.373 -
   9.374 -        /* do scalar until we can align... */
   9.375 -        ONE_PIXEL_BLEND((extrawidth), extrawidth);
   9.376 -#undef ONE_PIXEL_BLEND
   9.377 -
   9.378 -        src += srcskip;         /* move to next row, accounting for pitch. */
   9.379 -        dst += dstskip;
   9.380 -    }
   9.381 -
   9.382 -}
   9.383 -
   9.384 -
   9.385 -static void
   9.386 -Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   9.387 -{
   9.388 -    int height = info->dst_h;
   9.389 -    Uint8 *src = (Uint8 *) info->src;
   9.390 -    int srcskip = info->src_skip;
   9.391 -    Uint8 *dst = (Uint8 *) info->dst;
   9.392 -    int dstskip = info->dst_skip;
   9.393 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   9.394 -    SDL_PixelFormat *dstfmt = info->dst_fmt;
   9.395 -    unsigned alpha;
   9.396 -    vector unsigned char valpha;
   9.397 -    vector unsigned char vpermute;
   9.398 -    vector unsigned short vf800;
   9.399 -    vector unsigned int v8 = vec_splat_u32(8);
   9.400 -    vector unsigned int v16 = vec_add(v8, v8);
   9.401 -    vector unsigned short v1 = vec_splat_u16(1);
   9.402 -    vector unsigned short v3 = vec_splat_u16(3);
   9.403 -    /* 
   9.404 -       0x10 - 0x1f is the alpha
   9.405 -       0x00 - 0x0e evens are the red
   9.406 -       0x01 - 0x0f odds are zero
   9.407 -     */
   9.408 -    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   9.409 -                                                       0x10, 0x02, 0x01, 0x01,
   9.410 -                                                       0x10, 0x04, 0x01, 0x01,
   9.411 -                                                       0x10, 0x06, 0x01,
   9.412 -                                                       0x01);
   9.413 -    vector unsigned char vredalpha2 =
   9.414 -        (vector unsigned
   9.415 -         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   9.416 -        );
   9.417 -    /*
   9.418 -       0x00 - 0x0f is ARxx ARxx ARxx ARxx
   9.419 -       0x11 - 0x0f odds are blue
   9.420 -     */
   9.421 -    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   9.422 -                                                   0x04, 0x05, 0x06, 0x13,
   9.423 -                                                   0x08, 0x09, 0x0a, 0x15,
   9.424 -                                                   0x0c, 0x0d, 0x0e, 0x17);
   9.425 -    vector unsigned char vblue2 =
   9.426 -        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   9.427 -        );
   9.428 -    /*
   9.429 -       0x00 - 0x0f is ARxB ARxB ARxB ARxB
   9.430 -       0x10 - 0x0e evens are green
   9.431 -     */
   9.432 -    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   9.433 -                                                    0x04, 0x05, 0x12, 0x07,
   9.434 -                                                    0x08, 0x09, 0x14, 0x0b,
   9.435 -                                                    0x0c, 0x0d, 0x16, 0x0f);
   9.436 -    vector unsigned char vgreen2 =
   9.437 -        (vector unsigned
   9.438 -         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   9.439 -        );
   9.440 -
   9.441 -
   9.442 -    assert(srcfmt->BytesPerPixel == 2);
   9.443 -    assert(dstfmt->BytesPerPixel == 4);
   9.444 -
   9.445 -    vf800 = (vector unsigned short) vec_splat_u8(-7);
   9.446 -    vf800 = vec_sl(vf800, vec_splat_u16(8));
   9.447 -
   9.448 -    if (dstfmt->Amask && info->a) {
   9.449 -        ((unsigned char *) &valpha)[0] = alpha = info->a;
   9.450 -        valpha = vec_splat(valpha, 0);
   9.451 -    } else {
   9.452 -        alpha = 0;
   9.453 -        valpha = vec_splat_u8(0);
   9.454 -    }
   9.455 -
   9.456 -    vpermute = calc_swizzle32(NULL, dstfmt);
   9.457 -    while (height--) {
   9.458 -        vector unsigned char valigner;
   9.459 -        vector unsigned char voverflow;
   9.460 -        vector unsigned char vsrc;
   9.461 -
   9.462 -        int width = info->dst_w;
   9.463 -        int extrawidth;
   9.464 -
   9.465 -        /* do scalar until we can align... */
   9.466 -#define ONE_PIXEL_BLEND(condition, widthvar) \
   9.467 -        while (condition) { \
   9.468 -            unsigned sR, sG, sB; \
   9.469 -            unsigned short Pixel = *((unsigned short *)src); \
   9.470 -            sR = (Pixel >> 7) & 0xf8; \
   9.471 -            sG = (Pixel >> 2) & 0xf8; \
   9.472 -            sB = (Pixel << 3) & 0xf8; \
   9.473 -            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   9.474 -            src += 2; \
   9.475 -            dst += 4; \
   9.476 -            widthvar--; \
   9.477 -        }
   9.478 -        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   9.479 -
   9.480 -        /* After all that work, here's the vector part! */
   9.481 -        extrawidth = (width % 8);       /* trailing unaligned stores */
   9.482 -        width -= extrawidth;
   9.483 -        vsrc = vec_ld(0, src);
   9.484 -        valigner = VEC_ALIGNER(src);
   9.485 -
   9.486 -        while (width) {
   9.487 -            vector unsigned short vR, vG, vB;
   9.488 -            vector unsigned char vdst1, vdst2;
   9.489 -
   9.490 -            voverflow = vec_ld(15, src);
   9.491 -            vsrc = vec_perm(vsrc, voverflow, valigner);
   9.492 -
   9.493 -            vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
   9.494 -            vB = vec_sl((vector unsigned short) vsrc, v3);
   9.495 -            vG = vec_sl(vB, v3);
   9.496 -
   9.497 -            vdst1 =
   9.498 -                (vector unsigned char) vec_perm((vector unsigned char) vR,
   9.499 -                                                valpha, vredalpha1);
   9.500 -            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   9.501 -            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   9.502 -            vdst1 = vec_perm(vdst1, valpha, vpermute);
   9.503 -            vec_st(vdst1, 0, dst);
   9.504 -
   9.505 -            vdst2 =
   9.506 -                (vector unsigned char) vec_perm((vector unsigned char) vR,
   9.507 -                                                valpha, vredalpha2);
   9.508 -            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   9.509 -            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   9.510 -            vdst2 = vec_perm(vdst2, valpha, vpermute);
   9.511 -            vec_st(vdst2, 16, dst);
   9.512 -
   9.513 -            width -= 8;
   9.514 -            dst += 32;
   9.515 -            src += 16;
   9.516 -            vsrc = voverflow;
   9.517 -        }
   9.518 -
   9.519 -        assert(width == 0);
   9.520 -
   9.521 -
   9.522 -        /* do scalar until we can align... */
   9.523 -        ONE_PIXEL_BLEND((extrawidth), extrawidth);
   9.524 -#undef ONE_PIXEL_BLEND
   9.525 -
   9.526 -        src += srcskip;         /* move to next row, accounting for pitch. */
   9.527 -        dst += dstskip;
   9.528 -    }
   9.529 -
   9.530 -}
   9.531 -
   9.532 -static void BlitNtoNKey(SDL_BlitInfo * info);
   9.533 -static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
   9.534 -static void
   9.535 -Blit32to32KeyAltivec(SDL_BlitInfo * info)
   9.536 -{
   9.537 -    int height = info->dst_h;
   9.538 -    Uint32 *srcp = (Uint32 *) info->src;
   9.539 -    int srcskip = info->src_skip / 4;
   9.540 -    Uint32 *dstp = (Uint32 *) info->dst;
   9.541 -    int dstskip = info->dst_skip / 4;
   9.542 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   9.543 -    int srcbpp = srcfmt->BytesPerPixel;
   9.544 -    SDL_PixelFormat *dstfmt = info->dst_fmt;
   9.545 -    int dstbpp = dstfmt->BytesPerPixel;
   9.546 -    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   9.547 -    unsigned alpha = dstfmt->Amask ? info->a : 0;
   9.548 -    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   9.549 -    Uint32 ckey = info->colorkey;
   9.550 -    vector unsigned int valpha;
   9.551 -    vector unsigned char vpermute;
   9.552 -    vector unsigned char vzero;
   9.553 -    vector unsigned int vckey;
   9.554 -    vector unsigned int vrgbmask;
   9.555 -    vpermute = calc_swizzle32(srcfmt, dstfmt);
   9.556 -    if (info->dst_w < 16) {
   9.557 -        if (copy_alpha) {
   9.558 -            BlitNtoNKeyCopyAlpha(info);
   9.559 -        } else {
   9.560 -            BlitNtoNKey(info);
   9.561 -        }
   9.562 -        return;
   9.563 -    }
   9.564 -    vzero = vec_splat_u8(0);
   9.565 -    if (alpha) {
   9.566 -        ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   9.567 -        valpha =
   9.568 -            (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   9.569 -    } else {
   9.570 -        valpha = (vector unsigned int) vzero;
   9.571 -    }
   9.572 -    ckey &= rgbmask;
   9.573 -    ((unsigned int *) (char *) &vckey)[0] = ckey;
   9.574 -    vckey = vec_splat(vckey, 0);
   9.575 -    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   9.576 -    vrgbmask = vec_splat(vrgbmask, 0);
   9.577 -
   9.578 -    while (height--) {
   9.579 -#define ONE_PIXEL_BLEND(condition, widthvar) \
   9.580 -        if (copy_alpha) { \
   9.581 -            while (condition) { \
   9.582 -                Uint32 Pixel; \
   9.583 -                unsigned sR, sG, sB, sA; \
   9.584 -                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   9.585 -                          sR, sG, sB, sA); \
   9.586 -                if ( (Pixel & rgbmask) != ckey ) { \
   9.587 -                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   9.588 -                            sR, sG, sB, sA); \
   9.589 -                } \
   9.590 -                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   9.591 -                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   9.592 -                widthvar--; \
   9.593 -            } \
   9.594 -        } else { \
   9.595 -            while (condition) { \
   9.596 -                Uint32 Pixel; \
   9.597 -                unsigned sR, sG, sB; \
   9.598 -                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   9.599 -                if ( Pixel != ckey ) { \
   9.600 -                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   9.601 -                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   9.602 -                              sR, sG, sB, alpha); \
   9.603 -                } \
   9.604 -                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   9.605 -                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   9.606 -                widthvar--; \
   9.607 -            } \
   9.608 -        }
   9.609 -        int width = info->dst_w;
   9.610 -        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   9.611 -        assert(width > 0);
   9.612 -        if (width > 0) {
   9.613 -            int extrawidth = (width % 4);
   9.614 -            vector unsigned char valigner = VEC_ALIGNER(srcp);
   9.615 -            vector unsigned int vs = vec_ld(0, srcp);
   9.616 -            width -= extrawidth;
   9.617 -            assert(width >= 4);
   9.618 -            while (width) {
   9.619 -                vector unsigned char vsel;
   9.620 -                vector unsigned int vd;
   9.621 -                vector unsigned int voverflow = vec_ld(15, srcp);
   9.622 -                /* load the source vec */
   9.623 -                vs = vec_perm(vs, voverflow, valigner);
   9.624 -                /* vsel is set for items that match the key */
   9.625 -                vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   9.626 -                vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
   9.627 -                /* permute the src vec to the dest format */
   9.628 -                vs = vec_perm(vs, valpha, vpermute);
   9.629 -                /* load the destination vec */
   9.630 -                vd = vec_ld(0, dstp);
   9.631 -                /* select the source and dest into vs */
   9.632 -                vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   9.633 -                                                   (vector unsigned char) vd,
   9.634 -                                                   vsel);
   9.635 -
   9.636 -                vec_st(vd, 0, dstp);
   9.637 -                srcp += 4;
   9.638 -                width -= 4;
   9.639 -                dstp += 4;
   9.640 -                vs = voverflow;
   9.641 -            }
   9.642 -            ONE_PIXEL_BLEND((extrawidth), extrawidth);
   9.643 -#undef ONE_PIXEL_BLEND
   9.644 -            srcp += srcskip;
   9.645 -            dstp += dstskip;
   9.646 -        }
   9.647 -    }
   9.648 -}
   9.649 -
   9.650 -/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   9.651 -/* Use this on a G5 */
   9.652 -static void
   9.653 -ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
   9.654 -{
   9.655 -    int height = info->dst_h;
   9.656 -    Uint32 *src = (Uint32 *) info->src;
   9.657 -    int srcskip = info->src_skip / 4;
   9.658 -    Uint32 *dst = (Uint32 *) info->dst;
   9.659 -    int dstskip = info->dst_skip / 4;
   9.660 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   9.661 -    SDL_PixelFormat *dstfmt = info->dst_fmt;
   9.662 -    vector unsigned int vzero = vec_splat_u32(0);
   9.663 -    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   9.664 -    if (dstfmt->Amask && !srcfmt->Amask) {
   9.665 -        if (info->a) {
   9.666 -            vector unsigned char valpha;
   9.667 -            ((unsigned char *) &valpha)[0] = info->a;
   9.668 -            vzero = (vector unsigned int) vec_splat(valpha, 0);
   9.669 -        }
   9.670 -    }
   9.671 -
   9.672 -    assert(srcfmt->BytesPerPixel == 4);
   9.673 -    assert(dstfmt->BytesPerPixel == 4);
   9.674 -
   9.675 -    while (height--) {
   9.676 -        vector unsigned char valigner;
   9.677 -        vector unsigned int vbits;
   9.678 -        vector unsigned int voverflow;
   9.679 -        Uint32 bits;
   9.680 -        Uint8 r, g, b, a;
   9.681 -
   9.682 -        int width = info->dst_w;
   9.683 -        int extrawidth;
   9.684 -
   9.685 -        /* do scalar until we can align... */
   9.686 -        while ((UNALIGNED_PTR(dst)) && (width)) {
   9.687 -            bits = *(src++);
   9.688 -            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   9.689 -            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   9.690 -            width--;
   9.691 -        }
   9.692 -
   9.693 -        /* After all that work, here's the vector part! */
   9.694 -        extrawidth = (width % 4);
   9.695 -        width -= extrawidth;
   9.696 -        valigner = VEC_ALIGNER(src);
   9.697 -        vbits = vec_ld(0, src);
   9.698 -
   9.699 -        while (width) {
   9.700 -            voverflow = vec_ld(15, src);
   9.701 -            src += 4;
   9.702 -            width -= 4;
   9.703 -            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   9.704 -            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   9.705 -            vec_st(vbits, 0, dst);      /* store it back out. */
   9.706 -            dst += 4;
   9.707 -            vbits = voverflow;
   9.708 -        }
   9.709 -
   9.710 -        assert(width == 0);
   9.711 -
   9.712 -        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   9.713 -        while (extrawidth) {
   9.714 -            bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   9.715 -            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   9.716 -            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   9.717 -            extrawidth--;
   9.718 -        }
   9.719 -
   9.720 -        src += srcskip;
   9.721 -        dst += dstskip;
   9.722 -    }
   9.723 -
   9.724 -}
   9.725 -
   9.726 -/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   9.727 -/* Use this on a G4 */
   9.728 -static void
   9.729 -ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
   9.730 -{
   9.731 -    const int scalar_dst_lead = sizeof(Uint32) * 4;
   9.732 -    const int vector_dst_lead = sizeof(Uint32) * 16;
   9.733 -
   9.734 -    int height = info->dst_h;
   9.735 -    Uint32 *src = (Uint32 *) info->src;
   9.736 -    int srcskip = info->src_skip / 4;
   9.737 -    Uint32 *dst = (Uint32 *) info->dst;
   9.738 -    int dstskip = info->dst_skip / 4;
   9.739 -    SDL_PixelFormat *srcfmt = info->src_fmt;
   9.740 -    SDL_PixelFormat *dstfmt = info->dst_fmt;
   9.741 -    vector unsigned int vzero = vec_splat_u32(0);
   9.742 -    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   9.743 -    if (dstfmt->Amask && !srcfmt->Amask) {
   9.744 -        if (info->a) {
   9.745 -            vector unsigned char valpha;
   9.746 -            ((unsigned char *) &valpha)[0] = info->a;
   9.747 -            vzero = (vector unsigned int) vec_splat(valpha, 0);
   9.748 -        }
   9.749 -    }
   9.750 -
   9.751 -    assert(srcfmt->BytesPerPixel == 4);
   9.752 -    assert(dstfmt->BytesPerPixel == 4);
   9.753 -
   9.754 -    while (height--) {
   9.755 -        vector unsigned char valigner;
   9.756 -        vector unsigned int vbits;
   9.757 -        vector unsigned int voverflow;
   9.758 -        Uint32 bits;
   9.759 -        Uint8 r, g, b, a;
   9.760 -
   9.761 -        int width = info->dst_w;
   9.762 -        int extrawidth;
   9.763 -
   9.764 -        /* do scalar until we can align... */
   9.765 -        while ((UNALIGNED_PTR(dst)) && (width)) {
   9.766 -            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   9.767 -                     DST_CHAN_SRC);
   9.768 -            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   9.769 -                      DST_CHAN_DEST);
   9.770 -            bits = *(src++);
   9.771 -            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   9.772 -            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   9.773 -            width--;
   9.774 -        }
   9.775 -
   9.776 -        /* After all that work, here's the vector part! */
   9.777 -        extrawidth = (width % 4);
   9.778 -        width -= extrawidth;
   9.779 -        valigner = VEC_ALIGNER(src);
   9.780 -        vbits = vec_ld(0, src);
   9.781 -
   9.782 -        while (width) {
   9.783 -            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
   9.784 -                     DST_CHAN_SRC);
   9.785 -            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
   9.786 -                      DST_CHAN_DEST);
   9.787 -            voverflow = vec_ld(15, src);
   9.788 -            src += 4;
   9.789 -            width -= 4;
   9.790 -            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   9.791 -            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   9.792 -            vec_st(vbits, 0, dst);      /* store it back out. */
   9.793 -            dst += 4;
   9.794 -            vbits = voverflow;
   9.795 -        }
   9.796 -
   9.797 -        assert(width == 0);
   9.798 -
   9.799 -        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   9.800 -        while (extrawidth) {
   9.801 -            bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   9.802 -            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   9.803 -            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   9.804 -            extrawidth--;
   9.805 -        }
   9.806 -
   9.807 -        src += srcskip;
   9.808 -        dst += dstskip;
   9.809 -    }
   9.810 -
   9.811 -    vec_dss(DST_CHAN_SRC);
   9.812 -    vec_dss(DST_CHAN_DEST);
   9.813 -}
   9.814 -
   9.815 -static Uint32
   9.816 -GetBlitFeatures(void)
   9.817 -{
   9.818 -    static Uint32 features = 0xffffffff;
   9.819 -    if (features == 0xffffffff) {
   9.820 -        /* Provide an override for testing .. */
   9.821 -        char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   9.822 -        if (override) {
   9.823 -            features = 0;
   9.824 -            SDL_sscanf(override, "%u", &features);
   9.825 -        } else {
   9.826 -            features = (0
   9.827 -                        /* Feature 1 is has-MMX */
   9.828 -                        | ((SDL_HasMMX())? 1 : 0)
   9.829 -                        /* Feature 2 is has-AltiVec */
   9.830 -                        | ((SDL_HasAltiVec())? 2 : 0)
   9.831 -                        /* Feature 4 is dont-use-prefetch */
   9.832 -                        /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   9.833 -                        | ((GetL3CacheSize() == 0) ? 4 : 0)
   9.834 -                );
   9.835 -        }
   9.836 -    }
   9.837 -    return features;
   9.838 -}
   9.839 -
   9.840 -#if __MWERKS__
   9.841 -#pragma altivec_model off
   9.842 -#endif
   9.843 -#else
   9.844  /* Feature 1 is has-MMX */
   9.845  #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   9.846 -#endif
   9.847  
   9.848  /* This is now endian dependent */
   9.849  #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   9.850 @@ -2346,15 +1508,6 @@
   9.851  };
   9.852  
   9.853  static const struct blit_table normal_blit_2[] = {
   9.854 -#if SDL_ALTIVEC_BLITTERS
   9.855 -    /* has-altivec */
   9.856 -    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
   9.857 -     0x00000000,
   9.858 -     2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   9.859 -    {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
   9.860 -     0x00000000,
   9.861 -     2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   9.862 -#endif
   9.863      {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
   9.864       0x000000FF,
   9.865       0, Blit_RGB565_ARGB8888, SET_ALPHA},
   9.866 @@ -2378,22 +1531,6 @@
   9.867  };
   9.868  
   9.869  static const struct blit_table normal_blit_4[] = {
   9.870 -#if SDL_ALTIVEC_BLITTERS
   9.871 -    /* has-altivec | dont-use-prefetch */
   9.872 -    {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
   9.873 -     0x00000000,
   9.874 -     6, ConvertAltivec32to32_noprefetch,
   9.875 -     NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   9.876 -    /* has-altivec */
   9.877 -    {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
   9.878 -     0x00000000,
   9.879 -     2, ConvertAltivec32to32_prefetch,
   9.880 -     NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   9.881 -    /* has-altivec */
   9.882 -    {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
   9.883 -     0x0000001F,
   9.884 -     2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
   9.885 -#endif
   9.886      {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
   9.887       0x0000001F,
   9.888       0, Blit_RGB888_RGB565, NO_ALPHA},
   9.889 @@ -2491,12 +1628,6 @@
   9.890          else if (dstfmt->BytesPerPixel == 1)
   9.891              return BlitNto1Key;
   9.892          else {
   9.893 -#if SDL_ALTIVEC_BLITTERS
   9.894 -            if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
   9.895 -                && SDL_HasAltiVec()) {
   9.896 -                return Blit32to32KeyAltivec;
   9.897 -            } else
   9.898 -#endif
   9.899              if (srcfmt->Amask && dstfmt->Amask) {
   9.900                  return BlitNtoNKeyCopyAlpha;
   9.901              } else {
    10.1 --- a/test/testplatform.c	Fri Feb 11 14:42:58 2011 -0800
    10.2 +++ b/test/testplatform.c	Fri Feb 11 14:51:04 2011 -0800
    10.3 @@ -143,13 +143,10 @@
    10.4  	printf("CPU cache line size: %d\n", SDL_GetCPUCacheLineSize());
    10.5          printf("RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected");
    10.6          printf("MMX %s\n", SDL_HasMMX()? "detected" : "not detected");
    10.7 -        printf("MMX Ext %s\n", SDL_HasMMXExt()? "detected" : "not detected");
    10.8 -        printf("3DNow %s\n", SDL_Has3DNow()? "detected" : "not detected");
    10.9 -        printf("3DNow Ext %s\n",
   10.10 -               SDL_Has3DNowExt()? "detected" : "not detected");
   10.11          printf("SSE %s\n", SDL_HasSSE()? "detected" : "not detected");
   10.12          printf("SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected");
   10.13 -        printf("AltiVec %s\n", SDL_HasAltiVec()? "detected" : "not detected");
   10.14 +        printf("SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected");
   10.15 +        printf("SSE4 %s\n", SDL_HasSSE4()? "detected" : "not detected");
   10.16      }
   10.17      return (0);
   10.18  }