src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Wed, 01 Feb 2006 06:32:25 +0000
changeset 1312 c9b51268668f
parent 1232 0aa0000081d5
child 1330 450721ad5436
permissions -rw-r--r--
Updated copyright information and removed rcs id lines (problematic in branch merges)
I batch edited these files, so please let me know if I've accidentally removed anybody's
credit here.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #include <stdio.h>
    24 
    25 #include "SDL_types.h"
    26 #include "SDL_video.h"
    27 #include "SDL_blit.h"
    28 #include "SDL_byteorder.h"
    29 #include "SDL_cpuinfo.h"
    30 
    31 /* Functions to blit from N-bit surfaces to other surfaces */
    32 
    33 #ifdef USE_ALTIVEC_BLITTERS
    34 #ifdef HAVE_ALTIVEC_H
    35 #include <altivec.h>
    36 #endif
    37 #include <assert.h>
    38 #include <stdlib.h>
    39 #ifdef MACOSX
    40 #include <sys/sysctl.h>
    41 static size_t GetL3CacheSize( void )
    42 {
    43     const char key[] = "hw.l3cachesize";
    44     u_int64_t result = 0;
    45     size_t typeSize = sizeof( result );
    46 
    47 
    48     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
    49     if( 0 != err ) return 0;
    50 
    51     return result;
    52 }
    53 #else
    54 static size_t GetL3CacheSize( void )
    55 {
    56     /* XXX: Just guess G4 */
    57     return 2097152;
    58 }
    59 #endif /* MACOSX */
    60 
    61 #if ((defined MACOSX) && (__GNUC__ < 4))
    62     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    63         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    64     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    65         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    66 #else
    67     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    68         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    69     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    70         (vector unsigned short) { a,b,c,d,e,f,g,h }
    71 #endif
    72 
    73 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    74 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    75                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    76                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    77                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    78                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    79 
    80 #define MAKE8888(dstfmt, r, g, b, a)  \
    81     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    82       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    83       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    84       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    85 
    86 /*
    87  * Data Stream Touch...Altivec cache prefetching.
    88  *
    89  *  Don't use this on a G5...however, the speed boost is very significant
    90  *   on a G4.
    91  */
    92 #define DST_CHAN_SRC 1
    93 #define DST_CHAN_DEST 2
    94 
    95 /* macro to set DST control word value... */
    96 #define DST_CTRL(size, count, stride) \
    97     (((size) << 24) | ((count) << 16) | (stride))
    98 
    99 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   100     ? vec_lvsl(0, src) \
   101     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   102 
   103 /* Calculate the permute vector used for 32->32 swizzling */
   104 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
   105                                   const SDL_PixelFormat *dstfmt)
   106 {
   107     /*
   108     * We have to assume that the bits that aren't used by other
   109      *  colors is alpha, and it's one complete byte, since some formats
   110      *  leave alpha with a zero mask, but we should still swizzle the bits.
   111      */
   112     /* ARGB */
   113     const static struct SDL_PixelFormat default_pixel_format = {
   114         NULL, 0, 0,
   115         0, 0, 0, 0,
   116         16, 8, 0, 24,
   117         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   118         0, 0};
   119     if (!srcfmt) {
   120         srcfmt = &default_pixel_format;
   121     }
   122     if (!dstfmt) {
   123         dstfmt = &default_pixel_format;
   124     }
   125     vector unsigned char plus = VECUINT8_LITERAL(
   126                                       0x00, 0x00, 0x00, 0x00,
   127                                       0x04, 0x04, 0x04, 0x04,
   128                                       0x08, 0x08, 0x08, 0x08,
   129                                       0x0C, 0x0C, 0x0C, 0x0C );
   130     vector unsigned char vswiz;
   131     vector unsigned int srcvec;
   132 #define RESHIFT(X) (3 - ((X) >> 3))
   133     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   134     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   135     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   136     Uint32 amask;
   137     /* Use zero for alpha if either surface doesn't have alpha */
   138     if (dstfmt->Amask) {
   139         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   140     } else {    
   141         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   142     }           
   143 #undef RESHIFT  
   144     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
   145     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   146     return(vswiz);
   147 }
   148 
   149 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   150 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   151     int height = info->d_height;
   152     Uint8 *src = (Uint8 *) info->s_pixels;
   153     int srcskip = info->s_skip;
   154     Uint8 *dst = (Uint8 *) info->d_pixels;
   155     int dstskip = info->d_skip;
   156     SDL_PixelFormat *srcfmt = info->src;
   157     vector unsigned char valpha = vec_splat_u8(0);
   158     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   159     vector unsigned char vgmerge = VECUINT8_LITERAL(
   160         0x00, 0x02, 0x00, 0x06,
   161         0x00, 0x0a, 0x00, 0x0e,
   162         0x00, 0x12, 0x00, 0x16,
   163         0x00, 0x1a, 0x00, 0x1e);
   164     vector unsigned short v1 = vec_splat_u16(1);
   165     vector unsigned short v3 = vec_splat_u16(3);
   166     vector unsigned short v3f = VECUINT16_LITERAL(
   167         0x003f, 0x003f, 0x003f, 0x003f,
   168         0x003f, 0x003f, 0x003f, 0x003f);
   169     vector unsigned short vfc = VECUINT16_LITERAL(
   170         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   171         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   172     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   173     vf800 = vec_sl(vf800, vec_splat_u16(8));
   174 
   175     while (height--) {
   176         vector unsigned char valigner;
   177         vector unsigned char voverflow;
   178         vector unsigned char vsrc;
   179 
   180         int width = info->d_width;
   181         int extrawidth;
   182 
   183         /* do scalar until we can align... */
   184 #define ONE_PIXEL_BLEND(condition, widthvar) \
   185         while (condition) { \
   186             Uint32 Pixel; \
   187             unsigned sR, sG, sB, sA; \
   188             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   189                           sR, sG, sB, sA); \
   190             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   191                                 ((sG << 3) & 0x000007E0) | \
   192                                 ((sB >> 3) & 0x0000001F)); \
   193             dst += 2; \
   194             src += 4; \
   195             widthvar--; \
   196         }
   197 
   198         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   199 
   200         /* After all that work, here's the vector part! */
   201         extrawidth = (width % 8);  /* trailing unaligned stores */
   202         width -= extrawidth;
   203         vsrc = vec_ld(0, src);
   204         valigner = VEC_ALIGNER(src);
   205 
   206         while (width) {
   207             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   208             vector unsigned int vsrc1, vsrc2;
   209             vector unsigned char vdst;
   210 
   211             voverflow = vec_ld(15, src);
   212             vsrc = vec_perm(vsrc, voverflow, valigner);
   213             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   214             src += 16;
   215             vsrc = voverflow;
   216             voverflow = vec_ld(15, src);
   217             vsrc = vec_perm(vsrc, voverflow, valigner);
   218             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   219             /* 1555 */
   220             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   221             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   222             vgpixel = vec_and(vgpixel, vfc);
   223             vgpixel = vec_sl(vgpixel, v3);
   224             vrpixel = vec_sl(vpixel, v1);
   225             vrpixel = vec_and(vrpixel, vf800);
   226             vbpixel = vec_and(vpixel, v3f);
   227             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   228             /* 565 */
   229             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   230             vec_st(vdst, 0, dst);
   231 
   232             width -= 8;
   233             src += 16;
   234             dst += 16;
   235             vsrc = voverflow;
   236         }
   237 
   238         assert(width == 0);
   239 
   240 
   241         /* do scalar until we can align... */
   242         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   243 #undef ONE_PIXEL_BLEND
   244 
   245         src += srcskip;  /* move to next row, accounting for pitch. */
   246         dst += dstskip;
   247     }
   248 
   249 
   250 }
   251 
   252 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   253     int height = info->d_height;
   254     Uint8 *src = (Uint8 *) info->s_pixels;
   255     int srcskip = info->s_skip;
   256     Uint8 *dst = (Uint8 *) info->d_pixels;
   257     int dstskip = info->d_skip;
   258     SDL_PixelFormat *srcfmt = info->src;
   259     SDL_PixelFormat *dstfmt = info->dst;
   260     unsigned alpha;
   261     vector unsigned char valpha;
   262     vector unsigned char vpermute;
   263     vector unsigned short vf800;
   264     vector unsigned int v8 = vec_splat_u32(8);
   265     vector unsigned int v16 = vec_add(v8, v8);
   266     vector unsigned short v2 = vec_splat_u16(2);
   267     vector unsigned short v3 = vec_splat_u16(3);
   268     /* 
   269         0x10 - 0x1f is the alpha
   270         0x00 - 0x0e evens are the red
   271         0x01 - 0x0f odds are zero
   272     */
   273     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   274         0x10, 0x00, 0x01, 0x01,
   275         0x10, 0x02, 0x01, 0x01,
   276         0x10, 0x04, 0x01, 0x01,
   277         0x10, 0x06, 0x01, 0x01
   278     );
   279     vector unsigned char vredalpha2 = (vector unsigned char) (
   280         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   281     );
   282     /*
   283         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   284         0x11 - 0x0f odds are blue
   285     */
   286     vector unsigned char vblue1 = VECUINT8_LITERAL(
   287         0x00, 0x01, 0x02, 0x11,
   288         0x04, 0x05, 0x06, 0x13,
   289         0x08, 0x09, 0x0a, 0x15,
   290         0x0c, 0x0d, 0x0e, 0x17
   291     );
   292     vector unsigned char vblue2 = (vector unsigned char)(
   293         vec_add((vector unsigned int)vblue1, v8)
   294     );
   295     /*
   296         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   297         0x10 - 0x0e evens are green
   298     */
   299     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   300         0x00, 0x01, 0x10, 0x03,
   301         0x04, 0x05, 0x12, 0x07,
   302         0x08, 0x09, 0x14, 0x0b,
   303         0x0c, 0x0d, 0x16, 0x0f
   304     );
   305     vector unsigned char vgreen2 = (vector unsigned char)(
   306         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   307     );
   308     
   309 
   310     assert(srcfmt->BytesPerPixel == 2);
   311     assert(dstfmt->BytesPerPixel == 4);
   312 
   313     vf800 = (vector unsigned short)vec_splat_u8(-7);
   314     vf800 = vec_sl(vf800, vec_splat_u16(8));
   315 
   316     if (dstfmt->Amask && srcfmt->alpha) {
   317         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   318         valpha = vec_splat(valpha, 0);
   319     } else {
   320         alpha = 0;
   321         valpha = vec_splat_u8(0);
   322     }
   323 
   324     vpermute = calc_swizzle32(NULL, dstfmt);
   325     while (height--) {
   326         vector unsigned char valigner;
   327         vector unsigned char voverflow;
   328         vector unsigned char vsrc;
   329 
   330         int width = info->d_width;
   331         int extrawidth;
   332 
   333         /* do scalar until we can align... */
   334 #define ONE_PIXEL_BLEND(condition, widthvar) \
   335         while (condition) { \
   336             unsigned sR, sG, sB; \
   337             unsigned short Pixel = *((unsigned short *)src); \
   338             sR = (Pixel >> 8) & 0xf8; \
   339             sG = (Pixel >> 3) & 0xfc; \
   340             sB = (Pixel << 3) & 0xf8; \
   341             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   342             src += 2; \
   343             dst += 4; \
   344             widthvar--; \
   345         }
   346         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   347 
   348         /* After all that work, here's the vector part! */
   349         extrawidth = (width % 8);  /* trailing unaligned stores */
   350         width -= extrawidth;
   351         vsrc = vec_ld(0, src);
   352         valigner = VEC_ALIGNER(src);
   353 
   354         while (width) {
   355             vector unsigned short vR, vG, vB;
   356             vector unsigned char vdst1, vdst2;
   357 
   358             voverflow = vec_ld(15, src);
   359             vsrc = vec_perm(vsrc, voverflow, valigner);
   360 
   361             vR = vec_and((vector unsigned short)vsrc, vf800);
   362             vB = vec_sl((vector unsigned short)vsrc, v3);
   363             vG = vec_sl(vB, v2);
   364 
   365             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   366             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   367             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   368             vdst1 = vec_perm(vdst1, valpha, vpermute);
   369             vec_st(vdst1, 0, dst);
   370 
   371             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   372             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   373             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   374             vdst2 = vec_perm(vdst2, valpha, vpermute);
   375             vec_st(vdst2, 16, dst);
   376             
   377             width -= 8;
   378             dst += 32;
   379             src += 16;
   380             vsrc = voverflow;
   381         }
   382 
   383         assert(width == 0);
   384 
   385 
   386         /* do scalar until we can align... */
   387         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   388 #undef ONE_PIXEL_BLEND
   389 
   390         src += srcskip;  /* move to next row, accounting for pitch. */
   391         dst += dstskip;
   392     }
   393 
   394 }
   395 
   396 
   397 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   398     int height = info->d_height;
   399     Uint8 *src = (Uint8 *) info->s_pixels;
   400     int srcskip = info->s_skip;
   401     Uint8 *dst = (Uint8 *) info->d_pixels;
   402     int dstskip = info->d_skip;
   403     SDL_PixelFormat *srcfmt = info->src;
   404     SDL_PixelFormat *dstfmt = info->dst;
   405     unsigned alpha;
   406     vector unsigned char valpha;
   407     vector unsigned char vpermute;
   408     vector unsigned short vf800;
   409     vector unsigned int v8 = vec_splat_u32(8);
   410     vector unsigned int v16 = vec_add(v8, v8);
   411     vector unsigned short v1 = vec_splat_u16(1);
   412     vector unsigned short v3 = vec_splat_u16(3);
   413     /* 
   414         0x10 - 0x1f is the alpha
   415         0x00 - 0x0e evens are the red
   416         0x01 - 0x0f odds are zero
   417     */
   418     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   419         0x10, 0x00, 0x01, 0x01,
   420         0x10, 0x02, 0x01, 0x01,
   421         0x10, 0x04, 0x01, 0x01,
   422         0x10, 0x06, 0x01, 0x01
   423     );
   424     vector unsigned char vredalpha2 = (vector unsigned char)(
   425         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   426     );
   427     /*
   428         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   429         0x11 - 0x0f odds are blue
   430     */
   431     vector unsigned char vblue1 = VECUINT8_LITERAL(
   432         0x00, 0x01, 0x02, 0x11,
   433         0x04, 0x05, 0x06, 0x13,
   434         0x08, 0x09, 0x0a, 0x15,
   435         0x0c, 0x0d, 0x0e, 0x17
   436     );
   437     vector unsigned char vblue2 = (vector unsigned char)(
   438         vec_add((vector unsigned int)vblue1, v8)
   439     );
   440     /*
   441         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   442         0x10 - 0x0e evens are green
   443     */
   444     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   445         0x00, 0x01, 0x10, 0x03,
   446         0x04, 0x05, 0x12, 0x07,
   447         0x08, 0x09, 0x14, 0x0b,
   448         0x0c, 0x0d, 0x16, 0x0f
   449     );
   450     vector unsigned char vgreen2 = (vector unsigned char)(
   451         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   452     );
   453     
   454 
   455     assert(srcfmt->BytesPerPixel == 2);
   456     assert(dstfmt->BytesPerPixel == 4);
   457 
   458     vf800 = (vector unsigned short)vec_splat_u8(-7);
   459     vf800 = vec_sl(vf800, vec_splat_u16(8));
   460 
   461     if (dstfmt->Amask && srcfmt->alpha) {
   462         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   463         valpha = vec_splat(valpha, 0);
   464     } else {
   465         alpha = 0;
   466         valpha = vec_splat_u8(0);
   467     }
   468 
   469     vpermute = calc_swizzle32(NULL, dstfmt);
   470     while (height--) {
   471         vector unsigned char valigner;
   472         vector unsigned char voverflow;
   473         vector unsigned char vsrc;
   474 
   475         int width = info->d_width;
   476         int extrawidth;
   477 
   478         /* do scalar until we can align... */
   479 #define ONE_PIXEL_BLEND(condition, widthvar) \
   480         while (condition) { \
   481             unsigned sR, sG, sB; \
   482             unsigned short Pixel = *((unsigned short *)src); \
   483             sR = (Pixel >> 7) & 0xf8; \
   484             sG = (Pixel >> 2) & 0xf8; \
   485             sB = (Pixel << 3) & 0xf8; \
   486             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   487             src += 2; \
   488             dst += 4; \
   489             widthvar--; \
   490         }
   491         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   492 
   493         /* After all that work, here's the vector part! */
   494         extrawidth = (width % 8);  /* trailing unaligned stores */
   495         width -= extrawidth;
   496         vsrc = vec_ld(0, src);
   497         valigner = VEC_ALIGNER(src);
   498 
   499         while (width) {
   500             vector unsigned short vR, vG, vB;
   501             vector unsigned char vdst1, vdst2;
   502 
   503             voverflow = vec_ld(15, src);
   504             vsrc = vec_perm(vsrc, voverflow, valigner);
   505 
   506             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   507             vB = vec_sl((vector unsigned short)vsrc, v3);
   508             vG = vec_sl(vB, v3);
   509 
   510             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   511             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   512             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   513             vdst1 = vec_perm(vdst1, valpha, vpermute);
   514             vec_st(vdst1, 0, dst);
   515 
   516             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   517             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   518             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   519             vdst2 = vec_perm(vdst2, valpha, vpermute);
   520             vec_st(vdst2, 16, dst);
   521             
   522             width -= 8;
   523             dst += 32;
   524             src += 16;
   525             vsrc = voverflow;
   526         }
   527 
   528         assert(width == 0);
   529 
   530 
   531         /* do scalar until we can align... */
   532         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   533 #undef ONE_PIXEL_BLEND
   534 
   535         src += srcskip;  /* move to next row, accounting for pitch. */
   536         dst += dstskip;
   537     }
   538 
   539 }
   540 
   541 static void BlitNtoNKey(SDL_BlitInfo *info);
   542 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   543 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   544 {
   545     int height = info->d_height;
   546     Uint32 *srcp = (Uint32 *) info->s_pixels;
   547     int srcskip = info->s_skip;
   548     Uint32 *dstp = (Uint32 *) info->d_pixels;
   549     int dstskip = info->d_skip;
   550     SDL_PixelFormat *srcfmt = info->src;
   551     int srcbpp = srcfmt->BytesPerPixel;
   552     SDL_PixelFormat *dstfmt = info->dst;
   553     int dstbpp = dstfmt->BytesPerPixel;
   554     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   555 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   556     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   557 	Uint32 ckey = info->src->colorkey;
   558     vector unsigned int valpha;
   559     vector unsigned char vpermute;
   560     vector unsigned char vzero;
   561     vector unsigned int vckey;
   562     vector unsigned int vrgbmask;
   563     vpermute = calc_swizzle32(srcfmt, dstfmt);
   564     if (info->d_width < 16) {
   565         if(copy_alpha) {
   566             return BlitNtoNKeyCopyAlpha(info);
   567         } else {
   568             return BlitNtoNKey(info);
   569         }
   570     }
   571     vzero = vec_splat_u8(0);
   572     if (alpha) {
   573         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   574         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   575     } else {
   576         valpha = (vector unsigned int)vzero;
   577     }
   578     ckey &= rgbmask;
   579     ((unsigned int *)(char*)&vckey)[0] = ckey;
   580     vckey = vec_splat(vckey, 0);
   581     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   582     vrgbmask = vec_splat(vrgbmask, 0);
   583 
   584     while (height--) {
   585 #define ONE_PIXEL_BLEND(condition, widthvar) \
   586         if (copy_alpha) { \
   587             while (condition) { \
   588                 Uint32 Pixel; \
   589                 unsigned sR, sG, sB, sA; \
   590                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   591                           sR, sG, sB, sA); \
   592                 if ( (Pixel & rgbmask) != ckey ) { \
   593                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   594                             sR, sG, sB, sA); \
   595                 } \
   596                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   597                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   598                 widthvar--; \
   599             } \
   600         } else { \
   601             while (condition) { \
   602                 Uint32 Pixel; \
   603                 unsigned sR, sG, sB; \
   604                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   605                 if ( Pixel != ckey ) { \
   606                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   607                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   608                               sR, sG, sB, alpha); \
   609                 } \
   610                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   611                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   612                 widthvar--; \
   613             } \
   614         }
   615         int width = info->d_width;
   616         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   617         assert(width > 0);
   618         if (width > 0) {
   619             int extrawidth = (width % 4);
   620             vector unsigned char valigner = VEC_ALIGNER(srcp);
   621             vector unsigned int vs = vec_ld(0, srcp);
   622             width -= extrawidth;
   623             assert(width >= 4);
   624             while (width) {
   625                 vector unsigned char vsel;
   626                 vector unsigned int vd;
   627                 vector unsigned int voverflow = vec_ld(15, srcp);
   628                 /* load the source vec */
   629                 vs = vec_perm(vs, voverflow, valigner);
   630                 /* vsel is set for items that match the key */
   631                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   632                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   633                 /* permute the src vec to the dest format */
   634                 vs = vec_perm(vs, valpha, vpermute);
   635                 /* load the destination vec */
   636                 vd = vec_ld(0, dstp);
   637                 /* select the source and dest into vs */
   638                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   639                 
   640                 vec_st(vd, 0, dstp);
   641                 srcp += 4;
   642                 width -= 4;
   643                 dstp += 4;
   644                 vs = voverflow;
   645             }
   646             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   647 #undef ONE_PIXEL_BLEND
   648             srcp += srcskip >> 2;
   649             dstp += dstskip >> 2;
   650         }
   651     }
   652 }
   653 
   654 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   655 /* Use this on a G5 */
   656 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   657 {
   658     int height = info->d_height;
   659     Uint32 *src = (Uint32 *) info->s_pixels;
   660     int srcskip = info->s_skip;
   661     Uint32 *dst = (Uint32 *) info->d_pixels;
   662     int dstskip = info->d_skip;
   663     SDL_PixelFormat *srcfmt = info->src;
   664     int srcbpp = srcfmt->BytesPerPixel;
   665     SDL_PixelFormat *dstfmt = info->dst;
   666     int dstbpp = dstfmt->BytesPerPixel;
   667     vector unsigned int vzero = vec_splat_u32(0);
   668     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   669     if (dstfmt->Amask && !srcfmt->Amask) {
   670         if (srcfmt->alpha) {
   671             vector unsigned char valpha;
   672             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   673             vzero = (vector unsigned int)vec_splat(valpha, 0);
   674         }
   675     }
   676 
   677     assert(srcbpp == 4);
   678     assert(dstbpp == 4);
   679 
   680     while (height--) {
   681         vector unsigned char valigner;
   682         vector unsigned int vbits;
   683         vector unsigned int voverflow;
   684         Uint32 bits;
   685         Uint8 r, g, b, a;
   686 
   687         int width = info->d_width;
   688         int extrawidth;
   689 
   690         /* do scalar until we can align... */
   691         while ((UNALIGNED_PTR(dst)) && (width)) {
   692             bits = *(src++);
   693             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   694             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   695             width--;
   696         }
   697 
   698         /* After all that work, here's the vector part! */
   699         extrawidth = (width % 4);
   700         width -= extrawidth;
   701         valigner = VEC_ALIGNER(src);
   702         vbits = vec_ld(0, src);
   703 
   704        while (width) {
   705             voverflow = vec_ld(15, src);
   706             src += 4;
   707             width -= 4;
   708             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   709             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   710             vec_st(vbits, 0, dst);  /* store it back out. */
   711             dst += 4;
   712             vbits = voverflow;
   713         }
   714 
   715         assert(width == 0);
   716 
   717         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   718         while (extrawidth) {
   719             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   720             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   721             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   722             extrawidth--;
   723         }
   724 
   725         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   726         dst += dstskip >> 2;
   727     }
   728 
   729 }
   730 
   731 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   732 /* Use this on a G4 */
       /*
        * This variant issues data-stream prefetch hints (vec_dstt / vec_dstst) for
        * the source and destination while it converts, and stops both streams with
        * vec_dss before returning.
        *
        * For each row: pixels are converted one at a time until UNALIGNED_PTR(dst)
        * is false, then four at a time with vector permutes, and finally the
        * remaining (width % 4) pixels one at a time again.
        *
        * info: blit description.  Both pixel formats must be 4 bytes per pixel
        *       (checked with assert).
        */
   733 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
   734 {
       /* How far ahead of the current position the prefetch hints point. */
   735     const int scalar_dst_lead = sizeof (Uint32) * 4;
   736     const int vector_dst_lead = sizeof (Uint32) * 16;
   737 
   738     int height = info->d_height;
   739     Uint32 *src = (Uint32 *) info->s_pixels;
   740     int srcskip = info->s_skip;
   741     Uint32 *dst = (Uint32 *) info->d_pixels;
   742     int dstskip = info->d_skip;
   743     SDL_PixelFormat *srcfmt = info->src;
   744     int srcbpp = srcfmt->BytesPerPixel;
   745     SDL_PixelFormat *dstfmt = info->dst;
   746     int dstbpp = dstfmt->BytesPerPixel;
       /* Second operand of the swizzle permute below; all zero unless replaced. */
   747     vector unsigned int vzero = vec_splat_u32(0);
       /* NOTE(review): calc_swizzle32 is defined elsewhere; presumably it builds
        * the byte permute that maps the source channel order onto the
        * destination's -- confirm. */
   748     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
       /* Destination has an alpha channel but the source does not: when the
        * source's per-surface alpha is non-zero, splat that byte across the
        * vector used as the permute's second operand. */
   749     if (dstfmt->Amask && !srcfmt->Amask) {
   750         if (srcfmt->alpha) {
   751             vector unsigned char valpha;
   752             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   753             vzero = (vector unsigned int)vec_splat(valpha, 0);
   754         }
   755     }
   756 
   757     assert(srcbpp == 4);
   758     assert(dstbpp == 4);
   759 
   760     while (height--) {
   761         vector unsigned char valigner;
   762         vector unsigned int vbits;
   763         vector unsigned int voverflow;
   764         Uint32 bits;
   765         Uint8 r, g, b, a;
   766 
   767         int width = info->d_width;
   768         int extrawidth;
   769 
   770         /* do scalar until we can align... */
   771         while ((UNALIGNED_PTR(dst)) && (width)) {
   772             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   773             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   774             bits = *(src++);
   775             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   776             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   777             width--;
   778         }
   779 
   780         /* After all that work, here's the vector part! */
       /* Only whole groups of four pixels go through the vector loop. */
   781         extrawidth = (width % 4);
   782         width -= extrawidth;
       /* NOTE(review): VEC_ALIGNER is defined elsewhere; presumably it yields the
        * permute vector that realigns loads from a possibly unaligned src --
        * confirm. */
   783         valigner = VEC_ALIGNER(src);
   784         vbits = vec_ld(0, src);
   785 
   786         while (width) {
   787             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   788             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
       /* Load the aligned 16-byte block that holds byte 15 of this group, so
        * the permute below can assemble four whole source pixels. */
   789             voverflow = vec_ld(15, src);
   790             src += 4;
   791             width -= 4;
   792             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   793             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   794             vec_st(vbits, 0, dst);  /* store it back out. */
   795             dst += 4;
       /* The block just loaded is the leading block of the next group. */
   796             vbits = voverflow;
   797         }
   798         
   799         assert(width == 0);
   800 
   801         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   802         while (extrawidth) {
   803             bits = *(src++);  /* max 3 pixels (extrawidth is width % 4), don't bother with prefetch. */
   804             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   805             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   806             extrawidth--;
   807         }
   808 
       /* The skips are shifted right by 2 to step the Uint32 pointers
        * (i.e. they are presumably byte counts). */
   809         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   810         dst += dstskip >> 2;
   811     }
   812 
       /* Stop both prefetch data streams started in the loops above. */
   813     vec_dss(DST_CHAN_SRC);
   814     vec_dss(DST_CHAN_DEST);
   815 }
   816 
   817 static Uint32 GetBlitFeatures( void )
   818 {
   819     static Uint32 features = 0xffffffff;
   820     if (features == 0xffffffff) {
   821         /* Provide an override for testing .. */
   822         char *override = getenv("SDL_ALTIVEC_BLIT_FEATURES");
   823         if (override) {
   824             features = 0;
   825             sscanf(override, "%u", &features);
   826         } else {
   827             features = ( 0
   828                 /* Feature 1 is has-MMX */
   829                 | ((SDL_HasMMX()) ? 1 : 0)
   830                 /* Feature 2 is has-AltiVec */
   831                 | ((SDL_HasAltiVec()) ? 2 : 0)
   832                 /* Feature 4 is dont-use-prefetch */
   833                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   834                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   835             );
   836         }
   837     }
   838     return features;
   839 }
   840 #else
       /* Macro form of GetBlitFeatures(): only the MMX bit is ever reported here.
        * NOTE(review): presumably this is the branch used when the AltiVec
        * blitters are not compiled in -- the matching #if is outside this view. */
   841 /* Feature 1 is has-MMX */
   842 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   843 #endif
   844 
   845 /* This is now endian dependent */
       /*
        * HI and LO are array indices selecting the high and low element of a
        * two-element pair.  The RGB888_RGB555_TWO / RGB888_RGB565_TWO macros use
        * them to pick which of two adjacent source pixels goes into the upper 16
        * bits of a combined 32-bit store, and RGB565_32 uses them to pick the
        * high and low byte of a 16-bit pixel.
        */
   846 #if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
   847 #define HI	1
   848 #define LO	0
   849 #else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
   850 #define HI	0
   851 #define LO	1
   852 #endif
   853 
   854 #ifdef USE_ASMBLIT
   855 
   856 /* Heheheh, we coerce Hermes into using SDL blit information */
       /* These macros are defined before the two includes below so that the
        * headers see SDL_BlitInfo wherever they name HermesConverterInterface.
        * NOTE(review): presumably the headers declare the assembly converters
        * with these names in their prototypes -- confirm against HeadMMX.h and
        * HeadX86.h. */
   857 #define X86_ASSEMBLER
   858 #define HermesConverterInterface	SDL_BlitInfo
   859 #define HermesClearInterface		void
       /* STACKCALL expands to nothing here. */
   860 #define STACKCALL
   861 
   862 #include "HeadMMX.h"
   863 #include "HeadX86.h"
   864 
   865 #else
   866 
   867 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   868 #define RGB888_RGB332(dst, src) { \
   869 	dst = (((src)&0x00E00000)>>16)| \
   870 	      (((src)&0x0000E000)>>11)| \
   871 	      (((src)&0x000000C0)>>6); \
   872 }
   873 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   874 {
   875 #ifndef USE_DUFFS_LOOP
   876 	int c;
   877 #endif
   878 	int width, height;
   879 	Uint32 *src;
   880 	const Uint8 *map;
   881 	Uint8 *dst;
   882 	int srcskip, dstskip;
   883 
   884 	/* Set up some basic variables */
   885 	width = info->d_width;
   886 	height = info->d_height;
   887 	src = (Uint32 *)info->s_pixels;
   888 	srcskip = info->s_skip/4;
   889 	dst = info->d_pixels;
   890 	dstskip = info->d_skip;
   891 	map = info->table;
   892 
   893 	if ( map == NULL ) {
   894 		while ( height-- ) {
   895 #ifdef USE_DUFFS_LOOP
   896 			DUFFS_LOOP(
   897 				RGB888_RGB332(*dst++, *src);
   898 			, width);
   899 #else
   900 			for ( c=width/4; c; --c ) {
   901 				/* Pack RGB into 8bit pixel */
   902 				++src;
   903 				RGB888_RGB332(*dst++, *src);
   904 				++src;
   905 				RGB888_RGB332(*dst++, *src);
   906 				++src;
   907 				RGB888_RGB332(*dst++, *src);
   908 				++src;
   909 			}
   910 			switch ( width & 3 ) {
   911 				case 3:
   912 					RGB888_RGB332(*dst++, *src);
   913 					++src;
   914 				case 2:
   915 					RGB888_RGB332(*dst++, *src);
   916 					++src;
   917 				case 1:
   918 					RGB888_RGB332(*dst++, *src);
   919 					++src;
   920 			}
   921 #endif /* USE_DUFFS_LOOP */
   922 			src += srcskip;
   923 			dst += dstskip;
   924 		}
   925 	} else {
   926 		int Pixel;
   927 
   928 		while ( height-- ) {
   929 #ifdef USE_DUFFS_LOOP
   930 			DUFFS_LOOP(
   931 				RGB888_RGB332(Pixel, *src);
   932 				*dst++ = map[Pixel];
   933 				++src;
   934 			, width);
   935 #else
   936 			for ( c=width/4; c; --c ) {
   937 				/* Pack RGB into 8bit pixel */
   938 				RGB888_RGB332(Pixel, *src);
   939 				*dst++ = map[Pixel];
   940 				++src;
   941 				RGB888_RGB332(Pixel, *src);
   942 				*dst++ = map[Pixel];
   943 				++src;
   944 				RGB888_RGB332(Pixel, *src);
   945 				*dst++ = map[Pixel];
   946 				++src;
   947 				RGB888_RGB332(Pixel, *src);
   948 				*dst++ = map[Pixel];
   949 				++src;
   950 			}
   951 			switch ( width & 3 ) {
   952 				case 3:
   953 					RGB888_RGB332(Pixel, *src);
   954 					*dst++ = map[Pixel];
   955 					++src;
   956 				case 2:
   957 					RGB888_RGB332(Pixel, *src);
   958 					*dst++ = map[Pixel];
   959 					++src;
   960 				case 1:
   961 					RGB888_RGB332(Pixel, *src);
   962 					*dst++ = map[Pixel];
   963 					++src;
   964 			}
   965 #endif /* USE_DUFFS_LOOP */
   966 			src += srcskip;
   967 			dst += dstskip;
   968 		}
   969 	}
   970 }
   971 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   972 #define RGB888_RGB555(dst, src) { \
   973 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>9)| \
   974 	                   (((*src)&0x0000F800)>>6)| \
   975 	                   (((*src)&0x000000F8)>>3); \
   976 }
   977 #define RGB888_RGB555_TWO(dst, src) { \
   978 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   979 	                     (((src[HI])&0x0000F800)>>6)| \
   980 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   981 	                     (((src[LO])&0x00F80000)>>9)| \
   982 	                     (((src[LO])&0x0000F800)>>6)| \
   983 	                     (((src[LO])&0x000000F8)>>3); \
   984 }
   985 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   986 {
   987 #ifndef USE_DUFFS_LOOP
   988 	int c;
   989 #endif
   990 	int width, height;
   991 	Uint32 *src;
   992 	Uint16 *dst;
   993 	int srcskip, dstskip;
   994 
   995 	/* Set up some basic variables */
   996 	width = info->d_width;
   997 	height = info->d_height;
   998 	src = (Uint32 *)info->s_pixels;
   999 	srcskip = info->s_skip/4;
  1000 	dst = (Uint16 *)info->d_pixels;
  1001 	dstskip = info->d_skip/2;
  1002 
  1003 #ifdef USE_DUFFS_LOOP
  1004 	while ( height-- ) {
  1005 		DUFFS_LOOP(
  1006 			RGB888_RGB555(dst, src);
  1007 			++src;
  1008 			++dst;
  1009 		, width);
  1010 		src += srcskip;
  1011 		dst += dstskip;
  1012 	}
  1013 #else
  1014 	/* Memory align at 4-byte boundary, if necessary */
  1015 	if ( (long)dst & 0x03 ) {
  1016 		/* Don't do anything if width is 0 */
  1017 		if ( width == 0 ) {
  1018 			return;
  1019 		}
  1020 		--width;
  1021 
  1022 		while ( height-- ) {
  1023 			/* Perform copy alignment */
  1024 			RGB888_RGB555(dst, src);
  1025 			++src;
  1026 			++dst;
  1027 
  1028 			/* Copy in 4 pixel chunks */
  1029 			for ( c=width/4; c; --c ) {
  1030 				RGB888_RGB555_TWO(dst, src);
  1031 				src += 2;
  1032 				dst += 2;
  1033 				RGB888_RGB555_TWO(dst, src);
  1034 				src += 2;
  1035 				dst += 2;
  1036 			}
  1037 			/* Get any leftovers */
  1038 			switch (width & 3) {
  1039 				case 3:
  1040 					RGB888_RGB555(dst, src);
  1041 					++src;
  1042 					++dst;
  1043 				case 2:
  1044 					RGB888_RGB555_TWO(dst, src);
  1045 					src += 2;
  1046 					dst += 2;
  1047 					break;
  1048 				case 1:
  1049 					RGB888_RGB555(dst, src);
  1050 					++src;
  1051 					++dst;
  1052 					break;
  1053 			}
  1054 			src += srcskip;
  1055 			dst += dstskip;
  1056 		}
  1057 	} else { 
  1058 		while ( height-- ) {
  1059 			/* Copy in 4 pixel chunks */
  1060 			for ( c=width/4; c; --c ) {
  1061 				RGB888_RGB555_TWO(dst, src);
  1062 				src += 2;
  1063 				dst += 2;
  1064 				RGB888_RGB555_TWO(dst, src);
  1065 				src += 2;
  1066 				dst += 2;
  1067 			}
  1068 			/* Get any leftovers */
  1069 			switch (width & 3) {
  1070 				case 3:
  1071 					RGB888_RGB555(dst, src);
  1072 					++src;
  1073 					++dst;
  1074 				case 2:
  1075 					RGB888_RGB555_TWO(dst, src);
  1076 					src += 2;
  1077 					dst += 2;
  1078 					break;
  1079 				case 1:
  1080 					RGB888_RGB555(dst, src);
  1081 					++src;
  1082 					++dst;
  1083 					break;
  1084 			}
  1085 			src += srcskip;
  1086 			dst += dstskip;
  1087 		}
  1088 	}
  1089 #endif /* USE_DUFFS_LOOP */
  1090 }
  1091 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1092 #define RGB888_RGB565(dst, src) { \
  1093 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>8)| \
  1094 	                   (((*src)&0x0000FC00)>>5)| \
  1095 	                   (((*src)&0x000000F8)>>3); \
  1096 }
  1097 #define RGB888_RGB565_TWO(dst, src) { \
  1098 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1099 	                     (((src[HI])&0x0000FC00)>>5)| \
  1100 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1101 	                     (((src[LO])&0x00F80000)>>8)| \
  1102 	                     (((src[LO])&0x0000FC00)>>5)| \
  1103 	                     (((src[LO])&0x000000F8)>>3); \
  1104 }
  1105 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1106 {
  1107 #ifndef USE_DUFFS_LOOP
  1108 	int c;
  1109 #endif
  1110 	int width, height;
  1111 	Uint32 *src;
  1112 	Uint16 *dst;
  1113 	int srcskip, dstskip;
  1114 
  1115 	/* Set up some basic variables */
  1116 	width = info->d_width;
  1117 	height = info->d_height;
  1118 	src = (Uint32 *)info->s_pixels;
  1119 	srcskip = info->s_skip/4;
  1120 	dst = (Uint16 *)info->d_pixels;
  1121 	dstskip = info->d_skip/2;
  1122 
  1123 #ifdef USE_DUFFS_LOOP
  1124 	while ( height-- ) {
  1125 		DUFFS_LOOP(
  1126 			RGB888_RGB565(dst, src);
  1127 			++src;
  1128 			++dst;
  1129 		, width);
  1130 		src += srcskip;
  1131 		dst += dstskip;
  1132 	}
  1133 #else
  1134 	/* Memory align at 4-byte boundary, if necessary */
  1135 	if ( (long)dst & 0x03 ) {
  1136 		/* Don't do anything if width is 0 */
  1137 		if ( width == 0 ) {
  1138 			return;
  1139 		}
  1140 		--width;
  1141 
  1142 		while ( height-- ) {
  1143 			/* Perform copy alignment */
  1144 			RGB888_RGB565(dst, src);
  1145 			++src;
  1146 			++dst;
  1147 
  1148 			/* Copy in 4 pixel chunks */
  1149 			for ( c=width/4; c; --c ) {
  1150 				RGB888_RGB565_TWO(dst, src);
  1151 				src += 2;
  1152 				dst += 2;
  1153 				RGB888_RGB565_TWO(dst, src);
  1154 				src += 2;
  1155 				dst += 2;
  1156 			}
  1157 			/* Get any leftovers */
  1158 			switch (width & 3) {
  1159 				case 3:
  1160 					RGB888_RGB565(dst, src);
  1161 					++src;
  1162 					++dst;
  1163 				case 2:
  1164 					RGB888_RGB565_TWO(dst, src);
  1165 					src += 2;
  1166 					dst += 2;
  1167 					break;
  1168 				case 1:
  1169 					RGB888_RGB565(dst, src);
  1170 					++src;
  1171 					++dst;
  1172 					break;
  1173 			}
  1174 			src += srcskip;
  1175 			dst += dstskip;
  1176 		}
  1177 	} else { 
  1178 		while ( height-- ) {
  1179 			/* Copy in 4 pixel chunks */
  1180 			for ( c=width/4; c; --c ) {
  1181 				RGB888_RGB565_TWO(dst, src);
  1182 				src += 2;
  1183 				dst += 2;
  1184 				RGB888_RGB565_TWO(dst, src);
  1185 				src += 2;
  1186 				dst += 2;
  1187 			}
  1188 			/* Get any leftovers */
  1189 			switch (width & 3) {
  1190 				case 3:
  1191 					RGB888_RGB565(dst, src);
  1192 					++src;
  1193 					++dst;
  1194 				case 2:
  1195 					RGB888_RGB565_TWO(dst, src);
  1196 					src += 2;
  1197 					dst += 2;
  1198 					break;
  1199 				case 1:
  1200 					RGB888_RGB565(dst, src);
  1201 					++src;
  1202 					++dst;
  1203 					break;
  1204 			}
  1205 			src += srcskip;
  1206 			dst += dstskip;
  1207 		}
  1208 	}
  1209 #endif /* USE_DUFFS_LOOP */
  1210 }
  1211 
  1212 #endif /* USE_ASMBLIT */
  1213 
  1214 
  1215 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1216 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1217 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1218 {
  1219 #ifndef USE_DUFFS_LOOP
  1220 	int c;
  1221 #endif
  1222 	int width, height;
  1223 	Uint8 *src;
  1224 	Uint32 *dst;
  1225 	int srcskip, dstskip;
  1226 
  1227 	/* Set up some basic variables */
  1228 	width = info->d_width;
  1229 	height = info->d_height;
  1230 	src = (Uint8 *)info->s_pixels;
  1231 	srcskip = info->s_skip;
  1232 	dst = (Uint32 *)info->d_pixels;
  1233 	dstskip = info->d_skip/4;
  1234 
  1235 #ifdef USE_DUFFS_LOOP
  1236 	while ( height-- ) {
  1237 		DUFFS_LOOP(
  1238 		{
  1239 			*dst++ = RGB565_32(dst, src, map);
  1240 			src += 2;
  1241 		},
  1242 		width);
  1243 		src += srcskip;
  1244 		dst += dstskip;
  1245 	}
  1246 #else
  1247 	while ( height-- ) {
  1248 		/* Copy in 4 pixel chunks */
  1249 		for ( c=width/4; c; --c ) {
  1250 			*dst++ = RGB565_32(dst, src, map);
  1251 			src += 2;
  1252 			*dst++ = RGB565_32(dst, src, map);
  1253 			src += 2;
  1254 			*dst++ = RGB565_32(dst, src, map);
  1255 			src += 2;
  1256 			*dst++ = RGB565_32(dst, src, map);
  1257 			src += 2;
  1258 		}
  1259 		/* Get any leftovers */
  1260 		switch (width & 3) {
  1261 			case 3:
  1262 				*dst++ = RGB565_32(dst, src, map);
  1263 				src += 2;
  1264 			case 2:
  1265 				*dst++ = RGB565_32(dst, src, map);
  1266 				src += 2;
  1267 			case 1:
  1268 				*dst++ = RGB565_32(dst, src, map);
  1269 				src += 2;
  1270 				break;
  1271 		}
  1272 		src += srcskip;
  1273 		dst += dstskip;
  1274 	}
  1275 #endif /* USE_DUFFS_LOOP */
  1276 }
  1277 
  1278 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
       /*
        * Lookup table consumed by Blit_RGB565_32() through the RGB565_32 macro:
        * for a byte value b, entry [2*b] is added when b is the pixel's LO byte
        * and entry [2*b+1] when b is its HI byte (256 byte values x 2 = 512
        * entries).  In this table the odd (HI byte) entries carry the 0xff in
        * the top byte.
        */
  1279 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1280 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1281 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1282 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1283 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1284 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1285 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1286 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1287 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1288 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1289 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1290 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1291 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1292 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1293 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1294 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1295 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1296 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1297 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1298 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1299 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1300 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1301 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1302 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1303 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1304 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1305 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1306 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1307 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1308 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1309 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1310 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1311 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1312 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1313 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1314 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1315 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1316 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1317 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1318 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1319 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1320 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1321 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1322 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1323 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1324 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1325 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1326 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1327 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1328 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1329 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1330 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1331 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1332 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1333 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1334 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1335 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1336 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1337 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1338 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1339 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1340 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1341 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1342 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1343 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1344 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1345 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1346 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1347 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1348 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1349 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1350 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1351 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1352 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1353 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1354 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1355 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1356 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1357 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1358 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1359 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1360 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1361 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1362 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1363 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1364 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1365 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1366 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1367 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1368 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1369 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1370 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1371 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1372 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1373 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1374 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1375 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1376 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1377 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1378 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1379 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1380 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1381 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1382 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1383 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1384 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1385 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1386 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1387 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1388 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1389 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1390 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1391 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1392 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1393 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1394 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1395 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1396 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1397 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1398 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1399 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1400 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1401 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1402 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1403 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1404 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1405 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1406 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1407 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1408 };
  1409 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
  1410 {
  1411     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1412 }
  1413 
  1414 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
       /*
        * Lookup table consumed by Blit_RGB565_32() through the RGB565_32 macro:
        * for a byte value b, entry [2*b] is added when b is the pixel's LO byte
        * and entry [2*b+1] when b is its HI byte (256 byte values x 2 = 512
        * entries).  In this table the even (LO byte) entries carry the 0xff in
        * the top byte.
        */
  1415 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1416 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1417 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1418 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1419 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1420 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1421 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1422 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1423 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1424 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1425 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1426 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1427 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1428 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1429 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1430 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1431 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1432 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1433 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1434 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1435 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1436 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1437 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1438 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1439 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1440 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1441 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1442 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1443 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1444 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1445 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1446 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1447 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1448 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1449 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1450 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1451 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1452 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1453 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1454 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1455 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1456 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1457 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1458 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1459 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1460 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1461 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1462 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1463 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1464 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1465 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1466 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1467 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1468 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1469 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1470 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1471 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1472 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1473 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1474 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1475 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1476 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1477 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1478 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1479 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1480 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1481 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1482 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1483 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1484 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1485 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1486 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1487 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1488 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1489 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1490 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1491 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1492 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1493 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1494 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1495 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1496 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1497 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1498 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1499 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1500 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1501 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1502 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1503 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1504 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1505 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1506 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1507 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1508 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1509 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1510 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1511 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1512 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1513 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1514 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1515 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1516 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1517 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1518 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1519 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1520 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1521 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1522 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1523 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1524 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1525 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1526 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1527 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1528 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1529 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1530 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1531 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1532 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1533 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1534 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1535 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1536 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1537 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1538 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1539 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1540 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1541 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1542 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1543 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1544 };
  1545 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
  1546 {
  1547     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1548 }
  1549 
  1550 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
        *
        * Lookup table holding 256 pairs of entries, one pair per byte value i:
        *   LUT[2*i]   - i read as the byte carrying blue (bits 0-4) and the low
        *                three green bits: blue lands in bits 8-15, the green
        *                part in bits 16-23, and alpha 0xff is set in bits 0-7.
        *   LUT[2*i+1] - i read as the byte carrying the high three green bits
        *                and red: green part in bits 16-23, red in bits 24-31.
        * The 5- and 6-bit channel values are scaled up to the 8-bit range.
        * NOTE(review): presumably Blit_RGB565_32 adds one entry of each kind
        * per pixel -- confirm there. If so, full green yields 0x1c + 0xe2 =
        * 0xfe rather than 0xff.
        */
  1551 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1552 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1553 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1554 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1555 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1556 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1557 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1558 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1559 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1560 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1561 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1562 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1563 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1564 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1565 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1566 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1567 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1568 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1569 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1570 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1571 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1572 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1573 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1574 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1575 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1576 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1577 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1578 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1579 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1580 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1581 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1582 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1583 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1584 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1585 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1586 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1587 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1588 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1589 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1590 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1591 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1592 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1593 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1594 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1595 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1596 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1597 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1598 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1599 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1600 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1601 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1602 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1603 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1604 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1605 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1606 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1607 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1608 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1609 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1610 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1611 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1612 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1613 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1614 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1615 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1616 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1617 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1618 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1619 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1620 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1621 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1622 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1623 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1624 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1625 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1626 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1627 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1628 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1629 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1630 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1631 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1632 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1633 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1634 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1635 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1636 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1637 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1638 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1639 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1640 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1641 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1642 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1643 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1644 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1645 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1646 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1647 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1648 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1649 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1650 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1651 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1652 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1653 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1654 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1655 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1656 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1657 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1658 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1659 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1660 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1661 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1662 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1663 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1664 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1665 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1666 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1667 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1668 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1669 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1670 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1671 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1672 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1673 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1674 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1675 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1676 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1677 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1678 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1679 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1680 };
  1681 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
  1682 {
  1683     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1684 }
  1685 
  1686 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
        *
        * Lookup table holding 256 pairs of entries, one pair per byte value i:
        *   LUT[2*i]   - i read as the byte carrying blue (bits 0-4) and the low
        *                three green bits: blue lands in bits 24-31, the green
        *                part in bits 16-23; no alpha is set in these entries.
        *   LUT[2*i+1] - i read as the byte carrying the high three green bits
        *                and red: green part in bits 16-23, red in bits 8-15,
        *                and alpha 0xff in bits 0-7.
        * The 5- and 6-bit channel values are scaled up to the 8-bit range.
        * NOTE(review): presumably Blit_RGB565_32 adds one entry of each kind
        * per pixel -- confirm there. If so, full green yields 0x1c + 0xe2 =
        * 0xfe rather than 0xff.
        */
  1687 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1688 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1689 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1690 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1691 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1692 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1693 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1694 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1695 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1696 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1697 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1698 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1699 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1700 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1701 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1702 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1703 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1704 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1705 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1706 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1707 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1708 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1709 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1710 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1711 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1712 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1713 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1714 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1715 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1716 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1717 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1718 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1719 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1720 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1721 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1722 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1723 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1724 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1725 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1726 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1727 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1728 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1729 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1730 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1731 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1732 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1733 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1734 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1735 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1736 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1737 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1738 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1739 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1740 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1741 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1742 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1743 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1744 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1745 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1746 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1747 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1748 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1749 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1750 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1751 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1752 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1753 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1754 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1755 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1756 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1757 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1758 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1759 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1760 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1761 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1762 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1763 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1764 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1765 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1766 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1767 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1768 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1769 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1770 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1771 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1772 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1773 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1774 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1775 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1776 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1777 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1778 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1779 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1780 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1781 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1782 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1783 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1784 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1785 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1786 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1787 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1788 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1789 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1790 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1791 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1792 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1793 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1794 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1795 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1796 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1797 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1798 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1799 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1800 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1801 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1802 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1803 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1804 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1805 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1806 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1807 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1808 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1809 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1810 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1811 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1812 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1813 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1814 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1815 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1816 };
  1817 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
  1818 {
  1819     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1820 }
  1821 
  1822 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1823 #ifndef RGB888_RGB332
  1824 #define RGB888_RGB332(dst, src) { \
  1825 	dst = (((src)&0x00E00000)>>16)| \
  1826 	      (((src)&0x0000E000)>>11)| \
  1827 	      (((src)&0x000000C0)>>6); \
  1828 }
  1829 #endif
  1830 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1831 {
  1832 #ifndef USE_DUFFS_LOOP
  1833 	int c;
  1834 #endif
  1835 	int Pixel;
  1836 	int width, height;
  1837 	Uint32 *src;
  1838 	const Uint8 *map;
  1839 	Uint8 *dst;
  1840 	int srcskip, dstskip;
  1841 
  1842 	/* Set up some basic variables */
  1843 	width = info->d_width;
  1844 	height = info->d_height;
  1845 	src = (Uint32 *)info->s_pixels;
  1846 	srcskip = info->s_skip/4;
  1847 	dst = info->d_pixels;
  1848 	dstskip = info->d_skip;
  1849 	map = info->table;
  1850 
  1851 #ifdef USE_DUFFS_LOOP
  1852 	while ( height-- ) {
  1853 		DUFFS_LOOP(
  1854 			RGB888_RGB332(Pixel, *src);
  1855 			*dst++ = map[Pixel];
  1856 			++src;
  1857 		, width);
  1858 		src += srcskip;
  1859 		dst += dstskip;
  1860 	}
  1861 #else
  1862 	while ( height-- ) {
  1863 		for ( c=width/4; c; --c ) {
  1864 			/* Pack RGB into 8bit pixel */
  1865 			RGB888_RGB332(Pixel, *src);
  1866 			*dst++ = map[Pixel];
  1867 			++src;
  1868 			RGB888_RGB332(Pixel, *src);
  1869 			*dst++ = map[Pixel];
  1870 			++src;
  1871 			RGB888_RGB332(Pixel, *src);
  1872 			*dst++ = map[Pixel];
  1873 			++src;
  1874 			RGB888_RGB332(Pixel, *src);
  1875 			*dst++ = map[Pixel];
  1876 			++src;
  1877 		}
  1878 		switch ( width & 3 ) {
  1879 			case 3:
  1880 				RGB888_RGB332(Pixel, *src);
  1881 				*dst++ = map[Pixel];
  1882 				++src;
  1883 			case 2:
  1884 				RGB888_RGB332(Pixel, *src);
  1885 				*dst++ = map[Pixel];
  1886 				++src;
  1887 			case 1:
  1888 				RGB888_RGB332(Pixel, *src);
  1889 				*dst++ = map[Pixel];
  1890 				++src;
  1891 		}
  1892 		src += srcskip;
  1893 		dst += dstskip;
  1894 	}
  1895 #endif /* USE_DUFFS_LOOP */
  1896 }
  1897 static void BlitNto1(SDL_BlitInfo *info)
  1898 {
  1899 #ifndef USE_DUFFS_LOOP
  1900 	int c;
  1901 #endif
  1902 	int width, height;
  1903 	Uint8 *src;
  1904 	const Uint8 *map;
  1905 	Uint8 *dst;
  1906 	int srcskip, dstskip;
  1907 	int srcbpp;
  1908 	Uint32 Pixel;
  1909 	int  sR, sG, sB;
  1910 	SDL_PixelFormat *srcfmt;
  1911 
  1912 	/* Set up some basic variables */
  1913 	width = info->d_width;
  1914 	height = info->d_height;
  1915 	src = info->s_pixels;
  1916 	srcskip = info->s_skip;
  1917 	dst = info->d_pixels;
  1918 	dstskip = info->d_skip;
  1919 	map = info->table;
  1920 	srcfmt = info->src;
  1921 	srcbpp = srcfmt->BytesPerPixel;
  1922 
  1923 	if ( map == NULL ) {
  1924 		while ( height-- ) {
  1925 #ifdef USE_DUFFS_LOOP
  1926 			DUFFS_LOOP(
  1927 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1928 								sR, sG, sB);
  1929 				if ( 1 ) {
  1930 				  	/* Pack RGB into 8bit pixel */
  1931 				  	*dst = ((sR>>5)<<(3+2))|
  1932 					        ((sG>>5)<<(2)) |
  1933 					        ((sB>>6)<<(0)) ;
  1934 				}
  1935 				dst++;
  1936 				src += srcbpp;
  1937 			, width);
  1938 #else
  1939 			for ( c=width; c; --c ) {
  1940 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1941 								sR, sG, sB);
  1942 				if ( 1 ) {
  1943 				  	/* Pack RGB into 8bit pixel */
  1944 				  	*dst = ((sR>>5)<<(3+2))|
  1945 					        ((sG>>5)<<(2)) |
  1946 					        ((sB>>6)<<(0)) ;
  1947 				}
  1948 				dst++;
  1949 				src += srcbpp;
  1950 			}
  1951 #endif
  1952 			src += srcskip;
  1953 			dst += dstskip;
  1954 		}
  1955 	} else {
  1956 		while ( height-- ) {
  1957 #ifdef USE_DUFFS_LOOP
  1958 			DUFFS_LOOP(
  1959 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1960 								sR, sG, sB);
  1961 				if ( 1 ) {
  1962 				  	/* Pack RGB into 8bit pixel */
  1963 				  	*dst = map[((sR>>5)<<(3+2))|
  1964 						   ((sG>>5)<<(2))  |
  1965 						   ((sB>>6)<<(0))  ];
  1966 				}
  1967 				dst++;
  1968 				src += srcbpp;
  1969 			, width);
  1970 #else
  1971 			for ( c=width; c; --c ) {
  1972 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1973 								sR, sG, sB);
  1974 				if ( 1 ) {
  1975 				  	/* Pack RGB into 8bit pixel */
  1976 				  	*dst = map[((sR>>5)<<(3+2))|
  1977 						   ((sG>>5)<<(2))  |
  1978 						   ((sB>>6)<<(0))  ];
  1979 				}
  1980 				dst++;
  1981 				src += srcbpp;
  1982 			}
  1983 #endif /* USE_DUFFS_LOOP */
  1984 			src += srcskip;
  1985 			dst += dstskip;
  1986 		}
  1987 	}
  1988 }
  1989 
  1990 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  1991 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1992 {
  1993 	int width = info->d_width;
  1994 	int height = info->d_height;
  1995 	Uint32 *src = (Uint32 *)info->s_pixels;
  1996 	int srcskip = info->s_skip;
  1997 	Uint32 *dst = (Uint32 *)info->d_pixels;
  1998 	int dstskip = info->d_skip;
  1999 	SDL_PixelFormat *srcfmt = info->src;
  2000 	SDL_PixelFormat *dstfmt = info->dst;
  2001 
  2002 	if (dstfmt->Amask) {
  2003 		/* RGB->RGBA, SET_ALPHA */
  2004 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  2005 
  2006 		while ( height-- ) {
  2007 			DUFFS_LOOP(
  2008 			{
  2009 				*dst = *src | mask;
  2010 				++dst;
  2011 				++src;
  2012 			},
  2013 			width);
  2014 			src = (Uint32*)((Uint8*)src + srcskip);
  2015 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2016 		}
  2017 	} else {
  2018 		/* RGBA->RGB, NO_ALPHA */
  2019 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2020 
  2021 		while ( height-- ) {
  2022 			DUFFS_LOOP(
  2023 			{
  2024 				*dst = *src & mask;
  2025 				++dst;
  2026 				++src;
  2027 			},
  2028 			width);
  2029 			src = (Uint32*)((Uint8*)src + srcskip);
  2030 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2031 		}
  2032 	}
  2033 }
  2034 
  2035 static void BlitNtoN(SDL_BlitInfo *info)
  2036 {
  2037 	int width = info->d_width;
  2038 	int height = info->d_height;
  2039 	Uint8 *src = info->s_pixels;
  2040 	int srcskip = info->s_skip;
  2041 	Uint8 *dst = info->d_pixels;
  2042 	int dstskip = info->d_skip;
  2043 	SDL_PixelFormat *srcfmt = info->src;
  2044 	int srcbpp = srcfmt->BytesPerPixel;
  2045 	SDL_PixelFormat *dstfmt = info->dst;
  2046 	int dstbpp = dstfmt->BytesPerPixel;
  2047 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2048 
  2049 	while ( height-- ) {
  2050 		DUFFS_LOOP(
  2051 		{
  2052 		        Uint32 Pixel;
  2053 			unsigned sR;
  2054 			unsigned sG;
  2055 			unsigned sB;
  2056 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2057 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2058 			dst += dstbpp;
  2059 			src += srcbpp;
  2060 		},
  2061 		width);
  2062 		src += srcskip;
  2063 		dst += dstskip;
  2064 	}
  2065 }
  2066 
  2067 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2068 {
  2069 	int width = info->d_width;
  2070 	int height = info->d_height;
  2071 	Uint8 *src = info->s_pixels;
  2072 	int srcskip = info->s_skip;
  2073 	Uint8 *dst = info->d_pixels;
  2074 	int dstskip = info->d_skip;
  2075 	SDL_PixelFormat *srcfmt = info->src;
  2076 	int srcbpp = srcfmt->BytesPerPixel;
  2077 	SDL_PixelFormat *dstfmt = info->dst;
  2078 	int dstbpp = dstfmt->BytesPerPixel;
  2079 	int c;
  2080 
  2081 	/* FIXME: should map alpha to [0..255] correctly! */
  2082 	while ( height-- ) {
  2083 		for ( c=width; c; --c ) {
  2084 		        Uint32 Pixel;
  2085 			unsigned sR, sG, sB, sA;
  2086 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2087 				      sR, sG, sB, sA);
  2088 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2089 				      sR, sG, sB, sA);
  2090 			dst += dstbpp;
  2091 			src += srcbpp;
  2092 		}
  2093 		src += srcskip;
  2094 		dst += dstskip;
  2095 	}
  2096 }
  2097 
  2098 static void BlitNto1Key(SDL_BlitInfo *info)
  2099 {
  2100 	int width = info->d_width;
  2101 	int height = info->d_height;
  2102 	Uint8 *src = info->s_pixels;
  2103 	int srcskip = info->s_skip;
  2104 	Uint8 *dst = info->d_pixels;
  2105 	int dstskip = info->d_skip;
  2106 	SDL_PixelFormat *srcfmt = info->src;
  2107 	const Uint8 *palmap = info->table;
  2108 	Uint32 ckey = srcfmt->colorkey;
  2109 	Uint32 rgbmask = ~srcfmt->Amask;
  2110 	int srcbpp;
  2111 	Uint32 Pixel;
  2112 	Uint8  sR, sG, sB;
  2113 
  2114 	/* Set up some basic variables */
  2115 	srcbpp = srcfmt->BytesPerPixel;
  2116 	ckey &= rgbmask;
  2117 
  2118 	if ( palmap == NULL ) {
  2119 		while ( height-- ) {
  2120 			DUFFS_LOOP(
  2121 			{
  2122 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2123 								sR, sG, sB);
  2124 				if ( (Pixel & rgbmask) != ckey ) {
  2125 				  	/* Pack RGB into 8bit pixel */
  2126 				  	*dst = ((sR>>5)<<(3+2))|
  2127 						((sG>>5)<<(2)) |
  2128 						((sB>>6)<<(0)) ;
  2129 				}
  2130 				dst++;
  2131 				src += srcbpp;
  2132 			},
  2133 			width);
  2134 			src += srcskip;
  2135 			dst += dstskip;
  2136 		}
  2137 	} else {
  2138 		while ( height-- ) {
  2139 			DUFFS_LOOP(
  2140 			{
  2141 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2142 								sR, sG, sB);
  2143 				if ( (Pixel & rgbmask) != ckey ) {
  2144 				  	/* Pack RGB into 8bit pixel */
  2145 				  	*dst = palmap[((sR>>5)<<(3+2))|
  2146 							((sG>>5)<<(2))  |
  2147 							((sB>>6)<<(0))  ];
  2148 				}
  2149 				dst++;
  2150 				src += srcbpp;
  2151 			},
  2152 			width);
  2153 			src += srcskip;
  2154 			dst += dstskip;
  2155 		}
  2156 	}
  2157 }
  2158 
  2159 static void Blit2to2Key(SDL_BlitInfo *info)
  2160 {
  2161 	int width = info->d_width;
  2162 	int height = info->d_height;
  2163 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2164 	int srcskip = info->s_skip;
  2165 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2166 	int dstskip = info->d_skip;
  2167 	Uint32 ckey = info->src->colorkey;
  2168 	Uint32 rgbmask = ~info->src->Amask;
  2169 
  2170 	/* Set up some basic variables */
  2171         srcskip /= 2;
  2172         dstskip /= 2;
  2173 	ckey &= rgbmask;
  2174 
  2175 	while ( height-- ) {
  2176 		DUFFS_LOOP(
  2177 		{
  2178 			if ( (*srcp & rgbmask) != ckey ) {
  2179 				*dstp = *srcp;
  2180 			}
  2181 			dstp++;
  2182 			srcp++;
  2183 		},
  2184 		width);
  2185 		srcp += srcskip;
  2186 		dstp += dstskip;
  2187 	}
  2188 }
  2189 
  2190 static void BlitNtoNKey(SDL_BlitInfo *info)
  2191 {
  2192 	int width = info->d_width;
  2193 	int height = info->d_height;
  2194 	Uint8 *src = info->s_pixels;
  2195 	int srcskip = info->s_skip;
  2196 	Uint8 *dst = info->d_pixels;
  2197 	int dstskip = info->d_skip;
  2198 	Uint32 ckey = info->src->colorkey;
  2199 	SDL_PixelFormat *srcfmt = info->src;
  2200 	SDL_PixelFormat *dstfmt = info->dst;
  2201 	int srcbpp = srcfmt->BytesPerPixel;
  2202 	int dstbpp = dstfmt->BytesPerPixel;
  2203 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2204 
  2205 	while ( height-- ) {
  2206 		DUFFS_LOOP(
  2207 		{
  2208 		        Uint32 Pixel;
  2209 			unsigned sR;
  2210 			unsigned sG;
  2211 			unsigned sB;
  2212 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2213 			if ( Pixel != ckey ) {
  2214 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2215 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2216 					      sR, sG, sB, alpha);
  2217 			}
  2218 			dst += dstbpp;
  2219 			src += srcbpp;
  2220 		},
  2221 		width);
  2222 		src += srcskip;
  2223 		dst += dstskip;
  2224 	}
  2225 }
  2226 
  2227 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2228 {
  2229 	int width = info->d_width;
  2230 	int height = info->d_height;
  2231 	Uint8 *src = info->s_pixels;
  2232 	int srcskip = info->s_skip;
  2233 	Uint8 *dst = info->d_pixels;
  2234 	int dstskip = info->d_skip;
  2235 	Uint32 ckey = info->src->colorkey;
  2236 	SDL_PixelFormat *srcfmt = info->src;
  2237 	SDL_PixelFormat *dstfmt = info->dst;
  2238 	Uint32 rgbmask = ~srcfmt->Amask;
  2239 
  2240 	Uint8 srcbpp;
  2241 	Uint8 dstbpp;
  2242 	Uint32 Pixel;
  2243 	Uint8  sR, sG, sB, sA;
  2244 
  2245 	/* Set up some basic variables */
  2246 	srcbpp = srcfmt->BytesPerPixel;
  2247 	dstbpp = dstfmt->BytesPerPixel;
  2248 	ckey &= rgbmask;
  2249 
  2250 	/* FIXME: should map alpha to [0..255] correctly! */
  2251 	while ( height-- ) {
  2252 		DUFFS_LOOP(
  2253 		{
  2254 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2255 				      sR, sG, sB, sA);
  2256 			if ( (Pixel & rgbmask) != ckey ) {
  2257 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2258 						sR, sG, sB, sA);
  2259 			}
  2260 			dst += dstbpp;
  2261 			src += srcbpp;
  2262 		},
  2263 		width);
  2264 		src += srcskip;
  2265 		dst += dstskip;
  2266 	}
  2267 }
  2268 
  2269 /* Normal N to N optimized blitters
        *
        * One row of a blitter selection table: the pixel layouts a blitter
        * handles, plus the blitter itself.
        */
  2270 struct blit_table {
  2271 	Uint32 srcR, srcG, srcB;	/* source R,G,B masks; rows use 0 as a wildcard (see MASKOK) */
  2272 	int dstbpp;			/* destination bytes per pixel (rows use 2, 3 or 4) */
  2273 	Uint32 dstR, dstG, dstB;	/* destination R,G,B masks; 0 as a wildcard (see MASKOK) */
  2274 	Uint32 blit_features;		/* NOTE(review): looks like required CPU feature bits (rows are annotated "has-altivec" for 2) -- confirm in SDL_CalculateBlitN */
  2275 	void *aux_data;			/* extra pointer stored with the row; the USE_ASMBLIT rows put a converter routine here */
  2276 	SDL_loblit blitfunc;		/* the blit function for this row */
  2277 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;	/* alpha modes of the row; values are bit flags and rows OR them together */
  2278 };
  2279 static const struct blit_table normal_blit_1[] = {	/* table for 8-bit sources */
  2280 	/* Default for 8-bit RGB source, an invalid combination:
        	   the single row carries a NULL blit function */
  2281 	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
  2282 };
  2283 static const struct blit_table normal_blit_2[] = {	/* table for 16-bit sources */
       /* Row layout follows struct blit_table: source R,G,B masks, destination
          bytes per pixel, destination R,G,B masks, blit_features, aux_data,
          blitfunc, alpha flags. */
  2284 #ifdef USE_ASMBLIT
       /* RGB565 source to 16-bit BGR565 / RGB555 / BGR555 through ConvertX86 */
  2285     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
  2286       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
  2287     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
  2288       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
  2289     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
  2290       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
  2291 #endif
  2292 #ifdef USE_ALTIVEC_BLITTERS
  2293     /* has-altivec: RGB565 and RGB555 sources to any 32-bit destination
                (all destination masks are wildcards) */
  2294     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2295       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2296     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2297       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2298 #endif
       /* Table-driven RGB565 to ARGB / ABGR / RGBA / BGRA 8888 blitters */
  2299     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
  2300       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
  2301     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2302       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
  2303     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2304       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
  2305     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2306       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
  2307 
  2308     /* Default for 16-bit RGB source, used if no other blitter matches */
  2309     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2310 };
  2311 static const struct blit_table normal_blit_3[] = {	/* table for 24-bit sources */
  2312 	/* Default for 24-bit RGB source, never optimized:
        	   the single row falls back to the generic BlitNtoN */
  2313     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2314 };
  2315 static const struct blit_table normal_blit_4[] = {	/* table for 32-bit sources */
       /* Row layout follows struct blit_table: source R,G,B masks, destination
          bytes per pixel, destination R,G,B masks, blit_features, aux_data,
          blitfunc, alpha flags.  With USE_ASMBLIT only the assembly rows are
          compiled in; the AltiVec and C rows sit in the #else branch. */
  2316 #ifdef USE_ASMBLIT
       /* For each 16-bit destination the ConvertMMX row (blit_features 1) comes
          before the ConvertX86 row (blit_features 0) */
  2317     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2318       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
  2319     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2320       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
  2321     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2322       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
  2323     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2324       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
  2325     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2326       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
  2327     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2328       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
  2329     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2330       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
  2331     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2332       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
       /* 24-bit and 32-bit destinations, ConvertX86 only */
  2333     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
  2334       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
  2335     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
  2336       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
  2337     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2338       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
  2339     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2340       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
  2341     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2342       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
  2343 #else
  2344 #ifdef USE_ALTIVEC_BLITTERS
  2345     /* has-altivec | dont-use-prefetch */
  2346     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2347       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2348     /* has-altivec */
  2349     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2350       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2351     /* has-altivec */
  2352     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
  2353       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
  2354 #endif
       /* Plain C blitters: RGB888 to RGB565 and RGB555 */
  2355     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2356       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
  2357     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2358       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
  2359 #endif
  2360 	/* Default for 32-bit RGB source, used if no other blitter matches */
  2361 	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2362 };
  2363 static const struct blit_table *normal_blit[] = {
  2364 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2365 };
  2366 
  2367 /* Mask matches table, or table entry is zero
        *
        * x is the mask being tested and y the table entry; a zero entry
        * accepts any mask.  y is evaluated twice.
        */
  2368 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2369 
  2370 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2371 {
  2372 	struct private_swaccel *sdata;
  2373 	SDL_PixelFormat *srcfmt;
  2374 	SDL_PixelFormat *dstfmt;
  2375 	const struct blit_table *table;
  2376 	int which;
  2377 	SDL_loblit blitfun;
  2378 
  2379 	/* Set up data for choosing the blit */
  2380 	sdata = surface->map->sw_data;
  2381 	srcfmt = surface->format;
  2382 	dstfmt = surface->map->dst->format;
  2383 
  2384 	if ( blit_index & 2 ) {
  2385 	        /* alpha or alpha+colorkey */
  2386 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2387 	}
  2388 
  2389 	/* We don't support destinations less than 8-bits */
  2390 	if ( dstfmt->BitsPerPixel < 8 ) {
  2391 		return(NULL);
  2392 	}
  2393 	
  2394 	if(blit_index == 1) {
  2395 	    /* colorkey blit: Here we don't have too many options, mostly
  2396 	       because RLE is the preferred fast way to deal with this.
  2397 	       If a particular case turns out to be useful we'll add it. */
  2398 
  2399 	    if(srcfmt->BytesPerPixel == 2
  2400 	       && surface->map->identity)
  2401 		return Blit2to2Key;
  2402 	    else if(dstfmt->BytesPerPixel == 1)
  2403 		return BlitNto1Key;
  2404 	    else {
  2405 #ifdef USE_ALTIVEC_BLITTERS
  2406         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2407             return Blit32to32KeyAltivec;
  2408         } else
  2409 #endif
  2410 
  2411 		if(srcfmt->Amask && dstfmt->Amask)
  2412 		    return BlitNtoNKeyCopyAlpha;
  2413 		else
  2414 		    return BlitNtoNKey;
  2415 	    }
  2416 	}
  2417 
  2418 	blitfun = NULL;
  2419 	if ( dstfmt->BitsPerPixel == 8 ) {
  2420 		/* We assume 8-bit destinations are palettized */
  2421 		if ( (srcfmt->BytesPerPixel == 4) &&
  2422 		     (srcfmt->Rmask == 0x00FF0000) &&
  2423 		     (srcfmt->Gmask == 0x0000FF00) &&
  2424 		     (srcfmt->Bmask == 0x000000FF) ) {
  2425 			if ( surface->map->table ) {
  2426 				blitfun = Blit_RGB888_index8_map;
  2427 			} else {
  2428 #ifdef USE_ASMBLIT
  2429 				sdata->aux_data = ConvertX86p32_8RGB332;
  2430 				blitfun = ConvertX86;
  2431 #else
  2432 				blitfun = Blit_RGB888_index8;
  2433 #endif
  2434 			}
  2435 		} else {
  2436 			blitfun = BlitNto1;
  2437 		}
  2438 	} else {
  2439 		/* Now the meat, choose the blitter we want */
  2440 		int a_need = 0;
  2441 		if(dstfmt->Amask)
  2442 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2443 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2444 		for ( which=0; table[which].dstbpp; ++which ) {
  2445 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2446 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2447 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2448 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2449 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2450 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2451 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2452 			    (a_need & table[which].alpha) == a_need &&
  2453 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2454 				break;
  2455 		}
  2456 		sdata->aux_data = table[which].aux_data;
  2457 		blitfun = table[which].blitfunc;
  2458 
  2459 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2460 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2461 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2462 			     srcfmt->Rmask == dstfmt->Rmask &&
  2463 			     srcfmt->Gmask == dstfmt->Gmask &&
  2464 			     srcfmt->Bmask == dstfmt->Bmask ) {
  2465 				blitfun = Blit4to4MaskAlpha;
  2466 			} else if ( a_need == COPY_ALPHA ) {
  2467 			    blitfun = BlitNtoNCopyAlpha;
  2468 			}
  2469 		}
  2470 	}
  2471 
  2472 #ifdef DEBUG_ASM
  2473 #ifdef USE_ASMBLIT
  2474 	if ( blitfun == ConvertMMX )
  2475 		fprintf(stderr, "Using mmx blit\n");
  2476 	else
  2477 	if ( blitfun == ConvertX86 )
  2478 		fprintf(stderr, "Using asm blit\n");
  2479 	else
  2480 #endif
  2481 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
  2482 		fprintf(stderr, "Using C blit\n");
  2483 	else
  2484 		fprintf(stderr, "Using optimized C blit\n");
  2485 #endif /* DEBUG_ASM */
  2486 
  2487 	return(blitfun);
  2488 }