src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 20 Feb 2012 20:55:23 -0500
branchSDL-1.2
changeset 6294 7a2e0f7b30cb
parent 6137 4720145f848b
child 6411 ec7ccf5ee3dd
permissions -rw-r--r--
Fixed bug 1424 - Handling of alpha channel in Altivec accelerated blit functions

evilbite 2012-02-19 09:38:21 PST

There is only one Altivec accelerated blit function
(ConvertAltivec32to32_prefetch() or ConvertAltivec32to32_noprefetch(),
depending on the CPU used) that is supposed to handle all alpha combinations.
This works as follows for every pixel line:
1. Blit single pixels until an aligned address is reached
2. Accelerated blit as far as possible
3. Blit single remaining pixels
Part 2. is set up correctly to handle different combinations of the alpha
channels of the participating surfaces. Parts 1. and 3. only do a simple copy
of all the pixel's components from source to destination. But when the source
surface has no alpha channel (Amask is 0, e.g. the video surface) the surface's
alpha value must be used instead. Otherwise crap (uninitialized data) is being
copied to the destination's alpha channel.

The attached patch is a quick'n'dirty solution to the problem. A more
sophisticated solution might require separate functions for different
combinations of the alpha channels of the participating surfaces.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2012 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_endian.h"
    26 #include "SDL_cpuinfo.h"
    27 #include "SDL_blit.h"
    28 
    29 /* Functions to blit from N-bit surfaces to other surfaces */
    30 
    31 #if SDL_ALTIVEC_BLITTERS
    32 #if __MWERKS__
    33 #pragma altivec_model on
    34 #endif
    35 #ifdef HAVE_ALTIVEC_H
    36 #include <altivec.h>
    37 #endif
    38 #define assert(X)
    39 #ifdef __MACOSX__
    40 #include <sys/sysctl.h>
    41 static size_t GetL3CacheSize( void )
    42 {
    43     const char key[] = "hw.l3cachesize";
    44     u_int64_t result = 0;
    45     size_t typeSize = sizeof( result );
    46 
    47 
    48     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
    49     if( 0 != err ) return 0;
    50 
    51     return result;
    52 }
    53 #else
    54 static size_t GetL3CacheSize( void )
    55 {
    56     /* XXX: Just guess G4 */
    57     return 2097152;
    58 }
    59 #endif /* __MACOSX__ */
    60 
    61 #if (defined(__MACOSX__) && (__GNUC__ < 4))
    62     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    63         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    64     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    65         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    66 #else
    67     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    68         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    69     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    70         (vector unsigned short) { a,b,c,d,e,f,g,h }
    71 #endif
    72 
    73 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    74 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    75                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    76                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    77                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    78                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    79 
    80 #define MAKE8888(dstfmt, r, g, b, a)  \
    81     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    82       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    83       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    84       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    85 
    86 /*
    87  * Data Stream Touch...Altivec cache prefetching.
    88  *
    89  *  Don't use this on a G5...however, the speed boost is very significant
    90  *   on a G4.
    91  */
    92 #define DST_CHAN_SRC 1
    93 #define DST_CHAN_DEST 2
    94 
    95 /* macro to set DST control word value... */
    96 #define DST_CTRL(size, count, stride) \
    97     (((size) << 24) | ((count) << 16) | (stride))
    98 
    99 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   100     ? vec_lvsl(0, src) \
   101     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   102 
   103 /* Calculate the permute vector used for 32->32 swizzling */
   104 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
   105                                   const SDL_PixelFormat *dstfmt)
   106 {
   107     /*
   108     * We have to assume that the bits that aren't used by other
   109      *  colors is alpha, and it's one complete byte, since some formats
   110      *  leave alpha with a zero mask, but we should still swizzle the bits.
   111      */
   112     /* ARGB */
   113     const static struct SDL_PixelFormat default_pixel_format = {
   114         NULL, 0, 0,
   115         0, 0, 0, 0,
   116         16, 8, 0, 24,
   117         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   118         0, 0};
   119     if (!srcfmt) {
   120         srcfmt = &default_pixel_format;
   121     }
   122     if (!dstfmt) {
   123         dstfmt = &default_pixel_format;
   124     }
   125     const vector unsigned char plus = VECUINT8_LITERAL(
   126                                       0x00, 0x00, 0x00, 0x00,
   127                                       0x04, 0x04, 0x04, 0x04,
   128                                       0x08, 0x08, 0x08, 0x08,
   129                                       0x0C, 0x0C, 0x0C, 0x0C );
   130     vector unsigned char vswiz;
   131     vector unsigned int srcvec;
   132 #define RESHIFT(X) (3 - ((X) >> 3))
   133     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   134     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   135     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   136     Uint32 amask;
   137     /* Use zero for alpha if either surface doesn't have alpha */
   138     if (dstfmt->Amask) {
   139         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   140     } else {    
   141         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   142     }           
   143 #undef RESHIFT  
   144     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
   145     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   146     return(vswiz);
   147 }
   148 
   149 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   150 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   151     int height = info->d_height;
   152     Uint8 *src = (Uint8 *) info->s_pixels;
   153     int srcskip = info->s_skip;
   154     Uint8 *dst = (Uint8 *) info->d_pixels;
   155     int dstskip = info->d_skip;
   156     SDL_PixelFormat *srcfmt = info->src;
   157     vector unsigned char valpha = vec_splat_u8(0);
   158     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   159     vector unsigned char vgmerge = VECUINT8_LITERAL(
   160         0x00, 0x02, 0x00, 0x06,
   161         0x00, 0x0a, 0x00, 0x0e,
   162         0x00, 0x12, 0x00, 0x16,
   163         0x00, 0x1a, 0x00, 0x1e);
   164     vector unsigned short v1 = vec_splat_u16(1);
   165     vector unsigned short v3 = vec_splat_u16(3);
   166     vector unsigned short v3f = VECUINT16_LITERAL(
   167         0x003f, 0x003f, 0x003f, 0x003f,
   168         0x003f, 0x003f, 0x003f, 0x003f);
   169     vector unsigned short vfc = VECUINT16_LITERAL(
   170         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   171         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   172     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   173     vf800 = vec_sl(vf800, vec_splat_u16(8));
   174 
   175     while (height--) {
   176         vector unsigned char valigner;
   177         vector unsigned char voverflow;
   178         vector unsigned char vsrc;
   179 
   180         int width = info->d_width;
   181         int extrawidth;
   182 
   183         /* do scalar until we can align... */
   184 #define ONE_PIXEL_BLEND(condition, widthvar) \
   185         while (condition) { \
   186             Uint32 Pixel; \
   187             unsigned sR, sG, sB, sA; \
   188             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   189                           sR, sG, sB, sA); \
   190             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   191                                 ((sG << 3) & 0x000007E0) | \
   192                                 ((sB >> 3) & 0x0000001F)); \
   193             dst += 2; \
   194             src += 4; \
   195             widthvar--; \
   196         }
   197 
   198         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   199 
   200         /* After all that work, here's the vector part! */
   201         extrawidth = (width % 8);  /* trailing unaligned stores */
   202         width -= extrawidth;
   203         vsrc = vec_ld(0, src);
   204         valigner = VEC_ALIGNER(src);
   205 
   206         while (width) {
   207             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   208             vector unsigned int vsrc1, vsrc2;
   209             vector unsigned char vdst;
   210 
   211             voverflow = vec_ld(15, src);
   212             vsrc = vec_perm(vsrc, voverflow, valigner);
   213             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   214             src += 16;
   215             vsrc = voverflow;
   216             voverflow = vec_ld(15, src);
   217             vsrc = vec_perm(vsrc, voverflow, valigner);
   218             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   219             /* 1555 */
   220             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   221             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   222             vgpixel = vec_and(vgpixel, vfc);
   223             vgpixel = vec_sl(vgpixel, v3);
   224             vrpixel = vec_sl(vpixel, v1);
   225             vrpixel = vec_and(vrpixel, vf800);
   226             vbpixel = vec_and(vpixel, v3f);
   227             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   228             /* 565 */
   229             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   230             vec_st(vdst, 0, dst);
   231 
   232             width -= 8;
   233             src += 16;
   234             dst += 16;
   235             vsrc = voverflow;
   236         }
   237 
   238         assert(width == 0);
   239 
   240         /* do scalar until we can align... */
   241         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   242 #undef ONE_PIXEL_BLEND
   243 
   244         src += srcskip;  /* move to next row, accounting for pitch. */
   245         dst += dstskip;
   246     }
   247 
   248 
   249 }
   250 
   251 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   252     int height = info->d_height;
   253     Uint8 *src = (Uint8 *) info->s_pixels;
   254     int srcskip = info->s_skip;
   255     Uint8 *dst = (Uint8 *) info->d_pixels;
   256     int dstskip = info->d_skip;
   257     SDL_PixelFormat *srcfmt = info->src;
   258     SDL_PixelFormat *dstfmt = info->dst;
   259     unsigned alpha;
   260     vector unsigned char valpha;
   261     vector unsigned char vpermute;
   262     vector unsigned short vf800;
   263     vector unsigned int v8 = vec_splat_u32(8);
   264     vector unsigned int v16 = vec_add(v8, v8);
   265     vector unsigned short v2 = vec_splat_u16(2);
   266     vector unsigned short v3 = vec_splat_u16(3);
   267     /* 
   268         0x10 - 0x1f is the alpha
   269         0x00 - 0x0e evens are the red
   270         0x01 - 0x0f odds are zero
   271     */
   272     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   273         0x10, 0x00, 0x01, 0x01,
   274         0x10, 0x02, 0x01, 0x01,
   275         0x10, 0x04, 0x01, 0x01,
   276         0x10, 0x06, 0x01, 0x01
   277     );
   278     vector unsigned char vredalpha2 = (vector unsigned char) (
   279         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   280     );
   281     /*
   282         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   283         0x11 - 0x0f odds are blue
   284     */
   285     vector unsigned char vblue1 = VECUINT8_LITERAL(
   286         0x00, 0x01, 0x02, 0x11,
   287         0x04, 0x05, 0x06, 0x13,
   288         0x08, 0x09, 0x0a, 0x15,
   289         0x0c, 0x0d, 0x0e, 0x17
   290     );
   291     vector unsigned char vblue2 = (vector unsigned char)(
   292         vec_add((vector unsigned int)vblue1, v8)
   293     );
   294     /*
   295         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   296         0x10 - 0x0e evens are green
   297     */
   298     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   299         0x00, 0x01, 0x10, 0x03,
   300         0x04, 0x05, 0x12, 0x07,
   301         0x08, 0x09, 0x14, 0x0b,
   302         0x0c, 0x0d, 0x16, 0x0f
   303     );
   304     vector unsigned char vgreen2 = (vector unsigned char)(
   305         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   306     );
   307     
   308 
   309     assert(srcfmt->BytesPerPixel == 2);
   310     assert(dstfmt->BytesPerPixel == 4);
   311 
   312     vf800 = (vector unsigned short)vec_splat_u8(-7);
   313     vf800 = vec_sl(vf800, vec_splat_u16(8));
   314 
   315     if (dstfmt->Amask && srcfmt->alpha) {
   316         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   317         valpha = vec_splat(valpha, 0);
   318     } else {
   319         alpha = 0;
   320         valpha = vec_splat_u8(0);
   321     }
   322 
   323     vpermute = calc_swizzle32(NULL, dstfmt);
   324     while (height--) {
   325         vector unsigned char valigner;
   326         vector unsigned char voverflow;
   327         vector unsigned char vsrc;
   328 
   329         int width = info->d_width;
   330         int extrawidth;
   331 
   332         /* do scalar until we can align... */
   333 #define ONE_PIXEL_BLEND(condition, widthvar) \
   334         while (condition) { \
   335             unsigned sR, sG, sB; \
   336             unsigned short Pixel = *((unsigned short *)src); \
   337             sR = (Pixel >> 8) & 0xf8; \
   338             sG = (Pixel >> 3) & 0xfc; \
   339             sB = (Pixel << 3) & 0xf8; \
   340             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   341             src += 2; \
   342             dst += 4; \
   343             widthvar--; \
   344         }
   345         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   346 
   347         /* After all that work, here's the vector part! */
   348         extrawidth = (width % 8);  /* trailing unaligned stores */
   349         width -= extrawidth;
   350         vsrc = vec_ld(0, src);
   351         valigner = VEC_ALIGNER(src);
   352 
   353         while (width) {
   354             vector unsigned short vR, vG, vB;
   355             vector unsigned char vdst1, vdst2;
   356 
   357             voverflow = vec_ld(15, src);
   358             vsrc = vec_perm(vsrc, voverflow, valigner);
   359 
   360             vR = vec_and((vector unsigned short)vsrc, vf800);
   361             vB = vec_sl((vector unsigned short)vsrc, v3);
   362             vG = vec_sl(vB, v2);
   363 
   364             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   365             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   366             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   367             vdst1 = vec_perm(vdst1, valpha, vpermute);
   368             vec_st(vdst1, 0, dst);
   369 
   370             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   371             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   372             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   373             vdst2 = vec_perm(vdst2, valpha, vpermute);
   374             vec_st(vdst2, 16, dst);
   375             
   376             width -= 8;
   377             dst += 32;
   378             src += 16;
   379             vsrc = voverflow;
   380         }
   381 
   382         assert(width == 0);
   383 
   384 
   385         /* do scalar until we can align... */
   386         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   387 #undef ONE_PIXEL_BLEND
   388 
   389         src += srcskip;  /* move to next row, accounting for pitch. */
   390         dst += dstskip;
   391     }
   392 
   393 }
   394 
   395 
   396 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   397     int height = info->d_height;
   398     Uint8 *src = (Uint8 *) info->s_pixels;
   399     int srcskip = info->s_skip;
   400     Uint8 *dst = (Uint8 *) info->d_pixels;
   401     int dstskip = info->d_skip;
   402     SDL_PixelFormat *srcfmt = info->src;
   403     SDL_PixelFormat *dstfmt = info->dst;
   404     unsigned alpha;
   405     vector unsigned char valpha;
   406     vector unsigned char vpermute;
   407     vector unsigned short vf800;
   408     vector unsigned int v8 = vec_splat_u32(8);
   409     vector unsigned int v16 = vec_add(v8, v8);
   410     vector unsigned short v1 = vec_splat_u16(1);
   411     vector unsigned short v3 = vec_splat_u16(3);
   412     /* 
   413         0x10 - 0x1f is the alpha
   414         0x00 - 0x0e evens are the red
   415         0x01 - 0x0f odds are zero
   416     */
   417     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   418         0x10, 0x00, 0x01, 0x01,
   419         0x10, 0x02, 0x01, 0x01,
   420         0x10, 0x04, 0x01, 0x01,
   421         0x10, 0x06, 0x01, 0x01
   422     );
   423     vector unsigned char vredalpha2 = (vector unsigned char)(
   424         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   425     );
   426     /*
   427         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   428         0x11 - 0x0f odds are blue
   429     */
   430     vector unsigned char vblue1 = VECUINT8_LITERAL(
   431         0x00, 0x01, 0x02, 0x11,
   432         0x04, 0x05, 0x06, 0x13,
   433         0x08, 0x09, 0x0a, 0x15,
   434         0x0c, 0x0d, 0x0e, 0x17
   435     );
   436     vector unsigned char vblue2 = (vector unsigned char)(
   437         vec_add((vector unsigned int)vblue1, v8)
   438     );
   439     /*
   440         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   441         0x10 - 0x0e evens are green
   442     */
   443     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   444         0x00, 0x01, 0x10, 0x03,
   445         0x04, 0x05, 0x12, 0x07,
   446         0x08, 0x09, 0x14, 0x0b,
   447         0x0c, 0x0d, 0x16, 0x0f
   448     );
   449     vector unsigned char vgreen2 = (vector unsigned char)(
   450         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   451     );
   452     
   453 
   454     assert(srcfmt->BytesPerPixel == 2);
   455     assert(dstfmt->BytesPerPixel == 4);
   456 
   457     vf800 = (vector unsigned short)vec_splat_u8(-7);
   458     vf800 = vec_sl(vf800, vec_splat_u16(8));
   459 
   460     if (dstfmt->Amask && srcfmt->alpha) {
   461         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   462         valpha = vec_splat(valpha, 0);
   463     } else {
   464         alpha = 0;
   465         valpha = vec_splat_u8(0);
   466     }
   467 
   468     vpermute = calc_swizzle32(NULL, dstfmt);
   469     while (height--) {
   470         vector unsigned char valigner;
   471         vector unsigned char voverflow;
   472         vector unsigned char vsrc;
   473 
   474         int width = info->d_width;
   475         int extrawidth;
   476 
   477         /* do scalar until we can align... */
   478 #define ONE_PIXEL_BLEND(condition, widthvar) \
   479         while (condition) { \
   480             unsigned sR, sG, sB; \
   481             unsigned short Pixel = *((unsigned short *)src); \
   482             sR = (Pixel >> 7) & 0xf8; \
   483             sG = (Pixel >> 2) & 0xf8; \
   484             sB = (Pixel << 3) & 0xf8; \
   485             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   486             src += 2; \
   487             dst += 4; \
   488             widthvar--; \
   489         }
   490         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   491 
   492         /* After all that work, here's the vector part! */
   493         extrawidth = (width % 8);  /* trailing unaligned stores */
   494         width -= extrawidth;
   495         vsrc = vec_ld(0, src);
   496         valigner = VEC_ALIGNER(src);
   497 
   498         while (width) {
   499             vector unsigned short vR, vG, vB;
   500             vector unsigned char vdst1, vdst2;
   501 
   502             voverflow = vec_ld(15, src);
   503             vsrc = vec_perm(vsrc, voverflow, valigner);
   504 
   505             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   506             vB = vec_sl((vector unsigned short)vsrc, v3);
   507             vG = vec_sl(vB, v3);
   508 
   509             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   510             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   511             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   512             vdst1 = vec_perm(vdst1, valpha, vpermute);
   513             vec_st(vdst1, 0, dst);
   514 
   515             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   516             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   517             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   518             vdst2 = vec_perm(vdst2, valpha, vpermute);
   519             vec_st(vdst2, 16, dst);
   520             
   521             width -= 8;
   522             dst += 32;
   523             src += 16;
   524             vsrc = voverflow;
   525         }
   526 
   527         assert(width == 0);
   528 
   529 
   530         /* do scalar until we can align... */
   531         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   532 #undef ONE_PIXEL_BLEND
   533 
   534         src += srcskip;  /* move to next row, accounting for pitch. */
   535         dst += dstskip;
   536     }
   537 
   538 }
   539 
   540 static void BlitNtoNKey(SDL_BlitInfo *info);
   541 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   542 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   543 {
   544     int height = info->d_height;
   545     Uint32 *srcp = (Uint32 *) info->s_pixels;
   546     int srcskip = info->s_skip;
   547     Uint32 *dstp = (Uint32 *) info->d_pixels;
   548     int dstskip = info->d_skip;
   549     SDL_PixelFormat *srcfmt = info->src;
   550     int srcbpp = srcfmt->BytesPerPixel;
   551     SDL_PixelFormat *dstfmt = info->dst;
   552     int dstbpp = dstfmt->BytesPerPixel;
   553     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   554 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   555     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   556 	Uint32 ckey = info->src->colorkey;
   557     vector unsigned int valpha;
   558     vector unsigned char vpermute;
   559     vector unsigned char vzero;
   560     vector unsigned int vckey;
   561     vector unsigned int vrgbmask;
   562     vpermute = calc_swizzle32(srcfmt, dstfmt);
   563     if (info->d_width < 16) {
   564         if(copy_alpha) {
   565             BlitNtoNKeyCopyAlpha(info);
   566         } else {
   567             BlitNtoNKey(info);
   568         }
   569         return;
   570     }
   571     vzero = vec_splat_u8(0);
   572     if (alpha) {
   573         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   574         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   575     } else {
   576         valpha = (vector unsigned int)vzero;
   577     }
   578     ckey &= rgbmask;
   579     ((unsigned int *)(char*)&vckey)[0] = ckey;
   580     vckey = vec_splat(vckey, 0);
   581     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   582     vrgbmask = vec_splat(vrgbmask, 0);
   583 
   584     while (height--) {
   585 #define ONE_PIXEL_BLEND(condition, widthvar) \
   586         if (copy_alpha) { \
   587             while (condition) { \
   588                 Uint32 Pixel; \
   589                 unsigned sR, sG, sB, sA; \
   590                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   591                           sR, sG, sB, sA); \
   592                 if ( (Pixel & rgbmask) != ckey ) { \
   593                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   594                             sR, sG, sB, sA); \
   595                 } \
   596                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   597                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   598                 widthvar--; \
   599             } \
   600         } else { \
   601             while (condition) { \
   602                 Uint32 Pixel; \
   603                 unsigned sR, sG, sB; \
   604                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   605                 if ( Pixel != ckey ) { \
   606                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   607                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   608                               sR, sG, sB, alpha); \
   609                 } \
   610                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   611                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   612                 widthvar--; \
   613             } \
   614         }
   615         int width = info->d_width;
   616         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   617         assert(width > 0);
   618         if (width > 0) {
   619             int extrawidth = (width % 4);
   620             vector unsigned char valigner = VEC_ALIGNER(srcp);
   621             vector unsigned int vs = vec_ld(0, srcp);
   622             width -= extrawidth;
   623             assert(width >= 4);
   624             while (width) {
   625                 vector unsigned char vsel;
   626                 vector unsigned int vd;
   627                 vector unsigned int voverflow = vec_ld(15, srcp);
   628                 /* load the source vec */
   629                 vs = vec_perm(vs, voverflow, valigner);
   630                 /* vsel is set for items that match the key */
   631                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   632                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   633                 /* permute the src vec to the dest format */
   634                 vs = vec_perm(vs, valpha, vpermute);
   635                 /* load the destination vec */
   636                 vd = vec_ld(0, dstp);
   637                 /* select the source and dest into vs */
   638                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   639                 
   640                 vec_st(vd, 0, dstp);
   641                 srcp += 4;
   642                 width -= 4;
   643                 dstp += 4;
   644                 vs = voverflow;
   645             }
   646             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   647 #undef ONE_PIXEL_BLEND
   648             srcp += srcskip >> 2;
   649             dstp += dstskip >> 2;
   650         }
   651     }
   652 }
   653 
   654 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   655 /* Use this on a G5 */
   656 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   657 {
   658     int height = info->d_height;
   659     Uint32 *src = (Uint32 *) info->s_pixels;
   660     int srcskip = info->s_skip;
   661     Uint32 *dst = (Uint32 *) info->d_pixels;
   662     int dstskip = info->d_skip;
   663     SDL_PixelFormat *srcfmt = info->src;
   664     SDL_PixelFormat *dstfmt = info->dst;
   665     vector unsigned int vzero = vec_splat_u32(0);
   666     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   667     if (dstfmt->Amask && !srcfmt->Amask) {
   668         if (srcfmt->alpha) {
   669             vector unsigned char valpha;
   670             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   671             vzero = (vector unsigned int)vec_splat(valpha, 0);
   672         }
   673     }
   674 
   675     assert(srcfmt->BytesPerPixel == 4);
   676     assert(dstfmt->BytesPerPixel == 4);
   677 
   678     while (height--) {
   679         vector unsigned char valigner;
   680         vector unsigned int vbits;
   681         vector unsigned int voverflow;
   682         Uint32 bits;
   683         Uint8 r, g, b, a;
   684 
   685         int width = info->d_width;
   686         int extrawidth;
   687 
   688         /* do scalar until we can align... */
   689         while ((UNALIGNED_PTR(dst)) && (width)) {
   690             bits = *(src++);
   691             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   692             if(!srcfmt->Amask)
   693               a = srcfmt->alpha;
   694             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   695             width--;
   696         }
   697 
   698         /* After all that work, here's the vector part! */
   699         extrawidth = (width % 4);
   700         width -= extrawidth;
   701         valigner = VEC_ALIGNER(src);
   702         vbits = vec_ld(0, src);
   703 
   704        while (width) {
   705             voverflow = vec_ld(15, src);
   706             src += 4;
   707             width -= 4;
   708             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   709             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   710             vec_st(vbits, 0, dst);  /* store it back out. */
   711             dst += 4;
   712             vbits = voverflow;
   713         }
   714 
   715         assert(width == 0);
   716 
   717         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   718         while (extrawidth) {
   719             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   720             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   721             if(!srcfmt->Amask)
   722               a = srcfmt->alpha;
   723             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   724             extrawidth--;
   725         }
   726 
   727         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   728         dst += dstskip >> 2;
   729     }
   730 
   731 }
   732 
   733 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   734 /* Use this on a G4 */
       /*
        * Each row is converted in three phases: scalar pixels until dst no longer
        * trips UNALIGNED_PTR(), then four pixels per iteration using vec_perm,
        * then the remaining (width % 4) pixels in scalar code again.  The scalar
        * phases substitute srcfmt->alpha when the source format has no alpha mask.
        * vec_dstt()/vec_dstst() issue AltiVec data-stream prefetch hints for the
        * source and destination; both streams are stopped with vec_dss() on exit.
        * Reads d_width, d_height, s_pixels, s_skip, d_pixels, d_skip, src and dst
        * from info.  Both formats must be 4 bytes per pixel (asserted below).
        */
   735 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
   736 {
           /* NOTE(review): these leads are scaled by sizeof (Uint32) but are added
              to Uint32 pointers below, so they end up counting pixels rather than
              bytes - confirm the intended prefetch distance. */
   737     const int scalar_dst_lead = sizeof (Uint32) * 4;
   738     const int vector_dst_lead = sizeof (Uint32) * 16;
   739 
   740     int height = info->d_height;
   741     Uint32 *src = (Uint32 *) info->s_pixels;
   742     int srcskip = info->s_skip;
   743     Uint32 *dst = (Uint32 *) info->d_pixels;
   744     int dstskip = info->d_skip;
   745     SDL_PixelFormat *srcfmt = info->src;
   746     SDL_PixelFormat *dstfmt = info->dst;
           /* vzero is the second operand of the swizzling vec_perm in the vector loop. */
   747     vector unsigned int vzero = vec_splat_u32(0);
   748     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
           /* Destination has an alpha channel but the source does not: replace the
              zero vector with one holding the per-surface alpha in every byte, so
              the swizzle can pick alpha from it (presumably what calc_swizzle32
              arranges - confirm).  A zero alpha leaves vzero as all zeroes. */
   749     if (dstfmt->Amask && !srcfmt->Amask) {
   750         if (srcfmt->alpha) {
   751             vector unsigned char valpha;
                   /* Only byte 0 is written; vec_splat() then copies it to all lanes. */
   752             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   753             vzero = (vector unsigned int)vec_splat(valpha, 0);
   754         }
   755     }
   756 
   757     assert(srcfmt->BytesPerPixel == 4);
   758     assert(dstfmt->BytesPerPixel == 4);
   759 
   760     while (height--) {
   761         vector unsigned char valigner;
   762         vector unsigned int vbits;
   763         vector unsigned int voverflow;
   764         Uint32 bits;
   765         Uint8 r, g, b, a;
   766 
   767         int width = info->d_width;
   768         int extrawidth;
   769 
   770         /* do scalar until we can align... */
   771         while ((UNALIGNED_PTR(dst)) && (width)) {
   772             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   773             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   774             bits = *(src++);
   775             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
                   /* No alpha channel in the source: use the per-surface alpha instead. */
   776             if(!srcfmt->Amask)
   777               a = srcfmt->alpha;
   778             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   779             width--;
   780         }
   781 
   782         /* After all that work, here's the vector part! */
               /* Split the rest of the row into whole 4-pixel vectors plus a tail. */
   783         extrawidth = (width % 4);
   784         width -= extrawidth;
               /* dst is aligned now but src may not be: valigner is the permute
                  vector used to realign it, and vbits is primed with the first load. */
   785         valigner = VEC_ALIGNER(src);
   786         vbits = vec_ld(0, src);
   787 
   788         while (width) {
   789             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   790             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
                   /* Load the aligned vector holding the last byte of this 16-byte window. */
   791             voverflow = vec_ld(15, src);
   792             src += 4;
   793             width -= 4;
   794             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   795             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   796             vec_st(vbits, 0, dst);  /* store it back out. */
   797             dst += 4;
                   /* Reuse the second load as the first half of the next window. */
   798             vbits = voverflow;
   799         }
   800         
   801         assert(width == 0);
   802 
   803         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   804         while (extrawidth) {
   805             bits = *(src++);  /* at most 3 pixels (width % 4), don't bother with prefetch. */
   806             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
                   /* Same per-surface alpha substitution as in the leading scalar loop. */
   807             if(!srcfmt->Amask)
   808               a = srcfmt->alpha;
   809             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   810             extrawidth--;
   811         }
   812 
               /* The skips are byte counts; >> 2 converts them to Uint32 units. */
   813         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   814         dst += dstskip >> 2;
   815     }
   816 
           /* Stop both prefetch streams started by vec_dstt()/vec_dstst(). */
   817     vec_dss(DST_CHAN_SRC);
   818     vec_dss(DST_CHAN_DEST);
   819 }
   820 
   821 static Uint32 GetBlitFeatures( void )
   822 {
   823     static Uint32 features = 0xffffffff;
   824     if (features == 0xffffffff) {
   825         /* Provide an override for testing .. */
   826         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   827         if (override) {
   828             features = 0;
   829             SDL_sscanf(override, "%u", &features);
   830         } else {
   831             features = ( 0
   832                 /* Feature 1 is has-MMX */
   833                 | ((SDL_HasMMX()) ? 1 : 0)
   834                 /* Feature 2 is has-AltiVec */
   835                 | ((SDL_HasAltiVec()) ? 2 : 0)
   836                 /* Feature 4 is dont-use-prefetch */
   837                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   838                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   839             );
   840         }
   841     }
   842     return features;
   843 }
   844 #if __MWERKS__
   845 #pragma altivec_model off
   846 #endif
   847 #else
       /* Fallback for builds where this branch of the enclosing conditional is
          compiled: only the MMX bit can ever be reported. */
   848 /* Feature 1 is has-MMX */
   849 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   850 #endif
   851 
   852 /* This is now endian dependent */
       /*
        * HI and LO are array indices used by the packing macros below (src[HI],
        * src[LO]).  When two adjacent elements are combined into one wider word,
        * HI selects the element that supplies the more significant half and LO
        * the one that supplies the less significant half, so the combined word
        * has the same memory layout as the two elements stored separately.
        */
   853 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   854 #define HI	1
   855 #define LO	0
   856 #else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
   857 #define HI	0
   858 #define LO	1
   859 #endif
   860 
   861 #if SDL_HERMES_BLITTERS
   862 
   863 /* Heheheh, we coerce Hermes into using SDL blit information */
   864 #define X86_ASSEMBLER
   865 #define HermesConverterInterface	SDL_BlitInfo
   866 #define HermesClearInterface		void
   867 #define STACKCALL
   868 
   869 #include "../hermes/HeadMMX.h"
   870 #include "../hermes/HeadX86.h"
   871 
   872 #else
   873 
   874 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   875 #define RGB888_RGB332(dst, src) { \
   876 	dst = (Uint8)((((src)&0x00E00000)>>16)| \
   877 	              (((src)&0x0000E000)>>11)| \
   878 	              (((src)&0x000000C0)>>6)); \
   879 }
   880 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   881 {
   882 #ifndef USE_DUFFS_LOOP
   883 	int c;
   884 #endif
   885 	int width, height;
   886 	Uint32 *src;
   887 	const Uint8 *map;
   888 	Uint8 *dst;
   889 	int srcskip, dstskip;
   890 
   891 	/* Set up some basic variables */
   892 	width = info->d_width;
   893 	height = info->d_height;
   894 	src = (Uint32 *)info->s_pixels;
   895 	srcskip = info->s_skip/4;
   896 	dst = info->d_pixels;
   897 	dstskip = info->d_skip;
   898 	map = info->table;
   899 
   900 	if ( map == NULL ) {
   901 		while ( height-- ) {
   902 #ifdef USE_DUFFS_LOOP
   903 			DUFFS_LOOP(
   904 				RGB888_RGB332(*dst++, *src);
   905 			, width);
   906 #else
   907 			for ( c=width/4; c; --c ) {
   908 				/* Pack RGB into 8bit pixel */
   909 				++src;
   910 				RGB888_RGB332(*dst++, *src);
   911 				++src;
   912 				RGB888_RGB332(*dst++, *src);
   913 				++src;
   914 				RGB888_RGB332(*dst++, *src);
   915 				++src;
   916 			}
   917 			switch ( width & 3 ) {
   918 				case 3:
   919 					RGB888_RGB332(*dst++, *src);
   920 					++src;
   921 				case 2:
   922 					RGB888_RGB332(*dst++, *src);
   923 					++src;
   924 				case 1:
   925 					RGB888_RGB332(*dst++, *src);
   926 					++src;
   927 			}
   928 #endif /* USE_DUFFS_LOOP */
   929 			src += srcskip;
   930 			dst += dstskip;
   931 		}
   932 	} else {
   933 		int Pixel;
   934 
   935 		while ( height-- ) {
   936 #ifdef USE_DUFFS_LOOP
   937 			DUFFS_LOOP(
   938 				RGB888_RGB332(Pixel, *src);
   939 				*dst++ = map[Pixel];
   940 				++src;
   941 			, width);
   942 #else
   943 			for ( c=width/4; c; --c ) {
   944 				/* Pack RGB into 8bit pixel */
   945 				RGB888_RGB332(Pixel, *src);
   946 				*dst++ = map[Pixel];
   947 				++src;
   948 				RGB888_RGB332(Pixel, *src);
   949 				*dst++ = map[Pixel];
   950 				++src;
   951 				RGB888_RGB332(Pixel, *src);
   952 				*dst++ = map[Pixel];
   953 				++src;
   954 				RGB888_RGB332(Pixel, *src);
   955 				*dst++ = map[Pixel];
   956 				++src;
   957 			}
   958 			switch ( width & 3 ) {
   959 				case 3:
   960 					RGB888_RGB332(Pixel, *src);
   961 					*dst++ = map[Pixel];
   962 					++src;
   963 				case 2:
   964 					RGB888_RGB332(Pixel, *src);
   965 					*dst++ = map[Pixel];
   966 					++src;
   967 				case 1:
   968 					RGB888_RGB332(Pixel, *src);
   969 					*dst++ = map[Pixel];
   970 					++src;
   971 			}
   972 #endif /* USE_DUFFS_LOOP */
   973 			src += srcskip;
   974 			dst += dstskip;
   975 		}
   976 	}
   977 }
   978 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   979 #define RGB888_RGB555(dst, src) { \
   980 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
   981 	                            (((*src)&0x0000F800)>>6)| \
   982 	                            (((*src)&0x000000F8)>>3)); \
   983 }
   984 #define RGB888_RGB555_TWO(dst, src) { \
   985 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   986 	                     (((src[HI])&0x0000F800)>>6)| \
   987 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   988 	                     (((src[LO])&0x00F80000)>>9)| \
   989 	                     (((src[LO])&0x0000F800)>>6)| \
   990 	                     (((src[LO])&0x000000F8)>>3); \
   991 }
   992 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   993 {
   994 #ifndef USE_DUFFS_LOOP
   995 	int c;
   996 #endif
   997 	int width, height;
   998 	Uint32 *src;
   999 	Uint16 *dst;
  1000 	int srcskip, dstskip;
  1001 
  1002 	/* Set up some basic variables */
  1003 	width = info->d_width;
  1004 	height = info->d_height;
  1005 	src = (Uint32 *)info->s_pixels;
  1006 	srcskip = info->s_skip/4;
  1007 	dst = (Uint16 *)info->d_pixels;
  1008 	dstskip = info->d_skip/2;
  1009 
  1010 #ifdef USE_DUFFS_LOOP
  1011 	while ( height-- ) {
  1012 		DUFFS_LOOP(
  1013 			RGB888_RGB555(dst, src);
  1014 			++src;
  1015 			++dst;
  1016 		, width);
  1017 		src += srcskip;
  1018 		dst += dstskip;
  1019 	}
  1020 #else
  1021 	/* Memory align at 4-byte boundary, if necessary */
  1022 	if ( (long)dst & 0x03 ) {
  1023 		/* Don't do anything if width is 0 */
  1024 		if ( width == 0 ) {
  1025 			return;
  1026 		}
  1027 		--width;
  1028 
  1029 		while ( height-- ) {
  1030 			/* Perform copy alignment */
  1031 			RGB888_RGB555(dst, src);
  1032 			++src;
  1033 			++dst;
  1034 
  1035 			/* Copy in 4 pixel chunks */
  1036 			for ( c=width/4; c; --c ) {
  1037 				RGB888_RGB555_TWO(dst, src);
  1038 				src += 2;
  1039 				dst += 2;
  1040 				RGB888_RGB555_TWO(dst, src);
  1041 				src += 2;
  1042 				dst += 2;
  1043 			}
  1044 			/* Get any leftovers */
  1045 			switch (width & 3) {
  1046 				case 3:
  1047 					RGB888_RGB555(dst, src);
  1048 					++src;
  1049 					++dst;
  1050 				case 2:
  1051 					RGB888_RGB555_TWO(dst, src);
  1052 					src += 2;
  1053 					dst += 2;
  1054 					break;
  1055 				case 1:
  1056 					RGB888_RGB555(dst, src);
  1057 					++src;
  1058 					++dst;
  1059 					break;
  1060 			}
  1061 			src += srcskip;
  1062 			dst += dstskip;
  1063 		}
  1064 	} else { 
  1065 		while ( height-- ) {
  1066 			/* Copy in 4 pixel chunks */
  1067 			for ( c=width/4; c; --c ) {
  1068 				RGB888_RGB555_TWO(dst, src);
  1069 				src += 2;
  1070 				dst += 2;
  1071 				RGB888_RGB555_TWO(dst, src);
  1072 				src += 2;
  1073 				dst += 2;
  1074 			}
  1075 			/* Get any leftovers */
  1076 			switch (width & 3) {
  1077 				case 3:
  1078 					RGB888_RGB555(dst, src);
  1079 					++src;
  1080 					++dst;
  1081 				case 2:
  1082 					RGB888_RGB555_TWO(dst, src);
  1083 					src += 2;
  1084 					dst += 2;
  1085 					break;
  1086 				case 1:
  1087 					RGB888_RGB555(dst, src);
  1088 					++src;
  1089 					++dst;
  1090 					break;
  1091 			}
  1092 			src += srcskip;
  1093 			dst += dstskip;
  1094 		}
  1095 	}
  1096 #endif /* USE_DUFFS_LOOP */
  1097 }
  1098 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1099 #define RGB888_RGB565(dst, src) { \
  1100 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1101 	                            (((*src)&0x0000FC00)>>5)| \
  1102 	                            (((*src)&0x000000F8)>>3)); \
  1103 }
  1104 #define RGB888_RGB565_TWO(dst, src) { \
  1105 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1106 	                     (((src[HI])&0x0000FC00)>>5)| \
  1107 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1108 	                     (((src[LO])&0x00F80000)>>8)| \
  1109 	                     (((src[LO])&0x0000FC00)>>5)| \
  1110 	                     (((src[LO])&0x000000F8)>>3); \
  1111 }
  1112 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1113 {
  1114 #ifndef USE_DUFFS_LOOP
  1115 	int c;
  1116 #endif
  1117 	int width, height;
  1118 	Uint32 *src;
  1119 	Uint16 *dst;
  1120 	int srcskip, dstskip;
  1121 
  1122 	/* Set up some basic variables */
  1123 	width = info->d_width;
  1124 	height = info->d_height;
  1125 	src = (Uint32 *)info->s_pixels;
  1126 	srcskip = info->s_skip/4;
  1127 	dst = (Uint16 *)info->d_pixels;
  1128 	dstskip = info->d_skip/2;
  1129 
  1130 #ifdef USE_DUFFS_LOOP
  1131 	while ( height-- ) {
  1132 		DUFFS_LOOP(
  1133 			RGB888_RGB565(dst, src);
  1134 			++src;
  1135 			++dst;
  1136 		, width);
  1137 		src += srcskip;
  1138 		dst += dstskip;
  1139 	}
  1140 #else
  1141 	/* Memory align at 4-byte boundary, if necessary */
  1142 	if ( (long)dst & 0x03 ) {
  1143 		/* Don't do anything if width is 0 */
  1144 		if ( width == 0 ) {
  1145 			return;
  1146 		}
  1147 		--width;
  1148 
  1149 		while ( height-- ) {
  1150 			/* Perform copy alignment */
  1151 			RGB888_RGB565(dst, src);
  1152 			++src;
  1153 			++dst;
  1154 
  1155 			/* Copy in 4 pixel chunks */
  1156 			for ( c=width/4; c; --c ) {
  1157 				RGB888_RGB565_TWO(dst, src);
  1158 				src += 2;
  1159 				dst += 2;
  1160 				RGB888_RGB565_TWO(dst, src);
  1161 				src += 2;
  1162 				dst += 2;
  1163 			}
  1164 			/* Get any leftovers */
  1165 			switch (width & 3) {
  1166 				case 3:
  1167 					RGB888_RGB565(dst, src);
  1168 					++src;
  1169 					++dst;
  1170 				case 2:
  1171 					RGB888_RGB565_TWO(dst, src);
  1172 					src += 2;
  1173 					dst += 2;
  1174 					break;
  1175 				case 1:
  1176 					RGB888_RGB565(dst, src);
  1177 					++src;
  1178 					++dst;
  1179 					break;
  1180 			}
  1181 			src += srcskip;
  1182 			dst += dstskip;
  1183 		}
  1184 	} else { 
  1185 		while ( height-- ) {
  1186 			/* Copy in 4 pixel chunks */
  1187 			for ( c=width/4; c; --c ) {
  1188 				RGB888_RGB565_TWO(dst, src);
  1189 				src += 2;
  1190 				dst += 2;
  1191 				RGB888_RGB565_TWO(dst, src);
  1192 				src += 2;
  1193 				dst += 2;
  1194 			}
  1195 			/* Get any leftovers */
  1196 			switch (width & 3) {
  1197 				case 3:
  1198 					RGB888_RGB565(dst, src);
  1199 					++src;
  1200 					++dst;
  1201 				case 2:
  1202 					RGB888_RGB565_TWO(dst, src);
  1203 					src += 2;
  1204 					dst += 2;
  1205 					break;
  1206 				case 1:
  1207 					RGB888_RGB565(dst, src);
  1208 					++src;
  1209 					++dst;
  1210 					break;
  1211 			}
  1212 			src += srcskip;
  1213 			dst += dstskip;
  1214 		}
  1215 	}
  1216 #endif /* USE_DUFFS_LOOP */
  1217 }
  1218 
  1219 #endif /* SDL_HERMES_BLITTERS */
  1220 
  1221 
  1222 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1223 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1224 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1225 {
  1226 #ifndef USE_DUFFS_LOOP
  1227 	int c;
  1228 #endif
  1229 	int width, height;
  1230 	Uint8 *src;
  1231 	Uint32 *dst;
  1232 	int srcskip, dstskip;
  1233 
  1234 	/* Set up some basic variables */
  1235 	width = info->d_width;
  1236 	height = info->d_height;
  1237 	src = (Uint8 *)info->s_pixels;
  1238 	srcskip = info->s_skip;
  1239 	dst = (Uint32 *)info->d_pixels;
  1240 	dstskip = info->d_skip/4;
  1241 
  1242 #ifdef USE_DUFFS_LOOP
  1243 	while ( height-- ) {
  1244 		DUFFS_LOOP(
  1245 		{
  1246 			*dst++ = RGB565_32(dst, src, map);
  1247 			src += 2;
  1248 		},
  1249 		width);
  1250 		src += srcskip;
  1251 		dst += dstskip;
  1252 	}
  1253 #else
  1254 	while ( height-- ) {
  1255 		/* Copy in 4 pixel chunks */
  1256 		for ( c=width/4; c; --c ) {
  1257 			*dst++ = RGB565_32(dst, src, map);
  1258 			src += 2;
  1259 			*dst++ = RGB565_32(dst, src, map);
  1260 			src += 2;
  1261 			*dst++ = RGB565_32(dst, src, map);
  1262 			src += 2;
  1263 			*dst++ = RGB565_32(dst, src, map);
  1264 			src += 2;
  1265 		}
  1266 		/* Get any leftovers */
  1267 		switch (width & 3) {
  1268 			case 3:
  1269 				*dst++ = RGB565_32(dst, src, map);
  1270 				src += 2;
  1271 			case 2:
  1272 				*dst++ = RGB565_32(dst, src, map);
  1273 				src += 2;
  1274 			case 1:
  1275 				*dst++ = RGB565_32(dst, src, map);
  1276 				src += 2;
  1277 				break;
  1278 		}
  1279 		src += srcskip;
  1280 		dst += dstskip;
  1281 	}
  1282 #endif /* USE_DUFFS_LOOP */
  1283 }
  1284 
  1285 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
       /*
        * Lookup table consumed by RGB565_32(): 256 pairs of entries.
        * Entry [2*i] is the contribution of a low source byte with value i: the
        * expanded blue channel in bits 7-0 plus the three low green bits in the
        * green channel (bits 15-8).
        * Entry [2*i+1] is the contribution of a high source byte with value i:
        * alpha 0xff in bits 31-24, the expanded red channel in bits 23-16 and
        * the three high green bits in the green channel.
        * Adding one entry of each kind yields the complete ARGB8888 pixel.
        */
  1286 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1287 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1288 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1289 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1290 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1291 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1292 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1293 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1294 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1295 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1296 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1297 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1298 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1299 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1300 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1301 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1302 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1303 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1304 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1305 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1306 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1307 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1308 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1309 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1310 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1311 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1312 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1313 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1314 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1315 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1316 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1317 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1318 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1319 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1320 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1321 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1322 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1323 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1324 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1325 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1326 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1327 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1328 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1329 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1330 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1331 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1332 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1333 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1334 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1335 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1336 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1337 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1338 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1339 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1340 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1341 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1342 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1343 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1344 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1345 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1346 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1347 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1348 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1349 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1350 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1351 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1352 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1353 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1354 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1355 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1356 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1357 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1358 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1359 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1360 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1361 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1362 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1363 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1364 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1365 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1366 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1367 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1368 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1369 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1370 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1371 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1372 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1373 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1374 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1375 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1376 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1377 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1378 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1379 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1380 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1381 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1382 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1383 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1384 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1385 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1386 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1387 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1388 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1389 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1390 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1391 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1392 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1393 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1394 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1395 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1396 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1397 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1398 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1399 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1400 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1401 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1402 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1403 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1404 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1405 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1406 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1407 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1408 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1409 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1410 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1411 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1412 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1413 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1414 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1415 };
       /* Blit RGB565 to ARGB8888 by running the generic table-driven 565 blitter
          with the ARGB lookup table above. */
  1416 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
  1417 {
  1418     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1419 }
  1420 
  1421 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
       /*
        * Lookup table consumed by RGB565_32(): 256 pairs of entries.
        * Entry [2*i] is the contribution of a low source byte with value i:
        * alpha 0xff in bits 31-24, the expanded blue channel in bits 23-16 and
        * the three low green bits in the green channel (bits 15-8).
        * Entry [2*i+1] is the contribution of a high source byte with value i:
        * the expanded red channel in bits 7-0 and the three high green bits in
        * the green channel.
        * Adding one entry of each kind yields the complete ABGR8888 pixel.
        */
  1422 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1423 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1424 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1425 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1426 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1427 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1428 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1429 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1430 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1431 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1432 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1433 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1434 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1435 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1436 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1437 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1438 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1439 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1440 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1441 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1442 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1443 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1444 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1445 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1446 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1447 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1448 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1449 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1450 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1451 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1452 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1453 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1454 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1455 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1456 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1457 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1458 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1459 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1460 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1461 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1462 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1463 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1464 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1465 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1466 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1467 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1468 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1469 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1470 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1471 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1472 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1473 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1474 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1475 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1476 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1477 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1478 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1479 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1480 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1481 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1482 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1483 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1484 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1485 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1486 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1487 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1488 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1489 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1490 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1491 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1492 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1493 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1494 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1495 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1496 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1497 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1498 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1499 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1500 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1501 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1502 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1503 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1504 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1505 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1506 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1507 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1508 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1509 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1510 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1511 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1512 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1513 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1514 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1515 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1516 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1517 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1518 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1519 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1520 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1521 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1522 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1523 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1524 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1525 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1526 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1527 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1528 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1529 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1530 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1531 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1532 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1533 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1534 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1535 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1536 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1537 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1538 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1539 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1540 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1541 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1542 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1543 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1544 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1545 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1546 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1547 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1548 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1549 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1550 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1551 };
       /* Blit RGB565 to ABGR8888 by running the generic table-driven 565 blitter
          with the ABGR lookup table above. */
  1552 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
  1553 {
  1554     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1555 }
  1556 
  1557 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1558 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1559 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1560 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1561 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1562 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1563 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1564 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1565 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1566 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1567 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1568 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1569 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1570 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1571 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1572 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1573 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1574 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1575 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1576 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1577 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1578 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1579 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1580 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1581 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1582 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1583 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1584 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1585 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1586 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1587 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1588 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1589 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1590 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1591 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1592 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1593 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1594 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1595 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1596 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1597 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1598 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1599 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1600 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1601 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1602 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1603 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1604 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1605 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1606 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1607 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1608 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1609 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1610 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1611 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1612 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1613 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1614 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1615 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1616 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1617 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1618 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1619 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1620 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1621 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1622 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1623 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1624 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1625 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1626 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1627 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1628 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1629 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1630 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1631 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1632 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1633 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1634 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1635 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1636 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1637 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1638 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1639 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1640 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1641 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1642 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1643 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1644 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1645 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1646 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1647 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1648 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1649 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1650 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1651 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1652 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1653 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1654 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1655 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1656 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1657 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1658 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1659 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1660 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1661 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1662 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1663 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1664 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1665 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1666 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1667 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1668 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1669 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1670 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1671 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1672 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1673 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1674 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1675 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1676 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1677 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1678 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1679 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1680 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1681 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1682 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1683 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1684 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1685 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1686 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1687 };
/* RGB 5-6-5 --> RGBA 8-8-8-8 blit entry point: forwards the blit info to
 * Blit_RGB565_32() together with the RGB565_RGBA8888_LUT lookup table.
 */
static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
}
  1692 
  1693 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */
  1694 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1695 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1696 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1697 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1698 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1699 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1700 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1701 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1702 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1703 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1704 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1705 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1706 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1707 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1708 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1709 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1710 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1711 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1712 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1713 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1714 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1715 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1716 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1717 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1718 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1719 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1720 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1721 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1722 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1723 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1724 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1725 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1726 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1727 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1728 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1729 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1730 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1731 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1732 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1733 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1734 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1735 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1736 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1737 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1738 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1739 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1740 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1741 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1742 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1743 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1744 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1745 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1746 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1747 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1748 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1749 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1750 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1751 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1752 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1753 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1754 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1755 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1756 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1757 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1758 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1759 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1760 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1761 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1762 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1763 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1764 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1765 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1766 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1767 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1768 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1769 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1770 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1771 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1772 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1773 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1774 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1775 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1776 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1777 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1778 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1779 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1780 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1781 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1782 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1783 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1784 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1785 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1786 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1787 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1788 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1789 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1790 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1791 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1792 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1793 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1794 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1795 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1796 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1797 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1798 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1799 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1800 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1801 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1802 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1803 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1804 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1805 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1806 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1807 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1808 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1809 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1810 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1811 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1812 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1813 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1814 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1815 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1816 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1817 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1818 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1819 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1820 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1821 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1822 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1823 };
/* RGB 5-6-5 --> BGRA 8-8-8-8 blit entry point: forwards the blit info to
 * Blit_RGB565_32() together with the RGB565_BGRA8888_LUT lookup table.
 */
static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
  1828 
  1829 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1830 #ifndef RGB888_RGB332
  1831 #define RGB888_RGB332(dst, src) { \
  1832 	dst = (((src)&0x00E00000)>>16)| \
  1833 	      (((src)&0x0000E000)>>11)| \
  1834 	      (((src)&0x000000C0)>>6); \
  1835 }
  1836 #endif
  1837 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1838 {
  1839 #ifndef USE_DUFFS_LOOP
  1840 	int c;
  1841 #endif
  1842 	int Pixel;
  1843 	int width, height;
  1844 	Uint32 *src;
  1845 	const Uint8 *map;
  1846 	Uint8 *dst;
  1847 	int srcskip, dstskip;
  1848 
  1849 	/* Set up some basic variables */
  1850 	width = info->d_width;
  1851 	height = info->d_height;
  1852 	src = (Uint32 *)info->s_pixels;
  1853 	srcskip = info->s_skip/4;
  1854 	dst = info->d_pixels;
  1855 	dstskip = info->d_skip;
  1856 	map = info->table;
  1857 
  1858 #ifdef USE_DUFFS_LOOP
  1859 	while ( height-- ) {
  1860 		DUFFS_LOOP(
  1861 			RGB888_RGB332(Pixel, *src);
  1862 			*dst++ = map[Pixel];
  1863 			++src;
  1864 		, width);
  1865 		src += srcskip;
  1866 		dst += dstskip;
  1867 	}
  1868 #else
  1869 	while ( height-- ) {
  1870 		for ( c=width/4; c; --c ) {
  1871 			/* Pack RGB into 8bit pixel */
  1872 			RGB888_RGB332(Pixel, *src);
  1873 			*dst++ = map[Pixel];
  1874 			++src;
  1875 			RGB888_RGB332(Pixel, *src);
  1876 			*dst++ = map[Pixel];
  1877 			++src;
  1878 			RGB888_RGB332(Pixel, *src);
  1879 			*dst++ = map[Pixel];
  1880 			++src;
  1881 			RGB888_RGB332(Pixel, *src);
  1882 			*dst++ = map[Pixel];
  1883 			++src;
  1884 		}
  1885 		switch ( width & 3 ) {
  1886 			case 3:
  1887 				RGB888_RGB332(Pixel, *src);
  1888 				*dst++ = map[Pixel];
  1889 				++src;
  1890 			case 2:
  1891 				RGB888_RGB332(Pixel, *src);
  1892 				*dst++ = map[Pixel];
  1893 				++src;
  1894 			case 1:
  1895 				RGB888_RGB332(Pixel, *src);
  1896 				*dst++ = map[Pixel];
  1897 				++src;
  1898 		}
  1899 		src += srcskip;
  1900 		dst += dstskip;
  1901 	}
  1902 #endif /* USE_DUFFS_LOOP */
  1903 }
  1904 static void BlitNto1(SDL_BlitInfo *info)
  1905 {
  1906 #ifndef USE_DUFFS_LOOP
  1907 	int c;
  1908 #endif
  1909 	int width, height;
  1910 	Uint8 *src;
  1911 	const Uint8 *map;
  1912 	Uint8 *dst;
  1913 	int srcskip, dstskip;
  1914 	int srcbpp;
  1915 	Uint32 Pixel;
  1916 	int  sR, sG, sB;
  1917 	SDL_PixelFormat *srcfmt;
  1918 
  1919 	/* Set up some basic variables */
  1920 	width = info->d_width;
  1921 	height = info->d_height;
  1922 	src = info->s_pixels;
  1923 	srcskip = info->s_skip;
  1924 	dst = info->d_pixels;
  1925 	dstskip = info->d_skip;
  1926 	map = info->table;
  1927 	srcfmt = info->src;
  1928 	srcbpp = srcfmt->BytesPerPixel;
  1929 
  1930 	if ( map == NULL ) {
  1931 		while ( height-- ) {
  1932 #ifdef USE_DUFFS_LOOP
  1933 			DUFFS_LOOP(
  1934 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1935 								sR, sG, sB);
  1936 				if ( 1 ) {
  1937 				  	/* Pack RGB into 8bit pixel */
  1938 				  	*dst = ((sR>>5)<<(3+2))|
  1939 					        ((sG>>5)<<(2)) |
  1940 					        ((sB>>6)<<(0)) ;
  1941 				}
  1942 				dst++;
  1943 				src += srcbpp;
  1944 			, width);
  1945 #else
  1946 			for ( c=width; c; --c ) {
  1947 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1948 								sR, sG, sB);
  1949 				if ( 1 ) {
  1950 				  	/* Pack RGB into 8bit pixel */
  1951 				  	*dst = ((sR>>5)<<(3+2))|
  1952 					        ((sG>>5)<<(2)) |
  1953 					        ((sB>>6)<<(0)) ;
  1954 				}
  1955 				dst++;
  1956 				src += srcbpp;
  1957 			}
  1958 #endif
  1959 			src += srcskip;
  1960 			dst += dstskip;
  1961 		}
  1962 	} else {
  1963 		while ( height-- ) {
  1964 #ifdef USE_DUFFS_LOOP
  1965 			DUFFS_LOOP(
  1966 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1967 								sR, sG, sB);
  1968 				if ( 1 ) {
  1969 				  	/* Pack RGB into 8bit pixel */
  1970 				  	*dst = map[((sR>>5)<<(3+2))|
  1971 						   ((sG>>5)<<(2))  |
  1972 						   ((sB>>6)<<(0))  ];
  1973 				}
  1974 				dst++;
  1975 				src += srcbpp;
  1976 			, width);
  1977 #else
  1978 			for ( c=width; c; --c ) {
  1979 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1980 								sR, sG, sB);
  1981 				if ( 1 ) {
  1982 				  	/* Pack RGB into 8bit pixel */
  1983 				  	*dst = map[((sR>>5)<<(3+2))|
  1984 						   ((sG>>5)<<(2))  |
  1985 						   ((sB>>6)<<(0))  ];
  1986 				}
  1987 				dst++;
  1988 				src += srcbpp;
  1989 			}
  1990 #endif /* USE_DUFFS_LOOP */
  1991 			src += srcskip;
  1992 			dst += dstskip;
  1993 		}
  1994 	}
  1995 }
  1996 
  1997 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  1998 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1999 {
  2000 	int width = info->d_width;
  2001 	int height = info->d_height;
  2002 	Uint32 *src = (Uint32 *)info->s_pixels;
  2003 	int srcskip = info->s_skip;
  2004 	Uint32 *dst = (Uint32 *)info->d_pixels;
  2005 	int dstskip = info->d_skip;
  2006 	SDL_PixelFormat *srcfmt = info->src;
  2007 	SDL_PixelFormat *dstfmt = info->dst;
  2008 
  2009 	if (dstfmt->Amask) {
  2010 		/* RGB->RGBA, SET_ALPHA */
  2011 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  2012 
  2013 		while ( height-- ) {
  2014 			DUFFS_LOOP(
  2015 			{
  2016 				*dst = *src | mask;
  2017 				++dst;
  2018 				++src;
  2019 			},
  2020 			width);
  2021 			src = (Uint32*)((Uint8*)src + srcskip);
  2022 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2023 		}
  2024 	} else {
  2025 		/* RGBA->RGB, NO_ALPHA */
  2026 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2027 
  2028 		while ( height-- ) {
  2029 			DUFFS_LOOP(
  2030 			{
  2031 				*dst = *src & mask;
  2032 				++dst;
  2033 				++src;
  2034 			},
  2035 			width);
  2036 			src = (Uint32*)((Uint8*)src + srcskip);
  2037 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2038 		}
  2039 	}
  2040 }
  2041 
  2042 static void BlitNtoN(SDL_BlitInfo *info)
  2043 {
  2044 	int width = info->d_width;
  2045 	int height = info->d_height;
  2046 	Uint8 *src = info->s_pixels;
  2047 	int srcskip = info->s_skip;
  2048 	Uint8 *dst = info->d_pixels;
  2049 	int dstskip = info->d_skip;
  2050 	SDL_PixelFormat *srcfmt = info->src;
  2051 	int srcbpp = srcfmt->BytesPerPixel;
  2052 	SDL_PixelFormat *dstfmt = info->dst;
  2053 	int dstbpp = dstfmt->BytesPerPixel;
  2054 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2055 
  2056 	while ( height-- ) {
  2057 		DUFFS_LOOP(
  2058 		{
  2059 		        Uint32 Pixel;
  2060 			unsigned sR;
  2061 			unsigned sG;
  2062 			unsigned sB;
  2063 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2064 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2065 			dst += dstbpp;
  2066 			src += srcbpp;
  2067 		},
  2068 		width);
  2069 		src += srcskip;
  2070 		dst += dstskip;
  2071 	}
  2072 }
  2073 
  2074 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2075 {
  2076 	int width = info->d_width;
  2077 	int height = info->d_height;
  2078 	Uint8 *src = info->s_pixels;
  2079 	int srcskip = info->s_skip;
  2080 	Uint8 *dst = info->d_pixels;
  2081 	int dstskip = info->d_skip;
  2082 	SDL_PixelFormat *srcfmt = info->src;
  2083 	int srcbpp = srcfmt->BytesPerPixel;
  2084 	SDL_PixelFormat *dstfmt = info->dst;
  2085 	int dstbpp = dstfmt->BytesPerPixel;
  2086 	int c;
  2087 
  2088 	/* FIXME: should map alpha to [0..255] correctly! */
  2089 	while ( height-- ) {
  2090 		for ( c=width; c; --c ) {
  2091 		        Uint32 Pixel;
  2092 			unsigned sR, sG, sB, sA;
  2093 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2094 				      sR, sG, sB, sA);
  2095 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2096 				      sR, sG, sB, sA);
  2097 			dst += dstbpp;
  2098 			src += srcbpp;
  2099 		}
  2100 		src += srcskip;
  2101 		dst += dstskip;
  2102 	}
  2103 }
  2104 
  2105 static void BlitNto1Key(SDL_BlitInfo *info)
  2106 {
  2107 	int width = info->d_width;
  2108 	int height = info->d_height;
  2109 	Uint8 *src = info->s_pixels;
  2110 	int srcskip = info->s_skip;
  2111 	Uint8 *dst = info->d_pixels;
  2112 	int dstskip = info->d_skip;
  2113 	SDL_PixelFormat *srcfmt = info->src;
  2114 	const Uint8 *palmap = info->table;
  2115 	Uint32 ckey = srcfmt->colorkey;
  2116 	Uint32 rgbmask = ~srcfmt->Amask;
  2117 	int srcbpp;
  2118 	Uint32 Pixel;
  2119 	unsigned sR, sG, sB;
  2120 
  2121 	/* Set up some basic variables */
  2122 	srcbpp = srcfmt->BytesPerPixel;
  2123 	ckey &= rgbmask;
  2124 
  2125 	if ( palmap == NULL ) {
  2126 		while ( height-- ) {
  2127 			DUFFS_LOOP(
  2128 			{
  2129 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2130 								sR, sG, sB);
  2131 				if ( (Pixel & rgbmask) != ckey ) {
  2132 				  	/* Pack RGB into 8bit pixel */
  2133 				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
  2134 						           ((sG>>5)<<(2)) |
  2135 						           ((sB>>6)<<(0)));
  2136 				}
  2137 				dst++;
  2138 				src += srcbpp;
  2139 			},
  2140 			width);
  2141 			src += srcskip;
  2142 			dst += dstskip;
  2143 		}
  2144 	} else {
  2145 		while ( height-- ) {
  2146 			DUFFS_LOOP(
  2147 			{
  2148 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2149 								sR, sG, sB);
  2150 				if ( (Pixel & rgbmask) != ckey ) {
  2151 				  	/* Pack RGB into 8bit pixel */
  2152 				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2153 							             ((sG>>5)<<(2))  |
  2154 							             ((sB>>6)<<(0))  ];
  2155 				}
  2156 				dst++;
  2157 				src += srcbpp;
  2158 			},
  2159 			width);
  2160 			src += srcskip;
  2161 			dst += dstskip;
  2162 		}
  2163 	}
  2164 }
  2165 
  2166 static void Blit2to2Key(SDL_BlitInfo *info)
  2167 {
  2168 	int width = info->d_width;
  2169 	int height = info->d_height;
  2170 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2171 	int srcskip = info->s_skip;
  2172 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2173 	int dstskip = info->d_skip;
  2174 	Uint32 ckey = info->src->colorkey;
  2175 	Uint32 rgbmask = ~info->src->Amask;
  2176 
  2177 	/* Set up some basic variables */
  2178         srcskip /= 2;
  2179         dstskip /= 2;
  2180 	ckey &= rgbmask;
  2181 
  2182 	while ( height-- ) {
  2183 		DUFFS_LOOP(
  2184 		{
  2185 			if ( (*srcp & rgbmask) != ckey ) {
  2186 				*dstp = *srcp;
  2187 			}
  2188 			dstp++;
  2189 			srcp++;
  2190 		},
  2191 		width);
  2192 		srcp += srcskip;
  2193 		dstp += dstskip;
  2194 	}
  2195 }
  2196 
  2197 static void BlitNtoNKey(SDL_BlitInfo *info)
  2198 {
  2199 	int width = info->d_width;
  2200 	int height = info->d_height;
  2201 	Uint8 *src = info->s_pixels;
  2202 	int srcskip = info->s_skip;
  2203 	Uint8 *dst = info->d_pixels;
  2204 	int dstskip = info->d_skip;
  2205 	Uint32 ckey = info->src->colorkey;
  2206 	SDL_PixelFormat *srcfmt = info->src;
  2207 	SDL_PixelFormat *dstfmt = info->dst;
  2208 	int srcbpp = srcfmt->BytesPerPixel;
  2209 	int dstbpp = dstfmt->BytesPerPixel;
  2210 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2211 	Uint32 rgbmask = ~srcfmt->Amask;
  2212 
  2213 	/* Set up some basic variables */
  2214 	ckey &= rgbmask;
  2215 
  2216 	while ( height-- ) {
  2217 		DUFFS_LOOP(
  2218 		{
  2219 		        Uint32 Pixel;
  2220 			unsigned sR;
  2221 			unsigned sG;
  2222 			unsigned sB;
  2223 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2224 			if ( (Pixel & rgbmask) != ckey ) {
  2225 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2226 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2227 					      sR, sG, sB, alpha);
  2228 			}
  2229 			dst += dstbpp;
  2230 			src += srcbpp;
  2231 		},
  2232 		width);
  2233 		src += srcskip;
  2234 		dst += dstskip;
  2235 	}
  2236 }
  2237 
  2238 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2239 {
  2240 	int width = info->d_width;
  2241 	int height = info->d_height;
  2242 	Uint8 *src = info->s_pixels;
  2243 	int srcskip = info->s_skip;
  2244 	Uint8 *dst = info->d_pixels;
  2245 	int dstskip = info->d_skip;
  2246 	Uint32 ckey = info->src->colorkey;
  2247 	SDL_PixelFormat *srcfmt = info->src;
  2248 	SDL_PixelFormat *dstfmt = info->dst;
  2249 	Uint32 rgbmask = ~srcfmt->Amask;
  2250 
  2251 	Uint8 srcbpp;
  2252 	Uint8 dstbpp;
  2253 	Uint32 Pixel;
  2254 	unsigned sR, sG, sB, sA;
  2255 
  2256 	/* Set up some basic variables */
  2257 	srcbpp = srcfmt->BytesPerPixel;
  2258 	dstbpp = dstfmt->BytesPerPixel;
  2259 	ckey &= rgbmask;
  2260 
  2261 	/* FIXME: should map alpha to [0..255] correctly! */
  2262 	while ( height-- ) {
  2263 		DUFFS_LOOP(
  2264 		{
  2265 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2266 				      sR, sG, sB, sA);
  2267 			if ( (Pixel & rgbmask) != ckey ) {
  2268 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2269 						sR, sG, sB, sA);
  2270 			}
  2271 			dst += dstbpp;
  2272 			src += srcbpp;
  2273 		},
  2274 		width);
  2275 		src += srcskip;
  2276 		dst += dstskip;
  2277 	}
  2278 }
  2279 
  2280 /* Normal N to N optimized blitters */
  2281 struct blit_table {
  2282 	Uint32 srcR, srcG, srcB;
  2283 	int dstbpp;
  2284 	Uint32 dstR, dstG, dstB;
  2285 	Uint32 blit_features;
  2286 	void *aux_data;
  2287 	SDL_loblit blitfunc;
  2288 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
  2289 };
/* Blitter table for 8-bit sources: a single default row with no blit
 * function.  The trailing `alpha` member is omitted from the initializer
 * and is therefore zero-initialized.
 */
static const struct blit_table normal_blit_1[] = {
	/* Default for 8-bit RGB source, an invalid combination */
	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
};
/* Blitter table for 16-bit sources.
 *
 * Row layout (see struct blit_table): source R,G,B masks, destination bytes
 * per pixel, destination R,G,B masks, feature bits, aux_data, blit function,
 * supported alpha modes.  Hermes or Altivec rows are compiled in first when
 * the matching SDL_*_BLITTERS macro is set; the RGB565 lookup-table blitters
 * follow, and the last row is the BlitNtoN default.
 */
static const struct blit_table normal_blit_2[] = {
#if SDL_HERMES_BLITTERS
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
      0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
#elif SDL_ALTIVEC_BLITTERS
    /* has-altivec */
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
#endif
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
      0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
      0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
      0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },

    /* Default for 16-bit RGB source, used if no other blitter matches */
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/* Blitter table for 24-bit sources: only the BlitNtoN default row. */
static const struct blit_table normal_blit_3[] = {
	/* Default for 24-bit RGB source, never optimized */
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/* Blitter table for 32-bit sources.
 *
 * Row layout (see struct blit_table): source R,G,B masks, destination bytes
 * per pixel, destination R,G,B masks, feature bits, aux_data, blit function,
 * supported alpha modes.  With SDL_HERMES_BLITTERS the Hermes converters are
 * listed (rows with feature bit 1 use ConvertMMX, the others ConvertX86);
 * otherwise the optional Altivec rows and the C RGB888->RGB565/RGB555
 * blitters are used.  The last row is the BlitNtoN default.
 */
static const struct blit_table normal_blit_4[] = {
#if SDL_HERMES_BLITTERS
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
      1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
      0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
      1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
      0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
      1, ConvertMMXpII32_24RGB888, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
      0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
      0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
      0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
      0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
#else
#if SDL_ALTIVEC_BLITTERS
    /* has-altivec | dont-use-prefetch */
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
      6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    /* has-altivec */
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    /* has-altivec */
    { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
#endif
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
#endif
	/* Default for 32-bit RGB source, used if no other blitter matches */
	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
  2375 static const struct blit_table *normal_blit[] = {
  2376 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2377 };
  2378 
  /* True when surface mask x equals table mask y, or when the table mask y
     is zero (a zero table entry matches any mask) */
  #define MASKOK(x, y) (((x) == (y)) || !(y))
  2381 
  2382 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2383 {
  2384 	struct private_swaccel *sdata;
  2385 	SDL_PixelFormat *srcfmt;
  2386 	SDL_PixelFormat *dstfmt;
  2387 	const struct blit_table *table;
  2388 	int which;
  2389 	SDL_loblit blitfun;
  2390 
  2391 	/* Set up data for choosing the blit */
  2392 	sdata = surface->map->sw_data;
  2393 	srcfmt = surface->format;
  2394 	dstfmt = surface->map->dst->format;
  2395 
  2396 	if ( blit_index & 2 ) {
  2397 	        /* alpha or alpha+colorkey */
  2398 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2399 	}
  2400 
  2401 	/* We don't support destinations less than 8-bits */
  2402 	if ( dstfmt->BitsPerPixel < 8 ) {
  2403 		return(NULL);
  2404 	}
  2405 	
  2406 	if(blit_index == 1) {
  2407 	    /* colorkey blit: Here we don't have too many options, mostly
  2408 	       because RLE is the preferred fast way to deal with this.
  2409 	       If a particular case turns out to be useful we'll add it. */
  2410 
  2411 	    if(srcfmt->BytesPerPixel == 2
  2412 	       && surface->map->identity)
  2413 		return Blit2to2Key;
  2414 	    else if(dstfmt->BytesPerPixel == 1)
  2415 		return BlitNto1Key;
  2416 	    else {
  2417 #if SDL_ALTIVEC_BLITTERS
  2418         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2419             return Blit32to32KeyAltivec;
  2420         } else
  2421 #endif
  2422 
  2423 		if(srcfmt->Amask && dstfmt->Amask)
  2424 		    return BlitNtoNKeyCopyAlpha;
  2425 		else
  2426 		    return BlitNtoNKey;
  2427 	    }
  2428 	}
  2429 
  2430 	blitfun = NULL;
  2431 	if ( dstfmt->BitsPerPixel == 8 ) {
  2432 		/* We assume 8-bit destinations are palettized */
  2433 		if ( (srcfmt->BytesPerPixel == 4) &&
  2434 		     (srcfmt->Rmask == 0x00FF0000) &&
  2435 		     (srcfmt->Gmask == 0x0000FF00) &&
  2436 		     (srcfmt->Bmask == 0x000000FF) ) {
  2437 			if ( surface->map->table ) {
  2438 				blitfun = Blit_RGB888_index8_map;
  2439 			} else {
  2440 #if SDL_HERMES_BLITTERS
  2441 				sdata->aux_data = ConvertX86p32_8RGB332;
  2442 				blitfun = ConvertX86;
  2443 #else
  2444 				blitfun = Blit_RGB888_index8;
  2445 #endif
  2446 			}
  2447 		} else {
  2448 			blitfun = BlitNto1;
  2449 		}
  2450 	} else {
  2451 		/* Now the meat, choose the blitter we want */
  2452 		int a_need = NO_ALPHA;
  2453 		if(dstfmt->Amask)
  2454 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2455 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2456 		for ( which=0; table[which].dstbpp; ++which ) {
  2457 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2458 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2459 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2460 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2461 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2462 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2463 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2464 			    (a_need & table[which].alpha) == a_need &&
  2465 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2466 				break;
  2467 		}
  2468 		sdata->aux_data = table[which].aux_data;
  2469 		blitfun = table[which].blitfunc;
  2470 
  2471 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2472 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2473 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2474 			     srcfmt->Rmask == dstfmt->Rmask &&
  2475 			     srcfmt->Gmask == dstfmt->Gmask &&
  2476 			     srcfmt->Bmask == dstfmt->Bmask ) {
  2477 				blitfun = Blit4to4MaskAlpha;
  2478 			} else if ( a_need == COPY_ALPHA ) {
  2479 			    blitfun = BlitNtoNCopyAlpha;
  2480 			}
  2481 		}
  2482 	}
  2483 
  2484 #ifdef DEBUG_ASM
  2485 #if SDL_HERMES_BLITTERS
  2486 	if ( blitfun == ConvertMMX )
  2487 		fprintf(stderr, "Using mmx blit\n");
  2488 	else
  2489 	if ( blitfun == ConvertX86 )
  2490 		fprintf(stderr, "Using asm blit\n");
  2491 	else
  2492 #endif
  2493 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
  2494 		fprintf(stderr, "Using C blit\n");
  2495 	else
  2496 		fprintf(stderr, "Using optimized C blit\n");
  2497 #endif /* DEBUG_ASM */
  2498 
  2499 	return(blitfun);
  2500 }