src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Tue, 07 Feb 2006 06:59:48 +0000
changeset 1336 3692456e7b0f
parent 1330 450721ad5436
child 1338 604d73db6802
permissions -rw-r--r--
Use SDL_ prefixed versions of C library functions.
FIXME:
Change #include <stdlib.h> to #include "SDL_stdlib.h"
Change #include <string.h> to #include "SDL_string.h"
Make sure nothing else broke because of this...
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #include "SDL_types.h"
    24 #include "SDL_video.h"
    25 #include "SDL_blit.h"
    26 #include "SDL_byteorder.h"
    27 #include "SDL_cpuinfo.h"
    28 
    29 /* Functions to blit from N-bit surfaces to other surfaces */
    30 
    31 #ifdef USE_ALTIVEC_BLITTERS
    32 #ifdef HAVE_ALTIVEC_H
    33 #include <altivec.h>
    34 #endif
    35 #include <assert.h>
    36 #include <stdlib.h>
    37 #ifdef MACOSX
    38 #include <sys/sysctl.h>
    39 static size_t GetL3CacheSize( void )
    40 {
    41     const char key[] = "hw.l3cachesize";
    42     u_int64_t result = 0;
    43     size_t typeSize = sizeof( result );
    44 
    45 
    46     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
    47     if( 0 != err ) return 0;
    48 
    49     return result;
    50 }
    51 #else
    52 static size_t GetL3CacheSize( void )
    53 {
    54     /* XXX: Just guess G4 */
    55     return 2097152;
    56 }
    57 #endif /* MACOSX */
    58 
    59 #if ((defined MACOSX) && (__GNUC__ < 4))
    60     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    61         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    62     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    63         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    64 #else
    65     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    66         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    67     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    68         (vector unsigned short) { a,b,c,d,e,f,g,h }
    69 #endif
    70 
    71 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    72 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    73                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    74                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    75                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    76                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    77 
    78 #define MAKE8888(dstfmt, r, g, b, a)  \
    79     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    80       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    81       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    82       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    83 
    84 /*
    85  * Data Stream Touch...Altivec cache prefetching.
    86  *
    87  *  Don't use this on a G5...however, the speed boost is very significant
    88  *   on a G4.
    89  */
    90 #define DST_CHAN_SRC 1
    91 #define DST_CHAN_DEST 2
    92 
    93 /* macro to set DST control word value... */
    94 #define DST_CTRL(size, count, stride) \
    95     (((size) << 24) | ((count) << 16) | (stride))
    96 
    97 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    98     ? vec_lvsl(0, src) \
    99     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   100 
   101 /* Calculate the permute vector used for 32->32 swizzling */
   102 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
   103                                   const SDL_PixelFormat *dstfmt)
   104 {
   105     /*
   106     * We have to assume that the bits that aren't used by other
   107      *  colors is alpha, and it's one complete byte, since some formats
   108      *  leave alpha with a zero mask, but we should still swizzle the bits.
   109      */
   110     /* ARGB */
   111     const static struct SDL_PixelFormat default_pixel_format = {
   112         NULL, 0, 0,
   113         0, 0, 0, 0,
   114         16, 8, 0, 24,
   115         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   116         0, 0};
   117     if (!srcfmt) {
   118         srcfmt = &default_pixel_format;
   119     }
   120     if (!dstfmt) {
   121         dstfmt = &default_pixel_format;
   122     }
   123     vector unsigned char plus = VECUINT8_LITERAL(
   124                                       0x00, 0x00, 0x00, 0x00,
   125                                       0x04, 0x04, 0x04, 0x04,
   126                                       0x08, 0x08, 0x08, 0x08,
   127                                       0x0C, 0x0C, 0x0C, 0x0C );
   128     vector unsigned char vswiz;
   129     vector unsigned int srcvec;
   130 #define RESHIFT(X) (3 - ((X) >> 3))
   131     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   132     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   133     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   134     Uint32 amask;
   135     /* Use zero for alpha if either surface doesn't have alpha */
   136     if (dstfmt->Amask) {
   137         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   138     } else {    
   139         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   140     }           
   141 #undef RESHIFT  
   142     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
   143     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   144     return(vswiz);
   145 }
   146 
   147 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   148 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   149     int height = info->d_height;
   150     Uint8 *src = (Uint8 *) info->s_pixels;
   151     int srcskip = info->s_skip;
   152     Uint8 *dst = (Uint8 *) info->d_pixels;
   153     int dstskip = info->d_skip;
   154     SDL_PixelFormat *srcfmt = info->src;
   155     vector unsigned char valpha = vec_splat_u8(0);
   156     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   157     vector unsigned char vgmerge = VECUINT8_LITERAL(
   158         0x00, 0x02, 0x00, 0x06,
   159         0x00, 0x0a, 0x00, 0x0e,
   160         0x00, 0x12, 0x00, 0x16,
   161         0x00, 0x1a, 0x00, 0x1e);
   162     vector unsigned short v1 = vec_splat_u16(1);
   163     vector unsigned short v3 = vec_splat_u16(3);
   164     vector unsigned short v3f = VECUINT16_LITERAL(
   165         0x003f, 0x003f, 0x003f, 0x003f,
   166         0x003f, 0x003f, 0x003f, 0x003f);
   167     vector unsigned short vfc = VECUINT16_LITERAL(
   168         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   169         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   170     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   171     vf800 = vec_sl(vf800, vec_splat_u16(8));
   172 
   173     while (height--) {
   174         vector unsigned char valigner;
   175         vector unsigned char voverflow;
   176         vector unsigned char vsrc;
   177 
   178         int width = info->d_width;
   179         int extrawidth;
   180 
   181         /* do scalar until we can align... */
   182 #define ONE_PIXEL_BLEND(condition, widthvar) \
   183         while (condition) { \
   184             Uint32 Pixel; \
   185             unsigned sR, sG, sB, sA; \
   186             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   187                           sR, sG, sB, sA); \
   188             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   189                                 ((sG << 3) & 0x000007E0) | \
   190                                 ((sB >> 3) & 0x0000001F)); \
   191             dst += 2; \
   192             src += 4; \
   193             widthvar--; \
   194         }
   195 
   196         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   197 
   198         /* After all that work, here's the vector part! */
   199         extrawidth = (width % 8);  /* trailing unaligned stores */
   200         width -= extrawidth;
   201         vsrc = vec_ld(0, src);
   202         valigner = VEC_ALIGNER(src);
   203 
   204         while (width) {
   205             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   206             vector unsigned int vsrc1, vsrc2;
   207             vector unsigned char vdst;
   208 
   209             voverflow = vec_ld(15, src);
   210             vsrc = vec_perm(vsrc, voverflow, valigner);
   211             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   212             src += 16;
   213             vsrc = voverflow;
   214             voverflow = vec_ld(15, src);
   215             vsrc = vec_perm(vsrc, voverflow, valigner);
   216             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   217             /* 1555 */
   218             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   219             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   220             vgpixel = vec_and(vgpixel, vfc);
   221             vgpixel = vec_sl(vgpixel, v3);
   222             vrpixel = vec_sl(vpixel, v1);
   223             vrpixel = vec_and(vrpixel, vf800);
   224             vbpixel = vec_and(vpixel, v3f);
   225             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   226             /* 565 */
   227             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   228             vec_st(vdst, 0, dst);
   229 
   230             width -= 8;
   231             src += 16;
   232             dst += 16;
   233             vsrc = voverflow;
   234         }
   235 
   236         assert(width == 0);
   237 
   238 
   239         /* do scalar until we can align... */
   240         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   241 #undef ONE_PIXEL_BLEND
   242 
   243         src += srcskip;  /* move to next row, accounting for pitch. */
   244         dst += dstskip;
   245     }
   246 
   247 
   248 }
   249 
   250 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   251     int height = info->d_height;
   252     Uint8 *src = (Uint8 *) info->s_pixels;
   253     int srcskip = info->s_skip;
   254     Uint8 *dst = (Uint8 *) info->d_pixels;
   255     int dstskip = info->d_skip;
   256     SDL_PixelFormat *srcfmt = info->src;
   257     SDL_PixelFormat *dstfmt = info->dst;
   258     unsigned alpha;
   259     vector unsigned char valpha;
   260     vector unsigned char vpermute;
   261     vector unsigned short vf800;
   262     vector unsigned int v8 = vec_splat_u32(8);
   263     vector unsigned int v16 = vec_add(v8, v8);
   264     vector unsigned short v2 = vec_splat_u16(2);
   265     vector unsigned short v3 = vec_splat_u16(3);
   266     /* 
   267         0x10 - 0x1f is the alpha
   268         0x00 - 0x0e evens are the red
   269         0x01 - 0x0f odds are zero
   270     */
   271     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   272         0x10, 0x00, 0x01, 0x01,
   273         0x10, 0x02, 0x01, 0x01,
   274         0x10, 0x04, 0x01, 0x01,
   275         0x10, 0x06, 0x01, 0x01
   276     );
   277     vector unsigned char vredalpha2 = (vector unsigned char) (
   278         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   279     );
   280     /*
   281         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   282         0x11 - 0x0f odds are blue
   283     */
   284     vector unsigned char vblue1 = VECUINT8_LITERAL(
   285         0x00, 0x01, 0x02, 0x11,
   286         0x04, 0x05, 0x06, 0x13,
   287         0x08, 0x09, 0x0a, 0x15,
   288         0x0c, 0x0d, 0x0e, 0x17
   289     );
   290     vector unsigned char vblue2 = (vector unsigned char)(
   291         vec_add((vector unsigned int)vblue1, v8)
   292     );
   293     /*
   294         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   295         0x10 - 0x0e evens are green
   296     */
   297     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   298         0x00, 0x01, 0x10, 0x03,
   299         0x04, 0x05, 0x12, 0x07,
   300         0x08, 0x09, 0x14, 0x0b,
   301         0x0c, 0x0d, 0x16, 0x0f
   302     );
   303     vector unsigned char vgreen2 = (vector unsigned char)(
   304         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   305     );
   306     
   307 
   308     assert(srcfmt->BytesPerPixel == 2);
   309     assert(dstfmt->BytesPerPixel == 4);
   310 
   311     vf800 = (vector unsigned short)vec_splat_u8(-7);
   312     vf800 = vec_sl(vf800, vec_splat_u16(8));
   313 
   314     if (dstfmt->Amask && srcfmt->alpha) {
   315         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   316         valpha = vec_splat(valpha, 0);
   317     } else {
   318         alpha = 0;
   319         valpha = vec_splat_u8(0);
   320     }
   321 
   322     vpermute = calc_swizzle32(NULL, dstfmt);
   323     while (height--) {
   324         vector unsigned char valigner;
   325         vector unsigned char voverflow;
   326         vector unsigned char vsrc;
   327 
   328         int width = info->d_width;
   329         int extrawidth;
   330 
   331         /* do scalar until we can align... */
   332 #define ONE_PIXEL_BLEND(condition, widthvar) \
   333         while (condition) { \
   334             unsigned sR, sG, sB; \
   335             unsigned short Pixel = *((unsigned short *)src); \
   336             sR = (Pixel >> 8) & 0xf8; \
   337             sG = (Pixel >> 3) & 0xfc; \
   338             sB = (Pixel << 3) & 0xf8; \
   339             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   340             src += 2; \
   341             dst += 4; \
   342             widthvar--; \
   343         }
   344         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   345 
   346         /* After all that work, here's the vector part! */
   347         extrawidth = (width % 8);  /* trailing unaligned stores */
   348         width -= extrawidth;
   349         vsrc = vec_ld(0, src);
   350         valigner = VEC_ALIGNER(src);
   351 
   352         while (width) {
   353             vector unsigned short vR, vG, vB;
   354             vector unsigned char vdst1, vdst2;
   355 
   356             voverflow = vec_ld(15, src);
   357             vsrc = vec_perm(vsrc, voverflow, valigner);
   358 
   359             vR = vec_and((vector unsigned short)vsrc, vf800);
   360             vB = vec_sl((vector unsigned short)vsrc, v3);
   361             vG = vec_sl(vB, v2);
   362 
   363             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   364             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   365             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   366             vdst1 = vec_perm(vdst1, valpha, vpermute);
   367             vec_st(vdst1, 0, dst);
   368 
   369             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   370             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   371             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   372             vdst2 = vec_perm(vdst2, valpha, vpermute);
   373             vec_st(vdst2, 16, dst);
   374             
   375             width -= 8;
   376             dst += 32;
   377             src += 16;
   378             vsrc = voverflow;
   379         }
   380 
   381         assert(width == 0);
   382 
   383 
   384         /* do scalar until we can align... */
   385         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   386 #undef ONE_PIXEL_BLEND
   387 
   388         src += srcskip;  /* move to next row, accounting for pitch. */
   389         dst += dstskip;
   390     }
   391 
   392 }
   393 
   394 
   395 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   396     int height = info->d_height;
   397     Uint8 *src = (Uint8 *) info->s_pixels;
   398     int srcskip = info->s_skip;
   399     Uint8 *dst = (Uint8 *) info->d_pixels;
   400     int dstskip = info->d_skip;
   401     SDL_PixelFormat *srcfmt = info->src;
   402     SDL_PixelFormat *dstfmt = info->dst;
   403     unsigned alpha;
   404     vector unsigned char valpha;
   405     vector unsigned char vpermute;
   406     vector unsigned short vf800;
   407     vector unsigned int v8 = vec_splat_u32(8);
   408     vector unsigned int v16 = vec_add(v8, v8);
   409     vector unsigned short v1 = vec_splat_u16(1);
   410     vector unsigned short v3 = vec_splat_u16(3);
   411     /* 
   412         0x10 - 0x1f is the alpha
   413         0x00 - 0x0e evens are the red
   414         0x01 - 0x0f odds are zero
   415     */
   416     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   417         0x10, 0x00, 0x01, 0x01,
   418         0x10, 0x02, 0x01, 0x01,
   419         0x10, 0x04, 0x01, 0x01,
   420         0x10, 0x06, 0x01, 0x01
   421     );
   422     vector unsigned char vredalpha2 = (vector unsigned char)(
   423         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   424     );
   425     /*
   426         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   427         0x11 - 0x0f odds are blue
   428     */
   429     vector unsigned char vblue1 = VECUINT8_LITERAL(
   430         0x00, 0x01, 0x02, 0x11,
   431         0x04, 0x05, 0x06, 0x13,
   432         0x08, 0x09, 0x0a, 0x15,
   433         0x0c, 0x0d, 0x0e, 0x17
   434     );
   435     vector unsigned char vblue2 = (vector unsigned char)(
   436         vec_add((vector unsigned int)vblue1, v8)
   437     );
   438     /*
   439         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   440         0x10 - 0x0e evens are green
   441     */
   442     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   443         0x00, 0x01, 0x10, 0x03,
   444         0x04, 0x05, 0x12, 0x07,
   445         0x08, 0x09, 0x14, 0x0b,
   446         0x0c, 0x0d, 0x16, 0x0f
   447     );
   448     vector unsigned char vgreen2 = (vector unsigned char)(
   449         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   450     );
   451     
   452 
   453     assert(srcfmt->BytesPerPixel == 2);
   454     assert(dstfmt->BytesPerPixel == 4);
   455 
   456     vf800 = (vector unsigned short)vec_splat_u8(-7);
   457     vf800 = vec_sl(vf800, vec_splat_u16(8));
   458 
   459     if (dstfmt->Amask && srcfmt->alpha) {
   460         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   461         valpha = vec_splat(valpha, 0);
   462     } else {
   463         alpha = 0;
   464         valpha = vec_splat_u8(0);
   465     }
   466 
   467     vpermute = calc_swizzle32(NULL, dstfmt);
   468     while (height--) {
   469         vector unsigned char valigner;
   470         vector unsigned char voverflow;
   471         vector unsigned char vsrc;
   472 
   473         int width = info->d_width;
   474         int extrawidth;
   475 
   476         /* do scalar until we can align... */
   477 #define ONE_PIXEL_BLEND(condition, widthvar) \
   478         while (condition) { \
   479             unsigned sR, sG, sB; \
   480             unsigned short Pixel = *((unsigned short *)src); \
   481             sR = (Pixel >> 7) & 0xf8; \
   482             sG = (Pixel >> 2) & 0xf8; \
   483             sB = (Pixel << 3) & 0xf8; \
   484             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   485             src += 2; \
   486             dst += 4; \
   487             widthvar--; \
   488         }
   489         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   490 
   491         /* After all that work, here's the vector part! */
   492         extrawidth = (width % 8);  /* trailing unaligned stores */
   493         width -= extrawidth;
   494         vsrc = vec_ld(0, src);
   495         valigner = VEC_ALIGNER(src);
   496 
   497         while (width) {
   498             vector unsigned short vR, vG, vB;
   499             vector unsigned char vdst1, vdst2;
   500 
   501             voverflow = vec_ld(15, src);
   502             vsrc = vec_perm(vsrc, voverflow, valigner);
   503 
   504             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   505             vB = vec_sl((vector unsigned short)vsrc, v3);
   506             vG = vec_sl(vB, v3);
   507 
   508             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   509             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   510             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   511             vdst1 = vec_perm(vdst1, valpha, vpermute);
   512             vec_st(vdst1, 0, dst);
   513 
   514             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   515             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   516             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   517             vdst2 = vec_perm(vdst2, valpha, vpermute);
   518             vec_st(vdst2, 16, dst);
   519             
   520             width -= 8;
   521             dst += 32;
   522             src += 16;
   523             vsrc = voverflow;
   524         }
   525 
   526         assert(width == 0);
   527 
   528 
   529         /* do scalar until we can align... */
   530         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   531 #undef ONE_PIXEL_BLEND
   532 
   533         src += srcskip;  /* move to next row, accounting for pitch. */
   534         dst += dstskip;
   535     }
   536 
   537 }
   538 
   539 static void BlitNtoNKey(SDL_BlitInfo *info);
   540 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   541 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   542 {
   543     int height = info->d_height;
   544     Uint32 *srcp = (Uint32 *) info->s_pixels;
   545     int srcskip = info->s_skip;
   546     Uint32 *dstp = (Uint32 *) info->d_pixels;
   547     int dstskip = info->d_skip;
   548     SDL_PixelFormat *srcfmt = info->src;
   549     int srcbpp = srcfmt->BytesPerPixel;
   550     SDL_PixelFormat *dstfmt = info->dst;
   551     int dstbpp = dstfmt->BytesPerPixel;
   552     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   553 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   554     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   555 	Uint32 ckey = info->src->colorkey;
   556     vector unsigned int valpha;
   557     vector unsigned char vpermute;
   558     vector unsigned char vzero;
   559     vector unsigned int vckey;
   560     vector unsigned int vrgbmask;
   561     vpermute = calc_swizzle32(srcfmt, dstfmt);
   562     if (info->d_width < 16) {
   563         if(copy_alpha) {
   564             return BlitNtoNKeyCopyAlpha(info);
   565         } else {
   566             return BlitNtoNKey(info);
   567         }
   568     }
   569     vzero = vec_splat_u8(0);
   570     if (alpha) {
   571         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   572         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   573     } else {
   574         valpha = (vector unsigned int)vzero;
   575     }
   576     ckey &= rgbmask;
   577     ((unsigned int *)(char*)&vckey)[0] = ckey;
   578     vckey = vec_splat(vckey, 0);
   579     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   580     vrgbmask = vec_splat(vrgbmask, 0);
   581 
   582     while (height--) {
   583 #define ONE_PIXEL_BLEND(condition, widthvar) \
   584         if (copy_alpha) { \
   585             while (condition) { \
   586                 Uint32 Pixel; \
   587                 unsigned sR, sG, sB, sA; \
   588                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   589                           sR, sG, sB, sA); \
   590                 if ( (Pixel & rgbmask) != ckey ) { \
   591                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   592                             sR, sG, sB, sA); \
   593                 } \
   594                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   595                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   596                 widthvar--; \
   597             } \
   598         } else { \
   599             while (condition) { \
   600                 Uint32 Pixel; \
   601                 unsigned sR, sG, sB; \
   602                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   603                 if ( Pixel != ckey ) { \
   604                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   605                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   606                               sR, sG, sB, alpha); \
   607                 } \
   608                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   609                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   610                 widthvar--; \
   611             } \
   612         }
   613         int width = info->d_width;
   614         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   615         assert(width > 0);
   616         if (width > 0) {
   617             int extrawidth = (width % 4);
   618             vector unsigned char valigner = VEC_ALIGNER(srcp);
   619             vector unsigned int vs = vec_ld(0, srcp);
   620             width -= extrawidth;
   621             assert(width >= 4);
   622             while (width) {
   623                 vector unsigned char vsel;
   624                 vector unsigned int vd;
   625                 vector unsigned int voverflow = vec_ld(15, srcp);
   626                 /* load the source vec */
   627                 vs = vec_perm(vs, voverflow, valigner);
   628                 /* vsel is set for items that match the key */
   629                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   630                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   631                 /* permute the src vec to the dest format */
   632                 vs = vec_perm(vs, valpha, vpermute);
   633                 /* load the destination vec */
   634                 vd = vec_ld(0, dstp);
   635                 /* select the source and dest into vs */
   636                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   637                 
   638                 vec_st(vd, 0, dstp);
   639                 srcp += 4;
   640                 width -= 4;
   641                 dstp += 4;
   642                 vs = voverflow;
   643             }
   644             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   645 #undef ONE_PIXEL_BLEND
   646             srcp += srcskip >> 2;
   647             dstp += dstskip >> 2;
   648         }
   649     }
   650 }
   651 
   652 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   653 /* Use this on a G5 */
   654 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   655 {
   656     int height = info->d_height;
   657     Uint32 *src = (Uint32 *) info->s_pixels;
   658     int srcskip = info->s_skip;
   659     Uint32 *dst = (Uint32 *) info->d_pixels;
   660     int dstskip = info->d_skip;
   661     SDL_PixelFormat *srcfmt = info->src;
   662     int srcbpp = srcfmt->BytesPerPixel;
   663     SDL_PixelFormat *dstfmt = info->dst;
   664     int dstbpp = dstfmt->BytesPerPixel;
   665     vector unsigned int vzero = vec_splat_u32(0);
   666     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   667     if (dstfmt->Amask && !srcfmt->Amask) {
   668         if (srcfmt->alpha) {
   669             vector unsigned char valpha;
   670             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   671             vzero = (vector unsigned int)vec_splat(valpha, 0);
   672         }
   673     }
   674 
   675     assert(srcbpp == 4);
   676     assert(dstbpp == 4);
   677 
   678     while (height--) {
   679         vector unsigned char valigner;
   680         vector unsigned int vbits;
   681         vector unsigned int voverflow;
   682         Uint32 bits;
   683         Uint8 r, g, b, a;
   684 
   685         int width = info->d_width;
   686         int extrawidth;
   687 
   688         /* do scalar until we can align... */
   689         while ((UNALIGNED_PTR(dst)) && (width)) {
   690             bits = *(src++);
   691             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   692             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   693             width--;
   694         }
   695 
   696         /* After all that work, here's the vector part! */
   697         extrawidth = (width % 4);
   698         width -= extrawidth;
   699         valigner = VEC_ALIGNER(src);
   700         vbits = vec_ld(0, src);
   701 
   702        while (width) {
   703             voverflow = vec_ld(15, src);
   704             src += 4;
   705             width -= 4;
   706             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   707             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   708             vec_st(vbits, 0, dst);  /* store it back out. */
   709             dst += 4;
   710             vbits = voverflow;
   711         }
   712 
   713         assert(width == 0);
   714 
   715         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   716         while (extrawidth) {
   717             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   718             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   719             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   720             extrawidth--;
   721         }
   722 
   723         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   724         dst += dstskip >> 2;
   725     }
   726 
   727 }
   728 
   729 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   730 /* Use this on a G4 */
   731 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
   732 {
   733     const int scalar_dst_lead = sizeof (Uint32) * 4;
   734     const int vector_dst_lead = sizeof (Uint32) * 16;
   735 
   736     int height = info->d_height;
   737     Uint32 *src = (Uint32 *) info->s_pixels;
   738     int srcskip = info->s_skip;
   739     Uint32 *dst = (Uint32 *) info->d_pixels;
   740     int dstskip = info->d_skip;
   741     SDL_PixelFormat *srcfmt = info->src;
   742     int srcbpp = srcfmt->BytesPerPixel;
   743     SDL_PixelFormat *dstfmt = info->dst;
   744     int dstbpp = dstfmt->BytesPerPixel;
   745     vector unsigned int vzero = vec_splat_u32(0);
   746     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   747     if (dstfmt->Amask && !srcfmt->Amask) {
   748         if (srcfmt->alpha) {
   749             vector unsigned char valpha;
   750             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   751             vzero = (vector unsigned int)vec_splat(valpha, 0);
   752         }
   753     }
   754 
   755     assert(srcbpp == 4);
   756     assert(dstbpp == 4);
   757 
   758     while (height--) {
   759         vector unsigned char valigner;
   760         vector unsigned int vbits;
   761         vector unsigned int voverflow;
   762         Uint32 bits;
   763         Uint8 r, g, b, a;
   764 
   765         int width = info->d_width;
   766         int extrawidth;
   767 
   768         /* do scalar until we can align... */
   769         while ((UNALIGNED_PTR(dst)) && (width)) {
   770             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   771             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   772             bits = *(src++);
   773             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   774             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   775             width--;
   776         }
   777 
   778         /* After all that work, here's the vector part! */
   779         extrawidth = (width % 4);
   780         width -= extrawidth;
   781         valigner = VEC_ALIGNER(src);
   782         vbits = vec_ld(0, src);
   783 
   784         while (width) {
   785             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   786             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   787             voverflow = vec_ld(15, src);
   788             src += 4;
   789             width -= 4;
   790             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   791             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   792             vec_st(vbits, 0, dst);  /* store it back out. */
   793             dst += 4;
   794             vbits = voverflow;
   795         }
   796         
   797         assert(width == 0);
   798 
   799         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   800         while (extrawidth) {
   801             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   802             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   803             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   804             extrawidth--;
   805         }
   806 
   807         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   808         dst += dstskip >> 2;
   809     }
   810 
   811     vec_dss(DST_CHAN_SRC);
   812     vec_dss(DST_CHAN_DEST);
   813 }
   814 
   815 static Uint32 GetBlitFeatures( void )
   816 {
   817     static Uint32 features = 0xffffffff;
   818     if (features == 0xffffffff) {
   819         /* Provide an override for testing .. */
   820         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   821         if (override) {
   822             features = 0;
   823             SDL_sscanf(override, "%u", &features);
   824         } else {
   825             features = ( 0
   826                 /* Feature 1 is has-MMX */
   827                 | ((SDL_HasMMX()) ? 1 : 0)
   828                 /* Feature 2 is has-AltiVec */
   829                 | ((SDL_HasAltiVec()) ? 2 : 0)
   830                 /* Feature 4 is dont-use-prefetch */
   831                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   832                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   833             );
   834         }
   835     }
   836     return features;
   837 }
   838 #else
   839 /* Feature 1 is has-MMX */
   840 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   841 #endif
   842 
   843 /* This is now endian dependent */
   844 #if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
   845 #define HI	1
   846 #define LO	0
   847 #else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
   848 #define HI	0
   849 #define LO	1
   850 #endif
   851 
   852 #ifdef USE_ASMBLIT
   853 
   854 /* Heheheh, we coerce Hermes into using SDL blit information */
   855 #define X86_ASSEMBLER
   856 #define HermesConverterInterface	SDL_BlitInfo
   857 #define HermesClearInterface		void
   858 #define STACKCALL
   859 
   860 #include "HeadMMX.h"
   861 #include "HeadX86.h"
   862 
   863 #else
   864 
   865 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   866 #define RGB888_RGB332(dst, src) { \
   867 	dst = (((src)&0x00E00000)>>16)| \
   868 	      (((src)&0x0000E000)>>11)| \
   869 	      (((src)&0x000000C0)>>6); \
   870 }
   871 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   872 {
   873 #ifndef USE_DUFFS_LOOP
   874 	int c;
   875 #endif
   876 	int width, height;
   877 	Uint32 *src;
   878 	const Uint8 *map;
   879 	Uint8 *dst;
   880 	int srcskip, dstskip;
   881 
   882 	/* Set up some basic variables */
   883 	width = info->d_width;
   884 	height = info->d_height;
   885 	src = (Uint32 *)info->s_pixels;
   886 	srcskip = info->s_skip/4;
   887 	dst = info->d_pixels;
   888 	dstskip = info->d_skip;
   889 	map = info->table;
   890 
   891 	if ( map == NULL ) {
   892 		while ( height-- ) {
   893 #ifdef USE_DUFFS_LOOP
   894 			DUFFS_LOOP(
   895 				RGB888_RGB332(*dst++, *src);
   896 			, width);
   897 #else
   898 			for ( c=width/4; c; --c ) {
   899 				/* Pack RGB into 8bit pixel */
   900 				++src;
   901 				RGB888_RGB332(*dst++, *src);
   902 				++src;
   903 				RGB888_RGB332(*dst++, *src);
   904 				++src;
   905 				RGB888_RGB332(*dst++, *src);
   906 				++src;
   907 			}
   908 			switch ( width & 3 ) {
   909 				case 3:
   910 					RGB888_RGB332(*dst++, *src);
   911 					++src;
   912 				case 2:
   913 					RGB888_RGB332(*dst++, *src);
   914 					++src;
   915 				case 1:
   916 					RGB888_RGB332(*dst++, *src);
   917 					++src;
   918 			}
   919 #endif /* USE_DUFFS_LOOP */
   920 			src += srcskip;
   921 			dst += dstskip;
   922 		}
   923 	} else {
   924 		int Pixel;
   925 
   926 		while ( height-- ) {
   927 #ifdef USE_DUFFS_LOOP
   928 			DUFFS_LOOP(
   929 				RGB888_RGB332(Pixel, *src);
   930 				*dst++ = map[Pixel];
   931 				++src;
   932 			, width);
   933 #else
   934 			for ( c=width/4; c; --c ) {
   935 				/* Pack RGB into 8bit pixel */
   936 				RGB888_RGB332(Pixel, *src);
   937 				*dst++ = map[Pixel];
   938 				++src;
   939 				RGB888_RGB332(Pixel, *src);
   940 				*dst++ = map[Pixel];
   941 				++src;
   942 				RGB888_RGB332(Pixel, *src);
   943 				*dst++ = map[Pixel];
   944 				++src;
   945 				RGB888_RGB332(Pixel, *src);
   946 				*dst++ = map[Pixel];
   947 				++src;
   948 			}
   949 			switch ( width & 3 ) {
   950 				case 3:
   951 					RGB888_RGB332(Pixel, *src);
   952 					*dst++ = map[Pixel];
   953 					++src;
   954 				case 2:
   955 					RGB888_RGB332(Pixel, *src);
   956 					*dst++ = map[Pixel];
   957 					++src;
   958 				case 1:
   959 					RGB888_RGB332(Pixel, *src);
   960 					*dst++ = map[Pixel];
   961 					++src;
   962 			}
   963 #endif /* USE_DUFFS_LOOP */
   964 			src += srcskip;
   965 			dst += dstskip;
   966 		}
   967 	}
   968 }
   969 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   970 #define RGB888_RGB555(dst, src) { \
   971 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>9)| \
   972 	                   (((*src)&0x0000F800)>>6)| \
   973 	                   (((*src)&0x000000F8)>>3); \
   974 }
   975 #define RGB888_RGB555_TWO(dst, src) { \
   976 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   977 	                     (((src[HI])&0x0000F800)>>6)| \
   978 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   979 	                     (((src[LO])&0x00F80000)>>9)| \
   980 	                     (((src[LO])&0x0000F800)>>6)| \
   981 	                     (((src[LO])&0x000000F8)>>3); \
   982 }
   983 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   984 {
   985 #ifndef USE_DUFFS_LOOP
   986 	int c;
   987 #endif
   988 	int width, height;
   989 	Uint32 *src;
   990 	Uint16 *dst;
   991 	int srcskip, dstskip;
   992 
   993 	/* Set up some basic variables */
   994 	width = info->d_width;
   995 	height = info->d_height;
   996 	src = (Uint32 *)info->s_pixels;
   997 	srcskip = info->s_skip/4;
   998 	dst = (Uint16 *)info->d_pixels;
   999 	dstskip = info->d_skip/2;
  1000 
  1001 #ifdef USE_DUFFS_LOOP
  1002 	while ( height-- ) {
  1003 		DUFFS_LOOP(
  1004 			RGB888_RGB555(dst, src);
  1005 			++src;
  1006 			++dst;
  1007 		, width);
  1008 		src += srcskip;
  1009 		dst += dstskip;
  1010 	}
  1011 #else
  1012 	/* Memory align at 4-byte boundary, if necessary */
  1013 	if ( (long)dst & 0x03 ) {
  1014 		/* Don't do anything if width is 0 */
  1015 		if ( width == 0 ) {
  1016 			return;
  1017 		}
  1018 		--width;
  1019 
  1020 		while ( height-- ) {
  1021 			/* Perform copy alignment */
  1022 			RGB888_RGB555(dst, src);
  1023 			++src;
  1024 			++dst;
  1025 
  1026 			/* Copy in 4 pixel chunks */
  1027 			for ( c=width/4; c; --c ) {
  1028 				RGB888_RGB555_TWO(dst, src);
  1029 				src += 2;
  1030 				dst += 2;
  1031 				RGB888_RGB555_TWO(dst, src);
  1032 				src += 2;
  1033 				dst += 2;
  1034 			}
  1035 			/* Get any leftovers */
  1036 			switch (width & 3) {
  1037 				case 3:
  1038 					RGB888_RGB555(dst, src);
  1039 					++src;
  1040 					++dst;
  1041 				case 2:
  1042 					RGB888_RGB555_TWO(dst, src);
  1043 					src += 2;
  1044 					dst += 2;
  1045 					break;
  1046 				case 1:
  1047 					RGB888_RGB555(dst, src);
  1048 					++src;
  1049 					++dst;
  1050 					break;
  1051 			}
  1052 			src += srcskip;
  1053 			dst += dstskip;
  1054 		}
  1055 	} else { 
  1056 		while ( height-- ) {
  1057 			/* Copy in 4 pixel chunks */
  1058 			for ( c=width/4; c; --c ) {
  1059 				RGB888_RGB555_TWO(dst, src);
  1060 				src += 2;
  1061 				dst += 2;
  1062 				RGB888_RGB555_TWO(dst, src);
  1063 				src += 2;
  1064 				dst += 2;
  1065 			}
  1066 			/* Get any leftovers */
  1067 			switch (width & 3) {
  1068 				case 3:
  1069 					RGB888_RGB555(dst, src);
  1070 					++src;
  1071 					++dst;
  1072 				case 2:
  1073 					RGB888_RGB555_TWO(dst, src);
  1074 					src += 2;
  1075 					dst += 2;
  1076 					break;
  1077 				case 1:
  1078 					RGB888_RGB555(dst, src);
  1079 					++src;
  1080 					++dst;
  1081 					break;
  1082 			}
  1083 			src += srcskip;
  1084 			dst += dstskip;
  1085 		}
  1086 	}
  1087 #endif /* USE_DUFFS_LOOP */
  1088 }
  1089 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1090 #define RGB888_RGB565(dst, src) { \
  1091 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>8)| \
  1092 	                   (((*src)&0x0000FC00)>>5)| \
  1093 	                   (((*src)&0x000000F8)>>3); \
  1094 }
  1095 #define RGB888_RGB565_TWO(dst, src) { \
  1096 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1097 	                     (((src[HI])&0x0000FC00)>>5)| \
  1098 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1099 	                     (((src[LO])&0x00F80000)>>8)| \
  1100 	                     (((src[LO])&0x0000FC00)>>5)| \
  1101 	                     (((src[LO])&0x000000F8)>>3); \
  1102 }
  1103 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1104 {
  1105 #ifndef USE_DUFFS_LOOP
  1106 	int c;
  1107 #endif
  1108 	int width, height;
  1109 	Uint32 *src;
  1110 	Uint16 *dst;
  1111 	int srcskip, dstskip;
  1112 
  1113 	/* Set up some basic variables */
  1114 	width = info->d_width;
  1115 	height = info->d_height;
  1116 	src = (Uint32 *)info->s_pixels;
  1117 	srcskip = info->s_skip/4;
  1118 	dst = (Uint16 *)info->d_pixels;
  1119 	dstskip = info->d_skip/2;
  1120 
  1121 #ifdef USE_DUFFS_LOOP
  1122 	while ( height-- ) {
  1123 		DUFFS_LOOP(
  1124 			RGB888_RGB565(dst, src);
  1125 			++src;
  1126 			++dst;
  1127 		, width);
  1128 		src += srcskip;
  1129 		dst += dstskip;
  1130 	}
  1131 #else
  1132 	/* Memory align at 4-byte boundary, if necessary */
  1133 	if ( (long)dst & 0x03 ) {
  1134 		/* Don't do anything if width is 0 */
  1135 		if ( width == 0 ) {
  1136 			return;
  1137 		}
  1138 		--width;
  1139 
  1140 		while ( height-- ) {
  1141 			/* Perform copy alignment */
  1142 			RGB888_RGB565(dst, src);
  1143 			++src;
  1144 			++dst;
  1145 
  1146 			/* Copy in 4 pixel chunks */
  1147 			for ( c=width/4; c; --c ) {
  1148 				RGB888_RGB565_TWO(dst, src);
  1149 				src += 2;
  1150 				dst += 2;
  1151 				RGB888_RGB565_TWO(dst, src);
  1152 				src += 2;
  1153 				dst += 2;
  1154 			}
  1155 			/* Get any leftovers */
  1156 			switch (width & 3) {
  1157 				case 3:
  1158 					RGB888_RGB565(dst, src);
  1159 					++src;
  1160 					++dst;
  1161 				case 2:
  1162 					RGB888_RGB565_TWO(dst, src);
  1163 					src += 2;
  1164 					dst += 2;
  1165 					break;
  1166 				case 1:
  1167 					RGB888_RGB565(dst, src);
  1168 					++src;
  1169 					++dst;
  1170 					break;
  1171 			}
  1172 			src += srcskip;
  1173 			dst += dstskip;
  1174 		}
  1175 	} else { 
  1176 		while ( height-- ) {
  1177 			/* Copy in 4 pixel chunks */
  1178 			for ( c=width/4; c; --c ) {
  1179 				RGB888_RGB565_TWO(dst, src);
  1180 				src += 2;
  1181 				dst += 2;
  1182 				RGB888_RGB565_TWO(dst, src);
  1183 				src += 2;
  1184 				dst += 2;
  1185 			}
  1186 			/* Get any leftovers */
  1187 			switch (width & 3) {
  1188 				case 3:
  1189 					RGB888_RGB565(dst, src);
  1190 					++src;
  1191 					++dst;
  1192 				case 2:
  1193 					RGB888_RGB565_TWO(dst, src);
  1194 					src += 2;
  1195 					dst += 2;
  1196 					break;
  1197 				case 1:
  1198 					RGB888_RGB565(dst, src);
  1199 					++src;
  1200 					++dst;
  1201 					break;
  1202 			}
  1203 			src += srcskip;
  1204 			dst += dstskip;
  1205 		}
  1206 	}
  1207 #endif /* USE_DUFFS_LOOP */
  1208 }
  1209 
  1210 #endif /* USE_ASMBLIT */
  1211 
  1212 
  1213 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1214 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1215 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1216 {
  1217 #ifndef USE_DUFFS_LOOP
  1218 	int c;
  1219 #endif
  1220 	int width, height;
  1221 	Uint8 *src;
  1222 	Uint32 *dst;
  1223 	int srcskip, dstskip;
  1224 
  1225 	/* Set up some basic variables */
  1226 	width = info->d_width;
  1227 	height = info->d_height;
  1228 	src = (Uint8 *)info->s_pixels;
  1229 	srcskip = info->s_skip;
  1230 	dst = (Uint32 *)info->d_pixels;
  1231 	dstskip = info->d_skip/4;
  1232 
  1233 #ifdef USE_DUFFS_LOOP
  1234 	while ( height-- ) {
  1235 		DUFFS_LOOP(
  1236 		{
  1237 			*dst++ = RGB565_32(dst, src, map);
  1238 			src += 2;
  1239 		},
  1240 		width);
  1241 		src += srcskip;
  1242 		dst += dstskip;
  1243 	}
  1244 #else
  1245 	while ( height-- ) {
  1246 		/* Copy in 4 pixel chunks */
  1247 		for ( c=width/4; c; --c ) {
  1248 			*dst++ = RGB565_32(dst, src, map);
  1249 			src += 2;
  1250 			*dst++ = RGB565_32(dst, src, map);
  1251 			src += 2;
  1252 			*dst++ = RGB565_32(dst, src, map);
  1253 			src += 2;
  1254 			*dst++ = RGB565_32(dst, src, map);
  1255 			src += 2;
  1256 		}
  1257 		/* Get any leftovers */
  1258 		switch (width & 3) {
  1259 			case 3:
  1260 				*dst++ = RGB565_32(dst, src, map);
  1261 				src += 2;
  1262 			case 2:
  1263 				*dst++ = RGB565_32(dst, src, map);
  1264 				src += 2;
  1265 			case 1:
  1266 				*dst++ = RGB565_32(dst, src, map);
  1267 				src += 2;
  1268 				break;
  1269 		}
  1270 		src += srcskip;
  1271 		dst += dstskip;
  1272 	}
  1273 #endif /* USE_DUFFS_LOOP */
  1274 }
  1275 
  1276 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
  1277 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1278 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1279 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1280 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1281 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1282 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1283 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1284 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1285 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1286 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1287 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1288 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1289 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1290 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1291 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1292 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1293 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1294 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1295 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1296 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1297 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1298 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1299 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1300 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1301 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1302 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1303 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1304 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1305 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1306 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1307 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1308 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1309 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1310 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1311 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1312 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1313 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1314 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1315 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1316 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1317 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1318 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1319 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1320 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1321 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1322 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1323 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1324 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1325 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1326 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1327 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1328 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1329 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1330 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1331 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1332 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1333 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1334 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1335 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1336 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1337 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1338 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1339 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1340 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1341 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1342 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1343 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1344 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1345 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1346 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1347 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1348 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1349 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1350 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1351 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1352 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1353 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1354 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1355 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1356 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1357 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1358 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1359 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1360 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1361 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1362 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1363 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1364 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1365 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1366 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1367 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1368 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1369 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1370 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1371 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1372 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1373 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1374 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1375 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1376 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1377 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1378 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1379 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1380 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1381 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1382 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1383 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1384 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1385 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1386 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1387 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1388 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1389 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1390 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1391 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1392 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1393 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1394 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1395 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1396 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1397 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1398 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1399 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1400 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1401 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1402 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1403 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1404 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1405 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1406 };
  1407 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
  1408 {
  1409     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1410 }
  1411 
  1412 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1413 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1414 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1415 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1416 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1417 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1418 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1419 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1420 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1421 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1422 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1423 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1424 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1425 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1426 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1427 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1428 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1429 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1430 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1431 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1432 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1433 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1434 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1435 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1436 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1437 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1438 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1439 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1440 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1441 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1442 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1443 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1444 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1445 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1446 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1447 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1448 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1449 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1450 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1451 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1452 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1453 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1454 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1455 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1456 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1457 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1458 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1459 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1460 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1461 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1462 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1463 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1464 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1465 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1466 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1467 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1468 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1469 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1470 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1471 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1472 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1473 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1474 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1475 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1476 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1477 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1478 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1479 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1480 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1481 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1482 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1483 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1484 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1485 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1486 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1487 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1488 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1489 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1490 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1491 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1492 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1493 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1494 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1495 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1496 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1497 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1498 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1499 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1500 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1501 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1502 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1503 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1504 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1505 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1506 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1507 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1508 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1509 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1510 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1511 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1512 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1513 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1514 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1515 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1516 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1517 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1518 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1519 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1520 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1521 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1522 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1523 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1524 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1525 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1526 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1527 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1528 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1529 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1530 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1531 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1532 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1533 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1534 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1535 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1536 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1537 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1538 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1539 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1540 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1541 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1542 };
  1543 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
  1544 {
  1545     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1546 }
  1547 
  1548 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
   *
   * The 512 words form 256 pairs.  In every pair the first (even-index)
   * word holds a blue value in bits 15..8, the low part of green in bits
   * 23..16 and an opaque 0xff alpha in bits 7..0; the second (odd-index)
   * word holds a red value in bits 31..24 and the high part of green in
   * bits 23..16.
   * NOTE(review): presumably Blit_RGB565_32() picks one pair per source
   * byte (low byte -> first word, high byte -> second word) and combines
   * them -- confirm against that function.
   */
  1549 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1550 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1551 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1552 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1553 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1554 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1555 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1556 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1557 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1558 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1559 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1560 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1561 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1562 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1563 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1564 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1565 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1566 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1567 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1568 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1569 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1570 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1571 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1572 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1573 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1574 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1575 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1576 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1577 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1578 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1579 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1580 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1581 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1582 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1583 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1584 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1585 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1586 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1587 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1588 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1589 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1590 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1591 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1592 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1593 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1594 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1595 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1596 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1597 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1598 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1599 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1600 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1601 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1602 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1603 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1604 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1605 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1606 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1607 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1608 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1609 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1610 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1611 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1612 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1613 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1614 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1615 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1616 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1617 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1618 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1619 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1620 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1621 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1622 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1623 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1624 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1625 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1626 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1627 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1628 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1629 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1630 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1631 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1632 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1633 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1634 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1635 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1636 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1637 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1638 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1639 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1640 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1641 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1642 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1643 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1644 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1645 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1646 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1647 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1648 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1649 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1650 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1651 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1652 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1653 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1654 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1655 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1656 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1657 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1658 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1659 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1660 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1661 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1662 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1663 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1664 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1665 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1666 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1667 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1668 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1669 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1670 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1671 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1672 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1673 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1674 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1675 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1676 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1677 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1678 };
  /* Blit RGB 5-6-5 pixels to RGBA 8-8-8-8 by passing RGB565_RGBA8888_LUT
     to the shared Blit_RGB565_32() routine. */
  1679 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
  1680 {
  1681     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1682 }
  1683 
  1684 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
   *
   * The 512 words form 256 pairs.  In every pair the first (even-index)
   * word holds a blue value in bits 31..24 and the low part of green in
   * bits 23..16; the second (odd-index) word holds the high part of green
   * in bits 23..16, a red value in bits 15..8 and an opaque 0xff alpha in
   * bits 7..0.
   * NOTE(review): presumably Blit_RGB565_32() picks one pair per source
   * byte (low byte -> first word, high byte -> second word) and combines
   * them -- confirm against that function.
   */
  1685 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1686 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1687 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1688 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1689 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1690 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1691 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1692 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1693 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1694 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1695 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1696 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1697 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1698 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1699 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1700 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1701 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1702 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1703 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1704 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1705 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1706 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1707 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1708 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1709 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1710 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1711 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1712 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1713 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1714 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1715 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1716 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1717 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1718 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1719 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1720 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1721 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1722 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1723 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1724 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1725 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1726 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1727 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1728 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1729 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1730 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1731 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1732 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1733 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1734 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1735 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1736 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1737 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1738 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1739 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1740 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1741 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1742 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1743 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1744 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1745 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1746 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1747 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1748 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1749 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1750 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1751 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1752 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1753 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1754 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1755 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1756 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1757 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1758 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1759 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1760 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1761 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1762 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1763 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1764 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1765 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1766 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1767 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1768 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1769 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1770 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1771 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1772 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1773 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1774 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1775 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1776 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1777 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1778 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1779 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1780 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1781 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1782 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1783 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1784 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1785 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1786 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1787 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1788 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1789 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1790 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1791 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1792 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1793 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1794 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1795 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1796 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1797 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1798 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1799 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1800 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1801 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1802 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1803 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1804 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1805 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1806 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1807 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1808 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1809 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1810 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1811 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1812 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1813 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1814 };
  /* Blit RGB 5-6-5 pixels to BGRA 8-8-8-8 by passing RGB565_BGRA8888_LUT
     to the shared Blit_RGB565_32() routine. */
  1815 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
  1816 {
  1817     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1818 }
  1819 
  1820 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1821 #ifndef RGB888_RGB332
  1822 #define RGB888_RGB332(dst, src) { \
  1823 	dst = (((src)&0x00E00000)>>16)| \
  1824 	      (((src)&0x0000E000)>>11)| \
  1825 	      (((src)&0x000000C0)>>6); \
  1826 }
  1827 #endif
  1828 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1829 {
  1830 #ifndef USE_DUFFS_LOOP
  1831 	int c;
  1832 #endif
  1833 	int Pixel;
  1834 	int width, height;
  1835 	Uint32 *src;
  1836 	const Uint8 *map;
  1837 	Uint8 *dst;
  1838 	int srcskip, dstskip;
  1839 
  1840 	/* Set up some basic variables */
  1841 	width = info->d_width;
  1842 	height = info->d_height;
  1843 	src = (Uint32 *)info->s_pixels;
  1844 	srcskip = info->s_skip/4;
  1845 	dst = info->d_pixels;
  1846 	dstskip = info->d_skip;
  1847 	map = info->table;
  1848 
  1849 #ifdef USE_DUFFS_LOOP
  1850 	while ( height-- ) {
  1851 		DUFFS_LOOP(
  1852 			RGB888_RGB332(Pixel, *src);
  1853 			*dst++ = map[Pixel];
  1854 			++src;
  1855 		, width);
  1856 		src += srcskip;
  1857 		dst += dstskip;
  1858 	}
  1859 #else
  1860 	while ( height-- ) {
  1861 		for ( c=width/4; c; --c ) {
  1862 			/* Pack RGB into 8bit pixel */
  1863 			RGB888_RGB332(Pixel, *src);
  1864 			*dst++ = map[Pixel];
  1865 			++src;
  1866 			RGB888_RGB332(Pixel, *src);
  1867 			*dst++ = map[Pixel];
  1868 			++src;
  1869 			RGB888_RGB332(Pixel, *src);
  1870 			*dst++ = map[Pixel];
  1871 			++src;
  1872 			RGB888_RGB332(Pixel, *src);
  1873 			*dst++ = map[Pixel];
  1874 			++src;
  1875 		}
  1876 		switch ( width & 3 ) {
  1877 			case 3:
  1878 				RGB888_RGB332(Pixel, *src);
  1879 				*dst++ = map[Pixel];
  1880 				++src;
  1881 			case 2:
  1882 				RGB888_RGB332(Pixel, *src);
  1883 				*dst++ = map[Pixel];
  1884 				++src;
  1885 			case 1:
  1886 				RGB888_RGB332(Pixel, *src);
  1887 				*dst++ = map[Pixel];
  1888 				++src;
  1889 		}
  1890 		src += srcskip;
  1891 		dst += dstskip;
  1892 	}
  1893 #endif /* USE_DUFFS_LOOP */
  1894 }
  1895 static void BlitNto1(SDL_BlitInfo *info)
  1896 {
  1897 #ifndef USE_DUFFS_LOOP
  1898 	int c;
  1899 #endif
  1900 	int width, height;
  1901 	Uint8 *src;
  1902 	const Uint8 *map;
  1903 	Uint8 *dst;
  1904 	int srcskip, dstskip;
  1905 	int srcbpp;
  1906 	Uint32 Pixel;
  1907 	int  sR, sG, sB;
  1908 	SDL_PixelFormat *srcfmt;
  1909 
  1910 	/* Set up some basic variables */
  1911 	width = info->d_width;
  1912 	height = info->d_height;
  1913 	src = info->s_pixels;
  1914 	srcskip = info->s_skip;
  1915 	dst = info->d_pixels;
  1916 	dstskip = info->d_skip;
  1917 	map = info->table;
  1918 	srcfmt = info->src;
  1919 	srcbpp = srcfmt->BytesPerPixel;
  1920 
  1921 	if ( map == NULL ) {
  1922 		while ( height-- ) {
  1923 #ifdef USE_DUFFS_LOOP
  1924 			DUFFS_LOOP(
  1925 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1926 								sR, sG, sB);
  1927 				if ( 1 ) {
  1928 				  	/* Pack RGB into 8bit pixel */
  1929 				  	*dst = ((sR>>5)<<(3+2))|
  1930 					        ((sG>>5)<<(2)) |
  1931 					        ((sB>>6)<<(0)) ;
  1932 				}
  1933 				dst++;
  1934 				src += srcbpp;
  1935 			, width);
  1936 #else
  1937 			for ( c=width; c; --c ) {
  1938 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1939 								sR, sG, sB);
  1940 				if ( 1 ) {
  1941 				  	/* Pack RGB into 8bit pixel */
  1942 				  	*dst = ((sR>>5)<<(3+2))|
  1943 					        ((sG>>5)<<(2)) |
  1944 					        ((sB>>6)<<(0)) ;
  1945 				}
  1946 				dst++;
  1947 				src += srcbpp;
  1948 			}
  1949 #endif
  1950 			src += srcskip;
  1951 			dst += dstskip;
  1952 		}
  1953 	} else {
  1954 		while ( height-- ) {
  1955 #ifdef USE_DUFFS_LOOP
  1956 			DUFFS_LOOP(
  1957 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1958 								sR, sG, sB);
  1959 				if ( 1 ) {
  1960 				  	/* Pack RGB into 8bit pixel */
  1961 				  	*dst = map[((sR>>5)<<(3+2))|
  1962 						   ((sG>>5)<<(2))  |
  1963 						   ((sB>>6)<<(0))  ];
  1964 				}
  1965 				dst++;
  1966 				src += srcbpp;
  1967 			, width);
  1968 #else
  1969 			for ( c=width; c; --c ) {
  1970 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1971 								sR, sG, sB);
  1972 				if ( 1 ) {
  1973 				  	/* Pack RGB into 8bit pixel */
  1974 				  	*dst = map[((sR>>5)<<(3+2))|
  1975 						   ((sG>>5)<<(2))  |
  1976 						   ((sB>>6)<<(0))  ];
  1977 				}
  1978 				dst++;
  1979 				src += srcbpp;
  1980 			}
  1981 #endif /* USE_DUFFS_LOOP */
  1982 			src += srcskip;
  1983 			dst += dstskip;
  1984 		}
  1985 	}
  1986 }
  1987 
  1988 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  1989 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1990 {
  1991 	int width = info->d_width;
  1992 	int height = info->d_height;
  1993 	Uint32 *src = (Uint32 *)info->s_pixels;
  1994 	int srcskip = info->s_skip;
  1995 	Uint32 *dst = (Uint32 *)info->d_pixels;
  1996 	int dstskip = info->d_skip;
  1997 	SDL_PixelFormat *srcfmt = info->src;
  1998 	SDL_PixelFormat *dstfmt = info->dst;
  1999 
  2000 	if (dstfmt->Amask) {
  2001 		/* RGB->RGBA, SET_ALPHA */
  2002 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  2003 
  2004 		while ( height-- ) {
  2005 			DUFFS_LOOP(
  2006 			{
  2007 				*dst = *src | mask;
  2008 				++dst;
  2009 				++src;
  2010 			},
  2011 			width);
  2012 			src = (Uint32*)((Uint8*)src + srcskip);
  2013 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2014 		}
  2015 	} else {
  2016 		/* RGBA->RGB, NO_ALPHA */
  2017 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2018 
  2019 		while ( height-- ) {
  2020 			DUFFS_LOOP(
  2021 			{
  2022 				*dst = *src & mask;
  2023 				++dst;
  2024 				++src;
  2025 			},
  2026 			width);
  2027 			src = (Uint32*)((Uint8*)src + srcskip);
  2028 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2029 		}
  2030 	}
  2031 }
  2032 
  2033 static void BlitNtoN(SDL_BlitInfo *info)
  2034 {
  2035 	int width = info->d_width;
  2036 	int height = info->d_height;
  2037 	Uint8 *src = info->s_pixels;
  2038 	int srcskip = info->s_skip;
  2039 	Uint8 *dst = info->d_pixels;
  2040 	int dstskip = info->d_skip;
  2041 	SDL_PixelFormat *srcfmt = info->src;
  2042 	int srcbpp = srcfmt->BytesPerPixel;
  2043 	SDL_PixelFormat *dstfmt = info->dst;
  2044 	int dstbpp = dstfmt->BytesPerPixel;
  2045 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2046 
  2047 	while ( height-- ) {
  2048 		DUFFS_LOOP(
  2049 		{
  2050 		        Uint32 Pixel;
  2051 			unsigned sR;
  2052 			unsigned sG;
  2053 			unsigned sB;
  2054 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2055 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2056 			dst += dstbpp;
  2057 			src += srcbpp;
  2058 		},
  2059 		width);
  2060 		src += srcskip;
  2061 		dst += dstskip;
  2062 	}
  2063 }
  2064 
  2065 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2066 {
  2067 	int width = info->d_width;
  2068 	int height = info->d_height;
  2069 	Uint8 *src = info->s_pixels;
  2070 	int srcskip = info->s_skip;
  2071 	Uint8 *dst = info->d_pixels;
  2072 	int dstskip = info->d_skip;
  2073 	SDL_PixelFormat *srcfmt = info->src;
  2074 	int srcbpp = srcfmt->BytesPerPixel;
  2075 	SDL_PixelFormat *dstfmt = info->dst;
  2076 	int dstbpp = dstfmt->BytesPerPixel;
  2077 	int c;
  2078 
  2079 	/* FIXME: should map alpha to [0..255] correctly! */
  2080 	while ( height-- ) {
  2081 		for ( c=width; c; --c ) {
  2082 		        Uint32 Pixel;
  2083 			unsigned sR, sG, sB, sA;
  2084 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2085 				      sR, sG, sB, sA);
  2086 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2087 				      sR, sG, sB, sA);
  2088 			dst += dstbpp;
  2089 			src += srcbpp;
  2090 		}
  2091 		src += srcskip;
  2092 		dst += dstskip;
  2093 	}
  2094 }
  2095 
  2096 static void BlitNto1Key(SDL_BlitInfo *info)
  2097 {
  2098 	int width = info->d_width;
  2099 	int height = info->d_height;
  2100 	Uint8 *src = info->s_pixels;
  2101 	int srcskip = info->s_skip;
  2102 	Uint8 *dst = info->d_pixels;
  2103 	int dstskip = info->d_skip;
  2104 	SDL_PixelFormat *srcfmt = info->src;
  2105 	const Uint8 *palmap = info->table;
  2106 	Uint32 ckey = srcfmt->colorkey;
  2107 	Uint32 rgbmask = ~srcfmt->Amask;
  2108 	int srcbpp;
  2109 	Uint32 Pixel;
  2110 	Uint8  sR, sG, sB;
  2111 
  2112 	/* Set up some basic variables */
  2113 	srcbpp = srcfmt->BytesPerPixel;
  2114 	ckey &= rgbmask;
  2115 
  2116 	if ( palmap == NULL ) {
  2117 		while ( height-- ) {
  2118 			DUFFS_LOOP(
  2119 			{
  2120 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2121 								sR, sG, sB);
  2122 				if ( (Pixel & rgbmask) != ckey ) {
  2123 				  	/* Pack RGB into 8bit pixel */
  2124 				  	*dst = ((sR>>5)<<(3+2))|
  2125 						((sG>>5)<<(2)) |
  2126 						((sB>>6)<<(0)) ;
  2127 				}
  2128 				dst++;
  2129 				src += srcbpp;
  2130 			},
  2131 			width);
  2132 			src += srcskip;
  2133 			dst += dstskip;
  2134 		}
  2135 	} else {
  2136 		while ( height-- ) {
  2137 			DUFFS_LOOP(
  2138 			{
  2139 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2140 								sR, sG, sB);
  2141 				if ( (Pixel & rgbmask) != ckey ) {
  2142 				  	/* Pack RGB into 8bit pixel */
  2143 				  	*dst = palmap[((sR>>5)<<(3+2))|
  2144 							((sG>>5)<<(2))  |
  2145 							((sB>>6)<<(0))  ];
  2146 				}
  2147 				dst++;
  2148 				src += srcbpp;
  2149 			},
  2150 			width);
  2151 			src += srcskip;
  2152 			dst += dstskip;
  2153 		}
  2154 	}
  2155 }
  2156 
  2157 static void Blit2to2Key(SDL_BlitInfo *info)
  2158 {
  2159 	int width = info->d_width;
  2160 	int height = info->d_height;
  2161 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2162 	int srcskip = info->s_skip;
  2163 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2164 	int dstskip = info->d_skip;
  2165 	Uint32 ckey = info->src->colorkey;
  2166 	Uint32 rgbmask = ~info->src->Amask;
  2167 
  2168 	/* Set up some basic variables */
  2169         srcskip /= 2;
  2170         dstskip /= 2;
  2171 	ckey &= rgbmask;
  2172 
  2173 	while ( height-- ) {
  2174 		DUFFS_LOOP(
  2175 		{
  2176 			if ( (*srcp & rgbmask) != ckey ) {
  2177 				*dstp = *srcp;
  2178 			}
  2179 			dstp++;
  2180 			srcp++;
  2181 		},
  2182 		width);
  2183 		srcp += srcskip;
  2184 		dstp += dstskip;
  2185 	}
  2186 }
  2187 
  2188 static void BlitNtoNKey(SDL_BlitInfo *info)
  2189 {
  2190 	int width = info->d_width;
  2191 	int height = info->d_height;
  2192 	Uint8 *src = info->s_pixels;
  2193 	int srcskip = info->s_skip;
  2194 	Uint8 *dst = info->d_pixels;
  2195 	int dstskip = info->d_skip;
  2196 	Uint32 ckey = info->src->colorkey;
  2197 	SDL_PixelFormat *srcfmt = info->src;
  2198 	SDL_PixelFormat *dstfmt = info->dst;
  2199 	int srcbpp = srcfmt->BytesPerPixel;
  2200 	int dstbpp = dstfmt->BytesPerPixel;
  2201 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2202 
  2203 	while ( height-- ) {
  2204 		DUFFS_LOOP(
  2205 		{
  2206 		        Uint32 Pixel;
  2207 			unsigned sR;
  2208 			unsigned sG;
  2209 			unsigned sB;
  2210 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2211 			if ( Pixel != ckey ) {
  2212 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2213 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2214 					      sR, sG, sB, alpha);
  2215 			}
  2216 			dst += dstbpp;
  2217 			src += srcbpp;
  2218 		},
  2219 		width);
  2220 		src += srcskip;
  2221 		dst += dstskip;
  2222 	}
  2223 }
  2224 
  2225 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2226 {
  2227 	int width = info->d_width;
  2228 	int height = info->d_height;
  2229 	Uint8 *src = info->s_pixels;
  2230 	int srcskip = info->s_skip;
  2231 	Uint8 *dst = info->d_pixels;
  2232 	int dstskip = info->d_skip;
  2233 	Uint32 ckey = info->src->colorkey;
  2234 	SDL_PixelFormat *srcfmt = info->src;
  2235 	SDL_PixelFormat *dstfmt = info->dst;
  2236 	Uint32 rgbmask = ~srcfmt->Amask;
  2237 
  2238 	Uint8 srcbpp;
  2239 	Uint8 dstbpp;
  2240 	Uint32 Pixel;
  2241 	Uint8  sR, sG, sB, sA;
  2242 
  2243 	/* Set up some basic variables */
  2244 	srcbpp = srcfmt->BytesPerPixel;
  2245 	dstbpp = dstfmt->BytesPerPixel;
  2246 	ckey &= rgbmask;
  2247 
  2248 	/* FIXME: should map alpha to [0..255] correctly! */
  2249 	while ( height-- ) {
  2250 		DUFFS_LOOP(
  2251 		{
  2252 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2253 				      sR, sG, sB, sA);
  2254 			if ( (Pixel & rgbmask) != ckey ) {
  2255 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2256 						sR, sG, sB, sA);
  2257 			}
  2258 			dst += dstbpp;
  2259 			src += srcbpp;
  2260 		},
  2261 		width);
  2262 		src += srcskip;
  2263 		dst += dstskip;
  2264 	}
  2265 }
  2266 
  2267 /* Normal N to N optimized blitters */
  /* One row of a blitter selection table: a source/destination format
     pattern plus the blit function to use when the pattern matches. */
  2268 struct blit_table {
  2269 	Uint32 srcR, srcG, srcB;	/* source R/G/B masks */
  2270 	int dstbpp;			/* destination bytes per pixel (tables use 2, 3, 4) */
  2271 	Uint32 dstR, dstG, dstB;	/* destination R/G/B masks */
  2272 	Uint32 blit_features;		/* feature bits; tables label 2 "has-altivec" and 6 "has-altivec | dont-use-prefetch" */
  2273 	void *aux_data;			/* extra pointer stored with the entry (asm converter routines in the USE_ASMBLIT rows, NULL elsewhere) */
  2274 	SDL_loblit blitfunc;		/* blit function selected by this entry */
  2275 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;	/* bit flags, OR-ed together in the tables */
  2276 };
  /* Table for 8-bit RGB sources: a single all-zero entry with no blit
     function.  The trailing alpha field is omitted and therefore zero. */
  2277 static const struct blit_table normal_blit_1[] = {
  2278 	/* Default for 8-bit RGB source, an invalid combination */
  2279 	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
  2280 };
  /* Table for 16-bit RGB sources.  Each row reads
     { srcR,srcG,srcB, dstbpp, dstR,dstG,dstB, blit_features, aux_data,
       blitfunc, alpha }.
     The last row is the all-zero catch-all that selects BlitNtoN.
     NOTE(review): rows are presumably tried in order, first match wins --
     confirm in SDL_CalculateBlitN. */
  2281 static const struct blit_table normal_blit_2[] = {
  2282 #ifdef USE_ASMBLIT
  2283     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
  2284       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
  2285     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
  2286       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
  2287     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
  2288       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
  2289 #endif
  2290 #ifdef USE_ALTIVEC_BLITTERS
  2291     /* has-altivec */
  2292     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2293       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2294     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2295       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2296 #endif
  2297     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
  2298       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
  2299     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2300       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
  2301     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2302       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
  2303     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2304       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
  2305 
  2306     /* Default for 16-bit RGB source, used if no other blitter matches */
  2307     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2308 };
  /* Table for 24-bit RGB sources: only the all-zero catch-all entry,
     which selects the generic BlitNtoN. */
  2309 static const struct blit_table normal_blit_3[] = {
  2310 	/* Default for 24-bit RGB source, never optimized */
  2311     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2312 };
  /* Table for 32-bit RGB sources.  Each row reads
     { srcR,srcG,srcB, dstbpp, dstR,dstG,dstB, blit_features, aux_data,
       blitfunc, alpha }.
     With USE_ASMBLIT the x86/MMX converter rows are compiled in; otherwise
     the optional AltiVec rows and the C RGB888->RGB565/RGB555 rows are.
     The AltiVec rows use all-zero masks, which MASKOK treats as "matches
     anything".  The last row is the all-zero catch-all selecting BlitNtoN.
     NOTE(review): blit_features 1 on the ConvertMMX rows presumably means
     "needs MMX" -- confirm where blit_features is tested. */
  2313 static const struct blit_table normal_blit_4[] = {
  2314 #ifdef USE_ASMBLIT
  2315     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2316       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
  2317     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2318       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
  2319     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2320       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
  2321     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2322       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
  2323     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2324       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
  2325     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2326       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
  2327     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2328       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
  2329     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2330       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
  2331     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
  2332       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
  2333     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
  2334       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
  2335     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2336       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
  2337     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2338       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
  2339     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2340       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
  2341 #else
  2342 #ifdef USE_ALTIVEC_BLITTERS
  2343     /* has-altivec | dont-use-prefetch */
  2344     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2345       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2346     /* has-altivec */
  2347     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2348       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2349     /* has-altivec */
  2350     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
  2351       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
  2352 #endif
  2353     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2354       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
  2355     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2356       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
  2357 #endif
  2358 	/* Default for 32-bit RGB source, used if no other blitter matches */
  2359 	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2360 };
  /* The four per-source-depth tables gathered in one array, in the order
     8-, 16-, 24- and 32-bit source.
     NOTE(review): presumably indexed with (source BytesPerPixel - 1) --
     confirm in SDL_CalculateBlitN. */
  2361 static const struct blit_table *normal_blit[] = {
  2362 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2363 };
  2364 
  2365 /* Mask matches table, or table entry is zero */
  2366 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2367 
  2368 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2369 {
  2370 	struct private_swaccel *sdata;
  2371 	SDL_PixelFormat *srcfmt;
  2372 	SDL_PixelFormat *dstfmt;
  2373 	const struct blit_table *table;
  2374 	int which;
  2375 	SDL_loblit blitfun;
  2376 
  2377 	/* Set up data for choosing the blit */
  2378 	sdata = surface->map->sw_data;
  2379 	srcfmt = surface->format;
  2380 	dstfmt = surface->map->dst->format;
  2381 
  2382 	if ( blit_index & 2 ) {
  2383 	        /* alpha or alpha+colorkey */
  2384 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2385 	}
  2386 
  2387 	/* We don't support destinations less than 8-bits */
  2388 	if ( dstfmt->BitsPerPixel < 8 ) {
  2389 		return(NULL);
  2390 	}
  2391 	
  2392 	if(blit_index == 1) {
  2393 	    /* colorkey blit: Here we don't have too many options, mostly
  2394 	       because RLE is the preferred fast way to deal with this.
  2395 	       If a particular case turns out to be useful we'll add it. */
  2396 
  2397 	    if(srcfmt->BytesPerPixel == 2
  2398 	       && surface->map->identity)
  2399 		return Blit2to2Key;
  2400 	    else if(dstfmt->BytesPerPixel == 1)
  2401 		return BlitNto1Key;
  2402 	    else {
  2403 #ifdef USE_ALTIVEC_BLITTERS
  2404         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2405             return Blit32to32KeyAltivec;
  2406         } else
  2407 #endif
  2408 
  2409 		if(srcfmt->Amask && dstfmt->Amask)
  2410 		    return BlitNtoNKeyCopyAlpha;
  2411 		else
  2412 		    return BlitNtoNKey;
  2413 	    }
  2414 	}
  2415 
  2416 	blitfun = NULL;
  2417 	if ( dstfmt->BitsPerPixel == 8 ) {
  2418 		/* We assume 8-bit destinations are palettized */
  2419 		if ( (srcfmt->BytesPerPixel == 4) &&
  2420 		     (srcfmt->Rmask == 0x00FF0000) &&
  2421 		     (srcfmt->Gmask == 0x0000FF00) &&
  2422 		     (srcfmt->Bmask == 0x000000FF) ) {
  2423 			if ( surface->map->table ) {
  2424 				blitfun = Blit_RGB888_index8_map;
  2425 			} else {
  2426 #ifdef USE_ASMBLIT
  2427 				sdata->aux_data = ConvertX86p32_8RGB332;
  2428 				blitfun = ConvertX86;
  2429 #else
  2430 				blitfun = Blit_RGB888_index8;
  2431 #endif
  2432 			}
  2433 		} else {
  2434 			blitfun = BlitNto1;
  2435 		}
  2436 	} else {
  2437 		/* Now the meat, choose the blitter we want */
  2438 		int a_need = 0;
  2439 		if(dstfmt->Amask)
  2440 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2441 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2442 		for ( which=0; table[which].dstbpp; ++which ) {
  2443 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2444 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2445 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2446 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2447 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2448 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2449 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2450 			    (a_need & table[which].alpha) == a_need &&
  2451 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2452 				break;
  2453 		}
  2454 		sdata->aux_data = table[which].aux_data;
  2455 		blitfun = table[which].blitfunc;
  2456 
  2457 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2458 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2459 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2460 			     srcfmt->Rmask == dstfmt->Rmask &&
  2461 			     srcfmt->Gmask == dstfmt->Gmask &&
  2462 			     srcfmt->Bmask == dstfmt->Bmask ) {
  2463 				blitfun = Blit4to4MaskAlpha;
  2464 			} else if ( a_need == COPY_ALPHA ) {
  2465 			    blitfun = BlitNtoNCopyAlpha;
  2466 			}
  2467 		}
  2468 	}
  2469 
  2470 #ifdef DEBUG_ASM
  2471 #ifdef USE_ASMBLIT
  2472 	if ( blitfun == ConvertMMX )
  2473 		fprintf(stderr, "Using mmx blit\n");
  2474 	else
  2475 	if ( blitfun == ConvertX86 )
  2476 		fprintf(stderr, "Using asm blit\n");
  2477 	else
  2478 #endif
  2479 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
  2480 		fprintf(stderr, "Using C blit\n");
  2481 	else
  2482 		fprintf(stderr, "Using optimized C blit\n");
  2483 #endif /* DEBUG_ASM */
  2484 
  2485 	return(blitfun);
  2486 }