src/video/SDL_blit_N.c
author Ryan C. Gordon <icculus@icculus.org>
Thu, 20 Oct 2005 06:55:26 +0000
changeset 1162 2651158f59b8
parent 1139 d0ae4dff7208
child 1175 867f521591e5
permissions -rw-r--r--
Enable AltiVec blitters on PowerPC Linux, and add some fixes for recent
GCC versions.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2004 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Library General Public
     7     License as published by the Free Software Foundation; either
     8     version 2 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Library General Public License for more details.
    14 
    15     You should have received a copy of the GNU Library General Public
    16     License along with this library; if not, write to the Free
    17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    #ifdef SAVE_RCSID
    /* Revision-control identification string; only compiled in when
       SAVE_RCSID is defined.  It is declared as a char array because a
       plain `char` object cannot be initialized from a string literal. */
    static const char rcsid[] =
     "@(#) $Id$";
    #endif
    27 
    28 #include <stdio.h>
    29 
    30 #include "SDL_types.h"
    31 #include "SDL_video.h"
    32 #include "SDL_blit.h"
    33 #include "SDL_byteorder.h"
    34 #include "SDL_cpuinfo.h"
    35 
    36 /* Functions to blit from N-bit surfaces to other surfaces */
    37 
    38 #ifdef USE_ALTIVEC_BLITTERS
    39 #include <altivec.h>
    40 #include <assert.h>
    41 #include <stdlib.h>
    42 #ifdef MACOSX
    43 #include <sys/sysctl.h>
    /* Query the "hw.l3cachesize" sysctl.
       Returns the value reported by sysctlbyname(), or 0 when the call fails. */
    static size_t GetL3CacheSize( void )
    {
        u_int64_t reported = 0;
        size_t reported_len = sizeof( reported );

        if( sysctlbyname( "hw.l3cachesize", &reported, &reported_len, NULL, 0 ) != 0 ) {
            return 0;
        }

        /* implicit 64-bit -> size_t conversion, as in the return type */
        return reported;
    }
    56 #else
    /* Fallback used when the MACOSX sysctl query is not compiled in:
       there is nothing to ask, so a fixed G4-class guess is returned. */
    static size_t GetL3CacheSize( void )
    {
        /* XXX: Just guess G4 (2 * 1024 * 1024) */
        enum { GUESSED_G4_L3_SIZE = 2097152 };

        return GUESSED_G4_L3_SIZE;
    }
    62 #endif /* MACOSX */
    63 
    /*
     * Build a 16 x 8-bit or an 8 x 16-bit vector constant from a list of
     * element values.  Two spellings of the literal are provided: MACOSX
     * builds with a GCC major version below 4 get the parenthesized element
     * list, every other configuration gets the brace-enclosed list.
     */
    64 #if ((defined MACOSX) && (__GNUC__ < 4))
    65     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    66         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    67     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    68         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    69 #else
    /* brace form: same macros, same argument order */
    70     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    71         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    72     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    73         (vector unsigned short) { a,b,c,d,e,f,g,h }
    74 #endif
    75 
    /* Low four bits of an address: non-zero when x is not 16-byte aligned.
       The argument is parenthesized so that a pointer expression such as
       (p + 1) is evaluated as a whole before it is cast to size_t. */
    #define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)
    /* Permute-control vector that applies the byte order (a,b,c,d) to each of
       the four 32-bit lanes of a vector.  It is built through
       VECUINT8_LITERAL so that it uses the same vector-literal spelling as
       the rest of this file for the compiler in use; the arguments are
       parenthesized so expressions can be passed safely. */
    #define VSWIZZLE32(a,b,c,d) VECUINT8_LITERAL( \
                                     0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                                     0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                                     0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                                     0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )
    82 
    /* Pack the components r, g, b, a into one 32-bit pixel using the
       shift/mask fields of the pixel format pointed to by dstfmt.
       Every argument is parenthesized, and each component is widened to
       Uint32 before shifting so that, for example, an 8-bit alpha >= 0x80
       shifted left by 24 can never overflow a signed int. */
    #define MAKE8888(dstfmt, r, g, b, a)  \
        ( ((((Uint32)(r))<<(dstfmt)->Rshift)&(dstfmt)->Rmask) | \
          ((((Uint32)(g))<<(dstfmt)->Gshift)&(dstfmt)->Gmask) | \
          ((((Uint32)(b))<<(dstfmt)->Bshift)&(dstfmt)->Bmask) | \
          ((((Uint32)(a))<<(dstfmt)->Ashift)&(dstfmt)->Amask) )
    88 
    89 /*
    90  * Data Stream Touch...Altivec cache prefetching.
    91  *
    92  *  Don't use this on a G5...however, the speed boost is very significant
    93  *   on a G4.
    94  */
    /* Stream ids passed as the last argument of vec_dstt()/vec_dstst() in
       the prefetching converter: one for the source stream, one for the
       destination stream. */
    95 #define DST_CHAN_SRC 1
    96 #define DST_CHAN_DEST 2
    97 
    98 /* Pack a DST control word: size is placed at bit 24 and up, count at
          bit 16 and up, stride in the low bits.  The arguments are not
          range-masked, so oversized values spill into the neighbouring
          field.  NOTE(review): these presumably map to the AltiVec
          data-stream block size / block count / stride fields -- confirm
          against the AltiVec manual. */
    99 #define DST_CTRL(size, count, stride) \
   100     (((size) << 24) | ((count) << 16) | (stride))
   101 
   /* Permute-control vector for vec_perm(lo, hi, aligner), used to extract the
      16 bytes that start at src from two aligned loads.
        - src misaligned: vec_lvsl(0, src) selects the window that begins at
          src's offset inside `lo` and continues into `hi`.
        - src aligned: the blitters load `hi` with vec_ld(15, src), which in
          that case is the block at src itself, so the control vector is
          biased by 8 + 8 to select bytes 16..31, i.e. all of `hi`.
      src is wrapped in parentheses before it is handed to UNALIGNED_PTR so a
      pointer expression is cast as a whole.  It is still evaluated more than
      once, so it must not have side effects. */
   #define VEC_ALIGNER(src) ((UNALIGNED_PTR((src))) \
       ? vec_lvsl(0, (src)) \
       : vec_add(vec_lvsl(8, (src)), vec_splat_u8(8)))
   105 
   106 /* Calculate the permute vector used for 32->32 swizzling */
   107 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
   108                                   const SDL_PixelFormat *dstfmt)
   109 {
   110     /*
   111     * We have to assume that the bits that aren't used by other
   112      *  colors is alpha, and it's one complete byte, since some formats
   113      *  leave alpha with a zero mask, but we should still swizzle the bits.
   114      */
   115     /* ARGB */
   116     const static struct SDL_PixelFormat default_pixel_format = {
   117         NULL, 0, 0,
   118         0, 0, 0, 0,
   119         16, 8, 0, 24,
   120         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   121         0, 0};
   122     if (!srcfmt) {
   123         srcfmt = &default_pixel_format;
   124     }
   125     if (!dstfmt) {
   126         dstfmt = &default_pixel_format;
   127     }
   128     vector unsigned char plus = VECUINT8_LITERAL(
   129                                       0x00, 0x00, 0x00, 0x00,
   130                                       0x04, 0x04, 0x04, 0x04,
   131                                       0x08, 0x08, 0x08, 0x08,
   132                                       0x0C, 0x0C, 0x0C, 0x0C );
   133     vector unsigned char vswiz;
   134     vector unsigned int srcvec;
   135 #define RESHIFT(X) (3 - ((X) >> 3))
   136     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   137     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   138     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   139     Uint32 amask;
   140     /* Use zero for alpha if either surface doesn't have alpha */
   141     if (dstfmt->Amask) {
   142         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   143     } else {    
   144         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   145     }           
   146 #undef RESHIFT  
   147     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
   148     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   149     return(vswiz);
   150 }
   151 
   152 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   153 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   154     int height = info->d_height;
   155     Uint8 *src = (Uint8 *) info->s_pixels;
   156     int srcskip = info->s_skip;
   157     Uint8 *dst = (Uint8 *) info->d_pixels;
   158     int dstskip = info->d_skip;
   159     SDL_PixelFormat *srcfmt = info->src;
   160     vector unsigned char valpha = vec_splat_u8(0);
   161     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   162     vector unsigned char vgmerge = VECUINT8_LITERAL(
   163         0x00, 0x02, 0x00, 0x06,
   164         0x00, 0x0a, 0x00, 0x0e,
   165         0x00, 0x12, 0x00, 0x16,
   166         0x00, 0x1a, 0x00, 0x1e);
   167     vector unsigned short v1 = vec_splat_u16(1);
   168     vector unsigned short v3 = vec_splat_u16(3);
   169     vector unsigned short v3f = VECUINT16_LITERAL(
   170         0x003f, 0x003f, 0x003f, 0x003f,
   171         0x003f, 0x003f, 0x003f, 0x003f);
   172     vector unsigned short vfc = VECUINT16_LITERAL(
   173         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   174         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   175     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   176     vf800 = vec_sl(vf800, vec_splat_u16(8));
   177 
   178     while (height--) {
   179         vector unsigned char valigner;
   180         vector unsigned char voverflow;
   181         vector unsigned char vsrc;
   182 
   183         int width = info->d_width;
   184         int extrawidth;
   185 
   186         /* do scalar until we can align... */
   187 #define ONE_PIXEL_BLEND(condition, widthvar) \
   188         while (condition) { \
   189             Uint32 Pixel; \
   190             unsigned sR, sG, sB, sA; \
   191             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   192                           sR, sG, sB, sA); \
   193             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   194                                 ((sG << 3) & 0x000007E0) | \
   195                                 ((sB >> 3) & 0x0000001F)); \
   196             dst += 2; \
   197             src += 4; \
   198             widthvar--; \
   199         }
   200 
   201         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   202 
   203         /* After all that work, here's the vector part! */
   204         extrawidth = (width % 8);  /* trailing unaligned stores */
   205         width -= extrawidth;
   206         vsrc = vec_ld(0, src);
   207         valigner = VEC_ALIGNER(src);
   208 
   209         while (width) {
   210             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   211             vector unsigned int vsrc1, vsrc2;
   212             vector unsigned char vdst;
   213 
   214             voverflow = vec_ld(15, src);
   215             vsrc = vec_perm(vsrc, voverflow, valigner);
   216             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   217             src += 16;
   218             vsrc = voverflow;
   219             voverflow = vec_ld(15, src);
   220             vsrc = vec_perm(vsrc, voverflow, valigner);
   221             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   222             /* 1555 */
   223             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   224             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   225             vgpixel = vec_and(vgpixel, vfc);
   226             vgpixel = vec_sl(vgpixel, v3);
   227             vrpixel = vec_sl(vpixel, v1);
   228             vrpixel = vec_and(vrpixel, vf800);
   229             vbpixel = vec_and(vpixel, v3f);
   230             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   231             /* 565 */
   232             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   233             vec_st(vdst, 0, dst);
   234 
   235             width -= 8;
   236             src += 16;
   237             dst += 16;
   238             vsrc = voverflow;
   239         }
   240 
   241         assert(width == 0);
   242 
   243 
   244         /* do scalar until we can align... */
   245         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   246 #undef ONE_PIXEL_BLEND
   247 
   248         src += srcskip;  /* move to next row, accounting for pitch. */
   249         dst += dstskip;
   250     }
   251 
   252 
   253 }
   254 
   255 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   256     int height = info->d_height;
   257     Uint8 *src = (Uint8 *) info->s_pixels;
   258     int srcskip = info->s_skip;
   259     Uint8 *dst = (Uint8 *) info->d_pixels;
   260     int dstskip = info->d_skip;
   261     SDL_PixelFormat *srcfmt = info->src;
   262     SDL_PixelFormat *dstfmt = info->dst;
   263     unsigned alpha;
   264     vector unsigned char valpha;
   265     vector unsigned char vpermute;
   266     vector unsigned short vf800;
   267     vector unsigned int v8 = vec_splat_u32(8);
   268     vector unsigned int v16 = vec_add(v8, v8);
   269     vector unsigned short v2 = vec_splat_u16(2);
   270     vector unsigned short v3 = vec_splat_u16(3);
   271     /* 
   272         0x10 - 0x1f is the alpha
   273         0x00 - 0x0e evens are the red
   274         0x01 - 0x0f odds are zero
   275     */
   276     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   277         0x10, 0x00, 0x01, 0x01,
   278         0x10, 0x02, 0x01, 0x01,
   279         0x10, 0x04, 0x01, 0x01,
   280         0x10, 0x06, 0x01, 0x01
   281     );
   282     vector unsigned char vredalpha2 = (vector unsigned char) (
   283         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   284     );
   285     /*
   286         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   287         0x11 - 0x0f odds are blue
   288     */
   289     vector unsigned char vblue1 = VECUINT8_LITERAL(
   290         0x00, 0x01, 0x02, 0x11,
   291         0x04, 0x05, 0x06, 0x13,
   292         0x08, 0x09, 0x0a, 0x15,
   293         0x0c, 0x0d, 0x0e, 0x17
   294     );
   295     vector unsigned char vblue2 = (vector unsigned char)(
   296         vec_add((vector unsigned int)vblue1, v8)
   297     );
   298     /*
   299         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   300         0x10 - 0x0e evens are green
   301     */
   302     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   303         0x00, 0x01, 0x10, 0x03,
   304         0x04, 0x05, 0x12, 0x07,
   305         0x08, 0x09, 0x14, 0x0b,
   306         0x0c, 0x0d, 0x16, 0x0f
   307     );
   308     vector unsigned char vgreen2 = (vector unsigned char)(
   309         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   310     );
   311     
   312 
   313     assert(srcfmt->BytesPerPixel == 2);
   314     assert(dstfmt->BytesPerPixel == 4);
   315 
   316     vf800 = (vector unsigned short)vec_splat_u8(-7);
   317     vf800 = vec_sl(vf800, vec_splat_u16(8));
   318 
   319     if (dstfmt->Amask && srcfmt->alpha) {
   320         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   321         valpha = vec_splat(valpha, 0);
   322     } else {
   323         alpha = 0;
   324         valpha = vec_splat_u8(0);
   325     }
   326 
   327     vpermute = calc_swizzle32(NULL, dstfmt);
   328     while (height--) {
   329         vector unsigned char valigner;
   330         vector unsigned char voverflow;
   331         vector unsigned char vsrc;
   332 
   333         int width = info->d_width;
   334         int extrawidth;
   335 
   336         /* do scalar until we can align... */
   337 #define ONE_PIXEL_BLEND(condition, widthvar) \
   338         while (condition) { \
   339             unsigned sR, sG, sB; \
   340             unsigned short Pixel = *((unsigned short *)src); \
   341             sR = (Pixel >> 8) & 0xf8; \
   342             sG = (Pixel >> 3) & 0xfc; \
   343             sB = (Pixel << 3) & 0xf8; \
   344             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   345             src += 2; \
   346             dst += 4; \
   347             widthvar--; \
   348         }
   349         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   350 
   351         /* After all that work, here's the vector part! */
   352         extrawidth = (width % 8);  /* trailing unaligned stores */
   353         width -= extrawidth;
   354         vsrc = vec_ld(0, src);
   355         valigner = VEC_ALIGNER(src);
   356 
   357         while (width) {
   358             vector unsigned short vR, vG, vB;
   359             vector unsigned char vdst1, vdst2;
   360 
   361             voverflow = vec_ld(15, src);
   362             vsrc = vec_perm(vsrc, voverflow, valigner);
   363 
   364             vR = vec_and((vector unsigned short)vsrc, vf800);
   365             vB = vec_sl((vector unsigned short)vsrc, v3);
   366             vG = vec_sl(vB, v2);
   367 
   368             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   369             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   370             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   371             vdst1 = vec_perm(vdst1, valpha, vpermute);
   372             vec_st(vdst1, 0, dst);
   373 
   374             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   375             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   376             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   377             vdst2 = vec_perm(vdst2, valpha, vpermute);
   378             vec_st(vdst2, 16, dst);
   379             
   380             width -= 8;
   381             dst += 32;
   382             src += 16;
   383             vsrc = voverflow;
   384         }
   385 
   386         assert(width == 0);
   387 
   388 
   389         /* do scalar until we can align... */
   390         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   391 #undef ONE_PIXEL_BLEND
   392 
   393         src += srcskip;  /* move to next row, accounting for pitch. */
   394         dst += dstskip;
   395     }
   396 
   397 }
   398 
   399 
   400 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   401     int height = info->d_height;
   402     Uint8 *src = (Uint8 *) info->s_pixels;
   403     int srcskip = info->s_skip;
   404     Uint8 *dst = (Uint8 *) info->d_pixels;
   405     int dstskip = info->d_skip;
   406     SDL_PixelFormat *srcfmt = info->src;
   407     SDL_PixelFormat *dstfmt = info->dst;
   408     unsigned alpha;
   409     vector unsigned char valpha;
   410     vector unsigned char vpermute;
   411     vector unsigned short vf800;
   412     vector unsigned int v8 = vec_splat_u32(8);
   413     vector unsigned int v16 = vec_add(v8, v8);
   414     vector unsigned short v1 = vec_splat_u16(1);
   415     vector unsigned short v3 = vec_splat_u16(3);
   416     /* 
   417         0x10 - 0x1f is the alpha
   418         0x00 - 0x0e evens are the red
   419         0x01 - 0x0f odds are zero
   420     */
   421     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   422         0x10, 0x00, 0x01, 0x01,
   423         0x10, 0x02, 0x01, 0x01,
   424         0x10, 0x04, 0x01, 0x01,
   425         0x10, 0x06, 0x01, 0x01
   426     );
   427     vector unsigned char vredalpha2 = (vector unsigned char)(
   428         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   429     );
   430     /*
   431         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   432         0x11 - 0x0f odds are blue
   433     */
   434     vector unsigned char vblue1 = VECUINT8_LITERAL(
   435         0x00, 0x01, 0x02, 0x11,
   436         0x04, 0x05, 0x06, 0x13,
   437         0x08, 0x09, 0x0a, 0x15,
   438         0x0c, 0x0d, 0x0e, 0x17
   439     );
   440     vector unsigned char vblue2 = (vector unsigned char)(
   441         vec_add((vector unsigned int)vblue1, v8)
   442     );
   443     /*
   444         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   445         0x10 - 0x0e evens are green
   446     */
   447     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   448         0x00, 0x01, 0x10, 0x03,
   449         0x04, 0x05, 0x12, 0x07,
   450         0x08, 0x09, 0x14, 0x0b,
   451         0x0c, 0x0d, 0x16, 0x0f
   452     );
   453     vector unsigned char vgreen2 = (vector unsigned char)(
   454         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   455     );
   456     
   457 
   458     assert(srcfmt->BytesPerPixel == 2);
   459     assert(dstfmt->BytesPerPixel == 4);
   460 
   461     vf800 = (vector unsigned short)vec_splat_u8(-7);
   462     vf800 = vec_sl(vf800, vec_splat_u16(8));
   463 
   464     if (dstfmt->Amask && srcfmt->alpha) {
   465         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   466         valpha = vec_splat(valpha, 0);
   467     } else {
   468         alpha = 0;
   469         valpha = vec_splat_u8(0);
   470     }
   471 
   472     vpermute = calc_swizzle32(NULL, dstfmt);
   473     while (height--) {
   474         vector unsigned char valigner;
   475         vector unsigned char voverflow;
   476         vector unsigned char vsrc;
   477 
   478         int width = info->d_width;
   479         int extrawidth;
   480 
   481         /* do scalar until we can align... */
   482 #define ONE_PIXEL_BLEND(condition, widthvar) \
   483         while (condition) { \
   484             unsigned sR, sG, sB; \
   485             unsigned short Pixel = *((unsigned short *)src); \
   486             sR = (Pixel >> 7) & 0xf8; \
   487             sG = (Pixel >> 2) & 0xf8; \
   488             sB = (Pixel << 3) & 0xf8; \
   489             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   490             src += 2; \
   491             dst += 4; \
   492             widthvar--; \
   493         }
   494         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   495 
   496         /* After all that work, here's the vector part! */
   497         extrawidth = (width % 8);  /* trailing unaligned stores */
   498         width -= extrawidth;
   499         vsrc = vec_ld(0, src);
   500         valigner = VEC_ALIGNER(src);
   501 
   502         while (width) {
   503             vector unsigned short vR, vG, vB;
   504             vector unsigned char vdst1, vdst2;
   505 
   506             voverflow = vec_ld(15, src);
   507             vsrc = vec_perm(vsrc, voverflow, valigner);
   508 
   509             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   510             vB = vec_sl((vector unsigned short)vsrc, v3);
   511             vG = vec_sl(vB, v3);
   512 
   513             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   514             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   515             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   516             vdst1 = vec_perm(vdst1, valpha, vpermute);
   517             vec_st(vdst1, 0, dst);
   518 
   519             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   520             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   521             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   522             vdst2 = vec_perm(vdst2, valpha, vpermute);
   523             vec_st(vdst2, 16, dst);
   524             
   525             width -= 8;
   526             dst += 32;
   527             src += 16;
   528             vsrc = voverflow;
   529         }
   530 
   531         assert(width == 0);
   532 
   533 
   534         /* do scalar until we can align... */
   535         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   536 #undef ONE_PIXEL_BLEND
   537 
   538         src += srcskip;  /* move to next row, accounting for pitch. */
   539         dst += dstskip;
   540     }
   541 
   542 }
   543 
   544 static void BlitNtoNKey(SDL_BlitInfo *info);
   545 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   546 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   547 {
   548     int height = info->d_height;
   549     Uint32 *srcp = (Uint32 *) info->s_pixels;
   550     int srcskip = info->s_skip;
   551     Uint32 *dstp = (Uint32 *) info->d_pixels;
   552     int dstskip = info->d_skip;
   553     SDL_PixelFormat *srcfmt = info->src;
   554     int srcbpp = srcfmt->BytesPerPixel;
   555     SDL_PixelFormat *dstfmt = info->dst;
   556     int dstbpp = dstfmt->BytesPerPixel;
   557     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   558 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   559     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   560 	Uint32 ckey = info->src->colorkey;
   561     vector unsigned int valpha;
   562     vector unsigned char vpermute;
   563     vector unsigned char vzero;
   564     vector unsigned int vckey;
   565     vector unsigned int vrgbmask;
   566     vpermute = calc_swizzle32(srcfmt, dstfmt);
   567     if (info->d_width < 16) {
   568         if(copy_alpha) {
   569             return BlitNtoNKeyCopyAlpha(info);
   570         } else {
   571             return BlitNtoNKey(info);
   572         }
   573     }
   574     vzero = vec_splat_u8(0);
   575     if (alpha) {
   576         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   577         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   578     } else {
   579         valpha = (vector unsigned int)vzero;
   580     }
   581     ckey &= rgbmask;
   582     ((unsigned int *)(char*)&vckey)[0] = ckey;
   583     vckey = vec_splat(vckey, 0);
   584     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   585     vrgbmask = vec_splat(vrgbmask, 0);
   586 
   587     while (height--) {
   588 #define ONE_PIXEL_BLEND(condition, widthvar) \
   589         if (copy_alpha) { \
   590             while (condition) { \
   591                 Uint32 Pixel; \
   592                 unsigned sR, sG, sB, sA; \
   593                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   594                           sR, sG, sB, sA); \
   595                 if ( (Pixel & rgbmask) != ckey ) { \
   596                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   597                             sR, sG, sB, sA); \
   598                 } \
   599                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   600                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   601                 widthvar--; \
   602             } \
   603         } else { \
   604             while (condition) { \
   605                 Uint32 Pixel; \
   606                 unsigned sR, sG, sB; \
   607                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   608                 if ( Pixel != ckey ) { \
   609                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   610                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   611                               sR, sG, sB, alpha); \
   612                 } \
   613                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   614                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   615                 widthvar--; \
   616             } \
   617         }
   618         int width = info->d_width;
   619         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   620         assert(width > 0);
   621         if (width > 0) {
   622             int extrawidth = (width % 4);
   623             vector unsigned char valigner = VEC_ALIGNER(srcp);
   624             vector unsigned int vs = vec_ld(0, srcp);
   625             width -= extrawidth;
   626             assert(width >= 4);
   627             while (width) {
   628                 vector unsigned char vsel;
   629                 vector unsigned int vd;
   630                 vector unsigned int voverflow = vec_ld(15, srcp);
   631                 /* load the source vec */
   632                 vs = vec_perm(vs, voverflow, valigner);
   633                 /* vsel is set for items that match the key */
   634                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   635                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   636                 /* permute the src vec to the dest format */
   637                 vs = vec_perm(vs, valpha, vpermute);
   638                 /* load the destination vec */
   639                 vd = vec_ld(0, dstp);
   640                 /* select the source and dest into vs */
   641                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   642                 
   643                 vec_st(vd, 0, dstp);
   644                 srcp += 4;
   645                 width -= 4;
   646                 dstp += 4;
   647                 vs = voverflow;
   648             }
   649             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   650 #undef ONE_PIXEL_BLEND
   651             srcp += srcskip >> 2;
   652             dstp += dstskip >> 2;
   653         }
   654     }
   655 }
   656 
   657 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   658 /* Use this on a G5 */
   659 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   660 {
   661     int height = info->d_height;
   662     Uint32 *src = (Uint32 *) info->s_pixels;
   663     int srcskip = info->s_skip;
   664     Uint32 *dst = (Uint32 *) info->d_pixels;
   665     int dstskip = info->d_skip;
   666     SDL_PixelFormat *srcfmt = info->src;
   667     int srcbpp = srcfmt->BytesPerPixel;
   668     SDL_PixelFormat *dstfmt = info->dst;
   669     int dstbpp = dstfmt->BytesPerPixel;
   670     vector unsigned int vzero = vec_splat_u32(0);
   671     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   672     if (dstfmt->Amask && !srcfmt->Amask) {
   673         if (srcfmt->alpha) {
   674             vector unsigned char valpha;
   675             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   676             vzero = (vector unsigned int)vec_splat(valpha, 0);
   677         }
   678     }
   679 
   680     assert(srcbpp == 4);
   681     assert(dstbpp == 4);
   682 
   683     while (height--) {
   684         vector unsigned char valigner;
   685         vector unsigned int vbits;
   686         vector unsigned int voverflow;
   687         Uint32 bits;
   688         Uint8 r, g, b, a;
   689 
   690         int width = info->d_width;
   691         int extrawidth;
   692 
   693         /* do scalar until we can align... */
   694         while ((UNALIGNED_PTR(dst)) && (width)) {
   695             bits = *(src++);
   696             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   697             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   698             width--;
   699         }
   700 
   701         /* After all that work, here's the vector part! */
   702         extrawidth = (width % 4);
   703         width -= extrawidth;
   704         valigner = VEC_ALIGNER(src);
   705         vbits = vec_ld(0, src);
   706 
   707        while (width) {
   708             voverflow = vec_ld(15, src);
   709             src += 4;
   710             width -= 4;
   711             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   712             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   713             vec_st(vbits, 0, dst);  /* store it back out. */
   714             dst += 4;
   715             vbits = voverflow;
   716         }
   717 
   718         assert(width == 0);
   719 
   720         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   721         while (extrawidth) {
   722             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   723             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   724             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   725             extrawidth--;
   726         }
   727 
   728         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   729         dst += dstskip >> 2;
   730     }
   731 
   732 }
   733 
   734 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   735 /* Use this on a G4 */
       /*
        * Walks info->d_height rows of info->d_width pixels.  Each row is handled
        * in three phases: a scalar loop that runs while UNALIGNED_PTR(dst) is
        * true, a vector loop that converts four pixels per iteration with
        * vec_perm, and a scalar loop for the 0-3 pixels left over.  Prefetch
        * hints are issued on the DST_CHAN_SRC / DST_CHAN_DEST channels inside
        * the first two loops and both channels are stopped before returning.
        * Both pixel formats must be 4 bytes per pixel (asserted below).
        */
   736 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
   737 {
       /* Offsets added to the Uint32 pointers when aiming the prefetch hints. */
   738     const int scalar_dst_lead = sizeof (Uint32) * 4;
   739     const int vector_dst_lead = sizeof (Uint32) * 16;
   740 
   741     int height = info->d_height;
   742     Uint32 *src = (Uint32 *) info->s_pixels;
   743     int srcskip = info->s_skip;
   744     Uint32 *dst = (Uint32 *) info->d_pixels;
   745     int dstskip = info->d_skip;
   746     SDL_PixelFormat *srcfmt = info->src;
   747     int srcbpp = srcfmt->BytesPerPixel;
   748     SDL_PixelFormat *dstfmt = info->dst;
   749     int dstbpp = dstfmt->BytesPerPixel;
       /* vzero is the second operand of the swizzle permute in the vector loop. */
   750     vector unsigned int vzero = vec_splat_u32(0);
       /* calc_swizzle32() is defined elsewhere; its result drives the swizzle. */
   751     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
       /* Destination has an alpha mask but the source does not: when the source
          format carries a non-zero alpha value, replace the zero vector with
          that value splatted across every byte. */
   752     if (dstfmt->Amask && !srcfmt->Amask) {
   753         if (srcfmt->alpha) {
   754             vector unsigned char valpha;
       /* only element 0 is written; vec_splat(valpha, 0) below reads just that element */
   755             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   756             vzero = (vector unsigned int)vec_splat(valpha, 0);
   757         }
   758     }
   759 
       /* srcbpp/dstbpp exist only for these checks. */
   760     assert(srcbpp == 4);
   761     assert(dstbpp == 4);
   762 
   763     while (height--) {
   764         vector unsigned char valigner;
   765         vector unsigned int vbits;
   766         vector unsigned int voverflow;
   767         Uint32 bits;
   768         Uint8 r, g, b, a;
   769 
   770         int width = info->d_width;
   771         int extrawidth;
   772 
   773         /* do scalar until we can align... */
   774         while ((UNALIGNED_PTR(dst)) && (width)) {
   775             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   776             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   777             bits = *(src++);
   778             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   779             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   780             width--;
   781         }
   782 
   783         /* After all that work, here's the vector part! */
       /* Split what is left into a multiple of four pixels (vector loop) and
          a remainder of 0-3 pixels (scalar tail). */
   784         extrawidth = (width % 4);
   785         width -= extrawidth;
       /* NOTE(review): VEC_ALIGNER is defined elsewhere; presumably it yields
          the permute mask that realigns a possibly unaligned src - confirm. */
   786         valigner = VEC_ALIGNER(src);
   787         vbits = vec_ld(0, src);
   788 
   789         while (width) {
   790             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   791             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   792             voverflow = vec_ld(15, src);
   793             src += 4;
   794             width -= 4;
   795             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   796             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   797             vec_st(vbits, 0, dst);  /* store it back out. */
   798             dst += 4;
       /* the second load is reused as the first half of the next iteration */
   799             vbits = voverflow;
   800         }
   801         
   802         assert(width == 0);
   803 
   804         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   805         while (extrawidth) {
   806             bits = *(src++);  /* max 3 pixels (width % 4), don't bother with prefetch. */
   807             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   808             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   809             extrawidth--;
   810         }
   811 
       /* the skips are shifted right by 2 because src/dst are Uint32 pointers */
   812         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   813         dst += dstskip >> 2;
   814     }
   815 
       /* Stop the two prefetch streams used by the vec_dstt/vec_dstst calls above. */
   816     vec_dss(DST_CHAN_SRC);
   817     vec_dss(DST_CHAN_DEST);
   818 }
   819 
   820 static Uint32 GetBlitFeatures( void )
   821 {
   822     static Uint32 features = 0xffffffff;
   823     if (features == 0xffffffff) {
   824         /* Provide an override for testing .. */
   825         char *override = getenv("SDL_ALTIVEC_BLIT_FEATURES");
   826         if (override) {
   827             features = 0;
   828             sscanf(override, "%u", &features);
   829         } else {
   830             features = ( 0
   831                 /* Feature 1 is has-MMX */
   832                 | ((SDL_HasMMX()) ? 1 : 0)
   833                 /* Feature 2 is has-AltiVec */
   834                 | ((SDL_HasAltiVec()) ? 2 : 0)
   835                 /* Feature 4 is dont-use-prefetch */
   836                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   837                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   838             );
   839         }
   840     }
   841     return features;
   842 }
   843 #else
   844 /* Feature 1 is has-MMX */
       /* Fallback used on the #else side of the conditional above: only the MMX
          bit can be reported, and SDL_HasMMX() is called on every expansion
          (nothing is cached here). */
   845 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   846 #endif
   847 
   848 /* This is now endian dependent */
       /* HI and LO are array indices (0 or 1).  The RGB888_*_TWO macros below
          shift the pixel converted from src[HI] into the upper 16 bits of the
          combined 32-bit store and place src[LO] in the lower 16 bits;
          RGB565_32 uses src[LO] / src[HI] to pick the two bytes of a 16-bit
          pixel for its even / odd lookup-table entries. */
   849 #if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
   850 #define HI	1
   851 #define LO	0
   852 #else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
   853 #define HI	0
   854 #define LO	1
   855 #endif
   856 
   857 #ifdef USE_ASMBLIT
   858 
   859 /* Heheheh, we coerce Hermes into using SDL blit information */
   860 #define X86_ASSEMBLER
   861 #define HermesConverterInterface	SDL_BlitInfo
   862 #define HermesClearInterface		void
   863 #define STACKCALL
   864 
   865 #include "HeadMMX.h"
   866 #include "HeadX86.h"
   867 
   868 #else
   869 
   870 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   871 #define RGB888_RGB332(dst, src) { \
   872 	dst = (((src)&0x00E00000)>>16)| \
   873 	      (((src)&0x0000E000)>>11)| \
   874 	      (((src)&0x000000C0)>>6); \
   875 }
   876 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   877 {
   878 #ifndef USE_DUFFS_LOOP
   879 	int c;
   880 #endif
   881 	int width, height;
   882 	Uint32 *src;
   883 	const Uint8 *map;
   884 	Uint8 *dst;
   885 	int srcskip, dstskip;
   886 
   887 	/* Set up some basic variables */
   888 	width = info->d_width;
   889 	height = info->d_height;
   890 	src = (Uint32 *)info->s_pixels;
   891 	srcskip = info->s_skip/4;
   892 	dst = info->d_pixels;
   893 	dstskip = info->d_skip;
   894 	map = info->table;
   895 
   896 	if ( map == NULL ) {
   897 		while ( height-- ) {
   898 #ifdef USE_DUFFS_LOOP
   899 			DUFFS_LOOP(
   900 				RGB888_RGB332(*dst++, *src);
   901 			, width);
   902 #else
   903 			for ( c=width/4; c; --c ) {
   904 				/* Pack RGB into 8bit pixel */
   905 				++src;
   906 				RGB888_RGB332(*dst++, *src);
   907 				++src;
   908 				RGB888_RGB332(*dst++, *src);
   909 				++src;
   910 				RGB888_RGB332(*dst++, *src);
   911 				++src;
   912 			}
   913 			switch ( width & 3 ) {
   914 				case 3:
   915 					RGB888_RGB332(*dst++, *src);
   916 					++src;
   917 				case 2:
   918 					RGB888_RGB332(*dst++, *src);
   919 					++src;
   920 				case 1:
   921 					RGB888_RGB332(*dst++, *src);
   922 					++src;
   923 			}
   924 #endif /* USE_DUFFS_LOOP */
   925 			src += srcskip;
   926 			dst += dstskip;
   927 		}
   928 	} else {
   929 		int Pixel;
   930 
   931 		while ( height-- ) {
   932 #ifdef USE_DUFFS_LOOP
   933 			DUFFS_LOOP(
   934 				RGB888_RGB332(Pixel, *src);
   935 				*dst++ = map[Pixel];
   936 				++src;
   937 			, width);
   938 #else
   939 			for ( c=width/4; c; --c ) {
   940 				/* Pack RGB into 8bit pixel */
   941 				RGB888_RGB332(Pixel, *src);
   942 				*dst++ = map[Pixel];
   943 				++src;
   944 				RGB888_RGB332(Pixel, *src);
   945 				*dst++ = map[Pixel];
   946 				++src;
   947 				RGB888_RGB332(Pixel, *src);
   948 				*dst++ = map[Pixel];
   949 				++src;
   950 				RGB888_RGB332(Pixel, *src);
   951 				*dst++ = map[Pixel];
   952 				++src;
   953 			}
   954 			switch ( width & 3 ) {
   955 				case 3:
   956 					RGB888_RGB332(Pixel, *src);
   957 					*dst++ = map[Pixel];
   958 					++src;
   959 				case 2:
   960 					RGB888_RGB332(Pixel, *src);
   961 					*dst++ = map[Pixel];
   962 					++src;
   963 				case 1:
   964 					RGB888_RGB332(Pixel, *src);
   965 					*dst++ = map[Pixel];
   966 					++src;
   967 			}
   968 #endif /* USE_DUFFS_LOOP */
   969 			src += srcskip;
   970 			dst += dstskip;
   971 		}
   972 	}
   973 }
   974 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   975 #define RGB888_RGB555(dst, src) { \
   976 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>9)| \
   977 	                   (((*src)&0x0000F800)>>6)| \
   978 	                   (((*src)&0x000000F8)>>3); \
   979 }
   980 #define RGB888_RGB555_TWO(dst, src) { \
   981 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   982 	                     (((src[HI])&0x0000F800)>>6)| \
   983 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   984 	                     (((src[LO])&0x00F80000)>>9)| \
   985 	                     (((src[LO])&0x0000F800)>>6)| \
   986 	                     (((src[LO])&0x000000F8)>>3); \
   987 }
   988 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   989 {
   990 #ifndef USE_DUFFS_LOOP
   991 	int c;
   992 #endif
   993 	int width, height;
   994 	Uint32 *src;
   995 	Uint16 *dst;
   996 	int srcskip, dstskip;
   997 
   998 	/* Set up some basic variables */
   999 	width = info->d_width;
  1000 	height = info->d_height;
  1001 	src = (Uint32 *)info->s_pixels;
  1002 	srcskip = info->s_skip/4;
  1003 	dst = (Uint16 *)info->d_pixels;
  1004 	dstskip = info->d_skip/2;
  1005 
  1006 #ifdef USE_DUFFS_LOOP
  1007 	while ( height-- ) {
  1008 		DUFFS_LOOP(
  1009 			RGB888_RGB555(dst, src);
  1010 			++src;
  1011 			++dst;
  1012 		, width);
  1013 		src += srcskip;
  1014 		dst += dstskip;
  1015 	}
  1016 #else
  1017 	/* Memory align at 4-byte boundary, if necessary */
  1018 	if ( (long)dst & 0x03 ) {
  1019 		/* Don't do anything if width is 0 */
  1020 		if ( width == 0 ) {
  1021 			return;
  1022 		}
  1023 		--width;
  1024 
  1025 		while ( height-- ) {
  1026 			/* Perform copy alignment */
  1027 			RGB888_RGB555(dst, src);
  1028 			++src;
  1029 			++dst;
  1030 
  1031 			/* Copy in 4 pixel chunks */
  1032 			for ( c=width/4; c; --c ) {
  1033 				RGB888_RGB555_TWO(dst, src);
  1034 				src += 2;
  1035 				dst += 2;
  1036 				RGB888_RGB555_TWO(dst, src);
  1037 				src += 2;
  1038 				dst += 2;
  1039 			}
  1040 			/* Get any leftovers */
  1041 			switch (width & 3) {
  1042 				case 3:
  1043 					RGB888_RGB555(dst, src);
  1044 					++src;
  1045 					++dst;
  1046 				case 2:
  1047 					RGB888_RGB555_TWO(dst, src);
  1048 					src += 2;
  1049 					dst += 2;
  1050 					break;
  1051 				case 1:
  1052 					RGB888_RGB555(dst, src);
  1053 					++src;
  1054 					++dst;
  1055 					break;
  1056 			}
  1057 			src += srcskip;
  1058 			dst += dstskip;
  1059 		}
  1060 	} else { 
  1061 		while ( height-- ) {
  1062 			/* Copy in 4 pixel chunks */
  1063 			for ( c=width/4; c; --c ) {
  1064 				RGB888_RGB555_TWO(dst, src);
  1065 				src += 2;
  1066 				dst += 2;
  1067 				RGB888_RGB555_TWO(dst, src);
  1068 				src += 2;
  1069 				dst += 2;
  1070 			}
  1071 			/* Get any leftovers */
  1072 			switch (width & 3) {
  1073 				case 3:
  1074 					RGB888_RGB555(dst, src);
  1075 					++src;
  1076 					++dst;
  1077 				case 2:
  1078 					RGB888_RGB555_TWO(dst, src);
  1079 					src += 2;
  1080 					dst += 2;
  1081 					break;
  1082 				case 1:
  1083 					RGB888_RGB555(dst, src);
  1084 					++src;
  1085 					++dst;
  1086 					break;
  1087 			}
  1088 			src += srcskip;
  1089 			dst += dstskip;
  1090 		}
  1091 	}
  1092 #endif /* USE_DUFFS_LOOP */
  1093 }
  1094 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1095 #define RGB888_RGB565(dst, src) { \
  1096 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>8)| \
  1097 	                   (((*src)&0x0000FC00)>>5)| \
  1098 	                   (((*src)&0x000000F8)>>3); \
  1099 }
  1100 #define RGB888_RGB565_TWO(dst, src) { \
  1101 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1102 	                     (((src[HI])&0x0000FC00)>>5)| \
  1103 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1104 	                     (((src[LO])&0x00F80000)>>8)| \
  1105 	                     (((src[LO])&0x0000FC00)>>5)| \
  1106 	                     (((src[LO])&0x000000F8)>>3); \
  1107 }
  1108 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1109 {
  1110 #ifndef USE_DUFFS_LOOP
  1111 	int c;
  1112 #endif
  1113 	int width, height;
  1114 	Uint32 *src;
  1115 	Uint16 *dst;
  1116 	int srcskip, dstskip;
  1117 
  1118 	/* Set up some basic variables */
  1119 	width = info->d_width;
  1120 	height = info->d_height;
  1121 	src = (Uint32 *)info->s_pixels;
  1122 	srcskip = info->s_skip/4;
  1123 	dst = (Uint16 *)info->d_pixels;
  1124 	dstskip = info->d_skip/2;
  1125 
  1126 #ifdef USE_DUFFS_LOOP
  1127 	while ( height-- ) {
  1128 		DUFFS_LOOP(
  1129 			RGB888_RGB565(dst, src);
  1130 			++src;
  1131 			++dst;
  1132 		, width);
  1133 		src += srcskip;
  1134 		dst += dstskip;
  1135 	}
  1136 #else
  1137 	/* Memory align at 4-byte boundary, if necessary */
  1138 	if ( (long)dst & 0x03 ) {
  1139 		/* Don't do anything if width is 0 */
  1140 		if ( width == 0 ) {
  1141 			return;
  1142 		}
  1143 		--width;
  1144 
  1145 		while ( height-- ) {
  1146 			/* Perform copy alignment */
  1147 			RGB888_RGB565(dst, src);
  1148 			++src;
  1149 			++dst;
  1150 
  1151 			/* Copy in 4 pixel chunks */
  1152 			for ( c=width/4; c; --c ) {
  1153 				RGB888_RGB565_TWO(dst, src);
  1154 				src += 2;
  1155 				dst += 2;
  1156 				RGB888_RGB565_TWO(dst, src);
  1157 				src += 2;
  1158 				dst += 2;
  1159 			}
  1160 			/* Get any leftovers */
  1161 			switch (width & 3) {
  1162 				case 3:
  1163 					RGB888_RGB565(dst, src);
  1164 					++src;
  1165 					++dst;
  1166 				case 2:
  1167 					RGB888_RGB565_TWO(dst, src);
  1168 					src += 2;
  1169 					dst += 2;
  1170 					break;
  1171 				case 1:
  1172 					RGB888_RGB565(dst, src);
  1173 					++src;
  1174 					++dst;
  1175 					break;
  1176 			}
  1177 			src += srcskip;
  1178 			dst += dstskip;
  1179 		}
  1180 	} else { 
  1181 		while ( height-- ) {
  1182 			/* Copy in 4 pixel chunks */
  1183 			for ( c=width/4; c; --c ) {
  1184 				RGB888_RGB565_TWO(dst, src);
  1185 				src += 2;
  1186 				dst += 2;
  1187 				RGB888_RGB565_TWO(dst, src);
  1188 				src += 2;
  1189 				dst += 2;
  1190 			}
  1191 			/* Get any leftovers */
  1192 			switch (width & 3) {
  1193 				case 3:
  1194 					RGB888_RGB565(dst, src);
  1195 					++src;
  1196 					++dst;
  1197 				case 2:
  1198 					RGB888_RGB565_TWO(dst, src);
  1199 					src += 2;
  1200 					dst += 2;
  1201 					break;
  1202 				case 1:
  1203 					RGB888_RGB565(dst, src);
  1204 					++src;
  1205 					++dst;
  1206 					break;
  1207 			}
  1208 			src += srcskip;
  1209 			dst += dstskip;
  1210 		}
  1211 	}
  1212 #endif /* USE_DUFFS_LOOP */
  1213 }
  1214 
  1215 #endif /* USE_ASMBLIT */
  1216 
  1217 
  1218 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1219 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1220 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1221 {
  1222 #ifndef USE_DUFFS_LOOP
  1223 	int c;
  1224 #endif
  1225 	int width, height;
  1226 	Uint8 *src;
  1227 	Uint32 *dst;
  1228 	int srcskip, dstskip;
  1229 
  1230 	/* Set up some basic variables */
  1231 	width = info->d_width;
  1232 	height = info->d_height;
  1233 	src = (Uint8 *)info->s_pixels;
  1234 	srcskip = info->s_skip;
  1235 	dst = (Uint32 *)info->d_pixels;
  1236 	dstskip = info->d_skip/4;
  1237 
  1238 #ifdef USE_DUFFS_LOOP
  1239 	while ( height-- ) {
  1240 		DUFFS_LOOP(
  1241 		{
  1242 			*dst++ = RGB565_32(dst, src, map);
  1243 			src += 2;
  1244 		},
  1245 		width);
  1246 		src += srcskip;
  1247 		dst += dstskip;
  1248 	}
  1249 #else
  1250 	while ( height-- ) {
  1251 		/* Copy in 4 pixel chunks */
  1252 		for ( c=width/4; c; --c ) {
  1253 			*dst++ = RGB565_32(dst, src, map);
  1254 			src += 2;
  1255 			*dst++ = RGB565_32(dst, src, map);
  1256 			src += 2;
  1257 			*dst++ = RGB565_32(dst, src, map);
  1258 			src += 2;
  1259 			*dst++ = RGB565_32(dst, src, map);
  1260 			src += 2;
  1261 		}
  1262 		/* Get any leftovers */
  1263 		switch (width & 3) {
  1264 			case 3:
  1265 				*dst++ = RGB565_32(dst, src, map);
  1266 				src += 2;
  1267 			case 2:
  1268 				*dst++ = RGB565_32(dst, src, map);
  1269 				src += 2;
  1270 			case 1:
  1271 				*dst++ = RGB565_32(dst, src, map);
  1272 				src += 2;
  1273 				break;
  1274 		}
  1275 		src += srcskip;
  1276 		dst += dstskip;
  1277 	}
  1278 #endif /* USE_DUFFS_LOOP */
  1279 }
  1280 
  1281 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8
        *
        * 512 entries = 256 (even, odd) pairs, one pair per possible byte value.
        * RGB565_32() adds the even entry selected by one byte of the 16-bit
        * source pixel (src[LO]) to the odd entry selected by the other byte
        * (src[HI]).  In this table every odd entry carries 0xff in its top
        * byte and the even entries have a zero top byte.
        */
  1282 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1283 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1284 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1285 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1286 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1287 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1288 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1289 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1290 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1291 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1292 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1293 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1294 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1295 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1296 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1297 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1298 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1299 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1300 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1301 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1302 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1303 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1304 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1305 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1306 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1307 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1308 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1309 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1310 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1311 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1312 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1313 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1314 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1315 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1316 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1317 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1318 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1319 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1320 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1321 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1322 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1323 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1324 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1325 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1326 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1327 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1328 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1329 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1330 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1331 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1332 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1333 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1334 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1335 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1336 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1337 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1338 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1339 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1340 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1341 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1342 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1343 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1344 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1345 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1346 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1347 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1348 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1349 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1350 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1351 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1352 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1353 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1354 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1355 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1356 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1357 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1358 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1359 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1360 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1361 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1362 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1363 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1364 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1365 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1366 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1367 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1368 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1369 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1370 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1371 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1372 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1373 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1374 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1375 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1376 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1377 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1378 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1379 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1380 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1381 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1382 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1383 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1384 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1385 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1386 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1387 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1388 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1389 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1390 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1391 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1392 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1393 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1394 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1395 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1396 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1397 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1398 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1399 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1400 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1401 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1402 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1403 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1404 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1405 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1406 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1407 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1408 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1409 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1410 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1411 };
       /* Blit entry point: forwards to Blit_RGB565_32() with the ARGB8888 table. */
  1412 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
  1413 {
  1414     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1415 }
  1416 
  1417 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8
        *
        * 512 entries = 256 (even, odd) pairs, one pair per possible byte value.
        * RGB565_32() adds the even entry selected by one byte of the 16-bit
        * source pixel (src[LO]) to the odd entry selected by the other byte
        * (src[HI]).  In this table every even entry carries 0xff in its top
        * byte and the odd entries have a zero top byte.
        */
  1418 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1419 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1420 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1421 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1422 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1423 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1424 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1425 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1426 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1427 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1428 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1429 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1430 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1431 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1432 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1433 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1434 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1435 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1436 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1437 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1438 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1439 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1440 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1441 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1442 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1443 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1444 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1445 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1446 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1447 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1448 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1449 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1450 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1451 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1452 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1453 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1454 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1455 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1456 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1457 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1458 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1459 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1460 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1461 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1462 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1463 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1464 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1465 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1466 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1467 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1468 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1469 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1470 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1471 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1472 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1473 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1474 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1475 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1476 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1477 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1478 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1479 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1480 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1481 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1482 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1483 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1484 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1485 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1486 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1487 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1488 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1489 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1490 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1491 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1492 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1493 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1494 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1495 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1496 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1497 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1498 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1499 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1500 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1501 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1502 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1503 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1504 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1505 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1506 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1507 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1508 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1509 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1510 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1511 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1512 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1513 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1514 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1515 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1516 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1517 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1518 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1519 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1520 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1521 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1522 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1523 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1524 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1525 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1526 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1527 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1528 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1529 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1530 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1531 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1532 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1533 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1534 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1535 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1536 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1537 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1538 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1539 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1540 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1541 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1542 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1543 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1544 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1545 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1546 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1547 };
       /* Blit entry point: forwards to Blit_RGB565_32() with the ABGR8888 table. */
  1548 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
  1549 {
  1550     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1551 }
  1552 
  1553 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1554 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1555 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1556 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1557 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1558 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1559 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1560 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1561 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1562 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1563 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1564 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1565 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1566 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1567 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1568 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1569 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1570 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1571 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1572 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1573 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1574 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1575 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1576 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1577 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1578 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1579 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1580 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1581 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1582 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1583 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1584 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1585 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1586 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1587 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1588 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1589 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1590 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1591 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1592 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1593 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1594 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1595 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1596 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1597 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1598 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1599 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1600 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1601 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1602 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1603 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1604 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1605 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1606 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1607 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1608 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1609 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1610 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1611 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1612 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1613 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1614 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1615 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1616 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1617 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1618 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1619 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1620 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1621 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1622 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1623 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1624 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1625 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1626 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1627 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1628 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1629 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1630 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1631 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1632 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1633 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1634 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1635 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1636 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1637 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1638 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1639 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1640 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1641 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1642 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1643 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1644 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1645 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1646 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1647 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1648 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1649 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1650 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1651 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1652 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1653 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1654 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1655 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1656 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1657 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1658 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1659 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1660 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1661 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1662 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1663 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1664 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1665 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1666 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1667 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1668 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1669 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1670 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1671 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1672 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1673 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1674 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1675 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1676 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1677 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1678 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1679 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1680 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1681 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1682 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1683 };
  1684 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
  1685 {
  1686     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1687 }
  1688 
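/* Lookup table for the optimized RGB 5-6-5 --> BGRA 8-8-8-8 blit.
 * Blit_RGB565_BGRA8888() below passes it to Blit_RGB565_32().
 * The 512 entries are laid out as 256 consecutive pairs -- presumably
 * one pair per source byte value; confirm against Blit_RGB565_32().
 */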
  1690 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1691 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1692 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1693 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1694 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1695 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1696 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1697 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1698 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1699 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1700 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1701 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1702 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1703 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1704 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1705 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1706 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1707 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1708 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1709 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1710 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1711 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1712 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1713 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1714 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1715 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1716 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1717 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1718 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1719 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1720 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1721 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1722 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1723 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1724 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1725 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1726 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1727 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1728 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1729 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1730 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1731 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1732 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1733 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1734 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1735 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1736 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1737 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1738 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1739 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1740 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1741 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1742 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1743 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1744 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1745 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1746 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1747 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1748 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1749 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1750 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1751 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1752 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1753 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1754 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1755 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1756 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1757 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1758 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1759 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1760 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1761 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1762 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1763 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1764 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1765 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1766 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1767 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1768 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1769 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1770 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1771 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1772 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1773 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1774 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1775 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1776 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1777 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1778 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1779 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1780 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1781 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1782 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1783 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1784 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1785 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1786 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1787 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1788 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1789 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1790 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1791 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1792 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1793 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1794 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1795 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1796 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1797 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1798 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1799 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1800 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1801 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1802 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1803 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1804 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1805 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1806 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1807 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1808 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1809 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1810 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1811 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1812 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1813 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1814 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1815 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1816 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1817 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1818 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1819 };
  1820 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
  1821 {
  1822     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1823 }
  1824 
  1825 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1826 #ifndef RGB888_RGB332
  1827 #define RGB888_RGB332(dst, src) { \
  1828 	dst = (((src)&0x00E00000)>>16)| \
  1829 	      (((src)&0x0000E000)>>11)| \
  1830 	      (((src)&0x000000C0)>>6); \
  1831 }
  1832 #endif
  1833 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1834 {
  1835 #ifndef USE_DUFFS_LOOP
  1836 	int c;
  1837 #endif
  1838 	int Pixel;
  1839 	int width, height;
  1840 	Uint32 *src;
  1841 	const Uint8 *map;
  1842 	Uint8 *dst;
  1843 	int srcskip, dstskip;
  1844 
  1845 	/* Set up some basic variables */
  1846 	width = info->d_width;
  1847 	height = info->d_height;
  1848 	src = (Uint32 *)info->s_pixels;
  1849 	srcskip = info->s_skip/4;
  1850 	dst = info->d_pixels;
  1851 	dstskip = info->d_skip;
  1852 	map = info->table;
  1853 
  1854 #ifdef USE_DUFFS_LOOP
  1855 	while ( height-- ) {
  1856 		DUFFS_LOOP(
  1857 			RGB888_RGB332(Pixel, *src);
  1858 			*dst++ = map[Pixel];
  1859 			++src;
  1860 		, width);
  1861 		src += srcskip;
  1862 		dst += dstskip;
  1863 	}
  1864 #else
  1865 	while ( height-- ) {
  1866 		for ( c=width/4; c; --c ) {
  1867 			/* Pack RGB into 8bit pixel */
  1868 			RGB888_RGB332(Pixel, *src);
  1869 			*dst++ = map[Pixel];
  1870 			++src;
  1871 			RGB888_RGB332(Pixel, *src);
  1872 			*dst++ = map[Pixel];
  1873 			++src;
  1874 			RGB888_RGB332(Pixel, *src);
  1875 			*dst++ = map[Pixel];
  1876 			++src;
  1877 			RGB888_RGB332(Pixel, *src);
  1878 			*dst++ = map[Pixel];
  1879 			++src;
  1880 		}
  1881 		switch ( width & 3 ) {
  1882 			case 3:
  1883 				RGB888_RGB332(Pixel, *src);
  1884 				*dst++ = map[Pixel];
  1885 				++src;
  1886 			case 2:
  1887 				RGB888_RGB332(Pixel, *src);
  1888 				*dst++ = map[Pixel];
  1889 				++src;
  1890 			case 1:
  1891 				RGB888_RGB332(Pixel, *src);
  1892 				*dst++ = map[Pixel];
  1893 				++src;
  1894 		}
  1895 		src += srcskip;
  1896 		dst += dstskip;
  1897 	}
  1898 #endif /* USE_DUFFS_LOOP */
  1899 }
  1900 static void BlitNto1(SDL_BlitInfo *info)
  1901 {
  1902 #ifndef USE_DUFFS_LOOP
  1903 	int c;
  1904 #endif
  1905 	int width, height;
  1906 	Uint8 *src;
  1907 	const Uint8 *map;
  1908 	Uint8 *dst;
  1909 	int srcskip, dstskip;
  1910 	int srcbpp;
  1911 	Uint32 Pixel;
  1912 	int  sR, sG, sB;
  1913 	SDL_PixelFormat *srcfmt;
  1914 
  1915 	/* Set up some basic variables */
  1916 	width = info->d_width;
  1917 	height = info->d_height;
  1918 	src = info->s_pixels;
  1919 	srcskip = info->s_skip;
  1920 	dst = info->d_pixels;
  1921 	dstskip = info->d_skip;
  1922 	map = info->table;
  1923 	srcfmt = info->src;
  1924 	srcbpp = srcfmt->BytesPerPixel;
  1925 
  1926 	if ( map == NULL ) {
  1927 		while ( height-- ) {
  1928 #ifdef USE_DUFFS_LOOP
  1929 			DUFFS_LOOP(
  1930 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1931 								sR, sG, sB);
  1932 				if ( 1 ) {
  1933 				  	/* Pack RGB into 8bit pixel */
  1934 				  	*dst = ((sR>>5)<<(3+2))|
  1935 					        ((sG>>5)<<(2)) |
  1936 					        ((sB>>6)<<(0)) ;
  1937 				}
  1938 				dst++;
  1939 				src += srcbpp;
  1940 			, width);
  1941 #else
  1942 			for ( c=width; c; --c ) {
  1943 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1944 								sR, sG, sB);
  1945 				if ( 1 ) {
  1946 				  	/* Pack RGB into 8bit pixel */
  1947 				  	*dst = ((sR>>5)<<(3+2))|
  1948 					        ((sG>>5)<<(2)) |
  1949 					        ((sB>>6)<<(0)) ;
  1950 				}
  1951 				dst++;
  1952 				src += srcbpp;
  1953 			}
  1954 #endif
  1955 			src += srcskip;
  1956 			dst += dstskip;
  1957 		}
  1958 	} else {
  1959 		while ( height-- ) {
  1960 #ifdef USE_DUFFS_LOOP
  1961 			DUFFS_LOOP(
  1962 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1963 								sR, sG, sB);
  1964 				if ( 1 ) {
  1965 				  	/* Pack RGB into 8bit pixel */
  1966 				  	*dst = map[((sR>>5)<<(3+2))|
  1967 						   ((sG>>5)<<(2))  |
  1968 						   ((sB>>6)<<(0))  ];
  1969 				}
  1970 				dst++;
  1971 				src += srcbpp;
  1972 			, width);
  1973 #else
  1974 			for ( c=width; c; --c ) {
  1975 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1976 								sR, sG, sB);
  1977 				if ( 1 ) {
  1978 				  	/* Pack RGB into 8bit pixel */
  1979 				  	*dst = map[((sR>>5)<<(3+2))|
  1980 						   ((sG>>5)<<(2))  |
  1981 						   ((sB>>6)<<(0))  ];
  1982 				}
  1983 				dst++;
  1984 				src += srcbpp;
  1985 			}
  1986 #endif /* USE_DUFFS_LOOP */
  1987 			src += srcskip;
  1988 			dst += dstskip;
  1989 		}
  1990 	}
  1991 }
  1992 static void BlitNtoN(SDL_BlitInfo *info)
  1993 {
  1994 	int width = info->d_width;
  1995 	int height = info->d_height;
  1996 	Uint8 *src = info->s_pixels;
  1997 	int srcskip = info->s_skip;
  1998 	Uint8 *dst = info->d_pixels;
  1999 	int dstskip = info->d_skip;
  2000 	SDL_PixelFormat *srcfmt = info->src;
  2001 	int srcbpp = srcfmt->BytesPerPixel;
  2002 	SDL_PixelFormat *dstfmt = info->dst;
  2003 	int dstbpp = dstfmt->BytesPerPixel;
  2004 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2005 
  2006 	while ( height-- ) {
  2007 		DUFFS_LOOP(
  2008 		{
  2009 		        Uint32 Pixel;
  2010 			unsigned sR;
  2011 			unsigned sG;
  2012 			unsigned sB;
  2013 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2014 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2015 			dst += dstbpp;
  2016 			src += srcbpp;
  2017 		},
  2018 		width);
  2019 		src += srcskip;
  2020 		dst += dstskip;
  2021 	}
  2022 }
  2023 
  2024 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2025 {
  2026 	int width = info->d_width;
  2027 	int height = info->d_height;
  2028 	Uint8 *src = info->s_pixels;
  2029 	int srcskip = info->s_skip;
  2030 	Uint8 *dst = info->d_pixels;
  2031 	int dstskip = info->d_skip;
  2032 	SDL_PixelFormat *srcfmt = info->src;
  2033 	int srcbpp = srcfmt->BytesPerPixel;
  2034 	SDL_PixelFormat *dstfmt = info->dst;
  2035 	int dstbpp = dstfmt->BytesPerPixel;
  2036 	int c;
  2037 
  2038 	/* FIXME: should map alpha to [0..255] correctly! */
  2039 	while ( height-- ) {
  2040 		for ( c=width; c; --c ) {
  2041 		        Uint32 Pixel;
  2042 			unsigned sR, sG, sB, sA;
  2043 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2044 				      sR, sG, sB, sA);
  2045 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2046 				      sR, sG, sB, sA);
  2047 			dst += dstbpp;
  2048 			src += srcbpp;
  2049 		}
  2050 		src += srcskip;
  2051 		dst += dstskip;
  2052 	}
  2053 }
  2054 
  2055 static void BlitNto1Key(SDL_BlitInfo *info)
  2056 {
  2057 	int width = info->d_width;
  2058 	int height = info->d_height;
  2059 	Uint8 *src = info->s_pixels;
  2060 	int srcskip = info->s_skip;
  2061 	Uint8 *dst = info->d_pixels;
  2062 	int dstskip = info->d_skip;
  2063 	SDL_PixelFormat *srcfmt = info->src;
  2064 	const Uint8 *palmap = info->table;
  2065 	Uint32 ckey = srcfmt->colorkey;
  2066 	Uint32 rgbmask = ~srcfmt->Amask;
  2067 	int srcbpp;
  2068 	Uint32 Pixel;
  2069 	Uint8  sR, sG, sB;
  2070 
  2071 	/* Set up some basic variables */
  2072 	srcbpp = srcfmt->BytesPerPixel;
  2073 	ckey &= rgbmask;
  2074 
  2075 	if ( palmap == NULL ) {
  2076 		while ( height-- ) {
  2077 			DUFFS_LOOP(
  2078 			{
  2079 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2080 								sR, sG, sB);
  2081 				if ( (Pixel & rgbmask) != ckey ) {
  2082 				  	/* Pack RGB into 8bit pixel */
  2083 				  	*dst = ((sR>>5)<<(3+2))|
  2084 						((sG>>5)<<(2)) |
  2085 						((sB>>6)<<(0)) ;
  2086 				}
  2087 				dst++;
  2088 				src += srcbpp;
  2089 			},
  2090 			width);
  2091 			src += srcskip;
  2092 			dst += dstskip;
  2093 		}
  2094 	} else {
  2095 		while ( height-- ) {
  2096 			DUFFS_LOOP(
  2097 			{
  2098 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2099 								sR, sG, sB);
  2100 				if ( (Pixel & rgbmask) != ckey ) {
  2101 				  	/* Pack RGB into 8bit pixel */
  2102 				  	*dst = palmap[((sR>>5)<<(3+2))|
  2103 							((sG>>5)<<(2))  |
  2104 							((sB>>6)<<(0))  ];
  2105 				}
  2106 				dst++;
  2107 				src += srcbpp;
  2108 			},
  2109 			width);
  2110 			src += srcskip;
  2111 			dst += dstskip;
  2112 		}
  2113 	}
  2114 }
  2115 
  2116 static void Blit2to2Key(SDL_BlitInfo *info)
  2117 {
  2118 	int width = info->d_width;
  2119 	int height = info->d_height;
  2120 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2121 	int srcskip = info->s_skip;
  2122 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2123 	int dstskip = info->d_skip;
  2124 	Uint32 ckey = info->src->colorkey;
  2125 	Uint32 rgbmask = ~info->src->Amask;
  2126 
  2127 	/* Set up some basic variables */
  2128         srcskip /= 2;
  2129         dstskip /= 2;
  2130 	ckey &= rgbmask;
  2131 
  2132 	while ( height-- ) {
  2133 		DUFFS_LOOP(
  2134 		{
  2135 			if ( (*srcp & rgbmask) != ckey ) {
  2136 				*dstp = *srcp;
  2137 			}
  2138 			dstp++;
  2139 			srcp++;
  2140 		},
  2141 		width);
  2142 		srcp += srcskip;
  2143 		dstp += dstskip;
  2144 	}
  2145 }
  2146 
  2147 static void BlitNtoNKey(SDL_BlitInfo *info)
  2148 {
  2149 	int width = info->d_width;
  2150 	int height = info->d_height;
  2151 	Uint8 *src = info->s_pixels;
  2152 	int srcskip = info->s_skip;
  2153 	Uint8 *dst = info->d_pixels;
  2154 	int dstskip = info->d_skip;
  2155 	Uint32 ckey = info->src->colorkey;
  2156 	SDL_PixelFormat *srcfmt = info->src;
  2157 	SDL_PixelFormat *dstfmt = info->dst;
  2158 	int srcbpp = srcfmt->BytesPerPixel;
  2159 	int dstbpp = dstfmt->BytesPerPixel;
  2160 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2161 
  2162 	while ( height-- ) {
  2163 		DUFFS_LOOP(
  2164 		{
  2165 		        Uint32 Pixel;
  2166 			unsigned sR;
  2167 			unsigned sG;
  2168 			unsigned sB;
  2169 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2170 			if ( Pixel != ckey ) {
  2171 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2172 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2173 					      sR, sG, sB, alpha);
  2174 			}
  2175 			dst += dstbpp;
  2176 			src += srcbpp;
  2177 		},
  2178 		width);
  2179 		src += srcskip;
  2180 		dst += dstskip;
  2181 	}
  2182 }
  2183 
  2184 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2185 {
  2186 	int width = info->d_width;
  2187 	int height = info->d_height;
  2188 	Uint8 *src = info->s_pixels;
  2189 	int srcskip = info->s_skip;
  2190 	Uint8 *dst = info->d_pixels;
  2191 	int dstskip = info->d_skip;
  2192 	Uint32 ckey = info->src->colorkey;
  2193 	SDL_PixelFormat *srcfmt = info->src;
  2194 	SDL_PixelFormat *dstfmt = info->dst;
  2195 	Uint32 rgbmask = ~srcfmt->Amask;
  2196 
  2197 	Uint8 srcbpp;
  2198 	Uint8 dstbpp;
  2199 	Uint32 Pixel;
  2200 	Uint8  sR, sG, sB, sA;
  2201 
  2202 	/* Set up some basic variables */
  2203 	srcbpp = srcfmt->BytesPerPixel;
  2204 	dstbpp = dstfmt->BytesPerPixel;
  2205 	ckey &= rgbmask;
  2206 
  2207 	/* FIXME: should map alpha to [0..255] correctly! */
  2208 	while ( height-- ) {
  2209 		DUFFS_LOOP(
  2210 		{
  2211 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2212 				      sR, sG, sB, sA);
  2213 			if ( (Pixel & rgbmask) != ckey ) {
  2214 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2215 						sR, sG, sB, sA);
  2216 			}
  2217 			dst += dstbpp;
  2218 			src += srcbpp;
  2219 		},
  2220 		width);
  2221 		src += srcskip;
  2222 		dst += dstskip;
  2223 	}
  2224 }
  2225 
  2226 /* Normal N to N optimized blitters */
  2227 struct blit_table {
  2228 	Uint32 srcR, srcG, srcB;
  2229 	int dstbpp;
  2230 	Uint32 dstR, dstG, dstB;
  2231 	Uint32 blit_features;
  2232 	void *aux_data;
  2233 	SDL_loblit blitfunc;
  2234 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
  2235 };
  2236 static const struct blit_table normal_blit_1[] = {
  2237 	/* Default for 8-bit RGB source, an invalid combination */
  2238 	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
  2239 };
  2240 static const struct blit_table normal_blit_2[] = {
  2241 #ifdef USE_ASMBLIT
  2242     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
  2243       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
  2244     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
  2245       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
  2246     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
  2247       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
  2248 #endif
  2249 #ifdef USE_ALTIVEC_BLITTERS
  2250     /* has-altivec */
  2251     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2252       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2253     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2254       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2255 #endif
  2256     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
  2257       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
  2258     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2259       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
  2260     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2261       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
  2262     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2263       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
  2264 
  2265     /* Default for 16-bit RGB source, used if no other blitter matches */
  2266     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2267 };
  2268 static const struct blit_table normal_blit_3[] = {
  2269 	/* Default for 24-bit RGB source, never optimized */
  2270     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2271 };
  2272 static const struct blit_table normal_blit_4[] = {
  2273 #ifdef USE_ASMBLIT
  2274     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2275       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
  2276     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2277       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
  2278     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2279       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
  2280     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2281       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
  2282     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2283       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
  2284     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2285       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
  2286     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2287       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
  2288     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2289       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
  2290     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
  2291       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
  2292     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
  2293       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
  2294     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2295       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
  2296     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2297       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
  2298     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2299       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
  2300 #else
  2301 #ifdef USE_ALTIVEC_BLITTERS
  2302     /* has-altivec | dont-use-prefetch */
  2303     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2304       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2305     /* has-altivec */
  2306     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2307       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2308     /* has-altivec */
  2309     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
  2310       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
  2311 #endif
  2312     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2313       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
  2314     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2315       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
  2316 #endif
  2317 	/* Default for 32-bit RGB source, used if no other blitter matches */
  2318 	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2319 };
  2320 static const struct blit_table *normal_blit[] = {
  2321 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2322 };
  2323 
  2324 /* Mask matches table, or table entry is zero */
  2325 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2326 
  2327 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2328 {
  2329 	struct private_swaccel *sdata;
  2330 	SDL_PixelFormat *srcfmt;
  2331 	SDL_PixelFormat *dstfmt;
  2332 	const struct blit_table *table;
  2333 	int which;
  2334 	SDL_loblit blitfun;
  2335 
  2336 	/* Set up data for choosing the blit */
  2337 	sdata = surface->map->sw_data;
  2338 	srcfmt = surface->format;
  2339 	dstfmt = surface->map->dst->format;
  2340 
  2341 	if ( blit_index & 2 ) {
  2342 	        /* alpha or alpha+colorkey */
  2343 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2344 	}
  2345 
  2346 	/* We don't support destinations less than 8-bits */
  2347 	if ( dstfmt->BitsPerPixel < 8 ) {
  2348 		return(NULL);
  2349 	}
  2350 	
  2351 	if(blit_index == 1) {
  2352 	    /* colorkey blit: Here we don't have too many options, mostly
  2353 	       because RLE is the preferred fast way to deal with this.
  2354 	       If a particular case turns out to be useful we'll add it. */
  2355 
  2356 	    if(srcfmt->BytesPerPixel == 2
  2357 	       && surface->map->identity)
  2358 		return Blit2to2Key;
  2359 	    else if(dstfmt->BytesPerPixel == 1)
  2360 		return BlitNto1Key;
  2361 	    else {
  2362 #ifdef USE_ALTIVEC_BLITTERS
  2363         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2364             return Blit32to32KeyAltivec;
  2365         } else
  2366 #endif
  2367 
  2368 		if(srcfmt->Amask && dstfmt->Amask)
  2369 		    return BlitNtoNKeyCopyAlpha;
  2370 		else
  2371 		    return BlitNtoNKey;
  2372 	    }
  2373 	}
  2374 
  2375 	blitfun = NULL;
  2376 	if ( dstfmt->BitsPerPixel == 8 ) {
  2377 		/* We assume 8-bit destinations are palettized */
  2378 		if ( (srcfmt->BytesPerPixel == 4) &&
  2379 		     (srcfmt->Rmask == 0x00FF0000) &&
  2380 		     (srcfmt->Gmask == 0x0000FF00) &&
  2381 		     (srcfmt->Bmask == 0x000000FF) ) {
  2382 			if ( surface->map->table ) {
  2383 				blitfun = Blit_RGB888_index8_map;
  2384 			} else {
  2385 #ifdef USE_ASMBLIT
  2386 				sdata->aux_data = ConvertX86p32_8RGB332;
  2387 				blitfun = ConvertX86;
  2388 #else
  2389 				blitfun = Blit_RGB888_index8;
  2390 #endif
  2391 			}
  2392 		} else {
  2393 			blitfun = BlitNto1;
  2394 		}
  2395 	} else {
  2396 		/* Now the meat, choose the blitter we want */
  2397 		int a_need = 0;
  2398 		if(dstfmt->Amask)
  2399 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2400 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2401 		for ( which=0; table[which].dstbpp; ++which ) {
  2402 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2403 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2404 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2405 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2406 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2407 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2408 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2409 			    (a_need & table[which].alpha) == a_need &&
  2410 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2411 				break;
  2412 		}
  2413 		sdata->aux_data = table[which].aux_data;
  2414 		blitfun = table[which].blitfunc;
  2415 		if(a_need == COPY_ALPHA && blitfun == BlitNtoN)
  2416 		    blitfun = BlitNtoNCopyAlpha;
  2417 	}
  2418 
  2419 #ifdef DEBUG_ASM
  2420 #ifdef USE_ASMBLIT
  2421 	if ( blitfun == ConvertMMX )
  2422 		fprintf(stderr, "Using mmx blit\n");
  2423 	else
  2424 	if ( blitfun == ConvertX86 )
  2425 		fprintf(stderr, "Using asm blit\n");
  2426 	else
  2427 #endif
  2428 	if ( (blitfun == SDL_BlitNtoN) || (blitfun == SDL_BlitNto1) )
  2429 		fprintf(stderr, "Using C blit\n");
  2430 	else
  2431 		fprintf(stderr, "Using optimized C blit\n");
  2432 #endif /* DEBUG_ASM */
  2433 
  2434 	return(blitfun);
  2435 }