src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Tue, 07 Feb 2006 10:40:14 +0000
changeset 1339 62802d9d7c87
parent 1338 604d73db6802
child 1358 c71e05b4dc2e
permissions -rw-r--r--
*** empty log message ***
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #include "SDL_types.h"
    24 #include "SDL_string.h"
    25 #include "SDL_video.h"
    26 #include "SDL_blit.h"
    27 #include "SDL_byteorder.h"
    28 #include "SDL_cpuinfo.h"
    29 
    30 /* Functions to blit from N-bit surfaces to other surfaces */
    31 
    32 #ifdef USE_ALTIVEC_BLITTERS
    33 #ifdef HAVE_ALTIVEC_H
    34 #include <altivec.h>
    35 #endif
    36 #define assert(X)
    37 #ifdef MACOSX
    38 #include <sys/sysctl.h>
     39 static size_t GetL3CacheSize( void )
     40 {
            /* Query the "hw.l3cachesize" sysctl value.
             * Returns the value reported by sysctlbyname(), or 0 when the call
             * reports an error. */
     41     const char key[] = "hw.l3cachesize";
     42     u_int64_t result = 0;
     43     size_t typeSize = sizeof( result );
     44 
     45 
     46     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
     47     if( 0 != err ) return 0;
     48 
            /* The 64-bit result is converted to size_t on return. */
     49     return result;
     50 }
    51 #else
     52 static size_t GetL3CacheSize( void )
     53 {
            /* Fallback used when MACOSX is not defined: nothing is queried and a
             * fixed value (2097152 == 2 * 1024 * 1024) is returned. */
     54     /* XXX: Just guess G4 */
     55     return 2097152;
     56 }
    57 #endif /* MACOSX */
    58 
     59 #if ((defined MACOSX) && (__GNUC__ < 4))
            /* MACOSX with __GNUC__ < 4: vector literals are spelled with
             * parentheses. */
     60     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
     61         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
     62     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
     63         (vector unsigned short) ( a,b,c,d,e,f,g,h )
     64 #else
            /* Every other configuration: vector literals are spelled with braces. */
     65     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
     66         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
     67     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
     68         (vector unsigned short) { a,b,c,d,e,f,g,h }
     69 #endif
     70 
        /* Non-zero when any of the low four bits of address x is set, i.e. when x
         * is not a multiple of 16.
         * NOTE(review): x is not parenthesized in the expansion, so pass only
         * simple expressions. */
     71 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
        /* Byte vector whose four words each hold a, b, c, d offset by 0x00, 0x04,
         * 0x08 and 0x0C respectively.
         * NOTE(review): this always uses the parenthesized literal form rather
         * than VECUINT8_LITERAL, and nothing in the visible part of this file
         * references it. */
     72 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
     73                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
     74                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
     75                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
     76                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
     77 
        /* Pack r, g, b, a into one pixel value using the shifts and masks of
         * dstfmt.
         * NOTE(review): the arguments are not parenthesized in the expansion. */
     78 #define MAKE8888(dstfmt, r, g, b, a)  \
     79     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
     80       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
     81       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
     82       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
     83 
     84 /*
     85  * Data Stream Touch...Altivec cache prefetching.
     86  *
     87  *  Don't use this on a G5...however, the speed boost is very significant
     88  *   on a G4.
     89  */
     90 #define DST_CHAN_SRC 1
     91 #define DST_CHAN_DEST 2
     92 
     93 /* macro to set DST control word value: size is shifted to bit 24, count to
         * bit 16, and stride occupies the low bits. */
     94 #define DST_CTRL(size, count, stride) \
     95     (((size) << 24) | ((count) << 16) | (stride))
     96 
        /* Permute control vector that the blitters below pass to vec_perm together
         * with the two loads vec_ld(0, src) and vec_ld(15, src).
         * Unaligned src: vec_lvsl(0, src).
         * Aligned src: vec_lvsl(8, src) with 8 added to every byte -- presumably so
         * that every index selects the second vec_perm operand, because
         * vec_ld(15, src) then reloads the block src points at; confirm against
         * the AltiVec programming manual. */
     97 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
     98     ? vec_lvsl(0, src) \
     99     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   100 
    101 /* Calculate the permute vector used for 32->32 swizzling.
         *
         * srcfmt, dstfmt: source and destination pixel formats.  A NULL pointer
         *  selects the built-in ARGB default_pixel_format defined below.
         * Returns a byte permute vector: one 4-byte index pattern replicated into
         *  all four words, with 0x00, 0x04, 0x08 and 0x0C added per word.
         *  Index 0x10 is placed in the alpha position when the destination has an
         *  alpha mask but the source has none, and in the bytes not covered by
         *  the destination RGB masks when the destination has no alpha mask.
         */
    102 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
    103                                   const SDL_PixelFormat *dstfmt)
    104 {
    105     /*
    106      * We have to assume that the bits that aren't used by other
    107      *  colors are alpha, and that it's one complete byte, since some formats
    108      *  leave alpha with a zero mask, but we should still swizzle the bits.
    109      */
    110     /* ARGB */
    111     const static struct SDL_PixelFormat default_pixel_format = {
    112         NULL, 0, 0,
    113         0, 0, 0, 0,
    114         16, 8, 0, 24,
    115         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
    116         0, 0};
    117     if (!srcfmt) {
    118         srcfmt = &default_pixel_format;
    119     }
    120     if (!dstfmt) {
    121         dstfmt = &default_pixel_format;
    122     }
            /* Per-word offsets added to the replicated 4-byte pattern. */
    123     vector unsigned char plus = VECUINT8_LITERAL(
    124                                       0x00, 0x00, 0x00, 0x00,
    125                                       0x04, 0x04, 0x04, 0x04,
    126                                       0x08, 0x08, 0x08, 0x08,
    127                                       0x0C, 0x0C, 0x0C, 0x0C );
    128     vector unsigned char vswiz;
    129     vector unsigned int srcvec;
            /* RESHIFT maps a bit shift X to 3 - X/8 (shift 24 -> 0, shift 0 -> 3);
             * presumably the byte position of that channel inside a big-endian
             * 32-bit word.  Each result is then placed at the destination shift. */
    130 #define RESHIFT(X) (3 - ((X) >> 3))
    131     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    132     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    133     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
    134     Uint32 amask;
    135     /* Use zero for alpha if either surface doesn't have alpha */
    136     if (dstfmt->Amask) {
    137         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
    138     } else {    
    139         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
    140     }           
    141 #undef RESHIFT  
            /* Write the 4-byte pattern into word 0 of srcvec, splat that word to
             * all four words, and add the per-word offsets. */
    142     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
    143     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
    144     return(vswiz);
    145 }
   146 
   147 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   148 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   149     int height = info->d_height;
   150     Uint8 *src = (Uint8 *) info->s_pixels;
   151     int srcskip = info->s_skip;
   152     Uint8 *dst = (Uint8 *) info->d_pixels;
   153     int dstskip = info->d_skip;
   154     SDL_PixelFormat *srcfmt = info->src;
   155     vector unsigned char valpha = vec_splat_u8(0);
   156     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   157     vector unsigned char vgmerge = VECUINT8_LITERAL(
   158         0x00, 0x02, 0x00, 0x06,
   159         0x00, 0x0a, 0x00, 0x0e,
   160         0x00, 0x12, 0x00, 0x16,
   161         0x00, 0x1a, 0x00, 0x1e);
   162     vector unsigned short v1 = vec_splat_u16(1);
   163     vector unsigned short v3 = vec_splat_u16(3);
   164     vector unsigned short v3f = VECUINT16_LITERAL(
   165         0x003f, 0x003f, 0x003f, 0x003f,
   166         0x003f, 0x003f, 0x003f, 0x003f);
   167     vector unsigned short vfc = VECUINT16_LITERAL(
   168         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   169         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   170     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   171     vf800 = vec_sl(vf800, vec_splat_u16(8));
   172 
   173     while (height--) {
   174         vector unsigned char valigner;
   175         vector unsigned char voverflow;
   176         vector unsigned char vsrc;
   177 
   178         int width = info->d_width;
   179         int extrawidth;
   180 
   181         /* do scalar until we can align... */
   182 #define ONE_PIXEL_BLEND(condition, widthvar) \
   183         while (condition) { \
   184             Uint32 Pixel; \
   185             unsigned sR, sG, sB, sA; \
   186             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   187                           sR, sG, sB, sA); \
   188             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   189                                 ((sG << 3) & 0x000007E0) | \
   190                                 ((sB >> 3) & 0x0000001F)); \
   191             dst += 2; \
   192             src += 4; \
   193             widthvar--; \
   194         }
   195 
   196         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   197 
   198         /* After all that work, here's the vector part! */
   199         extrawidth = (width % 8);  /* trailing unaligned stores */
   200         width -= extrawidth;
   201         vsrc = vec_ld(0, src);
   202         valigner = VEC_ALIGNER(src);
   203 
   204         while (width) {
   205             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   206             vector unsigned int vsrc1, vsrc2;
   207             vector unsigned char vdst;
   208 
   209             voverflow = vec_ld(15, src);
   210             vsrc = vec_perm(vsrc, voverflow, valigner);
   211             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   212             src += 16;
   213             vsrc = voverflow;
   214             voverflow = vec_ld(15, src);
   215             vsrc = vec_perm(vsrc, voverflow, valigner);
   216             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   217             /* 1555 */
   218             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   219             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   220             vgpixel = vec_and(vgpixel, vfc);
   221             vgpixel = vec_sl(vgpixel, v3);
   222             vrpixel = vec_sl(vpixel, v1);
   223             vrpixel = vec_and(vrpixel, vf800);
   224             vbpixel = vec_and(vpixel, v3f);
   225             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   226             /* 565 */
   227             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   228             vec_st(vdst, 0, dst);
   229 
   230             width -= 8;
   231             src += 16;
   232             dst += 16;
   233             vsrc = voverflow;
   234         }
   235 
   236         assert(width == 0);
   237 
   238         /* do scalar until we can align... */
   239         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   240 #undef ONE_PIXEL_BLEND
   241 
   242         src += srcskip;  /* move to next row, accounting for pitch. */
   243         dst += dstskip;
   244     }
   245 
   246 
   247 }
   248 
   249 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   250     int height = info->d_height;
   251     Uint8 *src = (Uint8 *) info->s_pixels;
   252     int srcskip = info->s_skip;
   253     Uint8 *dst = (Uint8 *) info->d_pixels;
   254     int dstskip = info->d_skip;
   255     SDL_PixelFormat *srcfmt = info->src;
   256     SDL_PixelFormat *dstfmt = info->dst;
   257     unsigned alpha;
   258     vector unsigned char valpha;
   259     vector unsigned char vpermute;
   260     vector unsigned short vf800;
   261     vector unsigned int v8 = vec_splat_u32(8);
   262     vector unsigned int v16 = vec_add(v8, v8);
   263     vector unsigned short v2 = vec_splat_u16(2);
   264     vector unsigned short v3 = vec_splat_u16(3);
   265     /* 
   266         0x10 - 0x1f is the alpha
   267         0x00 - 0x0e evens are the red
   268         0x01 - 0x0f odds are zero
   269     */
   270     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   271         0x10, 0x00, 0x01, 0x01,
   272         0x10, 0x02, 0x01, 0x01,
   273         0x10, 0x04, 0x01, 0x01,
   274         0x10, 0x06, 0x01, 0x01
   275     );
   276     vector unsigned char vredalpha2 = (vector unsigned char) (
   277         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   278     );
   279     /*
   280         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   281         0x11 - 0x0f odds are blue
   282     */
   283     vector unsigned char vblue1 = VECUINT8_LITERAL(
   284         0x00, 0x01, 0x02, 0x11,
   285         0x04, 0x05, 0x06, 0x13,
   286         0x08, 0x09, 0x0a, 0x15,
   287         0x0c, 0x0d, 0x0e, 0x17
   288     );
   289     vector unsigned char vblue2 = (vector unsigned char)(
   290         vec_add((vector unsigned int)vblue1, v8)
   291     );
   292     /*
   293         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   294         0x10 - 0x0e evens are green
   295     */
   296     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   297         0x00, 0x01, 0x10, 0x03,
   298         0x04, 0x05, 0x12, 0x07,
   299         0x08, 0x09, 0x14, 0x0b,
   300         0x0c, 0x0d, 0x16, 0x0f
   301     );
   302     vector unsigned char vgreen2 = (vector unsigned char)(
   303         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   304     );
   305     
   306 
   307     assert(srcfmt->BytesPerPixel == 2);
   308     assert(dstfmt->BytesPerPixel == 4);
   309 
   310     vf800 = (vector unsigned short)vec_splat_u8(-7);
   311     vf800 = vec_sl(vf800, vec_splat_u16(8));
   312 
   313     if (dstfmt->Amask && srcfmt->alpha) {
   314         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   315         valpha = vec_splat(valpha, 0);
   316     } else {
   317         alpha = 0;
   318         valpha = vec_splat_u8(0);
   319     }
   320 
   321     vpermute = calc_swizzle32(NULL, dstfmt);
   322     while (height--) {
   323         vector unsigned char valigner;
   324         vector unsigned char voverflow;
   325         vector unsigned char vsrc;
   326 
   327         int width = info->d_width;
   328         int extrawidth;
   329 
   330         /* do scalar until we can align... */
   331 #define ONE_PIXEL_BLEND(condition, widthvar) \
   332         while (condition) { \
   333             unsigned sR, sG, sB; \
   334             unsigned short Pixel = *((unsigned short *)src); \
   335             sR = (Pixel >> 8) & 0xf8; \
   336             sG = (Pixel >> 3) & 0xfc; \
   337             sB = (Pixel << 3) & 0xf8; \
   338             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   339             src += 2; \
   340             dst += 4; \
   341             widthvar--; \
   342         }
   343         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   344 
   345         /* After all that work, here's the vector part! */
   346         extrawidth = (width % 8);  /* trailing unaligned stores */
   347         width -= extrawidth;
   348         vsrc = vec_ld(0, src);
   349         valigner = VEC_ALIGNER(src);
   350 
   351         while (width) {
   352             vector unsigned short vR, vG, vB;
   353             vector unsigned char vdst1, vdst2;
   354 
   355             voverflow = vec_ld(15, src);
   356             vsrc = vec_perm(vsrc, voverflow, valigner);
   357 
   358             vR = vec_and((vector unsigned short)vsrc, vf800);
   359             vB = vec_sl((vector unsigned short)vsrc, v3);
   360             vG = vec_sl(vB, v2);
   361 
   362             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   363             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   364             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   365             vdst1 = vec_perm(vdst1, valpha, vpermute);
   366             vec_st(vdst1, 0, dst);
   367 
   368             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   369             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   370             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   371             vdst2 = vec_perm(vdst2, valpha, vpermute);
   372             vec_st(vdst2, 16, dst);
   373             
   374             width -= 8;
   375             dst += 32;
   376             src += 16;
   377             vsrc = voverflow;
   378         }
   379 
   380         assert(width == 0);
   381 
   382 
   383         /* do scalar until we can align... */
   384         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   385 #undef ONE_PIXEL_BLEND
   386 
   387         src += srcskip;  /* move to next row, accounting for pitch. */
   388         dst += dstskip;
   389     }
   390 
   391 }
   392 
   393 
   394 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   395     int height = info->d_height;
   396     Uint8 *src = (Uint8 *) info->s_pixels;
   397     int srcskip = info->s_skip;
   398     Uint8 *dst = (Uint8 *) info->d_pixels;
   399     int dstskip = info->d_skip;
   400     SDL_PixelFormat *srcfmt = info->src;
   401     SDL_PixelFormat *dstfmt = info->dst;
   402     unsigned alpha;
   403     vector unsigned char valpha;
   404     vector unsigned char vpermute;
   405     vector unsigned short vf800;
   406     vector unsigned int v8 = vec_splat_u32(8);
   407     vector unsigned int v16 = vec_add(v8, v8);
   408     vector unsigned short v1 = vec_splat_u16(1);
   409     vector unsigned short v3 = vec_splat_u16(3);
   410     /* 
   411         0x10 - 0x1f is the alpha
   412         0x00 - 0x0e evens are the red
   413         0x01 - 0x0f odds are zero
   414     */
   415     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   416         0x10, 0x00, 0x01, 0x01,
   417         0x10, 0x02, 0x01, 0x01,
   418         0x10, 0x04, 0x01, 0x01,
   419         0x10, 0x06, 0x01, 0x01
   420     );
   421     vector unsigned char vredalpha2 = (vector unsigned char)(
   422         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   423     );
   424     /*
   425         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   426         0x11 - 0x0f odds are blue
   427     */
   428     vector unsigned char vblue1 = VECUINT8_LITERAL(
   429         0x00, 0x01, 0x02, 0x11,
   430         0x04, 0x05, 0x06, 0x13,
   431         0x08, 0x09, 0x0a, 0x15,
   432         0x0c, 0x0d, 0x0e, 0x17
   433     );
   434     vector unsigned char vblue2 = (vector unsigned char)(
   435         vec_add((vector unsigned int)vblue1, v8)
   436     );
   437     /*
   438         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   439         0x10 - 0x0e evens are green
   440     */
   441     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   442         0x00, 0x01, 0x10, 0x03,
   443         0x04, 0x05, 0x12, 0x07,
   444         0x08, 0x09, 0x14, 0x0b,
   445         0x0c, 0x0d, 0x16, 0x0f
   446     );
   447     vector unsigned char vgreen2 = (vector unsigned char)(
   448         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   449     );
   450     
   451 
   452     assert(srcfmt->BytesPerPixel == 2);
   453     assert(dstfmt->BytesPerPixel == 4);
   454 
   455     vf800 = (vector unsigned short)vec_splat_u8(-7);
   456     vf800 = vec_sl(vf800, vec_splat_u16(8));
   457 
   458     if (dstfmt->Amask && srcfmt->alpha) {
   459         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   460         valpha = vec_splat(valpha, 0);
   461     } else {
   462         alpha = 0;
   463         valpha = vec_splat_u8(0);
   464     }
   465 
   466     vpermute = calc_swizzle32(NULL, dstfmt);
   467     while (height--) {
   468         vector unsigned char valigner;
   469         vector unsigned char voverflow;
   470         vector unsigned char vsrc;
   471 
   472         int width = info->d_width;
   473         int extrawidth;
   474 
   475         /* do scalar until we can align... */
   476 #define ONE_PIXEL_BLEND(condition, widthvar) \
   477         while (condition) { \
   478             unsigned sR, sG, sB; \
   479             unsigned short Pixel = *((unsigned short *)src); \
   480             sR = (Pixel >> 7) & 0xf8; \
   481             sG = (Pixel >> 2) & 0xf8; \
   482             sB = (Pixel << 3) & 0xf8; \
   483             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   484             src += 2; \
   485             dst += 4; \
   486             widthvar--; \
   487         }
   488         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   489 
   490         /* After all that work, here's the vector part! */
   491         extrawidth = (width % 8);  /* trailing unaligned stores */
   492         width -= extrawidth;
   493         vsrc = vec_ld(0, src);
   494         valigner = VEC_ALIGNER(src);
   495 
   496         while (width) {
   497             vector unsigned short vR, vG, vB;
   498             vector unsigned char vdst1, vdst2;
   499 
   500             voverflow = vec_ld(15, src);
   501             vsrc = vec_perm(vsrc, voverflow, valigner);
   502 
   503             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   504             vB = vec_sl((vector unsigned short)vsrc, v3);
   505             vG = vec_sl(vB, v3);
   506 
   507             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   508             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   509             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   510             vdst1 = vec_perm(vdst1, valpha, vpermute);
   511             vec_st(vdst1, 0, dst);
   512 
   513             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   514             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   515             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   516             vdst2 = vec_perm(vdst2, valpha, vpermute);
   517             vec_st(vdst2, 16, dst);
   518             
   519             width -= 8;
   520             dst += 32;
   521             src += 16;
   522             vsrc = voverflow;
   523         }
   524 
   525         assert(width == 0);
   526 
   527 
   528         /* do scalar until we can align... */
   529         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   530 #undef ONE_PIXEL_BLEND
   531 
   532         src += srcskip;  /* move to next row, accounting for pitch. */
   533         dst += dstskip;
   534     }
   535 
   536 }
   537 
   538 static void BlitNtoNKey(SDL_BlitInfo *info);
   539 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   540 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   541 {
   542     int height = info->d_height;
   543     Uint32 *srcp = (Uint32 *) info->s_pixels;
   544     int srcskip = info->s_skip;
   545     Uint32 *dstp = (Uint32 *) info->d_pixels;
   546     int dstskip = info->d_skip;
   547     SDL_PixelFormat *srcfmt = info->src;
   548     int srcbpp = srcfmt->BytesPerPixel;
   549     SDL_PixelFormat *dstfmt = info->dst;
   550     int dstbpp = dstfmt->BytesPerPixel;
   551     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   552 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   553     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   554 	Uint32 ckey = info->src->colorkey;
   555     vector unsigned int valpha;
   556     vector unsigned char vpermute;
   557     vector unsigned char vzero;
   558     vector unsigned int vckey;
   559     vector unsigned int vrgbmask;
   560     vpermute = calc_swizzle32(srcfmt, dstfmt);
   561     if (info->d_width < 16) {
   562         if(copy_alpha) {
   563             return BlitNtoNKeyCopyAlpha(info);
   564         } else {
   565             return BlitNtoNKey(info);
   566         }
   567     }
   568     vzero = vec_splat_u8(0);
   569     if (alpha) {
   570         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   571         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   572     } else {
   573         valpha = (vector unsigned int)vzero;
   574     }
   575     ckey &= rgbmask;
   576     ((unsigned int *)(char*)&vckey)[0] = ckey;
   577     vckey = vec_splat(vckey, 0);
   578     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   579     vrgbmask = vec_splat(vrgbmask, 0);
   580 
   581     while (height--) {
   582 #define ONE_PIXEL_BLEND(condition, widthvar) \
   583         if (copy_alpha) { \
   584             while (condition) { \
   585                 Uint32 Pixel; \
   586                 unsigned sR, sG, sB, sA; \
   587                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   588                           sR, sG, sB, sA); \
   589                 if ( (Pixel & rgbmask) != ckey ) { \
   590                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   591                             sR, sG, sB, sA); \
   592                 } \
   593                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   594                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   595                 widthvar--; \
   596             } \
   597         } else { \
   598             while (condition) { \
   599                 Uint32 Pixel; \
   600                 unsigned sR, sG, sB; \
   601                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   602                 if ( Pixel != ckey ) { \
   603                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   604                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   605                               sR, sG, sB, alpha); \
   606                 } \
   607                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   608                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   609                 widthvar--; \
   610             } \
   611         }
   612         int width = info->d_width;
   613         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   614         assert(width > 0);
   615         if (width > 0) {
   616             int extrawidth = (width % 4);
   617             vector unsigned char valigner = VEC_ALIGNER(srcp);
   618             vector unsigned int vs = vec_ld(0, srcp);
   619             width -= extrawidth;
   620             assert(width >= 4);
   621             while (width) {
   622                 vector unsigned char vsel;
   623                 vector unsigned int vd;
   624                 vector unsigned int voverflow = vec_ld(15, srcp);
   625                 /* load the source vec */
   626                 vs = vec_perm(vs, voverflow, valigner);
   627                 /* vsel is set for items that match the key */
   628                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   629                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   630                 /* permute the src vec to the dest format */
   631                 vs = vec_perm(vs, valpha, vpermute);
   632                 /* load the destination vec */
   633                 vd = vec_ld(0, dstp);
   634                 /* select the source and dest into vs */
   635                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   636                 
   637                 vec_st(vd, 0, dstp);
   638                 srcp += 4;
   639                 width -= 4;
   640                 dstp += 4;
   641                 vs = voverflow;
   642             }
   643             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   644 #undef ONE_PIXEL_BLEND
   645             srcp += srcskip >> 2;
   646             dstp += dstskip >> 2;
   647         }
   648     }
   649 }
   650 
    651 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
    652 /* Use this on a G5 */
        /* info supplies the pixel pointers, the per-row skips, the blit dimensions
         * and both pixel formats.
         * Per row: scalar pixels until dst is 16-byte aligned, then four pixels per
         * iteration with vector operations, then the remaining (width % 4) pixels
         * with scalar code. */
    653 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
    654 {
    655     int height = info->d_height;
    656     Uint32 *src = (Uint32 *) info->s_pixels;
    657     int srcskip = info->s_skip;
    658     Uint32 *dst = (Uint32 *) info->d_pixels;
    659     int dstskip = info->d_skip;
    660     SDL_PixelFormat *srcfmt = info->src;
    661     SDL_PixelFormat *dstfmt = info->dst;
    662     vector unsigned int vzero = vec_splat_u32(0);
    663     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
            /* When the destination has an alpha mask, the source has none and
             * srcfmt->alpha is non-zero, vzero is replaced by a splat of
             * srcfmt->alpha; vzero is the second operand of the swizzling
             * vec_perm below. */
    664     if (dstfmt->Amask && !srcfmt->Amask) {
    665         if (srcfmt->alpha) {
    666             vector unsigned char valpha;
    667             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
    668             vzero = (vector unsigned int)vec_splat(valpha, 0);
    669         }
    670     }
    671 
    672     assert(srcfmt->BytesPerPixel == 4);
    673     assert(dstfmt->BytesPerPixel == 4);
    674 
    675     while (height--) {
    676         vector unsigned char valigner;
    677         vector unsigned int vbits;
    678         vector unsigned int voverflow;
    679         Uint32 bits;
    680         Uint8 r, g, b, a;
    681 
    682         int width = info->d_width;
    683         int extrawidth;
    684 
    685         /* do scalar until we can align... */
    686         while ((UNALIGNED_PTR(dst)) && (width)) {
    687             bits = *(src++);
    688             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    689             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    690             width--;
    691         }
    692 
    693         /* After all that work, here's the vector part! */
    694         extrawidth = (width % 4);
    695         width -= extrawidth;
    696         valigner = VEC_ALIGNER(src);
    697         vbits = vec_ld(0, src);
    698 
    699        while (width) {
    700             voverflow = vec_ld(15, src);
    701             src += 4;
    702             width -= 4;
    703             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
    704             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
    705             vec_st(vbits, 0, dst);  /* store it back out. */
    706             dst += 4;
    707             vbits = voverflow;
    708         }
    709 
    710         assert(width == 0);
    711 
    712         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
    713         while (extrawidth) {
    714             bits = *(src++);  /* at most 3 pixels (width % 4). */
    715             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    716             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    717             extrawidth--;
    718         }
    719 
    720         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
    721         dst += dstskip >> 2;
    722     }
    723 
    724 }
   725 
    726 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
    727 /* Use this on a G4 */
        /* info supplies the pixel pointers, the per-row skips, the blit dimensions
         * and both pixel formats.
         * Per row: scalar pixels until dst is 16-byte aligned, then four pixels per
         * iteration with vector operations, then the remaining (width % 4) pixels
         * with scalar code.  The aligning and vector loops issue vec_dstt /
         * vec_dstst on the source and destination streams, and both streams are
         * stopped with vec_dss before returning. */
    728 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
    729 {
            /* NOTE(review): these leads are computed with sizeof (Uint32) as if
             * they were byte counts, but they are added to Uint32 pointers below,
             * so the address advances by that many 4-byte elements -- confirm the
             * intended prefetch distance. */
    730     const int scalar_dst_lead = sizeof (Uint32) * 4;
    731     const int vector_dst_lead = sizeof (Uint32) * 16;
    732 
    733     int height = info->d_height;
    734     Uint32 *src = (Uint32 *) info->s_pixels;
    735     int srcskip = info->s_skip;
    736     Uint32 *dst = (Uint32 *) info->d_pixels;
    737     int dstskip = info->d_skip;
    738     SDL_PixelFormat *srcfmt = info->src;
    739     SDL_PixelFormat *dstfmt = info->dst;
    740     vector unsigned int vzero = vec_splat_u32(0);
    741     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
            /* When the destination has an alpha mask, the source has none and
             * srcfmt->alpha is non-zero, vzero is replaced by a splat of
             * srcfmt->alpha; vzero is the second operand of the swizzling
             * vec_perm below. */
    742     if (dstfmt->Amask && !srcfmt->Amask) {
    743         if (srcfmt->alpha) {
    744             vector unsigned char valpha;
    745             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
    746             vzero = (vector unsigned int)vec_splat(valpha, 0);
    747         }
    748     }
    749 
    750     assert(srcfmt->BytesPerPixel == 4);
    751     assert(dstfmt->BytesPerPixel == 4);
    752 
    753     while (height--) {
    754         vector unsigned char valigner;
    755         vector unsigned int vbits;
    756         vector unsigned int voverflow;
    757         Uint32 bits;
    758         Uint8 r, g, b, a;
    759 
    760         int width = info->d_width;
    761         int extrawidth;
    762 
    763         /* do scalar until we can align... */
    764         while ((UNALIGNED_PTR(dst)) && (width)) {
    765             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
    766             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
    767             bits = *(src++);
    768             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    769             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    770             width--;
    771         }
    772 
    773         /* After all that work, here's the vector part! */
    774         extrawidth = (width % 4);
    775         width -= extrawidth;
    776         valigner = VEC_ALIGNER(src);
    777         vbits = vec_ld(0, src);
    778 
    779         while (width) {
    780             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
    781             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
    782             voverflow = vec_ld(15, src);
    783             src += 4;
    784             width -= 4;
    785             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
    786             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
    787             vec_st(vbits, 0, dst);  /* store it back out. */
    788             dst += 4;
    789             vbits = voverflow;
    790         }
    791         
    792         assert(width == 0);
    793 
    794         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
    795         while (extrawidth) {
    796             bits = *(src++);  /* at most 3 pixels (width % 4), don't bother with prefetch. */
    797             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    798             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    799             extrawidth--;
    800         }
    801 
    802         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
    803         dst += dstskip >> 2;
    804     }
    805 
            /* stop both data streams started above */
    806     vec_dss(DST_CHAN_SRC);
    807     vec_dss(DST_CHAN_DEST);
    808 }
   809 
   810 static Uint32 GetBlitFeatures( void )
   811 {
   812     static Uint32 features = 0xffffffff;
   813     if (features == 0xffffffff) {
   814         /* Provide an override for testing .. */
   815         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   816         if (override) {
   817             features = 0;
   818             SDL_sscanf(override, "%u", &features);
   819         } else {
   820             features = ( 0
   821                 /* Feature 1 is has-MMX */
   822                 | ((SDL_HasMMX()) ? 1 : 0)
   823                 /* Feature 2 is has-AltiVec */
   824                 | ((SDL_HasAltiVec()) ? 2 : 0)
   825                 /* Feature 4 is dont-use-prefetch */
   826                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   827                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   828             );
   829         }
   830     }
   831     return features;
   832 }
   833 #else
   /* Without the AltiVec blitters the only feature bit is 1: has-MMX. */
   #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() != 0))
   836 #endif
   837 
   /*
    * Array indices used when two adjacent elements are combined into one
    * wider value: HI selects the element that supplies the high-order half,
    * LO the one that supplies the low-order half.  Which element that is
    * depends on the byte order of the target.
    */
   #if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
   #  define LO	0
   #  define HI	1
   #else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
   #  define LO	1
   #  define HI	0
   #endif
   846 
   847 #ifdef USE_ASMBLIT
   848 
   849 /* Heheheh, we coerce Hermes into using SDL blit information */
   850 #define X86_ASSEMBLER
   851 #define HermesConverterInterface	SDL_BlitInfo
   852 #define HermesClearInterface		void
   853 #define STACKCALL
   854 
   855 #include "HeadMMX.h"
   856 #include "HeadX86.h"
   857 
   858 #else
   859 
   860 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   /*
    * Store into the lvalue 'dst' the 3-3-2 packing of the 32-bit pixel 'src':
    * bits 23-21 of src land in bits 7-5, bits 15-13 in bits 4-2 and bits 7-6
    * in bits 1-0.  'src' is evaluated three times.
    */
   #define RGB888_RGB332(dst, src) { \
   	dst = ((((src) >> 16) & 0xE0) | \
   	       (((src) >> 11) & 0x1C) | \
   	       (((src) >>  6) & 0x03)); \
   }
   866 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   867 {
   868 #ifndef USE_DUFFS_LOOP
   869 	int c;
   870 #endif
   871 	int width, height;
   872 	Uint32 *src;
   873 	const Uint8 *map;
   874 	Uint8 *dst;
   875 	int srcskip, dstskip;
   876 
   877 	/* Set up some basic variables */
   878 	width = info->d_width;
   879 	height = info->d_height;
   880 	src = (Uint32 *)info->s_pixels;
   881 	srcskip = info->s_skip/4;
   882 	dst = info->d_pixels;
   883 	dstskip = info->d_skip;
   884 	map = info->table;
   885 
   886 	if ( map == NULL ) {
   887 		while ( height-- ) {
   888 #ifdef USE_DUFFS_LOOP
   889 			DUFFS_LOOP(
   890 				RGB888_RGB332(*dst++, *src);
   891 			, width);
   892 #else
   893 			for ( c=width/4; c; --c ) {
   894 				/* Pack RGB into 8bit pixel */
   895 				++src;
   896 				RGB888_RGB332(*dst++, *src);
   897 				++src;
   898 				RGB888_RGB332(*dst++, *src);
   899 				++src;
   900 				RGB888_RGB332(*dst++, *src);
   901 				++src;
   902 			}
   903 			switch ( width & 3 ) {
   904 				case 3:
   905 					RGB888_RGB332(*dst++, *src);
   906 					++src;
   907 				case 2:
   908 					RGB888_RGB332(*dst++, *src);
   909 					++src;
   910 				case 1:
   911 					RGB888_RGB332(*dst++, *src);
   912 					++src;
   913 			}
   914 #endif /* USE_DUFFS_LOOP */
   915 			src += srcskip;
   916 			dst += dstskip;
   917 		}
   918 	} else {
   919 		int Pixel;
   920 
   921 		while ( height-- ) {
   922 #ifdef USE_DUFFS_LOOP
   923 			DUFFS_LOOP(
   924 				RGB888_RGB332(Pixel, *src);
   925 				*dst++ = map[Pixel];
   926 				++src;
   927 			, width);
   928 #else
   929 			for ( c=width/4; c; --c ) {
   930 				/* Pack RGB into 8bit pixel */
   931 				RGB888_RGB332(Pixel, *src);
   932 				*dst++ = map[Pixel];
   933 				++src;
   934 				RGB888_RGB332(Pixel, *src);
   935 				*dst++ = map[Pixel];
   936 				++src;
   937 				RGB888_RGB332(Pixel, *src);
   938 				*dst++ = map[Pixel];
   939 				++src;
   940 				RGB888_RGB332(Pixel, *src);
   941 				*dst++ = map[Pixel];
   942 				++src;
   943 			}
   944 			switch ( width & 3 ) {
   945 				case 3:
   946 					RGB888_RGB332(Pixel, *src);
   947 					*dst++ = map[Pixel];
   948 					++src;
   949 				case 2:
   950 					RGB888_RGB332(Pixel, *src);
   951 					*dst++ = map[Pixel];
   952 					++src;
   953 				case 1:
   954 					RGB888_RGB332(Pixel, *src);
   955 					*dst++ = map[Pixel];
   956 					++src;
   957 			}
   958 #endif /* USE_DUFFS_LOOP */
   959 			src += srcskip;
   960 			dst += dstskip;
   961 		}
   962 	}
   963 }
   964 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   /*
    * RGB888_RGB555(dst, src): convert the 32-bit pixel that 'src' points to
    * into 5-5-5 form (bits 23-19 -> 14-10, bits 15-11 -> 9-5, bits 7-3 -> 4-0)
    * and store it as a Uint16 at 'dst'.
    *
    * Both arguments are pointers and may be arbitrary pointer expressions;
    * 'src' is evaluated three times, so it must not have side effects.
    */
   #define RGB888_RGB555(dst, src) { \
   	*(Uint16 *)(dst) = (((*(src))&0x00F80000)>>9)| \
   	                   (((*(src))&0x0000F800)>>6)| \
   	                   (((*(src))&0x000000F8)>>3); \
   }
   /*
    * RGB888_RGB555_TWO(dst, src): convert the two 32-bit pixels src[LO] and
    * src[HI] and write both results with a single Uint32 store at 'dst'; the
    * src[HI] result occupies the upper 16 bits of the stored value.
    *
    * 'dst' is accessed as a Uint32, so the caller is responsible for passing
    * a suitably aligned address.  'src' is evaluated six times.
    */
   #define RGB888_RGB555_TWO(dst, src) { \
   	*(Uint32 *)(dst) = ((((((src)[HI])&0x00F80000)>>9)| \
   	                     ((((src)[HI])&0x0000F800)>>6)| \
   	                     ((((src)[HI])&0x000000F8)>>3))<<16)| \
   	                     ((((src)[LO])&0x00F80000)>>9)| \
   	                     ((((src)[LO])&0x0000F800)>>6)| \
   	                     ((((src)[LO])&0x000000F8)>>3); \
   }
   978 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   979 {
   980 #ifndef USE_DUFFS_LOOP
   981 	int c;
   982 #endif
   983 	int width, height;
   984 	Uint32 *src;
   985 	Uint16 *dst;
   986 	int srcskip, dstskip;
   987 
   988 	/* Set up some basic variables */
   989 	width = info->d_width;
   990 	height = info->d_height;
   991 	src = (Uint32 *)info->s_pixels;
   992 	srcskip = info->s_skip/4;
   993 	dst = (Uint16 *)info->d_pixels;
   994 	dstskip = info->d_skip/2;
   995 
   996 #ifdef USE_DUFFS_LOOP
   997 	while ( height-- ) {
   998 		DUFFS_LOOP(
   999 			RGB888_RGB555(dst, src);
  1000 			++src;
  1001 			++dst;
  1002 		, width);
  1003 		src += srcskip;
  1004 		dst += dstskip;
  1005 	}
  1006 #else
  1007 	/* Memory align at 4-byte boundary, if necessary */
  1008 	if ( (long)dst & 0x03 ) {
  1009 		/* Don't do anything if width is 0 */
  1010 		if ( width == 0 ) {
  1011 			return;
  1012 		}
  1013 		--width;
  1014 
  1015 		while ( height-- ) {
  1016 			/* Perform copy alignment */
  1017 			RGB888_RGB555(dst, src);
  1018 			++src;
  1019 			++dst;
  1020 
  1021 			/* Copy in 4 pixel chunks */
  1022 			for ( c=width/4; c; --c ) {
  1023 				RGB888_RGB555_TWO(dst, src);
  1024 				src += 2;
  1025 				dst += 2;
  1026 				RGB888_RGB555_TWO(dst, src);
  1027 				src += 2;
  1028 				dst += 2;
  1029 			}
  1030 			/* Get any leftovers */
  1031 			switch (width & 3) {
  1032 				case 3:
  1033 					RGB888_RGB555(dst, src);
  1034 					++src;
  1035 					++dst;
  1036 				case 2:
  1037 					RGB888_RGB555_TWO(dst, src);
  1038 					src += 2;
  1039 					dst += 2;
  1040 					break;
  1041 				case 1:
  1042 					RGB888_RGB555(dst, src);
  1043 					++src;
  1044 					++dst;
  1045 					break;
  1046 			}
  1047 			src += srcskip;
  1048 			dst += dstskip;
  1049 		}
  1050 	} else { 
  1051 		while ( height-- ) {
  1052 			/* Copy in 4 pixel chunks */
  1053 			for ( c=width/4; c; --c ) {
  1054 				RGB888_RGB555_TWO(dst, src);
  1055 				src += 2;
  1056 				dst += 2;
  1057 				RGB888_RGB555_TWO(dst, src);
  1058 				src += 2;
  1059 				dst += 2;
  1060 			}
  1061 			/* Get any leftovers */
  1062 			switch (width & 3) {
  1063 				case 3:
  1064 					RGB888_RGB555(dst, src);
  1065 					++src;
  1066 					++dst;
  1067 				case 2:
  1068 					RGB888_RGB555_TWO(dst, src);
  1069 					src += 2;
  1070 					dst += 2;
  1071 					break;
  1072 				case 1:
  1073 					RGB888_RGB555(dst, src);
  1074 					++src;
  1075 					++dst;
  1076 					break;
  1077 			}
  1078 			src += srcskip;
  1079 			dst += dstskip;
  1080 		}
  1081 	}
  1082 #endif /* USE_DUFFS_LOOP */
  1083 }
  1084 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  /*
   * RGB888_RGB565(dst, src): convert the 32-bit pixel that 'src' points to
   * into 5-6-5 form (bits 23-19 -> 15-11, bits 15-10 -> 10-5, bits 7-3 -> 4-0)
   * and store it as a Uint16 at 'dst'.
   *
   * Both arguments are pointers and may be arbitrary pointer expressions;
   * 'src' is evaluated three times, so it must not have side effects.
   */
  #define RGB888_RGB565(dst, src) { \
  	*(Uint16 *)(dst) = (((*(src))&0x00F80000)>>8)| \
  	                   (((*(src))&0x0000FC00)>>5)| \
  	                   (((*(src))&0x000000F8)>>3); \
  }
  /*
   * RGB888_RGB565_TWO(dst, src): convert the two 32-bit pixels src[LO] and
   * src[HI] and write both results with a single Uint32 store at 'dst'; the
   * src[HI] result occupies the upper 16 bits of the stored value.
   *
   * 'dst' is accessed as a Uint32, so the caller is responsible for passing
   * a suitably aligned address.  'src' is evaluated six times.
   */
  #define RGB888_RGB565_TWO(dst, src) { \
  	*(Uint32 *)(dst) = ((((((src)[HI])&0x00F80000)>>8)| \
  	                     ((((src)[HI])&0x0000FC00)>>5)| \
  	                     ((((src)[HI])&0x000000F8)>>3))<<16)| \
  	                     ((((src)[LO])&0x00F80000)>>8)| \
  	                     ((((src)[LO])&0x0000FC00)>>5)| \
  	                     ((((src)[LO])&0x000000F8)>>3); \
  }
  1098 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1099 {
  1100 #ifndef USE_DUFFS_LOOP
  1101 	int c;
  1102 #endif
  1103 	int width, height;
  1104 	Uint32 *src;
  1105 	Uint16 *dst;
  1106 	int srcskip, dstskip;
  1107 
  1108 	/* Set up some basic variables */
  1109 	width = info->d_width;
  1110 	height = info->d_height;
  1111 	src = (Uint32 *)info->s_pixels;
  1112 	srcskip = info->s_skip/4;
  1113 	dst = (Uint16 *)info->d_pixels;
  1114 	dstskip = info->d_skip/2;
  1115 
  1116 #ifdef USE_DUFFS_LOOP
  1117 	while ( height-- ) {
  1118 		DUFFS_LOOP(
  1119 			RGB888_RGB565(dst, src);
  1120 			++src;
  1121 			++dst;
  1122 		, width);
  1123 		src += srcskip;
  1124 		dst += dstskip;
  1125 	}
  1126 #else
  1127 	/* Memory align at 4-byte boundary, if necessary */
  1128 	if ( (long)dst & 0x03 ) {
  1129 		/* Don't do anything if width is 0 */
  1130 		if ( width == 0 ) {
  1131 			return;
  1132 		}
  1133 		--width;
  1134 
  1135 		while ( height-- ) {
  1136 			/* Perform copy alignment */
  1137 			RGB888_RGB565(dst, src);
  1138 			++src;
  1139 			++dst;
  1140 
  1141 			/* Copy in 4 pixel chunks */
  1142 			for ( c=width/4; c; --c ) {
  1143 				RGB888_RGB565_TWO(dst, src);
  1144 				src += 2;
  1145 				dst += 2;
  1146 				RGB888_RGB565_TWO(dst, src);
  1147 				src += 2;
  1148 				dst += 2;
  1149 			}
  1150 			/* Get any leftovers */
  1151 			switch (width & 3) {
  1152 				case 3:
  1153 					RGB888_RGB565(dst, src);
  1154 					++src;
  1155 					++dst;
  1156 				case 2:
  1157 					RGB888_RGB565_TWO(dst, src);
  1158 					src += 2;
  1159 					dst += 2;
  1160 					break;
  1161 				case 1:
  1162 					RGB888_RGB565(dst, src);
  1163 					++src;
  1164 					++dst;
  1165 					break;
  1166 			}
  1167 			src += srcskip;
  1168 			dst += dstskip;
  1169 		}
  1170 	} else { 
  1171 		while ( height-- ) {
  1172 			/* Copy in 4 pixel chunks */
  1173 			for ( c=width/4; c; --c ) {
  1174 				RGB888_RGB565_TWO(dst, src);
  1175 				src += 2;
  1176 				dst += 2;
  1177 				RGB888_RGB565_TWO(dst, src);
  1178 				src += 2;
  1179 				dst += 2;
  1180 			}
  1181 			/* Get any leftovers */
  1182 			switch (width & 3) {
  1183 				case 3:
  1184 					RGB888_RGB565(dst, src);
  1185 					++src;
  1186 					++dst;
  1187 				case 2:
  1188 					RGB888_RGB565_TWO(dst, src);
  1189 					src += 2;
  1190 					dst += 2;
  1191 					break;
  1192 				case 1:
  1193 					RGB888_RGB565(dst, src);
  1194 					++src;
  1195 					++dst;
  1196 					break;
  1197 			}
  1198 			src += srcskip;
  1199 			dst += dstskip;
  1200 		}
  1201 	}
  1202 #endif /* USE_DUFFS_LOOP */
  1203 }
  1204 
  1205 #endif /* USE_ASMBLIT */
  1206 
  1207 
  1208 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  /*
   * Yield the 32-bit pixel for the 16-bit pixel stored in the two bytes at
   * 'src': the sum of the table entry for byte src[LO] (even index, 2*byte)
   * and the table entry for byte src[HI] (odd index, 2*byte+1).
   * 'dst' is accepted for the callers' convenience but is not used.
   */
  #define RGB565_32(dst, src, map) ((map)[(src)[LO]*2] + (map)[(src)[HI]*2+1])
  1210 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1211 {
  1212 #ifndef USE_DUFFS_LOOP
  1213 	int c;
  1214 #endif
  1215 	int width, height;
  1216 	Uint8 *src;
  1217 	Uint32 *dst;
  1218 	int srcskip, dstskip;
  1219 
  1220 	/* Set up some basic variables */
  1221 	width = info->d_width;
  1222 	height = info->d_height;
  1223 	src = (Uint8 *)info->s_pixels;
  1224 	srcskip = info->s_skip;
  1225 	dst = (Uint32 *)info->d_pixels;
  1226 	dstskip = info->d_skip/4;
  1227 
  1228 #ifdef USE_DUFFS_LOOP
  1229 	while ( height-- ) {
  1230 		DUFFS_LOOP(
  1231 		{
  1232 			*dst++ = RGB565_32(dst, src, map);
  1233 			src += 2;
  1234 		},
  1235 		width);
  1236 		src += srcskip;
  1237 		dst += dstskip;
  1238 	}
  1239 #else
  1240 	while ( height-- ) {
  1241 		/* Copy in 4 pixel chunks */
  1242 		for ( c=width/4; c; --c ) {
  1243 			*dst++ = RGB565_32(dst, src, map);
  1244 			src += 2;
  1245 			*dst++ = RGB565_32(dst, src, map);
  1246 			src += 2;
  1247 			*dst++ = RGB565_32(dst, src, map);
  1248 			src += 2;
  1249 			*dst++ = RGB565_32(dst, src, map);
  1250 			src += 2;
  1251 		}
  1252 		/* Get any leftovers */
  1253 		switch (width & 3) {
  1254 			case 3:
  1255 				*dst++ = RGB565_32(dst, src, map);
  1256 				src += 2;
  1257 			case 2:
  1258 				*dst++ = RGB565_32(dst, src, map);
  1259 				src += 2;
  1260 			case 1:
  1261 				*dst++ = RGB565_32(dst, src, map);
  1262 				src += 2;
  1263 				break;
  1264 		}
  1265 		src += srcskip;
  1266 		dst += dstskip;
  1267 	}
  1268 #endif /* USE_DUFFS_LOOP */
  1269 }
  1270 
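  /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8.
     The table holds two entries per byte value b: entry [2*b] is used for
     the byte RGB565_32() reads as src[LO], entry [2*b+1] for the byte it
     reads as src[HI]; the two entries are added to form the 32-bit pixel. */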
  1272 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1273 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1274 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1275 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1276 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1277 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1278 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1279 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1280 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1281 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1282 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1283 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1284 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1285 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1286 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1287 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1288 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1289 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1290 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1291 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1292 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1293 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1294 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1295 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1296 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1297 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1298 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1299 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1300 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1301 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1302 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1303 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1304 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1305 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1306 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1307 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1308 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1309 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1310 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1311 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1312 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1313 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1314 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1315 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1316 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1317 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1318 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1319 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1320 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1321 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1322 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1323 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1324 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1325 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1326 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1327 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1328 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1329 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1330 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1331 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1332 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1333 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1334 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1335 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1336 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1337 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1338 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1339 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1340 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1341 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1342 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1343 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1344 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1345 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1346 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1347 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1348 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1349 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1350 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1351 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1352 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1353 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1354 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1355 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1356 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1357 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1358 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1359 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1360 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1361 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1362 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1363 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1364 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1365 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1366 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1367 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1368 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1369 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1370 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1371 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1372 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1373 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1374 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1375 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1376 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1377 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1378 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1379 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1380 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1381 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1382 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1383 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1384 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1385 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1386 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1387 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1388 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1389 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1390 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1391 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1392 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1393 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1394 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1395 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1396 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1397 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1398 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1399 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1400 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1401 };
  1402 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
  1403 {
  1404     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1405 }
  1406 
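  /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8.
     The table holds two entries per byte value b: entry [2*b] is used for
     the byte RGB565_32() reads as src[LO], entry [2*b+1] for the byte it
     reads as src[HI]; the two entries are added to form the 32-bit pixel. */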
  1408 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1409 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1410 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1411 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1412 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1413 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1414 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1415 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1416 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1417 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1418 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1419 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1420 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1421 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1422 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1423 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1424 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1425 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1426 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1427 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1428 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1429 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1430 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1431 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1432 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1433 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1434 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1435 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1436 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1437 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1438 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1439 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1440 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1441 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1442 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1443 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1444 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1445 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1446 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1447 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1448 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1449 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1450 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1451 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1452 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1453 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1454 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1455 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1456 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1457 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1458 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1459 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1460 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1461 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1462 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1463 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1464 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1465 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1466 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1467 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1468 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1469 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1470 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1471 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1472 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1473 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1474 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1475 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1476 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1477 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1478 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1479 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1480 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1481 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1482 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1483 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1484 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1485 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1486 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1487 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1488 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1489 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1490 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1491 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1492 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1493 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1494 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1495 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1496 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1497 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1498 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1499 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1500 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1501 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1502 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1503 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1504 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1505 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1506 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1507 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1508 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1509 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1510 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1511 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1512 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1513 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1514 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1515 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1516 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1517 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1518 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1519 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1520 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1521 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1522 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1523 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1524 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1525 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1526 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1527 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1528 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1529 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1530 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1531 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1532 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1533 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1534 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1535 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1536 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1537 };
  1538 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
  1539 {
  1540     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1541 }
  1542 
  1543 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1544 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1545 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1546 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1547 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1548 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1549 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1550 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1551 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1552 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1553 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1554 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1555 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1556 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1557 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1558 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1559 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1560 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1561 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1562 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1563 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1564 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1565 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1566 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1567 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1568 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1569 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1570 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1571 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1572 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1573 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1574 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1575 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1576 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1577 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1578 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1579 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1580 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1581 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1582 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1583 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1584 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1585 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1586 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1587 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1588 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1589 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1590 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1591 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1592 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1593 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1594 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1595 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1596 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1597 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1598 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1599 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1600 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1601 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1602 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1603 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1604 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1605 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1606 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1607 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1608 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1609 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1610 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1611 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1612 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1613 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1614 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1615 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1616 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1617 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1618 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1619 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1620 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1621 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1622 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1623 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1624 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1625 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1626 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1627 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1628 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1629 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1630 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1631 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1632 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1633 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1634 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1635 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1636 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1637 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1638 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1639 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1640 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1641 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1642 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1643 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1644 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1645 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1646 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1647 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1648 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1649 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1650 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1651 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1652 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1653 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1654 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1655 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1656 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1657 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1658 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1659 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1660 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1661 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1662 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1663 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1664 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1665 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1666 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1667 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1668 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1669 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1670 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1671 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1672 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1673 };
/*
 * Blit RGB 5-6-5 pixels to an RGBA 8-8-8-8 destination.
 * Forwards the request unchanged to Blit_RGB565_32 together with the
 * RGB565_RGBA8888_LUT conversion table.
 *
 * info: description of the blit, passed straight through.
 */
static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
}
  1678 
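/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
 *
 * The table holds 256 pairs of partial destination pixels, one pair per
 * possible byte value k of the 16-bit source pixel.  Entry 2*k is the
 * contribution of k as the low source byte (blue and the low three green
 * bits); entry 2*k+1 is the contribution of k as the high source byte
 * (red, the high three green bits, and the 0xff alpha byte).
 */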
  1680 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1681 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1682 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1683 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1684 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1685 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1686 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1687 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1688 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1689 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1690 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1691 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1692 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1693 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1694 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1695 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1696 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1697 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1698 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1699 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1700 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1701 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1702 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1703 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1704 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1705 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1706 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1707 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1708 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1709 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1710 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1711 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1712 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1713 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1714 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1715 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1716 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1717 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1718 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1719 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1720 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1721 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1722 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1723 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1724 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1725 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1726 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1727 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1728 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1729 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1730 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1731 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1732 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1733 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1734 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1735 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1736 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1737 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1738 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1739 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1740 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1741 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1742 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1743 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1744 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1745 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1746 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1747 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1748 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1749 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1750 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1751 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1752 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1753 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1754 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1755 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1756 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1757 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1758 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1759 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1760 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1761 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1762 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1763 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1764 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1765 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1766 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1767 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1768 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1769 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1770 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1771 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1772 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1773 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1774 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1775 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1776 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1777 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1778 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1779 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1780 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1781 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1782 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1783 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1784 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1785 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1786 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1787 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1788 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1789 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1790 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1791 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1792 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1793 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1794 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1795 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1796 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1797 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1798 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1799 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1800 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1801 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1802 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1803 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1804 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1805 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1806 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1807 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1808 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1809 };
/*
 * Blit RGB 5-6-5 pixels to a BGRA 8-8-8-8 destination.
 * Forwards the request unchanged to Blit_RGB565_32 together with the
 * RGB565_BGRA8888_LUT conversion table.
 *
 * info: description of the blit, passed straight through.
 */
static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
  1814 
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
/*
 * RGB888_RGB332(dst, src): reduce a 32-bit pixel to an 8-bit 3-3-2 value.
 *   - top three bits of byte 2 (mask 0x00E00000) land in bits 7..5
 *   - top three bits of byte 1 (mask 0x0000E000) land in bits 4..2
 *   - top two bits of byte 0   (mask 0x000000C0) land in bits 1..0
 * dst is assigned the result; src is evaluated three times, so it must
 * not have side effects.  The definition is skipped when the macro is
 * already defined.
 */
#ifndef RGB888_RGB332
#define RGB888_RGB332(dst, src) { \
	dst = (((src)&0x00E00000)>>16)| \
	      (((src)&0x0000E000)>>11)| \
	      (((src)&0x000000C0)>>6); \
}
#endif
  1823 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1824 {
  1825 #ifndef USE_DUFFS_LOOP
  1826 	int c;
  1827 #endif
  1828 	int Pixel;
  1829 	int width, height;
  1830 	Uint32 *src;
  1831 	const Uint8 *map;
  1832 	Uint8 *dst;
  1833 	int srcskip, dstskip;
  1834 
  1835 	/* Set up some basic variables */
  1836 	width = info->d_width;
  1837 	height = info->d_height;
  1838 	src = (Uint32 *)info->s_pixels;
  1839 	srcskip = info->s_skip/4;
  1840 	dst = info->d_pixels;
  1841 	dstskip = info->d_skip;
  1842 	map = info->table;
  1843 
  1844 #ifdef USE_DUFFS_LOOP
  1845 	while ( height-- ) {
  1846 		DUFFS_LOOP(
  1847 			RGB888_RGB332(Pixel, *src);
  1848 			*dst++ = map[Pixel];
  1849 			++src;
  1850 		, width);
  1851 		src += srcskip;
  1852 		dst += dstskip;
  1853 	}
  1854 #else
  1855 	while ( height-- ) {
  1856 		for ( c=width/4; c; --c ) {
  1857 			/* Pack RGB into 8bit pixel */
  1858 			RGB888_RGB332(Pixel, *src);
  1859 			*dst++ = map[Pixel];
  1860 			++src;
  1861 			RGB888_RGB332(Pixel, *src);
  1862 			*dst++ = map[Pixel];
  1863 			++src;
  1864 			RGB888_RGB332(Pixel, *src);
  1865 			*dst++ = map[Pixel];
  1866 			++src;
  1867 			RGB888_RGB332(Pixel, *src);
  1868 			*dst++ = map[Pixel];
  1869 			++src;
  1870 		}
  1871 		switch ( width & 3 ) {
  1872 			case 3:
  1873 				RGB888_RGB332(Pixel, *src);
  1874 				*dst++ = map[Pixel];
  1875 				++src;
  1876 			case 2:
  1877 				RGB888_RGB332(Pixel, *src);
  1878 				*dst++ = map[Pixel];
  1879 				++src;
  1880 			case 1:
  1881 				RGB888_RGB332(Pixel, *src);
  1882 				*dst++ = map[Pixel];
  1883 				++src;
  1884 		}
  1885 		src += srcskip;
  1886 		dst += dstskip;
  1887 	}
  1888 #endif /* USE_DUFFS_LOOP */
  1889 }
  1890 static void BlitNto1(SDL_BlitInfo *info)
  1891 {
  1892 #ifndef USE_DUFFS_LOOP
  1893 	int c;
  1894 #endif
  1895 	int width, height;
  1896 	Uint8 *src;
  1897 	const Uint8 *map;
  1898 	Uint8 *dst;
  1899 	int srcskip, dstskip;
  1900 	int srcbpp;
  1901 	Uint32 Pixel;
  1902 	int  sR, sG, sB;
  1903 	SDL_PixelFormat *srcfmt;
  1904 
  1905 	/* Set up some basic variables */
  1906 	width = info->d_width;
  1907 	height = info->d_height;
  1908 	src = info->s_pixels;
  1909 	srcskip = info->s_skip;
  1910 	dst = info->d_pixels;
  1911 	dstskip = info->d_skip;
  1912 	map = info->table;
  1913 	srcfmt = info->src;
  1914 	srcbpp = srcfmt->BytesPerPixel;
  1915 
  1916 	if ( map == NULL ) {
  1917 		while ( height-- ) {
  1918 #ifdef USE_DUFFS_LOOP
  1919 			DUFFS_LOOP(
  1920 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1921 								sR, sG, sB);
  1922 				if ( 1 ) {
  1923 				  	/* Pack RGB into 8bit pixel */
  1924 				  	*dst = ((sR>>5)<<(3+2))|
  1925 					        ((sG>>5)<<(2)) |
  1926 					        ((sB>>6)<<(0)) ;
  1927 				}
  1928 				dst++;
  1929 				src += srcbpp;
  1930 			, width);
  1931 #else
  1932 			for ( c=width; c; --c ) {
  1933 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1934 								sR, sG, sB);
  1935 				if ( 1 ) {
  1936 				  	/* Pack RGB into 8bit pixel */
  1937 				  	*dst = ((sR>>5)<<(3+2))|
  1938 					        ((sG>>5)<<(2)) |
  1939 					        ((sB>>6)<<(0)) ;
  1940 				}
  1941 				dst++;
  1942 				src += srcbpp;
  1943 			}
  1944 #endif
  1945 			src += srcskip;
  1946 			dst += dstskip;
  1947 		}
  1948 	} else {
  1949 		while ( height-- ) {
  1950 #ifdef USE_DUFFS_LOOP
  1951 			DUFFS_LOOP(
  1952 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1953 								sR, sG, sB);
  1954 				if ( 1 ) {
  1955 				  	/* Pack RGB into 8bit pixel */
  1956 				  	*dst = map[((sR>>5)<<(3+2))|
  1957 						   ((sG>>5)<<(2))  |
  1958 						   ((sB>>6)<<(0))  ];
  1959 				}
  1960 				dst++;
  1961 				src += srcbpp;
  1962 			, width);
  1963 #else
  1964 			for ( c=width; c; --c ) {
  1965 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1966 								sR, sG, sB);
  1967 				if ( 1 ) {
  1968 				  	/* Pack RGB into 8bit pixel */
  1969 				  	*dst = map[((sR>>5)<<(3+2))|
  1970 						   ((sG>>5)<<(2))  |
  1971 						   ((sB>>6)<<(0))  ];
  1972 				}
  1973 				dst++;
  1974 				src += srcbpp;
  1975 			}
  1976 #endif /* USE_DUFFS_LOOP */
  1977 			src += srcskip;
  1978 			dst += dstskip;
  1979 		}
  1980 	}
  1981 }
  1982 
  1983 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  1984 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1985 {
  1986 	int width = info->d_width;
  1987 	int height = info->d_height;
  1988 	Uint32 *src = (Uint32 *)info->s_pixels;
  1989 	int srcskip = info->s_skip;
  1990 	Uint32 *dst = (Uint32 *)info->d_pixels;
  1991 	int dstskip = info->d_skip;
  1992 	SDL_PixelFormat *srcfmt = info->src;
  1993 	SDL_PixelFormat *dstfmt = info->dst;
  1994 
  1995 	if (dstfmt->Amask) {
  1996 		/* RGB->RGBA, SET_ALPHA */
  1997 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  1998 
  1999 		while ( height-- ) {
  2000 			DUFFS_LOOP(
  2001 			{
  2002 				*dst = *src | mask;
  2003 				++dst;
  2004 				++src;
  2005 			},
  2006 			width);
  2007 			src = (Uint32*)((Uint8*)src + srcskip);
  2008 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2009 		}
  2010 	} else {
  2011 		/* RGBA->RGB, NO_ALPHA */
  2012 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2013 
  2014 		while ( height-- ) {
  2015 			DUFFS_LOOP(
  2016 			{
  2017 				*dst = *src & mask;
  2018 				++dst;
  2019 				++src;
  2020 			},
  2021 			width);
  2022 			src = (Uint32*)((Uint8*)src + srcskip);
  2023 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2024 		}
  2025 	}
  2026 }
  2027 
  2028 static void BlitNtoN(SDL_BlitInfo *info)
  2029 {
  2030 	int width = info->d_width;
  2031 	int height = info->d_height;
  2032 	Uint8 *src = info->s_pixels;
  2033 	int srcskip = info->s_skip;
  2034 	Uint8 *dst = info->d_pixels;
  2035 	int dstskip = info->d_skip;
  2036 	SDL_PixelFormat *srcfmt = info->src;
  2037 	int srcbpp = srcfmt->BytesPerPixel;
  2038 	SDL_PixelFormat *dstfmt = info->dst;
  2039 	int dstbpp = dstfmt->BytesPerPixel;
  2040 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2041 
  2042 	while ( height-- ) {
  2043 		DUFFS_LOOP(
  2044 		{
  2045 		        Uint32 Pixel;
  2046 			unsigned sR;
  2047 			unsigned sG;
  2048 			unsigned sB;
  2049 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2050 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2051 			dst += dstbpp;
  2052 			src += srcbpp;
  2053 		},
  2054 		width);
  2055 		src += srcskip;
  2056 		dst += dstskip;
  2057 	}
  2058 }
  2059 
  2060 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2061 {
  2062 	int width = info->d_width;
  2063 	int height = info->d_height;
  2064 	Uint8 *src = info->s_pixels;
  2065 	int srcskip = info->s_skip;
  2066 	Uint8 *dst = info->d_pixels;
  2067 	int dstskip = info->d_skip;
  2068 	SDL_PixelFormat *srcfmt = info->src;
  2069 	int srcbpp = srcfmt->BytesPerPixel;
  2070 	SDL_PixelFormat *dstfmt = info->dst;
  2071 	int dstbpp = dstfmt->BytesPerPixel;
  2072 	int c;
  2073 
  2074 	/* FIXME: should map alpha to [0..255] correctly! */
  2075 	while ( height-- ) {
  2076 		for ( c=width; c; --c ) {
  2077 		        Uint32 Pixel;
  2078 			unsigned sR, sG, sB, sA;
  2079 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2080 				      sR, sG, sB, sA);
  2081 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2082 				      sR, sG, sB, sA);
  2083 			dst += dstbpp;
  2084 			src += srcbpp;
  2085 		}
  2086 		src += srcskip;
  2087 		dst += dstskip;
  2088 	}
  2089 }
  2090 
  2091 static void BlitNto1Key(SDL_BlitInfo *info)
  2092 {
  2093 	int width = info->d_width;
  2094 	int height = info->d_height;
  2095 	Uint8 *src = info->s_pixels;
  2096 	int srcskip = info->s_skip;
  2097 	Uint8 *dst = info->d_pixels;
  2098 	int dstskip = info->d_skip;
  2099 	SDL_PixelFormat *srcfmt = info->src;
  2100 	const Uint8 *palmap = info->table;
  2101 	Uint32 ckey = srcfmt->colorkey;
  2102 	Uint32 rgbmask = ~srcfmt->Amask;
  2103 	int srcbpp;
  2104 	Uint32 Pixel;
  2105 	Uint8  sR, sG, sB;
  2106 
  2107 	/* Set up some basic variables */
  2108 	srcbpp = srcfmt->BytesPerPixel;
  2109 	ckey &= rgbmask;
  2110 
  2111 	if ( palmap == NULL ) {
  2112 		while ( height-- ) {
  2113 			DUFFS_LOOP(
  2114 			{
  2115 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2116 								sR, sG, sB);
  2117 				if ( (Pixel & rgbmask) != ckey ) {
  2118 				  	/* Pack RGB into 8bit pixel */
  2119 				  	*dst = ((sR>>5)<<(3+2))|
  2120 						((sG>>5)<<(2)) |
  2121 						((sB>>6)<<(0)) ;
  2122 				}
  2123 				dst++;
  2124 				src += srcbpp;
  2125 			},
  2126 			width);
  2127 			src += srcskip;
  2128 			dst += dstskip;
  2129 		}
  2130 	} else {
  2131 		while ( height-- ) {
  2132 			DUFFS_LOOP(
  2133 			{
  2134 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2135 								sR, sG, sB);
  2136 				if ( (Pixel & rgbmask) != ckey ) {
  2137 				  	/* Pack RGB into 8bit pixel */
  2138 				  	*dst = palmap[((sR>>5)<<(3+2))|
  2139 							((sG>>5)<<(2))  |
  2140 							((sB>>6)<<(0))  ];
  2141 				}
  2142 				dst++;
  2143 				src += srcbpp;
  2144 			},
  2145 			width);
  2146 			src += srcskip;
  2147 			dst += dstskip;
  2148 		}
  2149 	}
  2150 }
  2151 
  2152 static void Blit2to2Key(SDL_BlitInfo *info)
  2153 {
  2154 	int width = info->d_width;
  2155 	int height = info->d_height;
  2156 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2157 	int srcskip = info->s_skip;
  2158 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2159 	int dstskip = info->d_skip;
  2160 	Uint32 ckey = info->src->colorkey;
  2161 	Uint32 rgbmask = ~info->src->Amask;
  2162 
  2163 	/* Set up some basic variables */
  2164         srcskip /= 2;
  2165         dstskip /= 2;
  2166 	ckey &= rgbmask;
  2167 
  2168 	while ( height-- ) {
  2169 		DUFFS_LOOP(
  2170 		{
  2171 			if ( (*srcp & rgbmask) != ckey ) {
  2172 				*dstp = *srcp;
  2173 			}
  2174 			dstp++;
  2175 			srcp++;
  2176 		},
  2177 		width);
  2178 		srcp += srcskip;
  2179 		dstp += dstskip;
  2180 	}
  2181 }
  2182 
  2183 static void BlitNtoNKey(SDL_BlitInfo *info)
  2184 {
  2185 	int width = info->d_width;
  2186 	int height = info->d_height;
  2187 	Uint8 *src = info->s_pixels;
  2188 	int srcskip = info->s_skip;
  2189 	Uint8 *dst = info->d_pixels;
  2190 	int dstskip = info->d_skip;
  2191 	Uint32 ckey = info->src->colorkey;
  2192 	SDL_PixelFormat *srcfmt = info->src;
  2193 	SDL_PixelFormat *dstfmt = info->dst;
  2194 	int srcbpp = srcfmt->BytesPerPixel;
  2195 	int dstbpp = dstfmt->BytesPerPixel;
  2196 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2197 
  2198 	while ( height-- ) {
  2199 		DUFFS_LOOP(
  2200 		{
  2201 		        Uint32 Pixel;
  2202 			unsigned sR;
  2203 			unsigned sG;
  2204 			unsigned sB;
  2205 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2206 			if ( Pixel != ckey ) {
  2207 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2208 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2209 					      sR, sG, sB, alpha);
  2210 			}
  2211 			dst += dstbpp;
  2212 			src += srcbpp;
  2213 		},
  2214 		width);
  2215 		src += srcskip;
  2216 		dst += dstskip;
  2217 	}
  2218 }
  2219 
  2220 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2221 {
  2222 	int width = info->d_width;
  2223 	int height = info->d_height;
  2224 	Uint8 *src = info->s_pixels;
  2225 	int srcskip = info->s_skip;
  2226 	Uint8 *dst = info->d_pixels;
  2227 	int dstskip = info->d_skip;
  2228 	Uint32 ckey = info->src->colorkey;
  2229 	SDL_PixelFormat *srcfmt = info->src;
  2230 	SDL_PixelFormat *dstfmt = info->dst;
  2231 	Uint32 rgbmask = ~srcfmt->Amask;
  2232 
  2233 	Uint8 srcbpp;
  2234 	Uint8 dstbpp;
  2235 	Uint32 Pixel;
  2236 	Uint8  sR, sG, sB, sA;
  2237 
  2238 	/* Set up some basic variables */
  2239 	srcbpp = srcfmt->BytesPerPixel;
  2240 	dstbpp = dstfmt->BytesPerPixel;
  2241 	ckey &= rgbmask;
  2242 
  2243 	/* FIXME: should map alpha to [0..255] correctly! */
  2244 	while ( height-- ) {
  2245 		DUFFS_LOOP(
  2246 		{
  2247 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2248 				      sR, sG, sB, sA);
  2249 			if ( (Pixel & rgbmask) != ckey ) {
  2250 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2251 						sR, sG, sB, sA);
  2252 			}
  2253 			dst += dstbpp;
  2254 			src += srcbpp;
  2255 		},
  2256 		width);
  2257 		src += srcskip;
  2258 		dst += dstskip;
  2259 	}
  2260 }
  2261 
/* Normal N to N optimized blitters */
/*
 * One row of a blitter selection table: the source/destination layout a
 * specialised blitter handles, plus the function that performs it.
 */
struct blit_table {
	/* Source red/green/blue masks; per the MASKOK macro a zero entry
	   matches any mask */
	Uint32 srcR, srcG, srcB;
	/* Destination bytes per pixel (the rows use 2, 3 or 4; 0 in the
	   default rows) */
	int dstbpp;
	/* Destination red/green/blue masks; zero matches any mask */
	Uint32 dstR, dstG, dstB;
	/* CPU feature bits the row needs; the tables annotate 2 as
	   "has-altivec" and 6 as "has-altivec | dont-use-prefetch", and
	   use 1 on the MMX rows */
	Uint32 blit_features;
	/* Extra data for blitfunc; the assembly rows store the concrete
	   converter routine here, other rows use NULL */
	void *aux_data;
	/* The blit routine for this row */
	SDL_loblit blitfunc;
	/* Bit flags naming the alpha handling the row supports */
	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
};
/*
 * Blitter table for 1-byte-per-pixel RGB sources.  It contains only a
 * placeholder row with no blit function; the alpha field is omitted from
 * the initializer and is therefore zero.
 */
static const struct blit_table normal_blit_1[] = {
	/* Default for 8-bit RGB source, an invalid combination */
	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
};
/*
 * Blitter table for 2-byte-per-pixel sources.
 * Row layout: source R,G,B masks, destination bytes per pixel,
 * destination R,G,B masks, required feature bits, aux data, blit
 * function, supported alpha modes.  The last row is the generic fallback.
 */
static const struct blit_table normal_blit_2[] = {
#ifdef USE_ASMBLIT
    /* x86 assembly converters from RGB 5-6-5 to other 16-bit layouts */
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
      0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
#endif
#ifdef USE_ALTIVEC_BLITTERS
    /* has-altivec */
    /* Destination masks are zero, i.e. any 32-bit destination layout */
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
#endif
    /* Table-driven RGB 5-6-5 to 32-bit conversions, one per channel order */
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
      0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
      0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
      0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },

    /* Default for 16-bit RGB source, used if no other blitter matches */
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/*
 * Blitter table for 3-byte-per-pixel sources.  It holds only the generic
 * BlitNtoN fallback row.
 */
static const struct blit_table normal_blit_3[] = {
	/* Default for 24-bit RGB source, never optimized */
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/*
 * Blitter table for 4-byte-per-pixel sources.
 * Row layout: source R,G,B masks, destination bytes per pixel,
 * destination R,G,B masks, required feature bits, aux data, blit
 * function, supported alpha modes.  With USE_ASMBLIT the x86/MMX
 * converter rows are compiled in; otherwise the AltiVec rows (when
 * enabled) and the C RGB888 to 16-bit blitters are.  The last row is the
 * generic fallback.
 */
static const struct blit_table normal_blit_4[] = {
#ifdef USE_ASMBLIT
    /* Each 16-bit target has an MMX row (feature bits 1) followed by a
       plain x86 row (feature bits 0) */
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
      1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
      0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
      1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
      0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
    /* 24-bit and 32-bit targets, x86 only */
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
      0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
      0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
      0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
      0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
#else
#ifdef USE_ALTIVEC_BLITTERS
    /* All-zero masks in these rows match any layout (see MASKOK) */
    /* has-altivec | dont-use-prefetch */
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
      6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    /* has-altivec */
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    /* has-altivec */
    { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
#endif
    /* Portable C blitters from RGB 8-8-8 to 5-6-5 and 5-5-5 */
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
#endif
	/* Default for 32-bit RGB source, used if no other blitter matches */
	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/*
 * The four per-source-depth tables in order of source bytes per pixel
 * (1, 2, 3, 4), so element i holds the table for i+1 bytes per pixel.
 */
static const struct blit_table *normal_blit[] = {
	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
};
  2359 
/* Mask matches table, or table entry is zero */
/* x is the mask being tested and y the table entry; a zero y therefore
   acts as a wildcard.  y is evaluated twice. */
#define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2362 
  2363 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2364 {
  2365 	struct private_swaccel *sdata;
  2366 	SDL_PixelFormat *srcfmt;
  2367 	SDL_PixelFormat *dstfmt;
  2368 	const struct blit_table *table;
  2369 	int which;
  2370 	SDL_loblit blitfun;
  2371 
  2372 	/* Set up data for choosing the blit */
  2373 	sdata = surface->map->sw_data;
  2374 	srcfmt = surface->format;
  2375 	dstfmt = surface->map->dst->format;
  2376 
  2377 	if ( blit_index & 2 ) {
  2378 	        /* alpha or alpha+colorkey */
  2379 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2380 	}
  2381 
  2382 	/* We don't support destinations less than 8-bits */
  2383 	if ( dstfmt->BitsPerPixel < 8 ) {
  2384 		return(NULL);
  2385 	}
  2386 	
  2387 	if(blit_index == 1) {
  2388 	    /* colorkey blit: Here we don't have too many options, mostly
  2389 	       because RLE is the preferred fast way to deal with this.
  2390 	       If a particular case turns out to be useful we'll add it. */
  2391 
  2392 	    if(srcfmt->BytesPerPixel == 2
  2393 	       && surface->map->identity)
  2394 		return Blit2to2Key;
  2395 	    else if(dstfmt->BytesPerPixel == 1)
  2396 		return BlitNto1Key;
  2397 	    else {
  2398 #ifdef USE_ALTIVEC_BLITTERS
  2399         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2400             return Blit32to32KeyAltivec;
  2401         } else
  2402 #endif
  2403 
  2404 		if(srcfmt->Amask && dstfmt->Amask)
  2405 		    return BlitNtoNKeyCopyAlpha;
  2406 		else
  2407 		    return BlitNtoNKey;
  2408 	    }
  2409 	}
  2410 
  2411 	blitfun = NULL;
  2412 	if ( dstfmt->BitsPerPixel == 8 ) {
  2413 		/* We assume 8-bit destinations are palettized */
  2414 		if ( (srcfmt->BytesPerPixel == 4) &&
  2415 		     (srcfmt->Rmask == 0x00FF0000) &&
  2416 		     (srcfmt->Gmask == 0x0000FF00) &&
  2417 		     (srcfmt->Bmask == 0x000000FF) ) {
  2418 			if ( surface->map->table ) {
  2419 				blitfun = Blit_RGB888_index8_map;
  2420 			} else {
  2421 #ifdef USE_ASMBLIT
  2422 				sdata->aux_data = ConvertX86p32_8RGB332;
  2423 				blitfun = ConvertX86;
  2424 #else
  2425 				blitfun = Blit_RGB888_index8;
  2426 #endif
  2427 			}
  2428 		} else {
  2429 			blitfun = BlitNto1;
  2430 		}
  2431 	} else {
  2432 		/* Now the meat, choose the blitter we want */
  2433 		int a_need = 0;
  2434 		if(dstfmt->Amask)
  2435 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2436 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2437 		for ( which=0; table[which].dstbpp; ++which ) {
  2438 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2439 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2440 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2441 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2442 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2443 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2444 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2445 			    (a_need & table[which].alpha) == a_need &&
  2446 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2447 				break;
  2448 		}
  2449 		sdata->aux_data = table[which].aux_data;
  2450 		blitfun = table[which].blitfunc;
  2451 
  2452 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2453 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2454 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2455 			     srcfmt->Rmask == dstfmt->Rmask &&
  2456 			     srcfmt->Gmask == dstfmt->Gmask &&
  2457 			     srcfmt->Bmask == dstfmt->Bmask ) {
  2458 				blitfun = Blit4to4MaskAlpha;
  2459 			} else if ( a_need == COPY_ALPHA ) {
  2460 			    blitfun = BlitNtoNCopyAlpha;
  2461 			}
  2462 		}
  2463 	}
  2464 
  2465 #ifdef DEBUG_ASM
  2466 #ifdef USE_ASMBLIT
  2467 	if ( blitfun == ConvertMMX )
  2468 		fprintf(stderr, "Using mmx blit\n");
  2469 	else
  2470 	if ( blitfun == ConvertX86 )
  2471 		fprintf(stderr, "Using asm blit\n");
  2472 	else
  2473 #endif
  2474 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
  2475 		fprintf(stderr, "Using C blit\n");
  2476 	else
  2477 		fprintf(stderr, "Using optimized C blit\n");
  2478 #endif /* DEBUG_ASM */
  2479 
  2480 	return(blitfun);
  2481 }