src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 10 Feb 2006 06:48:43 +0000
changeset 1358 c71e05b4dc2e
parent 1339 62802d9d7c87
child 1361 19418e4422cb
permissions -rw-r--r--
More header massaging... works great on Windows. ;-)
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #include "SDL_video.h"
    24 #include "SDL_endian.h"
    25 #include "SDL_cpuinfo.h"
    26 #include "SDL_blit.h"
    27 
    28 /* Functions to blit from N-bit surfaces to other surfaces */
    29 
    30 #ifdef USE_ALTIVEC_BLITTERS
    31 #ifdef HAVE_ALTIVEC_H
    32 #include <altivec.h>
    33 #endif
    34 #define assert(X)
    35 #ifdef MACOSX
    36 #include <sys/sysctl.h>
    37 static size_t GetL3CacheSize( void )
    38 {
    39     const char key[] = "hw.l3cachesize";
    40     u_int64_t result = 0;
    41     size_t typeSize = sizeof( result );
    42 
    43 
    44     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
    45     if( 0 != err ) return 0;
    46 
    47     return result;
    48 }
    49 #else
    50 static size_t GetL3CacheSize( void )
    51 {
    52     /* XXX: Just guess G4 */
    53     return 2097152;
    54 }
    55 #endif /* MACOSX */
    56 
    57 #if ((defined MACOSX) && (__GNUC__ < 4))
    58     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    59         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    60     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    61         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    62 #else
    63     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    64         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    65     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    66         (vector unsigned short) { a,b,c,d,e,f,g,h }
    67 #endif
    68 
    69 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    70 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    71                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    72                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    73                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    74                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    75 
    76 #define MAKE8888(dstfmt, r, g, b, a)  \
    77     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    78       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    79       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    80       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    81 
    82 /*
    83  * Data Stream Touch...Altivec cache prefetching.
    84  *
    85  *  Don't use this on a G5...however, the speed boost is very significant
    86  *   on a G4.
    87  */
    88 #define DST_CHAN_SRC 1
    89 #define DST_CHAN_DEST 2
    90 
    91 /* macro to set DST control word value... */
    92 #define DST_CTRL(size, count, stride) \
    93     (((size) << 24) | ((count) << 16) | (stride))
    94 
    95 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    96     ? vec_lvsl(0, src) \
    97     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    98 
    99 /* Calculate the permute vector used for 32->32 swizzling */
   100 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
   101                                   const SDL_PixelFormat *dstfmt)
   102 {
   103     /*
   104     * We have to assume that the bits that aren't used by other
   105      *  colors is alpha, and it's one complete byte, since some formats
   106      *  leave alpha with a zero mask, but we should still swizzle the bits.
   107      */
   108     /* ARGB */
   109     const static struct SDL_PixelFormat default_pixel_format = {
   110         NULL, 0, 0,
   111         0, 0, 0, 0,
   112         16, 8, 0, 24,
   113         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   114         0, 0};
   115     if (!srcfmt) {
   116         srcfmt = &default_pixel_format;
   117     }
   118     if (!dstfmt) {
   119         dstfmt = &default_pixel_format;
   120     }
   121     vector unsigned char plus = VECUINT8_LITERAL(
   122                                       0x00, 0x00, 0x00, 0x00,
   123                                       0x04, 0x04, 0x04, 0x04,
   124                                       0x08, 0x08, 0x08, 0x08,
   125                                       0x0C, 0x0C, 0x0C, 0x0C );
   126     vector unsigned char vswiz;
   127     vector unsigned int srcvec;
   128 #define RESHIFT(X) (3 - ((X) >> 3))
   129     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   130     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   131     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   132     Uint32 amask;
   133     /* Use zero for alpha if either surface doesn't have alpha */
   134     if (dstfmt->Amask) {
   135         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   136     } else {    
   137         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   138     }           
   139 #undef RESHIFT  
   140     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
   141     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   142     return(vswiz);
   143 }
   144 
   145 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   146 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   147     int height = info->d_height;
   148     Uint8 *src = (Uint8 *) info->s_pixels;
   149     int srcskip = info->s_skip;
   150     Uint8 *dst = (Uint8 *) info->d_pixels;
   151     int dstskip = info->d_skip;
   152     SDL_PixelFormat *srcfmt = info->src;
   153     vector unsigned char valpha = vec_splat_u8(0);
   154     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   155     vector unsigned char vgmerge = VECUINT8_LITERAL(
   156         0x00, 0x02, 0x00, 0x06,
   157         0x00, 0x0a, 0x00, 0x0e,
   158         0x00, 0x12, 0x00, 0x16,
   159         0x00, 0x1a, 0x00, 0x1e);
   160     vector unsigned short v1 = vec_splat_u16(1);
   161     vector unsigned short v3 = vec_splat_u16(3);
   162     vector unsigned short v3f = VECUINT16_LITERAL(
   163         0x003f, 0x003f, 0x003f, 0x003f,
   164         0x003f, 0x003f, 0x003f, 0x003f);
   165     vector unsigned short vfc = VECUINT16_LITERAL(
   166         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   167         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   168     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   169     vf800 = vec_sl(vf800, vec_splat_u16(8));
   170 
   171     while (height--) {
   172         vector unsigned char valigner;
   173         vector unsigned char voverflow;
   174         vector unsigned char vsrc;
   175 
   176         int width = info->d_width;
   177         int extrawidth;
   178 
   179         /* do scalar until we can align... */
   180 #define ONE_PIXEL_BLEND(condition, widthvar) \
   181         while (condition) { \
   182             Uint32 Pixel; \
   183             unsigned sR, sG, sB, sA; \
   184             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   185                           sR, sG, sB, sA); \
   186             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   187                                 ((sG << 3) & 0x000007E0) | \
   188                                 ((sB >> 3) & 0x0000001F)); \
   189             dst += 2; \
   190             src += 4; \
   191             widthvar--; \
   192         }
   193 
   194         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   195 
   196         /* After all that work, here's the vector part! */
   197         extrawidth = (width % 8);  /* trailing unaligned stores */
   198         width -= extrawidth;
   199         vsrc = vec_ld(0, src);
   200         valigner = VEC_ALIGNER(src);
   201 
   202         while (width) {
   203             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   204             vector unsigned int vsrc1, vsrc2;
   205             vector unsigned char vdst;
   206 
   207             voverflow = vec_ld(15, src);
   208             vsrc = vec_perm(vsrc, voverflow, valigner);
   209             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   210             src += 16;
   211             vsrc = voverflow;
   212             voverflow = vec_ld(15, src);
   213             vsrc = vec_perm(vsrc, voverflow, valigner);
   214             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   215             /* 1555 */
   216             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   217             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   218             vgpixel = vec_and(vgpixel, vfc);
   219             vgpixel = vec_sl(vgpixel, v3);
   220             vrpixel = vec_sl(vpixel, v1);
   221             vrpixel = vec_and(vrpixel, vf800);
   222             vbpixel = vec_and(vpixel, v3f);
   223             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   224             /* 565 */
   225             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   226             vec_st(vdst, 0, dst);
   227 
   228             width -= 8;
   229             src += 16;
   230             dst += 16;
   231             vsrc = voverflow;
   232         }
   233 
   234         assert(width == 0);
   235 
   236         /* do scalar until we can align... */
   237         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   238 #undef ONE_PIXEL_BLEND
   239 
   240         src += srcskip;  /* move to next row, accounting for pitch. */
   241         dst += dstskip;
   242     }
   243 
   244 
   245 }
   246 
   247 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   248     int height = info->d_height;
   249     Uint8 *src = (Uint8 *) info->s_pixels;
   250     int srcskip = info->s_skip;
   251     Uint8 *dst = (Uint8 *) info->d_pixels;
   252     int dstskip = info->d_skip;
   253     SDL_PixelFormat *srcfmt = info->src;
   254     SDL_PixelFormat *dstfmt = info->dst;
   255     unsigned alpha;
   256     vector unsigned char valpha;
   257     vector unsigned char vpermute;
   258     vector unsigned short vf800;
   259     vector unsigned int v8 = vec_splat_u32(8);
   260     vector unsigned int v16 = vec_add(v8, v8);
   261     vector unsigned short v2 = vec_splat_u16(2);
   262     vector unsigned short v3 = vec_splat_u16(3);
   263     /* 
   264         0x10 - 0x1f is the alpha
   265         0x00 - 0x0e evens are the red
   266         0x01 - 0x0f odds are zero
   267     */
   268     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   269         0x10, 0x00, 0x01, 0x01,
   270         0x10, 0x02, 0x01, 0x01,
   271         0x10, 0x04, 0x01, 0x01,
   272         0x10, 0x06, 0x01, 0x01
   273     );
   274     vector unsigned char vredalpha2 = (vector unsigned char) (
   275         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   276     );
   277     /*
   278         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   279         0x11 - 0x0f odds are blue
   280     */
   281     vector unsigned char vblue1 = VECUINT8_LITERAL(
   282         0x00, 0x01, 0x02, 0x11,
   283         0x04, 0x05, 0x06, 0x13,
   284         0x08, 0x09, 0x0a, 0x15,
   285         0x0c, 0x0d, 0x0e, 0x17
   286     );
   287     vector unsigned char vblue2 = (vector unsigned char)(
   288         vec_add((vector unsigned int)vblue1, v8)
   289     );
   290     /*
   291         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   292         0x10 - 0x0e evens are green
   293     */
   294     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   295         0x00, 0x01, 0x10, 0x03,
   296         0x04, 0x05, 0x12, 0x07,
   297         0x08, 0x09, 0x14, 0x0b,
   298         0x0c, 0x0d, 0x16, 0x0f
   299     );
   300     vector unsigned char vgreen2 = (vector unsigned char)(
   301         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   302     );
   303     
   304 
   305     assert(srcfmt->BytesPerPixel == 2);
   306     assert(dstfmt->BytesPerPixel == 4);
   307 
   308     vf800 = (vector unsigned short)vec_splat_u8(-7);
   309     vf800 = vec_sl(vf800, vec_splat_u16(8));
   310 
   311     if (dstfmt->Amask && srcfmt->alpha) {
   312         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   313         valpha = vec_splat(valpha, 0);
   314     } else {
   315         alpha = 0;
   316         valpha = vec_splat_u8(0);
   317     }
   318 
   319     vpermute = calc_swizzle32(NULL, dstfmt);
   320     while (height--) {
   321         vector unsigned char valigner;
   322         vector unsigned char voverflow;
   323         vector unsigned char vsrc;
   324 
   325         int width = info->d_width;
   326         int extrawidth;
   327 
   328         /* do scalar until we can align... */
   329 #define ONE_PIXEL_BLEND(condition, widthvar) \
   330         while (condition) { \
   331             unsigned sR, sG, sB; \
   332             unsigned short Pixel = *((unsigned short *)src); \
   333             sR = (Pixel >> 8) & 0xf8; \
   334             sG = (Pixel >> 3) & 0xfc; \
   335             sB = (Pixel << 3) & 0xf8; \
   336             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   337             src += 2; \
   338             dst += 4; \
   339             widthvar--; \
   340         }
   341         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   342 
   343         /* After all that work, here's the vector part! */
   344         extrawidth = (width % 8);  /* trailing unaligned stores */
   345         width -= extrawidth;
   346         vsrc = vec_ld(0, src);
   347         valigner = VEC_ALIGNER(src);
   348 
   349         while (width) {
   350             vector unsigned short vR, vG, vB;
   351             vector unsigned char vdst1, vdst2;
   352 
   353             voverflow = vec_ld(15, src);
   354             vsrc = vec_perm(vsrc, voverflow, valigner);
   355 
   356             vR = vec_and((vector unsigned short)vsrc, vf800);
   357             vB = vec_sl((vector unsigned short)vsrc, v3);
   358             vG = vec_sl(vB, v2);
   359 
   360             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   361             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   362             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   363             vdst1 = vec_perm(vdst1, valpha, vpermute);
   364             vec_st(vdst1, 0, dst);
   365 
   366             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   367             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   368             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   369             vdst2 = vec_perm(vdst2, valpha, vpermute);
   370             vec_st(vdst2, 16, dst);
   371             
   372             width -= 8;
   373             dst += 32;
   374             src += 16;
   375             vsrc = voverflow;
   376         }
   377 
   378         assert(width == 0);
   379 
   380 
   381         /* do scalar until we can align... */
   382         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   383 #undef ONE_PIXEL_BLEND
   384 
   385         src += srcskip;  /* move to next row, accounting for pitch. */
   386         dst += dstskip;
   387     }
   388 
   389 }
   390 
   391 
   392 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   393     int height = info->d_height;
   394     Uint8 *src = (Uint8 *) info->s_pixels;
   395     int srcskip = info->s_skip;
   396     Uint8 *dst = (Uint8 *) info->d_pixels;
   397     int dstskip = info->d_skip;
   398     SDL_PixelFormat *srcfmt = info->src;
   399     SDL_PixelFormat *dstfmt = info->dst;
   400     unsigned alpha;
   401     vector unsigned char valpha;
   402     vector unsigned char vpermute;
   403     vector unsigned short vf800;
   404     vector unsigned int v8 = vec_splat_u32(8);
   405     vector unsigned int v16 = vec_add(v8, v8);
   406     vector unsigned short v1 = vec_splat_u16(1);
   407     vector unsigned short v3 = vec_splat_u16(3);
   408     /* 
   409         0x10 - 0x1f is the alpha
   410         0x00 - 0x0e evens are the red
   411         0x01 - 0x0f odds are zero
   412     */
   413     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   414         0x10, 0x00, 0x01, 0x01,
   415         0x10, 0x02, 0x01, 0x01,
   416         0x10, 0x04, 0x01, 0x01,
   417         0x10, 0x06, 0x01, 0x01
   418     );
   419     vector unsigned char vredalpha2 = (vector unsigned char)(
   420         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   421     );
   422     /*
   423         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   424         0x11 - 0x0f odds are blue
   425     */
   426     vector unsigned char vblue1 = VECUINT8_LITERAL(
   427         0x00, 0x01, 0x02, 0x11,
   428         0x04, 0x05, 0x06, 0x13,
   429         0x08, 0x09, 0x0a, 0x15,
   430         0x0c, 0x0d, 0x0e, 0x17
   431     );
   432     vector unsigned char vblue2 = (vector unsigned char)(
   433         vec_add((vector unsigned int)vblue1, v8)
   434     );
   435     /*
   436         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   437         0x10 - 0x0e evens are green
   438     */
   439     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   440         0x00, 0x01, 0x10, 0x03,
   441         0x04, 0x05, 0x12, 0x07,
   442         0x08, 0x09, 0x14, 0x0b,
   443         0x0c, 0x0d, 0x16, 0x0f
   444     );
   445     vector unsigned char vgreen2 = (vector unsigned char)(
   446         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   447     );
   448     
   449 
   450     assert(srcfmt->BytesPerPixel == 2);
   451     assert(dstfmt->BytesPerPixel == 4);
   452 
   453     vf800 = (vector unsigned short)vec_splat_u8(-7);
   454     vf800 = vec_sl(vf800, vec_splat_u16(8));
   455 
   456     if (dstfmt->Amask && srcfmt->alpha) {
   457         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   458         valpha = vec_splat(valpha, 0);
   459     } else {
   460         alpha = 0;
   461         valpha = vec_splat_u8(0);
   462     }
   463 
   464     vpermute = calc_swizzle32(NULL, dstfmt);
   465     while (height--) {
   466         vector unsigned char valigner;
   467         vector unsigned char voverflow;
   468         vector unsigned char vsrc;
   469 
   470         int width = info->d_width;
   471         int extrawidth;
   472 
   473         /* do scalar until we can align... */
   474 #define ONE_PIXEL_BLEND(condition, widthvar) \
   475         while (condition) { \
   476             unsigned sR, sG, sB; \
   477             unsigned short Pixel = *((unsigned short *)src); \
   478             sR = (Pixel >> 7) & 0xf8; \
   479             sG = (Pixel >> 2) & 0xf8; \
   480             sB = (Pixel << 3) & 0xf8; \
   481             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   482             src += 2; \
   483             dst += 4; \
   484             widthvar--; \
   485         }
   486         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   487 
   488         /* After all that work, here's the vector part! */
   489         extrawidth = (width % 8);  /* trailing unaligned stores */
   490         width -= extrawidth;
   491         vsrc = vec_ld(0, src);
   492         valigner = VEC_ALIGNER(src);
   493 
   494         while (width) {
   495             vector unsigned short vR, vG, vB;
   496             vector unsigned char vdst1, vdst2;
   497 
   498             voverflow = vec_ld(15, src);
   499             vsrc = vec_perm(vsrc, voverflow, valigner);
   500 
   501             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   502             vB = vec_sl((vector unsigned short)vsrc, v3);
   503             vG = vec_sl(vB, v3);
   504 
   505             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   506             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   507             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   508             vdst1 = vec_perm(vdst1, valpha, vpermute);
   509             vec_st(vdst1, 0, dst);
   510 
   511             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   512             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   513             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   514             vdst2 = vec_perm(vdst2, valpha, vpermute);
   515             vec_st(vdst2, 16, dst);
   516             
   517             width -= 8;
   518             dst += 32;
   519             src += 16;
   520             vsrc = voverflow;
   521         }
   522 
   523         assert(width == 0);
   524 
   525 
   526         /* do scalar until we can align... */
   527         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   528 #undef ONE_PIXEL_BLEND
   529 
   530         src += srcskip;  /* move to next row, accounting for pitch. */
   531         dst += dstskip;
   532     }
   533 
   534 }
   535 
   536 static void BlitNtoNKey(SDL_BlitInfo *info);
   537 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   538 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   539 {
   540     int height = info->d_height;
   541     Uint32 *srcp = (Uint32 *) info->s_pixels;
   542     int srcskip = info->s_skip;
   543     Uint32 *dstp = (Uint32 *) info->d_pixels;
   544     int dstskip = info->d_skip;
   545     SDL_PixelFormat *srcfmt = info->src;
   546     int srcbpp = srcfmt->BytesPerPixel;
   547     SDL_PixelFormat *dstfmt = info->dst;
   548     int dstbpp = dstfmt->BytesPerPixel;
   549     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   550 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   551     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   552 	Uint32 ckey = info->src->colorkey;
   553     vector unsigned int valpha;
   554     vector unsigned char vpermute;
   555     vector unsigned char vzero;
   556     vector unsigned int vckey;
   557     vector unsigned int vrgbmask;
   558     vpermute = calc_swizzle32(srcfmt, dstfmt);
   559     if (info->d_width < 16) {
   560         if(copy_alpha) {
   561             return BlitNtoNKeyCopyAlpha(info);
   562         } else {
   563             return BlitNtoNKey(info);
   564         }
   565     }
   566     vzero = vec_splat_u8(0);
   567     if (alpha) {
   568         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   569         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   570     } else {
   571         valpha = (vector unsigned int)vzero;
   572     }
   573     ckey &= rgbmask;
   574     ((unsigned int *)(char*)&vckey)[0] = ckey;
   575     vckey = vec_splat(vckey, 0);
   576     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   577     vrgbmask = vec_splat(vrgbmask, 0);
   578 
   579     while (height--) {
   580 #define ONE_PIXEL_BLEND(condition, widthvar) \
   581         if (copy_alpha) { \
   582             while (condition) { \
   583                 Uint32 Pixel; \
   584                 unsigned sR, sG, sB, sA; \
   585                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   586                           sR, sG, sB, sA); \
   587                 if ( (Pixel & rgbmask) != ckey ) { \
   588                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   589                             sR, sG, sB, sA); \
   590                 } \
   591                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   592                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   593                 widthvar--; \
   594             } \
   595         } else { \
   596             while (condition) { \
   597                 Uint32 Pixel; \
   598                 unsigned sR, sG, sB; \
   599                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   600                 if ( Pixel != ckey ) { \
   601                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   602                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   603                               sR, sG, sB, alpha); \
   604                 } \
   605                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   606                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   607                 widthvar--; \
   608             } \
   609         }
   610         int width = info->d_width;
   611         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   612         assert(width > 0);
   613         if (width > 0) {
   614             int extrawidth = (width % 4);
   615             vector unsigned char valigner = VEC_ALIGNER(srcp);
   616             vector unsigned int vs = vec_ld(0, srcp);
   617             width -= extrawidth;
   618             assert(width >= 4);
   619             while (width) {
   620                 vector unsigned char vsel;
   621                 vector unsigned int vd;
   622                 vector unsigned int voverflow = vec_ld(15, srcp);
   623                 /* load the source vec */
   624                 vs = vec_perm(vs, voverflow, valigner);
   625                 /* vsel is set for items that match the key */
   626                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   627                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   628                 /* permute the src vec to the dest format */
   629                 vs = vec_perm(vs, valpha, vpermute);
   630                 /* load the destination vec */
   631                 vd = vec_ld(0, dstp);
   632                 /* select the source and dest into vs */
   633                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   634                 
   635                 vec_st(vd, 0, dstp);
   636                 srcp += 4;
   637                 width -= 4;
   638                 dstp += 4;
   639                 vs = voverflow;
   640             }
   641             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   642 #undef ONE_PIXEL_BLEND
   643             srcp += srcskip >> 2;
   644             dstp += dstskip >> 2;
   645         }
   646     }
   647 }
   648 
   649 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   650 /* Use this on a G5 */
   651 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   652 {
   653     int height = info->d_height;
   654     Uint32 *src = (Uint32 *) info->s_pixels;
   655     int srcskip = info->s_skip;
   656     Uint32 *dst = (Uint32 *) info->d_pixels;
   657     int dstskip = info->d_skip;
   658     SDL_PixelFormat *srcfmt = info->src;
   659     SDL_PixelFormat *dstfmt = info->dst;
   660     vector unsigned int vzero = vec_splat_u32(0);
   661     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   662     if (dstfmt->Amask && !srcfmt->Amask) {
   663         if (srcfmt->alpha) {
   664             vector unsigned char valpha;
   665             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   666             vzero = (vector unsigned int)vec_splat(valpha, 0);
   667         }
   668     }
   669 
   670     assert(srcfmt->BytesPerPixel == 4);
   671     assert(dstfmt->BytesPerPixel == 4);
   672 
   673     while (height--) {
   674         vector unsigned char valigner;
   675         vector unsigned int vbits;
   676         vector unsigned int voverflow;
   677         Uint32 bits;
   678         Uint8 r, g, b, a;
   679 
   680         int width = info->d_width;
   681         int extrawidth;
   682 
   683         /* do scalar until we can align... */
   684         while ((UNALIGNED_PTR(dst)) && (width)) {
   685             bits = *(src++);
   686             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   687             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   688             width--;
   689         }
   690 
   691         /* After all that work, here's the vector part! */
   692         extrawidth = (width % 4);
   693         width -= extrawidth;
   694         valigner = VEC_ALIGNER(src);
   695         vbits = vec_ld(0, src);
   696 
   697        while (width) {
   698             voverflow = vec_ld(15, src);
   699             src += 4;
   700             width -= 4;
   701             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   702             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   703             vec_st(vbits, 0, dst);  /* store it back out. */
   704             dst += 4;
   705             vbits = voverflow;
   706         }
   707 
   708         assert(width == 0);
   709 
   710         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   711         while (extrawidth) {
   712             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   713             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   714             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   715             extrawidth--;
   716         }
   717 
   718         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   719         dst += dstskip >> 2;
   720     }
   721 
   722 }
   723 
   724 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   725 /* Use this on a G4 */
   726 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
   727 {
   728     const int scalar_dst_lead = sizeof (Uint32) * 4;
   729     const int vector_dst_lead = sizeof (Uint32) * 16;
   730 
   731     int height = info->d_height;
   732     Uint32 *src = (Uint32 *) info->s_pixels;
   733     int srcskip = info->s_skip;
   734     Uint32 *dst = (Uint32 *) info->d_pixels;
   735     int dstskip = info->d_skip;
   736     SDL_PixelFormat *srcfmt = info->src;
   737     SDL_PixelFormat *dstfmt = info->dst;
   738     vector unsigned int vzero = vec_splat_u32(0);
   739     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   740     if (dstfmt->Amask && !srcfmt->Amask) {
   741         if (srcfmt->alpha) {
   742             vector unsigned char valpha;
   743             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   744             vzero = (vector unsigned int)vec_splat(valpha, 0);
   745         }
   746     }
   747 
   748     assert(srcfmt->BytesPerPixel == 4);
   749     assert(dstfmt->BytesPerPixel == 4);
   750 
   751     while (height--) {
   752         vector unsigned char valigner;
   753         vector unsigned int vbits;
   754         vector unsigned int voverflow;
   755         Uint32 bits;
   756         Uint8 r, g, b, a;
   757 
   758         int width = info->d_width;
   759         int extrawidth;
   760 
   761         /* do scalar until we can align... */
   762         while ((UNALIGNED_PTR(dst)) && (width)) {
   763             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   764             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   765             bits = *(src++);
   766             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   767             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   768             width--;
   769         }
   770 
   771         /* After all that work, here's the vector part! */
   772         extrawidth = (width % 4);
   773         width -= extrawidth;
   774         valigner = VEC_ALIGNER(src);
   775         vbits = vec_ld(0, src);
   776 
   777         while (width) {
   778             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   779             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   780             voverflow = vec_ld(15, src);
   781             src += 4;
   782             width -= 4;
   783             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   784             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   785             vec_st(vbits, 0, dst);  /* store it back out. */
   786             dst += 4;
   787             vbits = voverflow;
   788         }
   789         
   790         assert(width == 0);
   791 
   792         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   793         while (extrawidth) {
   794             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   795             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   796             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   797             extrawidth--;
   798         }
   799 
   800         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   801         dst += dstskip >> 2;
   802     }
   803 
   804     vec_dss(DST_CHAN_SRC);
   805     vec_dss(DST_CHAN_DEST);
   806 }
   807 
   808 static Uint32 GetBlitFeatures( void )
   809 {
   810     static Uint32 features = 0xffffffff;
   811     if (features == 0xffffffff) {
   812         /* Provide an override for testing .. */
   813         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   814         if (override) {
   815             features = 0;
   816             SDL_sscanf(override, "%u", &features);
   817         } else {
   818             features = ( 0
   819                 /* Feature 1 is has-MMX */
   820                 | ((SDL_HasMMX()) ? 1 : 0)
   821                 /* Feature 2 is has-AltiVec */
   822                 | ((SDL_HasAltiVec()) ? 2 : 0)
   823                 /* Feature 4 is dont-use-prefetch */
   824                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   825                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   826             );
   827         }
   828     }
   829     return features;
   830 }
   831 #else
/* Fallback used when the function form of GetBlitFeatures() is not compiled:
   the only feature that can be reported is 1 (has-MMX). */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   834 #endif
   835 
/* This is now endian dependent */
/* HI and LO are array indices (0 or 1) selected by the host byte order.
   The *_TWO macros below use them as src[HI] / src[LO] to decide which of
   two adjacent source pixels lands in the upper and lower 16 bits of a
   32-bit store, and RGB565_32 uses them to pick the two bytes of a 16-bit
   source pixel. */
#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
#define HI	1
#define LO	0
#else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
#define HI	0
#define LO	1
#endif
   844 
   845 #ifdef USE_ASMBLIT
   846 
/* Heheheh, we coerce Hermes into using SDL blit information */
/* These defines are set up before the Hermes headers are included:
   HermesConverterInterface becomes SDL_BlitInfo, HermesClearInterface
   becomes void, and STACKCALL expands to nothing. */
#define X86_ASSEMBLER
#define HermesConverterInterface	SDL_BlitInfo
#define HermesClearInterface		void
#define STACKCALL
   852 
   853 #include "HeadMMX.h"
   854 #include "HeadX86.h"
   855 
   856 #else
   857 
   858 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   859 #define RGB888_RGB332(dst, src) { \
   860 	dst = (((src)&0x00E00000)>>16)| \
   861 	      (((src)&0x0000E000)>>11)| \
   862 	      (((src)&0x000000C0)>>6); \
   863 }
   864 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   865 {
   866 #ifndef USE_DUFFS_LOOP
   867 	int c;
   868 #endif
   869 	int width, height;
   870 	Uint32 *src;
   871 	const Uint8 *map;
   872 	Uint8 *dst;
   873 	int srcskip, dstskip;
   874 
   875 	/* Set up some basic variables */
   876 	width = info->d_width;
   877 	height = info->d_height;
   878 	src = (Uint32 *)info->s_pixels;
   879 	srcskip = info->s_skip/4;
   880 	dst = info->d_pixels;
   881 	dstskip = info->d_skip;
   882 	map = info->table;
   883 
   884 	if ( map == NULL ) {
   885 		while ( height-- ) {
   886 #ifdef USE_DUFFS_LOOP
   887 			DUFFS_LOOP(
   888 				RGB888_RGB332(*dst++, *src);
   889 			, width);
   890 #else
   891 			for ( c=width/4; c; --c ) {
   892 				/* Pack RGB into 8bit pixel */
   893 				++src;
   894 				RGB888_RGB332(*dst++, *src);
   895 				++src;
   896 				RGB888_RGB332(*dst++, *src);
   897 				++src;
   898 				RGB888_RGB332(*dst++, *src);
   899 				++src;
   900 			}
   901 			switch ( width & 3 ) {
   902 				case 3:
   903 					RGB888_RGB332(*dst++, *src);
   904 					++src;
   905 				case 2:
   906 					RGB888_RGB332(*dst++, *src);
   907 					++src;
   908 				case 1:
   909 					RGB888_RGB332(*dst++, *src);
   910 					++src;
   911 			}
   912 #endif /* USE_DUFFS_LOOP */
   913 			src += srcskip;
   914 			dst += dstskip;
   915 		}
   916 	} else {
   917 		int Pixel;
   918 
   919 		while ( height-- ) {
   920 #ifdef USE_DUFFS_LOOP
   921 			DUFFS_LOOP(
   922 				RGB888_RGB332(Pixel, *src);
   923 				*dst++ = map[Pixel];
   924 				++src;
   925 			, width);
   926 #else
   927 			for ( c=width/4; c; --c ) {
   928 				/* Pack RGB into 8bit pixel */
   929 				RGB888_RGB332(Pixel, *src);
   930 				*dst++ = map[Pixel];
   931 				++src;
   932 				RGB888_RGB332(Pixel, *src);
   933 				*dst++ = map[Pixel];
   934 				++src;
   935 				RGB888_RGB332(Pixel, *src);
   936 				*dst++ = map[Pixel];
   937 				++src;
   938 				RGB888_RGB332(Pixel, *src);
   939 				*dst++ = map[Pixel];
   940 				++src;
   941 			}
   942 			switch ( width & 3 ) {
   943 				case 3:
   944 					RGB888_RGB332(Pixel, *src);
   945 					*dst++ = map[Pixel];
   946 					++src;
   947 				case 2:
   948 					RGB888_RGB332(Pixel, *src);
   949 					*dst++ = map[Pixel];
   950 					++src;
   951 				case 1:
   952 					RGB888_RGB332(Pixel, *src);
   953 					*dst++ = map[Pixel];
   954 					++src;
   955 			}
   956 #endif /* USE_DUFFS_LOOP */
   957 			src += srcskip;
   958 			dst += dstskip;
   959 		}
   960 	}
   961 }
   962 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   963 #define RGB888_RGB555(dst, src) { \
   964 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>9)| \
   965 	                   (((*src)&0x0000F800)>>6)| \
   966 	                   (((*src)&0x000000F8)>>3); \
   967 }
   968 #define RGB888_RGB555_TWO(dst, src) { \
   969 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   970 	                     (((src[HI])&0x0000F800)>>6)| \
   971 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   972 	                     (((src[LO])&0x00F80000)>>9)| \
   973 	                     (((src[LO])&0x0000F800)>>6)| \
   974 	                     (((src[LO])&0x000000F8)>>3); \
   975 }
   976 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   977 {
   978 #ifndef USE_DUFFS_LOOP
   979 	int c;
   980 #endif
   981 	int width, height;
   982 	Uint32 *src;
   983 	Uint16 *dst;
   984 	int srcskip, dstskip;
   985 
   986 	/* Set up some basic variables */
   987 	width = info->d_width;
   988 	height = info->d_height;
   989 	src = (Uint32 *)info->s_pixels;
   990 	srcskip = info->s_skip/4;
   991 	dst = (Uint16 *)info->d_pixels;
   992 	dstskip = info->d_skip/2;
   993 
   994 #ifdef USE_DUFFS_LOOP
   995 	while ( height-- ) {
   996 		DUFFS_LOOP(
   997 			RGB888_RGB555(dst, src);
   998 			++src;
   999 			++dst;
  1000 		, width);
  1001 		src += srcskip;
  1002 		dst += dstskip;
  1003 	}
  1004 #else
  1005 	/* Memory align at 4-byte boundary, if necessary */
  1006 	if ( (long)dst & 0x03 ) {
  1007 		/* Don't do anything if width is 0 */
  1008 		if ( width == 0 ) {
  1009 			return;
  1010 		}
  1011 		--width;
  1012 
  1013 		while ( height-- ) {
  1014 			/* Perform copy alignment */
  1015 			RGB888_RGB555(dst, src);
  1016 			++src;
  1017 			++dst;
  1018 
  1019 			/* Copy in 4 pixel chunks */
  1020 			for ( c=width/4; c; --c ) {
  1021 				RGB888_RGB555_TWO(dst, src);
  1022 				src += 2;
  1023 				dst += 2;
  1024 				RGB888_RGB555_TWO(dst, src);
  1025 				src += 2;
  1026 				dst += 2;
  1027 			}
  1028 			/* Get any leftovers */
  1029 			switch (width & 3) {
  1030 				case 3:
  1031 					RGB888_RGB555(dst, src);
  1032 					++src;
  1033 					++dst;
  1034 				case 2:
  1035 					RGB888_RGB555_TWO(dst, src);
  1036 					src += 2;
  1037 					dst += 2;
  1038 					break;
  1039 				case 1:
  1040 					RGB888_RGB555(dst, src);
  1041 					++src;
  1042 					++dst;
  1043 					break;
  1044 			}
  1045 			src += srcskip;
  1046 			dst += dstskip;
  1047 		}
  1048 	} else { 
  1049 		while ( height-- ) {
  1050 			/* Copy in 4 pixel chunks */
  1051 			for ( c=width/4; c; --c ) {
  1052 				RGB888_RGB555_TWO(dst, src);
  1053 				src += 2;
  1054 				dst += 2;
  1055 				RGB888_RGB555_TWO(dst, src);
  1056 				src += 2;
  1057 				dst += 2;
  1058 			}
  1059 			/* Get any leftovers */
  1060 			switch (width & 3) {
  1061 				case 3:
  1062 					RGB888_RGB555(dst, src);
  1063 					++src;
  1064 					++dst;
  1065 				case 2:
  1066 					RGB888_RGB555_TWO(dst, src);
  1067 					src += 2;
  1068 					dst += 2;
  1069 					break;
  1070 				case 1:
  1071 					RGB888_RGB555(dst, src);
  1072 					++src;
  1073 					++dst;
  1074 					break;
  1075 			}
  1076 			src += srcskip;
  1077 			dst += dstskip;
  1078 		}
  1079 	}
  1080 #endif /* USE_DUFFS_LOOP */
  1081 }
  1082 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1083 #define RGB888_RGB565(dst, src) { \
  1084 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>8)| \
  1085 	                   (((*src)&0x0000FC00)>>5)| \
  1086 	                   (((*src)&0x000000F8)>>3); \
  1087 }
  1088 #define RGB888_RGB565_TWO(dst, src) { \
  1089 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1090 	                     (((src[HI])&0x0000FC00)>>5)| \
  1091 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1092 	                     (((src[LO])&0x00F80000)>>8)| \
  1093 	                     (((src[LO])&0x0000FC00)>>5)| \
  1094 	                     (((src[LO])&0x000000F8)>>3); \
  1095 }
  1096 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1097 {
  1098 #ifndef USE_DUFFS_LOOP
  1099 	int c;
  1100 #endif
  1101 	int width, height;
  1102 	Uint32 *src;
  1103 	Uint16 *dst;
  1104 	int srcskip, dstskip;
  1105 
  1106 	/* Set up some basic variables */
  1107 	width = info->d_width;
  1108 	height = info->d_height;
  1109 	src = (Uint32 *)info->s_pixels;
  1110 	srcskip = info->s_skip/4;
  1111 	dst = (Uint16 *)info->d_pixels;
  1112 	dstskip = info->d_skip/2;
  1113 
  1114 #ifdef USE_DUFFS_LOOP
  1115 	while ( height-- ) {
  1116 		DUFFS_LOOP(
  1117 			RGB888_RGB565(dst, src);
  1118 			++src;
  1119 			++dst;
  1120 		, width);
  1121 		src += srcskip;
  1122 		dst += dstskip;
  1123 	}
  1124 #else
  1125 	/* Memory align at 4-byte boundary, if necessary */
  1126 	if ( (long)dst & 0x03 ) {
  1127 		/* Don't do anything if width is 0 */
  1128 		if ( width == 0 ) {
  1129 			return;
  1130 		}
  1131 		--width;
  1132 
  1133 		while ( height-- ) {
  1134 			/* Perform copy alignment */
  1135 			RGB888_RGB565(dst, src);
  1136 			++src;
  1137 			++dst;
  1138 
  1139 			/* Copy in 4 pixel chunks */
  1140 			for ( c=width/4; c; --c ) {
  1141 				RGB888_RGB565_TWO(dst, src);
  1142 				src += 2;
  1143 				dst += 2;
  1144 				RGB888_RGB565_TWO(dst, src);
  1145 				src += 2;
  1146 				dst += 2;
  1147 			}
  1148 			/* Get any leftovers */
  1149 			switch (width & 3) {
  1150 				case 3:
  1151 					RGB888_RGB565(dst, src);
  1152 					++src;
  1153 					++dst;
  1154 				case 2:
  1155 					RGB888_RGB565_TWO(dst, src);
  1156 					src += 2;
  1157 					dst += 2;
  1158 					break;
  1159 				case 1:
  1160 					RGB888_RGB565(dst, src);
  1161 					++src;
  1162 					++dst;
  1163 					break;
  1164 			}
  1165 			src += srcskip;
  1166 			dst += dstskip;
  1167 		}
  1168 	} else { 
  1169 		while ( height-- ) {
  1170 			/* Copy in 4 pixel chunks */
  1171 			for ( c=width/4; c; --c ) {
  1172 				RGB888_RGB565_TWO(dst, src);
  1173 				src += 2;
  1174 				dst += 2;
  1175 				RGB888_RGB565_TWO(dst, src);
  1176 				src += 2;
  1177 				dst += 2;
  1178 			}
  1179 			/* Get any leftovers */
  1180 			switch (width & 3) {
  1181 				case 3:
  1182 					RGB888_RGB565(dst, src);
  1183 					++src;
  1184 					++dst;
  1185 				case 2:
  1186 					RGB888_RGB565_TWO(dst, src);
  1187 					src += 2;
  1188 					dst += 2;
  1189 					break;
  1190 				case 1:
  1191 					RGB888_RGB565(dst, src);
  1192 					++src;
  1193 					++dst;
  1194 					break;
  1195 			}
  1196 			src += srcskip;
  1197 			dst += dstskip;
  1198 		}
  1199 	}
  1200 #endif /* USE_DUFFS_LOOP */
  1201 }
  1202 
  1203 #endif /* USE_ASMBLIT */
  1204 
  1205 
  1206 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1207 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1208 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1209 {
  1210 #ifndef USE_DUFFS_LOOP
  1211 	int c;
  1212 #endif
  1213 	int width, height;
  1214 	Uint8 *src;
  1215 	Uint32 *dst;
  1216 	int srcskip, dstskip;
  1217 
  1218 	/* Set up some basic variables */
  1219 	width = info->d_width;
  1220 	height = info->d_height;
  1221 	src = (Uint8 *)info->s_pixels;
  1222 	srcskip = info->s_skip;
  1223 	dst = (Uint32 *)info->d_pixels;
  1224 	dstskip = info->d_skip/4;
  1225 
  1226 #ifdef USE_DUFFS_LOOP
  1227 	while ( height-- ) {
  1228 		DUFFS_LOOP(
  1229 		{
  1230 			*dst++ = RGB565_32(dst, src, map);
  1231 			src += 2;
  1232 		},
  1233 		width);
  1234 		src += srcskip;
  1235 		dst += dstskip;
  1236 	}
  1237 #else
  1238 	while ( height-- ) {
  1239 		/* Copy in 4 pixel chunks */
  1240 		for ( c=width/4; c; --c ) {
  1241 			*dst++ = RGB565_32(dst, src, map);
  1242 			src += 2;
  1243 			*dst++ = RGB565_32(dst, src, map);
  1244 			src += 2;
  1245 			*dst++ = RGB565_32(dst, src, map);
  1246 			src += 2;
  1247 			*dst++ = RGB565_32(dst, src, map);
  1248 			src += 2;
  1249 		}
  1250 		/* Get any leftovers */
  1251 		switch (width & 3) {
  1252 			case 3:
  1253 				*dst++ = RGB565_32(dst, src, map);
  1254 				src += 2;
  1255 			case 2:
  1256 				*dst++ = RGB565_32(dst, src, map);
  1257 				src += 2;
  1258 			case 1:
  1259 				*dst++ = RGB565_32(dst, src, map);
  1260 				src += 2;
  1261 				break;
  1262 		}
  1263 		src += srcskip;
  1264 		dst += dstskip;
  1265 	}
  1266 #endif /* USE_DUFFS_LOOP */
  1267 }
  1268 
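/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/* Lookup table consumed by Blit_RGB565_32(): 256 pairs of entries.
   Entry [2*b] is the contribution of a pixel's low byte b (blue plus the
   low three green bits); entry [2*b+1] is the contribution of its high
   byte b (0xff alpha, red and the high three green bits).  Adding the two
   selected entries gives the ARGB pixel. */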
  1270 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1271 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1272 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1273 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1274 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1275 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1276 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1277 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1278 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1279 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1280 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1281 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1282 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1283 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1284 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1285 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1286 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1287 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1288 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1289 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1290 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1291 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1292 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1293 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1294 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1295 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1296 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1297 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1298 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1299 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1300 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1301 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1302 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1303 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1304 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1305 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1306 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1307 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1308 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1309 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1310 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1311 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1312 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1313 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1314 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1315 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1316 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1317 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1318 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1319 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1320 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1321 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1322 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1323 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1324 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1325 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1326 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1327 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1328 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1329 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1330 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1331 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1332 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1333 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1334 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1335 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1336 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1337 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1338 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1339 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1340 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1341 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1342 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1343 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1344 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1345 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1346 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1347 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1348 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1349 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1350 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1351 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1352 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1353 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1354 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1355 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1356 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1357 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1358 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1359 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1360 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1361 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1362 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1363 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1364 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1365 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1366 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1367 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1368 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1369 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1370 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1371 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1372 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1373 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1374 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1375 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1376 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1377 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1378 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1379 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1380 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1381 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1382 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1383 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1384 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1385 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1386 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1387 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1388 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1389 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1390 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1391 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1392 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1393 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1394 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1395 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1396 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1397 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1398 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1399 };
/* Blits RGB 5-6-5 pixels to ARGB 8-8-8-8 by running the table-driven
   Blit_RGB565_32() with RGB565_ARGB8888_LUT. */
static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
  1404 
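/* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
/* Lookup table consumed by Blit_RGB565_32(): 256 pairs of entries.
   Entry [2*b] is the contribution of a pixel's low byte b (0xff alpha,
   blue and the low three green bits); entry [2*b+1] is the contribution
   of its high byte b (the high three green bits and red).  Adding the two
   selected entries gives the ABGR pixel. */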
  1406 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1407 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1408 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1409 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1410 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1411 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1412 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1413 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1414 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1415 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1416 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1417 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1418 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1419 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1420 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1421 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1422 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1423 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1424 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1425 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1426 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1427 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1428 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1429 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1430 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1431 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1432 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1433 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1434 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1435 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1436 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1437 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1438 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1439 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1440 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1441 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1442 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1443 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1444 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1445 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1446 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1447 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1448 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1449 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1450 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1451 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1452 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1453 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1454 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1455 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1456 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1457 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1458 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1459 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1460 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1461 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1462 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1463 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1464 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1465 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1466 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1467 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1468 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1469 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1470 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1471 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1472 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1473 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1474 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1475 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1476 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1477 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1478 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1479 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1480 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1481 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1482 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1483 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1484 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1485 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1486 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1487 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1488 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1489 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1490 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1491 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1492 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1493 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1494 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1495 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1496 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1497 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1498 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1499 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1500 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1501 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1502 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1503 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1504 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1505 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1506 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1507 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1508 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1509 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1510 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1511 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1512 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1513 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1514 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1515 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1516 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1517 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1518 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1519 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1520 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1521 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1522 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1523 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1524 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1525 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1526 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1527 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1528 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1529 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1530 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1531 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1532 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1533 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1534 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1535 };
/* Blits RGB 5-6-5 pixels to ABGR 8-8-8-8 by running the table-driven
   Blit_RGB565_32() with RGB565_ABGR8888_LUT. */
static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
}
  1540 
  1541 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1542 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1543 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1544 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1545 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1546 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1547 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1548 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1549 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1550 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1551 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1552 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1553 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1554 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1555 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1556 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1557 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1558 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1559 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1560 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1561 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1562 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1563 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1564 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1565 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1566 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1567 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1568 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1569 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1570 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1571 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1572 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1573 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1574 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1575 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1576 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1577 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1578 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1579 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1580 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1581 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1582 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1583 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1584 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1585 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1586 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1587 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1588 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1589 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1590 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1591 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1592 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1593 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1594 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1595 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1596 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1597 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1598 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1599 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1600 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1601 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1602 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1603 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1604 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1605 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1606 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1607 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1608 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1609 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1610 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1611 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1612 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1613 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1614 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1615 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1616 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1617 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1618 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1619 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1620 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1621 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1622 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1623 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1624 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1625 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1626 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1627 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1628 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1629 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1630 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1631 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1632 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1633 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1634 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1635 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1636 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1637 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1638 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1639 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1640 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1641 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1642 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1643 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1644 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1645 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1646 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1647 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1648 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1649 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1650 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1651 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1652 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1653 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1654 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1655 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1656 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1657 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1658 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1659 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1660 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1661 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1662 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1663 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1664 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1665 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1666 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1667 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1668 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1669 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1670 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1671 };
  1672 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
  1673 {
  1674     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1675 }
  1676 
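/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 (table driven).
   The table holds 256 pairs of partial BGRA8888 pixels: the even entry of
   a pair carries the blue and low green bits, the odd entry carries the
   red and high green bits plus an alpha of 0xff. */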
  1678 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1679 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1680 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1681 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1682 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1683 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1684 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1685 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1686 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1687 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1688 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1689 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1690 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1691 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1692 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1693 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1694 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1695 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1696 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1697 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1698 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1699 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1700 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1701 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1702 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1703 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1704 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1705 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1706 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1707 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1708 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1709 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1710 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1711 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1712 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1713 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1714 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1715 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1716 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1717 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1718 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1719 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1720 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1721 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1722 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1723 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1724 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1725 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1726 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1727 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1728 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1729 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1730 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1731 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1732 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1733 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1734 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1735 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1736 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1737 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1738 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1739 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1740 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1741 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1742 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1743 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1744 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1745 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1746 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1747 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1748 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1749 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1750 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1751 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1752 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1753 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1754 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1755 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1756 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1757 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1758 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1759 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1760 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1761 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1762 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1763 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1764 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1765 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1766 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1767 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1768 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1769 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1770 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1771 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1772 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1773 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1774 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1775 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1776 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1777 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1778 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1779 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1780 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1781 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1782 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1783 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1784 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1785 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1786 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1787 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1788 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1789 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1790 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1791 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1792 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1793 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1794 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1795 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1796 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1797 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1798 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1799 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1800 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1801 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1802 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1803 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1804 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1805 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1806 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1807 };
  1808 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
  1809 {
  1810     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1811 }
  1812 
  1813 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1814 #ifndef RGB888_RGB332
  1815 #define RGB888_RGB332(dst, src) { \
  1816 	dst = (((src)&0x00E00000)>>16)| \
  1817 	      (((src)&0x0000E000)>>11)| \
  1818 	      (((src)&0x000000C0)>>6); \
  1819 }
  1820 #endif
  1821 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1822 {
  1823 #ifndef USE_DUFFS_LOOP
  1824 	int c;
  1825 #endif
  1826 	int Pixel;
  1827 	int width, height;
  1828 	Uint32 *src;
  1829 	const Uint8 *map;
  1830 	Uint8 *dst;
  1831 	int srcskip, dstskip;
  1832 
  1833 	/* Set up some basic variables */
  1834 	width = info->d_width;
  1835 	height = info->d_height;
  1836 	src = (Uint32 *)info->s_pixels;
  1837 	srcskip = info->s_skip/4;
  1838 	dst = info->d_pixels;
  1839 	dstskip = info->d_skip;
  1840 	map = info->table;
  1841 
  1842 #ifdef USE_DUFFS_LOOP
  1843 	while ( height-- ) {
  1844 		DUFFS_LOOP(
  1845 			RGB888_RGB332(Pixel, *src);
  1846 			*dst++ = map[Pixel];
  1847 			++src;
  1848 		, width);
  1849 		src += srcskip;
  1850 		dst += dstskip;
  1851 	}
  1852 #else
  1853 	while ( height-- ) {
  1854 		for ( c=width/4; c; --c ) {
  1855 			/* Pack RGB into 8bit pixel */
  1856 			RGB888_RGB332(Pixel, *src);
  1857 			*dst++ = map[Pixel];
  1858 			++src;
  1859 			RGB888_RGB332(Pixel, *src);
  1860 			*dst++ = map[Pixel];
  1861 			++src;
  1862 			RGB888_RGB332(Pixel, *src);
  1863 			*dst++ = map[Pixel];
  1864 			++src;
  1865 			RGB888_RGB332(Pixel, *src);
  1866 			*dst++ = map[Pixel];
  1867 			++src;
  1868 		}
  1869 		switch ( width & 3 ) {
  1870 			case 3:
  1871 				RGB888_RGB332(Pixel, *src);
  1872 				*dst++ = map[Pixel];
  1873 				++src;
  1874 			case 2:
  1875 				RGB888_RGB332(Pixel, *src);
  1876 				*dst++ = map[Pixel];
  1877 				++src;
  1878 			case 1:
  1879 				RGB888_RGB332(Pixel, *src);
  1880 				*dst++ = map[Pixel];
  1881 				++src;
  1882 		}
  1883 		src += srcskip;
  1884 		dst += dstskip;
  1885 	}
  1886 #endif /* USE_DUFFS_LOOP */
  1887 }
  1888 static void BlitNto1(SDL_BlitInfo *info)
  1889 {
  1890 #ifndef USE_DUFFS_LOOP
  1891 	int c;
  1892 #endif
  1893 	int width, height;
  1894 	Uint8 *src;
  1895 	const Uint8 *map;
  1896 	Uint8 *dst;
  1897 	int srcskip, dstskip;
  1898 	int srcbpp;
  1899 	Uint32 Pixel;
  1900 	int  sR, sG, sB;
  1901 	SDL_PixelFormat *srcfmt;
  1902 
  1903 	/* Set up some basic variables */
  1904 	width = info->d_width;
  1905 	height = info->d_height;
  1906 	src = info->s_pixels;
  1907 	srcskip = info->s_skip;
  1908 	dst = info->d_pixels;
  1909 	dstskip = info->d_skip;
  1910 	map = info->table;
  1911 	srcfmt = info->src;
  1912 	srcbpp = srcfmt->BytesPerPixel;
  1913 
  1914 	if ( map == NULL ) {
  1915 		while ( height-- ) {
  1916 #ifdef USE_DUFFS_LOOP
  1917 			DUFFS_LOOP(
  1918 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1919 								sR, sG, sB);
  1920 				if ( 1 ) {
  1921 				  	/* Pack RGB into 8bit pixel */
  1922 				  	*dst = ((sR>>5)<<(3+2))|
  1923 					        ((sG>>5)<<(2)) |
  1924 					        ((sB>>6)<<(0)) ;
  1925 				}
  1926 				dst++;
  1927 				src += srcbpp;
  1928 			, width);
  1929 #else
  1930 			for ( c=width; c; --c ) {
  1931 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1932 								sR, sG, sB);
  1933 				if ( 1 ) {
  1934 				  	/* Pack RGB into 8bit pixel */
  1935 				  	*dst = ((sR>>5)<<(3+2))|
  1936 					        ((sG>>5)<<(2)) |
  1937 					        ((sB>>6)<<(0)) ;
  1938 				}
  1939 				dst++;
  1940 				src += srcbpp;
  1941 			}
  1942 #endif
  1943 			src += srcskip;
  1944 			dst += dstskip;
  1945 		}
  1946 	} else {
  1947 		while ( height-- ) {
  1948 #ifdef USE_DUFFS_LOOP
  1949 			DUFFS_LOOP(
  1950 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1951 								sR, sG, sB);
  1952 				if ( 1 ) {
  1953 				  	/* Pack RGB into 8bit pixel */
  1954 				  	*dst = map[((sR>>5)<<(3+2))|
  1955 						   ((sG>>5)<<(2))  |
  1956 						   ((sB>>6)<<(0))  ];
  1957 				}
  1958 				dst++;
  1959 				src += srcbpp;
  1960 			, width);
  1961 #else
  1962 			for ( c=width; c; --c ) {
  1963 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1964 								sR, sG, sB);
  1965 				if ( 1 ) {
  1966 				  	/* Pack RGB into 8bit pixel */
  1967 				  	*dst = map[((sR>>5)<<(3+2))|
  1968 						   ((sG>>5)<<(2))  |
  1969 						   ((sB>>6)<<(0))  ];
  1970 				}
  1971 				dst++;
  1972 				src += srcbpp;
  1973 			}
  1974 #endif /* USE_DUFFS_LOOP */
  1975 			src += srcskip;
  1976 			dst += dstskip;
  1977 		}
  1978 	}
  1979 }
  1980 
  1981 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  1982 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1983 {
  1984 	int width = info->d_width;
  1985 	int height = info->d_height;
  1986 	Uint32 *src = (Uint32 *)info->s_pixels;
  1987 	int srcskip = info->s_skip;
  1988 	Uint32 *dst = (Uint32 *)info->d_pixels;
  1989 	int dstskip = info->d_skip;
  1990 	SDL_PixelFormat *srcfmt = info->src;
  1991 	SDL_PixelFormat *dstfmt = info->dst;
  1992 
  1993 	if (dstfmt->Amask) {
  1994 		/* RGB->RGBA, SET_ALPHA */
  1995 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  1996 
  1997 		while ( height-- ) {
  1998 			DUFFS_LOOP(
  1999 			{
  2000 				*dst = *src | mask;
  2001 				++dst;
  2002 				++src;
  2003 			},
  2004 			width);
  2005 			src = (Uint32*)((Uint8*)src + srcskip);
  2006 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2007 		}
  2008 	} else {
  2009 		/* RGBA->RGB, NO_ALPHA */
  2010 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2011 
  2012 		while ( height-- ) {
  2013 			DUFFS_LOOP(
  2014 			{
  2015 				*dst = *src & mask;
  2016 				++dst;
  2017 				++src;
  2018 			},
  2019 			width);
  2020 			src = (Uint32*)((Uint8*)src + srcskip);
  2021 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2022 		}
  2023 	}
  2024 }
  2025 
  2026 static void BlitNtoN(SDL_BlitInfo *info)
  2027 {
  2028 	int width = info->d_width;
  2029 	int height = info->d_height;
  2030 	Uint8 *src = info->s_pixels;
  2031 	int srcskip = info->s_skip;
  2032 	Uint8 *dst = info->d_pixels;
  2033 	int dstskip = info->d_skip;
  2034 	SDL_PixelFormat *srcfmt = info->src;
  2035 	int srcbpp = srcfmt->BytesPerPixel;
  2036 	SDL_PixelFormat *dstfmt = info->dst;
  2037 	int dstbpp = dstfmt->BytesPerPixel;
  2038 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2039 
  2040 	while ( height-- ) {
  2041 		DUFFS_LOOP(
  2042 		{
  2043 		        Uint32 Pixel;
  2044 			unsigned sR;
  2045 			unsigned sG;
  2046 			unsigned sB;
  2047 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2048 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2049 			dst += dstbpp;
  2050 			src += srcbpp;
  2051 		},
  2052 		width);
  2053 		src += srcskip;
  2054 		dst += dstskip;
  2055 	}
  2056 }
  2057 
  2058 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2059 {
  2060 	int width = info->d_width;
  2061 	int height = info->d_height;
  2062 	Uint8 *src = info->s_pixels;
  2063 	int srcskip = info->s_skip;
  2064 	Uint8 *dst = info->d_pixels;
  2065 	int dstskip = info->d_skip;
  2066 	SDL_PixelFormat *srcfmt = info->src;
  2067 	int srcbpp = srcfmt->BytesPerPixel;
  2068 	SDL_PixelFormat *dstfmt = info->dst;
  2069 	int dstbpp = dstfmt->BytesPerPixel;
  2070 	int c;
  2071 
  2072 	/* FIXME: should map alpha to [0..255] correctly! */
  2073 	while ( height-- ) {
  2074 		for ( c=width; c; --c ) {
  2075 		        Uint32 Pixel;
  2076 			unsigned sR, sG, sB, sA;
  2077 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2078 				      sR, sG, sB, sA);
  2079 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2080 				      sR, sG, sB, sA);
  2081 			dst += dstbpp;
  2082 			src += srcbpp;
  2083 		}
  2084 		src += srcskip;
  2085 		dst += dstskip;
  2086 	}
  2087 }
  2088 
  2089 static void BlitNto1Key(SDL_BlitInfo *info)
  2090 {
  2091 	int width = info->d_width;
  2092 	int height = info->d_height;
  2093 	Uint8 *src = info->s_pixels;
  2094 	int srcskip = info->s_skip;
  2095 	Uint8 *dst = info->d_pixels;
  2096 	int dstskip = info->d_skip;
  2097 	SDL_PixelFormat *srcfmt = info->src;
  2098 	const Uint8 *palmap = info->table;
  2099 	Uint32 ckey = srcfmt->colorkey;
  2100 	Uint32 rgbmask = ~srcfmt->Amask;
  2101 	int srcbpp;
  2102 	Uint32 Pixel;
  2103 	Uint8  sR, sG, sB;
  2104 
  2105 	/* Set up some basic variables */
  2106 	srcbpp = srcfmt->BytesPerPixel;
  2107 	ckey &= rgbmask;
  2108 
  2109 	if ( palmap == NULL ) {
  2110 		while ( height-- ) {
  2111 			DUFFS_LOOP(
  2112 			{
  2113 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2114 								sR, sG, sB);
  2115 				if ( (Pixel & rgbmask) != ckey ) {
  2116 				  	/* Pack RGB into 8bit pixel */
  2117 				  	*dst = ((sR>>5)<<(3+2))|
  2118 						((sG>>5)<<(2)) |
  2119 						((sB>>6)<<(0)) ;
  2120 				}
  2121 				dst++;
  2122 				src += srcbpp;
  2123 			},
  2124 			width);
  2125 			src += srcskip;
  2126 			dst += dstskip;
  2127 		}
  2128 	} else {
  2129 		while ( height-- ) {
  2130 			DUFFS_LOOP(
  2131 			{
  2132 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2133 								sR, sG, sB);
  2134 				if ( (Pixel & rgbmask) != ckey ) {
  2135 				  	/* Pack RGB into 8bit pixel */
  2136 				  	*dst = palmap[((sR>>5)<<(3+2))|
  2137 							((sG>>5)<<(2))  |
  2138 							((sB>>6)<<(0))  ];
  2139 				}
  2140 				dst++;
  2141 				src += srcbpp;
  2142 			},
  2143 			width);
  2144 			src += srcskip;
  2145 			dst += dstskip;
  2146 		}
  2147 	}
  2148 }
  2149 
  2150 static void Blit2to2Key(SDL_BlitInfo *info)
  2151 {
  2152 	int width = info->d_width;
  2153 	int height = info->d_height;
  2154 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2155 	int srcskip = info->s_skip;
  2156 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2157 	int dstskip = info->d_skip;
  2158 	Uint32 ckey = info->src->colorkey;
  2159 	Uint32 rgbmask = ~info->src->Amask;
  2160 
  2161 	/* Set up some basic variables */
  2162         srcskip /= 2;
  2163         dstskip /= 2;
  2164 	ckey &= rgbmask;
  2165 
  2166 	while ( height-- ) {
  2167 		DUFFS_LOOP(
  2168 		{
  2169 			if ( (*srcp & rgbmask) != ckey ) {
  2170 				*dstp = *srcp;
  2171 			}
  2172 			dstp++;
  2173 			srcp++;
  2174 		},
  2175 		width);
  2176 		srcp += srcskip;
  2177 		dstp += dstskip;
  2178 	}
  2179 }
  2180 
  2181 static void BlitNtoNKey(SDL_BlitInfo *info)
  2182 {
  2183 	int width = info->d_width;
  2184 	int height = info->d_height;
  2185 	Uint8 *src = info->s_pixels;
  2186 	int srcskip = info->s_skip;
  2187 	Uint8 *dst = info->d_pixels;
  2188 	int dstskip = info->d_skip;
  2189 	Uint32 ckey = info->src->colorkey;
  2190 	SDL_PixelFormat *srcfmt = info->src;
  2191 	SDL_PixelFormat *dstfmt = info->dst;
  2192 	int srcbpp = srcfmt->BytesPerPixel;
  2193 	int dstbpp = dstfmt->BytesPerPixel;
  2194 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2195 
  2196 	while ( height-- ) {
  2197 		DUFFS_LOOP(
  2198 		{
  2199 		        Uint32 Pixel;
  2200 			unsigned sR;
  2201 			unsigned sG;
  2202 			unsigned sB;
  2203 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2204 			if ( Pixel != ckey ) {
  2205 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2206 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2207 					      sR, sG, sB, alpha);
  2208 			}
  2209 			dst += dstbpp;
  2210 			src += srcbpp;
  2211 		},
  2212 		width);
  2213 		src += srcskip;
  2214 		dst += dstskip;
  2215 	}
  2216 }
  2217 
  2218 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2219 {
  2220 	int width = info->d_width;
  2221 	int height = info->d_height;
  2222 	Uint8 *src = info->s_pixels;
  2223 	int srcskip = info->s_skip;
  2224 	Uint8 *dst = info->d_pixels;
  2225 	int dstskip = info->d_skip;
  2226 	Uint32 ckey = info->src->colorkey;
  2227 	SDL_PixelFormat *srcfmt = info->src;
  2228 	SDL_PixelFormat *dstfmt = info->dst;
  2229 	Uint32 rgbmask = ~srcfmt->Amask;
  2230 
  2231 	Uint8 srcbpp;
  2232 	Uint8 dstbpp;
  2233 	Uint32 Pixel;
  2234 	Uint8  sR, sG, sB, sA;
  2235 
  2236 	/* Set up some basic variables */
  2237 	srcbpp = srcfmt->BytesPerPixel;
  2238 	dstbpp = dstfmt->BytesPerPixel;
  2239 	ckey &= rgbmask;
  2240 
  2241 	/* FIXME: should map alpha to [0..255] correctly! */
  2242 	while ( height-- ) {
  2243 		DUFFS_LOOP(
  2244 		{
  2245 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2246 				      sR, sG, sB, sA);
  2247 			if ( (Pixel & rgbmask) != ckey ) {
  2248 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2249 						sR, sG, sB, sA);
  2250 			}
  2251 			dst += dstbpp;
  2252 			src += srcbpp;
  2253 		},
  2254 		width);
  2255 		src += srcskip;
  2256 		dst += dstskip;
  2257 	}
  2258 }
  2259 
  2260 /* Normal N to N optimized blitters */
  2261 struct blit_table {
  2262 	Uint32 srcR, srcG, srcB;
  2263 	int dstbpp;
  2264 	Uint32 dstR, dstG, dstB;
  2265 	Uint32 blit_features;
  2266 	void *aux_data;
  2267 	SDL_loblit blitfunc;
  2268 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
  2269 };
  2270 static const struct blit_table normal_blit_1[] = {
  2271 	/* Default for 8-bit RGB source, an invalid combination */
  2272 	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
  2273 };
  2274 static const struct blit_table normal_blit_2[] = {
  2275 #ifdef USE_ASMBLIT
  2276     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
  2277       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
  2278     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
  2279       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
  2280     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
  2281       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
  2282 #endif
  2283 #ifdef USE_ALTIVEC_BLITTERS
  2284     /* has-altivec */
  2285     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2286       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2287     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2288       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2289 #endif
  2290     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
  2291       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
  2292     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2293       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
  2294     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2295       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
  2296     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2297       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
  2298 
  2299     /* Default for 16-bit RGB source, used if no other blitter matches */
  2300     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2301 };
  2302 static const struct blit_table normal_blit_3[] = {
  2303 	/* Default for 24-bit RGB source, never optimized */
  2304     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2305 };
  2306 static const struct blit_table normal_blit_4[] = {
  2307 #ifdef USE_ASMBLIT
  2308     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2309       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
  2310     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2311       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
  2312     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2313       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
  2314     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2315       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
  2316     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2317       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
  2318     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2319       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
  2320     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2321       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
  2322     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2323       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
  2324     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
  2325       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
  2326     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
  2327       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
  2328     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2329       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
  2330     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2331       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
  2332     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2333       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
  2334 #else
  2335 #ifdef USE_ALTIVEC_BLITTERS
  2336     /* has-altivec | dont-use-prefetch */
  2337     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2338       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2339     /* has-altivec */
  2340     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2341       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2342     /* has-altivec */
  2343     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
  2344       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
  2345 #endif
  2346     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2347       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
  2348     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2349       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
  2350 #endif
  2351 	/* Default for 32-bit RGB source, used if no other blitter matches */
  2352 	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2353 };
  2354 static const struct blit_table *normal_blit[] = {
  2355 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2356 };
  2357 
  2358 /* Mask matches table, or table entry is zero */
  2359 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2360 
  2361 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2362 {
  2363 	struct private_swaccel *sdata;
  2364 	SDL_PixelFormat *srcfmt;
  2365 	SDL_PixelFormat *dstfmt;
  2366 	const struct blit_table *table;
  2367 	int which;
  2368 	SDL_loblit blitfun;
  2369 
  2370 	/* Set up data for choosing the blit */
  2371 	sdata = surface->map->sw_data;
  2372 	srcfmt = surface->format;
  2373 	dstfmt = surface->map->dst->format;
  2374 
  2375 	if ( blit_index & 2 ) {
  2376 	        /* alpha or alpha+colorkey */
  2377 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2378 	}
  2379 
  2380 	/* We don't support destinations less than 8-bits */
  2381 	if ( dstfmt->BitsPerPixel < 8 ) {
  2382 		return(NULL);
  2383 	}
  2384 	
  2385 	if(blit_index == 1) {
  2386 	    /* colorkey blit: Here we don't have too many options, mostly
  2387 	       because RLE is the preferred fast way to deal with this.
  2388 	       If a particular case turns out to be useful we'll add it. */
  2389 
  2390 	    if(srcfmt->BytesPerPixel == 2
  2391 	       && surface->map->identity)
  2392 		return Blit2to2Key;
  2393 	    else if(dstfmt->BytesPerPixel == 1)
  2394 		return BlitNto1Key;
  2395 	    else {
  2396 #ifdef USE_ALTIVEC_BLITTERS
  2397         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2398             return Blit32to32KeyAltivec;
  2399         } else
  2400 #endif
  2401 
  2402 		if(srcfmt->Amask && dstfmt->Amask)
  2403 		    return BlitNtoNKeyCopyAlpha;
  2404 		else
  2405 		    return BlitNtoNKey;
  2406 	    }
  2407 	}
  2408 
  2409 	blitfun = NULL;
  2410 	if ( dstfmt->BitsPerPixel == 8 ) {
  2411 		/* We assume 8-bit destinations are palettized */
  2412 		if ( (srcfmt->BytesPerPixel == 4) &&
  2413 		     (srcfmt->Rmask == 0x00FF0000) &&
  2414 		     (srcfmt->Gmask == 0x0000FF00) &&
  2415 		     (srcfmt->Bmask == 0x000000FF) ) {
  2416 			if ( surface->map->table ) {
  2417 				blitfun = Blit_RGB888_index8_map;
  2418 			} else {
  2419 #ifdef USE_ASMBLIT
  2420 				sdata->aux_data = ConvertX86p32_8RGB332;
  2421 				blitfun = ConvertX86;
  2422 #else
  2423 				blitfun = Blit_RGB888_index8;
  2424 #endif
  2425 			}
  2426 		} else {
  2427 			blitfun = BlitNto1;
  2428 		}
  2429 	} else {
  2430 		/* Now the meat, choose the blitter we want */
  2431 		int a_need = 0;
  2432 		if(dstfmt->Amask)
  2433 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2434 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2435 		for ( which=0; table[which].dstbpp; ++which ) {
  2436 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2437 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2438 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2439 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2440 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2441 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2442 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2443 			    (a_need & table[which].alpha) == a_need &&
  2444 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2445 				break;
  2446 		}
  2447 		sdata->aux_data = table[which].aux_data;
  2448 		blitfun = table[which].blitfunc;
  2449 
  2450 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2451 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2452 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2453 			     srcfmt->Rmask == dstfmt->Rmask &&
  2454 			     srcfmt->Gmask == dstfmt->Gmask &&
  2455 			     srcfmt->Bmask == dstfmt->Bmask ) {
  2456 				blitfun = Blit4to4MaskAlpha;
  2457 			} else if ( a_need == COPY_ALPHA ) {
  2458 			    blitfun = BlitNtoNCopyAlpha;
  2459 			}
  2460 		}
  2461 	}
  2462 
  2463 #ifdef DEBUG_ASM
  2464 #ifdef USE_ASMBLIT
  2465 	if ( blitfun == ConvertMMX )
  2466 		fprintf(stderr, "Using mmx blit\n");
  2467 	else
  2468 	if ( blitfun == ConvertX86 )
  2469 		fprintf(stderr, "Using asm blit\n");
  2470 	else
  2471 #endif
  2472 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
  2473 		fprintf(stderr, "Using C blit\n");
  2474 	else
  2475 		fprintf(stderr, "Using optimized C blit\n");
  2476 #endif /* DEBUG_ASM */
  2477 
  2478 	return(blitfun);
  2479 }