src/video/SDL_blit_N.c
author Ryan C. Gordon <icculus@icculus.org>
Thu, 08 Sep 2005 07:20:59 +0000
changeset 1139 d0ae4dff7208
parent 1053 f596fa4f17a6
child 1162 2651158f59b8
permissions -rw-r--r--
Altivec blitter for 555 -> 8888 surface, written by me.

--ryan.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2004 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Library General Public
     7     License as published by the Free Software Foundation; either
     8     version 2 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Library General Public License for more details.
    14 
    15     You should have received a copy of the GNU Library General Public
    16     License along with this library; if not, write to the Free
    17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #ifdef SAVE_RCSID
    24 static char rcsid =
    25  "@(#) $Id$";
    26 #endif
    27 
    28 #include <stdio.h>
    29 
    30 #include "SDL_types.h"
    31 #include "SDL_video.h"
    32 #include "SDL_blit.h"
    33 #include "SDL_byteorder.h"
    34 #include "SDL_cpuinfo.h"
    35 
    36 /* Functions to blit from N-bit surfaces to other surfaces */
    37 
    38 #ifdef USE_ALTIVEC_BLITTERS
    39 #include <assert.h>
    40 #ifdef MACOSX
    41 #include <sys/sysctl.h>
    42 #include <stdlib.h>
    43 static size_t GetL3CacheSize( void )
    44 {
    45     const char key[] = "hw.l3cachesize";
    46     u_int64_t result = 0;
    47     size_t typeSize = sizeof( result );
    48 
    49 
    50     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
    51     if( 0 != err ) return 0;
    52 
    53     return result;
    54 }
    55 #else
    56 static size_t GetL3CacheSize( void )
    57 {
    58     /* XXX: Just guess G4 */
    59     return 2097152;
    60 }
    61 #endif /* MACOSX */
    62 
    63 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    64 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    65                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    66                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    67                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    68                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    69 
    70 #define MAKE8888(dstfmt, r, g, b, a)  \
    71     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    72       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    73       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    74       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    75 
    76 /*
    77  * Data Stream Touch...Altivec cache prefetching.
    78  *
    79  *  Don't use this on a G5...however, the speed boost is very significant
    80  *   on a G4.
    81  */
    82 #define DST_CHAN_SRC 1
    83 #define DST_CHAN_DEST 2
    84 
    85 /* macro to set DST control word value... */
    86 #define DST_CTRL(size, count, stride) \
    87     (((size) << 24) | ((count) << 16) | (stride))
    88 
    89 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    90     ? vec_lvsl(0, src) \
    91     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    92 
    93 /* Calculate the permute vector used for 32->32 swizzling */
    94 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
    95                                   const SDL_PixelFormat *dstfmt)
    96 {
    97     /*
    98     * We have to assume that the bits that aren't used by other
    99      *  colors is alpha, and it's one complete byte, since some formats
   100      *  leave alpha with a zero mask, but we should still swizzle the bits.
   101      */
   102     /* ARGB */
   103     const static struct SDL_PixelFormat default_pixel_format = {
   104         NULL, 0, 0,
   105         0, 0, 0, 0,
   106         16, 8, 0, 24,
   107         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   108         0, 0};
   109     if (!srcfmt) {
   110         srcfmt = &default_pixel_format;
   111     }
   112     if (!dstfmt) {
   113         dstfmt = &default_pixel_format;
   114     }
   115     vector unsigned char plus = (vector unsigned char)( 0x00, 0x00, 0x00, 0x00,
   116                                       0x04, 0x04, 0x04, 0x04,
   117                                       0x08, 0x08, 0x08, 0x08,
   118                                       0x0C, 0x0C, 0x0C, 0x0C );
   119     vector unsigned char vswiz;
   120     vector unsigned int srcvec;
   121 #define RESHIFT(X) (3 - ((X) >> 3))
   122     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   123     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   124     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   125     Uint32 amask;
   126     /* Use zero for alpha if either surface doesn't have alpha */
   127     if (dstfmt->Amask) {
   128         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   129     } else {    
   130         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   131     }           
   132 #undef RESHIFT  
   133     ((unsigned int *)&srcvec)[0] = (rmask | gmask | bmask | amask);
   134     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   135     return(vswiz);
   136 }
   137 
   138 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   139 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   140     int height = info->d_height;
   141     Uint8 *src = (Uint8 *) info->s_pixels;
   142     int srcskip = info->s_skip;
   143     Uint8 *dst = (Uint8 *) info->d_pixels;
   144     int dstskip = info->d_skip;
   145     SDL_PixelFormat *srcfmt = info->src;
   146     vector unsigned char valpha = vec_splat_u8(0);
   147     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   148     vector unsigned char vgmerge = (vector unsigned char)(
   149         0x00, 0x02, 0x00, 0x06,
   150         0x00, 0x0a, 0x00, 0x0e,
   151         0x00, 0x12, 0x00, 0x16,
   152         0x00, 0x1a, 0x00, 0x1e);
   153     vector unsigned short v1 = vec_splat_u16(1);
   154     vector unsigned short v3 = vec_splat_u16(3);
   155     vector unsigned short v3f = (vector unsigned short)(
   156         0x003f, 0x003f, 0x003f, 0x003f,
   157         0x003f, 0x003f, 0x003f, 0x003f);
   158     vector unsigned short vfc = (vector unsigned short)(
   159         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   160         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   161     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   162     vf800 = vec_sl(vf800, vec_splat_u16(8));
   163 
   164     while (height--) {
   165         vector unsigned char valigner;
   166         vector unsigned char voverflow;
   167         vector unsigned char vsrc;
   168 
   169         int width = info->d_width;
   170         int extrawidth;
   171 
   172         /* do scalar until we can align... */
   173 #define ONE_PIXEL_BLEND(condition, widthvar) \
   174         while (condition) { \
   175             Uint32 pixel; \
   176             unsigned sR, sG, sB, sA; \
   177             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, pixel, \
   178                           sR, sG, sB, sA); \
   179             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   180                                 ((sG << 3) & 0x000007E0) | \
   181                                 ((sB >> 3) & 0x0000001F)); \
   182             dst += 2; \
   183             src += 4; \
   184             widthvar--; \
   185         }
   186 
   187         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   188 
   189         /* After all that work, here's the vector part! */
   190         extrawidth = (width % 8);  /* trailing unaligned stores */
   191         width -= extrawidth;
   192         vsrc = vec_ld(0, src);
   193         valigner = VEC_ALIGNER(src);
   194 
   195         while (width) {
   196             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   197             vector unsigned int vsrc1, vsrc2;
   198             vector unsigned char vdst;
   199 
   200             voverflow = vec_ld(15, src);
   201             vsrc = vec_perm(vsrc, voverflow, valigner);
   202             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   203             src += 16;
   204             vsrc = voverflow;
   205             voverflow = vec_ld(15, src);
   206             vsrc = vec_perm(vsrc, voverflow, valigner);
   207             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   208             /* 1555 */
   209             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   210             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   211             vgpixel = vec_and(vgpixel, vfc);
   212             vgpixel = vec_sl(vgpixel, v3);
   213             vrpixel = vec_sl(vpixel, v1);
   214             vrpixel = vec_and(vrpixel, vf800);
   215             vbpixel = vec_and(vpixel, v3f);
   216             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   217             /* 565 */
   218             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   219             vec_st(vdst, 0, dst);
   220 
   221             width -= 8;
   222             src += 16;
   223             dst += 16;
   224             vsrc = voverflow;
   225         }
   226 
   227         assert(width == 0);
   228 
   229 
   230         /* do scalar until we can align... */
   231         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   232 #undef ONE_PIXEL_BLEND
   233 
   234         src += srcskip;  /* move to next row, accounting for pitch. */
   235         dst += dstskip;
   236     }
   237 
   238 
   239 }
   240 
   241 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   242     int height = info->d_height;
   243     Uint8 *src = (Uint8 *) info->s_pixels;
   244     int srcskip = info->s_skip;
   245     Uint8 *dst = (Uint8 *) info->d_pixels;
   246     int dstskip = info->d_skip;
   247     SDL_PixelFormat *srcfmt = info->src;
   248     SDL_PixelFormat *dstfmt = info->dst;
   249     unsigned alpha;
   250     vector unsigned char valpha;
   251     vector unsigned char vpermute;
   252     vector unsigned short vf800;
   253     vector unsigned int v8 = vec_splat_u32(8);
   254     vector unsigned int v16 = vec_add(v8, v8);
   255     vector unsigned short v2 = vec_splat_u16(2);
   256     vector unsigned short v3 = vec_splat_u16(3);
   257     /* 
   258         0x10 - 0x1f is the alpha
   259         0x00 - 0x0e evens are the red
   260         0x01 - 0x0f odds are zero
   261     */
   262     vector unsigned char vredalpha1 = (vector unsigned char)(
   263         0x10, 0x00, 0x01, 0x01,
   264         0x10, 0x02, 0x01, 0x01,
   265         0x10, 0x04, 0x01, 0x01,
   266         0x10, 0x06, 0x01, 0x01
   267     );
   268     vector unsigned char vredalpha2 = (vector unsigned char)(
   269         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   270     );
   271     /*
   272         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   273         0x11 - 0x0f odds are blue
   274     */
   275     vector unsigned char vblue1 = (vector unsigned char)(
   276         0x00, 0x01, 0x02, 0x11,
   277         0x04, 0x05, 0x06, 0x13,
   278         0x08, 0x09, 0x0a, 0x15,
   279         0x0c, 0x0d, 0x0e, 0x17
   280     );
   281     vector unsigned char vblue2 = (vector unsigned char)(
   282         vec_add((vector unsigned int)vblue1, v8)
   283     );
   284     /*
   285         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   286         0x10 - 0x0e evens are green
   287     */
   288     vector unsigned char vgreen1 = (vector unsigned char)(
   289         0x00, 0x01, 0x10, 0x03,
   290         0x04, 0x05, 0x12, 0x07,
   291         0x08, 0x09, 0x14, 0x0b,
   292         0x0c, 0x0d, 0x16, 0x0f
   293     );
   294     vector unsigned char vgreen2 = (vector unsigned char)(
   295         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   296     );
   297     
   298 
   299     assert(srcfmt->BytesPerPixel == 2);
   300     assert(dstfmt->BytesPerPixel == 4);
   301 
   302     vf800 = (vector unsigned short)vec_splat_u8(-7);
   303     vf800 = vec_sl(vf800, vec_splat_u16(8));
   304 
   305     if (dstfmt->Amask && srcfmt->alpha) {
   306         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   307         valpha = vec_splat(valpha, 0);
   308     } else {
   309         alpha = 0;
   310         valpha = vec_splat_u8(0);
   311     }
   312 
   313     vpermute = calc_swizzle32(NULL, dstfmt);
   314     while (height--) {
   315         vector unsigned char valigner;
   316         vector unsigned char voverflow;
   317         vector unsigned char vsrc;
   318 
   319         int width = info->d_width;
   320         int extrawidth;
   321 
   322         /* do scalar until we can align... */
   323 #define ONE_PIXEL_BLEND(condition, widthvar) \
   324         while (condition) { \
   325             unsigned sR, sG, sB; \
   326             unsigned short pixel = *((unsigned short *)src); \
   327             sR = (pixel >> 8) & 0xf8; \
   328             sG = (pixel >> 3) & 0xfc; \
   329             sB = (pixel << 3) & 0xf8; \
   330             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   331             src += 2; \
   332             dst += 4; \
   333             widthvar--; \
   334         }
   335         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   336 
   337         /* After all that work, here's the vector part! */
   338         extrawidth = (width % 8);  /* trailing unaligned stores */
   339         width -= extrawidth;
   340         vsrc = vec_ld(0, src);
   341         valigner = VEC_ALIGNER(src);
   342 
   343         while (width) {
   344             vector unsigned short vR, vG, vB;
   345             vector unsigned char vdst1, vdst2;
   346 
   347             voverflow = vec_ld(15, src);
   348             vsrc = vec_perm(vsrc, voverflow, valigner);
   349 
   350             vR = vec_and((vector unsigned short)vsrc, vf800);
   351             vB = vec_sl((vector unsigned short)vsrc, v3);
   352             vG = vec_sl(vB, v2);
   353 
   354             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   355             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   356             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   357             vdst1 = vec_perm(vdst1, valpha, vpermute);
   358             vec_st(vdst1, 0, dst);
   359 
   360             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   361             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   362             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   363             vdst2 = vec_perm(vdst2, valpha, vpermute);
   364             vec_st(vdst2, 16, dst);
   365             
   366             width -= 8;
   367             dst += 32;
   368             src += 16;
   369             vsrc = voverflow;
   370         }
   371 
   372         assert(width == 0);
   373 
   374 
   375         /* do scalar until we can align... */
   376         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   377 #undef ONE_PIXEL_BLEND
   378 
   379         src += srcskip;  /* move to next row, accounting for pitch. */
   380         dst += dstskip;
   381     }
   382 
   383 }
   384 
   385 
   386 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   387     int height = info->d_height;
   388     Uint8 *src = (Uint8 *) info->s_pixels;
   389     int srcskip = info->s_skip;
   390     Uint8 *dst = (Uint8 *) info->d_pixels;
   391     int dstskip = info->d_skip;
   392     SDL_PixelFormat *srcfmt = info->src;
   393     SDL_PixelFormat *dstfmt = info->dst;
   394     unsigned alpha;
   395     vector unsigned char valpha;
   396     vector unsigned char vpermute;
   397     vector unsigned short vf800;
   398     vector unsigned int v8 = vec_splat_u32(8);
   399     vector unsigned int v16 = vec_add(v8, v8);
   400     vector unsigned short v1 = vec_splat_u16(1);
   401     vector unsigned short v3 = vec_splat_u16(3);
   402     /* 
   403         0x10 - 0x1f is the alpha
   404         0x00 - 0x0e evens are the red
   405         0x01 - 0x0f odds are zero
   406     */
   407     vector unsigned char vredalpha1 = (vector unsigned char)(
   408         0x10, 0x00, 0x01, 0x01,
   409         0x10, 0x02, 0x01, 0x01,
   410         0x10, 0x04, 0x01, 0x01,
   411         0x10, 0x06, 0x01, 0x01
   412     );
   413     vector unsigned char vredalpha2 = (vector unsigned char)(
   414         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   415     );
   416     /*
   417         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   418         0x11 - 0x0f odds are blue
   419     */
   420     vector unsigned char vblue1 = (vector unsigned char)(
   421         0x00, 0x01, 0x02, 0x11,
   422         0x04, 0x05, 0x06, 0x13,
   423         0x08, 0x09, 0x0a, 0x15,
   424         0x0c, 0x0d, 0x0e, 0x17
   425     );
   426     vector unsigned char vblue2 = (vector unsigned char)(
   427         vec_add((vector unsigned int)vblue1, v8)
   428     );
   429     /*
   430         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   431         0x10 - 0x0e evens are green
   432     */
   433     vector unsigned char vgreen1 = (vector unsigned char)(
   434         0x00, 0x01, 0x10, 0x03,
   435         0x04, 0x05, 0x12, 0x07,
   436         0x08, 0x09, 0x14, 0x0b,
   437         0x0c, 0x0d, 0x16, 0x0f
   438     );
   439     vector unsigned char vgreen2 = (vector unsigned char)(
   440         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   441     );
   442     
   443 
   444     assert(srcfmt->BytesPerPixel == 2);
   445     assert(dstfmt->BytesPerPixel == 4);
   446 
   447     vf800 = (vector unsigned short)vec_splat_u8(-7);
   448     vf800 = vec_sl(vf800, vec_splat_u16(8));
   449 
   450     if (dstfmt->Amask && srcfmt->alpha) {
   451         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   452         valpha = vec_splat(valpha, 0);
   453     } else {
   454         alpha = 0;
   455         valpha = vec_splat_u8(0);
   456     }
   457 
   458     vpermute = calc_swizzle32(NULL, dstfmt);
   459     while (height--) {
   460         vector unsigned char valigner;
   461         vector unsigned char voverflow;
   462         vector unsigned char vsrc;
   463 
   464         int width = info->d_width;
   465         int extrawidth;
   466 
   467         /* do scalar until we can align... */
   468 #define ONE_PIXEL_BLEND(condition, widthvar) \
   469         while (condition) { \
   470             unsigned sR, sG, sB; \
   471             unsigned short pixel = *((unsigned short *)src); \
   472             sR = (pixel >> 7) & 0xf8; \
   473             sG = (pixel >> 2) & 0xf8; \
   474             sB = (pixel << 3) & 0xf8; \
   475             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   476             src += 2; \
   477             dst += 4; \
   478             widthvar--; \
   479         }
   480         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   481 
   482         /* After all that work, here's the vector part! */
   483         extrawidth = (width % 8);  /* trailing unaligned stores */
   484         width -= extrawidth;
   485         vsrc = vec_ld(0, src);
   486         valigner = VEC_ALIGNER(src);
   487 
   488         while (width) {
   489             vector unsigned short vR, vG, vB;
   490             vector unsigned char vdst1, vdst2;
   491 
   492             voverflow = vec_ld(15, src);
   493             vsrc = vec_perm(vsrc, voverflow, valigner);
   494 
   495             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   496             vB = vec_sl((vector unsigned short)vsrc, v3);
   497             vG = vec_sl(vB, v3);
   498 
   499             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   500             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   501             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   502             vdst1 = vec_perm(vdst1, valpha, vpermute);
   503             vec_st(vdst1, 0, dst);
   504 
   505             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   506             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   507             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   508             vdst2 = vec_perm(vdst2, valpha, vpermute);
   509             vec_st(vdst2, 16, dst);
   510             
   511             width -= 8;
   512             dst += 32;
   513             src += 16;
   514             vsrc = voverflow;
   515         }
   516 
   517         assert(width == 0);
   518 
   519 
   520         /* do scalar until we can align... */
   521         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   522 #undef ONE_PIXEL_BLEND
   523 
   524         src += srcskip;  /* move to next row, accounting for pitch. */
   525         dst += dstskip;
   526     }
   527 
   528 }
   529 
   530 static void BlitNtoNKey(SDL_BlitInfo *info);
   531 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   532 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   533 {
   534     int height = info->d_height;
   535     Uint32 *srcp = (Uint32 *) info->s_pixels;
   536     int srcskip = info->s_skip;
   537     Uint32 *dstp = (Uint32 *) info->d_pixels;
   538     int dstskip = info->d_skip;
   539     SDL_PixelFormat *srcfmt = info->src;
   540     int srcbpp = srcfmt->BytesPerPixel;
   541     SDL_PixelFormat *dstfmt = info->dst;
   542     int dstbpp = dstfmt->BytesPerPixel;
   543     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   544 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   545     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   546 	Uint32 ckey = info->src->colorkey;
   547     vector unsigned int valpha;
   548     vector unsigned char vpermute;
   549     vector unsigned char vzero;
   550     vector unsigned int vckey;
   551     vector unsigned int vrgbmask;
   552     vpermute = calc_swizzle32(srcfmt, dstfmt);
   553     if (info->d_width < 16) {
   554         if(copy_alpha) {
   555             return BlitNtoNKeyCopyAlpha(info);
   556         } else {
   557             return BlitNtoNKey(info);
   558         }
   559     }
   560     vzero = vec_splat_u8(0);
   561     if (alpha) {
   562         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   563         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   564     } else {
   565         valpha = (vector unsigned int)vzero;
   566     }
   567     ckey &= rgbmask;
   568     ((unsigned int *)&vckey)[0] = ckey;
   569     vckey = vec_splat(vckey, 0);
   570     ((unsigned int *)&vrgbmask)[0] = rgbmask;
   571     vrgbmask = vec_splat(vrgbmask, 0);
   572 
   573     while (height--) {
   574 #define ONE_PIXEL_BLEND(condition, widthvar) \
   575         if (copy_alpha) { \
   576             while (condition) { \
   577                 Uint32 pixel; \
   578                 unsigned sR, sG, sB, sA; \
   579                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, pixel, \
   580                           sR, sG, sB, sA); \
   581                 if ( (pixel & rgbmask) != ckey ) { \
   582                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   583                             sR, sG, sB, sA); \
   584                 } \
   585                 ((Uint8 *)dstp) += dstbpp; \
   586                 ((Uint8 *)srcp) += srcbpp; \
   587                 widthvar--; \
   588             } \
   589         } else { \
   590             while (condition) { \
   591                 Uint32 pixel; \
   592                 unsigned sR, sG, sB; \
   593                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, pixel); \
   594                 if ( pixel != ckey ) { \
   595                     RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); \
   596                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   597                               sR, sG, sB, alpha); \
   598                 } \
   599                 ((Uint8 *)dstp) += dstbpp; \
   600                 ((Uint8 *)srcp) += srcbpp; \
   601                 widthvar--; \
   602             } \
   603         }
   604         int width = info->d_width;
   605         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   606         assert(width > 0);
   607         if (width > 0) {
   608             int extrawidth = (width % 4);
   609             vector unsigned char valigner = VEC_ALIGNER(srcp);
   610             vector unsigned int vs = vec_ld(0, srcp);
   611             width -= extrawidth;
   612             assert(width >= 4);
   613             while (width) {
   614                 vector unsigned char vsel;
   615                 vector unsigned int vd;
   616                 vector unsigned int voverflow = vec_ld(15, srcp);
   617                 /* load the source vec */
   618                 vs = vec_perm(vs, voverflow, valigner);
   619                 /* vsel is set for items that match the key */
   620                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   621                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   622                 /* permute the src vec to the dest format */
   623                 vs = vec_perm(vs, valpha, vpermute);
   624                 /* load the destination vec */
   625                 vd = vec_ld(0, dstp);
   626                 /* select the source and dest into vs */
   627                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   628                 
   629                 vec_st(vd, 0, dstp);
   630                 srcp += 4;
   631                 width -= 4;
   632                 dstp += 4;
   633                 vs = voverflow;
   634             }
   635             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   636 #undef ONE_PIXEL_BLEND
   637             srcp += srcskip >> 2;
   638             dstp += dstskip >> 2;
   639         }
   640     }
   641 }
   642 
   643 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   644 /* Use this on a G5 */
   645 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   646 {
   647     int height = info->d_height;
   648     Uint32 *src = (Uint32 *) info->s_pixels;
   649     int srcskip = info->s_skip;
   650     Uint32 *dst = (Uint32 *) info->d_pixels;
   651     int dstskip = info->d_skip;
   652     SDL_PixelFormat *srcfmt = info->src;
   653     int srcbpp = srcfmt->BytesPerPixel;
   654     SDL_PixelFormat *dstfmt = info->dst;
   655     int dstbpp = dstfmt->BytesPerPixel;
   656     vector unsigned int vzero = vec_splat_u32(0);
   657     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   658     if (dstfmt->Amask && !srcfmt->Amask) {
   659         if (srcfmt->alpha) {
   660             vector unsigned char valpha;
   661             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   662             vzero = (vector unsigned int)vec_splat(valpha, 0);
   663         }
   664     }
   665 
   666     assert(srcbpp == 4);
   667     assert(dstbpp == 4);
   668 
   669     while (height--) {
   670         vector unsigned char valigner;
   671         vector unsigned int vbits;
   672         vector unsigned int voverflow;
   673         Uint32 bits;
   674         Uint8 r, g, b, a;
   675 
   676         int width = info->d_width;
   677         int extrawidth;
   678 
   679         /* do scalar until we can align... */
   680         while ((UNALIGNED_PTR(dst)) && (width)) {
   681             bits = *(src++);
   682             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   683             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   684             width--;
   685         }
   686 
   687         /* After all that work, here's the vector part! */
   688         extrawidth = (width % 4);
   689         width -= extrawidth;
   690         valigner = VEC_ALIGNER(src);
   691         vbits = vec_ld(0, src);
   692 
   693        while (width) {
   694             voverflow = vec_ld(15, src);
   695             src += 4;
   696             width -= 4;
   697             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   698             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   699             vec_st(vbits, 0, dst);  /* store it back out. */
   700             dst += 4;
   701             vbits = voverflow;
   702         }
   703 
   704         assert(width == 0);
   705 
   706         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   707         while (extrawidth) {
   708             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   709             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   710             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   711             extrawidth--;
   712         }
   713 
   714         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   715         dst += dstskip >> 2;
   716     }
   717 
   718 }
   719 
   720 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   721 /* Use this on a G4 */
/*
 * AltiVec blitter that converts between two 32-bit pixel formats, issuing
 * data-stream prefetch hints for the source and destination while it runs.
 *
 * info supplies the source/destination pixel pointers, the per-row skips
 * (byte counts; shifted right by 2 below to step Uint32 pointers), the blit
 * width/height in pixels and both pixel formats.  Both formats must use
 * 4 bytes per pixel (asserted below).
 *
 * Every row is processed in three phases: scalar pixels until dst passes the
 * UNALIGNED_PTR() test, four pixels per iteration using vector permutes, and
 * finally scalar pixels for whatever is left of the row.
 */
static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
{
    /*
     * Prefetch lead distances.  NOTE(review): these are computed as byte
     * counts but are added to Uint32 pointers below, so the hinted address
     * is four times further ahead than the byte value suggests -- confirm
     * that this is intended.
     */
    const int scalar_dst_lead = sizeof (Uint32) * 4;
    const int vector_dst_lead = sizeof (Uint32) * 16;

    int height = info->d_height;
    Uint32 *src = (Uint32 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint32 *dst = (Uint32 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    int srcbpp = srcfmt->BytesPerPixel;
    SDL_PixelFormat *dstfmt = info->dst;
    int dstbpp = dstfmt->BytesPerPixel;
    /* Second operand of the swizzle permute below; all zero by default. */
    vector unsigned int vzero = vec_splat_u32(0);
    /* Permute control built by calc_swizzle32() (defined elsewhere in this
       file); presumably maps source channel bytes to the destination layout. */
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (dstfmt->Amask && !srcfmt->Amask) {
        /* Destination has an alpha mask and the source has none: when the
           source format carries a non-zero alpha value, replace the zero
           vector with that byte replicated into every lane. */
        if (srcfmt->alpha) {
            vector unsigned char valpha;
            /* Only byte 0 is written; vec_splat() then copies it to all lanes. */
            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
            vzero = (vector unsigned int)vec_splat(valpha, 0);
        }
    }

    assert(srcbpp == 4);
    assert(dstbpp == 4);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->d_width;
        int extrawidth;

        /* do scalar until we can align... */
        while ((UNALIGNED_PTR(dst)) && (width)) {
            /* Prefetch hints: transient touch for the source stream, touch
               for store on the destination stream. */
            vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
            vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        /* After all that work, here's the vector part! */
        extrawidth = (width % 4);  /* pixels that do not fill a whole vector */
        width -= extrawidth;       /* width is now a multiple of 4 */
        /* Permute control used to realign the source data, which may not be
           vector aligned even though dst now is. */
        valigner = VEC_ALIGNER(src);
        vbits = vec_ld(0, src);

        while (width) {
            vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
            vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
            /* Load the block holding the last byte of the current four
               pixels; this must happen before src is advanced. */
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
            vec_st(vbits, 0, dst);  /* store it back out. */
            dst += 4;
            /* The block just loaded becomes the leading block next time. */
            vbits = voverflow;
        }
        
        assert(width == 0);

        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
        while (extrawidth) {
            bits = *(src++);  /* at most 3 pixels (width % 4), don't bother with prefetch. */
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
        dst += dstskip >> 2;
    }

    /* Stop both prefetch data streams started above. */
    vec_dss(DST_CHAN_SRC);
    vec_dss(DST_CHAN_DEST);
}
   805 
   806 static Uint32 GetBlitFeatures( void )
   807 {
   808     static Uint32 features = 0xffffffff;
   809     if (features == 0xffffffff) {
   810         /* Provide an override for testing .. */
   811         char *override = getenv("SDL_ALTIVEC_BLIT_FEATURES");
   812         if (override) {
   813             features = 0;
   814             sscanf(override, "%u", &features);
   815         } else {
   816             features = ( 0
   817                 /* Feature 1 is has-MMX */
   818                 | ((SDL_HasMMX()) ? 1 : 0)
   819                 /* Feature 2 is has-AltiVec */
   820                 | ((SDL_HasAltiVec()) ? 2 : 0)
   821                 /* Feature 4 is dont-use-prefetch */
   822                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   823             );
   824         }
   825     }
   826     return features;
   827 }
   828 #else
/* Fallback feature query: the only bit reported here is 1 (has-MMX), taken
   directly from SDL_HasMMX() on every use. */
/* Feature 1 is has-MMX */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   831 #endif
   832 
/* This is now endian dependent */
/*
 * HI and LO are the memory indices (0 or 1) of the high-order and low-order
 * half of a two-element pair.  The *_TWO macros below use them to pick which
 * of two source pixels goes into the upper 16 bits of a 32-bit store, and
 * RGB565_32() uses them to pick the high and low byte of a 16-bit pixel.
 */
#if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
#define HI	1
#define LO	0
#else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
#define HI	0
#define LO	1
#endif
   841 
   842 #ifdef USE_ASMBLIT
   843 
/* Heheheh, we coerce Hermes into using SDL blit information */
/* The defines below are set up before the Hermes headers are included:
   the Hermes converter interface name is aliased to SDL_BlitInfo, the clear
   interface to void, and STACKCALL is defined as empty. */
#define X86_ASSEMBLER
#define HermesConverterInterface	SDL_BlitInfo
#define HermesClearInterface		void
#define STACKCALL

#include "HeadMMX.h"
#include "HeadX86.h"
   852 
   853 #else
   854 
   855 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   856 #define RGB888_RGB332(dst, src) { \
   857 	dst = (((src)&0x00E00000)>>16)| \
   858 	      (((src)&0x0000E000)>>11)| \
   859 	      (((src)&0x000000C0)>>6); \
   860 }
   861 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   862 {
   863 #ifndef USE_DUFFS_LOOP
   864 	int c;
   865 #endif
   866 	int width, height;
   867 	Uint32 *src;
   868 	const Uint8 *map;
   869 	Uint8 *dst;
   870 	int srcskip, dstskip;
   871 
   872 	/* Set up some basic variables */
   873 	width = info->d_width;
   874 	height = info->d_height;
   875 	src = (Uint32 *)info->s_pixels;
   876 	srcskip = info->s_skip/4;
   877 	dst = info->d_pixels;
   878 	dstskip = info->d_skip;
   879 	map = info->table;
   880 
   881 	if ( map == NULL ) {
   882 		while ( height-- ) {
   883 #ifdef USE_DUFFS_LOOP
   884 			DUFFS_LOOP(
   885 				RGB888_RGB332(*dst++, *src);
   886 			, width);
   887 #else
   888 			for ( c=width/4; c; --c ) {
   889 				/* Pack RGB into 8bit pixel */
   890 				++src;
   891 				RGB888_RGB332(*dst++, *src);
   892 				++src;
   893 				RGB888_RGB332(*dst++, *src);
   894 				++src;
   895 				RGB888_RGB332(*dst++, *src);
   896 				++src;
   897 			}
   898 			switch ( width & 3 ) {
   899 				case 3:
   900 					RGB888_RGB332(*dst++, *src);
   901 					++src;
   902 				case 2:
   903 					RGB888_RGB332(*dst++, *src);
   904 					++src;
   905 				case 1:
   906 					RGB888_RGB332(*dst++, *src);
   907 					++src;
   908 			}
   909 #endif /* USE_DUFFS_LOOP */
   910 			src += srcskip;
   911 			dst += dstskip;
   912 		}
   913 	} else {
   914 		int pixel;
   915 
   916 		while ( height-- ) {
   917 #ifdef USE_DUFFS_LOOP
   918 			DUFFS_LOOP(
   919 				RGB888_RGB332(pixel, *src);
   920 				*dst++ = map[pixel];
   921 				++src;
   922 			, width);
   923 #else
   924 			for ( c=width/4; c; --c ) {
   925 				/* Pack RGB into 8bit pixel */
   926 				RGB888_RGB332(pixel, *src);
   927 				*dst++ = map[pixel];
   928 				++src;
   929 				RGB888_RGB332(pixel, *src);
   930 				*dst++ = map[pixel];
   931 				++src;
   932 				RGB888_RGB332(pixel, *src);
   933 				*dst++ = map[pixel];
   934 				++src;
   935 				RGB888_RGB332(pixel, *src);
   936 				*dst++ = map[pixel];
   937 				++src;
   938 			}
   939 			switch ( width & 3 ) {
   940 				case 3:
   941 					RGB888_RGB332(pixel, *src);
   942 					*dst++ = map[pixel];
   943 					++src;
   944 				case 2:
   945 					RGB888_RGB332(pixel, *src);
   946 					*dst++ = map[pixel];
   947 					++src;
   948 				case 1:
   949 					RGB888_RGB332(pixel, *src);
   950 					*dst++ = map[pixel];
   951 					++src;
   952 			}
   953 #endif /* USE_DUFFS_LOOP */
   954 			src += srcskip;
   955 			dst += dstskip;
   956 		}
   957 	}
   958 }
   959 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   960 #define RGB888_RGB555(dst, src) { \
   961 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>9)| \
   962 	                   (((*src)&0x0000F800)>>6)| \
   963 	                   (((*src)&0x000000F8)>>3); \
   964 }
   965 #define RGB888_RGB555_TWO(dst, src) { \
   966 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   967 	                     (((src[HI])&0x0000F800)>>6)| \
   968 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   969 	                     (((src[LO])&0x00F80000)>>9)| \
   970 	                     (((src[LO])&0x0000F800)>>6)| \
   971 	                     (((src[LO])&0x000000F8)>>3); \
   972 }
   973 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   974 {
   975 #ifndef USE_DUFFS_LOOP
   976 	int c;
   977 #endif
   978 	int width, height;
   979 	Uint32 *src;
   980 	Uint16 *dst;
   981 	int srcskip, dstskip;
   982 
   983 	/* Set up some basic variables */
   984 	width = info->d_width;
   985 	height = info->d_height;
   986 	src = (Uint32 *)info->s_pixels;
   987 	srcskip = info->s_skip/4;
   988 	dst = (Uint16 *)info->d_pixels;
   989 	dstskip = info->d_skip/2;
   990 
   991 #ifdef USE_DUFFS_LOOP
   992 	while ( height-- ) {
   993 		DUFFS_LOOP(
   994 			RGB888_RGB555(dst, src);
   995 			++src;
   996 			++dst;
   997 		, width);
   998 		src += srcskip;
   999 		dst += dstskip;
  1000 	}
  1001 #else
  1002 	/* Memory align at 4-byte boundary, if necessary */
  1003 	if ( (long)dst & 0x03 ) {
  1004 		/* Don't do anything if width is 0 */
  1005 		if ( width == 0 ) {
  1006 			return;
  1007 		}
  1008 		--width;
  1009 
  1010 		while ( height-- ) {
  1011 			/* Perform copy alignment */
  1012 			RGB888_RGB555(dst, src);
  1013 			++src;
  1014 			++dst;
  1015 
  1016 			/* Copy in 4 pixel chunks */
  1017 			for ( c=width/4; c; --c ) {
  1018 				RGB888_RGB555_TWO(dst, src);
  1019 				src += 2;
  1020 				dst += 2;
  1021 				RGB888_RGB555_TWO(dst, src);
  1022 				src += 2;
  1023 				dst += 2;
  1024 			}
  1025 			/* Get any leftovers */
  1026 			switch (width & 3) {
  1027 				case 3:
  1028 					RGB888_RGB555(dst, src);
  1029 					++src;
  1030 					++dst;
  1031 				case 2:
  1032 					RGB888_RGB555_TWO(dst, src);
  1033 					src += 2;
  1034 					dst += 2;
  1035 					break;
  1036 				case 1:
  1037 					RGB888_RGB555(dst, src);
  1038 					++src;
  1039 					++dst;
  1040 					break;
  1041 			}
  1042 			src += srcskip;
  1043 			dst += dstskip;
  1044 		}
  1045 	} else { 
  1046 		while ( height-- ) {
  1047 			/* Copy in 4 pixel chunks */
  1048 			for ( c=width/4; c; --c ) {
  1049 				RGB888_RGB555_TWO(dst, src);
  1050 				src += 2;
  1051 				dst += 2;
  1052 				RGB888_RGB555_TWO(dst, src);
  1053 				src += 2;
  1054 				dst += 2;
  1055 			}
  1056 			/* Get any leftovers */
  1057 			switch (width & 3) {
  1058 				case 3:
  1059 					RGB888_RGB555(dst, src);
  1060 					++src;
  1061 					++dst;
  1062 				case 2:
  1063 					RGB888_RGB555_TWO(dst, src);
  1064 					src += 2;
  1065 					dst += 2;
  1066 					break;
  1067 				case 1:
  1068 					RGB888_RGB555(dst, src);
  1069 					++src;
  1070 					++dst;
  1071 					break;
  1072 			}
  1073 			src += srcskip;
  1074 			dst += dstskip;
  1075 		}
  1076 	}
  1077 #endif /* USE_DUFFS_LOOP */
  1078 }
  1079 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1080 #define RGB888_RGB565(dst, src) { \
  1081 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>8)| \
  1082 	                   (((*src)&0x0000FC00)>>5)| \
  1083 	                   (((*src)&0x000000F8)>>3); \
  1084 }
  1085 #define RGB888_RGB565_TWO(dst, src) { \
  1086 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1087 	                     (((src[HI])&0x0000FC00)>>5)| \
  1088 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1089 	                     (((src[LO])&0x00F80000)>>8)| \
  1090 	                     (((src[LO])&0x0000FC00)>>5)| \
  1091 	                     (((src[LO])&0x000000F8)>>3); \
  1092 }
  1093 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1094 {
  1095 #ifndef USE_DUFFS_LOOP
  1096 	int c;
  1097 #endif
  1098 	int width, height;
  1099 	Uint32 *src;
  1100 	Uint16 *dst;
  1101 	int srcskip, dstskip;
  1102 
  1103 	/* Set up some basic variables */
  1104 	width = info->d_width;
  1105 	height = info->d_height;
  1106 	src = (Uint32 *)info->s_pixels;
  1107 	srcskip = info->s_skip/4;
  1108 	dst = (Uint16 *)info->d_pixels;
  1109 	dstskip = info->d_skip/2;
  1110 
  1111 #ifdef USE_DUFFS_LOOP
  1112 	while ( height-- ) {
  1113 		DUFFS_LOOP(
  1114 			RGB888_RGB565(dst, src);
  1115 			++src;
  1116 			++dst;
  1117 		, width);
  1118 		src += srcskip;
  1119 		dst += dstskip;
  1120 	}
  1121 #else
  1122 	/* Memory align at 4-byte boundary, if necessary */
  1123 	if ( (long)dst & 0x03 ) {
  1124 		/* Don't do anything if width is 0 */
  1125 		if ( width == 0 ) {
  1126 			return;
  1127 		}
  1128 		--width;
  1129 
  1130 		while ( height-- ) {
  1131 			/* Perform copy alignment */
  1132 			RGB888_RGB565(dst, src);
  1133 			++src;
  1134 			++dst;
  1135 
  1136 			/* Copy in 4 pixel chunks */
  1137 			for ( c=width/4; c; --c ) {
  1138 				RGB888_RGB565_TWO(dst, src);
  1139 				src += 2;
  1140 				dst += 2;
  1141 				RGB888_RGB565_TWO(dst, src);
  1142 				src += 2;
  1143 				dst += 2;
  1144 			}
  1145 			/* Get any leftovers */
  1146 			switch (width & 3) {
  1147 				case 3:
  1148 					RGB888_RGB565(dst, src);
  1149 					++src;
  1150 					++dst;
  1151 				case 2:
  1152 					RGB888_RGB565_TWO(dst, src);
  1153 					src += 2;
  1154 					dst += 2;
  1155 					break;
  1156 				case 1:
  1157 					RGB888_RGB565(dst, src);
  1158 					++src;
  1159 					++dst;
  1160 					break;
  1161 			}
  1162 			src += srcskip;
  1163 			dst += dstskip;
  1164 		}
  1165 	} else { 
  1166 		while ( height-- ) {
  1167 			/* Copy in 4 pixel chunks */
  1168 			for ( c=width/4; c; --c ) {
  1169 				RGB888_RGB565_TWO(dst, src);
  1170 				src += 2;
  1171 				dst += 2;
  1172 				RGB888_RGB565_TWO(dst, src);
  1173 				src += 2;
  1174 				dst += 2;
  1175 			}
  1176 			/* Get any leftovers */
  1177 			switch (width & 3) {
  1178 				case 3:
  1179 					RGB888_RGB565(dst, src);
  1180 					++src;
  1181 					++dst;
  1182 				case 2:
  1183 					RGB888_RGB565_TWO(dst, src);
  1184 					src += 2;
  1185 					dst += 2;
  1186 					break;
  1187 				case 1:
  1188 					RGB888_RGB565(dst, src);
  1189 					++src;
  1190 					++dst;
  1191 					break;
  1192 			}
  1193 			src += srcskip;
  1194 			dst += dstskip;
  1195 		}
  1196 	}
  1197 #endif /* USE_DUFFS_LOOP */
  1198 }
  1199 
  1200 #endif /* USE_ASMBLIT */
  1201 
  1202 
  1203 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1204 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1205 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1206 {
  1207 #ifndef USE_DUFFS_LOOP
  1208 	int c;
  1209 #endif
  1210 	int width, height;
  1211 	Uint8 *src;
  1212 	Uint32 *dst;
  1213 	int srcskip, dstskip;
  1214 
  1215 	/* Set up some basic variables */
  1216 	width = info->d_width;
  1217 	height = info->d_height;
  1218 	src = (Uint8 *)info->s_pixels;
  1219 	srcskip = info->s_skip;
  1220 	dst = (Uint32 *)info->d_pixels;
  1221 	dstskip = info->d_skip/4;
  1222 
  1223 #ifdef USE_DUFFS_LOOP
  1224 	while ( height-- ) {
  1225 		DUFFS_LOOP(
  1226 		{
  1227 			*dst++ = RGB565_32(dst, src, map);
  1228 			src += 2;
  1229 		},
  1230 		width);
  1231 		src += srcskip;
  1232 		dst += dstskip;
  1233 	}
  1234 #else
  1235 	while ( height-- ) {
  1236 		/* Copy in 4 pixel chunks */
  1237 		for ( c=width/4; c; --c ) {
  1238 			*dst++ = RGB565_32(dst, src, map);
  1239 			src += 2;
  1240 			*dst++ = RGB565_32(dst, src, map);
  1241 			src += 2;
  1242 			*dst++ = RGB565_32(dst, src, map);
  1243 			src += 2;
  1244 			*dst++ = RGB565_32(dst, src, map);
  1245 			src += 2;
  1246 		}
  1247 		/* Get any leftovers */
  1248 		switch (width & 3) {
  1249 			case 3:
  1250 				*dst++ = RGB565_32(dst, src, map);
  1251 				src += 2;
  1252 			case 2:
  1253 				*dst++ = RGB565_32(dst, src, map);
  1254 				src += 2;
  1255 			case 1:
  1256 				*dst++ = RGB565_32(dst, src, map);
  1257 				src += 2;
  1258 				break;
  1259 		}
  1260 		src += srcskip;
  1261 		dst += dstskip;
  1262 	}
  1263 #endif /* USE_DUFFS_LOOP */
  1264 }
  1265 
/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/*
 * Lookup table consumed by Blit_RGB565_32() through the RGB565_32() macro.
 * For a byte value b, entry [b*2] is the contribution used when b is the
 * low-order byte of the 16-bit source pixel and entry [b*2+1] is the
 * contribution used when b is the high-order byte; the two are added to form
 * the 32-bit destination pixel.  In this table every odd entry also carries
 * 0xff in its top byte.
 */
static const Uint32 RGB565_ARGB8888_LUT[512] = {
		0x00000000, 0xff000000, 0x00000008, 0xff002000,
		0x00000010, 0xff004000, 0x00000018, 0xff006100,
		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
		0x00000400, 0xff200000, 0x00000408, 0xff202000,
		0x00000410, 0xff204000, 0x00000418, 0xff206100,
		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
		0x00000800, 0xff410000, 0x00000808, 0xff412000,
		0x00000810, 0xff414000, 0x00000818, 0xff416100,
		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
		0x00001000, 0xff830000, 0x00001008, 0xff832000,
		0x00001010, 0xff834000, 0x00001018, 0xff836100,
		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
};
/* Blit RGB565 to ARGB8888 by running the table-driven Blit_RGB565_32()
   with the ARGB lookup table above. */
static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
  1401 
/* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
/*
 * Lookup table consumed by Blit_RGB565_32() through the RGB565_32() macro.
 * For a byte value b, entry [b*2] is the contribution used when b is the
 * low-order byte of the 16-bit source pixel and entry [b*2+1] is the
 * contribution used when b is the high-order byte; the two are added to form
 * the 32-bit destination pixel.  In this table every even entry also carries
 * 0xff in its top byte.
 */
static const Uint32 RGB565_ABGR8888_LUT[512] = {
		0xff000000, 0x00000000, 0xff080000, 0x00002000,
		0xff100000, 0x00004000, 0xff180000, 0x00006100,
		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
		0xff000400, 0x00000020, 0xff080400, 0x00002020,
		0xff100400, 0x00004020, 0xff180400, 0x00006120,
		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
		0xff000800, 0x00000041, 0xff080800, 0x00002041,
		0xff100800, 0x00004041, 0xff180800, 0x00006141,
		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
		0xff001000, 0x00000083, 0xff081000, 0x00002083,
		0xff101000, 0x00004083, 0xff181000, 0x00006183,
		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
};
/* Blit RGB565 to ABGR8888 by running the table-driven Blit_RGB565_32()
   with the ABGR lookup table above. */
static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
}
  1537 
  1538 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1539 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1540 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1541 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1542 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1543 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1544 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1545 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1546 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1547 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1548 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1549 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1550 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1551 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1552 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1553 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1554 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1555 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1556 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1557 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1558 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1559 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1560 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1561 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1562 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1563 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1564 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1565 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1566 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1567 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1568 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1569 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1570 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1571 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1572 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1573 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1574 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1575 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1576 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1577 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1578 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1579 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1580 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1581 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1582 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1583 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1584 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1585 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1586 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1587 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1588 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1589 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1590 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1591 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1592 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1593 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1594 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1595 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1596 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1597 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1598 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1599 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1600 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1601 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1602 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1603 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1604 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1605 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1606 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1607 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1608 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1609 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1610 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1611 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1612 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1613 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1614 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1615 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1616 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1617 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1618 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1619 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1620 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1621 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1622 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1623 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1624 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1625 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1626 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1627 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1628 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1629 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1630 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1631 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1632 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1633 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1634 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1635 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1636 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1637 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1638 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1639 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1640 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1641 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1642 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1643 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1644 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1645 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1646 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1647 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1648 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1649 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1650 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1651 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1652 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1653 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1654 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1655 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1656 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1657 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1658 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1659 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1660 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1661 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1662 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1663 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1664 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1665 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1666 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1667 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1668 };
  1669 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
  1670 {
  1671     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1672 }
  1673 
/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
 *
 * The table holds two 32-bit entries for every possible byte value
 * (256 * 2 = 512 entries):
 *   - even entry [2*b]:   the bits carried by a 565 pixel's low byte b --
 *     the 5 blue bits expanded into 0xFF000000 and the low 3 green bits
 *     placed in 0x00FF0000;
 *   - odd entry [2*b+1]:  the bits carried by a 565 pixel's high byte b --
 *     the high 3 green bits in 0x00FF0000, the 5 red bits expanded into
 *     0x0000FF00, and a constant 0xff in the lowest byte.
 * NOTE(review): presumably Blit_RGB565_32() combines one even and one odd
 * entry per source pixel, and the constant 0xff is the opaque alpha value
 * -- confirm against Blit_RGB565_32().
 */
static const Uint32 RGB565_BGRA8888_LUT[512] = {
		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
};
  1805 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
  1806 {
  1807     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1808 }
  1809 
  1810 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1811 #ifndef RGB888_RGB332
  1812 #define RGB888_RGB332(dst, src) { \
  1813 	dst = (((src)&0x00E00000)>>16)| \
  1814 	      (((src)&0x0000E000)>>11)| \
  1815 	      (((src)&0x000000C0)>>6); \
  1816 }
  1817 #endif
  1818 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1819 {
  1820 #ifndef USE_DUFFS_LOOP
  1821 	int c;
  1822 #endif
  1823 	int pixel;
  1824 	int width, height;
  1825 	Uint32 *src;
  1826 	const Uint8 *map;
  1827 	Uint8 *dst;
  1828 	int srcskip, dstskip;
  1829 
  1830 	/* Set up some basic variables */
  1831 	width = info->d_width;
  1832 	height = info->d_height;
  1833 	src = (Uint32 *)info->s_pixels;
  1834 	srcskip = info->s_skip/4;
  1835 	dst = info->d_pixels;
  1836 	dstskip = info->d_skip;
  1837 	map = info->table;
  1838 
  1839 #ifdef USE_DUFFS_LOOP
  1840 	while ( height-- ) {
  1841 		DUFFS_LOOP(
  1842 			RGB888_RGB332(pixel, *src);
  1843 			*dst++ = map[pixel];
  1844 			++src;
  1845 		, width);
  1846 		src += srcskip;
  1847 		dst += dstskip;
  1848 	}
  1849 #else
  1850 	while ( height-- ) {
  1851 		for ( c=width/4; c; --c ) {
  1852 			/* Pack RGB into 8bit pixel */
  1853 			RGB888_RGB332(pixel, *src);
  1854 			*dst++ = map[pixel];
  1855 			++src;
  1856 			RGB888_RGB332(pixel, *src);
  1857 			*dst++ = map[pixel];
  1858 			++src;
  1859 			RGB888_RGB332(pixel, *src);
  1860 			*dst++ = map[pixel];
  1861 			++src;
  1862 			RGB888_RGB332(pixel, *src);
  1863 			*dst++ = map[pixel];
  1864 			++src;
  1865 		}
  1866 		switch ( width & 3 ) {
  1867 			case 3:
  1868 				RGB888_RGB332(pixel, *src);
  1869 				*dst++ = map[pixel];
  1870 				++src;
  1871 			case 2:
  1872 				RGB888_RGB332(pixel, *src);
  1873 				*dst++ = map[pixel];
  1874 				++src;
  1875 			case 1:
  1876 				RGB888_RGB332(pixel, *src);
  1877 				*dst++ = map[pixel];
  1878 				++src;
  1879 		}
  1880 		src += srcskip;
  1881 		dst += dstskip;
  1882 	}
  1883 #endif /* USE_DUFFS_LOOP */
  1884 }
  1885 static void BlitNto1(SDL_BlitInfo *info)
  1886 {
  1887 #ifndef USE_DUFFS_LOOP
  1888 	int c;
  1889 #endif
  1890 	int width, height;
  1891 	Uint8 *src;
  1892 	const Uint8 *map;
  1893 	Uint8 *dst;
  1894 	int srcskip, dstskip;
  1895 	int srcbpp;
  1896 	Uint32 pixel;
  1897 	int  sR, sG, sB;
  1898 	SDL_PixelFormat *srcfmt;
  1899 
  1900 	/* Set up some basic variables */
  1901 	width = info->d_width;
  1902 	height = info->d_height;
  1903 	src = info->s_pixels;
  1904 	srcskip = info->s_skip;
  1905 	dst = info->d_pixels;
  1906 	dstskip = info->d_skip;
  1907 	map = info->table;
  1908 	srcfmt = info->src;
  1909 	srcbpp = srcfmt->BytesPerPixel;
  1910 
  1911 	if ( map == NULL ) {
  1912 		while ( height-- ) {
  1913 #ifdef USE_DUFFS_LOOP
  1914 			DUFFS_LOOP(
  1915 				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
  1916 								sR, sG, sB);
  1917 				if ( 1 ) {
  1918 				  	/* Pack RGB into 8bit pixel */
  1919 				  	*dst = ((sR>>5)<<(3+2))|
  1920 					        ((sG>>5)<<(2)) |
  1921 					        ((sB>>6)<<(0)) ;
  1922 				}
  1923 				dst++;
  1924 				src += srcbpp;
  1925 			, width);
  1926 #else
  1927 			for ( c=width; c; --c ) {
  1928 				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
  1929 								sR, sG, sB);
  1930 				if ( 1 ) {
  1931 				  	/* Pack RGB into 8bit pixel */
  1932 				  	*dst = ((sR>>5)<<(3+2))|
  1933 					        ((sG>>5)<<(2)) |
  1934 					        ((sB>>6)<<(0)) ;
  1935 				}
  1936 				dst++;
  1937 				src += srcbpp;
  1938 			}
  1939 #endif
  1940 			src += srcskip;
  1941 			dst += dstskip;
  1942 		}
  1943 	} else {
  1944 		while ( height-- ) {
  1945 #ifdef USE_DUFFS_LOOP
  1946 			DUFFS_LOOP(
  1947 				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
  1948 								sR, sG, sB);
  1949 				if ( 1 ) {
  1950 				  	/* Pack RGB into 8bit pixel */
  1951 				  	*dst = map[((sR>>5)<<(3+2))|
  1952 						   ((sG>>5)<<(2))  |
  1953 						   ((sB>>6)<<(0))  ];
  1954 				}
  1955 				dst++;
  1956 				src += srcbpp;
  1957 			, width);
  1958 #else
  1959 			for ( c=width; c; --c ) {
  1960 				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
  1961 								sR, sG, sB);
  1962 				if ( 1 ) {
  1963 				  	/* Pack RGB into 8bit pixel */
  1964 				  	*dst = map[((sR>>5)<<(3+2))|
  1965 						   ((sG>>5)<<(2))  |
  1966 						   ((sB>>6)<<(0))  ];
  1967 				}
  1968 				dst++;
  1969 				src += srcbpp;
  1970 			}
  1971 #endif /* USE_DUFFS_LOOP */
  1972 			src += srcskip;
  1973 			dst += dstskip;
  1974 		}
  1975 	}
  1976 }
  1977 static void BlitNtoN(SDL_BlitInfo *info)
  1978 {
  1979 	int width = info->d_width;
  1980 	int height = info->d_height;
  1981 	Uint8 *src = info->s_pixels;
  1982 	int srcskip = info->s_skip;
  1983 	Uint8 *dst = info->d_pixels;
  1984 	int dstskip = info->d_skip;
  1985 	SDL_PixelFormat *srcfmt = info->src;
  1986 	int srcbpp = srcfmt->BytesPerPixel;
  1987 	SDL_PixelFormat *dstfmt = info->dst;
  1988 	int dstbpp = dstfmt->BytesPerPixel;
  1989 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  1990 
  1991 	while ( height-- ) {
  1992 		DUFFS_LOOP(
  1993 		{
  1994 		        Uint32 pixel;
  1995 			unsigned sR;
  1996 			unsigned sG;
  1997 			unsigned sB;
  1998 			DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB);
  1999 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2000 			dst += dstbpp;
  2001 			src += srcbpp;
  2002 		},
  2003 		width);
  2004 		src += srcskip;
  2005 		dst += dstskip;
  2006 	}
  2007 }
  2008 
  2009 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2010 {
  2011 	int width = info->d_width;
  2012 	int height = info->d_height;
  2013 	Uint8 *src = info->s_pixels;
  2014 	int srcskip = info->s_skip;
  2015 	Uint8 *dst = info->d_pixels;
  2016 	int dstskip = info->d_skip;
  2017 	SDL_PixelFormat *srcfmt = info->src;
  2018 	int srcbpp = srcfmt->BytesPerPixel;
  2019 	SDL_PixelFormat *dstfmt = info->dst;
  2020 	int dstbpp = dstfmt->BytesPerPixel;
  2021 	int c;
  2022 
  2023 	/* FIXME: should map alpha to [0..255] correctly! */
  2024 	while ( height-- ) {
  2025 		for ( c=width; c; --c ) {
  2026 		        Uint32 pixel;
  2027 			unsigned sR, sG, sB, sA;
  2028 			DISEMBLE_RGBA(src, srcbpp, srcfmt, pixel,
  2029 				      sR, sG, sB, sA);
  2030 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2031 				      sR, sG, sB, sA);
  2032 			dst += dstbpp;
  2033 			src += srcbpp;
  2034 		}
  2035 		src += srcskip;
  2036 		dst += dstskip;
  2037 	}
  2038 }
  2039 
  2040 static void BlitNto1Key(SDL_BlitInfo *info)
  2041 {
  2042 	int width = info->d_width;
  2043 	int height = info->d_height;
  2044 	Uint8 *src = info->s_pixels;
  2045 	int srcskip = info->s_skip;
  2046 	Uint8 *dst = info->d_pixels;
  2047 	int dstskip = info->d_skip;
  2048 	SDL_PixelFormat *srcfmt = info->src;
  2049 	const Uint8 *palmap = info->table;
  2050 	Uint32 ckey = srcfmt->colorkey;
  2051 	Uint32 rgbmask = ~srcfmt->Amask;
  2052 	int srcbpp;
  2053 	Uint32 pixel;
  2054 	Uint8  sR, sG, sB;
  2055 
  2056 	/* Set up some basic variables */
  2057 	srcbpp = srcfmt->BytesPerPixel;
  2058 	ckey &= rgbmask;
  2059 
  2060 	if ( palmap == NULL ) {
  2061 		while ( height-- ) {
  2062 			DUFFS_LOOP(
  2063 			{
  2064 				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
  2065 								sR, sG, sB);
  2066 				if ( (pixel & rgbmask) != ckey ) {
  2067 				  	/* Pack RGB into 8bit pixel */
  2068 				  	*dst = ((sR>>5)<<(3+2))|
  2069 						((sG>>5)<<(2)) |
  2070 						((sB>>6)<<(0)) ;
  2071 				}
  2072 				dst++;
  2073 				src += srcbpp;
  2074 			},
  2075 			width);
  2076 			src += srcskip;
  2077 			dst += dstskip;
  2078 		}
  2079 	} else {
  2080 		while ( height-- ) {
  2081 			DUFFS_LOOP(
  2082 			{
  2083 				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
  2084 								sR, sG, sB);
  2085 				if ( (pixel & rgbmask) != ckey ) {
  2086 				  	/* Pack RGB into 8bit pixel */
  2087 				  	*dst = palmap[((sR>>5)<<(3+2))|
  2088 							((sG>>5)<<(2))  |
  2089 							((sB>>6)<<(0))  ];
  2090 				}
  2091 				dst++;
  2092 				src += srcbpp;
  2093 			},
  2094 			width);
  2095 			src += srcskip;
  2096 			dst += dstskip;
  2097 		}
  2098 	}
  2099 }
  2100 
  2101 static void Blit2to2Key(SDL_BlitInfo *info)
  2102 {
  2103 	int width = info->d_width;
  2104 	int height = info->d_height;
  2105 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2106 	int srcskip = info->s_skip;
  2107 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2108 	int dstskip = info->d_skip;
  2109 	Uint32 ckey = info->src->colorkey;
  2110 	Uint32 rgbmask = ~info->src->Amask;
  2111 
  2112 	/* Set up some basic variables */
  2113         srcskip /= 2;
  2114         dstskip /= 2;
  2115 	ckey &= rgbmask;
  2116 
  2117 	while ( height-- ) {
  2118 		DUFFS_LOOP(
  2119 		{
  2120 			if ( (*srcp & rgbmask) != ckey ) {
  2121 				*dstp = *srcp;
  2122 			}
  2123 			dstp++;
  2124 			srcp++;
  2125 		},
  2126 		width);
  2127 		srcp += srcskip;
  2128 		dstp += dstskip;
  2129 	}
  2130 }
  2131 
  2132 static void BlitNtoNKey(SDL_BlitInfo *info)
  2133 {
  2134 	int width = info->d_width;
  2135 	int height = info->d_height;
  2136 	Uint8 *src = info->s_pixels;
  2137 	int srcskip = info->s_skip;
  2138 	Uint8 *dst = info->d_pixels;
  2139 	int dstskip = info->d_skip;
  2140 	Uint32 ckey = info->src->colorkey;
  2141 	SDL_PixelFormat *srcfmt = info->src;
  2142 	SDL_PixelFormat *dstfmt = info->dst;
  2143 	int srcbpp = srcfmt->BytesPerPixel;
  2144 	int dstbpp = dstfmt->BytesPerPixel;
  2145 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2146 
  2147 	while ( height-- ) {
  2148 		DUFFS_LOOP(
  2149 		{
  2150 		        Uint32 pixel;
  2151 			unsigned sR;
  2152 			unsigned sG;
  2153 			unsigned sB;
  2154 			RETRIEVE_RGB_PIXEL(src, srcbpp, pixel);
  2155 			if ( pixel != ckey ) {
  2156 			        RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB);
  2157 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2158 					      sR, sG, sB, alpha);
  2159 			}
  2160 			dst += dstbpp;
  2161 			src += srcbpp;
  2162 		},
  2163 		width);
  2164 		src += srcskip;
  2165 		dst += dstskip;
  2166 	}
  2167 }
  2168 
  2169 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2170 {
  2171 	int width = info->d_width;
  2172 	int height = info->d_height;
  2173 	Uint8 *src = info->s_pixels;
  2174 	int srcskip = info->s_skip;
  2175 	Uint8 *dst = info->d_pixels;
  2176 	int dstskip = info->d_skip;
  2177 	Uint32 ckey = info->src->colorkey;
  2178 	SDL_PixelFormat *srcfmt = info->src;
  2179 	SDL_PixelFormat *dstfmt = info->dst;
  2180 	Uint32 rgbmask = ~srcfmt->Amask;
  2181 
  2182 	Uint8 srcbpp;
  2183 	Uint8 dstbpp;
  2184 	Uint32 pixel;
  2185 	Uint8  sR, sG, sB, sA;
  2186 
  2187 	/* Set up some basic variables */
  2188 	srcbpp = srcfmt->BytesPerPixel;
  2189 	dstbpp = dstfmt->BytesPerPixel;
  2190 	ckey &= rgbmask;
  2191 
  2192 	/* FIXME: should map alpha to [0..255] correctly! */
  2193 	while ( height-- ) {
  2194 		DUFFS_LOOP(
  2195 		{
  2196 			DISEMBLE_RGBA(src, srcbpp, srcfmt, pixel,
  2197 				      sR, sG, sB, sA);
  2198 			if ( (pixel & rgbmask) != ckey ) {
  2199 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2200 						sR, sG, sB, sA);
  2201 			}
  2202 			dst += dstbpp;
  2203 			src += srcbpp;
  2204 		},
  2205 		width);
  2206 		src += srcskip;
  2207 		dst += dstskip;
  2208 	}
  2209 }
  2210 
  2211 /* Normal N to N optimized blitters */
  2212 struct blit_table {
  2213 	Uint32 srcR, srcG, srcB;
  2214 	int dstbpp;
  2215 	Uint32 dstR, dstG, dstB;
  2216 	Uint32 blit_features;
  2217 	void *aux_data;
  2218 	SDL_loblit blitfunc;
  2219 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
  2220 };
  2221 static const struct blit_table normal_blit_1[] = {
  2222 	/* Default for 8-bit RGB source, an invalid combination */
  2223 	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
  2224 };
  2225 static const struct blit_table normal_blit_2[] = {
  2226 #ifdef USE_ASMBLIT
  2227     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
  2228       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
  2229     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
  2230       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
  2231     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
  2232       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
  2233 #endif
  2234 #ifdef USE_ALTIVEC_BLITTERS
  2235     /* has-altivec */
  2236     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2237       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2238     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2239       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2240 #endif
  2241     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
  2242       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
  2243     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2244       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
  2245     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2246       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
  2247     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2248       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
  2249 
  2250     /* Default for 16-bit RGB source, used if no other blitter matches */
  2251     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2252 };
  2253 static const struct blit_table normal_blit_3[] = {
  2254 	/* Default for 24-bit RGB source, never optimized */
  2255     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2256 };
  2257 static const struct blit_table normal_blit_4[] = {
  2258 #ifdef USE_ASMBLIT
  2259     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2260       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
  2261     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2262       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
  2263     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2264       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
  2265     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2266       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
  2267     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2268       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
  2269     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2270       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
  2271     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2272       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
  2273     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2274       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
  2275     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
  2276       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
  2277     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
  2278       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
  2279     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2280       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
  2281     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2282       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
  2283     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2284       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
  2285 #else
  2286 #ifdef USE_ALTIVEC_BLITTERS
  2287     /* has-altivec | dont-use-prefetch */
  2288     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2289       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2290     /* has-altivec */
  2291     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2292       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2293     /* has-altivec */
  2294     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
  2295       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
  2296 #endif
  2297     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2298       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
  2299     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2300       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
  2301 #endif
  2302 	/* Default for 32-bit RGB source, used if no other blitter matches */
  2303 	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2304 };
  2305 static const struct blit_table *normal_blit[] = {
  2306 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2307 };
  2308 
  2309 /* Mask matches table, or table entry is zero */
  2310 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2311 
  2312 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2313 {
  2314 	struct private_swaccel *sdata;
  2315 	SDL_PixelFormat *srcfmt;
  2316 	SDL_PixelFormat *dstfmt;
  2317 	const struct blit_table *table;
  2318 	int which;
  2319 	SDL_loblit blitfun;
  2320 
  2321 	/* Set up data for choosing the blit */
  2322 	sdata = surface->map->sw_data;
  2323 	srcfmt = surface->format;
  2324 	dstfmt = surface->map->dst->format;
  2325 
  2326 	if ( blit_index & 2 ) {
  2327 	        /* alpha or alpha+colorkey */
  2328 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2329 	}
  2330 
  2331 	/* We don't support destinations less than 8-bits */
  2332 	if ( dstfmt->BitsPerPixel < 8 ) {
  2333 		return(NULL);
  2334 	}
  2335 	
  2336 	if(blit_index == 1) {
  2337 	    /* colorkey blit: Here we don't have too many options, mostly
  2338 	       because RLE is the preferred fast way to deal with this.
  2339 	       If a particular case turns out to be useful we'll add it. */
  2340 
  2341 	    if(srcfmt->BytesPerPixel == 2
  2342 	       && surface->map->identity)
  2343 		return Blit2to2Key;
  2344 	    else if(dstfmt->BytesPerPixel == 1)
  2345 		return BlitNto1Key;
  2346 	    else {
  2347 #ifdef USE_ALTIVEC_BLITTERS
  2348         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2349             return Blit32to32KeyAltivec;
  2350         } else
  2351 #endif
  2352 
  2353 		if(srcfmt->Amask && dstfmt->Amask)
  2354 		    return BlitNtoNKeyCopyAlpha;
  2355 		else
  2356 		    return BlitNtoNKey;
  2357 	    }
  2358 	}
  2359 
  2360 	blitfun = NULL;
  2361 	if ( dstfmt->BitsPerPixel == 8 ) {
  2362 		/* We assume 8-bit destinations are palettized */
  2363 		if ( (srcfmt->BytesPerPixel == 4) &&
  2364 		     (srcfmt->Rmask == 0x00FF0000) &&
  2365 		     (srcfmt->Gmask == 0x0000FF00) &&
  2366 		     (srcfmt->Bmask == 0x000000FF) ) {
  2367 			if ( surface->map->table ) {
  2368 				blitfun = Blit_RGB888_index8_map;
  2369 			} else {
  2370 #ifdef USE_ASMBLIT
  2371 				sdata->aux_data = ConvertX86p32_8RGB332;
  2372 				blitfun = ConvertX86;
  2373 #else
  2374 				blitfun = Blit_RGB888_index8;
  2375 #endif
  2376 			}
  2377 		} else {
  2378 			blitfun = BlitNto1;
  2379 		}
  2380 	} else {
  2381 		/* Now the meat, choose the blitter we want */
  2382 		int a_need = 0;
  2383 		if(dstfmt->Amask)
  2384 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2385 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2386 		for ( which=0; table[which].dstbpp; ++which ) {
  2387 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2388 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2389 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2390 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2391 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2392 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2393 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2394 			    (a_need & table[which].alpha) == a_need &&
  2395 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2396 				break;
  2397 		}
  2398 		sdata->aux_data = table[which].aux_data;
  2399 		blitfun = table[which].blitfunc;
  2400 		if(a_need == COPY_ALPHA && blitfun == BlitNtoN)
  2401 		    blitfun = BlitNtoNCopyAlpha;
  2402 	}
  2403 
  2404 #ifdef DEBUG_ASM
  2405 #ifdef USE_ASMBLIT
  2406 	if ( blitfun == ConvertMMX )
  2407 		fprintf(stderr, "Using mmx blit\n");
  2408 	else
  2409 	if ( blitfun == ConvertX86 )
  2410 		fprintf(stderr, "Using asm blit\n");
  2411 	else
  2412 #endif
  2413 	if ( (blitfun == SDL_BlitNtoN) || (blitfun == SDL_BlitNto1) )
  2414 		fprintf(stderr, "Using C blit\n");
  2415 	else
  2416 		fprintf(stderr, "Using optimized C blit\n");
  2417 #endif /* DEBUG_ASM */
  2418 
  2419 	return(blitfun);
  2420 }