src/video/SDL_blit_N.c
author Ryan C. Gordon <icculus@icculus.org>
Fri, 06 Jan 2006 13:20:10 +0000
changeset 1234 73676c1f56ee
parent 1232 0aa0000081d5
child 1312 c9b51268668f
permissions -rw-r--r--
For sanity's sake, removed the '&' when passing copy_row array to asm.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2004 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Library General Public
     7     License as published by the Free Software Foundation; either
     8     version 2 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Library General Public License for more details.
    14 
    15     You should have received a copy of the GNU Library General Public
    16     License along with this library; if not, write to the Free
    17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #ifdef SAVE_RCSID
    24 static char rcsid =
    25  "@(#) $Id$";
    26 #endif
    27 
    28 #include <stdio.h>
    29 
    30 #include "SDL_types.h"
    31 #include "SDL_video.h"
    32 #include "SDL_blit.h"
    33 #include "SDL_byteorder.h"
    34 #include "SDL_cpuinfo.h"
    35 
    36 /* Functions to blit from N-bit surfaces to other surfaces */
    37 
    38 #ifdef USE_ALTIVEC_BLITTERS
    39 #ifdef HAVE_ALTIVEC_H
    40 #include <altivec.h>
    41 #endif
    42 #include <assert.h>
    43 #include <stdlib.h>
    44 #ifdef MACOSX
    45 #include <sys/sysctl.h>
    46 static size_t GetL3CacheSize( void )
    47 {
    48     const char key[] = "hw.l3cachesize";
    49     u_int64_t result = 0;
    50     size_t typeSize = sizeof( result );
    51 
    52 
    53     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
    54     if( 0 != err ) return 0;
    55 
    56     return result;
    57 }
    58 #else
    59 static size_t GetL3CacheSize( void )
    60 {
    61     /* XXX: Just guess G4 */
    62     return 2097152;
    63 }
    64 #endif /* MACOSX */
    65 
    66 #if ((defined MACOSX) && (__GNUC__ < 4))
    67     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    68         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    69     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    70         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    71 #else
    72     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    73         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    74     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    75         (vector unsigned short) { a,b,c,d,e,f,g,h }
    76 #endif
    77 
    78 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    79 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    80                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    81                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    82                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    83                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    84 
    85 #define MAKE8888(dstfmt, r, g, b, a)  \
    86     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    87       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    88       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    89       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    90 
    91 /*
    92  * Data Stream Touch...Altivec cache prefetching.
    93  *
    94  *  Don't use this on a G5...however, the speed boost is very significant
    95  *   on a G4.
    96  */
    97 #define DST_CHAN_SRC 1
    98 #define DST_CHAN_DEST 2
    99 
   100 /* macro to set DST control word value... */
   101 #define DST_CTRL(size, count, stride) \
   102     (((size) << 24) | ((count) << 16) | (stride))
   103 
   104 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   105     ? vec_lvsl(0, src) \
   106     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   107 
   108 /* Calculate the permute vector used for 32->32 swizzling */
   109 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
   110                                   const SDL_PixelFormat *dstfmt)
   111 {
   112     /*
   113     * We have to assume that the bits that aren't used by other
   114      *  colors is alpha, and it's one complete byte, since some formats
   115      *  leave alpha with a zero mask, but we should still swizzle the bits.
   116      */
   117     /* ARGB */
   118     const static struct SDL_PixelFormat default_pixel_format = {
   119         NULL, 0, 0,
   120         0, 0, 0, 0,
   121         16, 8, 0, 24,
   122         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   123         0, 0};
   124     if (!srcfmt) {
   125         srcfmt = &default_pixel_format;
   126     }
   127     if (!dstfmt) {
   128         dstfmt = &default_pixel_format;
   129     }
   130     vector unsigned char plus = VECUINT8_LITERAL(
   131                                       0x00, 0x00, 0x00, 0x00,
   132                                       0x04, 0x04, 0x04, 0x04,
   133                                       0x08, 0x08, 0x08, 0x08,
   134                                       0x0C, 0x0C, 0x0C, 0x0C );
   135     vector unsigned char vswiz;
   136     vector unsigned int srcvec;
   137 #define RESHIFT(X) (3 - ((X) >> 3))
   138     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   139     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   140     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   141     Uint32 amask;
   142     /* Use zero for alpha if either surface doesn't have alpha */
   143     if (dstfmt->Amask) {
   144         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   145     } else {    
   146         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   147     }           
   148 #undef RESHIFT  
   149     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
   150     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   151     return(vswiz);
   152 }
   153 
   154 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   155 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   156     int height = info->d_height;
   157     Uint8 *src = (Uint8 *) info->s_pixels;
   158     int srcskip = info->s_skip;
   159     Uint8 *dst = (Uint8 *) info->d_pixels;
   160     int dstskip = info->d_skip;
   161     SDL_PixelFormat *srcfmt = info->src;
   162     vector unsigned char valpha = vec_splat_u8(0);
   163     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   164     vector unsigned char vgmerge = VECUINT8_LITERAL(
   165         0x00, 0x02, 0x00, 0x06,
   166         0x00, 0x0a, 0x00, 0x0e,
   167         0x00, 0x12, 0x00, 0x16,
   168         0x00, 0x1a, 0x00, 0x1e);
   169     vector unsigned short v1 = vec_splat_u16(1);
   170     vector unsigned short v3 = vec_splat_u16(3);
   171     vector unsigned short v3f = VECUINT16_LITERAL(
   172         0x003f, 0x003f, 0x003f, 0x003f,
   173         0x003f, 0x003f, 0x003f, 0x003f);
   174     vector unsigned short vfc = VECUINT16_LITERAL(
   175         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   176         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   177     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   178     vf800 = vec_sl(vf800, vec_splat_u16(8));
   179 
   180     while (height--) {
   181         vector unsigned char valigner;
   182         vector unsigned char voverflow;
   183         vector unsigned char vsrc;
   184 
   185         int width = info->d_width;
   186         int extrawidth;
   187 
   188         /* do scalar until we can align... */
   189 #define ONE_PIXEL_BLEND(condition, widthvar) \
   190         while (condition) { \
   191             Uint32 Pixel; \
   192             unsigned sR, sG, sB, sA; \
   193             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   194                           sR, sG, sB, sA); \
   195             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   196                                 ((sG << 3) & 0x000007E0) | \
   197                                 ((sB >> 3) & 0x0000001F)); \
   198             dst += 2; \
   199             src += 4; \
   200             widthvar--; \
   201         }
   202 
   203         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   204 
   205         /* After all that work, here's the vector part! */
   206         extrawidth = (width % 8);  /* trailing unaligned stores */
   207         width -= extrawidth;
   208         vsrc = vec_ld(0, src);
   209         valigner = VEC_ALIGNER(src);
   210 
   211         while (width) {
   212             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   213             vector unsigned int vsrc1, vsrc2;
   214             vector unsigned char vdst;
   215 
   216             voverflow = vec_ld(15, src);
   217             vsrc = vec_perm(vsrc, voverflow, valigner);
   218             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   219             src += 16;
   220             vsrc = voverflow;
   221             voverflow = vec_ld(15, src);
   222             vsrc = vec_perm(vsrc, voverflow, valigner);
   223             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   224             /* 1555 */
   225             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   226             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   227             vgpixel = vec_and(vgpixel, vfc);
   228             vgpixel = vec_sl(vgpixel, v3);
   229             vrpixel = vec_sl(vpixel, v1);
   230             vrpixel = vec_and(vrpixel, vf800);
   231             vbpixel = vec_and(vpixel, v3f);
   232             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   233             /* 565 */
   234             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   235             vec_st(vdst, 0, dst);
   236 
   237             width -= 8;
   238             src += 16;
   239             dst += 16;
   240             vsrc = voverflow;
   241         }
   242 
   243         assert(width == 0);
   244 
   245 
   246         /* do scalar until we can align... */
   247         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   248 #undef ONE_PIXEL_BLEND
   249 
   250         src += srcskip;  /* move to next row, accounting for pitch. */
   251         dst += dstskip;
   252     }
   253 
   254 
   255 }
   256 
   257 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   258     int height = info->d_height;
   259     Uint8 *src = (Uint8 *) info->s_pixels;
   260     int srcskip = info->s_skip;
   261     Uint8 *dst = (Uint8 *) info->d_pixels;
   262     int dstskip = info->d_skip;
   263     SDL_PixelFormat *srcfmt = info->src;
   264     SDL_PixelFormat *dstfmt = info->dst;
   265     unsigned alpha;
   266     vector unsigned char valpha;
   267     vector unsigned char vpermute;
   268     vector unsigned short vf800;
   269     vector unsigned int v8 = vec_splat_u32(8);
   270     vector unsigned int v16 = vec_add(v8, v8);
   271     vector unsigned short v2 = vec_splat_u16(2);
   272     vector unsigned short v3 = vec_splat_u16(3);
   273     /* 
   274         0x10 - 0x1f is the alpha
   275         0x00 - 0x0e evens are the red
   276         0x01 - 0x0f odds are zero
   277     */
   278     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   279         0x10, 0x00, 0x01, 0x01,
   280         0x10, 0x02, 0x01, 0x01,
   281         0x10, 0x04, 0x01, 0x01,
   282         0x10, 0x06, 0x01, 0x01
   283     );
   284     vector unsigned char vredalpha2 = (vector unsigned char) (
   285         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   286     );
   287     /*
   288         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   289         0x11 - 0x0f odds are blue
   290     */
   291     vector unsigned char vblue1 = VECUINT8_LITERAL(
   292         0x00, 0x01, 0x02, 0x11,
   293         0x04, 0x05, 0x06, 0x13,
   294         0x08, 0x09, 0x0a, 0x15,
   295         0x0c, 0x0d, 0x0e, 0x17
   296     );
   297     vector unsigned char vblue2 = (vector unsigned char)(
   298         vec_add((vector unsigned int)vblue1, v8)
   299     );
   300     /*
   301         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   302         0x10 - 0x0e evens are green
   303     */
   304     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   305         0x00, 0x01, 0x10, 0x03,
   306         0x04, 0x05, 0x12, 0x07,
   307         0x08, 0x09, 0x14, 0x0b,
   308         0x0c, 0x0d, 0x16, 0x0f
   309     );
   310     vector unsigned char vgreen2 = (vector unsigned char)(
   311         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   312     );
   313     
   314 
   315     assert(srcfmt->BytesPerPixel == 2);
   316     assert(dstfmt->BytesPerPixel == 4);
   317 
   318     vf800 = (vector unsigned short)vec_splat_u8(-7);
   319     vf800 = vec_sl(vf800, vec_splat_u16(8));
   320 
   321     if (dstfmt->Amask && srcfmt->alpha) {
   322         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   323         valpha = vec_splat(valpha, 0);
   324     } else {
   325         alpha = 0;
   326         valpha = vec_splat_u8(0);
   327     }
   328 
   329     vpermute = calc_swizzle32(NULL, dstfmt);
   330     while (height--) {
   331         vector unsigned char valigner;
   332         vector unsigned char voverflow;
   333         vector unsigned char vsrc;
   334 
   335         int width = info->d_width;
   336         int extrawidth;
   337 
   338         /* do scalar until we can align... */
   339 #define ONE_PIXEL_BLEND(condition, widthvar) \
   340         while (condition) { \
   341             unsigned sR, sG, sB; \
   342             unsigned short Pixel = *((unsigned short *)src); \
   343             sR = (Pixel >> 8) & 0xf8; \
   344             sG = (Pixel >> 3) & 0xfc; \
   345             sB = (Pixel << 3) & 0xf8; \
   346             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   347             src += 2; \
   348             dst += 4; \
   349             widthvar--; \
   350         }
   351         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   352 
   353         /* After all that work, here's the vector part! */
   354         extrawidth = (width % 8);  /* trailing unaligned stores */
   355         width -= extrawidth;
   356         vsrc = vec_ld(0, src);
   357         valigner = VEC_ALIGNER(src);
   358 
   359         while (width) {
   360             vector unsigned short vR, vG, vB;
   361             vector unsigned char vdst1, vdst2;
   362 
   363             voverflow = vec_ld(15, src);
   364             vsrc = vec_perm(vsrc, voverflow, valigner);
   365 
   366             vR = vec_and((vector unsigned short)vsrc, vf800);
   367             vB = vec_sl((vector unsigned short)vsrc, v3);
   368             vG = vec_sl(vB, v2);
   369 
   370             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   371             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   372             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   373             vdst1 = vec_perm(vdst1, valpha, vpermute);
   374             vec_st(vdst1, 0, dst);
   375 
   376             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   377             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   378             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   379             vdst2 = vec_perm(vdst2, valpha, vpermute);
   380             vec_st(vdst2, 16, dst);
   381             
   382             width -= 8;
   383             dst += 32;
   384             src += 16;
   385             vsrc = voverflow;
   386         }
   387 
   388         assert(width == 0);
   389 
   390 
   391         /* do scalar until we can align... */
   392         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   393 #undef ONE_PIXEL_BLEND
   394 
   395         src += srcskip;  /* move to next row, accounting for pitch. */
   396         dst += dstskip;
   397     }
   398 
   399 }
   400 
   401 
   402 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   403     int height = info->d_height;
   404     Uint8 *src = (Uint8 *) info->s_pixels;
   405     int srcskip = info->s_skip;
   406     Uint8 *dst = (Uint8 *) info->d_pixels;
   407     int dstskip = info->d_skip;
   408     SDL_PixelFormat *srcfmt = info->src;
   409     SDL_PixelFormat *dstfmt = info->dst;
   410     unsigned alpha;
   411     vector unsigned char valpha;
   412     vector unsigned char vpermute;
   413     vector unsigned short vf800;
   414     vector unsigned int v8 = vec_splat_u32(8);
   415     vector unsigned int v16 = vec_add(v8, v8);
   416     vector unsigned short v1 = vec_splat_u16(1);
   417     vector unsigned short v3 = vec_splat_u16(3);
   418     /* 
   419         0x10 - 0x1f is the alpha
   420         0x00 - 0x0e evens are the red
   421         0x01 - 0x0f odds are zero
   422     */
   423     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   424         0x10, 0x00, 0x01, 0x01,
   425         0x10, 0x02, 0x01, 0x01,
   426         0x10, 0x04, 0x01, 0x01,
   427         0x10, 0x06, 0x01, 0x01
   428     );
   429     vector unsigned char vredalpha2 = (vector unsigned char)(
   430         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   431     );
   432     /*
   433         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   434         0x11 - 0x0f odds are blue
   435     */
   436     vector unsigned char vblue1 = VECUINT8_LITERAL(
   437         0x00, 0x01, 0x02, 0x11,
   438         0x04, 0x05, 0x06, 0x13,
   439         0x08, 0x09, 0x0a, 0x15,
   440         0x0c, 0x0d, 0x0e, 0x17
   441     );
   442     vector unsigned char vblue2 = (vector unsigned char)(
   443         vec_add((vector unsigned int)vblue1, v8)
   444     );
   445     /*
   446         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   447         0x10 - 0x0e evens are green
   448     */
   449     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   450         0x00, 0x01, 0x10, 0x03,
   451         0x04, 0x05, 0x12, 0x07,
   452         0x08, 0x09, 0x14, 0x0b,
   453         0x0c, 0x0d, 0x16, 0x0f
   454     );
   455     vector unsigned char vgreen2 = (vector unsigned char)(
   456         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   457     );
   458     
   459 
   460     assert(srcfmt->BytesPerPixel == 2);
   461     assert(dstfmt->BytesPerPixel == 4);
   462 
   463     vf800 = (vector unsigned short)vec_splat_u8(-7);
   464     vf800 = vec_sl(vf800, vec_splat_u16(8));
   465 
   466     if (dstfmt->Amask && srcfmt->alpha) {
   467         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   468         valpha = vec_splat(valpha, 0);
   469     } else {
   470         alpha = 0;
   471         valpha = vec_splat_u8(0);
   472     }
   473 
   474     vpermute = calc_swizzle32(NULL, dstfmt);
   475     while (height--) {
   476         vector unsigned char valigner;
   477         vector unsigned char voverflow;
   478         vector unsigned char vsrc;
   479 
   480         int width = info->d_width;
   481         int extrawidth;
   482 
   483         /* do scalar until we can align... */
   484 #define ONE_PIXEL_BLEND(condition, widthvar) \
   485         while (condition) { \
   486             unsigned sR, sG, sB; \
   487             unsigned short Pixel = *((unsigned short *)src); \
   488             sR = (Pixel >> 7) & 0xf8; \
   489             sG = (Pixel >> 2) & 0xf8; \
   490             sB = (Pixel << 3) & 0xf8; \
   491             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   492             src += 2; \
   493             dst += 4; \
   494             widthvar--; \
   495         }
   496         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   497 
   498         /* After all that work, here's the vector part! */
   499         extrawidth = (width % 8);  /* trailing unaligned stores */
   500         width -= extrawidth;
   501         vsrc = vec_ld(0, src);
   502         valigner = VEC_ALIGNER(src);
   503 
   504         while (width) {
   505             vector unsigned short vR, vG, vB;
   506             vector unsigned char vdst1, vdst2;
   507 
   508             voverflow = vec_ld(15, src);
   509             vsrc = vec_perm(vsrc, voverflow, valigner);
   510 
   511             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   512             vB = vec_sl((vector unsigned short)vsrc, v3);
   513             vG = vec_sl(vB, v3);
   514 
   515             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   516             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   517             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   518             vdst1 = vec_perm(vdst1, valpha, vpermute);
   519             vec_st(vdst1, 0, dst);
   520 
   521             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   522             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   523             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   524             vdst2 = vec_perm(vdst2, valpha, vpermute);
   525             vec_st(vdst2, 16, dst);
   526             
   527             width -= 8;
   528             dst += 32;
   529             src += 16;
   530             vsrc = voverflow;
   531         }
   532 
   533         assert(width == 0);
   534 
   535 
   536         /* do scalar until we can align... */
   537         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   538 #undef ONE_PIXEL_BLEND
   539 
   540         src += srcskip;  /* move to next row, accounting for pitch. */
   541         dst += dstskip;
   542     }
   543 
   544 }
   545 
   546 static void BlitNtoNKey(SDL_BlitInfo *info);
   547 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   548 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   549 {
   550     int height = info->d_height;
   551     Uint32 *srcp = (Uint32 *) info->s_pixels;
   552     int srcskip = info->s_skip;
   553     Uint32 *dstp = (Uint32 *) info->d_pixels;
   554     int dstskip = info->d_skip;
   555     SDL_PixelFormat *srcfmt = info->src;
   556     int srcbpp = srcfmt->BytesPerPixel;
   557     SDL_PixelFormat *dstfmt = info->dst;
   558     int dstbpp = dstfmt->BytesPerPixel;
   559     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   560 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   561     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   562 	Uint32 ckey = info->src->colorkey;
   563     vector unsigned int valpha;
   564     vector unsigned char vpermute;
   565     vector unsigned char vzero;
   566     vector unsigned int vckey;
   567     vector unsigned int vrgbmask;
   568     vpermute = calc_swizzle32(srcfmt, dstfmt);
   569     if (info->d_width < 16) {
   570         if(copy_alpha) {
   571             return BlitNtoNKeyCopyAlpha(info);
   572         } else {
   573             return BlitNtoNKey(info);
   574         }
   575     }
   576     vzero = vec_splat_u8(0);
   577     if (alpha) {
   578         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   579         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   580     } else {
   581         valpha = (vector unsigned int)vzero;
   582     }
   583     ckey &= rgbmask;
   584     ((unsigned int *)(char*)&vckey)[0] = ckey;
   585     vckey = vec_splat(vckey, 0);
   586     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   587     vrgbmask = vec_splat(vrgbmask, 0);
   588 
   589     while (height--) {
   590 #define ONE_PIXEL_BLEND(condition, widthvar) \
   591         if (copy_alpha) { \
   592             while (condition) { \
   593                 Uint32 Pixel; \
   594                 unsigned sR, sG, sB, sA; \
   595                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   596                           sR, sG, sB, sA); \
   597                 if ( (Pixel & rgbmask) != ckey ) { \
   598                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   599                             sR, sG, sB, sA); \
   600                 } \
   601                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   602                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   603                 widthvar--; \
   604             } \
   605         } else { \
   606             while (condition) { \
   607                 Uint32 Pixel; \
   608                 unsigned sR, sG, sB; \
   609                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   610                 if ( Pixel != ckey ) { \
   611                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   612                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   613                               sR, sG, sB, alpha); \
   614                 } \
   615                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   616                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   617                 widthvar--; \
   618             } \
   619         }
   620         int width = info->d_width;
   621         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   622         assert(width > 0);
   623         if (width > 0) {
   624             int extrawidth = (width % 4);
   625             vector unsigned char valigner = VEC_ALIGNER(srcp);
   626             vector unsigned int vs = vec_ld(0, srcp);
   627             width -= extrawidth;
   628             assert(width >= 4);
   629             while (width) {
   630                 vector unsigned char vsel;
   631                 vector unsigned int vd;
   632                 vector unsigned int voverflow = vec_ld(15, srcp);
   633                 /* load the source vec */
   634                 vs = vec_perm(vs, voverflow, valigner);
   635                 /* vsel is set for items that match the key */
   636                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   637                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   638                 /* permute the src vec to the dest format */
   639                 vs = vec_perm(vs, valpha, vpermute);
   640                 /* load the destination vec */
   641                 vd = vec_ld(0, dstp);
   642                 /* select the source and dest into vs */
   643                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   644                 
   645                 vec_st(vd, 0, dstp);
   646                 srcp += 4;
   647                 width -= 4;
   648                 dstp += 4;
   649                 vs = voverflow;
   650             }
   651             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   652 #undef ONE_PIXEL_BLEND
   653             srcp += srcskip >> 2;
   654             dstp += dstskip >> 2;
   655         }
   656     }
   657 }
   658 
   659 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   660 /* Use this on a G5 */
   661 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   662 {
   663     int height = info->d_height;
   664     Uint32 *src = (Uint32 *) info->s_pixels;
   665     int srcskip = info->s_skip;
   666     Uint32 *dst = (Uint32 *) info->d_pixels;
   667     int dstskip = info->d_skip;
   668     SDL_PixelFormat *srcfmt = info->src;
   669     int srcbpp = srcfmt->BytesPerPixel;
   670     SDL_PixelFormat *dstfmt = info->dst;
   671     int dstbpp = dstfmt->BytesPerPixel;
   672     vector unsigned int vzero = vec_splat_u32(0);
   673     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   674     if (dstfmt->Amask && !srcfmt->Amask) {
   675         if (srcfmt->alpha) {
   676             vector unsigned char valpha;
   677             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   678             vzero = (vector unsigned int)vec_splat(valpha, 0);
   679         }
   680     }
   681 
   682     assert(srcbpp == 4);
   683     assert(dstbpp == 4);
   684 
   685     while (height--) {
   686         vector unsigned char valigner;
   687         vector unsigned int vbits;
   688         vector unsigned int voverflow;
   689         Uint32 bits;
   690         Uint8 r, g, b, a;
   691 
   692         int width = info->d_width;
   693         int extrawidth;
   694 
   695         /* do scalar until we can align... */
   696         while ((UNALIGNED_PTR(dst)) && (width)) {
   697             bits = *(src++);
   698             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   699             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   700             width--;
   701         }
   702 
   703         /* After all that work, here's the vector part! */
   704         extrawidth = (width % 4);
   705         width -= extrawidth;
   706         valigner = VEC_ALIGNER(src);
   707         vbits = vec_ld(0, src);
   708 
   709        while (width) {
   710             voverflow = vec_ld(15, src);
   711             src += 4;
   712             width -= 4;
   713             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   714             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   715             vec_st(vbits, 0, dst);  /* store it back out. */
   716             dst += 4;
   717             vbits = voverflow;
   718         }
   719 
   720         assert(width == 0);
   721 
   722         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   723         while (extrawidth) {
   724             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   725             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   726             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   727             extrawidth--;
   728         }
   729 
   730         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   731         dst += dstskip >> 2;
   732     }
   733 
   734 }
   735 
   736 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   737 /* Use this on a G4 */
   738 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
   739 {
   740     const int scalar_dst_lead = sizeof (Uint32) * 4;
   741     const int vector_dst_lead = sizeof (Uint32) * 16;
   742 
   743     int height = info->d_height;
   744     Uint32 *src = (Uint32 *) info->s_pixels;
   745     int srcskip = info->s_skip;
   746     Uint32 *dst = (Uint32 *) info->d_pixels;
   747     int dstskip = info->d_skip;
   748     SDL_PixelFormat *srcfmt = info->src;
   749     int srcbpp = srcfmt->BytesPerPixel;
   750     SDL_PixelFormat *dstfmt = info->dst;
   751     int dstbpp = dstfmt->BytesPerPixel;
   752     vector unsigned int vzero = vec_splat_u32(0);
   753     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   754     if (dstfmt->Amask && !srcfmt->Amask) {
   755         if (srcfmt->alpha) {
   756             vector unsigned char valpha;
   757             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   758             vzero = (vector unsigned int)vec_splat(valpha, 0);
   759         }
   760     }
   761 
   762     assert(srcbpp == 4);
   763     assert(dstbpp == 4);
   764 
   765     while (height--) {
   766         vector unsigned char valigner;
   767         vector unsigned int vbits;
   768         vector unsigned int voverflow;
   769         Uint32 bits;
   770         Uint8 r, g, b, a;
   771 
   772         int width = info->d_width;
   773         int extrawidth;
   774 
   775         /* do scalar until we can align... */
   776         while ((UNALIGNED_PTR(dst)) && (width)) {
   777             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   778             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   779             bits = *(src++);
   780             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   781             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   782             width--;
   783         }
   784 
   785         /* After all that work, here's the vector part! */
   786         extrawidth = (width % 4);
   787         width -= extrawidth;
   788         valigner = VEC_ALIGNER(src);
   789         vbits = vec_ld(0, src);
   790 
   791         while (width) {
   792             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   793             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   794             voverflow = vec_ld(15, src);
   795             src += 4;
   796             width -= 4;
   797             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   798             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   799             vec_st(vbits, 0, dst);  /* store it back out. */
   800             dst += 4;
   801             vbits = voverflow;
   802         }
   803         
   804         assert(width == 0);
   805 
   806         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   807         while (extrawidth) {
   808             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   809             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   810             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   811             extrawidth--;
   812         }
   813 
   814         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   815         dst += dstskip >> 2;
   816     }
   817 
   818     vec_dss(DST_CHAN_SRC);
   819     vec_dss(DST_CHAN_DEST);
   820 }
   821 
   822 static Uint32 GetBlitFeatures( void )
   823 {
   824     static Uint32 features = 0xffffffff;
   825     if (features == 0xffffffff) {
   826         /* Provide an override for testing .. */
   827         char *override = getenv("SDL_ALTIVEC_BLIT_FEATURES");
   828         if (override) {
   829             features = 0;
   830             sscanf(override, "%u", &features);
   831         } else {
   832             features = ( 0
   833                 /* Feature 1 is has-MMX */
   834                 | ((SDL_HasMMX()) ? 1 : 0)
   835                 /* Feature 2 is has-AltiVec */
   836                 | ((SDL_HasAltiVec()) ? 2 : 0)
   837                 /* Feature 4 is dont-use-prefetch */
   838                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   839                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   840             );
   841         }
   842     }
   843     return features;
   844 }
   845 #else
   846 /* Feature 1 is has-MMX */
   847 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   848 #endif
   849 
   850 /* This is now endian dependent */
   851 #if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
   852 #define HI	1
   853 #define LO	0
   854 #else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
   855 #define HI	0
   856 #define LO	1
   857 #endif
   858 
   859 #ifdef USE_ASMBLIT
   860 
   861 /* Heheheh, we coerce Hermes into using SDL blit information */
   862 #define X86_ASSEMBLER
   863 #define HermesConverterInterface	SDL_BlitInfo
   864 #define HermesClearInterface		void
   865 #define STACKCALL
   866 
   867 #include "HeadMMX.h"
   868 #include "HeadX86.h"
   869 
   870 #else
   871 
   872 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   873 #define RGB888_RGB332(dst, src) { \
   874 	dst = (((src)&0x00E00000)>>16)| \
   875 	      (((src)&0x0000E000)>>11)| \
   876 	      (((src)&0x000000C0)>>6); \
   877 }
   878 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   879 {
   880 #ifndef USE_DUFFS_LOOP
   881 	int c;
   882 #endif
   883 	int width, height;
   884 	Uint32 *src;
   885 	const Uint8 *map;
   886 	Uint8 *dst;
   887 	int srcskip, dstskip;
   888 
   889 	/* Set up some basic variables */
   890 	width = info->d_width;
   891 	height = info->d_height;
   892 	src = (Uint32 *)info->s_pixels;
   893 	srcskip = info->s_skip/4;
   894 	dst = info->d_pixels;
   895 	dstskip = info->d_skip;
   896 	map = info->table;
   897 
   898 	if ( map == NULL ) {
   899 		while ( height-- ) {
   900 #ifdef USE_DUFFS_LOOP
   901 			DUFFS_LOOP(
   902 				RGB888_RGB332(*dst++, *src);
   903 			, width);
   904 #else
   905 			for ( c=width/4; c; --c ) {
   906 				/* Pack RGB into 8bit pixel */
   907 				++src;
   908 				RGB888_RGB332(*dst++, *src);
   909 				++src;
   910 				RGB888_RGB332(*dst++, *src);
   911 				++src;
   912 				RGB888_RGB332(*dst++, *src);
   913 				++src;
   914 			}
   915 			switch ( width & 3 ) {
   916 				case 3:
   917 					RGB888_RGB332(*dst++, *src);
   918 					++src;
   919 				case 2:
   920 					RGB888_RGB332(*dst++, *src);
   921 					++src;
   922 				case 1:
   923 					RGB888_RGB332(*dst++, *src);
   924 					++src;
   925 			}
   926 #endif /* USE_DUFFS_LOOP */
   927 			src += srcskip;
   928 			dst += dstskip;
   929 		}
   930 	} else {
   931 		int Pixel;
   932 
   933 		while ( height-- ) {
   934 #ifdef USE_DUFFS_LOOP
   935 			DUFFS_LOOP(
   936 				RGB888_RGB332(Pixel, *src);
   937 				*dst++ = map[Pixel];
   938 				++src;
   939 			, width);
   940 #else
   941 			for ( c=width/4; c; --c ) {
   942 				/* Pack RGB into 8bit pixel */
   943 				RGB888_RGB332(Pixel, *src);
   944 				*dst++ = map[Pixel];
   945 				++src;
   946 				RGB888_RGB332(Pixel, *src);
   947 				*dst++ = map[Pixel];
   948 				++src;
   949 				RGB888_RGB332(Pixel, *src);
   950 				*dst++ = map[Pixel];
   951 				++src;
   952 				RGB888_RGB332(Pixel, *src);
   953 				*dst++ = map[Pixel];
   954 				++src;
   955 			}
   956 			switch ( width & 3 ) {
   957 				case 3:
   958 					RGB888_RGB332(Pixel, *src);
   959 					*dst++ = map[Pixel];
   960 					++src;
   961 				case 2:
   962 					RGB888_RGB332(Pixel, *src);
   963 					*dst++ = map[Pixel];
   964 					++src;
   965 				case 1:
   966 					RGB888_RGB332(Pixel, *src);
   967 					*dst++ = map[Pixel];
   968 					++src;
   969 			}
   970 #endif /* USE_DUFFS_LOOP */
   971 			src += srcskip;
   972 			dst += dstskip;
   973 		}
   974 	}
   975 }
   976 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   977 #define RGB888_RGB555(dst, src) { \
   978 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>9)| \
   979 	                   (((*src)&0x0000F800)>>6)| \
   980 	                   (((*src)&0x000000F8)>>3); \
   981 }
   982 #define RGB888_RGB555_TWO(dst, src) { \
   983 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   984 	                     (((src[HI])&0x0000F800)>>6)| \
   985 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   986 	                     (((src[LO])&0x00F80000)>>9)| \
   987 	                     (((src[LO])&0x0000F800)>>6)| \
   988 	                     (((src[LO])&0x000000F8)>>3); \
   989 }
   990 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   991 {
   992 #ifndef USE_DUFFS_LOOP
   993 	int c;
   994 #endif
   995 	int width, height;
   996 	Uint32 *src;
   997 	Uint16 *dst;
   998 	int srcskip, dstskip;
   999 
  1000 	/* Set up some basic variables */
  1001 	width = info->d_width;
  1002 	height = info->d_height;
  1003 	src = (Uint32 *)info->s_pixels;
  1004 	srcskip = info->s_skip/4;
  1005 	dst = (Uint16 *)info->d_pixels;
  1006 	dstskip = info->d_skip/2;
  1007 
  1008 #ifdef USE_DUFFS_LOOP
  1009 	while ( height-- ) {
  1010 		DUFFS_LOOP(
  1011 			RGB888_RGB555(dst, src);
  1012 			++src;
  1013 			++dst;
  1014 		, width);
  1015 		src += srcskip;
  1016 		dst += dstskip;
  1017 	}
  1018 #else
  1019 	/* Memory align at 4-byte boundary, if necessary */
  1020 	if ( (long)dst & 0x03 ) {
  1021 		/* Don't do anything if width is 0 */
  1022 		if ( width == 0 ) {
  1023 			return;
  1024 		}
  1025 		--width;
  1026 
  1027 		while ( height-- ) {
  1028 			/* Perform copy alignment */
  1029 			RGB888_RGB555(dst, src);
  1030 			++src;
  1031 			++dst;
  1032 
  1033 			/* Copy in 4 pixel chunks */
  1034 			for ( c=width/4; c; --c ) {
  1035 				RGB888_RGB555_TWO(dst, src);
  1036 				src += 2;
  1037 				dst += 2;
  1038 				RGB888_RGB555_TWO(dst, src);
  1039 				src += 2;
  1040 				dst += 2;
  1041 			}
  1042 			/* Get any leftovers */
  1043 			switch (width & 3) {
  1044 				case 3:
  1045 					RGB888_RGB555(dst, src);
  1046 					++src;
  1047 					++dst;
  1048 				case 2:
  1049 					RGB888_RGB555_TWO(dst, src);
  1050 					src += 2;
  1051 					dst += 2;
  1052 					break;
  1053 				case 1:
  1054 					RGB888_RGB555(dst, src);
  1055 					++src;
  1056 					++dst;
  1057 					break;
  1058 			}
  1059 			src += srcskip;
  1060 			dst += dstskip;
  1061 		}
  1062 	} else { 
  1063 		while ( height-- ) {
  1064 			/* Copy in 4 pixel chunks */
  1065 			for ( c=width/4; c; --c ) {
  1066 				RGB888_RGB555_TWO(dst, src);
  1067 				src += 2;
  1068 				dst += 2;
  1069 				RGB888_RGB555_TWO(dst, src);
  1070 				src += 2;
  1071 				dst += 2;
  1072 			}
  1073 			/* Get any leftovers */
  1074 			switch (width & 3) {
  1075 				case 3:
  1076 					RGB888_RGB555(dst, src);
  1077 					++src;
  1078 					++dst;
  1079 				case 2:
  1080 					RGB888_RGB555_TWO(dst, src);
  1081 					src += 2;
  1082 					dst += 2;
  1083 					break;
  1084 				case 1:
  1085 					RGB888_RGB555(dst, src);
  1086 					++src;
  1087 					++dst;
  1088 					break;
  1089 			}
  1090 			src += srcskip;
  1091 			dst += dstskip;
  1092 		}
  1093 	}
  1094 #endif /* USE_DUFFS_LOOP */
  1095 }
  1096 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1097 #define RGB888_RGB565(dst, src) { \
  1098 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>8)| \
  1099 	                   (((*src)&0x0000FC00)>>5)| \
  1100 	                   (((*src)&0x000000F8)>>3); \
  1101 }
  1102 #define RGB888_RGB565_TWO(dst, src) { \
  1103 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1104 	                     (((src[HI])&0x0000FC00)>>5)| \
  1105 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1106 	                     (((src[LO])&0x00F80000)>>8)| \
  1107 	                     (((src[LO])&0x0000FC00)>>5)| \
  1108 	                     (((src[LO])&0x000000F8)>>3); \
  1109 }
  1110 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1111 {
  1112 #ifndef USE_DUFFS_LOOP
  1113 	int c;
  1114 #endif
  1115 	int width, height;
  1116 	Uint32 *src;
  1117 	Uint16 *dst;
  1118 	int srcskip, dstskip;
  1119 
  1120 	/* Set up some basic variables */
  1121 	width = info->d_width;
  1122 	height = info->d_height;
  1123 	src = (Uint32 *)info->s_pixels;
  1124 	srcskip = info->s_skip/4;
  1125 	dst = (Uint16 *)info->d_pixels;
  1126 	dstskip = info->d_skip/2;
  1127 
  1128 #ifdef USE_DUFFS_LOOP
  1129 	while ( height-- ) {
  1130 		DUFFS_LOOP(
  1131 			RGB888_RGB565(dst, src);
  1132 			++src;
  1133 			++dst;
  1134 		, width);
  1135 		src += srcskip;
  1136 		dst += dstskip;
  1137 	}
  1138 #else
  1139 	/* Memory align at 4-byte boundary, if necessary */
  1140 	if ( (long)dst & 0x03 ) {
  1141 		/* Don't do anything if width is 0 */
  1142 		if ( width == 0 ) {
  1143 			return;
  1144 		}
  1145 		--width;
  1146 
  1147 		while ( height-- ) {
  1148 			/* Perform copy alignment */
  1149 			RGB888_RGB565(dst, src);
  1150 			++src;
  1151 			++dst;
  1152 
  1153 			/* Copy in 4 pixel chunks */
  1154 			for ( c=width/4; c; --c ) {
  1155 				RGB888_RGB565_TWO(dst, src);
  1156 				src += 2;
  1157 				dst += 2;
  1158 				RGB888_RGB565_TWO(dst, src);
  1159 				src += 2;
  1160 				dst += 2;
  1161 			}
  1162 			/* Get any leftovers */
  1163 			switch (width & 3) {
  1164 				case 3:
  1165 					RGB888_RGB565(dst, src);
  1166 					++src;
  1167 					++dst;
  1168 				case 2:
  1169 					RGB888_RGB565_TWO(dst, src);
  1170 					src += 2;
  1171 					dst += 2;
  1172 					break;
  1173 				case 1:
  1174 					RGB888_RGB565(dst, src);
  1175 					++src;
  1176 					++dst;
  1177 					break;
  1178 			}
  1179 			src += srcskip;
  1180 			dst += dstskip;
  1181 		}
  1182 	} else { 
  1183 		while ( height-- ) {
  1184 			/* Copy in 4 pixel chunks */
  1185 			for ( c=width/4; c; --c ) {
  1186 				RGB888_RGB565_TWO(dst, src);
  1187 				src += 2;
  1188 				dst += 2;
  1189 				RGB888_RGB565_TWO(dst, src);
  1190 				src += 2;
  1191 				dst += 2;
  1192 			}
  1193 			/* Get any leftovers */
  1194 			switch (width & 3) {
  1195 				case 3:
  1196 					RGB888_RGB565(dst, src);
  1197 					++src;
  1198 					++dst;
  1199 				case 2:
  1200 					RGB888_RGB565_TWO(dst, src);
  1201 					src += 2;
  1202 					dst += 2;
  1203 					break;
  1204 				case 1:
  1205 					RGB888_RGB565(dst, src);
  1206 					++src;
  1207 					++dst;
  1208 					break;
  1209 			}
  1210 			src += srcskip;
  1211 			dst += dstskip;
  1212 		}
  1213 	}
  1214 #endif /* USE_DUFFS_LOOP */
  1215 }
  1216 
  1217 #endif /* USE_ASMBLIT */
  1218 
  1219 
  1220 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1221 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1222 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1223 {
  1224 #ifndef USE_DUFFS_LOOP
  1225 	int c;
  1226 #endif
  1227 	int width, height;
  1228 	Uint8 *src;
  1229 	Uint32 *dst;
  1230 	int srcskip, dstskip;
  1231 
  1232 	/* Set up some basic variables */
  1233 	width = info->d_width;
  1234 	height = info->d_height;
  1235 	src = (Uint8 *)info->s_pixels;
  1236 	srcskip = info->s_skip;
  1237 	dst = (Uint32 *)info->d_pixels;
  1238 	dstskip = info->d_skip/4;
  1239 
  1240 #ifdef USE_DUFFS_LOOP
  1241 	while ( height-- ) {
  1242 		DUFFS_LOOP(
  1243 		{
  1244 			*dst++ = RGB565_32(dst, src, map);
  1245 			src += 2;
  1246 		},
  1247 		width);
  1248 		src += srcskip;
  1249 		dst += dstskip;
  1250 	}
  1251 #else
  1252 	while ( height-- ) {
  1253 		/* Copy in 4 pixel chunks */
  1254 		for ( c=width/4; c; --c ) {
  1255 			*dst++ = RGB565_32(dst, src, map);
  1256 			src += 2;
  1257 			*dst++ = RGB565_32(dst, src, map);
  1258 			src += 2;
  1259 			*dst++ = RGB565_32(dst, src, map);
  1260 			src += 2;
  1261 			*dst++ = RGB565_32(dst, src, map);
  1262 			src += 2;
  1263 		}
  1264 		/* Get any leftovers */
  1265 		switch (width & 3) {
  1266 			case 3:
  1267 				*dst++ = RGB565_32(dst, src, map);
  1268 				src += 2;
  1269 			case 2:
  1270 				*dst++ = RGB565_32(dst, src, map);
  1271 				src += 2;
  1272 			case 1:
  1273 				*dst++ = RGB565_32(dst, src, map);
  1274 				src += 2;
  1275 				break;
  1276 		}
  1277 		src += srcskip;
  1278 		dst += dstskip;
  1279 	}
  1280 #endif /* USE_DUFFS_LOOP */
  1281 }
  1282 
  1283 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
  1284 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1285 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1286 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1287 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1288 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1289 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1290 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1291 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1292 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1293 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1294 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1295 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1296 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1297 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1298 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1299 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1300 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1301 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1302 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1303 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1304 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1305 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1306 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1307 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1308 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1309 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1310 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1311 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1312 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1313 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1314 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1315 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1316 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1317 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1318 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1319 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1320 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1321 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1322 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1323 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1324 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1325 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1326 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1327 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1328 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1329 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1330 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1331 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1332 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1333 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1334 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1335 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1336 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1337 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1338 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1339 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1340 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1341 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1342 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1343 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1344 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1345 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1346 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1347 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1348 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1349 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1350 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1351 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1352 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1353 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1354 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1355 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1356 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1357 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1358 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1359 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1360 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1361 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1362 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1363 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1364 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1365 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1366 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1367 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1368 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1369 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1370 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1371 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1372 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1373 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1374 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1375 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1376 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1377 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1378 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1379 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1380 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1381 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1382 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1383 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1384 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1385 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1386 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1387 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1388 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1389 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1390 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1391 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1392 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1393 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1394 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1395 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1396 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1397 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1398 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1399 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1400 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1401 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1402 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1403 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1404 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1405 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1406 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1407 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1408 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1409 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1410 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1411 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1412 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1413 };
  1414 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
  1415 {
  1416     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1417 }
  1418 
  1419 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1420 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1421 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1422 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1423 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1424 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1425 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1426 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1427 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1428 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1429 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1430 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1431 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1432 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1433 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1434 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1435 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1436 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1437 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1438 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1439 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1440 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1441 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1442 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1443 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1444 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1445 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1446 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1447 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1448 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1449 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1450 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1451 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1452 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1453 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1454 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1455 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1456 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1457 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1458 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1459 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1460 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1461 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1462 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1463 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1464 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1465 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1466 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1467 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1468 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1469 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1470 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1471 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1472 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1473 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1474 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1475 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1476 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1477 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1478 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1479 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1480 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1481 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1482 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1483 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1484 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1485 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1486 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1487 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1488 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1489 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1490 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1491 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1492 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1493 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1494 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1495 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1496 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1497 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1498 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1499 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1500 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1501 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1502 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1503 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1504 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1505 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1506 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1507 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1508 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1509 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1510 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1511 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1512 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1513 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1514 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1515 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1516 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1517 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1518 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1519 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1520 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1521 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1522 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1523 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1524 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1525 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1526 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1527 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1528 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1529 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1530 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1531 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1532 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1533 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1534 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1535 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1536 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1537 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1538 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1539 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1540 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1541 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1542 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1543 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1544 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1545 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1546 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1547 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1548 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1549 };
  1550 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
  1551 {
  1552     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1553 }
  1554 
  1555 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1556 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1557 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1558 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1559 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1560 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1561 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1562 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1563 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1564 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1565 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1566 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1567 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1568 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1569 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1570 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1571 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1572 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1573 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1574 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1575 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1576 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1577 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1578 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1579 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1580 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1581 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1582 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1583 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1584 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1585 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1586 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1587 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1588 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1589 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1590 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1591 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1592 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1593 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1594 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1595 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1596 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1597 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1598 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1599 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1600 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1601 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1602 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1603 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1604 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1605 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1606 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1607 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1608 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1609 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1610 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1611 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1612 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1613 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1614 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1615 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1616 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1617 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1618 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1619 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1620 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1621 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1622 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1623 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1624 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1625 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1626 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1627 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1628 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1629 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1630 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1631 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1632 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1633 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1634 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1635 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1636 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1637 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1638 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1639 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1640 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1641 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1642 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1643 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1644 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1645 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1646 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1647 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1648 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1649 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1650 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1651 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1652 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1653 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1654 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1655 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1656 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1657 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1658 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1659 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1660 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1661 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1662 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1663 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1664 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1665 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1666 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1667 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1668 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1669 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1670 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1671 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1672 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1673 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1674 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1675 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1676 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1677 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1678 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1679 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1680 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1681 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1682 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1683 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1684 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1685 };
  1686 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
  1687 {
  1688     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1689 }
  1690 
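/* Lookup table for the special optimized RGB 5-6-5 --> BGRA 8-8-8-8 blit.
   It holds 256 pairs of partial pixels: each even entry carries the blue
   and low-green bits, each odd entry the red and high-green bits plus an
   opaque 0xff alpha.  Presumably one pair per source byte value, combined
   by Blit_RGB565_32() -- confirm against that function. */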
  1692 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1693 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1694 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1695 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1696 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1697 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1698 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1699 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1700 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1701 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1702 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1703 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1704 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1705 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1706 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1707 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1708 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1709 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1710 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1711 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1712 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1713 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1714 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1715 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1716 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1717 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1718 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1719 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1720 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1721 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1722 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1723 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1724 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1725 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1726 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1727 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1728 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1729 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1730 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1731 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1732 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1733 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1734 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1735 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1736 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1737 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1738 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1739 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1740 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1741 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1742 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1743 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1744 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1745 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1746 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1747 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1748 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1749 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1750 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1751 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1752 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1753 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1754 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1755 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1756 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1757 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1758 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1759 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1760 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1761 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1762 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1763 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1764 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1765 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1766 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1767 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1768 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1769 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1770 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1771 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1772 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1773 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1774 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1775 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1776 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1777 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1778 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1779 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1780 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1781 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1782 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1783 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1784 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1785 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1786 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1787 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1788 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1789 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1790 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1791 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1792 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1793 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1794 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1795 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1796 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1797 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1798 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1799 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1800 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1801 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1802 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1803 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1804 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1805 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1806 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1807 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1808 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1809 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1810 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1811 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1812 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1813 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1814 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1815 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1816 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1817 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1818 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1819 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1820 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1821 };
  1822 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
  1823 {
  1824     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1825 }
  1826 
  1827 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1828 #ifndef RGB888_RGB332
  1829 #define RGB888_RGB332(dst, src) { \
  1830 	dst = (((src)&0x00E00000)>>16)| \
  1831 	      (((src)&0x0000E000)>>11)| \
  1832 	      (((src)&0x000000C0)>>6); \
  1833 }
  1834 #endif
  1835 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1836 {
  1837 #ifndef USE_DUFFS_LOOP
  1838 	int c;
  1839 #endif
  1840 	int Pixel;
  1841 	int width, height;
  1842 	Uint32 *src;
  1843 	const Uint8 *map;
  1844 	Uint8 *dst;
  1845 	int srcskip, dstskip;
  1846 
  1847 	/* Set up some basic variables */
  1848 	width = info->d_width;
  1849 	height = info->d_height;
  1850 	src = (Uint32 *)info->s_pixels;
  1851 	srcskip = info->s_skip/4;
  1852 	dst = info->d_pixels;
  1853 	dstskip = info->d_skip;
  1854 	map = info->table;
  1855 
  1856 #ifdef USE_DUFFS_LOOP
  1857 	while ( height-- ) {
  1858 		DUFFS_LOOP(
  1859 			RGB888_RGB332(Pixel, *src);
  1860 			*dst++ = map[Pixel];
  1861 			++src;
  1862 		, width);
  1863 		src += srcskip;
  1864 		dst += dstskip;
  1865 	}
  1866 #else
  1867 	while ( height-- ) {
  1868 		for ( c=width/4; c; --c ) {
  1869 			/* Pack RGB into 8bit pixel */
  1870 			RGB888_RGB332(Pixel, *src);
  1871 			*dst++ = map[Pixel];
  1872 			++src;
  1873 			RGB888_RGB332(Pixel, *src);
  1874 			*dst++ = map[Pixel];
  1875 			++src;
  1876 			RGB888_RGB332(Pixel, *src);
  1877 			*dst++ = map[Pixel];
  1878 			++src;
  1879 			RGB888_RGB332(Pixel, *src);
  1880 			*dst++ = map[Pixel];
  1881 			++src;
  1882 		}
  1883 		switch ( width & 3 ) {
  1884 			case 3:
  1885 				RGB888_RGB332(Pixel, *src);
  1886 				*dst++ = map[Pixel];
  1887 				++src;
  1888 			case 2:
  1889 				RGB888_RGB332(Pixel, *src);
  1890 				*dst++ = map[Pixel];
  1891 				++src;
  1892 			case 1:
  1893 				RGB888_RGB332(Pixel, *src);
  1894 				*dst++ = map[Pixel];
  1895 				++src;
  1896 		}
  1897 		src += srcskip;
  1898 		dst += dstskip;
  1899 	}
  1900 #endif /* USE_DUFFS_LOOP */
  1901 }
  1902 static void BlitNto1(SDL_BlitInfo *info)
  1903 {
  1904 #ifndef USE_DUFFS_LOOP
  1905 	int c;
  1906 #endif
  1907 	int width, height;
  1908 	Uint8 *src;
  1909 	const Uint8 *map;
  1910 	Uint8 *dst;
  1911 	int srcskip, dstskip;
  1912 	int srcbpp;
  1913 	Uint32 Pixel;
  1914 	int  sR, sG, sB;
  1915 	SDL_PixelFormat *srcfmt;
  1916 
  1917 	/* Set up some basic variables */
  1918 	width = info->d_width;
  1919 	height = info->d_height;
  1920 	src = info->s_pixels;
  1921 	srcskip = info->s_skip;
  1922 	dst = info->d_pixels;
  1923 	dstskip = info->d_skip;
  1924 	map = info->table;
  1925 	srcfmt = info->src;
  1926 	srcbpp = srcfmt->BytesPerPixel;
  1927 
  1928 	if ( map == NULL ) {
  1929 		while ( height-- ) {
  1930 #ifdef USE_DUFFS_LOOP
  1931 			DUFFS_LOOP(
  1932 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1933 								sR, sG, sB);
  1934 				if ( 1 ) {
  1935 				  	/* Pack RGB into 8bit pixel */
  1936 				  	*dst = ((sR>>5)<<(3+2))|
  1937 					        ((sG>>5)<<(2)) |
  1938 					        ((sB>>6)<<(0)) ;
  1939 				}
  1940 				dst++;
  1941 				src += srcbpp;
  1942 			, width);
  1943 #else
  1944 			for ( c=width; c; --c ) {
  1945 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1946 								sR, sG, sB);
  1947 				if ( 1 ) {
  1948 				  	/* Pack RGB into 8bit pixel */
  1949 				  	*dst = ((sR>>5)<<(3+2))|
  1950 					        ((sG>>5)<<(2)) |
  1951 					        ((sB>>6)<<(0)) ;
  1952 				}
  1953 				dst++;
  1954 				src += srcbpp;
  1955 			}
  1956 #endif
  1957 			src += srcskip;
  1958 			dst += dstskip;
  1959 		}
  1960 	} else {
  1961 		while ( height-- ) {
  1962 #ifdef USE_DUFFS_LOOP
  1963 			DUFFS_LOOP(
  1964 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1965 								sR, sG, sB);
  1966 				if ( 1 ) {
  1967 				  	/* Pack RGB into 8bit pixel */
  1968 				  	*dst = map[((sR>>5)<<(3+2))|
  1969 						   ((sG>>5)<<(2))  |
  1970 						   ((sB>>6)<<(0))  ];
  1971 				}
  1972 				dst++;
  1973 				src += srcbpp;
  1974 			, width);
  1975 #else
  1976 			for ( c=width; c; --c ) {
  1977 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1978 								sR, sG, sB);
  1979 				if ( 1 ) {
  1980 				  	/* Pack RGB into 8bit pixel */
  1981 				  	*dst = map[((sR>>5)<<(3+2))|
  1982 						   ((sG>>5)<<(2))  |
  1983 						   ((sB>>6)<<(0))  ];
  1984 				}
  1985 				dst++;
  1986 				src += srcbpp;
  1987 			}
  1988 #endif /* USE_DUFFS_LOOP */
  1989 			src += srcskip;
  1990 			dst += dstskip;
  1991 		}
  1992 	}
  1993 }
  1994 
  1995 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  1996 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1997 {
  1998 	int width = info->d_width;
  1999 	int height = info->d_height;
  2000 	Uint32 *src = (Uint32 *)info->s_pixels;
  2001 	int srcskip = info->s_skip;
  2002 	Uint32 *dst = (Uint32 *)info->d_pixels;
  2003 	int dstskip = info->d_skip;
  2004 	SDL_PixelFormat *srcfmt = info->src;
  2005 	SDL_PixelFormat *dstfmt = info->dst;
  2006 
  2007 	if (dstfmt->Amask) {
  2008 		/* RGB->RGBA, SET_ALPHA */
  2009 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  2010 
  2011 		while ( height-- ) {
  2012 			DUFFS_LOOP(
  2013 			{
  2014 				*dst = *src | mask;
  2015 				++dst;
  2016 				++src;
  2017 			},
  2018 			width);
  2019 			src = (Uint32*)((Uint8*)src + srcskip);
  2020 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2021 		}
  2022 	} else {
  2023 		/* RGBA->RGB, NO_ALPHA */
  2024 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2025 
  2026 		while ( height-- ) {
  2027 			DUFFS_LOOP(
  2028 			{
  2029 				*dst = *src & mask;
  2030 				++dst;
  2031 				++src;
  2032 			},
  2033 			width);
  2034 			src = (Uint32*)((Uint8*)src + srcskip);
  2035 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2036 		}
  2037 	}
  2038 }
  2039 
  2040 static void BlitNtoN(SDL_BlitInfo *info)
  2041 {
  2042 	int width = info->d_width;
  2043 	int height = info->d_height;
  2044 	Uint8 *src = info->s_pixels;
  2045 	int srcskip = info->s_skip;
  2046 	Uint8 *dst = info->d_pixels;
  2047 	int dstskip = info->d_skip;
  2048 	SDL_PixelFormat *srcfmt = info->src;
  2049 	int srcbpp = srcfmt->BytesPerPixel;
  2050 	SDL_PixelFormat *dstfmt = info->dst;
  2051 	int dstbpp = dstfmt->BytesPerPixel;
  2052 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2053 
  2054 	while ( height-- ) {
  2055 		DUFFS_LOOP(
  2056 		{
  2057 		        Uint32 Pixel;
  2058 			unsigned sR;
  2059 			unsigned sG;
  2060 			unsigned sB;
  2061 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2062 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2063 			dst += dstbpp;
  2064 			src += srcbpp;
  2065 		},
  2066 		width);
  2067 		src += srcskip;
  2068 		dst += dstskip;
  2069 	}
  2070 }
  2071 
  2072 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2073 {
  2074 	int width = info->d_width;
  2075 	int height = info->d_height;
  2076 	Uint8 *src = info->s_pixels;
  2077 	int srcskip = info->s_skip;
  2078 	Uint8 *dst = info->d_pixels;
  2079 	int dstskip = info->d_skip;
  2080 	SDL_PixelFormat *srcfmt = info->src;
  2081 	int srcbpp = srcfmt->BytesPerPixel;
  2082 	SDL_PixelFormat *dstfmt = info->dst;
  2083 	int dstbpp = dstfmt->BytesPerPixel;
  2084 	int c;
  2085 
  2086 	/* FIXME: should map alpha to [0..255] correctly! */
  2087 	while ( height-- ) {
  2088 		for ( c=width; c; --c ) {
  2089 		        Uint32 Pixel;
  2090 			unsigned sR, sG, sB, sA;
  2091 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2092 				      sR, sG, sB, sA);
  2093 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2094 				      sR, sG, sB, sA);
  2095 			dst += dstbpp;
  2096 			src += srcbpp;
  2097 		}
  2098 		src += srcskip;
  2099 		dst += dstskip;
  2100 	}
  2101 }
  2102 
  2103 static void BlitNto1Key(SDL_BlitInfo *info)
  2104 {
  2105 	int width = info->d_width;
  2106 	int height = info->d_height;
  2107 	Uint8 *src = info->s_pixels;
  2108 	int srcskip = info->s_skip;
  2109 	Uint8 *dst = info->d_pixels;
  2110 	int dstskip = info->d_skip;
  2111 	SDL_PixelFormat *srcfmt = info->src;
  2112 	const Uint8 *palmap = info->table;
  2113 	Uint32 ckey = srcfmt->colorkey;
  2114 	Uint32 rgbmask = ~srcfmt->Amask;
  2115 	int srcbpp;
  2116 	Uint32 Pixel;
  2117 	Uint8  sR, sG, sB;
  2118 
  2119 	/* Set up some basic variables */
  2120 	srcbpp = srcfmt->BytesPerPixel;
  2121 	ckey &= rgbmask;
  2122 
  2123 	if ( palmap == NULL ) {
  2124 		while ( height-- ) {
  2125 			DUFFS_LOOP(
  2126 			{
  2127 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2128 								sR, sG, sB);
  2129 				if ( (Pixel & rgbmask) != ckey ) {
  2130 				  	/* Pack RGB into 8bit pixel */
  2131 				  	*dst = ((sR>>5)<<(3+2))|
  2132 						((sG>>5)<<(2)) |
  2133 						((sB>>6)<<(0)) ;
  2134 				}
  2135 				dst++;
  2136 				src += srcbpp;
  2137 			},
  2138 			width);
  2139 			src += srcskip;
  2140 			dst += dstskip;
  2141 		}
  2142 	} else {
  2143 		while ( height-- ) {
  2144 			DUFFS_LOOP(
  2145 			{
  2146 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2147 								sR, sG, sB);
  2148 				if ( (Pixel & rgbmask) != ckey ) {
  2149 				  	/* Pack RGB into 8bit pixel */
  2150 				  	*dst = palmap[((sR>>5)<<(3+2))|
  2151 							((sG>>5)<<(2))  |
  2152 							((sB>>6)<<(0))  ];
  2153 				}
  2154 				dst++;
  2155 				src += srcbpp;
  2156 			},
  2157 			width);
  2158 			src += srcskip;
  2159 			dst += dstskip;
  2160 		}
  2161 	}
  2162 }
  2163 
  2164 static void Blit2to2Key(SDL_BlitInfo *info)
  2165 {
  2166 	int width = info->d_width;
  2167 	int height = info->d_height;
  2168 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2169 	int srcskip = info->s_skip;
  2170 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2171 	int dstskip = info->d_skip;
  2172 	Uint32 ckey = info->src->colorkey;
  2173 	Uint32 rgbmask = ~info->src->Amask;
  2174 
  2175 	/* Set up some basic variables */
  2176         srcskip /= 2;
  2177         dstskip /= 2;
  2178 	ckey &= rgbmask;
  2179 
  2180 	while ( height-- ) {
  2181 		DUFFS_LOOP(
  2182 		{
  2183 			if ( (*srcp & rgbmask) != ckey ) {
  2184 				*dstp = *srcp;
  2185 			}
  2186 			dstp++;
  2187 			srcp++;
  2188 		},
  2189 		width);
  2190 		srcp += srcskip;
  2191 		dstp += dstskip;
  2192 	}
  2193 }
  2194 
  2195 static void BlitNtoNKey(SDL_BlitInfo *info)
  2196 {
  2197 	int width = info->d_width;
  2198 	int height = info->d_height;
  2199 	Uint8 *src = info->s_pixels;
  2200 	int srcskip = info->s_skip;
  2201 	Uint8 *dst = info->d_pixels;
  2202 	int dstskip = info->d_skip;
  2203 	Uint32 ckey = info->src->colorkey;
  2204 	SDL_PixelFormat *srcfmt = info->src;
  2205 	SDL_PixelFormat *dstfmt = info->dst;
  2206 	int srcbpp = srcfmt->BytesPerPixel;
  2207 	int dstbpp = dstfmt->BytesPerPixel;
  2208 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2209 
  2210 	while ( height-- ) {
  2211 		DUFFS_LOOP(
  2212 		{
  2213 		        Uint32 Pixel;
  2214 			unsigned sR;
  2215 			unsigned sG;
  2216 			unsigned sB;
  2217 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2218 			if ( Pixel != ckey ) {
  2219 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2220 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2221 					      sR, sG, sB, alpha);
  2222 			}
  2223 			dst += dstbpp;
  2224 			src += srcbpp;
  2225 		},
  2226 		width);
  2227 		src += srcskip;
  2228 		dst += dstskip;
  2229 	}
  2230 }
  2231 
  2232 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2233 {
  2234 	int width = info->d_width;
  2235 	int height = info->d_height;
  2236 	Uint8 *src = info->s_pixels;
  2237 	int srcskip = info->s_skip;
  2238 	Uint8 *dst = info->d_pixels;
  2239 	int dstskip = info->d_skip;
  2240 	Uint32 ckey = info->src->colorkey;
  2241 	SDL_PixelFormat *srcfmt = info->src;
  2242 	SDL_PixelFormat *dstfmt = info->dst;
  2243 	Uint32 rgbmask = ~srcfmt->Amask;
  2244 
  2245 	Uint8 srcbpp;
  2246 	Uint8 dstbpp;
  2247 	Uint32 Pixel;
  2248 	Uint8  sR, sG, sB, sA;
  2249 
  2250 	/* Set up some basic variables */
  2251 	srcbpp = srcfmt->BytesPerPixel;
  2252 	dstbpp = dstfmt->BytesPerPixel;
  2253 	ckey &= rgbmask;
  2254 
  2255 	/* FIXME: should map alpha to [0..255] correctly! */
  2256 	while ( height-- ) {
  2257 		DUFFS_LOOP(
  2258 		{
  2259 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2260 				      sR, sG, sB, sA);
  2261 			if ( (Pixel & rgbmask) != ckey ) {
  2262 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2263 						sR, sG, sB, sA);
  2264 			}
  2265 			dst += dstbpp;
  2266 			src += srcbpp;
  2267 		},
  2268 		width);
  2269 		src += srcskip;
  2270 		dst += dstskip;
  2271 	}
  2272 }
  2273 
  2274 /* Normal N to N optimized blitters */
  2275 struct blit_table {
  2276 	Uint32 srcR, srcG, srcB;
  2277 	int dstbpp;
  2278 	Uint32 dstR, dstG, dstB;
  2279 	Uint32 blit_features;
  2280 	void *aux_data;
  2281 	SDL_loblit blitfunc;
  2282 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
  2283 };
  2284 static const struct blit_table normal_blit_1[] = {
  2285 	/* Default for 8-bit RGB source, an invalid combination */
  2286 	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
  2287 };
  2288 static const struct blit_table normal_blit_2[] = {
  2289 #ifdef USE_ASMBLIT
  2290     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
  2291       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
  2292     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
  2293       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
  2294     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
  2295       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
  2296 #endif
  2297 #ifdef USE_ALTIVEC_BLITTERS
  2298     /* has-altivec */
  2299     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2300       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2301     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2302       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2303 #endif
  2304     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
  2305       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
  2306     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2307       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
  2308     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2309       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
  2310     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2311       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
  2312 
  2313     /* Default for 16-bit RGB source, used if no other blitter matches */
  2314     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2315 };
  2316 static const struct blit_table normal_blit_3[] = {
  2317 	/* Default for 24-bit RGB source, never optimized */
  2318     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2319 };
  2320 static const struct blit_table normal_blit_4[] = {
  2321 #ifdef USE_ASMBLIT
  2322     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2323       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
  2324     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2325       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
  2326     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2327       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
  2328     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2329       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
  2330     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2331       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
  2332     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2333       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
  2334     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2335       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
  2336     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2337       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
  2338     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
  2339       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
  2340     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
  2341       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
  2342     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2343       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
  2344     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2345       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
  2346     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2347       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
  2348 #else
  2349 #ifdef USE_ALTIVEC_BLITTERS
  2350     /* has-altivec | dont-use-prefetch */
  2351     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2352       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2353     /* has-altivec */
  2354     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2355       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2356     /* has-altivec */
  2357     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
  2358       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
  2359 #endif
  2360     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2361       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
  2362     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2363       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
  2364 #endif
  2365 	/* Default for 32-bit RGB source, used if no other blitter matches */
  2366 	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2367 };
  2368 static const struct blit_table *normal_blit[] = {
  2369 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2370 };
  2371 
  2372 /* Mask matches table, or table entry is zero */
  2373 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2374 
  2375 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2376 {
  2377 	struct private_swaccel *sdata;
  2378 	SDL_PixelFormat *srcfmt;
  2379 	SDL_PixelFormat *dstfmt;
  2380 	const struct blit_table *table;
  2381 	int which;
  2382 	SDL_loblit blitfun;
  2383 
  2384 	/* Set up data for choosing the blit */
  2385 	sdata = surface->map->sw_data;
  2386 	srcfmt = surface->format;
  2387 	dstfmt = surface->map->dst->format;
  2388 
  2389 	if ( blit_index & 2 ) {
  2390 	        /* alpha or alpha+colorkey */
  2391 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2392 	}
  2393 
  2394 	/* We don't support destinations less than 8-bits */
  2395 	if ( dstfmt->BitsPerPixel < 8 ) {
  2396 		return(NULL);
  2397 	}
  2398 	
  2399 	if(blit_index == 1) {
  2400 	    /* colorkey blit: Here we don't have too many options, mostly
  2401 	       because RLE is the preferred fast way to deal with this.
  2402 	       If a particular case turns out to be useful we'll add it. */
  2403 
  2404 	    if(srcfmt->BytesPerPixel == 2
  2405 	       && surface->map->identity)
  2406 		return Blit2to2Key;
  2407 	    else if(dstfmt->BytesPerPixel == 1)
  2408 		return BlitNto1Key;
  2409 	    else {
  2410 #ifdef USE_ALTIVEC_BLITTERS
  2411         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2412             return Blit32to32KeyAltivec;
  2413         } else
  2414 #endif
  2415 
  2416 		if(srcfmt->Amask && dstfmt->Amask)
  2417 		    return BlitNtoNKeyCopyAlpha;
  2418 		else
  2419 		    return BlitNtoNKey;
  2420 	    }
  2421 	}
  2422 
  2423 	blitfun = NULL;
  2424 	if ( dstfmt->BitsPerPixel == 8 ) {
  2425 		/* We assume 8-bit destinations are palettized */
  2426 		if ( (srcfmt->BytesPerPixel == 4) &&
  2427 		     (srcfmt->Rmask == 0x00FF0000) &&
  2428 		     (srcfmt->Gmask == 0x0000FF00) &&
  2429 		     (srcfmt->Bmask == 0x000000FF) ) {
  2430 			if ( surface->map->table ) {
  2431 				blitfun = Blit_RGB888_index8_map;
  2432 			} else {
  2433 #ifdef USE_ASMBLIT
  2434 				sdata->aux_data = ConvertX86p32_8RGB332;
  2435 				blitfun = ConvertX86;
  2436 #else
  2437 				blitfun = Blit_RGB888_index8;
  2438 #endif
  2439 			}
  2440 		} else {
  2441 			blitfun = BlitNto1;
  2442 		}
  2443 	} else {
  2444 		/* Now the meat, choose the blitter we want */
  2445 		int a_need = 0;
  2446 		if(dstfmt->Amask)
  2447 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2448 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2449 		for ( which=0; table[which].dstbpp; ++which ) {
  2450 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2451 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2452 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2453 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2454 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2455 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2456 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2457 			    (a_need & table[which].alpha) == a_need &&
  2458 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2459 				break;
  2460 		}
  2461 		sdata->aux_data = table[which].aux_data;
  2462 		blitfun = table[which].blitfunc;
  2463 
  2464 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2465 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2466 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2467 			     srcfmt->Rmask == dstfmt->Rmask &&
  2468 			     srcfmt->Gmask == dstfmt->Gmask &&
  2469 			     srcfmt->Bmask == dstfmt->Bmask ) {
  2470 				blitfun = Blit4to4MaskAlpha;
  2471 			} else if ( a_need == COPY_ALPHA ) {
  2472 			    blitfun = BlitNtoNCopyAlpha;
  2473 			}
  2474 		}
  2475 	}
  2476 
  2477 #ifdef DEBUG_ASM
  2478 #ifdef USE_ASMBLIT
  2479 	if ( blitfun == ConvertMMX )
  2480 		fprintf(stderr, "Using mmx blit\n");
  2481 	else
  2482 	if ( blitfun == ConvertX86 )
  2483 		fprintf(stderr, "Using asm blit\n");
  2484 	else
  2485 #endif
  2486 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
  2487 		fprintf(stderr, "Using C blit\n");
  2488 	else
  2489 		fprintf(stderr, "Using optimized C blit\n");
  2490 #endif /* DEBUG_ASM */
  2491 
  2492 	return(blitfun);
  2493 }