src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Tue, 07 Feb 2006 09:29:18 +0000
changeset 1338 604d73db6802
parent 1336 3692456e7b0f
child 1339 62802d9d7c87
permissions -rw-r--r--
Removed uses of stdlib.h and string.h
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #include "SDL_types.h"
    24 #include "SDL_video.h"
    25 #include "SDL_blit.h"
    26 #include "SDL_byteorder.h"
    27 #include "SDL_cpuinfo.h"
    28 
    29 /* Functions to blit from N-bit surfaces to other surfaces */
    30 
    31 #ifdef USE_ALTIVEC_BLITTERS
    32 #ifdef HAVE_ALTIVEC_H
    33 #include <altivec.h>
    34 #endif
    35 #define assert(X)
    36 #ifdef MACOSX
    37 #include <sys/sysctl.h>
    38 static size_t GetL3CacheSize( void )
    39 {
    40     const char key[] = "hw.l3cachesize";
    41     u_int64_t result = 0;
    42     size_t typeSize = sizeof( result );
    43 
    44 
    45     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
    46     if( 0 != err ) return 0;
    47 
    48     return result;
    49 }
    50 #else
    51 static size_t GetL3CacheSize( void )
    52 {
    53     /* XXX: Just guess G4 */
    54     return 2097152;
    55 }
    56 #endif /* MACOSX */
    57 
    58 #if ((defined MACOSX) && (__GNUC__ < 4))
    59     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    60         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    61     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    62         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    63 #else
    64     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    65         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    66     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    67         (vector unsigned short) { a,b,c,d,e,f,g,h }
    68 #endif
    69 
    70 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    71 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    72                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    73                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    74                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    75                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    76 
    77 #define MAKE8888(dstfmt, r, g, b, a)  \
    78     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    79       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    80       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    81       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    82 
    83 /*
    84  * Data Stream Touch...Altivec cache prefetching.
    85  *
    86  *  Don't use this on a G5...however, the speed boost is very significant
    87  *   on a G4.
    88  */
    89 #define DST_CHAN_SRC 1
    90 #define DST_CHAN_DEST 2
    91 
    92 /* macro to set DST control word value... */
    93 #define DST_CTRL(size, count, stride) \
    94     (((size) << 24) | ((count) << 16) | (stride))
    95 
    96 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    97     ? vec_lvsl(0, src) \
    98     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    99 
   100 /* Calculate the permute vector used for 32->32 swizzling */
   101 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
   102                                   const SDL_PixelFormat *dstfmt)
   103 {
   104     /*
   105     * We have to assume that the bits that aren't used by other
   106      *  colors is alpha, and it's one complete byte, since some formats
   107      *  leave alpha with a zero mask, but we should still swizzle the bits.
   108      */
   109     /* ARGB */
   110     const static struct SDL_PixelFormat default_pixel_format = {
   111         NULL, 0, 0,
   112         0, 0, 0, 0,
   113         16, 8, 0, 24,
   114         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   115         0, 0};
   116     if (!srcfmt) {
   117         srcfmt = &default_pixel_format;
   118     }
   119     if (!dstfmt) {
   120         dstfmt = &default_pixel_format;
   121     }
   122     vector unsigned char plus = VECUINT8_LITERAL(
   123                                       0x00, 0x00, 0x00, 0x00,
   124                                       0x04, 0x04, 0x04, 0x04,
   125                                       0x08, 0x08, 0x08, 0x08,
   126                                       0x0C, 0x0C, 0x0C, 0x0C );
   127     vector unsigned char vswiz;
   128     vector unsigned int srcvec;
   129 #define RESHIFT(X) (3 - ((X) >> 3))
   130     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   131     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   132     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   133     Uint32 amask;
   134     /* Use zero for alpha if either surface doesn't have alpha */
   135     if (dstfmt->Amask) {
   136         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   137     } else {    
   138         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   139     }           
   140 #undef RESHIFT  
   141     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
   142     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   143     return(vswiz);
   144 }
   145 
   146 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   147 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
   148     int height = info->d_height;
   149     Uint8 *src = (Uint8 *) info->s_pixels;
   150     int srcskip = info->s_skip;
   151     Uint8 *dst = (Uint8 *) info->d_pixels;
   152     int dstskip = info->d_skip;
   153     SDL_PixelFormat *srcfmt = info->src;
   154     vector unsigned char valpha = vec_splat_u8(0);
   155     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   156     vector unsigned char vgmerge = VECUINT8_LITERAL(
   157         0x00, 0x02, 0x00, 0x06,
   158         0x00, 0x0a, 0x00, 0x0e,
   159         0x00, 0x12, 0x00, 0x16,
   160         0x00, 0x1a, 0x00, 0x1e);
   161     vector unsigned short v1 = vec_splat_u16(1);
   162     vector unsigned short v3 = vec_splat_u16(3);
   163     vector unsigned short v3f = VECUINT16_LITERAL(
   164         0x003f, 0x003f, 0x003f, 0x003f,
   165         0x003f, 0x003f, 0x003f, 0x003f);
   166     vector unsigned short vfc = VECUINT16_LITERAL(
   167         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   168         0x00fc, 0x00fc, 0x00fc, 0x00fc);
   169     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
   170     vf800 = vec_sl(vf800, vec_splat_u16(8));
   171 
   172     while (height--) {
   173         vector unsigned char valigner;
   174         vector unsigned char voverflow;
   175         vector unsigned char vsrc;
   176 
   177         int width = info->d_width;
   178         int extrawidth;
   179 
   180         /* do scalar until we can align... */
   181 #define ONE_PIXEL_BLEND(condition, widthvar) \
   182         while (condition) { \
   183             Uint32 Pixel; \
   184             unsigned sR, sG, sB, sA; \
   185             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   186                           sR, sG, sB, sA); \
   187             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   188                                 ((sG << 3) & 0x000007E0) | \
   189                                 ((sB >> 3) & 0x0000001F)); \
   190             dst += 2; \
   191             src += 4; \
   192             widthvar--; \
   193         }
   194 
   195         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   196 
   197         /* After all that work, here's the vector part! */
   198         extrawidth = (width % 8);  /* trailing unaligned stores */
   199         width -= extrawidth;
   200         vsrc = vec_ld(0, src);
   201         valigner = VEC_ALIGNER(src);
   202 
   203         while (width) {
   204             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   205             vector unsigned int vsrc1, vsrc2;
   206             vector unsigned char vdst;
   207 
   208             voverflow = vec_ld(15, src);
   209             vsrc = vec_perm(vsrc, voverflow, valigner);
   210             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   211             src += 16;
   212             vsrc = voverflow;
   213             voverflow = vec_ld(15, src);
   214             vsrc = vec_perm(vsrc, voverflow, valigner);
   215             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   216             /* 1555 */
   217             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
   218             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   219             vgpixel = vec_and(vgpixel, vfc);
   220             vgpixel = vec_sl(vgpixel, v3);
   221             vrpixel = vec_sl(vpixel, v1);
   222             vrpixel = vec_and(vrpixel, vf800);
   223             vbpixel = vec_and(vpixel, v3f);
   224             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   225             /* 565 */
   226             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   227             vec_st(vdst, 0, dst);
   228 
   229             width -= 8;
   230             src += 16;
   231             dst += 16;
   232             vsrc = voverflow;
   233         }
   234 
   235         assert(width == 0);
   236 
   237         /* do scalar until we can align... */
   238         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   239 #undef ONE_PIXEL_BLEND
   240 
   241         src += srcskip;  /* move to next row, accounting for pitch. */
   242         dst += dstskip;
   243     }
   244 
   245 
   246 }
   247 
   248 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
   249     int height = info->d_height;
   250     Uint8 *src = (Uint8 *) info->s_pixels;
   251     int srcskip = info->s_skip;
   252     Uint8 *dst = (Uint8 *) info->d_pixels;
   253     int dstskip = info->d_skip;
   254     SDL_PixelFormat *srcfmt = info->src;
   255     SDL_PixelFormat *dstfmt = info->dst;
   256     unsigned alpha;
   257     vector unsigned char valpha;
   258     vector unsigned char vpermute;
   259     vector unsigned short vf800;
   260     vector unsigned int v8 = vec_splat_u32(8);
   261     vector unsigned int v16 = vec_add(v8, v8);
   262     vector unsigned short v2 = vec_splat_u16(2);
   263     vector unsigned short v3 = vec_splat_u16(3);
   264     /* 
   265         0x10 - 0x1f is the alpha
   266         0x00 - 0x0e evens are the red
   267         0x01 - 0x0f odds are zero
   268     */
   269     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   270         0x10, 0x00, 0x01, 0x01,
   271         0x10, 0x02, 0x01, 0x01,
   272         0x10, 0x04, 0x01, 0x01,
   273         0x10, 0x06, 0x01, 0x01
   274     );
   275     vector unsigned char vredalpha2 = (vector unsigned char) (
   276         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   277     );
   278     /*
   279         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   280         0x11 - 0x0f odds are blue
   281     */
   282     vector unsigned char vblue1 = VECUINT8_LITERAL(
   283         0x00, 0x01, 0x02, 0x11,
   284         0x04, 0x05, 0x06, 0x13,
   285         0x08, 0x09, 0x0a, 0x15,
   286         0x0c, 0x0d, 0x0e, 0x17
   287     );
   288     vector unsigned char vblue2 = (vector unsigned char)(
   289         vec_add((vector unsigned int)vblue1, v8)
   290     );
   291     /*
   292         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   293         0x10 - 0x0e evens are green
   294     */
   295     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   296         0x00, 0x01, 0x10, 0x03,
   297         0x04, 0x05, 0x12, 0x07,
   298         0x08, 0x09, 0x14, 0x0b,
   299         0x0c, 0x0d, 0x16, 0x0f
   300     );
   301     vector unsigned char vgreen2 = (vector unsigned char)(
   302         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   303     );
   304     
   305 
   306     assert(srcfmt->BytesPerPixel == 2);
   307     assert(dstfmt->BytesPerPixel == 4);
   308 
   309     vf800 = (vector unsigned short)vec_splat_u8(-7);
   310     vf800 = vec_sl(vf800, vec_splat_u16(8));
   311 
   312     if (dstfmt->Amask && srcfmt->alpha) {
   313         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   314         valpha = vec_splat(valpha, 0);
   315     } else {
   316         alpha = 0;
   317         valpha = vec_splat_u8(0);
   318     }
   319 
   320     vpermute = calc_swizzle32(NULL, dstfmt);
   321     while (height--) {
   322         vector unsigned char valigner;
   323         vector unsigned char voverflow;
   324         vector unsigned char vsrc;
   325 
   326         int width = info->d_width;
   327         int extrawidth;
   328 
   329         /* do scalar until we can align... */
   330 #define ONE_PIXEL_BLEND(condition, widthvar) \
   331         while (condition) { \
   332             unsigned sR, sG, sB; \
   333             unsigned short Pixel = *((unsigned short *)src); \
   334             sR = (Pixel >> 8) & 0xf8; \
   335             sG = (Pixel >> 3) & 0xfc; \
   336             sB = (Pixel << 3) & 0xf8; \
   337             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   338             src += 2; \
   339             dst += 4; \
   340             widthvar--; \
   341         }
   342         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   343 
   344         /* After all that work, here's the vector part! */
   345         extrawidth = (width % 8);  /* trailing unaligned stores */
   346         width -= extrawidth;
   347         vsrc = vec_ld(0, src);
   348         valigner = VEC_ALIGNER(src);
   349 
   350         while (width) {
   351             vector unsigned short vR, vG, vB;
   352             vector unsigned char vdst1, vdst2;
   353 
   354             voverflow = vec_ld(15, src);
   355             vsrc = vec_perm(vsrc, voverflow, valigner);
   356 
   357             vR = vec_and((vector unsigned short)vsrc, vf800);
   358             vB = vec_sl((vector unsigned short)vsrc, v3);
   359             vG = vec_sl(vB, v2);
   360 
   361             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   362             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   363             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   364             vdst1 = vec_perm(vdst1, valpha, vpermute);
   365             vec_st(vdst1, 0, dst);
   366 
   367             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   368             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   369             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   370             vdst2 = vec_perm(vdst2, valpha, vpermute);
   371             vec_st(vdst2, 16, dst);
   372             
   373             width -= 8;
   374             dst += 32;
   375             src += 16;
   376             vsrc = voverflow;
   377         }
   378 
   379         assert(width == 0);
   380 
   381 
   382         /* do scalar until we can align... */
   383         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   384 #undef ONE_PIXEL_BLEND
   385 
   386         src += srcskip;  /* move to next row, accounting for pitch. */
   387         dst += dstskip;
   388     }
   389 
   390 }
   391 
   392 
   393 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
   394     int height = info->d_height;
   395     Uint8 *src = (Uint8 *) info->s_pixels;
   396     int srcskip = info->s_skip;
   397     Uint8 *dst = (Uint8 *) info->d_pixels;
   398     int dstskip = info->d_skip;
   399     SDL_PixelFormat *srcfmt = info->src;
   400     SDL_PixelFormat *dstfmt = info->dst;
   401     unsigned alpha;
   402     vector unsigned char valpha;
   403     vector unsigned char vpermute;
   404     vector unsigned short vf800;
   405     vector unsigned int v8 = vec_splat_u32(8);
   406     vector unsigned int v16 = vec_add(v8, v8);
   407     vector unsigned short v1 = vec_splat_u16(1);
   408     vector unsigned short v3 = vec_splat_u16(3);
   409     /* 
   410         0x10 - 0x1f is the alpha
   411         0x00 - 0x0e evens are the red
   412         0x01 - 0x0f odds are zero
   413     */
   414     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   415         0x10, 0x00, 0x01, 0x01,
   416         0x10, 0x02, 0x01, 0x01,
   417         0x10, 0x04, 0x01, 0x01,
   418         0x10, 0x06, 0x01, 0x01
   419     );
   420     vector unsigned char vredalpha2 = (vector unsigned char)(
   421         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   422     );
   423     /*
   424         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   425         0x11 - 0x0f odds are blue
   426     */
   427     vector unsigned char vblue1 = VECUINT8_LITERAL(
   428         0x00, 0x01, 0x02, 0x11,
   429         0x04, 0x05, 0x06, 0x13,
   430         0x08, 0x09, 0x0a, 0x15,
   431         0x0c, 0x0d, 0x0e, 0x17
   432     );
   433     vector unsigned char vblue2 = (vector unsigned char)(
   434         vec_add((vector unsigned int)vblue1, v8)
   435     );
   436     /*
   437         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   438         0x10 - 0x0e evens are green
   439     */
   440     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   441         0x00, 0x01, 0x10, 0x03,
   442         0x04, 0x05, 0x12, 0x07,
   443         0x08, 0x09, 0x14, 0x0b,
   444         0x0c, 0x0d, 0x16, 0x0f
   445     );
   446     vector unsigned char vgreen2 = (vector unsigned char)(
   447         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   448     );
   449     
   450 
   451     assert(srcfmt->BytesPerPixel == 2);
   452     assert(dstfmt->BytesPerPixel == 4);
   453 
   454     vf800 = (vector unsigned short)vec_splat_u8(-7);
   455     vf800 = vec_sl(vf800, vec_splat_u16(8));
   456 
   457     if (dstfmt->Amask && srcfmt->alpha) {
   458         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   459         valpha = vec_splat(valpha, 0);
   460     } else {
   461         alpha = 0;
   462         valpha = vec_splat_u8(0);
   463     }
   464 
   465     vpermute = calc_swizzle32(NULL, dstfmt);
   466     while (height--) {
   467         vector unsigned char valigner;
   468         vector unsigned char voverflow;
   469         vector unsigned char vsrc;
   470 
   471         int width = info->d_width;
   472         int extrawidth;
   473 
   474         /* do scalar until we can align... */
   475 #define ONE_PIXEL_BLEND(condition, widthvar) \
   476         while (condition) { \
   477             unsigned sR, sG, sB; \
   478             unsigned short Pixel = *((unsigned short *)src); \
   479             sR = (Pixel >> 7) & 0xf8; \
   480             sG = (Pixel >> 2) & 0xf8; \
   481             sB = (Pixel << 3) & 0xf8; \
   482             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   483             src += 2; \
   484             dst += 4; \
   485             widthvar--; \
   486         }
   487         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   488 
   489         /* After all that work, here's the vector part! */
   490         extrawidth = (width % 8);  /* trailing unaligned stores */
   491         width -= extrawidth;
   492         vsrc = vec_ld(0, src);
   493         valigner = VEC_ALIGNER(src);
   494 
   495         while (width) {
   496             vector unsigned short vR, vG, vB;
   497             vector unsigned char vdst1, vdst2;
   498 
   499             voverflow = vec_ld(15, src);
   500             vsrc = vec_perm(vsrc, voverflow, valigner);
   501 
   502             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   503             vB = vec_sl((vector unsigned short)vsrc, v3);
   504             vG = vec_sl(vB, v3);
   505 
   506             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   507             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   508             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   509             vdst1 = vec_perm(vdst1, valpha, vpermute);
   510             vec_st(vdst1, 0, dst);
   511 
   512             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   513             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   514             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   515             vdst2 = vec_perm(vdst2, valpha, vpermute);
   516             vec_st(vdst2, 16, dst);
   517             
   518             width -= 8;
   519             dst += 32;
   520             src += 16;
   521             vsrc = voverflow;
   522         }
   523 
   524         assert(width == 0);
   525 
   526 
   527         /* do scalar until we can align... */
   528         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   529 #undef ONE_PIXEL_BLEND
   530 
   531         src += srcskip;  /* move to next row, accounting for pitch. */
   532         dst += dstskip;
   533     }
   534 
   535 }
   536 
   537 static void BlitNtoNKey(SDL_BlitInfo *info);
   538 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   539 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   540 {
   541     int height = info->d_height;
   542     Uint32 *srcp = (Uint32 *) info->s_pixels;
   543     int srcskip = info->s_skip;
   544     Uint32 *dstp = (Uint32 *) info->d_pixels;
   545     int dstskip = info->d_skip;
   546     SDL_PixelFormat *srcfmt = info->src;
   547     int srcbpp = srcfmt->BytesPerPixel;
   548     SDL_PixelFormat *dstfmt = info->dst;
   549     int dstbpp = dstfmt->BytesPerPixel;
   550     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   551 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   552     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   553 	Uint32 ckey = info->src->colorkey;
   554     vector unsigned int valpha;
   555     vector unsigned char vpermute;
   556     vector unsigned char vzero;
   557     vector unsigned int vckey;
   558     vector unsigned int vrgbmask;
   559     vpermute = calc_swizzle32(srcfmt, dstfmt);
   560     if (info->d_width < 16) {
   561         if(copy_alpha) {
   562             return BlitNtoNKeyCopyAlpha(info);
   563         } else {
   564             return BlitNtoNKey(info);
   565         }
   566     }
   567     vzero = vec_splat_u8(0);
   568     if (alpha) {
   569         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   570         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   571     } else {
   572         valpha = (vector unsigned int)vzero;
   573     }
   574     ckey &= rgbmask;
   575     ((unsigned int *)(char*)&vckey)[0] = ckey;
   576     vckey = vec_splat(vckey, 0);
   577     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   578     vrgbmask = vec_splat(vrgbmask, 0);
   579 
   580     while (height--) {
   581 #define ONE_PIXEL_BLEND(condition, widthvar) \
   582         if (copy_alpha) { \
   583             while (condition) { \
   584                 Uint32 Pixel; \
   585                 unsigned sR, sG, sB, sA; \
   586                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   587                           sR, sG, sB, sA); \
   588                 if ( (Pixel & rgbmask) != ckey ) { \
   589                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   590                             sR, sG, sB, sA); \
   591                 } \
   592                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   593                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   594                 widthvar--; \
   595             } \
   596         } else { \
   597             while (condition) { \
   598                 Uint32 Pixel; \
   599                 unsigned sR, sG, sB; \
   600                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   601                 if ( Pixel != ckey ) { \
   602                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   603                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   604                               sR, sG, sB, alpha); \
   605                 } \
   606                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   607                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   608                 widthvar--; \
   609             } \
   610         }
   611         int width = info->d_width;
   612         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   613         assert(width > 0);
   614         if (width > 0) {
   615             int extrawidth = (width % 4);
   616             vector unsigned char valigner = VEC_ALIGNER(srcp);
   617             vector unsigned int vs = vec_ld(0, srcp);
   618             width -= extrawidth;
   619             assert(width >= 4);
   620             while (width) {
   621                 vector unsigned char vsel;
   622                 vector unsigned int vd;
   623                 vector unsigned int voverflow = vec_ld(15, srcp);
   624                 /* load the source vec */
   625                 vs = vec_perm(vs, voverflow, valigner);
   626                 /* vsel is set for items that match the key */
   627                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   628                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
   629                 /* permute the src vec to the dest format */
   630                 vs = vec_perm(vs, valpha, vpermute);
   631                 /* load the destination vec */
   632                 vd = vec_ld(0, dstp);
   633                 /* select the source and dest into vs */
   634                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   635                 
   636                 vec_st(vd, 0, dstp);
   637                 srcp += 4;
   638                 width -= 4;
   639                 dstp += 4;
   640                 vs = voverflow;
   641             }
   642             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   643 #undef ONE_PIXEL_BLEND
   644             srcp += srcskip >> 2;
   645             dstp += dstskip >> 2;
   646         }
   647     }
   648 }
   649 
   650 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   651 /* Use this on a G5 */
   652 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   653 {
   654     int height = info->d_height;
   655     Uint32 *src = (Uint32 *) info->s_pixels;
   656     int srcskip = info->s_skip;
   657     Uint32 *dst = (Uint32 *) info->d_pixels;
   658     int dstskip = info->d_skip;
   659     SDL_PixelFormat *srcfmt = info->src;
   660     int srcbpp = srcfmt->BytesPerPixel;
   661     SDL_PixelFormat *dstfmt = info->dst;
   662     int dstbpp = dstfmt->BytesPerPixel;
   663     vector unsigned int vzero = vec_splat_u32(0);
   664     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   665     if (dstfmt->Amask && !srcfmt->Amask) {
   666         if (srcfmt->alpha) {
   667             vector unsigned char valpha;
   668             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   669             vzero = (vector unsigned int)vec_splat(valpha, 0);
   670         }
   671     }
   672 
   673     assert(srcbpp == 4);
   674     assert(dstbpp == 4);
   675 
   676     while (height--) {
   677         vector unsigned char valigner;
   678         vector unsigned int vbits;
   679         vector unsigned int voverflow;
   680         Uint32 bits;
   681         Uint8 r, g, b, a;
   682 
   683         int width = info->d_width;
   684         int extrawidth;
   685 
   686         /* do scalar until we can align... */
   687         while ((UNALIGNED_PTR(dst)) && (width)) {
   688             bits = *(src++);
   689             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   690             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   691             width--;
   692         }
   693 
   694         /* After all that work, here's the vector part! */
   695         extrawidth = (width % 4);
   696         width -= extrawidth;
   697         valigner = VEC_ALIGNER(src);
   698         vbits = vec_ld(0, src);
   699 
   700        while (width) {
   701             voverflow = vec_ld(15, src);
   702             src += 4;
   703             width -= 4;
   704             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   705             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   706             vec_st(vbits, 0, dst);  /* store it back out. */
   707             dst += 4;
   708             vbits = voverflow;
   709         }
   710 
   711         assert(width == 0);
   712 
   713         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   714         while (extrawidth) {
   715             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   716             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   717             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   718             extrawidth--;
   719         }
   720 
   721         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   722         dst += dstskip >> 2;
   723     }
   724 
   725 }
   726 
   727 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   728 /* Use this on a G4 */
   729 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
   730 {
   731     const int scalar_dst_lead = sizeof (Uint32) * 4;
   732     const int vector_dst_lead = sizeof (Uint32) * 16;
   733 
   734     int height = info->d_height;
   735     Uint32 *src = (Uint32 *) info->s_pixels;
   736     int srcskip = info->s_skip;
   737     Uint32 *dst = (Uint32 *) info->d_pixels;
   738     int dstskip = info->d_skip;
   739     SDL_PixelFormat *srcfmt = info->src;
   740     int srcbpp = srcfmt->BytesPerPixel;
   741     SDL_PixelFormat *dstfmt = info->dst;
   742     int dstbpp = dstfmt->BytesPerPixel;
   743     vector unsigned int vzero = vec_splat_u32(0);
   744     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   745     if (dstfmt->Amask && !srcfmt->Amask) {
   746         if (srcfmt->alpha) {
   747             vector unsigned char valpha;
   748             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   749             vzero = (vector unsigned int)vec_splat(valpha, 0);
   750         }
   751     }
   752 
   753     assert(srcbpp == 4);
   754     assert(dstbpp == 4);
   755 
   756     while (height--) {
   757         vector unsigned char valigner;
   758         vector unsigned int vbits;
   759         vector unsigned int voverflow;
   760         Uint32 bits;
   761         Uint8 r, g, b, a;
   762 
   763         int width = info->d_width;
   764         int extrawidth;
   765 
   766         /* do scalar until we can align... */
   767         while ((UNALIGNED_PTR(dst)) && (width)) {
   768             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   769             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   770             bits = *(src++);
   771             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   772             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   773             width--;
   774         }
   775 
   776         /* After all that work, here's the vector part! */
   777         extrawidth = (width % 4);
   778         width -= extrawidth;
   779         valigner = VEC_ALIGNER(src);
   780         vbits = vec_ld(0, src);
   781 
   782         while (width) {
   783             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   784             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   785             voverflow = vec_ld(15, src);
   786             src += 4;
   787             width -= 4;
   788             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   789             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   790             vec_st(vbits, 0, dst);  /* store it back out. */
   791             dst += 4;
   792             vbits = voverflow;
   793         }
   794         
   795         assert(width == 0);
   796 
   797         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   798         while (extrawidth) {
   799             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
   800             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   801             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   802             extrawidth--;
   803         }
   804 
   805         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   806         dst += dstskip >> 2;
   807     }
   808 
   809     vec_dss(DST_CHAN_SRC);
   810     vec_dss(DST_CHAN_DEST);
   811 }
   812 
   813 static Uint32 GetBlitFeatures( void )
   814 {
   815     static Uint32 features = 0xffffffff;
   816     if (features == 0xffffffff) {
   817         /* Provide an override for testing .. */
   818         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   819         if (override) {
   820             features = 0;
   821             SDL_sscanf(override, "%u", &features);
   822         } else {
   823             features = ( 0
   824                 /* Feature 1 is has-MMX */
   825                 | ((SDL_HasMMX()) ? 1 : 0)
   826                 /* Feature 2 is has-AltiVec */
   827                 | ((SDL_HasAltiVec()) ? 2 : 0)
   828                 /* Feature 4 is dont-use-prefetch */
   829                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   830                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   831             );
   832         }
   833     }
   834     return features;
   835 }
   836 #else
   837 /* Feature 1 is has-MMX */
   838 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   839 #endif
   840 
   841 /* This is now endian dependent */
   842 #if ( SDL_BYTEORDER == SDL_LIL_ENDIAN )
   843 #define HI	1
   844 #define LO	0
   845 #else /* ( SDL_BYTEORDER == SDL_BIG_ENDIAN ) */
   846 #define HI	0
   847 #define LO	1
   848 #endif
   849 
   850 #ifdef USE_ASMBLIT
   851 
   852 /* Heheheh, we coerce Hermes into using SDL blit information */
   853 #define X86_ASSEMBLER
   854 #define HermesConverterInterface	SDL_BlitInfo
   855 #define HermesClearInterface		void
   856 #define STACKCALL
   857 
   858 #include "HeadMMX.h"
   859 #include "HeadX86.h"
   860 
   861 #else
   862 
   863 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   864 #define RGB888_RGB332(dst, src) { \
   865 	dst = (((src)&0x00E00000)>>16)| \
   866 	      (((src)&0x0000E000)>>11)| \
   867 	      (((src)&0x000000C0)>>6); \
   868 }
   869 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   870 {
   871 #ifndef USE_DUFFS_LOOP
   872 	int c;
   873 #endif
   874 	int width, height;
   875 	Uint32 *src;
   876 	const Uint8 *map;
   877 	Uint8 *dst;
   878 	int srcskip, dstskip;
   879 
   880 	/* Set up some basic variables */
   881 	width = info->d_width;
   882 	height = info->d_height;
   883 	src = (Uint32 *)info->s_pixels;
   884 	srcskip = info->s_skip/4;
   885 	dst = info->d_pixels;
   886 	dstskip = info->d_skip;
   887 	map = info->table;
   888 
   889 	if ( map == NULL ) {
   890 		while ( height-- ) {
   891 #ifdef USE_DUFFS_LOOP
   892 			DUFFS_LOOP(
   893 				RGB888_RGB332(*dst++, *src);
   894 			, width);
   895 #else
   896 			for ( c=width/4; c; --c ) {
   897 				/* Pack RGB into 8bit pixel */
   898 				++src;
   899 				RGB888_RGB332(*dst++, *src);
   900 				++src;
   901 				RGB888_RGB332(*dst++, *src);
   902 				++src;
   903 				RGB888_RGB332(*dst++, *src);
   904 				++src;
   905 			}
   906 			switch ( width & 3 ) {
   907 				case 3:
   908 					RGB888_RGB332(*dst++, *src);
   909 					++src;
   910 				case 2:
   911 					RGB888_RGB332(*dst++, *src);
   912 					++src;
   913 				case 1:
   914 					RGB888_RGB332(*dst++, *src);
   915 					++src;
   916 			}
   917 #endif /* USE_DUFFS_LOOP */
   918 			src += srcskip;
   919 			dst += dstskip;
   920 		}
   921 	} else {
   922 		int Pixel;
   923 
   924 		while ( height-- ) {
   925 #ifdef USE_DUFFS_LOOP
   926 			DUFFS_LOOP(
   927 				RGB888_RGB332(Pixel, *src);
   928 				*dst++ = map[Pixel];
   929 				++src;
   930 			, width);
   931 #else
   932 			for ( c=width/4; c; --c ) {
   933 				/* Pack RGB into 8bit pixel */
   934 				RGB888_RGB332(Pixel, *src);
   935 				*dst++ = map[Pixel];
   936 				++src;
   937 				RGB888_RGB332(Pixel, *src);
   938 				*dst++ = map[Pixel];
   939 				++src;
   940 				RGB888_RGB332(Pixel, *src);
   941 				*dst++ = map[Pixel];
   942 				++src;
   943 				RGB888_RGB332(Pixel, *src);
   944 				*dst++ = map[Pixel];
   945 				++src;
   946 			}
   947 			switch ( width & 3 ) {
   948 				case 3:
   949 					RGB888_RGB332(Pixel, *src);
   950 					*dst++ = map[Pixel];
   951 					++src;
   952 				case 2:
   953 					RGB888_RGB332(Pixel, *src);
   954 					*dst++ = map[Pixel];
   955 					++src;
   956 				case 1:
   957 					RGB888_RGB332(Pixel, *src);
   958 					*dst++ = map[Pixel];
   959 					++src;
   960 			}
   961 #endif /* USE_DUFFS_LOOP */
   962 			src += srcskip;
   963 			dst += dstskip;
   964 		}
   965 	}
   966 }
   967 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   968 #define RGB888_RGB555(dst, src) { \
   969 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>9)| \
   970 	                   (((*src)&0x0000F800)>>6)| \
   971 	                   (((*src)&0x000000F8)>>3); \
   972 }
   973 #define RGB888_RGB555_TWO(dst, src) { \
   974 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   975 	                     (((src[HI])&0x0000F800)>>6)| \
   976 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   977 	                     (((src[LO])&0x00F80000)>>9)| \
   978 	                     (((src[LO])&0x0000F800)>>6)| \
   979 	                     (((src[LO])&0x000000F8)>>3); \
   980 }
   981 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   982 {
   983 #ifndef USE_DUFFS_LOOP
   984 	int c;
   985 #endif
   986 	int width, height;
   987 	Uint32 *src;
   988 	Uint16 *dst;
   989 	int srcskip, dstskip;
   990 
   991 	/* Set up some basic variables */
   992 	width = info->d_width;
   993 	height = info->d_height;
   994 	src = (Uint32 *)info->s_pixels;
   995 	srcskip = info->s_skip/4;
   996 	dst = (Uint16 *)info->d_pixels;
   997 	dstskip = info->d_skip/2;
   998 
   999 #ifdef USE_DUFFS_LOOP
  1000 	while ( height-- ) {
  1001 		DUFFS_LOOP(
  1002 			RGB888_RGB555(dst, src);
  1003 			++src;
  1004 			++dst;
  1005 		, width);
  1006 		src += srcskip;
  1007 		dst += dstskip;
  1008 	}
  1009 #else
  1010 	/* Memory align at 4-byte boundary, if necessary */
  1011 	if ( (long)dst & 0x03 ) {
  1012 		/* Don't do anything if width is 0 */
  1013 		if ( width == 0 ) {
  1014 			return;
  1015 		}
  1016 		--width;
  1017 
  1018 		while ( height-- ) {
  1019 			/* Perform copy alignment */
  1020 			RGB888_RGB555(dst, src);
  1021 			++src;
  1022 			++dst;
  1023 
  1024 			/* Copy in 4 pixel chunks */
  1025 			for ( c=width/4; c; --c ) {
  1026 				RGB888_RGB555_TWO(dst, src);
  1027 				src += 2;
  1028 				dst += 2;
  1029 				RGB888_RGB555_TWO(dst, src);
  1030 				src += 2;
  1031 				dst += 2;
  1032 			}
  1033 			/* Get any leftovers */
  1034 			switch (width & 3) {
  1035 				case 3:
  1036 					RGB888_RGB555(dst, src);
  1037 					++src;
  1038 					++dst;
  1039 				case 2:
  1040 					RGB888_RGB555_TWO(dst, src);
  1041 					src += 2;
  1042 					dst += 2;
  1043 					break;
  1044 				case 1:
  1045 					RGB888_RGB555(dst, src);
  1046 					++src;
  1047 					++dst;
  1048 					break;
  1049 			}
  1050 			src += srcskip;
  1051 			dst += dstskip;
  1052 		}
  1053 	} else { 
  1054 		while ( height-- ) {
  1055 			/* Copy in 4 pixel chunks */
  1056 			for ( c=width/4; c; --c ) {
  1057 				RGB888_RGB555_TWO(dst, src);
  1058 				src += 2;
  1059 				dst += 2;
  1060 				RGB888_RGB555_TWO(dst, src);
  1061 				src += 2;
  1062 				dst += 2;
  1063 			}
  1064 			/* Get any leftovers */
  1065 			switch (width & 3) {
  1066 				case 3:
  1067 					RGB888_RGB555(dst, src);
  1068 					++src;
  1069 					++dst;
  1070 				case 2:
  1071 					RGB888_RGB555_TWO(dst, src);
  1072 					src += 2;
  1073 					dst += 2;
  1074 					break;
  1075 				case 1:
  1076 					RGB888_RGB555(dst, src);
  1077 					++src;
  1078 					++dst;
  1079 					break;
  1080 			}
  1081 			src += srcskip;
  1082 			dst += dstskip;
  1083 		}
  1084 	}
  1085 #endif /* USE_DUFFS_LOOP */
  1086 }
  1087 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1088 #define RGB888_RGB565(dst, src) { \
  1089 	*(Uint16 *)(dst) = (((*src)&0x00F80000)>>8)| \
  1090 	                   (((*src)&0x0000FC00)>>5)| \
  1091 	                   (((*src)&0x000000F8)>>3); \
  1092 }
  1093 #define RGB888_RGB565_TWO(dst, src) { \
  1094 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1095 	                     (((src[HI])&0x0000FC00)>>5)| \
  1096 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1097 	                     (((src[LO])&0x00F80000)>>8)| \
  1098 	                     (((src[LO])&0x0000FC00)>>5)| \
  1099 	                     (((src[LO])&0x000000F8)>>3); \
  1100 }
  1101 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1102 {
  1103 #ifndef USE_DUFFS_LOOP
  1104 	int c;
  1105 #endif
  1106 	int width, height;
  1107 	Uint32 *src;
  1108 	Uint16 *dst;
  1109 	int srcskip, dstskip;
  1110 
  1111 	/* Set up some basic variables */
  1112 	width = info->d_width;
  1113 	height = info->d_height;
  1114 	src = (Uint32 *)info->s_pixels;
  1115 	srcskip = info->s_skip/4;
  1116 	dst = (Uint16 *)info->d_pixels;
  1117 	dstskip = info->d_skip/2;
  1118 
  1119 #ifdef USE_DUFFS_LOOP
  1120 	while ( height-- ) {
  1121 		DUFFS_LOOP(
  1122 			RGB888_RGB565(dst, src);
  1123 			++src;
  1124 			++dst;
  1125 		, width);
  1126 		src += srcskip;
  1127 		dst += dstskip;
  1128 	}
  1129 #else
  1130 	/* Memory align at 4-byte boundary, if necessary */
  1131 	if ( (long)dst & 0x03 ) {
  1132 		/* Don't do anything if width is 0 */
  1133 		if ( width == 0 ) {
  1134 			return;
  1135 		}
  1136 		--width;
  1137 
  1138 		while ( height-- ) {
  1139 			/* Perform copy alignment */
  1140 			RGB888_RGB565(dst, src);
  1141 			++src;
  1142 			++dst;
  1143 
  1144 			/* Copy in 4 pixel chunks */
  1145 			for ( c=width/4; c; --c ) {
  1146 				RGB888_RGB565_TWO(dst, src);
  1147 				src += 2;
  1148 				dst += 2;
  1149 				RGB888_RGB565_TWO(dst, src);
  1150 				src += 2;
  1151 				dst += 2;
  1152 			}
  1153 			/* Get any leftovers */
  1154 			switch (width & 3) {
  1155 				case 3:
  1156 					RGB888_RGB565(dst, src);
  1157 					++src;
  1158 					++dst;
  1159 				case 2:
  1160 					RGB888_RGB565_TWO(dst, src);
  1161 					src += 2;
  1162 					dst += 2;
  1163 					break;
  1164 				case 1:
  1165 					RGB888_RGB565(dst, src);
  1166 					++src;
  1167 					++dst;
  1168 					break;
  1169 			}
  1170 			src += srcskip;
  1171 			dst += dstskip;
  1172 		}
  1173 	} else { 
  1174 		while ( height-- ) {
  1175 			/* Copy in 4 pixel chunks */
  1176 			for ( c=width/4; c; --c ) {
  1177 				RGB888_RGB565_TWO(dst, src);
  1178 				src += 2;
  1179 				dst += 2;
  1180 				RGB888_RGB565_TWO(dst, src);
  1181 				src += 2;
  1182 				dst += 2;
  1183 			}
  1184 			/* Get any leftovers */
  1185 			switch (width & 3) {
  1186 				case 3:
  1187 					RGB888_RGB565(dst, src);
  1188 					++src;
  1189 					++dst;
  1190 				case 2:
  1191 					RGB888_RGB565_TWO(dst, src);
  1192 					src += 2;
  1193 					dst += 2;
  1194 					break;
  1195 				case 1:
  1196 					RGB888_RGB565(dst, src);
  1197 					++src;
  1198 					++dst;
  1199 					break;
  1200 			}
  1201 			src += srcskip;
  1202 			dst += dstskip;
  1203 		}
  1204 	}
  1205 #endif /* USE_DUFFS_LOOP */
  1206 }
  1207 
  1208 #endif /* USE_ASMBLIT */
  1209 
  1210 
  1211 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1212 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1213 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1214 {
  1215 #ifndef USE_DUFFS_LOOP
  1216 	int c;
  1217 #endif
  1218 	int width, height;
  1219 	Uint8 *src;
  1220 	Uint32 *dst;
  1221 	int srcskip, dstskip;
  1222 
  1223 	/* Set up some basic variables */
  1224 	width = info->d_width;
  1225 	height = info->d_height;
  1226 	src = (Uint8 *)info->s_pixels;
  1227 	srcskip = info->s_skip;
  1228 	dst = (Uint32 *)info->d_pixels;
  1229 	dstskip = info->d_skip/4;
  1230 
  1231 #ifdef USE_DUFFS_LOOP
  1232 	while ( height-- ) {
  1233 		DUFFS_LOOP(
  1234 		{
  1235 			*dst++ = RGB565_32(dst, src, map);
  1236 			src += 2;
  1237 		},
  1238 		width);
  1239 		src += srcskip;
  1240 		dst += dstskip;
  1241 	}
  1242 #else
  1243 	while ( height-- ) {
  1244 		/* Copy in 4 pixel chunks */
  1245 		for ( c=width/4; c; --c ) {
  1246 			*dst++ = RGB565_32(dst, src, map);
  1247 			src += 2;
  1248 			*dst++ = RGB565_32(dst, src, map);
  1249 			src += 2;
  1250 			*dst++ = RGB565_32(dst, src, map);
  1251 			src += 2;
  1252 			*dst++ = RGB565_32(dst, src, map);
  1253 			src += 2;
  1254 		}
  1255 		/* Get any leftovers */
  1256 		switch (width & 3) {
  1257 			case 3:
  1258 				*dst++ = RGB565_32(dst, src, map);
  1259 				src += 2;
  1260 			case 2:
  1261 				*dst++ = RGB565_32(dst, src, map);
  1262 				src += 2;
  1263 			case 1:
  1264 				*dst++ = RGB565_32(dst, src, map);
  1265 				src += 2;
  1266 				break;
  1267 		}
  1268 		src += srcskip;
  1269 		dst += dstskip;
  1270 	}
  1271 #endif /* USE_DUFFS_LOOP */
  1272 }
  1273 
  1274 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
  1275 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1276 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1277 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1278 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1279 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1280 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1281 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1282 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1283 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1284 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1285 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1286 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1287 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1288 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1289 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1290 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1291 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1292 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1293 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1294 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1295 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1296 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1297 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1298 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1299 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1300 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1301 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1302 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1303 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1304 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1305 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1306 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1307 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1308 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1309 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1310 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1311 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1312 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1313 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1314 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1315 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1316 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1317 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1318 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1319 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1320 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1321 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1322 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1323 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1324 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1325 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1326 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1327 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1328 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1329 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1330 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1331 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1332 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1333 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1334 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1335 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1336 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1337 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1338 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1339 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1340 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1341 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1342 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1343 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1344 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1345 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1346 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1347 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1348 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1349 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1350 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1351 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1352 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1353 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1354 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1355 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1356 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1357 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1358 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1359 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1360 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1361 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1362 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1363 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1364 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1365 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1366 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1367 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1368 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1369 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1370 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1371 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1372 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1373 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1374 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1375 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1376 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1377 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1378 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1379 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1380 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1381 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1382 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1383 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1384 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1385 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1386 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1387 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1388 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1389 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1390 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1391 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1392 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1393 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1394 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1395 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1396 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1397 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1398 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1399 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1400 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1401 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1402 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1403 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1404 };
  1405 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
  1406 {
  1407     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1408 }
  1409 
  1410 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1411 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1412 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1413 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1414 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1415 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1416 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1417 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1418 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1419 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1420 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1421 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1422 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1423 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1424 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1425 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1426 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1427 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1428 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1429 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1430 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1431 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1432 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1433 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1434 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1435 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1436 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1437 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1438 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1439 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1440 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1441 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1442 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1443 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1444 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1445 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1446 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1447 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1448 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1449 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1450 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1451 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1452 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1453 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1454 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1455 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1456 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1457 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1458 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1459 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1460 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1461 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1462 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1463 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1464 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1465 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1466 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1467 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1468 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1469 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1470 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1471 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1472 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1473 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1474 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1475 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1476 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1477 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1478 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1479 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1480 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1481 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1482 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1483 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1484 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1485 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1486 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1487 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1488 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1489 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1490 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1491 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1492 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1493 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1494 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1495 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1496 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1497 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1498 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1499 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1500 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1501 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1502 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1503 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1504 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1505 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1506 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1507 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1508 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1509 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1510 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1511 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1512 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1513 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1514 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1515 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1516 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1517 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1518 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1519 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1520 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1521 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1522 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1523 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1524 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1525 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1526 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1527 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1528 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1529 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1530 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1531 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1532 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1533 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1534 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1535 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1536 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1537 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1538 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1539 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1540 };
  1541 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
  1542 {
  1543     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1544 }
  1545 
  1546 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1547 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1548 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1549 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1550 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1551 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1552 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1553 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1554 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1555 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1556 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1557 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1558 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1559 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1560 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1561 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1562 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1563 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1564 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1565 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1566 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1567 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1568 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1569 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1570 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1571 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1572 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1573 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1574 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1575 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1576 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1577 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1578 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1579 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1580 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1581 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1582 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1583 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1584 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1585 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1586 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1587 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1588 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1589 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1590 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1591 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1592 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1593 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1594 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1595 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1596 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1597 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1598 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1599 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1600 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1601 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1602 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1603 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1604 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1605 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1606 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1607 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1608 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1609 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1610 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1611 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1612 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1613 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1614 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1615 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1616 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1617 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1618 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1619 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1620 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1621 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1622 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1623 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1624 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1625 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1626 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1627 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1628 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1629 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1630 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1631 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1632 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1633 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1634 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1635 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1636 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1637 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1638 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1639 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1640 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1641 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1642 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1643 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1644 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1645 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1646 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1647 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1648 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1649 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1650 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1651 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1652 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1653 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1654 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1655 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1656 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1657 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1658 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1659 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1660 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1661 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1662 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1663 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1664 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1665 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1666 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1667 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1668 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1669 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1670 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1671 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1672 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1673 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1674 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1675 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1676 };
  /* Blit RGB 5-6-5 --> RGBA 8-8-8-8: hands the whole job to Blit_RGB565_32(),
     passing the lookup table that matches this destination layout. */
  1677 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
  1678 {
  1679     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1680 }
  1681 
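  1682 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8.
          The lookup table has 512 entries arranged as 256 pairs of partial
          pixels: each even entry carries the blue bits and the low green
          bits; each odd entry carries the red bits, the high green bits and
          an opaque 0xff alpha. */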
  1683 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1684 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1685 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1686 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1687 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1688 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1689 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1690 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1691 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1692 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1693 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1694 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1695 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1696 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1697 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1698 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1699 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1700 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1701 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1702 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1703 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1704 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1705 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1706 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1707 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1708 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1709 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1710 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1711 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1712 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1713 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1714 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1715 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1716 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1717 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1718 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1719 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1720 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1721 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1722 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1723 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1724 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1725 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1726 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1727 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1728 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1729 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1730 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1731 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1732 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1733 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1734 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1735 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1736 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1737 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1738 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1739 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1740 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1741 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1742 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1743 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1744 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1745 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1746 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1747 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1748 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1749 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1750 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1751 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1752 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1753 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1754 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1755 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1756 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1757 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1758 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1759 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1760 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1761 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1762 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1763 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1764 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1765 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1766 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1767 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1768 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1769 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1770 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1771 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1772 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1773 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1774 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1775 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1776 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1777 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1778 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1779 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1780 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1781 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1782 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1783 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1784 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1785 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1786 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1787 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1788 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1789 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1790 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1791 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1792 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1793 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1794 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1795 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1796 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1797 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1798 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1799 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1800 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1801 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1802 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1803 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1804 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1805 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1806 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1807 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1808 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1809 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1810 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1811 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1812 };
  /* Blit RGB 5-6-5 --> BGRA 8-8-8-8: hands the whole job to Blit_RGB565_32(),
     passing the lookup table that matches this destination layout. */
  1813 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
  1814 {
  1815     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1816 }
  1817 
  1818 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  #ifndef RGB888_RGB332
  /* Pack a 0x00RRGGBB pixel into one byte laid out as RRRGGGBB.
     Note that src is evaluated three times, so it must be free of side effects. */
  #define RGB888_RGB332(dst, src) { \
      dst = ( (((src)&0x00E00000)>>16)     /* top 3 red bits   -> bits 7..5 */ \
            | (((src)&0x0000E000)>>11)     /* top 3 green bits -> bits 4..2 */ \
            | (((src)&0x000000C0)>>6) );   /* top 2 blue bits  -> bits 1..0 */ \
  }
  #endif
  1826 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1827 {
  1828 #ifndef USE_DUFFS_LOOP
  1829 	int c;
  1830 #endif
  1831 	int Pixel;
  1832 	int width, height;
  1833 	Uint32 *src;
  1834 	const Uint8 *map;
  1835 	Uint8 *dst;
  1836 	int srcskip, dstskip;
  1837 
  1838 	/* Set up some basic variables */
  1839 	width = info->d_width;
  1840 	height = info->d_height;
  1841 	src = (Uint32 *)info->s_pixels;
  1842 	srcskip = info->s_skip/4;
  1843 	dst = info->d_pixels;
  1844 	dstskip = info->d_skip;
  1845 	map = info->table;
  1846 
  1847 #ifdef USE_DUFFS_LOOP
  1848 	while ( height-- ) {
  1849 		DUFFS_LOOP(
  1850 			RGB888_RGB332(Pixel, *src);
  1851 			*dst++ = map[Pixel];
  1852 			++src;
  1853 		, width);
  1854 		src += srcskip;
  1855 		dst += dstskip;
  1856 	}
  1857 #else
  1858 	while ( height-- ) {
  1859 		for ( c=width/4; c; --c ) {
  1860 			/* Pack RGB into 8bit pixel */
  1861 			RGB888_RGB332(Pixel, *src);
  1862 			*dst++ = map[Pixel];
  1863 			++src;
  1864 			RGB888_RGB332(Pixel, *src);
  1865 			*dst++ = map[Pixel];
  1866 			++src;
  1867 			RGB888_RGB332(Pixel, *src);
  1868 			*dst++ = map[Pixel];
  1869 			++src;
  1870 			RGB888_RGB332(Pixel, *src);
  1871 			*dst++ = map[Pixel];
  1872 			++src;
  1873 		}
  1874 		switch ( width & 3 ) {
  1875 			case 3:
  1876 				RGB888_RGB332(Pixel, *src);
  1877 				*dst++ = map[Pixel];
  1878 				++src;
  1879 			case 2:
  1880 				RGB888_RGB332(Pixel, *src);
  1881 				*dst++ = map[Pixel];
  1882 				++src;
  1883 			case 1:
  1884 				RGB888_RGB332(Pixel, *src);
  1885 				*dst++ = map[Pixel];
  1886 				++src;
  1887 		}
  1888 		src += srcskip;
  1889 		dst += dstskip;
  1890 	}
  1891 #endif /* USE_DUFFS_LOOP */
  1892 }
  1893 static void BlitNto1(SDL_BlitInfo *info)
  1894 {
  1895 #ifndef USE_DUFFS_LOOP
  1896 	int c;
  1897 #endif
  1898 	int width, height;
  1899 	Uint8 *src;
  1900 	const Uint8 *map;
  1901 	Uint8 *dst;
  1902 	int srcskip, dstskip;
  1903 	int srcbpp;
  1904 	Uint32 Pixel;
  1905 	int  sR, sG, sB;
  1906 	SDL_PixelFormat *srcfmt;
  1907 
  1908 	/* Set up some basic variables */
  1909 	width = info->d_width;
  1910 	height = info->d_height;
  1911 	src = info->s_pixels;
  1912 	srcskip = info->s_skip;
  1913 	dst = info->d_pixels;
  1914 	dstskip = info->d_skip;
  1915 	map = info->table;
  1916 	srcfmt = info->src;
  1917 	srcbpp = srcfmt->BytesPerPixel;
  1918 
  1919 	if ( map == NULL ) {
  1920 		while ( height-- ) {
  1921 #ifdef USE_DUFFS_LOOP
  1922 			DUFFS_LOOP(
  1923 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1924 								sR, sG, sB);
  1925 				if ( 1 ) {
  1926 				  	/* Pack RGB into 8bit pixel */
  1927 				  	*dst = ((sR>>5)<<(3+2))|
  1928 					        ((sG>>5)<<(2)) |
  1929 					        ((sB>>6)<<(0)) ;
  1930 				}
  1931 				dst++;
  1932 				src += srcbpp;
  1933 			, width);
  1934 #else
  1935 			for ( c=width; c; --c ) {
  1936 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1937 								sR, sG, sB);
  1938 				if ( 1 ) {
  1939 				  	/* Pack RGB into 8bit pixel */
  1940 				  	*dst = ((sR>>5)<<(3+2))|
  1941 					        ((sG>>5)<<(2)) |
  1942 					        ((sB>>6)<<(0)) ;
  1943 				}
  1944 				dst++;
  1945 				src += srcbpp;
  1946 			}
  1947 #endif
  1948 			src += srcskip;
  1949 			dst += dstskip;
  1950 		}
  1951 	} else {
  1952 		while ( height-- ) {
  1953 #ifdef USE_DUFFS_LOOP
  1954 			DUFFS_LOOP(
  1955 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1956 								sR, sG, sB);
  1957 				if ( 1 ) {
  1958 				  	/* Pack RGB into 8bit pixel */
  1959 				  	*dst = map[((sR>>5)<<(3+2))|
  1960 						   ((sG>>5)<<(2))  |
  1961 						   ((sB>>6)<<(0))  ];
  1962 				}
  1963 				dst++;
  1964 				src += srcbpp;
  1965 			, width);
  1966 #else
  1967 			for ( c=width; c; --c ) {
  1968 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1969 								sR, sG, sB);
  1970 				if ( 1 ) {
  1971 				  	/* Pack RGB into 8bit pixel */
  1972 				  	*dst = map[((sR>>5)<<(3+2))|
  1973 						   ((sG>>5)<<(2))  |
  1974 						   ((sB>>6)<<(0))  ];
  1975 				}
  1976 				dst++;
  1977 				src += srcbpp;
  1978 			}
  1979 #endif /* USE_DUFFS_LOOP */
  1980 			src += srcskip;
  1981 			dst += dstskip;
  1982 		}
  1983 	}
  1984 }
  1985 
  1986 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  1987 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1988 {
  1989 	int width = info->d_width;
  1990 	int height = info->d_height;
  1991 	Uint32 *src = (Uint32 *)info->s_pixels;
  1992 	int srcskip = info->s_skip;
  1993 	Uint32 *dst = (Uint32 *)info->d_pixels;
  1994 	int dstskip = info->d_skip;
  1995 	SDL_PixelFormat *srcfmt = info->src;
  1996 	SDL_PixelFormat *dstfmt = info->dst;
  1997 
  1998 	if (dstfmt->Amask) {
  1999 		/* RGB->RGBA, SET_ALPHA */
  2000 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  2001 
  2002 		while ( height-- ) {
  2003 			DUFFS_LOOP(
  2004 			{
  2005 				*dst = *src | mask;
  2006 				++dst;
  2007 				++src;
  2008 			},
  2009 			width);
  2010 			src = (Uint32*)((Uint8*)src + srcskip);
  2011 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2012 		}
  2013 	} else {
  2014 		/* RGBA->RGB, NO_ALPHA */
  2015 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2016 
  2017 		while ( height-- ) {
  2018 			DUFFS_LOOP(
  2019 			{
  2020 				*dst = *src & mask;
  2021 				++dst;
  2022 				++src;
  2023 			},
  2024 			width);
  2025 			src = (Uint32*)((Uint8*)src + srcskip);
  2026 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2027 		}
  2028 	}
  2029 }
  2030 
  2031 static void BlitNtoN(SDL_BlitInfo *info)
  2032 {
  2033 	int width = info->d_width;
  2034 	int height = info->d_height;
  2035 	Uint8 *src = info->s_pixels;
  2036 	int srcskip = info->s_skip;
  2037 	Uint8 *dst = info->d_pixels;
  2038 	int dstskip = info->d_skip;
  2039 	SDL_PixelFormat *srcfmt = info->src;
  2040 	int srcbpp = srcfmt->BytesPerPixel;
  2041 	SDL_PixelFormat *dstfmt = info->dst;
  2042 	int dstbpp = dstfmt->BytesPerPixel;
  2043 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2044 
  2045 	while ( height-- ) {
  2046 		DUFFS_LOOP(
  2047 		{
  2048 		        Uint32 Pixel;
  2049 			unsigned sR;
  2050 			unsigned sG;
  2051 			unsigned sB;
  2052 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2053 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2054 			dst += dstbpp;
  2055 			src += srcbpp;
  2056 		},
  2057 		width);
  2058 		src += srcskip;
  2059 		dst += dstskip;
  2060 	}
  2061 }
  2062 
  2063 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2064 {
  2065 	int width = info->d_width;
  2066 	int height = info->d_height;
  2067 	Uint8 *src = info->s_pixels;
  2068 	int srcskip = info->s_skip;
  2069 	Uint8 *dst = info->d_pixels;
  2070 	int dstskip = info->d_skip;
  2071 	SDL_PixelFormat *srcfmt = info->src;
  2072 	int srcbpp = srcfmt->BytesPerPixel;
  2073 	SDL_PixelFormat *dstfmt = info->dst;
  2074 	int dstbpp = dstfmt->BytesPerPixel;
  2075 	int c;
  2076 
  2077 	/* FIXME: should map alpha to [0..255] correctly! */
  2078 	while ( height-- ) {
  2079 		for ( c=width; c; --c ) {
  2080 		        Uint32 Pixel;
  2081 			unsigned sR, sG, sB, sA;
  2082 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2083 				      sR, sG, sB, sA);
  2084 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2085 				      sR, sG, sB, sA);
  2086 			dst += dstbpp;
  2087 			src += srcbpp;
  2088 		}
  2089 		src += srcskip;
  2090 		dst += dstskip;
  2091 	}
  2092 }
  2093 
  2094 static void BlitNto1Key(SDL_BlitInfo *info)
  2095 {
  2096 	int width = info->d_width;
  2097 	int height = info->d_height;
  2098 	Uint8 *src = info->s_pixels;
  2099 	int srcskip = info->s_skip;
  2100 	Uint8 *dst = info->d_pixels;
  2101 	int dstskip = info->d_skip;
  2102 	SDL_PixelFormat *srcfmt = info->src;
  2103 	const Uint8 *palmap = info->table;
  2104 	Uint32 ckey = srcfmt->colorkey;
  2105 	Uint32 rgbmask = ~srcfmt->Amask;
  2106 	int srcbpp;
  2107 	Uint32 Pixel;
  2108 	Uint8  sR, sG, sB;
  2109 
  2110 	/* Set up some basic variables */
  2111 	srcbpp = srcfmt->BytesPerPixel;
  2112 	ckey &= rgbmask;
  2113 
  2114 	if ( palmap == NULL ) {
  2115 		while ( height-- ) {
  2116 			DUFFS_LOOP(
  2117 			{
  2118 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2119 								sR, sG, sB);
  2120 				if ( (Pixel & rgbmask) != ckey ) {
  2121 				  	/* Pack RGB into 8bit pixel */
  2122 				  	*dst = ((sR>>5)<<(3+2))|
  2123 						((sG>>5)<<(2)) |
  2124 						((sB>>6)<<(0)) ;
  2125 				}
  2126 				dst++;
  2127 				src += srcbpp;
  2128 			},
  2129 			width);
  2130 			src += srcskip;
  2131 			dst += dstskip;
  2132 		}
  2133 	} else {
  2134 		while ( height-- ) {
  2135 			DUFFS_LOOP(
  2136 			{
  2137 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2138 								sR, sG, sB);
  2139 				if ( (Pixel & rgbmask) != ckey ) {
  2140 				  	/* Pack RGB into 8bit pixel */
  2141 				  	*dst = palmap[((sR>>5)<<(3+2))|
  2142 							((sG>>5)<<(2))  |
  2143 							((sB>>6)<<(0))  ];
  2144 				}
  2145 				dst++;
  2146 				src += srcbpp;
  2147 			},
  2148 			width);
  2149 			src += srcskip;
  2150 			dst += dstskip;
  2151 		}
  2152 	}
  2153 }
  2154 
  2155 static void Blit2to2Key(SDL_BlitInfo *info)
  2156 {
  2157 	int width = info->d_width;
  2158 	int height = info->d_height;
  2159 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2160 	int srcskip = info->s_skip;
  2161 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2162 	int dstskip = info->d_skip;
  2163 	Uint32 ckey = info->src->colorkey;
  2164 	Uint32 rgbmask = ~info->src->Amask;
  2165 
  2166 	/* Set up some basic variables */
  2167         srcskip /= 2;
  2168         dstskip /= 2;
  2169 	ckey &= rgbmask;
  2170 
  2171 	while ( height-- ) {
  2172 		DUFFS_LOOP(
  2173 		{
  2174 			if ( (*srcp & rgbmask) != ckey ) {
  2175 				*dstp = *srcp;
  2176 			}
  2177 			dstp++;
  2178 			srcp++;
  2179 		},
  2180 		width);
  2181 		srcp += srcskip;
  2182 		dstp += dstskip;
  2183 	}
  2184 }
  2185 
  2186 static void BlitNtoNKey(SDL_BlitInfo *info)
  2187 {
  2188 	int width = info->d_width;
  2189 	int height = info->d_height;
  2190 	Uint8 *src = info->s_pixels;
  2191 	int srcskip = info->s_skip;
  2192 	Uint8 *dst = info->d_pixels;
  2193 	int dstskip = info->d_skip;
  2194 	Uint32 ckey = info->src->colorkey;
  2195 	SDL_PixelFormat *srcfmt = info->src;
  2196 	SDL_PixelFormat *dstfmt = info->dst;
  2197 	int srcbpp = srcfmt->BytesPerPixel;
  2198 	int dstbpp = dstfmt->BytesPerPixel;
  2199 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2200 
  2201 	while ( height-- ) {
  2202 		DUFFS_LOOP(
  2203 		{
  2204 		        Uint32 Pixel;
  2205 			unsigned sR;
  2206 			unsigned sG;
  2207 			unsigned sB;
  2208 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2209 			if ( Pixel != ckey ) {
  2210 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2211 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2212 					      sR, sG, sB, alpha);
  2213 			}
  2214 			dst += dstbpp;
  2215 			src += srcbpp;
  2216 		},
  2217 		width);
  2218 		src += srcskip;
  2219 		dst += dstskip;
  2220 	}
  2221 }
  2222 
  2223 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2224 {
  2225 	int width = info->d_width;
  2226 	int height = info->d_height;
  2227 	Uint8 *src = info->s_pixels;
  2228 	int srcskip = info->s_skip;
  2229 	Uint8 *dst = info->d_pixels;
  2230 	int dstskip = info->d_skip;
  2231 	Uint32 ckey = info->src->colorkey;
  2232 	SDL_PixelFormat *srcfmt = info->src;
  2233 	SDL_PixelFormat *dstfmt = info->dst;
  2234 	Uint32 rgbmask = ~srcfmt->Amask;
  2235 
  2236 	Uint8 srcbpp;
  2237 	Uint8 dstbpp;
  2238 	Uint32 Pixel;
  2239 	Uint8  sR, sG, sB, sA;
  2240 
  2241 	/* Set up some basic variables */
  2242 	srcbpp = srcfmt->BytesPerPixel;
  2243 	dstbpp = dstfmt->BytesPerPixel;
  2244 	ckey &= rgbmask;
  2245 
  2246 	/* FIXME: should map alpha to [0..255] correctly! */
  2247 	while ( height-- ) {
  2248 		DUFFS_LOOP(
  2249 		{
  2250 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2251 				      sR, sG, sB, sA);
  2252 			if ( (Pixel & rgbmask) != ckey ) {
  2253 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2254 						sR, sG, sB, sA);
  2255 			}
  2256 			dst += dstbpp;
  2257 			src += srcbpp;
  2258 		},
  2259 		width);
  2260 		src += srcskip;
  2261 		dst += dstskip;
  2262 	}
  2263 }
  2264 
  2265 /* Normal N to N optimized blitters */
  /* One row of an optimized-blitter table.  The rows are initialised
     positionally further down, so the member order must not change. */
  2266 struct blit_table {
  2267 	Uint32 srcR, srcG, srcB;	/* source R, G, B masks */
  2268 	int dstbpp;			/* destination bytes per pixel (rows below use 2, 3 or 4) */
  2269 	Uint32 dstR, dstG, dstB;	/* destination R, G, B masks; a zero mask presumably acts as a wildcard (see MASKOK) */
  2270 	Uint32 blit_features;		/* feature bits; row comments label 2 "has-altivec" and 6 "has-altivec | dont-use-prefetch" */
  2271 	void *aux_data;			/* extra pointer stored with the blitter; the x86 rows put a converter routine here */
  2272 	SDL_loblit blitfunc;		/* the blit function for this row */
  2273 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;	/* alpha modes of the row, OR-ed together */
  2274 };
  /* Table for 8-bit sources: a single all-zero row with no blit function. */
  2275 static const struct blit_table normal_blit_1[] = {
  2276 	/* Default for 8-bit RGB source, an invalid combination */
  2277 	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
  2278 };
  /* Table for 16-bit sources.  The x86 assembly rows and the Altivec rows
     are compiled in only under USE_ASMBLIT and USE_ALTIVEC_BLITTERS
     respectively; the table-driven RGB565 blitters are always present, and
     the final all-zero row is the BlitNtoN fallback. */
  2279 static const struct blit_table normal_blit_2[] = {
  2280 #ifdef USE_ASMBLIT
  2281     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
  2282       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
  2283     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
  2284       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
  2285     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
  2286       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
  2287 #endif
  2288 #ifdef USE_ALTIVEC_BLITTERS
  2289     /* has-altivec */
  2290     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2291       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2292     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
  2293       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2294 #endif
  2295     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
  2296       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
  2297     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2298       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
  2299     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2300       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
  2301     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2302       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
  2303 
  2304     /* Default for 16-bit RGB source, used if no other blitter matches */
  2305     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2306 };
  /* Table for 24-bit sources: only the BlitNtoN fallback row. */
  2307 static const struct blit_table normal_blit_3[] = {
  2308 	/* Default for 24-bit RGB source, never optimized */
  2309     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2310 };
  /* Table for 32-bit sources.  With USE_ASMBLIT only the x86/MMX converter
     rows are compiled in; without it the optional Altivec rows and the C
     RGB888 -> RGB565/RGB555 blitters are used instead.  The final all-zero
     row is the BlitNtoN fallback in both configurations. */
  2311 static const struct blit_table normal_blit_4[] = {
  2312 #ifdef USE_ASMBLIT
  2313     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2314       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
  2315     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2316       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
  2317     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2318       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
  2319     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
  2320       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
  2321     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2322       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
  2323     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2324       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
  2325     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2326       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
  2327     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
  2328       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
  2329     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
  2330       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
  2331     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
  2332       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
  2333     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
  2334       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
  2335     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
  2336       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
  2337     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
  2338       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
  2339 #else
  2340 #ifdef USE_ALTIVEC_BLITTERS
  2341     /* has-altivec | dont-use-prefetch */
  2342     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2343       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2344     /* has-altivec */
  2345     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
  2346       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
  2347     /* has-altivec */
  2348     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
  2349       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
  2350 #endif
  2351     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
  2352       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
  2353     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
  2354       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
  2355 #endif
  2356 	/* Default for 32-bit RGB source, used if no other blitter matches */
  2357 	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
  2358 };
  /* The four per-source-depth tables in order: 8-, 16-, 24- and 32-bit
     sources (presumably indexed by source bytes per pixel minus one --
     confirm against SDL_CalculateBlitN). */
  2359 static const struct blit_table *normal_blit[] = {
  2360 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2361 };
  2362 
  /* True when mask x equals table mask y, or when the table mask y is zero
     (a zero table entry matches any mask). */
  #define MASKOK(x, y) ( ((x) == (y)) || !(y) )
  2365 
  2366 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2367 {
  2368 	struct private_swaccel *sdata;
  2369 	SDL_PixelFormat *srcfmt;
  2370 	SDL_PixelFormat *dstfmt;
  2371 	const struct blit_table *table;
  2372 	int which;
  2373 	SDL_loblit blitfun;
  2374 
  2375 	/* Set up data for choosing the blit */
  2376 	sdata = surface->map->sw_data;
  2377 	srcfmt = surface->format;
  2378 	dstfmt = surface->map->dst->format;
  2379 
  2380 	if ( blit_index & 2 ) {
  2381 	        /* alpha or alpha+colorkey */
  2382 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2383 	}
  2384 
  2385 	/* We don't support destinations less than 8-bits */
  2386 	if ( dstfmt->BitsPerPixel < 8 ) {
  2387 		return(NULL);
  2388 	}
  2389 	
  2390 	if(blit_index == 1) {
  2391 	    /* colorkey blit: Here we don't have too many options, mostly
  2392 	       because RLE is the preferred fast way to deal with this.
  2393 	       If a particular case turns out to be useful we'll add it. */
  2394 
  2395 	    if(srcfmt->BytesPerPixel == 2
  2396 	       && surface->map->identity)
  2397 		return Blit2to2Key;
  2398 	    else if(dstfmt->BytesPerPixel == 1)
  2399 		return BlitNto1Key;
  2400 	    else {
  2401 #ifdef USE_ALTIVEC_BLITTERS
  2402         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2403             return Blit32to32KeyAltivec;
  2404         } else
  2405 #endif
  2406 
  2407 		if(srcfmt->Amask && dstfmt->Amask)
  2408 		    return BlitNtoNKeyCopyAlpha;
  2409 		else
  2410 		    return BlitNtoNKey;
  2411 	    }
  2412 	}
  2413 
  2414 	blitfun = NULL;
  2415 	if ( dstfmt->BitsPerPixel == 8 ) {
  2416 		/* We assume 8-bit destinations are palettized */
  2417 		if ( (srcfmt->BytesPerPixel == 4) &&
  2418 		     (srcfmt->Rmask == 0x00FF0000) &&
  2419 		     (srcfmt->Gmask == 0x0000FF00) &&
  2420 		     (srcfmt->Bmask == 0x000000FF) ) {
  2421 			if ( surface->map->table ) {
  2422 				blitfun = Blit_RGB888_index8_map;
  2423 			} else {
  2424 #ifdef USE_ASMBLIT
  2425 				sdata->aux_data = ConvertX86p32_8RGB332;
  2426 				blitfun = ConvertX86;
  2427 #else
  2428 				blitfun = Blit_RGB888_index8;
  2429 #endif
  2430 			}
  2431 		} else {
  2432 			blitfun = BlitNto1;
  2433 		}
  2434 	} else {
  2435 		/* Now the meat, choose the blitter we want */
  2436 		int a_need = 0;
  2437 		if(dstfmt->Amask)
  2438 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2439 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2440 		for ( which=0; table[which].dstbpp; ++which ) {
  2441 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2442 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2443 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2444 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2445 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2446 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2447 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2448 			    (a_need & table[which].alpha) == a_need &&
  2449 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2450 				break;
  2451 		}
  2452 		sdata->aux_data = table[which].aux_data;
  2453 		blitfun = table[which].blitfunc;
  2454 
  2455 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2456 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2457 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2458 			     srcfmt->Rmask == dstfmt->Rmask &&
  2459 			     srcfmt->Gmask == dstfmt->Gmask &&
  2460 			     srcfmt->Bmask == dstfmt->Bmask ) {
  2461 				blitfun = Blit4to4MaskAlpha;
  2462 			} else if ( a_need == COPY_ALPHA ) {
  2463 			    blitfun = BlitNtoNCopyAlpha;
  2464 			}
  2465 		}
  2466 	}
  2467 
  2468 #ifdef DEBUG_ASM
  2469 #ifdef USE_ASMBLIT
  2470 	if ( blitfun == ConvertMMX )
  2471 		fprintf(stderr, "Using mmx blit\n");
  2472 	else
  2473 	if ( blitfun == ConvertX86 )
  2474 		fprintf(stderr, "Using asm blit\n");
  2475 	else
  2476 #endif
  2477 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
  2478 		fprintf(stderr, "Using C blit\n");
  2479 	else
  2480 		fprintf(stderr, "Using optimized C blit\n");
  2481 #endif /* DEBUG_ASM */
  2482 
  2483 	return(blitfun);
  2484 }