src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 26 Feb 2006 19:37:40 +0000
changeset 1443 9ebbbb4ae53b
parent 1428 5f52867ba65c
child 1487 dc6b59e925a2
permissions -rw-r--r--
Fixed some OpenWatcom warnings
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_endian.h"
    26 #include "SDL_cpuinfo.h"
    27 #include "SDL_blit.h"
    28 
    29 /* Functions to blit from N-bit surfaces to other surfaces */
    30 
    31 #if SDL_ALTIVEC_BLITTERS
    32 #ifdef HAVE_ALTIVEC_H
    33 #include <altivec.h>
    34 #endif
    35 #define assert(X) /* assertions are compiled out: every assert(...) in the AltiVec code expands to nothing */
    36 #ifdef __MACOSX__
    37 #include <sys/sysctl.h>
    38 static size_t GetL3CacheSize( void )
    39 {
    40     /* Query the "hw.l3cachesize" sysctl; a failed query is reported as 0. */
    41     const char sysctl_name[] = "hw.l3cachesize";
    42     u_int64_t cache_bytes = 0;
    43     size_t value_len = sizeof( cache_bytes );
    44 
    45     if( sysctlbyname( sysctl_name, &cache_bytes, &value_len, NULL, 0 ) != 0 ) {
    46         return 0;
    47     }
    48     return cache_bytes;
    49 }
    50 #else
    51 static size_t GetL3CacheSize( void )
    52 {
    53     /* XXX: nothing to query here, so just guess a G4 (2 MB) */
    54     return 2 * 1024 * 1024;
    55 }
    56 #endif /* __MACOSX__ */
    57 
    58 #if (defined(__MACOSX__) && (__GNUC__ < 4))
       /*
        * Vector literal helpers.  Two spellings are provided: a parenthesised
        * element list when building on Mac OS X with a GCC older than 4, and a
        * brace-enclosed initializer list for every other compiler.
        * VECUINT8_LITERAL takes 16 byte values, VECUINT16_LITERAL takes 8
        * halfword values.
        */
    59     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    60         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    61     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    62         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    63 #else
    64     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    65         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    66     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    67         (vector unsigned short) { a,b,c,d,e,f,g,h }
    68 #endif
    69 
    70 #define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F) /* non-zero when x is not 16-byte aligned */
       /*
        * Build a 16-byte permute pattern that applies the per-pixel byte order
        * (a,b,c,d) to each of the four 32-bit pixels in a vector.  The literal is
        * spelled through VECUINT8_LITERAL so it uses whichever vector-literal
        * syntax the compiler accepts, and every argument is parenthesised so an
        * expression argument cannot rebind against the additions.
        */
    71 #define VSWIZZLE32(a,b,c,d) VECUINT8_LITERAL( \
    72                                  0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
    73                                  0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
    74                                  0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
    75                                  0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )
    76 
    77 #define MAKE8888(dstfmt, r, g, b, a)  /* pack four channel values into dstfmt's 32-bit pixel layout */ \
    78     ( (((Uint32)(r)<<(dstfmt)->Rshift)&(dstfmt)->Rmask) | \
    79       (((Uint32)(g)<<(dstfmt)->Gshift)&(dstfmt)->Gmask) | \
    80       (((Uint32)(b)<<(dstfmt)->Bshift)&(dstfmt)->Bmask) | \
    81       (((Uint32)(a)<<(dstfmt)->Ashift)&(dstfmt)->Amask) )
       /*
        * The channels are cast to Uint32 before shifting: the callers pass Uint8
        * values, which would otherwise be promoted to signed int and could be
        * shifted into the sign bit.  Arguments are parenthesised so that
        * expression arguments keep their meaning.
        */
    82 
    83 /*
    84  * Data Stream Touch...Altivec cache prefetching.
    85  *
    86  *  Don't use this on a G5...however, the speed boost is very significant
    87  *   on a G4.
    88  */
       /* Stream channel ids passed to vec_dstt / vec_dstst / vec_dss by the prefetching blitter. */
    89 #define DST_CHAN_SRC 1
    90 #define DST_CHAN_DEST 2
    91 
    92 /* macro to set DST control word value... */
       /* size lands in bits 24 and up, count in bits 16 and up, stride in the low bits. */
    93 #define DST_CTRL(size, count, stride) \
    94     (((size) << 24) | ((count) << 16) | (stride))
    95 
       /*
        * Permute vector used with vec_perm(first, second, aligner) to rebuild 16
        * source bytes from two aligned loads.  For a pointer that is not 16-byte
        * aligned this is vec_lvsl(0, src); for an aligned pointer it is
        * vec_lvsl(8, src) with 8 added to every byte.
        * NOTE(review): the aligned case presumably yields indices that select the
        * second vec_perm operand only -- confirm against the AltiVec reference.
        */
    96 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    97     ? vec_lvsl(0, src) \
    98     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    99 
   100 /* Calculate the permute vector used for 32->32 swizzling */
       /*
        * srcfmt / dstfmt describe the two 32-bit pixel layouts; a NULL pointer
        * stands for the built-in ARGB layout (default_pixel_format below).
        * Returns a 16-byte index vector.  The blitters in this file pass it as
        * the third argument of vec_perm(pixels, fill, indices).
        */
   101 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
   102                                   const SDL_PixelFormat *dstfmt)
   103 {
   104     /*
   105     * We have to assume that the bits that aren't used by other
   106      *  colors are alpha, and that it's one complete byte, since some formats
   107      *  leave alpha with a zero mask, but we should still swizzle the bits.
   108      */
   109     /* ARGB */
   110     const static struct SDL_PixelFormat default_pixel_format = {
   111         NULL, 0, 0,
   112         0, 0, 0, 0,
   113         16, 8, 0, 24,
   114         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   115         0, 0};
   116     if (!srcfmt) {
   117         srcfmt = &default_pixel_format;
   118     }
   119     if (!dstfmt) {
   120         dstfmt = &default_pixel_format;
   121     }
           /* Per-pixel base offsets: the same 4-byte pattern is repeated for the
              pixels starting at byte 0, 4, 8 and 12 of the vector. */
   122     vector unsigned char plus = VECUINT8_LITERAL(
   123                                       0x00, 0x00, 0x00, 0x00,
   124                                       0x04, 0x04, 0x04, 0x04,
   125                                       0x08, 0x08, 0x08, 0x08,
   126                                       0x0C, 0x0C, 0x0C, 0x0C );
   127     vector unsigned char vswiz;
   128     vector unsigned int srcvec;
           /* RESHIFT maps a channel shift of 0/8/16/24 bits to 3/2/1/0; that value
              is then shifted into the byte lane the destination uses for the
              same channel. */
   129 #define RESHIFT(X) (3 - ((X) >> 3))
   130     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   131     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   132     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   133     Uint32 amask;
   134     /* Use zero for alpha if either surface doesn't have alpha */
           /* Index 0x10 is one past the 16 bytes of the first vec_perm operand;
              presumably it selects byte 0 of the second ("fill") operand --
              confirm against the AltiVec reference. */
   135     if (dstfmt->Amask) {
   136         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
   137     } else {    
               /* No destination alpha: put 0x10 in every byte lane that is not
                  covered by the destination's R, G or B mask. */
   138         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
   139     }           
   140 #undef RESHIFT  
           /* Store the 4-byte pattern in element 0, replicate it to all four
              elements, then add the per-pixel base offsets. */
   141     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
   142     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
   143     return(vswiz);
   144 }
   145 
   146 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
   147 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
           /*
            * Blit 32-bit pixels (layout given by info->src) to 16-bit RGB565.
            *
            * Each row is handled in three phases: scalar pixels until dst is
            * 16-byte aligned, then eight pixels per iteration with AltiVec,
            * then the scalar remainder.  The vector phase is written to give
            * exactly the same result as the scalar phase
            * (R & 0xF8, G & 0xFC, B & 0xF8 packed as 5/6/5).
            */
   148     int height = info->d_height;
   149     Uint8 *src = (Uint8 *) info->s_pixels;
   150     int srcskip = info->s_skip;
   151     Uint8 *dst = (Uint8 *) info->d_pixels;
   152     int dstskip = info->d_skip;
   153     SDL_PixelFormat *srcfmt = info->src;
   154     vector unsigned char valpha = vec_splat_u8(0);
           /* swizzle the source layout to the built-in ARGB layout */
   155     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
           /* picks bytes 0 and 2 of every pixel of two source vectors, giving
              one halfword per pixel whose low byte is the green byte */
   156     vector unsigned char vgmerge = VECUINT8_LITERAL(
   157         0x00, 0x02, 0x00, 0x06,
   158         0x00, 0x0a, 0x00, 0x0e,
   159         0x00, 0x12, 0x00, 0x16,
   160         0x00, 0x1a, 0x00, 0x1e);
   161     vector unsigned short v1 = vec_splat_u16(1);
   162     vector unsigned short v3 = vec_splat_u16(3);
           /* Blue mask.  It must be 0x001f rather than 0x003f: bit 5 of the
              1555 pixel is the low bit of its 5-bit green field, and letting
              it through ORs a wrong bit into the 6-bit green LSB that vgpixel
              already supplies, so the vector result would differ from the
              scalar path. */
   163     vector unsigned short v1f = VECUINT16_LITERAL(
   164         0x001f, 0x001f, 0x001f, 0x001f,
   165         0x001f, 0x001f, 0x001f, 0x001f);
   166     vector unsigned short vfc = VECUINT16_LITERAL(
   167         0x00fc, 0x00fc, 0x00fc, 0x00fc,
   168         0x00fc, 0x00fc, 0x00fc, 0x00fc);
           /* 0xF8 in every byte, shifted left by 8 per halfword: 0xF800.
              (A splat of -7 would give 0xF900.) */
   169     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-8);
   170     vf800 = vec_sl(vf800, vec_splat_u16(8));
   171 
   172     while (height--) {
   173         vector unsigned char valigner;
   174         vector unsigned char voverflow;
   175         vector unsigned char vsrc;
   176 
   177         int width = info->d_width;
   178         int extrawidth;
   179 
   180         /* do scalar until we can align... */
   181 #define ONE_PIXEL_BLEND(condition, widthvar) \
   182         while (condition) { \
   183             Uint32 Pixel; \
   184             unsigned sR, sG, sB, sA; \
   185             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   186                           sR, sG, sB, sA); \
   187             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   188                                 ((sG << 3) & 0x000007E0) | \
   189                                 ((sB >> 3) & 0x0000001F)); \
   190             dst += 2; \
   191             src += 4; \
   192             widthvar--; \
   193         }
   194 
   195         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   196 
   197         /* After all that work, here's the vector part! */
   198         extrawidth = (width % 8);  /* trailing unaligned stores */
   199         width -= extrawidth;
   200         vsrc = vec_ld(0, src);
   201         valigner = VEC_ALIGNER(src);
   202 
   203         while (width) {
   204             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   205             vector unsigned int vsrc1, vsrc2;
   206             vector unsigned char vdst;
   207 
                   /* first four source pixels, realigned and swizzled to ARGB */
   208             voverflow = vec_ld(15, src);
   209             vsrc = vec_perm(vsrc, voverflow, valigner);
   210             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   211             src += 16;
   212             vsrc = voverflow;
                   /* next four source pixels */
   213             voverflow = vec_ld(15, src);
   214             vsrc = vec_perm(vsrc, voverflow, valigner);
   215             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
   216             /* 1555 */
   217             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
                   /* green needs 6 bits, so it is taken from the full green bytes */
   218             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
   219             vgpixel = vec_and(vgpixel, vfc);
   220             vgpixel = vec_sl(vgpixel, v3);
                   /* red: move the 5 red bits of the 1555 pixel up to bits 15..11 */
   221             vrpixel = vec_sl(vpixel, v1);
   222             vrpixel = vec_and(vrpixel, vf800);
                   /* blue: the low 5 bits are already in place */
   223             vbpixel = vec_and(vpixel, v1f);
   224             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
   225             /* 565 */
   226             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
   227             vec_st(vdst, 0, dst);
   228 
   229             width -= 8;
   230             src += 16;
   231             dst += 16;
   232             vsrc = voverflow;
   233         }
   234 
   235         assert(width == 0);
   236 
   237         /* finish the trailing pixels that did not fill a whole vector */
   238         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   239 #undef ONE_PIXEL_BLEND
   240 
   241         src += srcskip;  /* move to next row, accounting for pitch. */
   242         dst += dstskip;
   243     }
   244 
   245 
   246 }
   247 
   248 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
           /*
            * Blit 16-bit RGB565 pixels to a 32-bit surface whose layout is given
            * by info->dst.
            *
            * Each row is handled in three phases: scalar pixels until dst is
            * 16-byte aligned, then eight pixels per iteration with AltiVec,
            * then the scalar remainder.  Destination alpha is srcfmt->alpha
            * when the destination has an alpha mask, otherwise 0.  The vector
            * phase masks every channel so that it produces the same bytes as
            * the scalar phase (R & 0xF8, G & 0xFC, B & 0xF8).
            */
   249     int height = info->d_height;
   250     Uint8 *src = (Uint8 *) info->s_pixels;
   251     int srcskip = info->s_skip;
   252     Uint8 *dst = (Uint8 *) info->d_pixels;
   253     int dstskip = info->d_skip;
   254     SDL_PixelFormat *srcfmt = info->src;
   255     SDL_PixelFormat *dstfmt = info->dst;
   256     unsigned alpha;
   257     vector unsigned char valpha;
   258     vector unsigned char vpermute;
   259     vector unsigned short vf800;
           vector unsigned short vfc00;
   260     vector unsigned int v8 = vec_splat_u32(8);
   261     vector unsigned int v16 = vec_add(v8, v8);
   262     vector unsigned short v2 = vec_splat_u16(2);
   263     vector unsigned short v3 = vec_splat_u16(3);
   264     /* 
   265         0x10 - 0x1f is the alpha
   266         0x00 - 0x0e evens are the red
   267         0x01 - 0x0f odds are zero
   268     */
   269     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   270         0x10, 0x00, 0x01, 0x01,
   271         0x10, 0x02, 0x01, 0x01,
   272         0x10, 0x04, 0x01, 0x01,
   273         0x10, 0x06, 0x01, 0x01
   274     );
           /* same pattern for pixels 4..7: add 8 to the red index of every word */
   275     vector unsigned char vredalpha2 = (vector unsigned char) (
   276         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   277     );
   278     /*
   279         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   280         0x11 - 0x1f odds are blue
   281     */
   282     vector unsigned char vblue1 = VECUINT8_LITERAL(
   283         0x00, 0x01, 0x02, 0x11,
   284         0x04, 0x05, 0x06, 0x13,
   285         0x08, 0x09, 0x0a, 0x15,
   286         0x0c, 0x0d, 0x0e, 0x17
   287     );
   288     vector unsigned char vblue2 = (vector unsigned char)(
   289         vec_add((vector unsigned int)vblue1, v8)
   290     );
   291     /*
   292         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   293         0x10 - 0x1e evens are green
   294     */
   295     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   296         0x00, 0x01, 0x10, 0x03,
   297         0x04, 0x05, 0x12, 0x07,
   298         0x08, 0x09, 0x14, 0x0b,
   299         0x0c, 0x0d, 0x16, 0x0f
   300     );
   301     vector unsigned char vgreen2 = (vector unsigned char)(
   302         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   303     );
   304     
   305 
   306     assert(srcfmt->BytesPerPixel == 2);
   307     assert(dstfmt->BytesPerPixel == 4);
   308 
           /* 0xF8 in every byte, shifted left by 8 per halfword: 0xF800.
              (A splat of -7 would give 0xF900 and leak a green bit into the
              low bit of the red byte.) */
   309     vf800 = (vector unsigned short)vec_splat_u8(-8);
   310     vf800 = vec_sl(vf800, vec_splat_u16(8));
           /* 0xFC00: keeps only the six green bits in the high byte of vG */
           vfc00 = (vector unsigned short)vec_splat_u8(-4);
           vfc00 = vec_sl(vfc00, vec_splat_u16(8));
   311 
           /* fill byte used wherever the permutes select "alpha" */
   312     if (dstfmt->Amask && srcfmt->alpha) {
   313         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   314         valpha = vec_splat(valpha, 0);
   315     } else {
   316         alpha = 0;
   317         valpha = vec_splat_u8(0);
   318     }
   319 
           /* final swizzle from the built-in ARGB layout to the destination layout */
   320     vpermute = calc_swizzle32(NULL, dstfmt);
   321     while (height--) {
   322         vector unsigned char valigner;
   323         vector unsigned char voverflow;
   324         vector unsigned char vsrc;
   325 
   326         int width = info->d_width;
   327         int extrawidth;
   328 
   329         /* do scalar until we can align... */
   330 #define ONE_PIXEL_BLEND(condition, widthvar) \
   331         while (condition) { \
   332             unsigned sR, sG, sB; \
   333             unsigned short Pixel = *((unsigned short *)src); \
   334             sR = (Pixel >> 8) & 0xf8; \
   335             sG = (Pixel >> 3) & 0xfc; \
   336             sB = (Pixel << 3) & 0xf8; \
   337             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   338             src += 2; \
   339             dst += 4; \
   340             widthvar--; \
   341         }
   342         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   343 
   344         /* After all that work, here's the vector part! */
   345         extrawidth = (width % 8);  /* trailing unaligned stores */
   346         width -= extrawidth;
   347         vsrc = vec_ld(0, src);
   348         valigner = VEC_ALIGNER(src);
   349 
   350         while (width) {
   351             vector unsigned short vR, vG, vB;
   352             vector unsigned char vdst1, vdst2;
   353 
   354             voverflow = vec_ld(15, src);
   355             vsrc = vec_perm(vsrc, voverflow, valigner);
   356 
                   /* red: top five bits, already in the high byte */
   357             vR = vec_and((vector unsigned short)vsrc, vf800);
                   /* blue: shifted so the low byte is B << 3 */
   358             vB = vec_sl((vector unsigned short)vsrc, v3);
                   /* green: shifted so the high byte starts with the six green
                      bits; the mask drops the blue bits that follow them */
   359             vG = vec_and(vec_sl(vB, v2), vfc00);
   360 
                   /* pixels 0..3: build A,R,0,0 then drop in blue and green */
   361             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   362             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   363             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   364             vdst1 = vec_perm(vdst1, valpha, vpermute);
   365             vec_st(vdst1, 0, dst);
   366 
                   /* pixels 4..7 */
   367             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   368             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   369             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   370             vdst2 = vec_perm(vdst2, valpha, vpermute);
   371             vec_st(vdst2, 16, dst);
   372             
   373             width -= 8;
   374             dst += 32;
   375             src += 16;
   376             vsrc = voverflow;
   377         }
   378 
   379         assert(width == 0);
   380 
   381 
   382         /* finish the trailing pixels that did not fill a whole vector */
   383         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   384 #undef ONE_PIXEL_BLEND
   385 
   386         src += srcskip;  /* move to next row, accounting for pitch. */
   387         dst += dstskip;
   388     }
   389 
   390 }
   391 
   392 
   393 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
           /*
            * Blit 16-bit RGB555 pixels to a 32-bit surface whose layout is given
            * by info->dst.
            *
            * Each row is handled in three phases: scalar pixels until dst is
            * 16-byte aligned, then eight pixels per iteration with AltiVec,
            * then the scalar remainder.  Destination alpha is srcfmt->alpha
            * when the destination has an alpha mask, otherwise 0.  The vector
            * phase masks every channel so that it produces the same bytes as
            * the scalar phase (each channel & 0xF8).
            */
   394     int height = info->d_height;
   395     Uint8 *src = (Uint8 *) info->s_pixels;
   396     int srcskip = info->s_skip;
   397     Uint8 *dst = (Uint8 *) info->d_pixels;
   398     int dstskip = info->d_skip;
   399     SDL_PixelFormat *srcfmt = info->src;
   400     SDL_PixelFormat *dstfmt = info->dst;
   401     unsigned alpha;
   402     vector unsigned char valpha;
   403     vector unsigned char vpermute;
   404     vector unsigned short vf800;
   405     vector unsigned int v8 = vec_splat_u32(8);
   406     vector unsigned int v16 = vec_add(v8, v8);
   407     vector unsigned short v1 = vec_splat_u16(1);
   408     vector unsigned short v3 = vec_splat_u16(3);
   409     /* 
   410         0x10 - 0x1f is the alpha
   411         0x00 - 0x0e evens are the red
   412         0x01 - 0x0f odds are zero
   413     */
   414     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
   415         0x10, 0x00, 0x01, 0x01,
   416         0x10, 0x02, 0x01, 0x01,
   417         0x10, 0x04, 0x01, 0x01,
   418         0x10, 0x06, 0x01, 0x01
   419     );
           /* same pattern for pixels 4..7: add 8 to the red index of every word */
   420     vector unsigned char vredalpha2 = (vector unsigned char)(
   421         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
   422     );
   423     /*
   424         0x00 - 0x0f is ARxx ARxx ARxx ARxx
   425         0x11 - 0x1f odds are blue
   426     */
   427     vector unsigned char vblue1 = VECUINT8_LITERAL(
   428         0x00, 0x01, 0x02, 0x11,
   429         0x04, 0x05, 0x06, 0x13,
   430         0x08, 0x09, 0x0a, 0x15,
   431         0x0c, 0x0d, 0x0e, 0x17
   432     );
   433     vector unsigned char vblue2 = (vector unsigned char)(
   434         vec_add((vector unsigned int)vblue1, v8)
   435     );
   436     /*
   437         0x00 - 0x0f is ARxB ARxB ARxB ARxB
   438         0x10 - 0x1e evens are green
   439     */
   440     vector unsigned char vgreen1 = VECUINT8_LITERAL(
   441         0x00, 0x01, 0x10, 0x03,
   442         0x04, 0x05, 0x12, 0x07,
   443         0x08, 0x09, 0x14, 0x0b,
   444         0x0c, 0x0d, 0x16, 0x0f
   445     );
   446     vector unsigned char vgreen2 = (vector unsigned char)(
   447         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
   448     );
   449     
   450 
   451     assert(srcfmt->BytesPerPixel == 2);
   452     assert(dstfmt->BytesPerPixel == 4);
   453 
           /* 0xF8 in every byte, shifted left by 8 per halfword: 0xF800.
              (A splat of -7 would give 0xF900 and leak a neighbouring bit
              into the low bit of the channel byte.) */
   454     vf800 = (vector unsigned short)vec_splat_u8(-8);
   455     vf800 = vec_sl(vf800, vec_splat_u16(8));
   456 
           /* fill byte used wherever the permutes select "alpha" */
   457     if (dstfmt->Amask && srcfmt->alpha) {
   458         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
   459         valpha = vec_splat(valpha, 0);
   460     } else {
   461         alpha = 0;
   462         valpha = vec_splat_u8(0);
   463     }
   464 
           /* final swizzle from the built-in ARGB layout to the destination layout */
   465     vpermute = calc_swizzle32(NULL, dstfmt);
   466     while (height--) {
   467         vector unsigned char valigner;
   468         vector unsigned char voverflow;
   469         vector unsigned char vsrc;
   470 
   471         int width = info->d_width;
   472         int extrawidth;
   473 
   474         /* do scalar until we can align... */
   475 #define ONE_PIXEL_BLEND(condition, widthvar) \
   476         while (condition) { \
   477             unsigned sR, sG, sB; \
   478             unsigned short Pixel = *((unsigned short *)src); \
   479             sR = (Pixel >> 7) & 0xf8; \
   480             sG = (Pixel >> 2) & 0xf8; \
   481             sB = (Pixel << 3) & 0xf8; \
   482             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   483             src += 2; \
   484             dst += 4; \
   485             widthvar--; \
   486         }
   487         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   488 
   489         /* After all that work, here's the vector part! */
   490         extrawidth = (width % 8);  /* trailing unaligned stores */
   491         width -= extrawidth;
   492         vsrc = vec_ld(0, src);
   493         valigner = VEC_ALIGNER(src);
   494 
   495         while (width) {
   496             vector unsigned short vR, vG, vB;
   497             vector unsigned char vdst1, vdst2;
   498 
   499             voverflow = vec_ld(15, src);
   500             vsrc = vec_perm(vsrc, voverflow, valigner);
   501 
                   /* red: shift the five red bits up to bits 15..11, then mask */
   502             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
                   /* blue: shifted so the low byte is B << 3 */
   503             vB = vec_sl((vector unsigned short)vsrc, v3);
                   /* green: shifted so the high byte starts with the five green
                      bits; the mask drops the blue bits that follow them */
   504             vG = vec_and(vec_sl(vB, v3), vf800);
   505 
                   /* pixels 0..3: build A,R,0,0 then drop in blue and green */
   506             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   507             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   508             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   509             vdst1 = vec_perm(vdst1, valpha, vpermute);
   510             vec_st(vdst1, 0, dst);
   511 
                   /* pixels 4..7 */
   512             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   513             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   514             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   515             vdst2 = vec_perm(vdst2, valpha, vpermute);
   516             vec_st(vdst2, 16, dst);
   517             
   518             width -= 8;
   519             dst += 32;
   520             src += 16;
   521             vsrc = voverflow;
   522         }
   523 
   524         assert(width == 0);
   525 
   526 
   527         /* finish the trailing pixels that did not fill a whole vector */
   528         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   529 #undef ONE_PIXEL_BLEND
   530 
   531         src += srcskip;  /* move to next row, accounting for pitch. */
   532         dst += dstskip;
   533     }
   534 
   535 }
   536 
   537 static void BlitNtoNKey(SDL_BlitInfo *info);
   538 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   539 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   540 {
           /*
            * Colorkeyed blit between two 32-bit formats.
            *
            * Rows narrower than 16 pixels are handed to the generic
            * BlitNtoNKey / BlitNtoNKeyCopyAlpha routines.  Otherwise each row
            * is processed as: scalar pixels until dstp is 16-byte aligned, four
            * pixels per iteration with AltiVec, then the scalar remainder.
            *
            * A source pixel is transparent when its R, G and B bits equal the
            * colorkey; bits outside the RGB masks are ignored on every path
            * (scalar and vector), so all paths agree.
            */
   541     int height = info->d_height;
   542     Uint32 *srcp = (Uint32 *) info->s_pixels;
   543     int srcskip = info->s_skip;
   544     Uint32 *dstp = (Uint32 *) info->d_pixels;
   545     int dstskip = info->d_skip;
   546     SDL_PixelFormat *srcfmt = info->src;
   547     int srcbpp = srcfmt->BytesPerPixel;
   548     SDL_PixelFormat *dstfmt = info->dst;
   549     int dstbpp = dstfmt->BytesPerPixel;
   550     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   551     unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   552     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   553     Uint32 ckey = info->src->colorkey;
   554     vector unsigned int valpha;
   555     vector unsigned char vpermute;
   556     vector unsigned char vzero;
   557     vector unsigned int vckey;
   558     vector unsigned int vrgbmask;
   559     vpermute = calc_swizzle32(srcfmt, dstfmt);
   560     if (info->d_width < 16) {
               /* too narrow to be worth vectorising; a void function must not
                  "return" the value of another void call in C, so call first */
   561         if(copy_alpha) {
   562             BlitNtoNKeyCopyAlpha(info);
   563         } else {
   564             BlitNtoNKey(info);
   565         }
               return;
   566     }
   567     vzero = vec_splat_u8(0);
           /* fill vector used by the swizzle wherever it selects "alpha" */
   568     if (alpha) {
   569         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
   570         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
   571     } else {
   572         valpha = (vector unsigned int)vzero;
   573     }
           /* replicate the masked key and the RGB mask into all four elements */
   574     ckey &= rgbmask;
   575     ((unsigned int *)(char*)&vckey)[0] = ckey;
   576     vckey = vec_splat(vckey, 0);
   577     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
   578     vrgbmask = vec_splat(vrgbmask, 0);
   579 
   580     while (height--) {
   581 #define ONE_PIXEL_BLEND(condition, widthvar) \
   582         if (copy_alpha) { \
   583             while (condition) { \
   584                 Uint32 Pixel; \
   585                 unsigned sR, sG, sB, sA; \
   586                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   587                           sR, sG, sB, sA); \
   588                 if ( (Pixel & rgbmask) != ckey ) { \
   589                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   590                             sR, sG, sB, sA); \
   591                 } \
   592                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   593                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   594                 widthvar--; \
   595             } \
   596         } else { \
   597             while (condition) { \
   598                 Uint32 Pixel; \
   599                 unsigned sR, sG, sB; \
   600                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   601                 if ( (Pixel & rgbmask) != ckey ) { \
   602                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   603                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   604                               sR, sG, sB, alpha); \
   605                 } \
   606                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   607                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   608                 widthvar--; \
   609             } \
   610         }
   611         int width = info->d_width;
               /* scalar pixels until the destination is 16-byte aligned */
   612         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   613         assert(width > 0);
   614         if (width > 0) {
   615             int extrawidth = (width % 4);
   616             vector unsigned char valigner = VEC_ALIGNER(srcp);
   617             vector unsigned int vs = vec_ld(0, srcp);
   618             width -= extrawidth;
   619             assert(width >= 4);
   620             while (width) {
   621                 vector unsigned char vsel;
   622                 vector unsigned int vd;
   623                 vector unsigned int voverflow = vec_ld(15, srcp);
   624                 /* load the source vec */
   625                 vs = vec_perm(vs, voverflow, valigner);
   626                 /* vsel is set for items that match the key */
                       /* compare the MASKED source with the key; comparing vs
                          itself would discard the mask computed just above */
   627                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
   628                 vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey);
   629                 /* permute the src vec to the dest format */
   630                 vs = vec_perm(vs, valpha, vpermute);
   631                 /* load the destination vec */
   632                 vd = vec_ld(0, dstp);
   633                 /* select the source and dest into vs */
   634                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
   635                 
   636                 vec_st(vd, 0, dstp);
   637                 srcp += 4;
   638                 width -= 4;
   639                 dstp += 4;
   640                 vs = voverflow;
   641             }
                   /* trailing pixels that did not fill a whole vector */
   642             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   643 #undef ONE_PIXEL_BLEND
                   /* skips are byte counts; the pointers step in 32-bit pixels */
   644             srcp += srcskip >> 2;
   645             dstp += dstskip >> 2;
   646         }
   647     }
   648 }
   649 
   650 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   651 /* Use this on a G5 */
       /*
        * Each row is converted as: scalar pixels until dst is 16-byte aligned,
        * four pixels per iteration with vec_perm, then the scalar remainder.
        * When the destination has an alpha mask and the source has none, the
        * alpha written is srcfmt->alpha on both the vector and scalar paths.
        */
   652 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
   653 {
   654     int height = info->d_height;
   655     Uint32 *src = (Uint32 *) info->s_pixels;
   656     int srcskip = info->s_skip;
   657     Uint32 *dst = (Uint32 *) info->d_pixels;
   658     int dstskip = info->d_skip;
   659     SDL_PixelFormat *srcfmt = info->src;
   660     SDL_PixelFormat *dstfmt = info->dst;
           /* fill vector for the swizzle; holds the surface alpha when needed */
   661     vector unsigned int vzero = vec_splat_u32(0);
   662     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   663     if (dstfmt->Amask && !srcfmt->Amask) {
   664         if (srcfmt->alpha) {
   665             vector unsigned char valpha;
   666             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   667             vzero = (vector unsigned int)vec_splat(valpha, 0);
   668         }
   669     }
   670 
   671     assert(srcfmt->BytesPerPixel == 4);
   672     assert(dstfmt->BytesPerPixel == 4);
   673 
   674     while (height--) {
   675         vector unsigned char valigner;
   676         vector unsigned int vbits;
   677         vector unsigned int voverflow;
   678         Uint32 bits;
   679         Uint8 r, g, b, a;
   680 
   681         int width = info->d_width;
   682         int extrawidth;
   683 
   684         /* do scalar until we can align... */
   685         while ((UNALIGNED_PTR(dst)) && (width)) {
   686             bits = *(src++);
   687             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
                   /* A source without an alpha mask has no alpha to extract;
                      use the surface alpha, as the vector path does. */
                   if (!srcfmt->Amask) {
                       a = srcfmt->alpha;
                   }
   688             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   689             width--;
   690         }
   691 
   692         /* After all that work, here's the vector part! */
   693         extrawidth = (width % 4);
   694         width -= extrawidth;
   695         valigner = VEC_ALIGNER(src);
   696         vbits = vec_ld(0, src);
   697 
   698        while (width) {
   699             voverflow = vec_ld(15, src);
   700             src += 4;
   701             width -= 4;
   702             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   703             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   704             vec_st(vbits, 0, dst);  /* store it back out. */
   705             dst += 4;
   706             vbits = voverflow;
   707         }
   708 
   709         assert(width == 0);
   710 
   711         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   712         while (extrawidth) {
   713             bits = *(src++);  /* max 3 pixels (width % 4). */
   714             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
                   if (!srcfmt->Amask) {
                       a = srcfmt->alpha;  /* same alpha rule as above */
                   }
   715             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   716             extrawidth--;
   717         }
   718 
   719         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   720         dst += dstskip >> 2;
   721     }
   722 
   723 }
   724 
   725 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   726 /* Use this on a G4 */
       /*
        * Same conversion as the non-prefetching variant, with data-stream
        * prefetch hints (vec_dstt / vec_dstst) issued ahead of the source and
        * destination pointers; both streams are stopped with vec_dss at the end.
        * When the destination has an alpha mask and the source has none, the
        * alpha written is srcfmt->alpha on both the vector and scalar paths.
        */
   727 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
   728 {
           /* NOTE(review): these leads are added to Uint32 pointers below, so
              they advance by lead * 4 bytes -- confirm whether a byte offset
              was intended.  They only steer prefetch hints. */
   729     const int scalar_dst_lead = sizeof (Uint32) * 4;
   730     const int vector_dst_lead = sizeof (Uint32) * 16;
   731 
   732     int height = info->d_height;
   733     Uint32 *src = (Uint32 *) info->s_pixels;
   734     int srcskip = info->s_skip;
   735     Uint32 *dst = (Uint32 *) info->d_pixels;
   736     int dstskip = info->d_skip;
   737     SDL_PixelFormat *srcfmt = info->src;
   738     SDL_PixelFormat *dstfmt = info->dst;
           /* fill vector for the swizzle; holds the surface alpha when needed */
   739     vector unsigned int vzero = vec_splat_u32(0);
   740     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   741     if (dstfmt->Amask && !srcfmt->Amask) {
   742         if (srcfmt->alpha) {
   743             vector unsigned char valpha;
   744             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
   745             vzero = (vector unsigned int)vec_splat(valpha, 0);
   746         }
   747     }
   748 
   749     assert(srcfmt->BytesPerPixel == 4);
   750     assert(dstfmt->BytesPerPixel == 4);
   751 
   752     while (height--) {
   753         vector unsigned char valigner;
   754         vector unsigned int vbits;
   755         vector unsigned int voverflow;
   756         Uint32 bits;
   757         Uint8 r, g, b, a;
   758 
   759         int width = info->d_width;
   760         int extrawidth;
   761 
   762         /* do scalar until we can align... */
   763         while ((UNALIGNED_PTR(dst)) && (width)) {
   764             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   765             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   766             bits = *(src++);
   767             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
                   /* A source without an alpha mask has no alpha to extract;
                      use the surface alpha, as the vector path does. */
                   if (!srcfmt->Amask) {
                       a = srcfmt->alpha;
                   }
   768             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   769             width--;
   770         }
   771 
   772         /* After all that work, here's the vector part! */
   773         extrawidth = (width % 4);
   774         width -= extrawidth;
   775         valigner = VEC_ALIGNER(src);
   776         vbits = vec_ld(0, src);
   777 
   778         while (width) {
   779             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
   780             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
   781             voverflow = vec_ld(15, src);
   782             src += 4;
   783             width -= 4;
   784             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
   785             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
   786             vec_st(vbits, 0, dst);  /* store it back out. */
   787             dst += 4;
   788             vbits = voverflow;
   789         }
   790         
   791         assert(width == 0);
   792 
   793         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   794         while (extrawidth) {
   795             bits = *(src++);  /* max 3 pixels (width % 4), don't bother with prefetch. */
   796             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
                   if (!srcfmt->Amask) {
                       a = srcfmt->alpha;  /* same alpha rule as above */
                   }
   797             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   798             extrawidth--;
   799         }
   800 
   801         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
   802         dst += dstskip >> 2;
   803     }
   804 
           /* stop both prefetch streams */
   805     vec_dss(DST_CHAN_SRC);
   806     vec_dss(DST_CHAN_DEST);
   807 }
   808 
   809 static Uint32 GetBlitFeatures( void )
   810 {
   811     static Uint32 features = 0xffffffff;
   812     if (features == 0xffffffff) {
   813         /* Provide an override for testing .. */
   814         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   815         if (override) {
   816             features = 0;
   817             SDL_sscanf(override, "%u", &features);
   818         } else {
   819             features = ( 0
   820                 /* Feature 1 is has-MMX */
   821                 | ((SDL_HasMMX()) ? 1 : 0)
   822                 /* Feature 2 is has-AltiVec */
   823                 | ((SDL_HasAltiVec()) ? 2 : 0)
   824                 /* Feature 4 is dont-use-prefetch */
   825                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   826                 | ((GetL3CacheSize() == 0) ? 4 : 0)
   827             );
   828         }
   829     }
   830     return features;
   831 }
   832 #else
   833 /* Feature 1 is has-MMX */
   834 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   835 #endif
   836 
/*
 * This is now endian dependent.
 *
 * HI and LO are array indices for the high-order and low-order element of
 * an adjacent pair: 1 and 0 on little-endian hosts, 0 and 1 on big-endian
 * hosts.  The blitters below use them to pick the two bytes of a 16-bit
 * pixel and to place two 16-bit pixels inside a single 32-bit store.
 */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#define HI	1
#define LO	0
#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
#define HI	0
#define LO	1
#endif
   845 
   846 #if SDL_HERMES_BLITTERS
   847 
/*
 * Heheheh, we coerce Hermes into using SDL blit information.
 *
 * Before the Hermes headers are included, HermesConverterInterface is
 * defined as SDL_BlitInfo, HermesClearInterface as void, and STACKCALL as
 * nothing.  NOTE(review): presumably the Hermes prototypes are written in
 * terms of these names, so the routines end up taking an SDL_BlitInfo --
 * confirm against the headers.
 */
#define X86_ASSEMBLER
#define HermesConverterInterface	SDL_BlitInfo
#define HermesClearInterface		void
#define STACKCALL

#include "../hermes/HeadMMX.h"
#include "../hermes/HeadX86.h"
   856 
   857 #else
   858 
   859 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   860 #define RGB888_RGB332(dst, src) { \
   861 	dst = (Uint8)((((src)&0x00E00000)>>16)| \
   862 	              (((src)&0x0000E000)>>11)| \
   863 	              (((src)&0x000000C0)>>6)); \
   864 }
   865 static void Blit_RGB888_index8(SDL_BlitInfo *info)
   866 {
   867 #ifndef USE_DUFFS_LOOP
   868 	int c;
   869 #endif
   870 	int width, height;
   871 	Uint32 *src;
   872 	const Uint8 *map;
   873 	Uint8 *dst;
   874 	int srcskip, dstskip;
   875 
   876 	/* Set up some basic variables */
   877 	width = info->d_width;
   878 	height = info->d_height;
   879 	src = (Uint32 *)info->s_pixels;
   880 	srcskip = info->s_skip/4;
   881 	dst = info->d_pixels;
   882 	dstskip = info->d_skip;
   883 	map = info->table;
   884 
   885 	if ( map == NULL ) {
   886 		while ( height-- ) {
   887 #ifdef USE_DUFFS_LOOP
   888 			DUFFS_LOOP(
   889 				RGB888_RGB332(*dst++, *src);
   890 			, width);
   891 #else
   892 			for ( c=width/4; c; --c ) {
   893 				/* Pack RGB into 8bit pixel */
   894 				++src;
   895 				RGB888_RGB332(*dst++, *src);
   896 				++src;
   897 				RGB888_RGB332(*dst++, *src);
   898 				++src;
   899 				RGB888_RGB332(*dst++, *src);
   900 				++src;
   901 			}
   902 			switch ( width & 3 ) {
   903 				case 3:
   904 					RGB888_RGB332(*dst++, *src);
   905 					++src;
   906 				case 2:
   907 					RGB888_RGB332(*dst++, *src);
   908 					++src;
   909 				case 1:
   910 					RGB888_RGB332(*dst++, *src);
   911 					++src;
   912 			}
   913 #endif /* USE_DUFFS_LOOP */
   914 			src += srcskip;
   915 			dst += dstskip;
   916 		}
   917 	} else {
   918 		int Pixel;
   919 
   920 		while ( height-- ) {
   921 #ifdef USE_DUFFS_LOOP
   922 			DUFFS_LOOP(
   923 				RGB888_RGB332(Pixel, *src);
   924 				*dst++ = map[Pixel];
   925 				++src;
   926 			, width);
   927 #else
   928 			for ( c=width/4; c; --c ) {
   929 				/* Pack RGB into 8bit pixel */
   930 				RGB888_RGB332(Pixel, *src);
   931 				*dst++ = map[Pixel];
   932 				++src;
   933 				RGB888_RGB332(Pixel, *src);
   934 				*dst++ = map[Pixel];
   935 				++src;
   936 				RGB888_RGB332(Pixel, *src);
   937 				*dst++ = map[Pixel];
   938 				++src;
   939 				RGB888_RGB332(Pixel, *src);
   940 				*dst++ = map[Pixel];
   941 				++src;
   942 			}
   943 			switch ( width & 3 ) {
   944 				case 3:
   945 					RGB888_RGB332(Pixel, *src);
   946 					*dst++ = map[Pixel];
   947 					++src;
   948 				case 2:
   949 					RGB888_RGB332(Pixel, *src);
   950 					*dst++ = map[Pixel];
   951 					++src;
   952 				case 1:
   953 					RGB888_RGB332(Pixel, *src);
   954 					*dst++ = map[Pixel];
   955 					++src;
   956 			}
   957 #endif /* USE_DUFFS_LOOP */
   958 			src += srcskip;
   959 			dst += dstskip;
   960 		}
   961 	}
   962 }
   963 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   964 #define RGB888_RGB555(dst, src) { \
   965 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
   966 	                            (((*src)&0x0000F800)>>6)| \
   967 	                            (((*src)&0x000000F8)>>3)); \
   968 }
   969 #define RGB888_RGB555_TWO(dst, src) { \
   970 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   971 	                     (((src[HI])&0x0000F800)>>6)| \
   972 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   973 	                     (((src[LO])&0x00F80000)>>9)| \
   974 	                     (((src[LO])&0x0000F800)>>6)| \
   975 	                     (((src[LO])&0x000000F8)>>3); \
   976 }
   977 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
   978 {
   979 #ifndef USE_DUFFS_LOOP
   980 	int c;
   981 #endif
   982 	int width, height;
   983 	Uint32 *src;
   984 	Uint16 *dst;
   985 	int srcskip, dstskip;
   986 
   987 	/* Set up some basic variables */
   988 	width = info->d_width;
   989 	height = info->d_height;
   990 	src = (Uint32 *)info->s_pixels;
   991 	srcskip = info->s_skip/4;
   992 	dst = (Uint16 *)info->d_pixels;
   993 	dstskip = info->d_skip/2;
   994 
   995 #ifdef USE_DUFFS_LOOP
   996 	while ( height-- ) {
   997 		DUFFS_LOOP(
   998 			RGB888_RGB555(dst, src);
   999 			++src;
  1000 			++dst;
  1001 		, width);
  1002 		src += srcskip;
  1003 		dst += dstskip;
  1004 	}
  1005 #else
  1006 	/* Memory align at 4-byte boundary, if necessary */
  1007 	if ( (long)dst & 0x03 ) {
  1008 		/* Don't do anything if width is 0 */
  1009 		if ( width == 0 ) {
  1010 			return;
  1011 		}
  1012 		--width;
  1013 
  1014 		while ( height-- ) {
  1015 			/* Perform copy alignment */
  1016 			RGB888_RGB555(dst, src);
  1017 			++src;
  1018 			++dst;
  1019 
  1020 			/* Copy in 4 pixel chunks */
  1021 			for ( c=width/4; c; --c ) {
  1022 				RGB888_RGB555_TWO(dst, src);
  1023 				src += 2;
  1024 				dst += 2;
  1025 				RGB888_RGB555_TWO(dst, src);
  1026 				src += 2;
  1027 				dst += 2;
  1028 			}
  1029 			/* Get any leftovers */
  1030 			switch (width & 3) {
  1031 				case 3:
  1032 					RGB888_RGB555(dst, src);
  1033 					++src;
  1034 					++dst;
  1035 				case 2:
  1036 					RGB888_RGB555_TWO(dst, src);
  1037 					src += 2;
  1038 					dst += 2;
  1039 					break;
  1040 				case 1:
  1041 					RGB888_RGB555(dst, src);
  1042 					++src;
  1043 					++dst;
  1044 					break;
  1045 			}
  1046 			src += srcskip;
  1047 			dst += dstskip;
  1048 		}
  1049 	} else { 
  1050 		while ( height-- ) {
  1051 			/* Copy in 4 pixel chunks */
  1052 			for ( c=width/4; c; --c ) {
  1053 				RGB888_RGB555_TWO(dst, src);
  1054 				src += 2;
  1055 				dst += 2;
  1056 				RGB888_RGB555_TWO(dst, src);
  1057 				src += 2;
  1058 				dst += 2;
  1059 			}
  1060 			/* Get any leftovers */
  1061 			switch (width & 3) {
  1062 				case 3:
  1063 					RGB888_RGB555(dst, src);
  1064 					++src;
  1065 					++dst;
  1066 				case 2:
  1067 					RGB888_RGB555_TWO(dst, src);
  1068 					src += 2;
  1069 					dst += 2;
  1070 					break;
  1071 				case 1:
  1072 					RGB888_RGB555(dst, src);
  1073 					++src;
  1074 					++dst;
  1075 					break;
  1076 			}
  1077 			src += srcskip;
  1078 			dst += dstskip;
  1079 		}
  1080 	}
  1081 #endif /* USE_DUFFS_LOOP */
  1082 }
  1083 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1084 #define RGB888_RGB565(dst, src) { \
  1085 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1086 	                            (((*src)&0x0000FC00)>>5)| \
  1087 	                            (((*src)&0x000000F8)>>3)); \
  1088 }
  1089 #define RGB888_RGB565_TWO(dst, src) { \
  1090 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1091 	                     (((src[HI])&0x0000FC00)>>5)| \
  1092 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1093 	                     (((src[LO])&0x00F80000)>>8)| \
  1094 	                     (((src[LO])&0x0000FC00)>>5)| \
  1095 	                     (((src[LO])&0x000000F8)>>3); \
  1096 }
  1097 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
  1098 {
  1099 #ifndef USE_DUFFS_LOOP
  1100 	int c;
  1101 #endif
  1102 	int width, height;
  1103 	Uint32 *src;
  1104 	Uint16 *dst;
  1105 	int srcskip, dstskip;
  1106 
  1107 	/* Set up some basic variables */
  1108 	width = info->d_width;
  1109 	height = info->d_height;
  1110 	src = (Uint32 *)info->s_pixels;
  1111 	srcskip = info->s_skip/4;
  1112 	dst = (Uint16 *)info->d_pixels;
  1113 	dstskip = info->d_skip/2;
  1114 
  1115 #ifdef USE_DUFFS_LOOP
  1116 	while ( height-- ) {
  1117 		DUFFS_LOOP(
  1118 			RGB888_RGB565(dst, src);
  1119 			++src;
  1120 			++dst;
  1121 		, width);
  1122 		src += srcskip;
  1123 		dst += dstskip;
  1124 	}
  1125 #else
  1126 	/* Memory align at 4-byte boundary, if necessary */
  1127 	if ( (long)dst & 0x03 ) {
  1128 		/* Don't do anything if width is 0 */
  1129 		if ( width == 0 ) {
  1130 			return;
  1131 		}
  1132 		--width;
  1133 
  1134 		while ( height-- ) {
  1135 			/* Perform copy alignment */
  1136 			RGB888_RGB565(dst, src);
  1137 			++src;
  1138 			++dst;
  1139 
  1140 			/* Copy in 4 pixel chunks */
  1141 			for ( c=width/4; c; --c ) {
  1142 				RGB888_RGB565_TWO(dst, src);
  1143 				src += 2;
  1144 				dst += 2;
  1145 				RGB888_RGB565_TWO(dst, src);
  1146 				src += 2;
  1147 				dst += 2;
  1148 			}
  1149 			/* Get any leftovers */
  1150 			switch (width & 3) {
  1151 				case 3:
  1152 					RGB888_RGB565(dst, src);
  1153 					++src;
  1154 					++dst;
  1155 				case 2:
  1156 					RGB888_RGB565_TWO(dst, src);
  1157 					src += 2;
  1158 					dst += 2;
  1159 					break;
  1160 				case 1:
  1161 					RGB888_RGB565(dst, src);
  1162 					++src;
  1163 					++dst;
  1164 					break;
  1165 			}
  1166 			src += srcskip;
  1167 			dst += dstskip;
  1168 		}
  1169 	} else { 
  1170 		while ( height-- ) {
  1171 			/* Copy in 4 pixel chunks */
  1172 			for ( c=width/4; c; --c ) {
  1173 				RGB888_RGB565_TWO(dst, src);
  1174 				src += 2;
  1175 				dst += 2;
  1176 				RGB888_RGB565_TWO(dst, src);
  1177 				src += 2;
  1178 				dst += 2;
  1179 			}
  1180 			/* Get any leftovers */
  1181 			switch (width & 3) {
  1182 				case 3:
  1183 					RGB888_RGB565(dst, src);
  1184 					++src;
  1185 					++dst;
  1186 				case 2:
  1187 					RGB888_RGB565_TWO(dst, src);
  1188 					src += 2;
  1189 					dst += 2;
  1190 					break;
  1191 				case 1:
  1192 					RGB888_RGB565(dst, src);
  1193 					++src;
  1194 					++dst;
  1195 					break;
  1196 			}
  1197 			src += srcskip;
  1198 			dst += dstskip;
  1199 		}
  1200 	}
  1201 #endif /* USE_DUFFS_LOOP */
  1202 }
  1203 
  1204 #endif /* SDL_HERMES_BLITTERS */
  1205 
  1206 
  1207 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1208 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1209 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
  1210 {
  1211 #ifndef USE_DUFFS_LOOP
  1212 	int c;
  1213 #endif
  1214 	int width, height;
  1215 	Uint8 *src;
  1216 	Uint32 *dst;
  1217 	int srcskip, dstskip;
  1218 
  1219 	/* Set up some basic variables */
  1220 	width = info->d_width;
  1221 	height = info->d_height;
  1222 	src = (Uint8 *)info->s_pixels;
  1223 	srcskip = info->s_skip;
  1224 	dst = (Uint32 *)info->d_pixels;
  1225 	dstskip = info->d_skip/4;
  1226 
  1227 #ifdef USE_DUFFS_LOOP
  1228 	while ( height-- ) {
  1229 		DUFFS_LOOP(
  1230 		{
  1231 			*dst++ = RGB565_32(dst, src, map);
  1232 			src += 2;
  1233 		},
  1234 		width);
  1235 		src += srcskip;
  1236 		dst += dstskip;
  1237 	}
  1238 #else
  1239 	while ( height-- ) {
  1240 		/* Copy in 4 pixel chunks */
  1241 		for ( c=width/4; c; --c ) {
  1242 			*dst++ = RGB565_32(dst, src, map);
  1243 			src += 2;
  1244 			*dst++ = RGB565_32(dst, src, map);
  1245 			src += 2;
  1246 			*dst++ = RGB565_32(dst, src, map);
  1247 			src += 2;
  1248 			*dst++ = RGB565_32(dst, src, map);
  1249 			src += 2;
  1250 		}
  1251 		/* Get any leftovers */
  1252 		switch (width & 3) {
  1253 			case 3:
  1254 				*dst++ = RGB565_32(dst, src, map);
  1255 				src += 2;
  1256 			case 2:
  1257 				*dst++ = RGB565_32(dst, src, map);
  1258 				src += 2;
  1259 			case 1:
  1260 				*dst++ = RGB565_32(dst, src, map);
  1261 				src += 2;
  1262 				break;
  1263 		}
  1264 		src += srcskip;
  1265 		dst += dstskip;
  1266 	}
  1267 #endif /* USE_DUFFS_LOOP */
  1268 }
  1269 
/*
 * Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8
 *
 * Lookup table in the layout RGB565_32 expects: entry 2*i is the part of
 * the result contributed by a low-order source byte of value i, entry
 * 2*i+1 the part contributed by a high-order source byte of value i; the
 * two are added together.  In this table every odd entry has 0xff in its
 * top byte.
 */
static const Uint32 RGB565_ARGB8888_LUT[512] = {
		0x00000000, 0xff000000, 0x00000008, 0xff002000,
		0x00000010, 0xff004000, 0x00000018, 0xff006100,
		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
		0x00000400, 0xff200000, 0x00000408, 0xff202000,
		0x00000410, 0xff204000, 0x00000418, 0xff206100,
		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
		0x00000800, 0xff410000, 0x00000808, 0xff412000,
		0x00000810, 0xff414000, 0x00000818, 0xff416100,
		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
		0x00001000, 0xff830000, 0x00001008, 0xff832000,
		0x00001010, 0xff834000, 0x00001018, 0xff836100,
		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
};
/* Blit RGB 5-6-5 to ARGB 8-8-8-8 by running Blit_RGB565_32 with the table above */
static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
  1405 
/*
 * Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8
 *
 * Lookup table in the layout RGB565_32 expects: entry 2*i is the part of
 * the result contributed by a low-order source byte of value i, entry
 * 2*i+1 the part contributed by a high-order source byte of value i; the
 * two are added together.  In this table every even entry has 0xff in its
 * top byte.
 */
static const Uint32 RGB565_ABGR8888_LUT[512] = {
		0xff000000, 0x00000000, 0xff080000, 0x00002000,
		0xff100000, 0x00004000, 0xff180000, 0x00006100,
		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
		0xff000400, 0x00000020, 0xff080400, 0x00002020,
		0xff100400, 0x00004020, 0xff180400, 0x00006120,
		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
		0xff000800, 0x00000041, 0xff080800, 0x00002041,
		0xff100800, 0x00004041, 0xff180800, 0x00006141,
		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
		0xff001000, 0x00000083, 0xff081000, 0x00002083,
		0xff101000, 0x00004083, 0xff181000, 0x00006183,
		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
};
/* Blit RGB 5-6-5 to ABGR 8-8-8-8 by running Blit_RGB565_32 with the table above */
static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
}
  1541 
  1542 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1543 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1544 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1545 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1546 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1547 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1548 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1549 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1550 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1551 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1552 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1553 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1554 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1555 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1556 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1557 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1558 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1559 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1560 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1561 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1562 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1563 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1564 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1565 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1566 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1567 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1568 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1569 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1570 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1571 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1572 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1573 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1574 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1575 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1576 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1577 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1578 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1579 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1580 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1581 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1582 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1583 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1584 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1585 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1586 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1587 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1588 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1589 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1590 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1591 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1592 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1593 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1594 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1595 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1596 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1597 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1598 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1599 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1600 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1601 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1602 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1603 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1604 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1605 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1606 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1607 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1608 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1609 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1610 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1611 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1612 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1613 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1614 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1615 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1616 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1617 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1618 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1619 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1620 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1621 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1622 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1623 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1624 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1625 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1626 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1627 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1628 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1629 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1630 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1631 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1632 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1633 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1634 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1635 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1636 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1637 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1638 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1639 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1640 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1641 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1642 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1643 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1644 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1645 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1646 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1647 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1648 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1649 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1650 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1651 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1652 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1653 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1654 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1655 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1656 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1657 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1658 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1659 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1660 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1661 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1662 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1663 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1664 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1665 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1666 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1667 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1668 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1669 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1670 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1671 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1672 };
/* RGB565 -> RGBA8888 entry point: delegates to Blit_RGB565_32, passing the
   RGBA-ordered lookup table. */
static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
}
  1677 
  1678 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */
  1679 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1680 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1681 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1682 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1683 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1684 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1685 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1686 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1687 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1688 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1689 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1690 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1691 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1692 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1693 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1694 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1695 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1696 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1697 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1698 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1699 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1700 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1701 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1702 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1703 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1704 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1705 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1706 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1707 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1708 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1709 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1710 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1711 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1712 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1713 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1714 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1715 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1716 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1717 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1718 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1719 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1720 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1721 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1722 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1723 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1724 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1725 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1726 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1727 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1728 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1729 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1730 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1731 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1732 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1733 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1734 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1735 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1736 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1737 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1738 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1739 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1740 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1741 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1742 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1743 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1744 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1745 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1746 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1747 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1748 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1749 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1750 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1751 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1752 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1753 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1754 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1755 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1756 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1757 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1758 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1759 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1760 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1761 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1762 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1763 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1764 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1765 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1766 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1767 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1768 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1769 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1770 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1771 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1772 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1773 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1774 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1775 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1776 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1777 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1778 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1779 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1780 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1781 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1782 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1783 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1784 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1785 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1786 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1787 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1788 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1789 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1790 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1791 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1792 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1793 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1794 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1795 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1796 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1797 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1798 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1799 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1800 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1801 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1802 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1803 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1804 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1805 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1806 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1807 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1808 };
/* RGB565 -> BGRA8888 entry point: delegates to Blit_RGB565_32, passing the
   BGRA-ordered lookup table. */
static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
  1813 
  1814 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
#ifndef RGB888_RGB332
/* Reduce a 32-bit pixel to a 3-3-2 value in dst: the top three bits of
   bits 16-23 land in bits 5-7, the top three bits of bits 8-15 land in
   bits 2-4, and the top two bits of bits 0-7 land in bits 0-1.
   Only defined here when no definition is already in effect. */
#define RGB888_RGB332(dst, src) { \
	dst = (((src)&0x00E00000)>>16)| \
	      (((src)&0x0000E000)>>11)| \
	      (((src)&0x000000C0)>>6); \
}
#endif
  1822 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
  1823 {
  1824 #ifndef USE_DUFFS_LOOP
  1825 	int c;
  1826 #endif
  1827 	int Pixel;
  1828 	int width, height;
  1829 	Uint32 *src;
  1830 	const Uint8 *map;
  1831 	Uint8 *dst;
  1832 	int srcskip, dstskip;
  1833 
  1834 	/* Set up some basic variables */
  1835 	width = info->d_width;
  1836 	height = info->d_height;
  1837 	src = (Uint32 *)info->s_pixels;
  1838 	srcskip = info->s_skip/4;
  1839 	dst = info->d_pixels;
  1840 	dstskip = info->d_skip;
  1841 	map = info->table;
  1842 
  1843 #ifdef USE_DUFFS_LOOP
  1844 	while ( height-- ) {
  1845 		DUFFS_LOOP(
  1846 			RGB888_RGB332(Pixel, *src);
  1847 			*dst++ = map[Pixel];
  1848 			++src;
  1849 		, width);
  1850 		src += srcskip;
  1851 		dst += dstskip;
  1852 	}
  1853 #else
  1854 	while ( height-- ) {
  1855 		for ( c=width/4; c; --c ) {
  1856 			/* Pack RGB into 8bit pixel */
  1857 			RGB888_RGB332(Pixel, *src);
  1858 			*dst++ = map[Pixel];
  1859 			++src;
  1860 			RGB888_RGB332(Pixel, *src);
  1861 			*dst++ = map[Pixel];
  1862 			++src;
  1863 			RGB888_RGB332(Pixel, *src);
  1864 			*dst++ = map[Pixel];
  1865 			++src;
  1866 			RGB888_RGB332(Pixel, *src);
  1867 			*dst++ = map[Pixel];
  1868 			++src;
  1869 		}
  1870 		switch ( width & 3 ) {
  1871 			case 3:
  1872 				RGB888_RGB332(Pixel, *src);
  1873 				*dst++ = map[Pixel];
  1874 				++src;
  1875 			case 2:
  1876 				RGB888_RGB332(Pixel, *src);
  1877 				*dst++ = map[Pixel];
  1878 				++src;
  1879 			case 1:
  1880 				RGB888_RGB332(Pixel, *src);
  1881 				*dst++ = map[Pixel];
  1882 				++src;
  1883 		}
  1884 		src += srcskip;
  1885 		dst += dstskip;
  1886 	}
  1887 #endif /* USE_DUFFS_LOOP */
  1888 }
  1889 static void BlitNto1(SDL_BlitInfo *info)
  1890 {
  1891 #ifndef USE_DUFFS_LOOP
  1892 	int c;
  1893 #endif
  1894 	int width, height;
  1895 	Uint8 *src;
  1896 	const Uint8 *map;
  1897 	Uint8 *dst;
  1898 	int srcskip, dstskip;
  1899 	int srcbpp;
  1900 	Uint32 Pixel;
  1901 	int  sR, sG, sB;
  1902 	SDL_PixelFormat *srcfmt;
  1903 
  1904 	/* Set up some basic variables */
  1905 	width = info->d_width;
  1906 	height = info->d_height;
  1907 	src = info->s_pixels;
  1908 	srcskip = info->s_skip;
  1909 	dst = info->d_pixels;
  1910 	dstskip = info->d_skip;
  1911 	map = info->table;
  1912 	srcfmt = info->src;
  1913 	srcbpp = srcfmt->BytesPerPixel;
  1914 
  1915 	if ( map == NULL ) {
  1916 		while ( height-- ) {
  1917 #ifdef USE_DUFFS_LOOP
  1918 			DUFFS_LOOP(
  1919 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1920 								sR, sG, sB);
  1921 				if ( 1 ) {
  1922 				  	/* Pack RGB into 8bit pixel */
  1923 				  	*dst = ((sR>>5)<<(3+2))|
  1924 					        ((sG>>5)<<(2)) |
  1925 					        ((sB>>6)<<(0)) ;
  1926 				}
  1927 				dst++;
  1928 				src += srcbpp;
  1929 			, width);
  1930 #else
  1931 			for ( c=width; c; --c ) {
  1932 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1933 								sR, sG, sB);
  1934 				if ( 1 ) {
  1935 				  	/* Pack RGB into 8bit pixel */
  1936 				  	*dst = ((sR>>5)<<(3+2))|
  1937 					        ((sG>>5)<<(2)) |
  1938 					        ((sB>>6)<<(0)) ;
  1939 				}
  1940 				dst++;
  1941 				src += srcbpp;
  1942 			}
  1943 #endif
  1944 			src += srcskip;
  1945 			dst += dstskip;
  1946 		}
  1947 	} else {
  1948 		while ( height-- ) {
  1949 #ifdef USE_DUFFS_LOOP
  1950 			DUFFS_LOOP(
  1951 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1952 								sR, sG, sB);
  1953 				if ( 1 ) {
  1954 				  	/* Pack RGB into 8bit pixel */
  1955 				  	*dst = map[((sR>>5)<<(3+2))|
  1956 						   ((sG>>5)<<(2))  |
  1957 						   ((sB>>6)<<(0))  ];
  1958 				}
  1959 				dst++;
  1960 				src += srcbpp;
  1961 			, width);
  1962 #else
  1963 			for ( c=width; c; --c ) {
  1964 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1965 								sR, sG, sB);
  1966 				if ( 1 ) {
  1967 				  	/* Pack RGB into 8bit pixel */
  1968 				  	*dst = map[((sR>>5)<<(3+2))|
  1969 						   ((sG>>5)<<(2))  |
  1970 						   ((sB>>6)<<(0))  ];
  1971 				}
  1972 				dst++;
  1973 				src += srcbpp;
  1974 			}
  1975 #endif /* USE_DUFFS_LOOP */
  1976 			src += srcskip;
  1977 			dst += dstskip;
  1978 		}
  1979 	}
  1980 }
  1981 
  1982 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  1983 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1984 {
  1985 	int width = info->d_width;
  1986 	int height = info->d_height;
  1987 	Uint32 *src = (Uint32 *)info->s_pixels;
  1988 	int srcskip = info->s_skip;
  1989 	Uint32 *dst = (Uint32 *)info->d_pixels;
  1990 	int dstskip = info->d_skip;
  1991 	SDL_PixelFormat *srcfmt = info->src;
  1992 	SDL_PixelFormat *dstfmt = info->dst;
  1993 
  1994 	if (dstfmt->Amask) {
  1995 		/* RGB->RGBA, SET_ALPHA */
  1996 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  1997 
  1998 		while ( height-- ) {
  1999 			DUFFS_LOOP(
  2000 			{
  2001 				*dst = *src | mask;
  2002 				++dst;
  2003 				++src;
  2004 			},
  2005 			width);
  2006 			src = (Uint32*)((Uint8*)src + srcskip);
  2007 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2008 		}
  2009 	} else {
  2010 		/* RGBA->RGB, NO_ALPHA */
  2011 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2012 
  2013 		while ( height-- ) {
  2014 			DUFFS_LOOP(
  2015 			{
  2016 				*dst = *src & mask;
  2017 				++dst;
  2018 				++src;
  2019 			},
  2020 			width);
  2021 			src = (Uint32*)((Uint8*)src + srcskip);
  2022 			dst = (Uint32*)((Uint8*)dst + dstskip);
  2023 		}
  2024 	}
  2025 }
  2026 
  2027 static void BlitNtoN(SDL_BlitInfo *info)
  2028 {
  2029 	int width = info->d_width;
  2030 	int height = info->d_height;
  2031 	Uint8 *src = info->s_pixels;
  2032 	int srcskip = info->s_skip;
  2033 	Uint8 *dst = info->d_pixels;
  2034 	int dstskip = info->d_skip;
  2035 	SDL_PixelFormat *srcfmt = info->src;
  2036 	int srcbpp = srcfmt->BytesPerPixel;
  2037 	SDL_PixelFormat *dstfmt = info->dst;
  2038 	int dstbpp = dstfmt->BytesPerPixel;
  2039 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2040 
  2041 	while ( height-- ) {
  2042 		DUFFS_LOOP(
  2043 		{
  2044 		        Uint32 Pixel;
  2045 			unsigned sR;
  2046 			unsigned sG;
  2047 			unsigned sB;
  2048 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2049 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2050 			dst += dstbpp;
  2051 			src += srcbpp;
  2052 		},
  2053 		width);
  2054 		src += srcskip;
  2055 		dst += dstskip;
  2056 	}
  2057 }
  2058 
  2059 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
  2060 {
  2061 	int width = info->d_width;
  2062 	int height = info->d_height;
  2063 	Uint8 *src = info->s_pixels;
  2064 	int srcskip = info->s_skip;
  2065 	Uint8 *dst = info->d_pixels;
  2066 	int dstskip = info->d_skip;
  2067 	SDL_PixelFormat *srcfmt = info->src;
  2068 	int srcbpp = srcfmt->BytesPerPixel;
  2069 	SDL_PixelFormat *dstfmt = info->dst;
  2070 	int dstbpp = dstfmt->BytesPerPixel;
  2071 	int c;
  2072 
  2073 	/* FIXME: should map alpha to [0..255] correctly! */
  2074 	while ( height-- ) {
  2075 		for ( c=width; c; --c ) {
  2076 		        Uint32 Pixel;
  2077 			unsigned sR, sG, sB, sA;
  2078 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2079 				      sR, sG, sB, sA);
  2080 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2081 				      sR, sG, sB, sA);
  2082 			dst += dstbpp;
  2083 			src += srcbpp;
  2084 		}
  2085 		src += srcskip;
  2086 		dst += dstskip;
  2087 	}
  2088 }
  2089 
  2090 static void BlitNto1Key(SDL_BlitInfo *info)
  2091 {
  2092 	int width = info->d_width;
  2093 	int height = info->d_height;
  2094 	Uint8 *src = info->s_pixels;
  2095 	int srcskip = info->s_skip;
  2096 	Uint8 *dst = info->d_pixels;
  2097 	int dstskip = info->d_skip;
  2098 	SDL_PixelFormat *srcfmt = info->src;
  2099 	const Uint8 *palmap = info->table;
  2100 	Uint32 ckey = srcfmt->colorkey;
  2101 	Uint32 rgbmask = ~srcfmt->Amask;
  2102 	int srcbpp;
  2103 	Uint32 Pixel;
  2104 	unsigned sR, sG, sB;
  2105 
  2106 	/* Set up some basic variables */
  2107 	srcbpp = srcfmt->BytesPerPixel;
  2108 	ckey &= rgbmask;
  2109 
  2110 	if ( palmap == NULL ) {
  2111 		while ( height-- ) {
  2112 			DUFFS_LOOP(
  2113 			{
  2114 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2115 								sR, sG, sB);
  2116 				if ( (Pixel & rgbmask) != ckey ) {
  2117 				  	/* Pack RGB into 8bit pixel */
  2118 				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
  2119 						           ((sG>>5)<<(2)) |
  2120 						           ((sB>>6)<<(0)));
  2121 				}
  2122 				dst++;
  2123 				src += srcbpp;
  2124 			},
  2125 			width);
  2126 			src += srcskip;
  2127 			dst += dstskip;
  2128 		}
  2129 	} else {
  2130 		while ( height-- ) {
  2131 			DUFFS_LOOP(
  2132 			{
  2133 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2134 								sR, sG, sB);
  2135 				if ( (Pixel & rgbmask) != ckey ) {
  2136 				  	/* Pack RGB into 8bit pixel */
  2137 				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2138 							             ((sG>>5)<<(2))  |
  2139 							             ((sB>>6)<<(0))  ];
  2140 				}
  2141 				dst++;
  2142 				src += srcbpp;
  2143 			},
  2144 			width);
  2145 			src += srcskip;
  2146 			dst += dstskip;
  2147 		}
  2148 	}
  2149 }
  2150 
  2151 static void Blit2to2Key(SDL_BlitInfo *info)
  2152 {
  2153 	int width = info->d_width;
  2154 	int height = info->d_height;
  2155 	Uint16 *srcp = (Uint16 *)info->s_pixels;
  2156 	int srcskip = info->s_skip;
  2157 	Uint16 *dstp = (Uint16 *)info->d_pixels;
  2158 	int dstskip = info->d_skip;
  2159 	Uint32 ckey = info->src->colorkey;
  2160 	Uint32 rgbmask = ~info->src->Amask;
  2161 
  2162 	/* Set up some basic variables */
  2163         srcskip /= 2;
  2164         dstskip /= 2;
  2165 	ckey &= rgbmask;
  2166 
  2167 	while ( height-- ) {
  2168 		DUFFS_LOOP(
  2169 		{
  2170 			if ( (*srcp & rgbmask) != ckey ) {
  2171 				*dstp = *srcp;
  2172 			}
  2173 			dstp++;
  2174 			srcp++;
  2175 		},
  2176 		width);
  2177 		srcp += srcskip;
  2178 		dstp += dstskip;
  2179 	}
  2180 }
  2181 
  2182 static void BlitNtoNKey(SDL_BlitInfo *info)
  2183 {
  2184 	int width = info->d_width;
  2185 	int height = info->d_height;
  2186 	Uint8 *src = info->s_pixels;
  2187 	int srcskip = info->s_skip;
  2188 	Uint8 *dst = info->d_pixels;
  2189 	int dstskip = info->d_skip;
  2190 	Uint32 ckey = info->src->colorkey;
  2191 	SDL_PixelFormat *srcfmt = info->src;
  2192 	SDL_PixelFormat *dstfmt = info->dst;
  2193 	int srcbpp = srcfmt->BytesPerPixel;
  2194 	int dstbpp = dstfmt->BytesPerPixel;
  2195 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2196 
  2197 	while ( height-- ) {
  2198 		DUFFS_LOOP(
  2199 		{
  2200 		        Uint32 Pixel;
  2201 			unsigned sR;
  2202 			unsigned sG;
  2203 			unsigned sB;
  2204 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2205 			if ( Pixel != ckey ) {
  2206 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2207 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2208 					      sR, sG, sB, alpha);
  2209 			}
  2210 			dst += dstbpp;
  2211 			src += srcbpp;
  2212 		},
  2213 		width);
  2214 		src += srcskip;
  2215 		dst += dstskip;
  2216 	}
  2217 }
  2218 
  2219 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2220 {
  2221 	int width = info->d_width;
  2222 	int height = info->d_height;
  2223 	Uint8 *src = info->s_pixels;
  2224 	int srcskip = info->s_skip;
  2225 	Uint8 *dst = info->d_pixels;
  2226 	int dstskip = info->d_skip;
  2227 	Uint32 ckey = info->src->colorkey;
  2228 	SDL_PixelFormat *srcfmt = info->src;
  2229 	SDL_PixelFormat *dstfmt = info->dst;
  2230 	Uint32 rgbmask = ~srcfmt->Amask;
  2231 
  2232 	Uint8 srcbpp;
  2233 	Uint8 dstbpp;
  2234 	Uint32 Pixel;
  2235 	unsigned sR, sG, sB, sA;
  2236 
  2237 	/* Set up some basic variables */
  2238 	srcbpp = srcfmt->BytesPerPixel;
  2239 	dstbpp = dstfmt->BytesPerPixel;
  2240 	ckey &= rgbmask;
  2241 
  2242 	/* FIXME: should map alpha to [0..255] correctly! */
  2243 	while ( height-- ) {
  2244 		DUFFS_LOOP(
  2245 		{
  2246 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2247 				      sR, sG, sB, sA);
  2248 			if ( (Pixel & rgbmask) != ckey ) {
  2249 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2250 						sR, sG, sB, sA);
  2251 			}
  2252 			dst += dstbpp;
  2253 			src += srcbpp;
  2254 		},
  2255 		width);
  2256 		src += srcskip;
  2257 		dst += dstskip;
  2258 	}
  2259 }
  2260 
  2261 /* Normal N to N optimized blitters */
/* One candidate blitter: the source/destination layout it applies to and
   the function (plus optional auxiliary data) that performs the blit. */
struct blit_table {
	/* Source R/G/B masks; MASKOK() treats a zero table value as "any" */
	Uint32 srcR, srcG, srcB;
	/* Destination pixel size; the tables use 2, 3 or 4 here
	   (presumably bytes per pixel -- confirm against the lookup code) */
	int dstbpp;
	/* Destination R/G/B masks; zero again acts as a wildcard in MASKOK() */
	Uint32 dstR, dstG, dstB;
	/* Feature flags for the entry.  The tables annotate 2 as
	   "has-altivec" and 6 as "has-altivec | dont-use-prefetch"; the
	   Hermes MMX converters use 1 and the plain entries use 0. */
	Uint32 blit_features;
	/* Extra data for blitfunc: a converter routine in the Hermes entries,
	   NULL in all others */
	void *aux_data;
	/* Function that performs the blit */
	SDL_loblit blitfunc;
	/* Bitmask of the alpha treatments the entry supports (the tables
	   OR these values together); 0 in the default entries */
	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
};
/* Table for 8-bit sources: a single entry with no blit function.  The
   trailing `alpha` member is not listed and is therefore zero-initialized. */
static const struct blit_table normal_blit_1[] = {
	/* Default for 8-bit RGB source, an invalid combination */
	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
};
/* Table for 16-bit sources.  Build-specific entries (Hermes x86 converters
   or AltiVec blitters) come first, then the portable RGB565 -> 32-bit
   lookup-table blitters, and finally an all-zero entry that supplies
   BlitNtoN. */
static const struct blit_table normal_blit_2[] = {
#if SDL_HERMES_BLITTERS
    /* RGB565 source to 16-bit BGR565 / RGB555 / BGR555 */
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
      0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
#elif SDL_ALTIVEC_BLITTERS
    /* has-altivec */
    /* destination masks are zero, i.e. any 4-byte destination layout */
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
#endif
    /* RGB565 source to the four 32-bit channel orders, via lookup tables */
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
      0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
      0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
      0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },

    /* Default for 16-bit RGB source, used if no other blitter matches */
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/* Table for 24-bit sources: only the all-zero entry that supplies BlitNtoN. */
static const struct blit_table normal_blit_3[] = {
	/* Default for 24-bit RGB source, never optimized */
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/* Table for 32-bit sources.  With SDL_HERMES_BLITTERS the entries are the
   Hermes converters, listed as an MMX variant (features 1) followed by a
   plain x86 variant (features 0) for each 16-bit target.  Otherwise the
   optional AltiVec entries and the portable RGB888 -> 16-bit blitters are
   used.  The all-zero last entry supplies BlitNtoN. */
static const struct blit_table normal_blit_4[] = {
#if SDL_HERMES_BLITTERS
    /* 0x00FF0000/0x0000FF00/0x000000FF source to 16-bit targets */
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
      1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
      0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
      1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
      0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
    /* ... to 24-bit targets */
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
      0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
      0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
    /* ... to other 32-bit channel orders */
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
      0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
      0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
#else
#if SDL_ALTIVEC_BLITTERS
    /* has-altivec | dont-use-prefetch */
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
      6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    /* has-altivec */
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    /* has-altivec */
    { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
#endif
    /* Portable RGB888 -> RGB565 / RGB555 */
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
#endif
	/* Default for 32-bit RGB source, used if no other blitter matches */
	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/* The per-source-size tables in order: 8-, 16-, 24- and 32-bit sources.
   NOTE(review): presumably indexed by source BytesPerPixel-1 -- confirm in
   SDL_CalculateBlitN. */
static const struct blit_table *normal_blit[] = {
	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
};
  2357 
/* True when mask x equals table mask y, or when the table mask y is zero
   (a zero table entry accepts any mask) */
#define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2360 
  2361 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
  2362 {
  2363 	struct private_swaccel *sdata;
  2364 	SDL_PixelFormat *srcfmt;
  2365 	SDL_PixelFormat *dstfmt;
  2366 	const struct blit_table *table;
  2367 	int which;
  2368 	SDL_loblit blitfun;
  2369 
  2370 	/* Set up data for choosing the blit */
  2371 	sdata = surface->map->sw_data;
  2372 	srcfmt = surface->format;
  2373 	dstfmt = surface->map->dst->format;
  2374 
  2375 	if ( blit_index & 2 ) {
  2376 	        /* alpha or alpha+colorkey */
  2377 	        return SDL_CalculateAlphaBlit(surface, blit_index);
  2378 	}
  2379 
  2380 	/* We don't support destinations less than 8-bits */
  2381 	if ( dstfmt->BitsPerPixel < 8 ) {
  2382 		return(NULL);
  2383 	}
  2384 	
  2385 	if(blit_index == 1) {
  2386 	    /* colorkey blit: Here we don't have too many options, mostly
  2387 	       because RLE is the preferred fast way to deal with this.
  2388 	       If a particular case turns out to be useful we'll add it. */
  2389 
  2390 	    if(srcfmt->BytesPerPixel == 2
  2391 	       && surface->map->identity)
  2392 		return Blit2to2Key;
  2393 	    else if(dstfmt->BytesPerPixel == 1)
  2394 		return BlitNto1Key;
  2395 	    else {
  2396 #if SDL_ALTIVEC_BLITTERS
  2397         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
  2398             return Blit32to32KeyAltivec;
  2399         } else
  2400 #endif
  2401 
  2402 		if(srcfmt->Amask && dstfmt->Amask)
  2403 		    return BlitNtoNKeyCopyAlpha;
  2404 		else
  2405 		    return BlitNtoNKey;
  2406 	    }
  2407 	}
  2408 
  2409 	blitfun = NULL;
  2410 	if ( dstfmt->BitsPerPixel == 8 ) {
  2411 		/* We assume 8-bit destinations are palettized */
  2412 		if ( (srcfmt->BytesPerPixel == 4) &&
  2413 		     (srcfmt->Rmask == 0x00FF0000) &&
  2414 		     (srcfmt->Gmask == 0x0000FF00) &&
  2415 		     (srcfmt->Bmask == 0x000000FF) ) {
  2416 			if ( surface->map->table ) {
  2417 				blitfun = Blit_RGB888_index8_map;
  2418 			} else {
  2419 #if SDL_HERMES_BLITTERS
  2420 				sdata->aux_data = ConvertX86p32_8RGB332;
  2421 				blitfun = ConvertX86;
  2422 #else
  2423 				blitfun = Blit_RGB888_index8;
  2424 #endif
  2425 			}
  2426 		} else {
  2427 			blitfun = BlitNto1;
  2428 		}
  2429 	} else {
  2430 		/* Now the meat, choose the blitter we want */
  2431 		int a_need = 0;
  2432 		if(dstfmt->Amask)
  2433 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2434 		table = normal_blit[srcfmt->BytesPerPixel-1];
  2435 		for ( which=0; table[which].dstbpp; ++which ) {
  2436 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2437 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2438 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2439 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2440 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2441 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2442 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
  2443 			    (a_need & table[which].alpha) == a_need &&
  2444 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
  2445 				break;
  2446 		}
  2447 		sdata->aux_data = table[which].aux_data;
  2448 		blitfun = table[which].blitfunc;
  2449 
  2450 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2451 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2452 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2453 			     srcfmt->Rmask == dstfmt->Rmask &&
  2454 			     srcfmt->Gmask == dstfmt->Gmask &&
  2455 			     srcfmt->Bmask == dstfmt->Bmask ) {
  2456 				blitfun = Blit4to4MaskAlpha;
  2457 			} else if ( a_need == COPY_ALPHA ) {
  2458 			    blitfun = BlitNtoNCopyAlpha;
  2459 			}
  2460 		}
  2461 	}
  2462 
  2463 #ifdef DEBUG_ASM
  2464 #if SDL_HERMES_BLITTERS
  2465 	if ( blitfun == ConvertMMX )
  2466 		fprintf(stderr, "Using mmx blit\n");
  2467 	else
  2468 	if ( blitfun == ConvertX86 )
  2469 		fprintf(stderr, "Using asm blit\n");
  2470 	else
  2471 #endif
  2472 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
  2473 		fprintf(stderr, "Using C blit\n");
  2474 	else
  2475 		fprintf(stderr, "Using optimized C blit\n");
  2476 #endif /* DEBUG_ASM */
  2477 
  2478 	return(blitfun);
  2479 }