src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 27 Feb 2011 22:22:58 -0800
changeset 5407 40c9d744e595
parent 5389 24903690f48a
child 5439 3a778c6c0269
permissions -rw-r--r--
Fixed compiling AltiVec blitters
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2011 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_endian.h"
    26 #include "SDL_cpuinfo.h"
    27 #include "SDL_blit.h"
    28 
    29 /* Functions to blit from N-bit surfaces to other surfaces */
    30 
    31 #if SDL_ALTIVEC_BLITTERS
    32 #define assert(X)
    33 #ifdef __MACOSX__
    34 #include <sys/sysctl.h>
    35 static size_t
    36 GetL3CacheSize(void)
    37 {
    38     /* Look up the "hw.l3cachesize" sysctl; report 0 if the lookup fails. */
    39     u_int64_t cacheBytes = 0;
    40     size_t valueLen = sizeof(cacheBytes);
    41     const char name[] = "hw.l3cachesize";
    42 
    43     if (sysctlbyname(name, &cacheBytes, &valueLen, NULL, 0) == 0) {
    44         return cacheBytes;
    45     }
    46 
    47     return 0;
    48 }
    49 #else
    50 static size_t
    51 GetL3CacheSize(void)
    52 {
    53     /* XXX: nothing to query on this platform; just guess a G4 value (2 MiB). */
    54     return (size_t) 2 * 1024 * 1024;
    55 }
    56 #endif /* __MACOSX__ */
    57 
    58 #if (defined(__MACOSX__) && (__GNUC__ < 4))
       /* Mac OS X with GCC older than 4: vector literals use the parenthesized form. */
    59 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    60         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    61 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    62         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    63 #else
       /* Every other compiler: vector literals use the brace-initializer form. */
    64 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    65         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    66 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    67         (vector unsigned short) { a,b,c,d,e,f,g,h }
    68 #endif
    69 
       /*
        * Non-zero when the low four address bits of x are set, i.e. x is not on a
        *  16-byte boundary.
        * NOTE(review): x is expanded without parentheses, so the cast binds to the
        *  first operand of a compound argument; pass a plain pointer name.
        */
    70 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
       /*
        * Byte-permute control that applies the byte order (a,b,c,d) to each of the
        *  four 32-bit lanes of a vector.
        * NOTE(review): this only uses the parenthesized literal spelling, unlike
        *  VECUINT8_LITERAL above -- confirm it is still needed/compilable.
        */
    71 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    72                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    73                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    74                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    75                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    76 
       /*
        * Build one 32-bit pixel from r, g, b, a using dstfmt's per-channel shifts
        *  and masks.
        * NOTE(review): the arguments are expanded unparenthesized and are shifted
        *  at whatever type the caller passes (a promoted Uint8 is shifted as int).
        */
    77 #define MAKE8888(dstfmt, r, g, b, a)  \
    78     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    79       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    80       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    81       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    82 
    83 /*
    84  * Data Stream Touch...Altivec cache prefetching.
    85  *
    86  *  Don't use this on a G5...however, the speed boost is very significant
    87  *   on a G4.
    88  */
       /* Stream channel ids: one for the source stream, one for the destination. */
    89 #define DST_CHAN_SRC 1
    90 #define DST_CHAN_DEST 2
    91 
    92 /* macro to set DST control word value... */
       /* Layout: size from bit 24 up, count from bit 16 up, stride in the low bits. */
    93 #define DST_CTRL(size, count, stride) \
    94     (((size) << 24) | ((count) << 16) | (stride))
    95 
       /*
        * Permute control used to realign a source stream: vec_lvsl(0, src) when
        *  src is unaligned, otherwise vec_lvsl(8, src) with 8 added to every byte.
        * NOTE(review): for an aligned src the second form should come out as
        *  indices 0x10-0x1F, i.e. "take the second vec_perm operand" -- confirm
        *  against the AltiVec documentation.
        */
    96 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    97     ? vec_lvsl(0, src) \
    98     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    99 
   100 /* Calculate the permute vector used for 32->32 swizzling */
       /*
        * srcfmt / dstfmt describe the source and destination 32-bit layouts; a
        *  NULL pointer stands for the built-in ARGB description below.
        *
        * Returns a vec_perm control vector: the same four byte indices repeated
        *  for each of the four pixels of a vector, offset by 0, 4, 8 and 12.
        *  Index 0x10 is emitted for the alpha byte when the source cannot supply
        *  one, so callers provide the replacement alpha in the first byte of the
        *  vector they pass as the second vec_perm operand.
        */
   101 static vector unsigned char
   102 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
   103 {
   104     /*
   105      * We have to assume that the bits that aren't used by other
   106      *  colors is alpha, and it's one complete byte, since some formats
   107      *  leave alpha with a zero mask, but we should still swizzle the bits.
   108      */
   109     /* ARGB */
   110     static const struct SDL_PixelFormat default_pixel_format = {
   111         0, NULL, 0, 0,
   112         0, 0, 0, 0,
   113         16, 8, 0, 24,
   114         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   115         0, NULL
   116     };
   117     if (!srcfmt) {
   118         srcfmt = &default_pixel_format;
   119     }
   120     if (!dstfmt) {
   121         dstfmt = &default_pixel_format;
   122     }
           /* per-pixel base offsets: bytes of pixel n start at index 4 * n */
   123     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   124                                                        0x04, 0x04, 0x04, 0x04,
   125                                                        0x08, 0x08, 0x08, 0x08,
   126                                                        0x0C, 0x0C, 0x0C,
   127                                                        0x0C);
   128     vector unsigned char vswiz;
   129     vector unsigned int srcvec;
           /* RESHIFT maps a channel bit shift of 24/16/8/0 to byte index 0/1/2/3 */
   130 #define RESHIFT(X) (3 - ((X) >> 3))
           /* place each source byte index at the bit position dstfmt uses for it */
   131     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   132     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   133     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   134     Uint32 amask;
   135     /* Use zero for alpha if either surface doesn't have alpha */
   136     if (dstfmt->Amask) {
               /* 0x10 = first byte of the second vec_perm operand */
   137         amask =
   138             ((srcfmt->Amask) ? RESHIFT(srcfmt->
   139                                        Ashift) : 0x10) << (dstfmt->Ashift);
   140     } else {
               /* no alpha mask: put 0x10 in whichever byte R, G and B leave unused */
   141         amask =
   142             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   143                           0xFFFFFFFF);
   144     }
   145 #undef RESHIFT
           /* write the four indices into lane 0, then replicate and add the offsets */
   146     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   147     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   148     return (vswiz);
   149 }
   150 
   151 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
       /*
        * Blit 32-bit source pixels (layout taken from info->src_fmt) to 16-bit
        *  RGB565 destination pixels.
        *
        * Each row is handled in three stages: a scalar loop until dst reaches a
        *  16-byte boundary, a vector loop that converts eight pixels per pass,
        *  and a scalar loop for the remaining (width % 8) pixels.  info supplies
        *  the row pointers, the destination size and the per-row skips.
        */
   152 static void
   153 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   154 {
   155     int height = info->dst_h;
   156     Uint8 *src = (Uint8 *) info->src;
   157     int srcskip = info->src_skip;
   158     Uint8 *dst = (Uint8 *) info->dst;
   159     int dstskip = info->dst_skip;
   160     SDL_PixelFormat *srcfmt = info->src_fmt;
   161     vector unsigned char valpha = vec_splat_u8(0);
           /* reorders every source pixel to the default A,R,G,B byte order */
   162     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
           /* gathers byte 2 (green) of each of the eight pixels into a halfword */
   163     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   164                                                     0x00, 0x0a, 0x00, 0x0e,
   165                                                     0x00, 0x12, 0x00, 0x16,
   166                                                     0x00, 0x1a, 0x00, 0x1e);
   167     vector unsigned short v1 = vec_splat_u16(1);
   168     vector unsigned short v3 = vec_splat_u16(3);
           /*
            * Blue mask: only the five blue bits of the packed 1555 pixel.  A wider
            *  mask would let the low green bit of the 1555 value leak into bit 5
            *  of the 565 result, disagreeing with the scalar path below.
            */
   169     vector unsigned short v1f =
   170         VECUINT16_LITERAL(0x001f, 0x001f, 0x001f, 0x001f,
   171                           0x001f, 0x001f, 0x001f, 0x001f);
           /* keeps the top six bits of the 8-bit green value */
   172     vector unsigned short vfc =
   173         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   174                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
           /* -8 splats 0xF8 into every byte; shifting left by 8 leaves 0xF800 */
   175     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-8);
   176     vf800 = vec_sl(vf800, vec_splat_u16(8));
   177 
   178     while (height--) {
   179         vector unsigned char valigner;
   180         vector unsigned char voverflow;
   181         vector unsigned char vsrc;
   182 
   183         int width = info->dst_w;
   184         int extrawidth;
   185 
   186         /* do scalar until we can align... */
   187 #define ONE_PIXEL_BLEND(condition, widthvar) \
   188         while (condition) { \
   189             Uint32 Pixel; \
   190             unsigned sR, sG, sB, sA; \
   191             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   192                           sR, sG, sB, sA); \
   193             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   194                                 ((sG << 3) & 0x000007E0) | \
   195                                 ((sB >> 3) & 0x0000001F)); \
   196             dst += 2; \
   197             src += 4; \
   198             widthvar--; \
   199         }
   200 
   201         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   202 
   203         /* After all that work, here's the vector part! */
   204         extrawidth = (width % 8);       /* trailing unaligned stores */
   205         width -= extrawidth;
               /* prime the realignment pipeline with the first source vector */
   206         vsrc = vec_ld(0, src);
   207         valigner = VEC_ALIGNER(src);
   208 
   209         while (width) {
   210             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   211             vector unsigned int vsrc1, vsrc2;
   212             vector unsigned char vdst;
   213 
                   /* first four pixels: realign, then swizzle to ARGB */
   214             voverflow = vec_ld(15, src);
   215             vsrc = vec_perm(vsrc, voverflow, valigner);
   216             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   217             src += 16;
   218             vsrc = voverflow;
                   /* next four pixels, same treatment */
   219             voverflow = vec_ld(15, src);
   220             vsrc = vec_perm(vsrc, voverflow, valigner);
   221             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   222             /* 1555 */
   223             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
                   /* green is taken from the full 8-bit value to get six bits */
   224             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   225             vgpixel = vec_and(vgpixel, vfc);
   226             vgpixel = vec_sl(vgpixel, v3);
                   /* red: move the 1555 red field up one bit into bits 15-11 */
   227             vrpixel = vec_sl(vpixel, v1);
   228             vrpixel = vec_and(vrpixel, vf800);
                   /* blue: already in bits 4-0 of the 1555 value */
   229             vbpixel = vec_and(vpixel, v1f);
   230             vdst =
   231                 vec_or((vector unsigned char) vrpixel,
   232                        (vector unsigned char) vgpixel);
   233             /* 565 */
   234             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   235             vec_st(vdst, 0, dst);
   236 
   237             width -= 8;
   238             src += 16;
   239             dst += 16;
   240             vsrc = voverflow;
   241         }
   242 
   243         assert(width == 0);
   244 
   245         /* finish the trailing (width % 8) pixels with the scalar loop */
   246         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   247 #undef ONE_PIXEL_BLEND
   248 
   249         src += srcskip;         /* move to next row, accounting for pitch. */
   250         dst += dstskip;
   251     }
   252 
   253 
   254 }
   255 
       /*
        * Blit 16-bit RGB565 source pixels to a 32-bit destination whose layout is
        *  taken from info->dst_fmt.
        *
        * Each row is handled in three stages: a scalar loop until dst reaches a
        *  16-byte boundary, a vector loop that expands eight pixels per pass
        *  (two 16-byte stores), and a scalar loop for the remaining (width % 8)
        *  pixels.  The alpha byte is info->a when the destination has an alpha
        *  mask and info->a is non-zero, otherwise 0.
        */
   256 static void
   257 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   258 {
   259     int height = info->dst_h;
   260     Uint8 *src = (Uint8 *) info->src;
   261     int srcskip = info->src_skip;
   262     Uint8 *dst = (Uint8 *) info->dst;
   263     int dstskip = info->dst_skip;
   264     SDL_PixelFormat *srcfmt = info->src_fmt;
   265     SDL_PixelFormat *dstfmt = info->dst_fmt;
   266     unsigned alpha;
   267     vector unsigned char valpha;
   268     vector unsigned char vpermute;
   269     vector unsigned short vf800;
           vector unsigned short vfc00;
   270     vector unsigned int v8 = vec_splat_u32(8);
   271     vector unsigned int v16 = vec_add(v8, v8);
   272     vector unsigned short v2 = vec_splat_u16(2);
   273     vector unsigned short v3 = vec_splat_u16(3);
   274     /* 
   275        0x10 - 0x1f is the alpha
   276        0x00 - 0x0e evens are the red
   277        0x01 - 0x0f odds are zero
   278      */
   279     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   280                                                        0x10, 0x02, 0x01, 0x01,
   281                                                        0x10, 0x04, 0x01, 0x01,
   282                                                        0x10, 0x06, 0x01,
   283                                                        0x01);
           /* same pattern for pixels 4-7: adds 8 to the red index of every word */
   284     vector unsigned char vredalpha2 =
   285         (vector unsigned
   286          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   287         );
   288     /*
   289        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   290        0x11 - 0x17 odds are blue (low bytes of vB)
   291      */
   292     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   293                                                    0x04, 0x05, 0x06, 0x13,
   294                                                    0x08, 0x09, 0x0a, 0x15,
   295                                                    0x0c, 0x0d, 0x0e, 0x17);
           /* pixels 4-7: adds 8 to the blue index of every word */
   296     vector unsigned char vblue2 =
   297         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   298         );
   299     /*
   300        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   301        0x10 - 0x16 evens are green (high bytes of vG)
   302      */
   303     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   304                                                     0x04, 0x05, 0x12, 0x07,
   305                                                     0x08, 0x09, 0x14, 0x0b,
   306                                                     0x0c, 0x0d, 0x16, 0x0f);
           /* pixels 4-7: adds 8 to the green index of every word */
   307     vector unsigned char vgreen2 =
   308         (vector unsigned
   309          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   310         );
   311 
   312 
   313     assert(srcfmt->BytesPerPixel == 2);
   314     assert(dstfmt->BytesPerPixel == 4);
   315 
           /* -8 splats 0xF8 into every byte; shifting left by 8 leaves 0xF800 */
   316     vf800 = (vector unsigned short) vec_splat_u8(-8);
   317     vf800 = vec_sl(vf800, vec_splat_u16(8));
           /* -4 splats 0xFC; shifting left by 8 leaves 0xFC00 (six green bits) */
           vfc00 = (vector unsigned short) vec_splat_u8(-4);
           vfc00 = vec_sl(vfc00, vec_splat_u16(8));
   318 
   319     if (dstfmt->Amask && info->a) {
   320         ((unsigned char *) &valpha)[0] = alpha = info->a;
   321         valpha = vec_splat(valpha, 0);
   322     } else {
   323         alpha = 0;
   324         valpha = vec_splat_u8(0);
   325     }
   326 
           /* pixels are assembled as ARGB first, then swizzled to dstfmt */
   327     vpermute = calc_swizzle32(NULL, dstfmt);
   328     while (height--) {
   329         vector unsigned char valigner;
   330         vector unsigned char voverflow;
   331         vector unsigned char vsrc;
   332 
   333         int width = info->dst_w;
   334         int extrawidth;
   335 
   336         /* do scalar until we can align... */
   337 #define ONE_PIXEL_BLEND(condition, widthvar) \
   338         while (condition) { \
   339             unsigned sR, sG, sB; \
   340             unsigned short Pixel = *((unsigned short *)src); \
   341             sR = (Pixel >> 8) & 0xf8; \
   342             sG = (Pixel >> 3) & 0xfc; \
   343             sB = (Pixel << 3) & 0xf8; \
   344             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   345             src += 2; \
   346             dst += 4; \
   347             widthvar--; \
   348         }
   349         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   350 
   351         /* After all that work, here's the vector part! */
   352         extrawidth = (width % 8);       /* trailing unaligned stores */
   353         width -= extrawidth;
               /* prime the realignment pipeline with the first source vector */
   354         vsrc = vec_ld(0, src);
   355         valigner = VEC_ALIGNER(src);
   356 
   357         while (width) {
   358             vector unsigned short vR, vG, vB;
   359             vector unsigned char vdst1, vdst2;
   360 
   361             voverflow = vec_ld(15, src);
   362             vsrc = vec_perm(vsrc, voverflow, valigner);
   363 
                   /*
                    * Channel values end up in one byte of each halfword: red and
                    *  green in the high byte, blue in the low byte.  Red and green
                    *  are masked so they match the scalar path's 0xf8 / 0xfc.
                    */
   364             vR = vec_and((vector unsigned short) vsrc, vf800);
   365             vB = vec_sl((vector unsigned short) vsrc, v3);
   366             vG = vec_and(vec_sl(vB, v2), vfc00);
   367 
                   /* pixels 0-3 */
   368             vdst1 =
   369                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   370                                                 valpha, vredalpha1);
   371             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   372             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   373             vdst1 = vec_perm(vdst1, valpha, vpermute);
   374             vec_st(vdst1, 0, dst);
   375 
                   /* pixels 4-7 */
   376             vdst2 =
   377                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   378                                                 valpha, vredalpha2);
   379             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   380             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   381             vdst2 = vec_perm(vdst2, valpha, vpermute);
   382             vec_st(vdst2, 16, dst);
   383 
   384             width -= 8;
   385             dst += 32;
   386             src += 16;
   387             vsrc = voverflow;
   388         }
   389 
   390         assert(width == 0);
   391 
   392 
   393         /* finish the trailing (width % 8) pixels with the scalar loop */
   394         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   395 #undef ONE_PIXEL_BLEND
   396 
   397         src += srcskip;         /* move to next row, accounting for pitch. */
   398         dst += dstskip;
   399     }
   400 
   401 }
   402 
   403 
       /*
        * Blit 16-bit RGB555 source pixels to a 32-bit destination whose layout is
        *  taken from info->dst_fmt.
        *
        * Each row is handled in three stages: a scalar loop until dst reaches a
        *  16-byte boundary, a vector loop that expands eight pixels per pass
        *  (two 16-byte stores), and a scalar loop for the remaining (width % 8)
        *  pixels.  The alpha byte is info->a when the destination has an alpha
        *  mask and info->a is non-zero, otherwise 0.
        */
   404 static void
   405 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   406 {
   407     int height = info->dst_h;
   408     Uint8 *src = (Uint8 *) info->src;
   409     int srcskip = info->src_skip;
   410     Uint8 *dst = (Uint8 *) info->dst;
   411     int dstskip = info->dst_skip;
   412     SDL_PixelFormat *srcfmt = info->src_fmt;
   413     SDL_PixelFormat *dstfmt = info->dst_fmt;
   414     unsigned alpha;
   415     vector unsigned char valpha;
   416     vector unsigned char vpermute;
   417     vector unsigned short vf800;
   418     vector unsigned int v8 = vec_splat_u32(8);
   419     vector unsigned int v16 = vec_add(v8, v8);
   420     vector unsigned short v1 = vec_splat_u16(1);
   421     vector unsigned short v3 = vec_splat_u16(3);
   422     /* 
   423        0x10 - 0x1f is the alpha
   424        0x00 - 0x0e evens are the red
   425        0x01 - 0x0f odds are zero
   426      */
   427     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   428                                                        0x10, 0x02, 0x01, 0x01,
   429                                                        0x10, 0x04, 0x01, 0x01,
   430                                                        0x10, 0x06, 0x01,
   431                                                        0x01);
           /* same pattern for pixels 4-7: adds 8 to the red index of every word */
   432     vector unsigned char vredalpha2 =
   433         (vector unsigned
   434          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   435         );
   436     /*
   437        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   438        0x11 - 0x17 odds are blue (low bytes of vB)
   439      */
   440     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   441                                                    0x04, 0x05, 0x06, 0x13,
   442                                                    0x08, 0x09, 0x0a, 0x15,
   443                                                    0x0c, 0x0d, 0x0e, 0x17);
           /* pixels 4-7: adds 8 to the blue index of every word */
   444     vector unsigned char vblue2 =
   445         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   446         );
   447     /*
   448        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   449        0x10 - 0x16 evens are green (high bytes of vG)
   450      */
   451     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   452                                                     0x04, 0x05, 0x12, 0x07,
   453                                                     0x08, 0x09, 0x14, 0x0b,
   454                                                     0x0c, 0x0d, 0x16, 0x0f);
           /* pixels 4-7: adds 8 to the green index of every word */
   455     vector unsigned char vgreen2 =
   456         (vector unsigned
   457          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   458         );
   459 
   460 
   461     assert(srcfmt->BytesPerPixel == 2);
   462     assert(dstfmt->BytesPerPixel == 4);
   463 
           /* -8 splats 0xF8 into every byte; shifting left by 8 leaves 0xF800 */
   464     vf800 = (vector unsigned short) vec_splat_u8(-8);
   465     vf800 = vec_sl(vf800, vec_splat_u16(8));
   466 
   467     if (dstfmt->Amask && info->a) {
   468         ((unsigned char *) &valpha)[0] = alpha = info->a;
   469         valpha = vec_splat(valpha, 0);
   470     } else {
   471         alpha = 0;
   472         valpha = vec_splat_u8(0);
   473     }
   474 
           /* pixels are assembled as ARGB first, then swizzled to dstfmt */
   475     vpermute = calc_swizzle32(NULL, dstfmt);
   476     while (height--) {
   477         vector unsigned char valigner;
   478         vector unsigned char voverflow;
   479         vector unsigned char vsrc;
   480 
   481         int width = info->dst_w;
   482         int extrawidth;
   483 
   484         /* do scalar until we can align... */
   485 #define ONE_PIXEL_BLEND(condition, widthvar) \
   486         while (condition) { \
   487             unsigned sR, sG, sB; \
   488             unsigned short Pixel = *((unsigned short *)src); \
   489             sR = (Pixel >> 7) & 0xf8; \
   490             sG = (Pixel >> 2) & 0xf8; \
   491             sB = (Pixel << 3) & 0xf8; \
   492             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   493             src += 2; \
   494             dst += 4; \
   495             widthvar--; \
   496         }
   497         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   498 
   499         /* After all that work, here's the vector part! */
   500         extrawidth = (width % 8);       /* trailing unaligned stores */
   501         width -= extrawidth;
               /* prime the realignment pipeline with the first source vector */
   502         vsrc = vec_ld(0, src);
   503         valigner = VEC_ALIGNER(src);
   504 
   505         while (width) {
   506             vector unsigned short vR, vG, vB;
   507             vector unsigned char vdst1, vdst2;
   508 
   509             voverflow = vec_ld(15, src);
   510             vsrc = vec_perm(vsrc, voverflow, valigner);
   511 
                   /*
                    * Channel values end up in one byte of each halfword: red and
                    *  green in the high byte, blue in the low byte.  Red and green
                    *  are both masked to five bits so they match the scalar
                    *  path's 0xf8 masks.
                    */
   512             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
   513             vB = vec_sl((vector unsigned short) vsrc, v3);
   514             vG = vec_and(vec_sl(vB, v3), vf800);
   515 
                   /* pixels 0-3 */
   516             vdst1 =
   517                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   518                                                 valpha, vredalpha1);
   519             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   520             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   521             vdst1 = vec_perm(vdst1, valpha, vpermute);
   522             vec_st(vdst1, 0, dst);
   523 
                   /* pixels 4-7 */
   524             vdst2 =
   525                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   526                                                 valpha, vredalpha2);
   527             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   528             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   529             vdst2 = vec_perm(vdst2, valpha, vpermute);
   530             vec_st(vdst2, 16, dst);
   531 
   532             width -= 8;
   533             dst += 32;
   534             src += 16;
   535             vsrc = voverflow;
   536         }
   537 
   538         assert(width == 0);
   539 
   540 
   541         /* finish the trailing (width % 8) pixels with the scalar loop */
   542         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   543 #undef ONE_PIXEL_BLEND
   544 
   545         src += srcskip;         /* move to next row, accounting for pitch. */
   546         dst += dstskip;
   547     }
   548 
   549 }
   550 
   551 static void BlitNtoNKey(SDL_BlitInfo * info);
   552 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
       /*
        * Colorkeyed blit between two 32-bit surfaces: source pixels whose RGB
        *  bits equal info->colorkey leave the destination untouched, all others
        *  are converted from info->src_fmt to info->dst_fmt and stored.
        *
        * Rows narrower than 16 pixels are handed to BlitNtoNKey or
        *  BlitNtoNKeyCopyAlpha.  Otherwise each row is handled in three stages:
        *  a scalar loop until dstp reaches a 16-byte boundary, a vector loop of
        *  four pixels per pass, and a scalar loop for the remaining (width % 4)
        *  pixels.  Every stage compares (pixel & rgbmask) against the masked key.
        */
   553 static void
   554 Blit32to32KeyAltivec(SDL_BlitInfo * info)
   555 {
   556     int height = info->dst_h;
   557     Uint32 *srcp = (Uint32 *) info->src;
   558     int srcskip = info->src_skip / 4;
   559     Uint32 *dstp = (Uint32 *) info->dst;
   560     int dstskip = info->dst_skip / 4;
   561     SDL_PixelFormat *srcfmt = info->src_fmt;
   562     int srcbpp = srcfmt->BytesPerPixel;
   563     SDL_PixelFormat *dstfmt = info->dst_fmt;
   564     int dstbpp = dstfmt->BytesPerPixel;
   565     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   566     unsigned alpha = dstfmt->Amask ? info->a : 0;
   567     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   568     Uint32 ckey = info->colorkey;
   569     vector unsigned int valpha;
   570     vector unsigned char vpermute;
   571     vector unsigned char vzero;
   572     vector unsigned int vckey;
   573     vector unsigned int vrgbmask;
   574     vpermute = calc_swizzle32(srcfmt, dstfmt);
           /* narrow rows are not worth the vector setup: use the scalar blitters */
   575     if (info->dst_w < 16) {
   576         if (copy_alpha) {
   577             BlitNtoNKeyCopyAlpha(info);
   578         } else {
   579             BlitNtoNKey(info);
   580         }
   581         return;
   582     }
   583     vzero = vec_splat_u8(0);
           /* replacement alpha, replicated into every byte of valpha */
   584     if (alpha) {
   585         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   586         valpha =
   587             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   588     } else {
   589         valpha = (vector unsigned int) vzero;
   590     }
           /* only the RGB bits take part in the key comparison */
   591     ckey &= rgbmask;
   592     ((unsigned int *) (char *) &vckey)[0] = ckey;
   593     vckey = vec_splat(vckey, 0);
   594     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   595     vrgbmask = vec_splat(vrgbmask, 0);
   596 
   597     while (height--) {
   598 #define ONE_PIXEL_BLEND(condition, widthvar) \
   599         if (copy_alpha) { \
   600             while (condition) { \
   601                 Uint32 Pixel; \
   602                 unsigned sR, sG, sB, sA; \
   603                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   604                           sR, sG, sB, sA); \
   605                 if ( (Pixel & rgbmask) != ckey ) { \
   606                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   607                             sR, sG, sB, sA); \
   608                 } \
   609                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   610                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   611                 widthvar--; \
   612             } \
   613         } else { \
   614             while (condition) { \
   615                 Uint32 Pixel; \
   616                 unsigned sR, sG, sB; \
   617                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   618                 if ( (Pixel & rgbmask) != ckey ) { \
   619                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   620                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   621                               sR, sG, sB, alpha); \
   622                 } \
   623                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   624                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   625                 widthvar--; \
   626             } \
   627         }
   628         int width = info->dst_w;
               /* scalar pixels until dstp is 16-byte aligned */
   629         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   630         assert(width > 0);
   631         if (width > 0) {
   632             int extrawidth = (width % 4);
   633             vector unsigned char valigner = VEC_ALIGNER(srcp);
   634             vector unsigned int vs = vec_ld(0, srcp);
   635             width -= extrawidth;
   636             assert(width >= 4);
   637             while (width) {
   638                 vector unsigned char vsel;
   639                 vector unsigned int vd;
   640                 vector unsigned int voverflow = vec_ld(15, srcp);
   641                 /* load the source vec */
   642                 vs = vec_perm(vs, voverflow, valigner);
   643                 /* vsel is set for items that match the key */
                       /* compare the masked pixels, so non-RGB bits cannot hide a match */
   644                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   645                 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int) vsel, vckey);
   646                 /* permute the src vec to the dest format */
   647                 vs = vec_perm(vs, valpha, vpermute);
   648                 /* load the destination vec */
   649                 vd = vec_ld(0, dstp);
   650                 /* select the source and dest into vs */
                       /* keyed pixels (vsel set) keep the destination value */
   651                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   652                                                    (vector unsigned char) vd,
   653                                                    vsel);
   654 
   655                 vec_st(vd, 0, dstp);
   656                 srcp += 4;
   657                 width -= 4;
   658                 dstp += 4;
   659                 vs = voverflow;
   660             }
                   /* trailing (width % 4) pixels */
   661             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   662 #undef ONE_PIXEL_BLEND
   663             srcp += srcskip;
   664             dstp += dstskip;
   665         }
   666     }
   667 }
   668 
   669 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   670 /* Use this on a G5 */
       /*
        * info supplies the row pointers, the destination size, the per-row skips
        *  and both pixel formats.  Each row is handled in three stages: a scalar
        *  loop until dst reaches a 16-byte boundary, a vector loop of four pixels
        *  per pass, and a scalar loop for the remaining (width % 4) pixels.
        *  No data-stream prefetch instructions are issued in this variant.
        */
   671 static void
   672 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
   673 {
   674     int height = info->dst_h;
   675     Uint32 *src = (Uint32 *) info->src;
           /* skips are stored in bytes; convert to whole 32-bit pixels */
   676     int srcskip = info->src_skip / 4;
   677     Uint32 *dst = (Uint32 *) info->dst;
   678     int dstskip = info->dst_skip / 4;
   679     SDL_PixelFormat *srcfmt = info->src_fmt;
   680     SDL_PixelFormat *dstfmt = info->dst_fmt;
           /* second vec_perm operand: source of the alpha byte when src has none */
   681     vector unsigned int vzero = vec_splat_u32(0);
   682     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
           /* destination has alpha but the source does not: fill with info->a */
   683     if (dstfmt->Amask && !srcfmt->Amask) {
   684         if (info->a) {
   685             vector unsigned char valpha;
   686             ((unsigned char *) &valpha)[0] = info->a;
   687             vzero = (vector unsigned int) vec_splat(valpha, 0);
   688         }
   689     }
   690 
   691     assert(srcfmt->BytesPerPixel == 4);
   692     assert(dstfmt->BytesPerPixel == 4);
   693 
   694     while (height--) {
   695         vector unsigned char valigner;
   696         vector unsigned int vbits;
   697         vector unsigned int voverflow;
   698         Uint32 bits;
   699         Uint8 r, g, b, a;
   700 
   701         int width = info->dst_w;
   702         int extrawidth;
   703 
   704         /* do scalar until we can align... */
   705         while ((UNALIGNED_PTR(dst)) && (width)) {
   706             bits = *(src++);
   707             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   708             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   709             width--;
   710         }
   711 
   712         /* After all that work, here's the vector part! */
   713         extrawidth = (width % 4);
   714         width -= extrawidth;
               /* prime the realignment pipeline with the first source vector */
   715         valigner = VEC_ALIGNER(src);
   716         vbits = vec_ld(0, src);
   717 
   718         while (width) {
   719             voverflow = vec_ld(15, src);
   720             src += 4;
   721             width -= 4;
   722             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   723             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   724             vec_st(vbits, 0, dst);      /* store it back out. */
   725             dst += 4;
                   /* the overflow vector is the start of the next four pixels */
   726             vbits = voverflow;
   727         }
   728 
   729         assert(width == 0);
   730 
   731         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   732         while (extrawidth) {
   733             bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
   734             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   735             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   736             extrawidth--;
   737         }
   738 
               /* move to the next row, accounting for pitch */
   739         src += srcskip;
   740         dst += dstskip;
   741     }
   742 
   743 }
   744 
   745 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   746 /* Use this on a G4 */
   747 static void
   748 ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
   749 {
   750     const int scalar_dst_lead = sizeof(Uint32) * 4;
   751     const int vector_dst_lead = sizeof(Uint32) * 16;
   752 
   753     int height = info->dst_h;
   754     Uint32 *src = (Uint32 *) info->src;
   755     int srcskip = info->src_skip / 4;
   756     Uint32 *dst = (Uint32 *) info->dst;
   757     int dstskip = info->dst_skip / 4;
   758     SDL_PixelFormat *srcfmt = info->src_fmt;
   759     SDL_PixelFormat *dstfmt = info->dst_fmt;
   760     vector unsigned int vzero = vec_splat_u32(0);
   761     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   762     if (dstfmt->Amask && !srcfmt->Amask) {
   763         if (info->a) {
   764             vector unsigned char valpha;
   765             ((unsigned char *) &valpha)[0] = info->a;
   766             vzero = (vector unsigned int) vec_splat(valpha, 0);
   767         }
   768     }
   769 
   770     assert(srcfmt->BytesPerPixel == 4);
   771     assert(dstfmt->BytesPerPixel == 4);
   772 
   773     while (height--) {
   774         vector unsigned char valigner;
   775         vector unsigned int vbits;
   776         vector unsigned int voverflow;
   777         Uint32 bits;
   778         Uint8 r, g, b, a;
   779 
   780         int width = info->dst_w;
   781         int extrawidth;
   782 
   783         /* do scalar until we can align... */
   784         while ((UNALIGNED_PTR(dst)) && (width)) {
   785             vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   786                      DST_CHAN_SRC);
   787             vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   788                       DST_CHAN_DEST);
   789             bits = *(src++);
   790             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   791             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   792             width--;
   793         }
   794 
   795         /* After all that work, here's the vector part! */
   796         extrawidth = (width % 4);
   797         width -= extrawidth;
   798         valigner = VEC_ALIGNER(src);
   799         vbits = vec_ld(0, src);
   800 
   801         while (width) {
   802             vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
   803                      DST_CHAN_SRC);
   804             vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
   805                       DST_CHAN_DEST);
   806             voverflow = vec_ld(15, src);
   807             src += 4;
   808             width -= 4;
   809             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   810             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   811             vec_st(vbits, 0, dst);      /* store it back out. */
   812             dst += 4;
   813             vbits = voverflow;
   814         }
   815 
   816         assert(width == 0);
   817 
   818         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   819         while (extrawidth) {
   820             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   821             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   822             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   823             extrawidth--;
   824         }
   825 
   826         src += srcskip;
   827         dst += dstskip;
   828     }
   829 
   830     vec_dss(DST_CHAN_SRC);
   831     vec_dss(DST_CHAN_DEST);
   832 }
   833 
   834 static Uint32
   835 GetBlitFeatures(void)
   836 {
   837     static Uint32 features = 0xffffffff;
   838     if (features == 0xffffffff) {
   839         /* Provide an override for testing .. */
   840         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   841         if (override) {
   842             features = 0;
   843             SDL_sscanf(override, "%u", &features);
   844         } else {
   845             features = (0
   846                         /* Feature 1 is has-MMX */
   847                         | ((SDL_HasMMX())? 1 : 0)
   848                         /* Feature 2 is has-AltiVec */
   849                         | ((SDL_HasAltiVec())? 2 : 0)
   850                         /* Feature 4 is dont-use-prefetch */
   851                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   852                         | ((GetL3CacheSize() == 0) ? 4 : 0)
   853                 );
   854         }
   855     }
   856     return features;
   857 }
   858 
   859 #if __MWERKS__
   860 #pragma altivec_model off
   861 #endif
   862 #else
/* Fallback used when the AltiVec blitters are not compiled in: the only
   feature bit ever reported is 1 (has-MMX), taken from SDL_HasMMX(). */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   865 #endif
   866 
/* This is now endian dependent.
   HI and LO are array indices (0 or 1) chosen by byte order.  The pixel
   macros in this file use them as src[HI] / src[LO]: RGB888_RGB555_TWO and
   RGB888_RGB565_TWO shift the src[HI] pixel into the upper 16 bits of a
   combined 32-bit store, and RGB565_32 looks up the src[LO] and src[HI]
   bytes of a 16-bit pixel separately. */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#define HI	1
#define LO	0
#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
#define HI	0
#define LO	1
#endif
   875 
   876 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   877 #define RGB888_RGB332(dst, src) { \
   878 	dst = (Uint8)((((src)&0x00E00000)>>16)| \
   879 	              (((src)&0x0000E000)>>11)| \
   880 	              (((src)&0x000000C0)>>6)); \
   881 }
   882 static void
   883 Blit_RGB888_index8(SDL_BlitInfo * info)
   884 {
   885 #ifndef USE_DUFFS_LOOP
   886     int c;
   887 #endif
   888     int width, height;
   889     Uint32 *src;
   890     const Uint8 *map;
   891     Uint8 *dst;
   892     int srcskip, dstskip;
   893 
   894     /* Set up some basic variables */
   895     width = info->dst_w;
   896     height = info->dst_h;
   897     src = (Uint32 *) info->src;
   898     srcskip = info->src_skip / 4;
   899     dst = info->dst;
   900     dstskip = info->dst_skip;
   901     map = info->table;
   902 
   903     if (map == NULL) {
   904         while (height--) {
   905 #ifdef USE_DUFFS_LOOP
   906 			/* *INDENT-OFF* */
   907 			DUFFS_LOOP(
   908 				RGB888_RGB332(*dst++, *src);
   909 			, width);
   910 			/* *INDENT-ON* */
   911 #else
   912             for (c = width / 4; c; --c) {
   913                 /* Pack RGB into 8bit pixel */
   914                 ++src;
   915                 RGB888_RGB332(*dst++, *src);
   916                 ++src;
   917                 RGB888_RGB332(*dst++, *src);
   918                 ++src;
   919                 RGB888_RGB332(*dst++, *src);
   920                 ++src;
   921             }
   922             switch (width & 3) {
   923             case 3:
   924                 RGB888_RGB332(*dst++, *src);
   925                 ++src;
   926             case 2:
   927                 RGB888_RGB332(*dst++, *src);
   928                 ++src;
   929             case 1:
   930                 RGB888_RGB332(*dst++, *src);
   931                 ++src;
   932             }
   933 #endif /* USE_DUFFS_LOOP */
   934             src += srcskip;
   935             dst += dstskip;
   936         }
   937     } else {
   938         int Pixel;
   939 
   940         while (height--) {
   941 #ifdef USE_DUFFS_LOOP
   942 			/* *INDENT-OFF* */
   943 			DUFFS_LOOP(
   944 				RGB888_RGB332(Pixel, *src);
   945 				*dst++ = map[Pixel];
   946 				++src;
   947 			, width);
   948 			/* *INDENT-ON* */
   949 #else
   950             for (c = width / 4; c; --c) {
   951                 /* Pack RGB into 8bit pixel */
   952                 RGB888_RGB332(Pixel, *src);
   953                 *dst++ = map[Pixel];
   954                 ++src;
   955                 RGB888_RGB332(Pixel, *src);
   956                 *dst++ = map[Pixel];
   957                 ++src;
   958                 RGB888_RGB332(Pixel, *src);
   959                 *dst++ = map[Pixel];
   960                 ++src;
   961                 RGB888_RGB332(Pixel, *src);
   962                 *dst++ = map[Pixel];
   963                 ++src;
   964             }
   965             switch (width & 3) {
   966             case 3:
   967                 RGB888_RGB332(Pixel, *src);
   968                 *dst++ = map[Pixel];
   969                 ++src;
   970             case 2:
   971                 RGB888_RGB332(Pixel, *src);
   972                 *dst++ = map[Pixel];
   973                 ++src;
   974             case 1:
   975                 RGB888_RGB332(Pixel, *src);
   976                 *dst++ = map[Pixel];
   977                 ++src;
   978             }
   979 #endif /* USE_DUFFS_LOOP */
   980             src += srcskip;
   981             dst += dstskip;
   982         }
   983     }
   984 }
   985 
   986 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   987 #define RGB888_RGB555(dst, src) { \
   988 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
   989 	                            (((*src)&0x0000F800)>>6)| \
   990 	                            (((*src)&0x000000F8)>>3)); \
   991 }
   992 #define RGB888_RGB555_TWO(dst, src) { \
   993 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   994 	                     (((src[HI])&0x0000F800)>>6)| \
   995 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   996 	                     (((src[LO])&0x00F80000)>>9)| \
   997 	                     (((src[LO])&0x0000F800)>>6)| \
   998 	                     (((src[LO])&0x000000F8)>>3); \
   999 }
  1000 static void
  1001 Blit_RGB888_RGB555(SDL_BlitInfo * info)
  1002 {
  1003 #ifndef USE_DUFFS_LOOP
  1004     int c;
  1005 #endif
  1006     int width, height;
  1007     Uint32 *src;
  1008     Uint16 *dst;
  1009     int srcskip, dstskip;
  1010 
  1011     /* Set up some basic variables */
  1012     width = info->dst_w;
  1013     height = info->dst_h;
  1014     src = (Uint32 *) info->src;
  1015     srcskip = info->src_skip / 4;
  1016     dst = (Uint16 *) info->dst;
  1017     dstskip = info->dst_skip / 2;
  1018 
  1019 #ifdef USE_DUFFS_LOOP
  1020     while (height--) {
  1021 		/* *INDENT-OFF* */
  1022 		DUFFS_LOOP(
  1023 			RGB888_RGB555(dst, src);
  1024 			++src;
  1025 			++dst;
  1026 		, width);
  1027 		/* *INDENT-ON* */
  1028         src += srcskip;
  1029         dst += dstskip;
  1030     }
  1031 #else
  1032     /* Memory align at 4-byte boundary, if necessary */
  1033     if ((long) dst & 0x03) {
  1034         /* Don't do anything if width is 0 */
  1035         if (width == 0) {
  1036             return;
  1037         }
  1038         --width;
  1039 
  1040         while (height--) {
  1041             /* Perform copy alignment */
  1042             RGB888_RGB555(dst, src);
  1043             ++src;
  1044             ++dst;
  1045 
  1046             /* Copy in 4 pixel chunks */
  1047             for (c = width / 4; c; --c) {
  1048                 RGB888_RGB555_TWO(dst, src);
  1049                 src += 2;
  1050                 dst += 2;
  1051                 RGB888_RGB555_TWO(dst, src);
  1052                 src += 2;
  1053                 dst += 2;
  1054             }
  1055             /* Get any leftovers */
  1056             switch (width & 3) {
  1057             case 3:
  1058                 RGB888_RGB555(dst, src);
  1059                 ++src;
  1060                 ++dst;
  1061             case 2:
  1062                 RGB888_RGB555_TWO(dst, src);
  1063                 src += 2;
  1064                 dst += 2;
  1065                 break;
  1066             case 1:
  1067                 RGB888_RGB555(dst, src);
  1068                 ++src;
  1069                 ++dst;
  1070                 break;
  1071             }
  1072             src += srcskip;
  1073             dst += dstskip;
  1074         }
  1075     } else {
  1076         while (height--) {
  1077             /* Copy in 4 pixel chunks */
  1078             for (c = width / 4; c; --c) {
  1079                 RGB888_RGB555_TWO(dst, src);
  1080                 src += 2;
  1081                 dst += 2;
  1082                 RGB888_RGB555_TWO(dst, src);
  1083                 src += 2;
  1084                 dst += 2;
  1085             }
  1086             /* Get any leftovers */
  1087             switch (width & 3) {
  1088             case 3:
  1089                 RGB888_RGB555(dst, src);
  1090                 ++src;
  1091                 ++dst;
  1092             case 2:
  1093                 RGB888_RGB555_TWO(dst, src);
  1094                 src += 2;
  1095                 dst += 2;
  1096                 break;
  1097             case 1:
  1098                 RGB888_RGB555(dst, src);
  1099                 ++src;
  1100                 ++dst;
  1101                 break;
  1102             }
  1103             src += srcskip;
  1104             dst += dstskip;
  1105         }
  1106     }
  1107 #endif /* USE_DUFFS_LOOP */
  1108 }
  1109 
  1110 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1111 #define RGB888_RGB565(dst, src) { \
  1112 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1113 	                            (((*src)&0x0000FC00)>>5)| \
  1114 	                            (((*src)&0x000000F8)>>3)); \
  1115 }
  1116 #define RGB888_RGB565_TWO(dst, src) { \
  1117 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1118 	                     (((src[HI])&0x0000FC00)>>5)| \
  1119 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1120 	                     (((src[LO])&0x00F80000)>>8)| \
  1121 	                     (((src[LO])&0x0000FC00)>>5)| \
  1122 	                     (((src[LO])&0x000000F8)>>3); \
  1123 }
  1124 static void
  1125 Blit_RGB888_RGB565(SDL_BlitInfo * info)
  1126 {
  1127 #ifndef USE_DUFFS_LOOP
  1128     int c;
  1129 #endif
  1130     int width, height;
  1131     Uint32 *src;
  1132     Uint16 *dst;
  1133     int srcskip, dstskip;
  1134 
  1135     /* Set up some basic variables */
  1136     width = info->dst_w;
  1137     height = info->dst_h;
  1138     src = (Uint32 *) info->src;
  1139     srcskip = info->src_skip / 4;
  1140     dst = (Uint16 *) info->dst;
  1141     dstskip = info->dst_skip / 2;
  1142 
  1143 #ifdef USE_DUFFS_LOOP
  1144     while (height--) {
  1145 		/* *INDENT-OFF* */
  1146 		DUFFS_LOOP(
  1147 			RGB888_RGB565(dst, src);
  1148 			++src;
  1149 			++dst;
  1150 		, width);
  1151 		/* *INDENT-ON* */
  1152         src += srcskip;
  1153         dst += dstskip;
  1154     }
  1155 #else
  1156     /* Memory align at 4-byte boundary, if necessary */
  1157     if ((long) dst & 0x03) {
  1158         /* Don't do anything if width is 0 */
  1159         if (width == 0) {
  1160             return;
  1161         }
  1162         --width;
  1163 
  1164         while (height--) {
  1165             /* Perform copy alignment */
  1166             RGB888_RGB565(dst, src);
  1167             ++src;
  1168             ++dst;
  1169 
  1170             /* Copy in 4 pixel chunks */
  1171             for (c = width / 4; c; --c) {
  1172                 RGB888_RGB565_TWO(dst, src);
  1173                 src += 2;
  1174                 dst += 2;
  1175                 RGB888_RGB565_TWO(dst, src);
  1176                 src += 2;
  1177                 dst += 2;
  1178             }
  1179             /* Get any leftovers */
  1180             switch (width & 3) {
  1181             case 3:
  1182                 RGB888_RGB565(dst, src);
  1183                 ++src;
  1184                 ++dst;
  1185             case 2:
  1186                 RGB888_RGB565_TWO(dst, src);
  1187                 src += 2;
  1188                 dst += 2;
  1189                 break;
  1190             case 1:
  1191                 RGB888_RGB565(dst, src);
  1192                 ++src;
  1193                 ++dst;
  1194                 break;
  1195             }
  1196             src += srcskip;
  1197             dst += dstskip;
  1198         }
  1199     } else {
  1200         while (height--) {
  1201             /* Copy in 4 pixel chunks */
  1202             for (c = width / 4; c; --c) {
  1203                 RGB888_RGB565_TWO(dst, src);
  1204                 src += 2;
  1205                 dst += 2;
  1206                 RGB888_RGB565_TWO(dst, src);
  1207                 src += 2;
  1208                 dst += 2;
  1209             }
  1210             /* Get any leftovers */
  1211             switch (width & 3) {
  1212             case 3:
  1213                 RGB888_RGB565(dst, src);
  1214                 ++src;
  1215                 ++dst;
  1216             case 2:
  1217                 RGB888_RGB565_TWO(dst, src);
  1218                 src += 2;
  1219                 dst += 2;
  1220                 break;
  1221             case 1:
  1222                 RGB888_RGB565(dst, src);
  1223                 ++src;
  1224                 ++dst;
  1225                 break;
  1226             }
  1227             src += srcskip;
  1228             dst += dstskip;
  1229         }
  1230     }
  1231 #endif /* USE_DUFFS_LOOP */
  1232 }
  1233 
  1234 
  1235 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1236 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1237 static void
  1238 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
  1239 {
  1240 #ifndef USE_DUFFS_LOOP
  1241     int c;
  1242 #endif
  1243     int width, height;
  1244     Uint8 *src;
  1245     Uint32 *dst;
  1246     int srcskip, dstskip;
  1247 
  1248     /* Set up some basic variables */
  1249     width = info->dst_w;
  1250     height = info->dst_h;
  1251     src = (Uint8 *) info->src;
  1252     srcskip = info->src_skip;
  1253     dst = (Uint32 *) info->dst;
  1254     dstskip = info->dst_skip / 4;
  1255 
  1256 #ifdef USE_DUFFS_LOOP
  1257     while (height--) {
  1258 		/* *INDENT-OFF* */
  1259 		DUFFS_LOOP(
  1260 		{
  1261 			*dst++ = RGB565_32(dst, src, map);
  1262 			src += 2;
  1263 		},
  1264 		width);
  1265 		/* *INDENT-ON* */
  1266         src += srcskip;
  1267         dst += dstskip;
  1268     }
  1269 #else
  1270     while (height--) {
  1271         /* Copy in 4 pixel chunks */
  1272         for (c = width / 4; c; --c) {
  1273             *dst++ = RGB565_32(dst, src, map);
  1274             src += 2;
  1275             *dst++ = RGB565_32(dst, src, map);
  1276             src += 2;
  1277             *dst++ = RGB565_32(dst, src, map);
  1278             src += 2;
  1279             *dst++ = RGB565_32(dst, src, map);
  1280             src += 2;
  1281         }
  1282         /* Get any leftovers */
  1283         switch (width & 3) {
  1284         case 3:
  1285             *dst++ = RGB565_32(dst, src, map);
  1286             src += 2;
  1287         case 2:
  1288             *dst++ = RGB565_32(dst, src, map);
  1289             src += 2;
  1290         case 1:
  1291             *dst++ = RGB565_32(dst, src, map);
  1292             src += 2;
  1293             break;
  1294         }
  1295         src += srcskip;
  1296         dst += dstskip;
  1297     }
  1298 #endif /* USE_DUFFS_LOOP */
  1299 }
  1300 
/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/* Lookup table consumed by Blit_RGB565_32 via the RGB565_32 macro: 256
   pairs of entries, one pair per byte value.  Entry [2*v] is the term for
   the byte read as src[LO] (blue plus the low green bits); entry [2*v+1]
   is the term for the byte read as src[HI] (0xff alpha, red, and the high
   green bits).  Adding the two terms gives the ARGB8888 pixel. */
static const Uint32 RGB565_ARGB8888_LUT[512] = {
    0x00000000, 0xff000000, 0x00000008, 0xff002000,
    0x00000010, 0xff004000, 0x00000018, 0xff006100,
    0x00000020, 0xff008100, 0x00000029, 0xff00a100,
    0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
    0x00000041, 0xff080000, 0x0000004a, 0xff082000,
    0x00000052, 0xff084000, 0x0000005a, 0xff086100,
    0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
    0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
    0x00000083, 0xff100000, 0x0000008b, 0xff102000,
    0x00000094, 0xff104000, 0x0000009c, 0xff106100,
    0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
    0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
    0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
    0x000000d5, 0xff184000, 0x000000de, 0xff186100,
    0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
    0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
    0x00000400, 0xff200000, 0x00000408, 0xff202000,
    0x00000410, 0xff204000, 0x00000418, 0xff206100,
    0x00000420, 0xff208100, 0x00000429, 0xff20a100,
    0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
    0x00000441, 0xff290000, 0x0000044a, 0xff292000,
    0x00000452, 0xff294000, 0x0000045a, 0xff296100,
    0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
    0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
    0x00000483, 0xff310000, 0x0000048b, 0xff312000,
    0x00000494, 0xff314000, 0x0000049c, 0xff316100,
    0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
    0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
    0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
    0x000004d5, 0xff394000, 0x000004de, 0xff396100,
    0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
    0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
    0x00000800, 0xff410000, 0x00000808, 0xff412000,
    0x00000810, 0xff414000, 0x00000818, 0xff416100,
    0x00000820, 0xff418100, 0x00000829, 0xff41a100,
    0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
    0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
    0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
    0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
    0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
    0x00000883, 0xff520000, 0x0000088b, 0xff522000,
    0x00000894, 0xff524000, 0x0000089c, 0xff526100,
    0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
    0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
    0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
    0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
    0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
    0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
    0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
    0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
    0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
    0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
    0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
    0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
    0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
    0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
    0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
    0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
    0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
    0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
    0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
    0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
    0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
    0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
    0x00001000, 0xff830000, 0x00001008, 0xff832000,
    0x00001010, 0xff834000, 0x00001018, 0xff836100,
    0x00001020, 0xff838100, 0x00001029, 0xff83a100,
    0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
    0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
    0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
    0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
    0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
    0x00001083, 0xff940000, 0x0000108b, 0xff942000,
    0x00001094, 0xff944000, 0x0000109c, 0xff946100,
    0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
    0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
    0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
    0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
    0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
    0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
    0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
    0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
    0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
    0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
    0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
    0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
    0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
    0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
    0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
    0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
    0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
    0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
    0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
    0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
    0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
    0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
    0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
    0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
    0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
    0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
    0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
    0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
    0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
    0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
    0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
    0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
    0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
    0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
    0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
    0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
    0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
    0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
    0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
    0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
    0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
    0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
    0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
    0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
    0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
    0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
    0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
    0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
    0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
    0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
    0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
    0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
    0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
    0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
};
  1432 
/* Blit RGB565 to ARGB8888: delegates to the table-driven Blit_RGB565_32
   with RGB565_ARGB8888_LUT as the lookup table. */
static void
Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
  1438 
  1439 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1440 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1441     0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1442     0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1443     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1444     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1445     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1446     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1447     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1448     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1449     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1450     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1451     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1452     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1453     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1454     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1455     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1456     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1457     0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1458     0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1459     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1460     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1461     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1462     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1463     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1464     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1465     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1466     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1467     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1468     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1469     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1470     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1471     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1472     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1473     0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1474     0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1475     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1476     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1477     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1478     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1479     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1480     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1481     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1482     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1483     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1484     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1485     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1486     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1487     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1488     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1489     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1490     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1491     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1492     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1493     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1494     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1495     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1496     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1497     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1498     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1499     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1500     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1501     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1502     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1503     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1504     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1505     0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1506     0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1507     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1508     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1509     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1510     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1511     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1512     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1513     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1514     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1515     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1516     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1517     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1518     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1519     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1520     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1521     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1522     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1523     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1524     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1525     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1526     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1527     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1528     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1529     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1530     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1531     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1532     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1533     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1534     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1535     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1536     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1537     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1538     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1539     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1540     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1541     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1542     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1543     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1544     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1545     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1546     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1547     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1548     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1549     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1550     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1551     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1552     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1553     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1554     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1555     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1556     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1557     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1558     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1559     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1560     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1561     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1562     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1563     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1564     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1565     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1566     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1567     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1568     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1569 };
  1570 
/* Blit RGB 5-6-5 pixels to an ABGR 8-8-8-8 destination by handing the request
   to Blit_RGB565_32() together with the RGB565_ABGR8888_LUT table. */
static void
Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
}
  1576 
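/* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8.
   The table is laid out as 256 pairs of partial pixels: the first word of
   each pair carries the blue and low green bits plus an opaque alpha (0xff),
   the second word carries the red and high green bits. */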
  1578 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1579     0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1580     0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1581     0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1582     0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1583     0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1584     0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1585     0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1586     0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1587     0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1588     0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1589     0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1590     0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1591     0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1592     0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1593     0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1594     0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1595     0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1596     0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1597     0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1598     0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1599     0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1600     0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1601     0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1602     0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1603     0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1604     0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1605     0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1606     0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1607     0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1608     0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1609     0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1610     0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1611     0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1612     0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1613     0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1614     0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1615     0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1616     0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1617     0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1618     0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1619     0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1620     0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1621     0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1622     0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1623     0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1624     0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1625     0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1626     0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1627     0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1628     0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1629     0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1630     0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1631     0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1632     0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1633     0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1634     0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1635     0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1636     0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1637     0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1638     0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1639     0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1640     0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1641     0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1642     0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1643     0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1644     0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1645     0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1646     0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1647     0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1648     0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1649     0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1650     0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1651     0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1652     0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1653     0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1654     0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1655     0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1656     0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1657     0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1658     0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1659     0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1660     0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1661     0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1662     0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1663     0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1664     0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1665     0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1666     0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1667     0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1668     0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1669     0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1670     0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1671     0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1672     0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1673     0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1674     0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1675     0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1676     0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1677     0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1678     0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1679     0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1680     0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1681     0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1682     0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1683     0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1684     0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1685     0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1686     0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1687     0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1688     0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1689     0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1690     0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1691     0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1692     0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1693     0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1694     0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1695     0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1696     0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1697     0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1698     0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1699     0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1700     0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1701     0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1702     0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1703     0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1704     0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1705     0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1706     0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1707 };
  1708 
/* Blit RGB 5-6-5 pixels to an RGBA 8-8-8-8 destination by handing the request
   to Blit_RGB565_32() together with the RGB565_RGBA8888_LUT table. */
static void
Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
}
  1714 
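/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8.
   The table is laid out as 256 pairs of partial pixels: the first word of
   each pair carries the blue and low green bits, the second word carries the
   red and high green bits plus an opaque alpha (0xff). */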
  1716 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1717     0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1718     0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1719     0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1720     0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1721     0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1722     0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1723     0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1724     0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1725     0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1726     0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1727     0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1728     0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1729     0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1730     0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1731     0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1732     0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1733     0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1734     0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1735     0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1736     0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1737     0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1738     0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1739     0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1740     0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1741     0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1742     0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1743     0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1744     0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1745     0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1746     0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1747     0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1748     0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1749     0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1750     0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1751     0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1752     0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1753     0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1754     0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1755     0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1756     0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1757     0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1758     0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1759     0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1760     0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1761     0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1762     0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1763     0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1764     0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1765     0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1766     0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1767     0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1768     0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1769     0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1770     0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1771     0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1772     0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1773     0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1774     0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1775     0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1776     0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1777     0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1778     0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1779     0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1780     0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1781     0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1782     0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1783     0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1784     0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1785     0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1786     0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1787     0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1788     0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1789     0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1790     0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1791     0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1792     0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1793     0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1794     0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1795     0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1796     0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1797     0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1798     0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1799     0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1800     0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1801     0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1802     0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1803     0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1804     0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1805     0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1806     0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1807     0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1808     0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1809     0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1810     0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1811     0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1812     0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1813     0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1814     0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1815     0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1816     0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1817     0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1818     0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1819     0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1820     0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1821     0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1822     0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1823     0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1824     0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1825     0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1826     0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1827     0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1828     0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1829     0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1830     0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1831     0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1832     0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1833     0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1834     0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1835     0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1836     0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1837     0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1838     0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1839     0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1840     0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1841     0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1842     0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1843     0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1844     0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1845 };
  1846 
/* Blit RGB 5-6-5 pixels to a BGRA 8-8-8-8 destination by handing the request
   to Blit_RGB565_32() together with the RGB565_BGRA8888_LUT table. */
static void
Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
  1852 
  1853 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1854 #ifndef RGB888_RGB332
  1855 #define RGB888_RGB332(dst, src) { \
  1856 	dst = (((src)&0x00E00000)>>16)| \
  1857 	      (((src)&0x0000E000)>>11)| \
  1858 	      (((src)&0x000000C0)>>6); \
  1859 }
  1860 #endif
  1861 static void
  1862 Blit_RGB888_index8_map(SDL_BlitInfo * info)
  1863 {
  1864 #ifndef USE_DUFFS_LOOP
  1865     int c;
  1866 #endif
  1867     int Pixel;
  1868     int width, height;
  1869     Uint32 *src;
  1870     const Uint8 *map;
  1871     Uint8 *dst;
  1872     int srcskip, dstskip;
  1873 
  1874     /* Set up some basic variables */
  1875     width = info->dst_w;
  1876     height = info->dst_h;
  1877     src = (Uint32 *) info->src;
  1878     srcskip = info->src_skip / 4;
  1879     dst = info->dst;
  1880     dstskip = info->dst_skip;
  1881     map = info->table;
  1882 
  1883 #ifdef USE_DUFFS_LOOP
  1884     while (height--) {
  1885 		/* *INDENT-OFF* */
  1886 		DUFFS_LOOP(
  1887 			RGB888_RGB332(Pixel, *src);
  1888 			*dst++ = map[Pixel];
  1889 			++src;
  1890 		, width);
  1891 		/* *INDENT-ON* */
  1892         src += srcskip;
  1893         dst += dstskip;
  1894     }
  1895 #else
  1896     while (height--) {
  1897         for (c = width / 4; c; --c) {
  1898             /* Pack RGB into 8bit pixel */
  1899             RGB888_RGB332(Pixel, *src);
  1900             *dst++ = map[Pixel];
  1901             ++src;
  1902             RGB888_RGB332(Pixel, *src);
  1903             *dst++ = map[Pixel];
  1904             ++src;
  1905             RGB888_RGB332(Pixel, *src);
  1906             *dst++ = map[Pixel];
  1907             ++src;
  1908             RGB888_RGB332(Pixel, *src);
  1909             *dst++ = map[Pixel];
  1910             ++src;
  1911         }
  1912         switch (width & 3) {
  1913         case 3:
  1914             RGB888_RGB332(Pixel, *src);
  1915             *dst++ = map[Pixel];
  1916             ++src;
  1917         case 2:
  1918             RGB888_RGB332(Pixel, *src);
  1919             *dst++ = map[Pixel];
  1920             ++src;
  1921         case 1:
  1922             RGB888_RGB332(Pixel, *src);
  1923             *dst++ = map[Pixel];
  1924             ++src;
  1925         }
  1926         src += srcskip;
  1927         dst += dstskip;
  1928     }
  1929 #endif /* USE_DUFFS_LOOP */
  1930 }
  1931 
  1932 static void
  1933 BlitNto1(SDL_BlitInfo * info)
  1934 {
  1935 #ifndef USE_DUFFS_LOOP
  1936     int c;
  1937 #endif
  1938     int width, height;
  1939     Uint8 *src;
  1940     const Uint8 *map;
  1941     Uint8 *dst;
  1942     int srcskip, dstskip;
  1943     int srcbpp;
  1944     Uint32 Pixel;
  1945     int sR, sG, sB;
  1946     SDL_PixelFormat *srcfmt;
  1947 
  1948     /* Set up some basic variables */
  1949     width = info->dst_w;
  1950     height = info->dst_h;
  1951     src = info->src;
  1952     srcskip = info->src_skip;
  1953     dst = info->dst;
  1954     dstskip = info->dst_skip;
  1955     map = info->table;
  1956     srcfmt = info->src_fmt;
  1957     srcbpp = srcfmt->BytesPerPixel;
  1958 
  1959     if (map == NULL) {
  1960         while (height--) {
  1961 #ifdef USE_DUFFS_LOOP
  1962 			/* *INDENT-OFF* */
  1963 			DUFFS_LOOP(
  1964 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1965 								sR, sG, sB);
  1966 				if ( 1 ) {
  1967 				  	/* Pack RGB into 8bit pixel */
  1968 				  	*dst = ((sR>>5)<<(3+2))|
  1969 					        ((sG>>5)<<(2)) |
  1970 					        ((sB>>6)<<(0)) ;
  1971 				}
  1972 				dst++;
  1973 				src += srcbpp;
  1974 			, width);
  1975 			/* *INDENT-ON* */
  1976 #else
  1977             for (c = width; c; --c) {
  1978                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  1979                 if (1) {
  1980                     /* Pack RGB into 8bit pixel */
  1981                     *dst = ((sR >> 5) << (3 + 2)) |
  1982                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
  1983                 }
  1984                 dst++;
  1985                 src += srcbpp;
  1986             }
  1987 #endif
  1988             src += srcskip;
  1989             dst += dstskip;
  1990         }
  1991     } else {
  1992         while (height--) {
  1993 #ifdef USE_DUFFS_LOOP
  1994 			/* *INDENT-OFF* */
  1995 			DUFFS_LOOP(
  1996 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1997 								sR, sG, sB);
  1998 				if ( 1 ) {
  1999 				  	/* Pack RGB into 8bit pixel */
  2000 				  	*dst = map[((sR>>5)<<(3+2))|
  2001 						   ((sG>>5)<<(2))  |
  2002 						   ((sB>>6)<<(0))  ];
  2003 				}
  2004 				dst++;
  2005 				src += srcbpp;
  2006 			, width);
  2007 			/* *INDENT-ON* */
  2008 #else
  2009             for (c = width; c; --c) {
  2010                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2011                 if (1) {
  2012                     /* Pack RGB into 8bit pixel */
  2013                     *dst = map[((sR >> 5) << (3 + 2)) |
  2014                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
  2015                 }
  2016                 dst++;
  2017                 src += srcbpp;
  2018             }
  2019 #endif /* USE_DUFFS_LOOP */
  2020             src += srcskip;
  2021             dst += dstskip;
  2022         }
  2023     }
  2024 }
  2025 
  2026 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  2027 static void
  2028 Blit4to4MaskAlpha(SDL_BlitInfo * info)
  2029 {
  2030     int width = info->dst_w;
  2031     int height = info->dst_h;
  2032     Uint32 *src = (Uint32 *) info->src;
  2033     int srcskip = info->src_skip;
  2034     Uint32 *dst = (Uint32 *) info->dst;
  2035     int dstskip = info->dst_skip;
  2036     SDL_PixelFormat *srcfmt = info->src_fmt;
  2037     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2038 
  2039     if (dstfmt->Amask) {
  2040         /* RGB->RGBA, SET_ALPHA */
  2041         Uint32 mask = (info->a >> dstfmt->Aloss) << dstfmt->Ashift;
  2042 
  2043         while (height--) {
  2044 			/* *INDENT-OFF* */
  2045 			DUFFS_LOOP(
  2046 			{
  2047 				*dst = *src | mask;
  2048 				++dst;
  2049 				++src;
  2050 			},
  2051 			width);
  2052 			/* *INDENT-ON* */
  2053             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2054             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2055         }
  2056     } else {
  2057         /* RGBA->RGB, NO_ALPHA */
  2058         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2059 
  2060         while (height--) {
  2061 			/* *INDENT-OFF* */
  2062 			DUFFS_LOOP(
  2063 			{
  2064 				*dst = *src & mask;
  2065 				++dst;
  2066 				++src;
  2067 			},
  2068 			width);
  2069 			/* *INDENT-ON* */
  2070             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2071             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2072         }
  2073     }
  2074 }
  2075 
  2076 static void
  2077 BlitNtoN(SDL_BlitInfo * info)
  2078 {
  2079     int width = info->dst_w;
  2080     int height = info->dst_h;
  2081     Uint8 *src = info->src;
  2082     int srcskip = info->src_skip;
  2083     Uint8 *dst = info->dst;
  2084     int dstskip = info->dst_skip;
  2085     SDL_PixelFormat *srcfmt = info->src_fmt;
  2086     int srcbpp = srcfmt->BytesPerPixel;
  2087     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2088     int dstbpp = dstfmt->BytesPerPixel;
  2089     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2090 
  2091     while (height--) {
  2092 		/* *INDENT-OFF* */
  2093 		DUFFS_LOOP(
  2094 		{
  2095             Uint32 Pixel;
  2096 			unsigned sR;
  2097 			unsigned sG;
  2098 			unsigned sB;
  2099 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2100 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2101 			dst += dstbpp;
  2102 			src += srcbpp;
  2103 		},
  2104 		width);
  2105 		/* *INDENT-ON* */
  2106         src += srcskip;
  2107         dst += dstskip;
  2108     }
  2109 }
  2110 
  2111 static void
  2112 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
  2113 {
  2114     int width = info->dst_w;
  2115     int height = info->dst_h;
  2116     Uint8 *src = info->src;
  2117     int srcskip = info->src_skip;
  2118     Uint8 *dst = info->dst;
  2119     int dstskip = info->dst_skip;
  2120     SDL_PixelFormat *srcfmt = info->src_fmt;
  2121     int srcbpp = srcfmt->BytesPerPixel;
  2122     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2123     int dstbpp = dstfmt->BytesPerPixel;
  2124     int c;
  2125 
  2126     /* FIXME: should map alpha to [0..255] correctly! */
  2127     while (height--) {
  2128         for (c = width; c; --c) {
  2129             Uint32 Pixel;
  2130             unsigned sR, sG, sB, sA;
  2131             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2132             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2133             dst += dstbpp;
  2134             src += srcbpp;
  2135         }
  2136         src += srcskip;
  2137         dst += dstskip;
  2138     }
  2139 }
  2140 
  2141 static void
  2142 BlitNto1Key(SDL_BlitInfo * info)
  2143 {
  2144     int width = info->dst_w;
  2145     int height = info->dst_h;
  2146     Uint8 *src = info->src;
  2147     int srcskip = info->src_skip;
  2148     Uint8 *dst = info->dst;
  2149     int dstskip = info->dst_skip;
  2150     SDL_PixelFormat *srcfmt = info->src_fmt;
  2151     const Uint8 *palmap = info->table;
  2152     Uint32 ckey = info->colorkey;
  2153     Uint32 rgbmask = ~srcfmt->Amask;
  2154     int srcbpp;
  2155     Uint32 Pixel;
  2156     unsigned sR, sG, sB;
  2157 
  2158     /* Set up some basic variables */
  2159     srcbpp = srcfmt->BytesPerPixel;
  2160     ckey &= rgbmask;
  2161 
  2162     if (palmap == NULL) {
  2163         while (height--) {
  2164 			/* *INDENT-OFF* */
  2165 			DUFFS_LOOP(
  2166 			{
  2167 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2168 								sR, sG, sB);
  2169 				if ( (Pixel & rgbmask) != ckey ) {
  2170 				  	/* Pack RGB into 8bit pixel */
  2171 				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
  2172 						           ((sG>>5)<<(2)) |
  2173 						           ((sB>>6)<<(0)));
  2174 				}
  2175 				dst++;
  2176 				src += srcbpp;
  2177 			},
  2178 			width);
  2179 			/* *INDENT-ON* */
  2180             src += srcskip;
  2181             dst += dstskip;
  2182         }
  2183     } else {
  2184         while (height--) {
  2185 			/* *INDENT-OFF* */
  2186 			DUFFS_LOOP(
  2187 			{
  2188 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2189 								sR, sG, sB);
  2190 				if ( (Pixel & rgbmask) != ckey ) {
  2191 				  	/* Pack RGB into 8bit pixel */
  2192 				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2193 							             ((sG>>5)<<(2))  |
  2194 							             ((sB>>6)<<(0))  ];
  2195 				}
  2196 				dst++;
  2197 				src += srcbpp;
  2198 			},
  2199 			width);
  2200 			/* *INDENT-ON* */
  2201             src += srcskip;
  2202             dst += dstskip;
  2203         }
  2204     }
  2205 }
  2206 
  2207 static void
  2208 Blit2to2Key(SDL_BlitInfo * info)
  2209 {
  2210     int width = info->dst_w;
  2211     int height = info->dst_h;
  2212     Uint16 *srcp = (Uint16 *) info->src;
  2213     int srcskip = info->src_skip;
  2214     Uint16 *dstp = (Uint16 *) info->dst;
  2215     int dstskip = info->dst_skip;
  2216     Uint32 ckey = info->colorkey;
  2217     Uint32 rgbmask = ~info->src_fmt->Amask;
  2218 
  2219     /* Set up some basic variables */
  2220     srcskip /= 2;
  2221     dstskip /= 2;
  2222     ckey &= rgbmask;
  2223 
  2224     while (height--) {
  2225 		/* *INDENT-OFF* */
  2226 		DUFFS_LOOP(
  2227 		{
  2228 			if ( (*srcp & rgbmask) != ckey ) {
  2229 				*dstp = *srcp;
  2230 			}
  2231 			dstp++;
  2232 			srcp++;
  2233 		},
  2234 		width);
  2235 		/* *INDENT-ON* */
  2236         srcp += srcskip;
  2237         dstp += dstskip;
  2238     }
  2239 }
  2240 
  2241 static void
  2242 BlitNtoNKey(SDL_BlitInfo * info)
  2243 {
  2244     int width = info->dst_w;
  2245     int height = info->dst_h;
  2246     Uint8 *src = info->src;
  2247     int srcskip = info->src_skip;
  2248     Uint8 *dst = info->dst;
  2249     int dstskip = info->dst_skip;
  2250     Uint32 ckey = info->colorkey;
  2251     SDL_PixelFormat *srcfmt = info->src_fmt;
  2252     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2253     int srcbpp = srcfmt->BytesPerPixel;
  2254     int dstbpp = dstfmt->BytesPerPixel;
  2255     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2256     Uint32 rgbmask = ~srcfmt->Amask;
  2257 
  2258     /* Set up some basic variables */
  2259     ckey &= rgbmask;
  2260 
  2261     while (height--) {
  2262 		/* *INDENT-OFF* */
  2263 		DUFFS_LOOP(
  2264 		{
  2265             Uint32 Pixel;
  2266 			unsigned sR;
  2267 			unsigned sG;
  2268 			unsigned sB;
  2269 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2270 			if ( (Pixel & rgbmask) != ckey ) {
  2271                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2272 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2273 			}
  2274 			dst += dstbpp;
  2275 			src += srcbpp;
  2276 		},
  2277 		width);
  2278 		/* *INDENT-ON* */
  2279         src += srcskip;
  2280         dst += dstskip;
  2281     }
  2282 }
  2283 
  2284 static void
  2285 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
  2286 {
  2287     int width = info->dst_w;
  2288     int height = info->dst_h;
  2289     Uint8 *src = info->src;
  2290     int srcskip = info->src_skip;
  2291     Uint8 *dst = info->dst;
  2292     int dstskip = info->dst_skip;
  2293     Uint32 ckey = info->colorkey;
  2294     SDL_PixelFormat *srcfmt = info->src_fmt;
  2295     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2296     Uint32 rgbmask = ~srcfmt->Amask;
  2297 
  2298     Uint8 srcbpp;
  2299     Uint8 dstbpp;
  2300     Uint32 Pixel;
  2301     unsigned sR, sG, sB, sA;
  2302 
  2303     /* Set up some basic variables */
  2304     srcbpp = srcfmt->BytesPerPixel;
  2305     dstbpp = dstfmt->BytesPerPixel;
  2306     ckey &= rgbmask;
  2307 
  2308     /* FIXME: should map alpha to [0..255] correctly! */
  2309     while (height--) {
  2310 		/* *INDENT-OFF* */
  2311 		DUFFS_LOOP(
  2312 		{
  2313 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2314 			if ( (Pixel & rgbmask) != ckey ) {
  2315 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2316 			}
  2317 			dst += dstbpp;
  2318 			src += srcbpp;
  2319 		},
  2320 		width);
  2321 		/* *INDENT-ON* */
  2322         src += srcskip;
  2323         dst += dstskip;
  2324     }
  2325 }
  2326 
  2327 /* Normal N to N optimized blitters */
  2328 struct blit_table
  2329 {
  2330     Uint32 srcR, srcG, srcB;
  2331     int dstbpp;
  2332     Uint32 dstR, dstG, dstB;
  2333     Uint32 blit_features;
  2334     SDL_BlitFunc blitfunc;
  2335     enum
  2336     { NO_ALPHA = 1, SET_ALPHA = 2, COPY_ALPHA = 4 } alpha;
  2337 };
  2338 static const struct blit_table normal_blit_1[] = {
  2339     /* Default for 8-bit RGB source, an invalid combination */
  2340     {0, 0, 0, 0, 0, 0, 0, 0, NULL},
  2341 };
  2342 
  2343 static const struct blit_table normal_blit_2[] = {
  2344 #if SDL_ALTIVEC_BLITTERS
  2345     /* has-altivec */
  2346     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
  2347      0x00000000,
  2348      2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2349     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
  2350      0x00000000,
  2351      2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2352 #endif
  2353     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
  2354      0x000000FF,
  2355      0, Blit_RGB565_ARGB8888, SET_ALPHA},
  2356     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00,
  2357      0x00FF0000,
  2358      0, Blit_RGB565_ABGR8888, SET_ALPHA},
  2359     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000,
  2360      0x0000FF00,
  2361      0, Blit_RGB565_RGBA8888, SET_ALPHA},
  2362     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000,
  2363      0xFF000000,
  2364      0, Blit_RGB565_BGRA8888, SET_ALPHA},
  2365 
  2366     /* Default for 16-bit RGB source, used if no other blitter matches */
  2367     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2368 };
  2369 
  2370 static const struct blit_table normal_blit_3[] = {
  2371     /* Default for 24-bit RGB source, never optimized */
  2372     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2373 };
  2374 
  2375 static const struct blit_table normal_blit_4[] = {
  2376 #if SDL_ALTIVEC_BLITTERS
  2377     /* has-altivec | dont-use-prefetch */
  2378     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
  2379      0x00000000,
  2380      6, ConvertAltivec32to32_noprefetch,
  2381      NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2382     /* has-altivec */
  2383     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
  2384      0x00000000,
  2385      2, ConvertAltivec32to32_prefetch,
  2386      NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2387     /* has-altivec */
  2388     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
  2389      0x0000001F,
  2390      2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
  2391 #endif
  2392     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
  2393      0x0000001F,
  2394      0, Blit_RGB888_RGB565, NO_ALPHA},
  2395     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
  2396      0x0000001F,
  2397      0, Blit_RGB888_RGB555, NO_ALPHA},
  2398     /* Default for 32-bit RGB source, used if no other blitter matches */
  2399     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2400 };
  2401 
  2402 static const struct blit_table *const normal_blit[] = {
  2403     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2404 };
  2405 
  2406 /* Mask matches table, or table entry is zero */
  2407 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2408 
  2409 SDL_BlitFunc
  2410 SDL_CalculateBlitN(SDL_Surface * surface)
  2411 {
  2412     SDL_PixelFormat *srcfmt;
  2413     SDL_PixelFormat *dstfmt;
  2414     const struct blit_table *table;
  2415     int which;
  2416     SDL_BlitFunc blitfun;
  2417 
  2418     /* Set up data for choosing the blit */
  2419     srcfmt = surface->format;
  2420     dstfmt = surface->map->dst->format;
  2421 
  2422     /* We don't support destinations less than 8-bits */
  2423     if (dstfmt->BitsPerPixel < 8) {
  2424         return (NULL);
  2425     }
  2426 
  2427     switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
  2428     case 0:
  2429         blitfun = NULL;
  2430         if (dstfmt->BitsPerPixel == 8) {
  2431             /* We assume 8-bit destinations are palettized */
  2432             if ((srcfmt->BytesPerPixel == 4) &&
  2433                 (srcfmt->Rmask == 0x00FF0000) &&
  2434                 (srcfmt->Gmask == 0x0000FF00) &&
  2435                 (srcfmt->Bmask == 0x000000FF)) {
  2436                 if (surface->map->info.table) {
  2437                     blitfun = Blit_RGB888_index8_map;
  2438                 } else {
  2439                     blitfun = Blit_RGB888_index8;
  2440                 }
  2441             } else {
  2442                 blitfun = BlitNto1;
  2443             }
  2444         } else {
  2445             /* Now the meat, choose the blitter we want */
  2446             int a_need = NO_ALPHA;
  2447             if (dstfmt->Amask)
  2448                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2449             table = normal_blit[srcfmt->BytesPerPixel - 1];
  2450             for (which = 0; table[which].dstbpp; ++which) {
  2451                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2452                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2453                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2454                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2455                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2456                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2457                     dstfmt->BytesPerPixel == table[which].dstbpp &&
  2458                     (a_need & table[which].alpha) == a_need &&
  2459                     ((table[which].blit_features & GetBlitFeatures()) ==
  2460                      table[which].blit_features))
  2461                     break;
  2462             }
  2463             blitfun = table[which].blitfunc;
  2464 
  2465             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2466                 /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2467                 if (srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4
  2468                     && srcfmt->Rmask == dstfmt->Rmask
  2469                     && srcfmt->Gmask == dstfmt->Gmask
  2470                     && srcfmt->Bmask == dstfmt->Bmask) {
  2471                     blitfun = Blit4to4MaskAlpha;
  2472                 } else if (a_need == COPY_ALPHA) {
  2473                     blitfun = BlitNtoNCopyAlpha;
  2474                 }
  2475             }
  2476         }
  2477         return (blitfun);
  2478 
  2479     case SDL_COPY_COLORKEY:
  2480         /* colorkey blit: Here we don't have too many options, mostly
  2481            because RLE is the preferred fast way to deal with this.
  2482            If a particular case turns out to be useful we'll add it. */
  2483 
  2484         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
  2485             return Blit2to2Key;
  2486         else if (dstfmt->BytesPerPixel == 1)
  2487             return BlitNto1Key;
  2488         else {
  2489 #if SDL_ALTIVEC_BLITTERS
  2490             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
  2491                 && SDL_HasAltiVec()) {
  2492                 return Blit32to32KeyAltivec;
  2493             } else
  2494 #endif
  2495             if (srcfmt->Amask && dstfmt->Amask) {
  2496                 return BlitNtoNKeyCopyAlpha;
  2497             } else {
  2498                 return BlitNtoNKey;
  2499             }
  2500         }
  2501     }
  2502 
  2503     return NULL;
  2504 }
  2505 
  2506 /* vi: set ts=4 sw=4 expandtab: */