src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Sat, 18 Aug 2007 05:39:09 +0000
changeset 2267 c785543d1843
parent 2262 bee005ace1bf
child 2698 e1da92da346c
permissions -rw-r--r--
Okay, still some bugs, but everything builds again...
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_endian.h"
    26 #include "SDL_cpuinfo.h"
    27 #include "SDL_blit.h"
    28 
    29 /* Functions to blit from N-bit surfaces to other surfaces */
    30 
    31 #if SDL_ALTIVEC_BLITTERS
    32 #if __MWERKS__
    33 #pragma altivec_model on
    34 #endif
    35 #ifdef HAVE_ALTIVEC_H
    36 #include <altivec.h>
    37 #endif
    38 #define assert(X)
    39 #ifdef __MACOSX__
    40 #include <sys/sysctl.h>
    41 static size_t
    42 GetL3CacheSize(void)
    43 {
    44     const char key[] = "hw.l3cachesize";
    45     u_int64_t result = 0;
    46     size_t typeSize = sizeof(result);
    47 
    48 
    49     int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
    50     if (0 != err)
    51         return 0;
    52 
    53     return result;
    54 }
    55 #else
    56 static size_t
    57 GetL3CacheSize(void)
    58 {
    59     /* XXX: Just guess G4 */
    60     return 2097152;
    61 }
    62 #endif /* __MACOSX__ */
    63 
    64 #if (defined(__MACOSX__) && (__GNUC__ < 4))
    65 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    66         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    67 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    68         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    69 #else
    70 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    71         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    72 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    73         (vector unsigned short) { a,b,c,d,e,f,g,h }
    74 #endif
    75 
    76 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    77 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    78                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    79                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    80                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    81                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    82 
    83 #define MAKE8888(dstfmt, r, g, b, a)  \
    84     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    85       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    86       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    87       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    88 
    89 /*
    90  * Data Stream Touch...Altivec cache prefetching.
    91  *
    92  *  Don't use this on a G5...however, the speed boost is very significant
    93  *   on a G4.
    94  */
    95 #define DST_CHAN_SRC 1
    96 #define DST_CHAN_DEST 2
    97 
    98 /* macro to set DST control word value... */
    99 #define DST_CTRL(size, count, stride) \
   100     (((size) << 24) | ((count) << 16) | (stride))
   101 
   102 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   103     ? vec_lvsl(0, src) \
   104     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   105 
   106 /* Calculate the permute vector used for 32->32 swizzling */
   107 static vector unsigned char
   108 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
   109 {
   110     /*
   111      * We have to assume that the bits that aren't used by other
   112      *  colors is alpha, and it's one complete byte, since some formats
   113      *  leave alpha with a zero mask, but we should still swizzle the bits.
   114      */
   115     /* ARGB */
   116     const static struct SDL_PixelFormat default_pixel_format = {
   117         NULL, 0, 0,
   118         0, 0, 0, 0,
   119         16, 8, 0, 24,
   120         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   121         0, 0
   122     };
   123     if (!srcfmt) {
   124         srcfmt = &default_pixel_format;
   125     }
   126     if (!dstfmt) {
   127         dstfmt = &default_pixel_format;
   128     }
   129     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   130                                                        0x04, 0x04, 0x04, 0x04,
   131                                                        0x08, 0x08, 0x08, 0x08,
   132                                                        0x0C, 0x0C, 0x0C,
   133                                                        0x0C);
   134     vector unsigned char vswiz;
   135     vector unsigned int srcvec;
   136 #define RESHIFT(X) (3 - ((X) >> 3))
   137     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   138     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   139     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   140     Uint32 amask;
   141     /* Use zero for alpha if either surface doesn't have alpha */
   142     if (dstfmt->Amask) {
   143         amask =
   144             ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->
   145                                                                    Ashift);
   146     } else {
   147         amask =
   148             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   149                           0xFFFFFFFF);
   150     }
   151 #undef RESHIFT
   152     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   153     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   154     return (vswiz);
   155 }
   156 
   157 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
   158 static void
   159 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   160 {
   161     int height = info->dst_h;
   162     Uint8 *src = (Uint8 *) info->src;
   163     int srcskip = info->src_skip;
   164     Uint8 *dst = (Uint8 *) info->dst;
   165     int dstskip = info->dst_skip;
   166     SDL_PixelFormat *srcfmt = info->src;
   167     vector unsigned char valpha = vec_splat_u8(0);
   168     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   169     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   170                                                     0x00, 0x0a, 0x00, 0x0e,
   171                                                     0x00, 0x12, 0x00, 0x16,
   172                                                     0x00, 0x1a, 0x00, 0x1e);
   173     vector unsigned short v1 = vec_splat_u16(1);
   174     vector unsigned short v3 = vec_splat_u16(3);
   175     vector unsigned short v3f =
   176         VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
   177                           0x003f, 0x003f, 0x003f, 0x003f);
   178     vector unsigned short vfc =
   179         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   180                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
   181     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
   182     vf800 = vec_sl(vf800, vec_splat_u16(8));
   183 
   184     while (height--) {
   185         vector unsigned char valigner;
   186         vector unsigned char voverflow;
   187         vector unsigned char vsrc;
   188 
   189         int width = info->dst_w;
   190         int extrawidth;
   191 
   192         /* do scalar until we can align... */
   193 #define ONE_PIXEL_BLEND(condition, widthvar) \
   194         while (condition) { \
   195             Uint32 Pixel; \
   196             unsigned sR, sG, sB, sA; \
   197             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   198                           sR, sG, sB, sA); \
   199             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   200                                 ((sG << 3) & 0x000007E0) | \
   201                                 ((sB >> 3) & 0x0000001F)); \
   202             dst += 2; \
   203             src += 4; \
   204             widthvar--; \
   205         }
   206 
   207         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   208 
   209         /* After all that work, here's the vector part! */
   210         extrawidth = (width % 8);       /* trailing unaligned stores */
   211         width -= extrawidth;
   212         vsrc = vec_ld(0, src);
   213         valigner = VEC_ALIGNER(src);
   214 
   215         while (width) {
   216             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   217             vector unsigned int vsrc1, vsrc2;
   218             vector unsigned char vdst;
   219 
   220             voverflow = vec_ld(15, src);
   221             vsrc = vec_perm(vsrc, voverflow, valigner);
   222             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   223             src += 16;
   224             vsrc = voverflow;
   225             voverflow = vec_ld(15, src);
   226             vsrc = vec_perm(vsrc, voverflow, valigner);
   227             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   228             /* 1555 */
   229             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
   230             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   231             vgpixel = vec_and(vgpixel, vfc);
   232             vgpixel = vec_sl(vgpixel, v3);
   233             vrpixel = vec_sl(vpixel, v1);
   234             vrpixel = vec_and(vrpixel, vf800);
   235             vbpixel = vec_and(vpixel, v3f);
   236             vdst =
   237                 vec_or((vector unsigned char) vrpixel,
   238                        (vector unsigned char) vgpixel);
   239             /* 565 */
   240             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   241             vec_st(vdst, 0, dst);
   242 
   243             width -= 8;
   244             src += 16;
   245             dst += 16;
   246             vsrc = voverflow;
   247         }
   248 
   249         assert(width == 0);
   250 
   251         /* do scalar until we can align... */
   252         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   253 #undef ONE_PIXEL_BLEND
   254 
   255         src += srcskip;         /* move to next row, accounting for pitch. */
   256         dst += dstskip;
   257     }
   258 
   259 
   260 }
   261 
   262 static void
   263 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   264 {
   265     int height = info->dst_h;
   266     Uint8 *src = (Uint8 *) info->src;
   267     int srcskip = info->src_skip;
   268     Uint8 *dst = (Uint8 *) info->dst;
   269     int dstskip = info->dst_skip;
   270     SDL_PixelFormat *srcfmt = info->src;
   271     SDL_PixelFormat *dstfmt = info->dst;
   272     unsigned alpha;
   273     vector unsigned char valpha;
   274     vector unsigned char vpermute;
   275     vector unsigned short vf800;
   276     vector unsigned int v8 = vec_splat_u32(8);
   277     vector unsigned int v16 = vec_add(v8, v8);
   278     vector unsigned short v2 = vec_splat_u16(2);
   279     vector unsigned short v3 = vec_splat_u16(3);
   280     /* 
   281        0x10 - 0x1f is the alpha
   282        0x00 - 0x0e evens are the red
   283        0x01 - 0x0f odds are zero
   284      */
   285     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   286                                                        0x10, 0x02, 0x01, 0x01,
   287                                                        0x10, 0x04, 0x01, 0x01,
   288                                                        0x10, 0x06, 0x01,
   289                                                        0x01);
   290     vector unsigned char vredalpha2 =
   291         (vector unsigned
   292          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   293         );
   294     /*
   295        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   296        0x11 - 0x0f odds are blue
   297      */
   298     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   299                                                    0x04, 0x05, 0x06, 0x13,
   300                                                    0x08, 0x09, 0x0a, 0x15,
   301                                                    0x0c, 0x0d, 0x0e, 0x17);
   302     vector unsigned char vblue2 =
   303         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   304         );
   305     /*
   306        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   307        0x10 - 0x0e evens are green
   308      */
   309     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   310                                                     0x04, 0x05, 0x12, 0x07,
   311                                                     0x08, 0x09, 0x14, 0x0b,
   312                                                     0x0c, 0x0d, 0x16, 0x0f);
   313     vector unsigned char vgreen2 =
   314         (vector unsigned
   315          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   316         );
   317 
   318 
   319     assert(srcfmt->BytesPerPixel == 2);
   320     assert(dstfmt->BytesPerPixel == 4);
   321 
   322     vf800 = (vector unsigned short) vec_splat_u8(-7);
   323     vf800 = vec_sl(vf800, vec_splat_u16(8));
   324 
   325     if (dstfmt->Amask && info->a) {
   326         ((unsigned char *) &valpha)[0] = alpha = info->a;
   327         valpha = vec_splat(valpha, 0);
   328     } else {
   329         alpha = 0;
   330         valpha = vec_splat_u8(0);
   331     }
   332 
   333     vpermute = calc_swizzle32(NULL, dstfmt);
   334     while (height--) {
   335         vector unsigned char valigner;
   336         vector unsigned char voverflow;
   337         vector unsigned char vsrc;
   338 
   339         int width = info->dst_w;
   340         int extrawidth;
   341 
   342         /* do scalar until we can align... */
   343 #define ONE_PIXEL_BLEND(condition, widthvar) \
   344         while (condition) { \
   345             unsigned sR, sG, sB; \
   346             unsigned short Pixel = *((unsigned short *)src); \
   347             sR = (Pixel >> 8) & 0xf8; \
   348             sG = (Pixel >> 3) & 0xfc; \
   349             sB = (Pixel << 3) & 0xf8; \
   350             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   351             src += 2; \
   352             dst += 4; \
   353             widthvar--; \
   354         }
   355         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   356 
   357         /* After all that work, here's the vector part! */
   358         extrawidth = (width % 8);       /* trailing unaligned stores */
   359         width -= extrawidth;
   360         vsrc = vec_ld(0, src);
   361         valigner = VEC_ALIGNER(src);
   362 
   363         while (width) {
   364             vector unsigned short vR, vG, vB;
   365             vector unsigned char vdst1, vdst2;
   366 
   367             voverflow = vec_ld(15, src);
   368             vsrc = vec_perm(vsrc, voverflow, valigner);
   369 
   370             vR = vec_and((vector unsigned short) vsrc, vf800);
   371             vB = vec_sl((vector unsigned short) vsrc, v3);
   372             vG = vec_sl(vB, v2);
   373 
   374             vdst1 =
   375                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   376                                                 valpha, vredalpha1);
   377             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   378             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   379             vdst1 = vec_perm(vdst1, valpha, vpermute);
   380             vec_st(vdst1, 0, dst);
   381 
   382             vdst2 =
   383                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   384                                                 valpha, vredalpha2);
   385             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   386             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   387             vdst2 = vec_perm(vdst2, valpha, vpermute);
   388             vec_st(vdst2, 16, dst);
   389 
   390             width -= 8;
   391             dst += 32;
   392             src += 16;
   393             vsrc = voverflow;
   394         }
   395 
   396         assert(width == 0);
   397 
   398 
   399         /* do scalar until we can align... */
   400         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   401 #undef ONE_PIXEL_BLEND
   402 
   403         src += srcskip;         /* move to next row, accounting for pitch. */
   404         dst += dstskip;
   405     }
   406 
   407 }
   408 
   409 
   410 static void
   411 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   412 {
   413     int height = info->dst_h;
   414     Uint8 *src = (Uint8 *) info->src;
   415     int srcskip = info->src_skip;
   416     Uint8 *dst = (Uint8 *) info->dst;
   417     int dstskip = info->dst_skip;
   418     SDL_PixelFormat *srcfmt = info->src;
   419     SDL_PixelFormat *dstfmt = info->dst;
   420     unsigned alpha;
   421     vector unsigned char valpha;
   422     vector unsigned char vpermute;
   423     vector unsigned short vf800;
   424     vector unsigned int v8 = vec_splat_u32(8);
   425     vector unsigned int v16 = vec_add(v8, v8);
   426     vector unsigned short v1 = vec_splat_u16(1);
   427     vector unsigned short v3 = vec_splat_u16(3);
   428     /* 
   429        0x10 - 0x1f is the alpha
   430        0x00 - 0x0e evens are the red
   431        0x01 - 0x0f odds are zero
   432      */
   433     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   434                                                        0x10, 0x02, 0x01, 0x01,
   435                                                        0x10, 0x04, 0x01, 0x01,
   436                                                        0x10, 0x06, 0x01,
   437                                                        0x01);
   438     vector unsigned char vredalpha2 =
   439         (vector unsigned
   440          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   441         );
   442     /*
   443        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   444        0x11 - 0x0f odds are blue
   445      */
   446     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   447                                                    0x04, 0x05, 0x06, 0x13,
   448                                                    0x08, 0x09, 0x0a, 0x15,
   449                                                    0x0c, 0x0d, 0x0e, 0x17);
   450     vector unsigned char vblue2 =
   451         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   452         );
   453     /*
   454        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   455        0x10 - 0x0e evens are green
   456      */
   457     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   458                                                     0x04, 0x05, 0x12, 0x07,
   459                                                     0x08, 0x09, 0x14, 0x0b,
   460                                                     0x0c, 0x0d, 0x16, 0x0f);
   461     vector unsigned char vgreen2 =
   462         (vector unsigned
   463          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   464         );
   465 
   466 
   467     assert(srcfmt->BytesPerPixel == 2);
   468     assert(dstfmt->BytesPerPixel == 4);
   469 
   470     vf800 = (vector unsigned short) vec_splat_u8(-7);
   471     vf800 = vec_sl(vf800, vec_splat_u16(8));
   472 
   473     if (dstfmt->Amask && info->a) {
   474         ((unsigned char *) &valpha)[0] = alpha = info->a;
   475         valpha = vec_splat(valpha, 0);
   476     } else {
   477         alpha = 0;
   478         valpha = vec_splat_u8(0);
   479     }
   480 
   481     vpermute = calc_swizzle32(NULL, dstfmt);
   482     while (height--) {
   483         vector unsigned char valigner;
   484         vector unsigned char voverflow;
   485         vector unsigned char vsrc;
   486 
   487         int width = info->dst_w;
   488         int extrawidth;
   489 
   490         /* do scalar until we can align... */
   491 #define ONE_PIXEL_BLEND(condition, widthvar) \
   492         while (condition) { \
   493             unsigned sR, sG, sB; \
   494             unsigned short Pixel = *((unsigned short *)src); \
   495             sR = (Pixel >> 7) & 0xf8; \
   496             sG = (Pixel >> 2) & 0xf8; \
   497             sB = (Pixel << 3) & 0xf8; \
   498             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   499             src += 2; \
   500             dst += 4; \
   501             widthvar--; \
   502         }
   503         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   504 
   505         /* After all that work, here's the vector part! */
   506         extrawidth = (width % 8);       /* trailing unaligned stores */
   507         width -= extrawidth;
   508         vsrc = vec_ld(0, src);
   509         valigner = VEC_ALIGNER(src);
   510 
   511         while (width) {
   512             vector unsigned short vR, vG, vB;
   513             vector unsigned char vdst1, vdst2;
   514 
   515             voverflow = vec_ld(15, src);
   516             vsrc = vec_perm(vsrc, voverflow, valigner);
   517 
   518             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
   519             vB = vec_sl((vector unsigned short) vsrc, v3);
   520             vG = vec_sl(vB, v3);
   521 
   522             vdst1 =
   523                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   524                                                 valpha, vredalpha1);
   525             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   526             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   527             vdst1 = vec_perm(vdst1, valpha, vpermute);
   528             vec_st(vdst1, 0, dst);
   529 
   530             vdst2 =
   531                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   532                                                 valpha, vredalpha2);
   533             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   534             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   535             vdst2 = vec_perm(vdst2, valpha, vpermute);
   536             vec_st(vdst2, 16, dst);
   537 
   538             width -= 8;
   539             dst += 32;
   540             src += 16;
   541             vsrc = voverflow;
   542         }
   543 
   544         assert(width == 0);
   545 
   546 
   547         /* do scalar until we can align... */
   548         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   549 #undef ONE_PIXEL_BLEND
   550 
   551         src += srcskip;         /* move to next row, accounting for pitch. */
   552         dst += dstskip;
   553     }
   554 
   555 }
   556 
   557 static void BlitNtoNKey(SDL_BlitInfo * info);
   558 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
   559 static void
   560 Blit32to32KeyAltivec(SDL_BlitInfo * info)
   561 {
   562     int height = info->dst_h;
   563     Uint32 *srcp = (Uint32 *) info->src;
   564     int srcskip = info->src_skip;
   565     Uint32 *dstp = (Uint32 *) info->dst;
   566     int dstskip = info->dst_skip;
   567     SDL_PixelFormat *srcfmt = info->src;
   568     int srcbpp = srcfmt->BytesPerPixel;
   569     SDL_PixelFormat *dstfmt = info->dst;
   570     int dstbpp = dstfmt->BytesPerPixel;
   571     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   572     unsigned alpha = dstfmt->Amask ? info->a : 0;
   573     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   574     Uint32 ckey = info->colorkey;
   575     vector unsigned int valpha;
   576     vector unsigned char vpermute;
   577     vector unsigned char vzero;
   578     vector unsigned int vckey;
   579     vector unsigned int vrgbmask;
   580     vpermute = calc_swizzle32(srcfmt, dstfmt);
   581     if (info->dst_w < 16) {
   582         if (copy_alpha) {
   583             BlitNtoNKeyCopyAlpha(info);
   584         } else {
   585             BlitNtoNKey(info);
   586         }
   587         return;
   588     }
   589     vzero = vec_splat_u8(0);
   590     if (alpha) {
   591         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   592         valpha =
   593             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   594     } else {
   595         valpha = (vector unsigned int) vzero;
   596     }
   597     ckey &= rgbmask;
   598     ((unsigned int *) (char *) &vckey)[0] = ckey;
   599     vckey = vec_splat(vckey, 0);
   600     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   601     vrgbmask = vec_splat(vrgbmask, 0);
   602 
   603     while (height--) {
   604 #define ONE_PIXEL_BLEND(condition, widthvar) \
   605         if (copy_alpha) { \
   606             while (condition) { \
   607                 Uint32 Pixel; \
   608                 unsigned sR, sG, sB, sA; \
   609                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   610                           sR, sG, sB, sA); \
   611                 if ( (Pixel & rgbmask) != ckey ) { \
   612                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   613                             sR, sG, sB, sA); \
   614                 } \
   615                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   616                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   617                 widthvar--; \
   618             } \
   619         } else { \
   620             while (condition) { \
   621                 Uint32 Pixel; \
   622                 unsigned sR, sG, sB; \
   623                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   624                 if ( Pixel != ckey ) { \
   625                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   626                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   627                               sR, sG, sB, alpha); \
   628                 } \
   629                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   630                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   631                 widthvar--; \
   632             } \
   633         }
   634         int width = info->dst_w;
   635         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   636         assert(width > 0);
   637         if (width > 0) {
   638             int extrawidth = (width % 4);
   639             vector unsigned char valigner = VEC_ALIGNER(srcp);
   640             vector unsigned int vs = vec_ld(0, srcp);
   641             width -= extrawidth;
   642             assert(width >= 4);
   643             while (width) {
   644                 vector unsigned char vsel;
   645                 vector unsigned int vd;
   646                 vector unsigned int voverflow = vec_ld(15, srcp);
   647                 /* load the source vec */
   648                 vs = vec_perm(vs, voverflow, valigner);
   649                 /* vsel is set for items that match the key */
   650                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   651                 vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
   652                 /* permute the src vec to the dest format */
   653                 vs = vec_perm(vs, valpha, vpermute);
   654                 /* load the destination vec */
   655                 vd = vec_ld(0, dstp);
   656                 /* select the source and dest into vs */
   657                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   658                                                    (vector unsigned char) vd,
   659                                                    vsel);
   660 
   661                 vec_st(vd, 0, dstp);
   662                 srcp += 4;
   663                 width -= 4;
   664                 dstp += 4;
   665                 vs = voverflow;
   666             }
   667             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   668 #undef ONE_PIXEL_BLEND
   669             srcp += srcskip >> 2;
   670             dstp += dstskip >> 2;
   671         }
   672     }
   673 }
   674 
   675 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   676 /* Use this on a G5 */
   677 static void
   678 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
   679 {
   680     int height = info->dst_h;
   681     Uint32 *src = (Uint32 *) info->src;
   682     int srcskip = info->src_skip;
   683     Uint32 *dst = (Uint32 *) info->dst;
   684     int dstskip = info->dst_skip;
   685     SDL_PixelFormat *srcfmt = info->src;
   686     SDL_PixelFormat *dstfmt = info->dst;
   687     vector unsigned int vzero = vec_splat_u32(0);
   688     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   689     if (dstfmt->Amask && !srcfmt->Amask) {
   690         if (info->a) {
   691             vector unsigned char valpha;
   692             ((unsigned char *) &valpha)[0] = info->a;
   693             vzero = (vector unsigned int) vec_splat(valpha, 0);
   694         }
   695     }
   696 
   697     assert(srcfmt->BytesPerPixel == 4);
   698     assert(dstfmt->BytesPerPixel == 4);
   699 
   700     while (height--) {
   701         vector unsigned char valigner;
   702         vector unsigned int vbits;
   703         vector unsigned int voverflow;
   704         Uint32 bits;
   705         Uint8 r, g, b, a;
   706 
   707         int width = info->dst_w;
   708         int extrawidth;
   709 
   710         /* do scalar until we can align... */
   711         while ((UNALIGNED_PTR(dst)) && (width)) {
   712             bits = *(src++);
   713             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   714             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   715             width--;
   716         }
   717 
   718         /* After all that work, here's the vector part! */
   719         extrawidth = (width % 4);
   720         width -= extrawidth;
   721         valigner = VEC_ALIGNER(src);
   722         vbits = vec_ld(0, src);
   723 
   724         while (width) {
   725             voverflow = vec_ld(15, src);
   726             src += 4;
   727             width -= 4;
   728             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   729             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   730             vec_st(vbits, 0, dst);      /* store it back out. */
   731             dst += 4;
   732             vbits = voverflow;
   733         }
   734 
   735         assert(width == 0);
   736 
   737         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   738         while (extrawidth) {
   739             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   740             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   741             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   742             extrawidth--;
   743         }
   744 
   745         src += srcskip >> 2;    /* move to next row, accounting for pitch. */
   746         dst += dstskip >> 2;
   747     }
   748 
   749 }
   750 
   751 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   752 /* Use this on a G4 */
   753 static void
   754 ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
   755 {
   756     const int scalar_dst_lead = sizeof(Uint32) * 4;
   757     const int vector_dst_lead = sizeof(Uint32) * 16;
   758 
   759     int height = info->dst_h;
   760     Uint32 *src = (Uint32 *) info->src;
   761     int srcskip = info->src_skip;
   762     Uint32 *dst = (Uint32 *) info->dst;
   763     int dstskip = info->dst_skip;
   764     SDL_PixelFormat *srcfmt = info->src;
   765     SDL_PixelFormat *dstfmt = info->dst;
   766     vector unsigned int vzero = vec_splat_u32(0);
   767     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   768     if (dstfmt->Amask && !srcfmt->Amask) {
   769         if (info->a) {
   770             vector unsigned char valpha;
   771             ((unsigned char *) &valpha)[0] = info->a;
   772             vzero = (vector unsigned int) vec_splat(valpha, 0);
   773         }
   774     }
   775 
   776     assert(srcfmt->BytesPerPixel == 4);
   777     assert(dstfmt->BytesPerPixel == 4);
   778 
   779     while (height--) {
   780         vector unsigned char valigner;
   781         vector unsigned int vbits;
   782         vector unsigned int voverflow;
   783         Uint32 bits;
   784         Uint8 r, g, b, a;
   785 
   786         int width = info->dst_w;
   787         int extrawidth;
   788 
   789         /* do scalar until we can align... */
   790         while ((UNALIGNED_PTR(dst)) && (width)) {
   791             vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   792                      DST_CHAN_SRC);
   793             vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   794                       DST_CHAN_DEST);
   795             bits = *(src++);
   796             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   797             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   798             width--;
   799         }
   800 
   801         /* After all that work, here's the vector part! */
   802         extrawidth = (width % 4);
   803         width -= extrawidth;
   804         valigner = VEC_ALIGNER(src);
   805         vbits = vec_ld(0, src);
   806 
   807         while (width) {
   808             vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
   809                      DST_CHAN_SRC);
   810             vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
   811                       DST_CHAN_DEST);
   812             voverflow = vec_ld(15, src);
   813             src += 4;
   814             width -= 4;
   815             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   816             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   817             vec_st(vbits, 0, dst);      /* store it back out. */
   818             dst += 4;
   819             vbits = voverflow;
   820         }
   821 
   822         assert(width == 0);
   823 
   824         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   825         while (extrawidth) {
   826             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   827             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   828             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   829             extrawidth--;
   830         }
   831 
   832         src += srcskip >> 2;    /* move to next row, accounting for pitch. */
   833         dst += dstskip >> 2;
   834     }
   835 
   836     vec_dss(DST_CHAN_SRC);
   837     vec_dss(DST_CHAN_DEST);
   838 }
   839 
   840 static Uint32
   841 GetBlitFeatures(void)
   842 {
   843     static Uint32 features = 0xffffffff;
   844     if (features == 0xffffffff) {
   845         /* Provide an override for testing .. */
   846         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   847         if (override) {
   848             features = 0;
   849             SDL_sscanf(override, "%u", &features);
   850         } else {
   851             features = (0
   852                         /* Feature 1 is has-MMX */
   853                         | ((SDL_HasMMX())? 1 : 0)
   854                         /* Feature 2 is has-AltiVec */
   855                         | ((SDL_HasAltiVec())? 2 : 0)
   856                         /* Feature 4 is dont-use-prefetch */
   857                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   858                         | ((GetL3CacheSize() == 0) ? 4 : 0)
   859                 );
   860         }
   861     }
   862     return features;
   863 }
   864 
   865 #if __MWERKS__
   866 #pragma altivec_model off
   867 #endif
   868 #else
   869 /* Feature 1 is has-MMX */
   870 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   871 #endif
   872 
/* This is now endian dependent */
/*
 * HI and LO are array indices (0 or 1) chosen by byte order.
 *  - In the RGB888_RGB555_TWO / RGB888_RGB565_TWO macros, src[HI] is the
 *    source pixel shifted into the upper 16 bits of the combined 32-bit
 *    store and src[LO] is the one placed in the lower 16 bits.
 *  - In RGB565_32, src[LO] and src[HI] select the two bytes of one 16-bit
 *    source pixel used to index the lookup table.
 */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#define HI	1
#define LO	0
#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
#define HI	0
#define LO	1
#endif
   881 
   882 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   883 #define RGB888_RGB332(dst, src) { \
   884 	dst = (Uint8)((((src)&0x00E00000)>>16)| \
   885 	              (((src)&0x0000E000)>>11)| \
   886 	              (((src)&0x000000C0)>>6)); \
   887 }
   888 static void
   889 Blit_RGB888_index8(SDL_BlitInfo * info)
   890 {
   891 #ifndef USE_DUFFS_LOOP
   892     int c;
   893 #endif
   894     int width, height;
   895     Uint32 *src;
   896     const Uint8 *map;
   897     Uint8 *dst;
   898     int srcskip, dstskip;
   899 
   900     /* Set up some basic variables */
   901     width = info->dst_w;
   902     height = info->dst_h;
   903     src = (Uint32 *) info->src;
   904     srcskip = info->src_skip / 4;
   905     dst = info->dst;
   906     dstskip = info->dst_skip;
   907     map = info->table;
   908 
   909     if (map == NULL) {
   910         while (height--) {
   911 #ifdef USE_DUFFS_LOOP
   912 			/* *INDENT-OFF* */
   913 			DUFFS_LOOP(
   914 				RGB888_RGB332(*dst++, *src);
   915 			, width);
   916 			/* *INDENT-ON* */
   917 #else
   918             for (c = width / 4; c; --c) {
   919                 /* Pack RGB into 8bit pixel */
   920                 ++src;
   921                 RGB888_RGB332(*dst++, *src);
   922                 ++src;
   923                 RGB888_RGB332(*dst++, *src);
   924                 ++src;
   925                 RGB888_RGB332(*dst++, *src);
   926                 ++src;
   927             }
   928             switch (width & 3) {
   929             case 3:
   930                 RGB888_RGB332(*dst++, *src);
   931                 ++src;
   932             case 2:
   933                 RGB888_RGB332(*dst++, *src);
   934                 ++src;
   935             case 1:
   936                 RGB888_RGB332(*dst++, *src);
   937                 ++src;
   938             }
   939 #endif /* USE_DUFFS_LOOP */
   940             src += srcskip;
   941             dst += dstskip;
   942         }
   943     } else {
   944         int Pixel;
   945 
   946         while (height--) {
   947 #ifdef USE_DUFFS_LOOP
   948 			/* *INDENT-OFF* */
   949 			DUFFS_LOOP(
   950 				RGB888_RGB332(Pixel, *src);
   951 				*dst++ = map[Pixel];
   952 				++src;
   953 			, width);
   954 			/* *INDENT-ON* */
   955 #else
   956             for (c = width / 4; c; --c) {
   957                 /* Pack RGB into 8bit pixel */
   958                 RGB888_RGB332(Pixel, *src);
   959                 *dst++ = map[Pixel];
   960                 ++src;
   961                 RGB888_RGB332(Pixel, *src);
   962                 *dst++ = map[Pixel];
   963                 ++src;
   964                 RGB888_RGB332(Pixel, *src);
   965                 *dst++ = map[Pixel];
   966                 ++src;
   967                 RGB888_RGB332(Pixel, *src);
   968                 *dst++ = map[Pixel];
   969                 ++src;
   970             }
   971             switch (width & 3) {
   972             case 3:
   973                 RGB888_RGB332(Pixel, *src);
   974                 *dst++ = map[Pixel];
   975                 ++src;
   976             case 2:
   977                 RGB888_RGB332(Pixel, *src);
   978                 *dst++ = map[Pixel];
   979                 ++src;
   980             case 1:
   981                 RGB888_RGB332(Pixel, *src);
   982                 *dst++ = map[Pixel];
   983                 ++src;
   984             }
   985 #endif /* USE_DUFFS_LOOP */
   986             src += srcskip;
   987             dst += dstskip;
   988         }
   989     }
   990 }
   991 
   992 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   993 #define RGB888_RGB555(dst, src) { \
   994 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
   995 	                            (((*src)&0x0000F800)>>6)| \
   996 	                            (((*src)&0x000000F8)>>3)); \
   997 }
   998 #define RGB888_RGB555_TWO(dst, src) { \
   999 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
  1000 	                     (((src[HI])&0x0000F800)>>6)| \
  1001 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1002 	                     (((src[LO])&0x00F80000)>>9)| \
  1003 	                     (((src[LO])&0x0000F800)>>6)| \
  1004 	                     (((src[LO])&0x000000F8)>>3); \
  1005 }
  1006 static void
  1007 Blit_RGB888_RGB555(SDL_BlitInfo * info)
  1008 {
  1009 #ifndef USE_DUFFS_LOOP
  1010     int c;
  1011 #endif
  1012     int width, height;
  1013     Uint32 *src;
  1014     Uint16 *dst;
  1015     int srcskip, dstskip;
  1016 
  1017     /* Set up some basic variables */
  1018     width = info->dst_w;
  1019     height = info->dst_h;
  1020     src = (Uint32 *) info->src;
  1021     srcskip = info->src_skip / 4;
  1022     dst = (Uint16 *) info->dst;
  1023     dstskip = info->dst_skip / 2;
  1024 
  1025 #ifdef USE_DUFFS_LOOP
  1026     while (height--) {
  1027 		/* *INDENT-OFF* */
  1028 		DUFFS_LOOP(
  1029 			RGB888_RGB555(dst, src);
  1030 			++src;
  1031 			++dst;
  1032 		, width);
  1033 		/* *INDENT-ON* */
  1034         src += srcskip;
  1035         dst += dstskip;
  1036     }
  1037 #else
  1038     /* Memory align at 4-byte boundary, if necessary */
  1039     if ((long) dst & 0x03) {
  1040         /* Don't do anything if width is 0 */
  1041         if (width == 0) {
  1042             return;
  1043         }
  1044         --width;
  1045 
  1046         while (height--) {
  1047             /* Perform copy alignment */
  1048             RGB888_RGB555(dst, src);
  1049             ++src;
  1050             ++dst;
  1051 
  1052             /* Copy in 4 pixel chunks */
  1053             for (c = width / 4; c; --c) {
  1054                 RGB888_RGB555_TWO(dst, src);
  1055                 src += 2;
  1056                 dst += 2;
  1057                 RGB888_RGB555_TWO(dst, src);
  1058                 src += 2;
  1059                 dst += 2;
  1060             }
  1061             /* Get any leftovers */
  1062             switch (width & 3) {
  1063             case 3:
  1064                 RGB888_RGB555(dst, src);
  1065                 ++src;
  1066                 ++dst;
  1067             case 2:
  1068                 RGB888_RGB555_TWO(dst, src);
  1069                 src += 2;
  1070                 dst += 2;
  1071                 break;
  1072             case 1:
  1073                 RGB888_RGB555(dst, src);
  1074                 ++src;
  1075                 ++dst;
  1076                 break;
  1077             }
  1078             src += srcskip;
  1079             dst += dstskip;
  1080         }
  1081     } else {
  1082         while (height--) {
  1083             /* Copy in 4 pixel chunks */
  1084             for (c = width / 4; c; --c) {
  1085                 RGB888_RGB555_TWO(dst, src);
  1086                 src += 2;
  1087                 dst += 2;
  1088                 RGB888_RGB555_TWO(dst, src);
  1089                 src += 2;
  1090                 dst += 2;
  1091             }
  1092             /* Get any leftovers */
  1093             switch (width & 3) {
  1094             case 3:
  1095                 RGB888_RGB555(dst, src);
  1096                 ++src;
  1097                 ++dst;
  1098             case 2:
  1099                 RGB888_RGB555_TWO(dst, src);
  1100                 src += 2;
  1101                 dst += 2;
  1102                 break;
  1103             case 1:
  1104                 RGB888_RGB555(dst, src);
  1105                 ++src;
  1106                 ++dst;
  1107                 break;
  1108             }
  1109             src += srcskip;
  1110             dst += dstskip;
  1111         }
  1112     }
  1113 #endif /* USE_DUFFS_LOOP */
  1114 }
  1115 
  1116 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1117 #define RGB888_RGB565(dst, src) { \
  1118 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1119 	                            (((*src)&0x0000FC00)>>5)| \
  1120 	                            (((*src)&0x000000F8)>>3)); \
  1121 }
  1122 #define RGB888_RGB565_TWO(dst, src) { \
  1123 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1124 	                     (((src[HI])&0x0000FC00)>>5)| \
  1125 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1126 	                     (((src[LO])&0x00F80000)>>8)| \
  1127 	                     (((src[LO])&0x0000FC00)>>5)| \
  1128 	                     (((src[LO])&0x000000F8)>>3); \
  1129 }
  1130 static void
  1131 Blit_RGB888_RGB565(SDL_BlitInfo * info)
  1132 {
  1133 #ifndef USE_DUFFS_LOOP
  1134     int c;
  1135 #endif
  1136     int width, height;
  1137     Uint32 *src;
  1138     Uint16 *dst;
  1139     int srcskip, dstskip;
  1140 
  1141     /* Set up some basic variables */
  1142     width = info->dst_w;
  1143     height = info->dst_h;
  1144     src = (Uint32 *) info->src;
  1145     srcskip = info->src_skip / 4;
  1146     dst = (Uint16 *) info->dst;
  1147     dstskip = info->dst_skip / 2;
  1148 
  1149 #ifdef USE_DUFFS_LOOP
  1150     while (height--) {
  1151 		/* *INDENT-OFF* */
  1152 		DUFFS_LOOP(
  1153 			RGB888_RGB565(dst, src);
  1154 			++src;
  1155 			++dst;
  1156 		, width);
  1157 		/* *INDENT-ON* */
  1158         src += srcskip;
  1159         dst += dstskip;
  1160     }
  1161 #else
  1162     /* Memory align at 4-byte boundary, if necessary */
  1163     if ((long) dst & 0x03) {
  1164         /* Don't do anything if width is 0 */
  1165         if (width == 0) {
  1166             return;
  1167         }
  1168         --width;
  1169 
  1170         while (height--) {
  1171             /* Perform copy alignment */
  1172             RGB888_RGB565(dst, src);
  1173             ++src;
  1174             ++dst;
  1175 
  1176             /* Copy in 4 pixel chunks */
  1177             for (c = width / 4; c; --c) {
  1178                 RGB888_RGB565_TWO(dst, src);
  1179                 src += 2;
  1180                 dst += 2;
  1181                 RGB888_RGB565_TWO(dst, src);
  1182                 src += 2;
  1183                 dst += 2;
  1184             }
  1185             /* Get any leftovers */
  1186             switch (width & 3) {
  1187             case 3:
  1188                 RGB888_RGB565(dst, src);
  1189                 ++src;
  1190                 ++dst;
  1191             case 2:
  1192                 RGB888_RGB565_TWO(dst, src);
  1193                 src += 2;
  1194                 dst += 2;
  1195                 break;
  1196             case 1:
  1197                 RGB888_RGB565(dst, src);
  1198                 ++src;
  1199                 ++dst;
  1200                 break;
  1201             }
  1202             src += srcskip;
  1203             dst += dstskip;
  1204         }
  1205     } else {
  1206         while (height--) {
  1207             /* Copy in 4 pixel chunks */
  1208             for (c = width / 4; c; --c) {
  1209                 RGB888_RGB565_TWO(dst, src);
  1210                 src += 2;
  1211                 dst += 2;
  1212                 RGB888_RGB565_TWO(dst, src);
  1213                 src += 2;
  1214                 dst += 2;
  1215             }
  1216             /* Get any leftovers */
  1217             switch (width & 3) {
  1218             case 3:
  1219                 RGB888_RGB565(dst, src);
  1220                 ++src;
  1221                 ++dst;
  1222             case 2:
  1223                 RGB888_RGB565_TWO(dst, src);
  1224                 src += 2;
  1225                 dst += 2;
  1226                 break;
  1227             case 1:
  1228                 RGB888_RGB565(dst, src);
  1229                 ++src;
  1230                 ++dst;
  1231                 break;
  1232             }
  1233             src += srcskip;
  1234             dst += dstskip;
  1235         }
  1236     }
  1237 #endif /* USE_DUFFS_LOOP */
  1238 }
  1239 
  1240 
  1241 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1242 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1243 static void
  1244 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
  1245 {
  1246 #ifndef USE_DUFFS_LOOP
  1247     int c;
  1248 #endif
  1249     int width, height;
  1250     Uint8 *src;
  1251     Uint32 *dst;
  1252     int srcskip, dstskip;
  1253 
  1254     /* Set up some basic variables */
  1255     width = info->dst_w;
  1256     height = info->dst_h;
  1257     src = (Uint8 *) info->src;
  1258     srcskip = info->src_skip;
  1259     dst = (Uint32 *) info->dst;
  1260     dstskip = info->dst_skip / 4;
  1261 
  1262 #ifdef USE_DUFFS_LOOP
  1263     while (height--) {
  1264 		/* *INDENT-OFF* */
  1265 		DUFFS_LOOP(
  1266 		{
  1267 			*dst++ = RGB565_32(dst, src, map);
  1268 			src += 2;
  1269 		},
  1270 		width);
  1271 		/* *INDENT-ON* */
  1272         src += srcskip;
  1273         dst += dstskip;
  1274     }
  1275 #else
  1276     while (height--) {
  1277         /* Copy in 4 pixel chunks */
  1278         for (c = width / 4; c; --c) {
  1279             *dst++ = RGB565_32(dst, src, map);
  1280             src += 2;
  1281             *dst++ = RGB565_32(dst, src, map);
  1282             src += 2;
  1283             *dst++ = RGB565_32(dst, src, map);
  1284             src += 2;
  1285             *dst++ = RGB565_32(dst, src, map);
  1286             src += 2;
  1287         }
  1288         /* Get any leftovers */
  1289         switch (width & 3) {
  1290         case 3:
  1291             *dst++ = RGB565_32(dst, src, map);
  1292             src += 2;
  1293         case 2:
  1294             *dst++ = RGB565_32(dst, src, map);
  1295             src += 2;
  1296         case 1:
  1297             *dst++ = RGB565_32(dst, src, map);
  1298             src += 2;
  1299             break;
  1300         }
  1301         src += srcskip;
  1302         dst += dstskip;
  1303     }
  1304 #endif /* USE_DUFFS_LOOP */
  1305 }
  1306 
/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/*
 * Lookup table consumed by Blit_RGB565_32 through the RGB565_32 macro.
 * It holds 256 pairs of entries (512 values):
 *   - entry [2*i]   is added for a pixel whose src[LO] byte equals i; these
 *     values only populate the low 16 bits of the result;
 *   - entry [2*i+1] is added for a pixel whose src[HI] byte equals i; these
 *     values all carry 0xff in the top byte and otherwise populate
 *     bits 8..23 of the result.
 * The final 32-bit pixel is the sum of one even and one odd entry.
 */
static const Uint32 RGB565_ARGB8888_LUT[512] = {
    0x00000000, 0xff000000, 0x00000008, 0xff002000,
    0x00000010, 0xff004000, 0x00000018, 0xff006100,
    0x00000020, 0xff008100, 0x00000029, 0xff00a100,
    0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
    0x00000041, 0xff080000, 0x0000004a, 0xff082000,
    0x00000052, 0xff084000, 0x0000005a, 0xff086100,
    0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
    0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
    0x00000083, 0xff100000, 0x0000008b, 0xff102000,
    0x00000094, 0xff104000, 0x0000009c, 0xff106100,
    0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
    0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
    0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
    0x000000d5, 0xff184000, 0x000000de, 0xff186100,
    0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
    0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
    0x00000400, 0xff200000, 0x00000408, 0xff202000,
    0x00000410, 0xff204000, 0x00000418, 0xff206100,
    0x00000420, 0xff208100, 0x00000429, 0xff20a100,
    0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
    0x00000441, 0xff290000, 0x0000044a, 0xff292000,
    0x00000452, 0xff294000, 0x0000045a, 0xff296100,
    0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
    0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
    0x00000483, 0xff310000, 0x0000048b, 0xff312000,
    0x00000494, 0xff314000, 0x0000049c, 0xff316100,
    0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
    0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
    0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
    0x000004d5, 0xff394000, 0x000004de, 0xff396100,
    0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
    0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
    0x00000800, 0xff410000, 0x00000808, 0xff412000,
    0x00000810, 0xff414000, 0x00000818, 0xff416100,
    0x00000820, 0xff418100, 0x00000829, 0xff41a100,
    0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
    0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
    0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
    0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
    0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
    0x00000883, 0xff520000, 0x0000088b, 0xff522000,
    0x00000894, 0xff524000, 0x0000089c, 0xff526100,
    0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
    0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
    0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
    0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
    0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
    0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
    0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
    0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
    0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
    0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
    0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
    0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
    0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
    0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
    0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
    0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
    0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
    0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
    0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
    0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
    0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
    0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
    0x00001000, 0xff830000, 0x00001008, 0xff832000,
    0x00001010, 0xff834000, 0x00001018, 0xff836100,
    0x00001020, 0xff838100, 0x00001029, 0xff83a100,
    0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
    0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
    0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
    0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
    0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
    0x00001083, 0xff940000, 0x0000108b, 0xff942000,
    0x00001094, 0xff944000, 0x0000109c, 0xff946100,
    0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
    0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
    0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
    0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
    0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
    0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
    0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
    0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
    0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
    0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
    0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
    0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
    0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
    0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
    0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
    0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
    0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
    0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
    0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
    0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
    0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
    0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
    0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
    0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
    0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
    0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
    0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
    0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
    0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
    0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
    0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
    0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
    0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
    0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
    0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
    0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
    0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
    0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
    0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
    0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
    0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
    0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
    0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
    0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
    0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
    0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
    0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
    0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
    0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
    0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
    0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
    0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
    0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
    0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
};
/* Blit entry point for RGB 5-6-5 --> ARGB 8-8-8-8: runs the generic
   table-driven Blit_RGB565_32 with RGB565_ARGB8888_LUT as its table. */
static void
Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
  1443 
  1444 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1445 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1446     0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1447     0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1448     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1449     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1450     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1451     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1452     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1453     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1454     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1455     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1456     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1457     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1458     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1459     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1460     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1461     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1462     0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1463     0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1464     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1465     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1466     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1467     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1468     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1469     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1470     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1471     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1472     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1473     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1474     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1475     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1476     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1477     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1478     0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1479     0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1480     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1481     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1482     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1483     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1484     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1485     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1486     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1487     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1488     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1489     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1490     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1491     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1492     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1493     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1494     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1495     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1496     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1497     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1498     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1499     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1500     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1501     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1502     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1503     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1504     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1505     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1506     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1507     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1508     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1509     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1510     0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1511     0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1512     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1513     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1514     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1515     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1516     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1517     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1518     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1519     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1520     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1521     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1522     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1523     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1524     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1525     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1526     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1527     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1528     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1529     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1530     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1531     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1532     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1533     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1534     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1535     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1536     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1537     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1538     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1539     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1540     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1541     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1542     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1543     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1544     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1545     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1546     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1547     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1548     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1549     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1550     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1551     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1552     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1553     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1554     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1555     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1556     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1557     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1558     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1559     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1560     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1561     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1562     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1563     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1564     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1565     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1566     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1567     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1568     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1569     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1570     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1571     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1572     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1573     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1574 };
  1575 static void
  1576 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
  1577 {
  1578     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1579 }
  1580 
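/* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8.
 *
 * The lookup table below holds 256 pairs of partial 32-bit pixels.  In each
 * pair the first entry supplies bits 8-23 plus a constant 0xff in bits 0-7,
 * and the second entry supplies bits 16-31.  (Presumably one entry is looked
 * up per source byte and the two are combined by Blit_RGB565_32 -- confirm
 * against that function.)
 */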
  1582 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1583     0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1584     0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1585     0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1586     0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1587     0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1588     0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1589     0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1590     0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1591     0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1592     0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1593     0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1594     0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1595     0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1596     0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1597     0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1598     0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1599     0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1600     0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1601     0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1602     0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1603     0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1604     0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1605     0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1606     0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1607     0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1608     0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1609     0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1610     0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1611     0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1612     0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1613     0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1614     0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1615     0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1616     0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1617     0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1618     0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1619     0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1620     0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1621     0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1622     0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1623     0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1624     0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1625     0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1626     0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1627     0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1628     0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1629     0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1630     0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1631     0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1632     0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1633     0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1634     0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1635     0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1636     0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1637     0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1638     0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1639     0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1640     0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1641     0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1642     0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1643     0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1644     0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1645     0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1646     0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1647     0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1648     0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1649     0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1650     0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1651     0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1652     0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1653     0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1654     0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1655     0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1656     0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1657     0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1658     0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1659     0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1660     0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1661     0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1662     0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1663     0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1664     0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1665     0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1666     0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1667     0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1668     0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1669     0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1670     0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1671     0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1672     0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1673     0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1674     0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1675     0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1676     0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1677     0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1678     0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1679     0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1680     0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1681     0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1682     0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1683     0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1684     0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1685     0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1686     0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1687     0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1688     0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1689     0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1690     0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1691     0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1692     0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1693     0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1694     0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1695     0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1696     0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1697     0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1698     0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1699     0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1700     0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1701     0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1702     0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1703     0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1704     0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1705     0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1706     0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1707     0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1708     0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1709     0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1710     0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1711 };
  1712 static void
  1713 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
  1714 {
  1715     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1716 }
  1717 
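/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8.
 *
 * The lookup table below holds 256 pairs of partial 32-bit pixels.  In each
 * pair the first entry supplies bits 16-31, and the second entry supplies
 * bits 8-23 plus a constant 0xff in bits 0-7.  (Presumably one entry is
 * looked up per source byte and the two are combined by Blit_RGB565_32 --
 * confirm against that function.)
 */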
  1719 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1720     0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1721     0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1722     0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1723     0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1724     0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1725     0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1726     0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1727     0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1728     0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1729     0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1730     0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1731     0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1732     0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1733     0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1734     0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1735     0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1736     0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1737     0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1738     0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1739     0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1740     0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1741     0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1742     0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1743     0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1744     0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1745     0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1746     0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1747     0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1748     0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1749     0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1750     0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1751     0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1752     0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1753     0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1754     0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1755     0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1756     0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1757     0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1758     0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1759     0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1760     0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1761     0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1762     0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1763     0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1764     0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1765     0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1766     0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1767     0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1768     0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1769     0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1770     0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1771     0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1772     0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1773     0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1774     0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1775     0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1776     0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1777     0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1778     0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1779     0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1780     0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1781     0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1782     0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1783     0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1784     0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1785     0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1786     0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1787     0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1788     0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1789     0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1790     0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1791     0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1792     0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1793     0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1794     0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1795     0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1796     0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1797     0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1798     0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1799     0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1800     0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1801     0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1802     0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1803     0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1804     0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1805     0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1806     0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1807     0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1808     0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1809     0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1810     0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1811     0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1812     0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1813     0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1814     0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1815     0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1816     0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1817     0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1818     0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1819     0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1820     0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1821     0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1822     0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1823     0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1824     0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1825     0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1826     0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1827     0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1828     0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1829     0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1830     0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1831     0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1832     0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1833     0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1834     0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1835     0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1836     0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1837     0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1838     0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1839     0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1840     0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1841     0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1842     0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1843     0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1844     0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1845     0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1846     0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1847     0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1848 };
  1849 static void
  1850 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
  1851 {
  1852     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1853 }
  1854 
  1855 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1856 #ifndef RGB888_RGB332
  1857 #define RGB888_RGB332(dst, src) { \
  1858 	dst = (((src)&0x00E00000)>>16)| \
  1859 	      (((src)&0x0000E000)>>11)| \
  1860 	      (((src)&0x000000C0)>>6); \
  1861 }
  1862 #endif
  1863 static void
  1864 Blit_RGB888_index8_map(SDL_BlitInfo * info)
  1865 {
  1866 #ifndef USE_DUFFS_LOOP
  1867     int c;
  1868 #endif
  1869     int Pixel;
  1870     int width, height;
  1871     Uint32 *src;
  1872     const Uint8 *map;
  1873     Uint8 *dst;
  1874     int srcskip, dstskip;
  1875 
  1876     /* Set up some basic variables */
  1877     width = info->dst_w;
  1878     height = info->dst_h;
  1879     src = (Uint32 *) info->src;
  1880     srcskip = info->src_skip / 4;
  1881     dst = info->dst;
  1882     dstskip = info->dst_skip;
  1883     map = info->table;
  1884 
  1885 #ifdef USE_DUFFS_LOOP
  1886     while (height--) {
  1887 		/* *INDENT-OFF* */
  1888 		DUFFS_LOOP(
  1889 			RGB888_RGB332(Pixel, *src);
  1890 			*dst++ = map[Pixel];
  1891 			++src;
  1892 		, width);
  1893 		/* *INDENT-ON* */
  1894         src += srcskip;
  1895         dst += dstskip;
  1896     }
  1897 #else
  1898     while (height--) {
  1899         for (c = width / 4; c; --c) {
  1900             /* Pack RGB into 8bit pixel */
  1901             RGB888_RGB332(Pixel, *src);
  1902             *dst++ = map[Pixel];
  1903             ++src;
  1904             RGB888_RGB332(Pixel, *src);
  1905             *dst++ = map[Pixel];
  1906             ++src;
  1907             RGB888_RGB332(Pixel, *src);
  1908             *dst++ = map[Pixel];
  1909             ++src;
  1910             RGB888_RGB332(Pixel, *src);
  1911             *dst++ = map[Pixel];
  1912             ++src;
  1913         }
  1914         switch (width & 3) {
  1915         case 3:
  1916             RGB888_RGB332(Pixel, *src);
  1917             *dst++ = map[Pixel];
  1918             ++src;
  1919         case 2:
  1920             RGB888_RGB332(Pixel, *src);
  1921             *dst++ = map[Pixel];
  1922             ++src;
  1923         case 1:
  1924             RGB888_RGB332(Pixel, *src);
  1925             *dst++ = map[Pixel];
  1926             ++src;
  1927         }
  1928         src += srcskip;
  1929         dst += dstskip;
  1930     }
  1931 #endif /* USE_DUFFS_LOOP */
  1932 }
  1933 static void
  1934 BlitNto1(SDL_BlitInfo * info)
  1935 {
  1936 #ifndef USE_DUFFS_LOOP
  1937     int c;
  1938 #endif
  1939     int width, height;
  1940     Uint8 *src;
  1941     const Uint8 *map;
  1942     Uint8 *dst;
  1943     int srcskip, dstskip;
  1944     int srcbpp;
  1945     Uint32 Pixel;
  1946     int sR, sG, sB;
  1947     SDL_PixelFormat *srcfmt;
  1948 
  1949     /* Set up some basic variables */
  1950     width = info->dst_w;
  1951     height = info->dst_h;
  1952     src = info->src;
  1953     srcskip = info->src_skip;
  1954     dst = info->dst;
  1955     dstskip = info->dst_skip;
  1956     map = info->table;
  1957     srcfmt = info->src_fmt;
  1958     srcbpp = srcfmt->BytesPerPixel;
  1959 
  1960     if (map == NULL) {
  1961         while (height--) {
  1962 #ifdef USE_DUFFS_LOOP
  1963 			/* *INDENT-OFF* */
  1964 			DUFFS_LOOP(
  1965 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1966 								sR, sG, sB);
  1967 				if ( 1 ) {
  1968 				  	/* Pack RGB into 8bit pixel */
  1969 				  	*dst = ((sR>>5)<<(3+2))|
  1970 					        ((sG>>5)<<(2)) |
  1971 					        ((sB>>6)<<(0)) ;
  1972 				}
  1973 				dst++;
  1974 				src += srcbpp;
  1975 			, width);
  1976 			/* *INDENT-ON* */
  1977 #else
  1978             for (c = width; c; --c) {
  1979                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  1980                 if (1) {
  1981                     /* Pack RGB into 8bit pixel */
  1982                     *dst = ((sR >> 5) << (3 + 2)) |
  1983                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
  1984                 }
  1985                 dst++;
  1986                 src += srcbpp;
  1987             }
  1988 #endif
  1989             src += srcskip;
  1990             dst += dstskip;
  1991         }
  1992     } else {
  1993         while (height--) {
  1994 #ifdef USE_DUFFS_LOOP
  1995 			/* *INDENT-OFF* */
  1996 			DUFFS_LOOP(
  1997 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1998 								sR, sG, sB);
  1999 				if ( 1 ) {
  2000 				  	/* Pack RGB into 8bit pixel */
  2001 				  	*dst = map[((sR>>5)<<(3+2))|
  2002 						   ((sG>>5)<<(2))  |
  2003 						   ((sB>>6)<<(0))  ];
  2004 				}
  2005 				dst++;
  2006 				src += srcbpp;
  2007 			, width);
  2008 			/* *INDENT-ON* */
  2009 #else
  2010             for (c = width; c; --c) {
  2011                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2012                 if (1) {
  2013                     /* Pack RGB into 8bit pixel */
  2014                     *dst = map[((sR >> 5) << (3 + 2)) |
  2015                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
  2016                 }
  2017                 dst++;
  2018                 src += srcbpp;
  2019             }
  2020 #endif /* USE_DUFFS_LOOP */
  2021             src += srcskip;
  2022             dst += dstskip;
  2023         }
  2024     }
  2025 }
  2026 
  2027 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  2028 static void
  2029 Blit4to4MaskAlpha(SDL_BlitInfo * info)
  2030 {
  2031     int width = info->dst_w;
  2032     int height = info->dst_h;
  2033     Uint32 *src = (Uint32 *) info->src;
  2034     int srcskip = info->src_skip;
  2035     Uint32 *dst = (Uint32 *) info->dst;
  2036     int dstskip = info->dst_skip;
  2037     SDL_PixelFormat *srcfmt = info->src_fmt;
  2038     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2039 
  2040     if (dstfmt->Amask) {
  2041         /* RGB->RGBA, SET_ALPHA */
  2042         Uint32 mask = (info->a >> dstfmt->Aloss) << dstfmt->Ashift;
  2043 
  2044         while (height--) {
  2045 			/* *INDENT-OFF* */
  2046 			DUFFS_LOOP(
  2047 			{
  2048 				*dst = *src | mask;
  2049 				++dst;
  2050 				++src;
  2051 			},
  2052 			width);
  2053 			/* *INDENT-ON* */
  2054             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2055             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2056         }
  2057     } else {
  2058         /* RGBA->RGB, NO_ALPHA */
  2059         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2060 
  2061         while (height--) {
  2062 			/* *INDENT-OFF* */
  2063 			DUFFS_LOOP(
  2064 			{
  2065 				*dst = *src & mask;
  2066 				++dst;
  2067 				++src;
  2068 			},
  2069 			width);
  2070 			/* *INDENT-ON* */
  2071             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2072             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2073         }
  2074     }
  2075 }
  2076 
  2077 static void
  2078 BlitNtoN(SDL_BlitInfo * info)
  2079 {
  2080     int width = info->dst_w;
  2081     int height = info->dst_h;
  2082     Uint8 *src = info->src;
  2083     int srcskip = info->src_skip;
  2084     Uint8 *dst = info->dst;
  2085     int dstskip = info->dst_skip;
  2086     SDL_PixelFormat *srcfmt = info->src_fmt;
  2087     int srcbpp = srcfmt->BytesPerPixel;
  2088     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2089     int dstbpp = dstfmt->BytesPerPixel;
  2090     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2091 
  2092     while (height--) {
  2093 		/* *INDENT-OFF* */
  2094 		DUFFS_LOOP(
  2095 		{
  2096             Uint32 Pixel;
  2097 			unsigned sR;
  2098 			unsigned sG;
  2099 			unsigned sB;
  2100 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2101 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2102 			dst += dstbpp;
  2103 			src += srcbpp;
  2104 		},
  2105 		width);
  2106 		/* *INDENT-ON* */
  2107         src += srcskip;
  2108         dst += dstskip;
  2109     }
  2110 }
  2111 
  2112 static void
  2113 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
  2114 {
  2115     int width = info->dst_w;
  2116     int height = info->dst_h;
  2117     Uint8 *src = info->src;
  2118     int srcskip = info->src_skip;
  2119     Uint8 *dst = info->dst;
  2120     int dstskip = info->dst_skip;
  2121     SDL_PixelFormat *srcfmt = info->src_fmt;
  2122     int srcbpp = srcfmt->BytesPerPixel;
  2123     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2124     int dstbpp = dstfmt->BytesPerPixel;
  2125     int c;
  2126 
  2127     /* FIXME: should map alpha to [0..255] correctly! */
  2128     while (height--) {
  2129         for (c = width; c; --c) {
  2130             Uint32 Pixel;
  2131             unsigned sR, sG, sB, sA;
  2132             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2133             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2134             dst += dstbpp;
  2135             src += srcbpp;
  2136         }
  2137         src += srcskip;
  2138         dst += dstskip;
  2139     }
  2140 }
  2141 
  2142 static void
  2143 BlitNto1Key(SDL_BlitInfo * info)
  2144 {
  2145     int width = info->dst_w;
  2146     int height = info->dst_h;
  2147     Uint8 *src = info->src;
  2148     int srcskip = info->src_skip;
  2149     Uint8 *dst = info->dst;
  2150     int dstskip = info->dst_skip;
  2151     SDL_PixelFormat *srcfmt = info->src_fmt;
  2152     const Uint8 *palmap = info->table;
  2153     Uint32 ckey = info->colorkey;
  2154     Uint32 rgbmask = ~srcfmt->Amask;
  2155     int srcbpp;
  2156     Uint32 Pixel;
  2157     unsigned sR, sG, sB;
  2158 
  2159     /* Set up some basic variables */
  2160     srcbpp = srcfmt->BytesPerPixel;
  2161     ckey &= rgbmask;
  2162 
  2163     if (palmap == NULL) {
  2164         while (height--) {
  2165 			/* *INDENT-OFF* */
  2166 			DUFFS_LOOP(
  2167 			{
  2168 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2169 								sR, sG, sB);
  2170 				if ( (Pixel & rgbmask) != ckey ) {
  2171 				  	/* Pack RGB into 8bit pixel */
  2172 				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
  2173 						           ((sG>>5)<<(2)) |
  2174 						           ((sB>>6)<<(0)));
  2175 				}
  2176 				dst++;
  2177 				src += srcbpp;
  2178 			},
  2179 			width);
  2180 			/* *INDENT-ON* */
  2181             src += srcskip;
  2182             dst += dstskip;
  2183         }
  2184     } else {
  2185         while (height--) {
  2186 			/* *INDENT-OFF* */
  2187 			DUFFS_LOOP(
  2188 			{
  2189 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2190 								sR, sG, sB);
  2191 				if ( (Pixel & rgbmask) != ckey ) {
  2192 				  	/* Pack RGB into 8bit pixel */
  2193 				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2194 							             ((sG>>5)<<(2))  |
  2195 							             ((sB>>6)<<(0))  ];
  2196 				}
  2197 				dst++;
  2198 				src += srcbpp;
  2199 			},
  2200 			width);
  2201 			/* *INDENT-ON* */
  2202             src += srcskip;
  2203             dst += dstskip;
  2204         }
  2205     }
  2206 }
  2207 
  2208 static void
  2209 Blit2to2Key(SDL_BlitInfo * info)
  2210 {
  2211     int width = info->dst_w;
  2212     int height = info->dst_h;
  2213     Uint16 *srcp = (Uint16 *) info->src;
  2214     int srcskip = info->src_skip;
  2215     Uint16 *dstp = (Uint16 *) info->dst;
  2216     int dstskip = info->dst_skip;
  2217     Uint32 ckey = info->colorkey;
  2218     Uint32 rgbmask = ~info->src_fmt->Amask;
  2219 
  2220     /* Set up some basic variables */
  2221     srcskip /= 2;
  2222     dstskip /= 2;
  2223     ckey &= rgbmask;
  2224 
  2225     while (height--) {
  2226 		/* *INDENT-OFF* */
  2227 		DUFFS_LOOP(
  2228 		{
  2229 			if ( (*srcp & rgbmask) != ckey ) {
  2230 				*dstp = *srcp;
  2231 			}
  2232 			dstp++;
  2233 			srcp++;
  2234 		},
  2235 		width);
  2236 		/* *INDENT-ON* */
  2237         srcp += srcskip;
  2238         dstp += dstskip;
  2239     }
  2240 }
  2241 
  2242 static void
  2243 BlitNtoNKey(SDL_BlitInfo * info)
  2244 {
  2245     int width = info->dst_w;
  2246     int height = info->dst_h;
  2247     Uint8 *src = info->src;
  2248     int srcskip = info->src_skip;
  2249     Uint8 *dst = info->dst;
  2250     int dstskip = info->dst_skip;
  2251     Uint32 ckey = info->colorkey;
  2252     SDL_PixelFormat *srcfmt = info->src_fmt;
  2253     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2254     int srcbpp = srcfmt->BytesPerPixel;
  2255     int dstbpp = dstfmt->BytesPerPixel;
  2256     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2257     Uint32 rgbmask = ~srcfmt->Amask;
  2258 
  2259     /* Set up some basic variables */
  2260     ckey &= rgbmask;
  2261 
  2262     while (height--) {
  2263 		/* *INDENT-OFF* */
  2264 		DUFFS_LOOP(
  2265 		{
  2266             Uint32 Pixel;
  2267 			unsigned sR;
  2268 			unsigned sG;
  2269 			unsigned sB;
  2270 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2271 			if ( (Pixel & rgbmask) != ckey ) {
  2272                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2273 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2274 			}
  2275 			dst += dstbpp;
  2276 			src += srcbpp;
  2277 		},
  2278 		width);
  2279 		/* *INDENT-ON* */
  2280         src += srcskip;
  2281         dst += dstskip;
  2282     }
  2283 }
  2284 
  2285 static void
  2286 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
  2287 {
  2288     int width = info->dst_w;
  2289     int height = info->dst_h;
  2290     Uint8 *src = info->src;
  2291     int srcskip = info->src_skip;
  2292     Uint8 *dst = info->dst;
  2293     int dstskip = info->dst_skip;
  2294     Uint32 ckey = info->colorkey;
  2295     SDL_PixelFormat *srcfmt = info->src_fmt;
  2296     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2297     Uint32 rgbmask = ~srcfmt->Amask;
  2298 
  2299     Uint8 srcbpp;
  2300     Uint8 dstbpp;
  2301     Uint32 Pixel;
  2302     unsigned sR, sG, sB, sA;
  2303 
  2304     /* Set up some basic variables */
  2305     srcbpp = srcfmt->BytesPerPixel;
  2306     dstbpp = dstfmt->BytesPerPixel;
  2307     ckey &= rgbmask;
  2308 
  2309     /* FIXME: should map alpha to [0..255] correctly! */
  2310     while (height--) {
  2311 		/* *INDENT-OFF* */
  2312 		DUFFS_LOOP(
  2313 		{
  2314 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2315 			if ( (Pixel & rgbmask) != ckey ) {
  2316 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2317 			}
  2318 			dst += dstbpp;
  2319 			src += srcbpp;
  2320 		},
  2321 		width);
  2322 		/* *INDENT-ON* */
  2323         src += srcskip;
  2324         dst += dstskip;
  2325     }
  2326 }
  2327 
/* Normal N to N optimized blitters */
/*
 * One row of a blitter selection table.  SDL_CalculateBlitN() scans a
 * table from the top and takes the first row whose every field is
 * compatible with the source/destination formats.
 */
struct blit_table
{
    /* Required source R/G/B masks; a zero mask matches any source mask */
    Uint32 srcR, srcG, srcB;
    /* Required destination BytesPerPixel; 0 ends the table scan, so the
       terminating default row uses 0 here */
    int dstbpp;
    /* Required destination R/G/B masks; a zero mask matches any mask */
    Uint32 dstR, dstG, dstB;
    /* Feature bits that must all be present in GetBlitFeatures();
       0 imposes no requirement */
    Uint32 blit_features;
    /* Blit routine selected when this row matches */
    SDL_BlitFunc blitfunc;
    /* Bit set of the alpha treatments this row can provide; the needed
       treatment must be contained in it for the row to match */
    enum
    { NO_ALPHA = 1, SET_ALPHA = 2, COPY_ALPHA = 4 } alpha;
};
/*
 * Selection table for 1-byte-per-pixel sources.  It holds only the
 * terminating row (dstbpp == 0), whose blit function is NULL.
 */
static const struct blit_table normal_blit_1[] = {
    /* Default for 8-bit RGB source, an invalid combination */
    {0, 0, 0, 0, 0, 0, 0, 0, NULL},
};
/*
 * Selection table for 2-byte-per-pixel sources.  Rows are tried in order
 * and the first match wins, so the AltiVec rows (when compiled in) take
 * precedence over the plain C rows below them.
 */
static const struct blit_table normal_blit_2[] = {
#if SDL_ALTIVEC_BLITTERS
    /* has-altivec */
    /* Destination masks are zero here, i.e. any 4-byte destination layout */
    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
     0x00000000,
     2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
    {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
     0x00000000,
     2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
#endif
    /* RGB565 source to specific 4-byte destination layouts; these rows
       only match when the needed alpha treatment is SET_ALPHA */
    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
     0x000000FF,
     0, Blit_RGB565_ARGB8888, SET_ALPHA},
    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00,
     0x00FF0000,
     0, Blit_RGB565_ABGR8888, SET_ALPHA},
    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000,
     0x0000FF00,
     0, Blit_RGB565_RGBA8888, SET_ALPHA},
    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000,
     0xFF000000,
     0, Blit_RGB565_BGRA8888, SET_ALPHA},

    /* Default for 16-bit RGB source, used if no other blitter matches */
    {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
};
/*
 * Selection table for 3-byte-per-pixel sources.  It holds only the
 * terminating row, so the lookup always yields BlitNtoN.
 */
static const struct blit_table normal_blit_3[] = {
    /* Default for 24-bit RGB source, never optimized */
    {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
};
/*
 * Selection table for 4-byte-per-pixel sources.  Rows are tried in order
 * and the first match wins.  Among the AltiVec rows the no-prefetch
 * variant is listed first, so it is chosen whenever both of its feature
 * bits are reported; otherwise the prefetching variant is considered.
 */
static const struct blit_table normal_blit_4[] = {
#if SDL_ALTIVEC_BLITTERS
    /* has-altivec | dont-use-prefetch */
    /* All masks zero: any 4-byte source to any 4-byte destination */
    {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
     0x00000000,
     6, ConvertAltivec32to32_noprefetch,
     NO_ALPHA | COPY_ALPHA | SET_ALPHA},
    /* has-altivec */
    {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
     0x00000000,
     2, ConvertAltivec32to32_prefetch,
     NO_ALPHA | COPY_ALPHA | SET_ALPHA},
    /* has-altivec */
    /* Any 4-byte source layout to a 2-byte RGB565 destination */
    {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
     0x0000001F,
     2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
#endif
    /* Plain C converters from the 0x00RRGGBB layout to 2-byte
       destinations; they only match when no alpha is needed */
    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
     0x0000001F,
     0, Blit_RGB888_RGB565, NO_ALPHA},
    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
     0x0000001F,
     0, Blit_RGB888_RGB555, NO_ALPHA},
    /* Default for 32-bit RGB source, used if no other blitter matches */
    {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
};
/*
 * Per-source-depth selection tables.  SDL_CalculateBlitN() indexes this
 * array with the source format's BytesPerPixel minus one, so slot 0 is
 * the 1-byte table and slot 3 the 4-byte table.
 */
static const struct blit_table *normal_blit[] = {
    normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
};
  2402 
  2403 /* Mask matches table, or table entry is zero */
  2404 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2405 
  2406 SDL_BlitFunc
  2407 SDL_CalculateBlitN(SDL_Surface * surface)
  2408 {
  2409     SDL_PixelFormat *srcfmt;
  2410     SDL_PixelFormat *dstfmt;
  2411     const struct blit_table *table;
  2412     int which;
  2413     SDL_BlitFunc blitfun;
  2414 
  2415     /* Set up data for choosing the blit */
  2416     srcfmt = surface->format;
  2417     dstfmt = surface->map->dst->format;
  2418 
  2419     /* We don't support destinations less than 8-bits */
  2420     if (dstfmt->BitsPerPixel < 8) {
  2421         return (NULL);
  2422     }
  2423 
  2424     switch (surface->map->info.flags) {
  2425     case 0:
  2426         blitfun = NULL;
  2427         if (dstfmt->BitsPerPixel == 8) {
  2428             /* We assume 8-bit destinations are palettized */
  2429             if ((srcfmt->BytesPerPixel == 4) &&
  2430                 (srcfmt->Rmask == 0x00FF0000) &&
  2431                 (srcfmt->Gmask == 0x0000FF00) &&
  2432                 (srcfmt->Bmask == 0x000000FF)) {
  2433                 if (surface->map->info.table) {
  2434                     blitfun = Blit_RGB888_index8_map;
  2435                 } else {
  2436                     blitfun = Blit_RGB888_index8;
  2437                 }
  2438             } else {
  2439                 blitfun = BlitNto1;
  2440             }
  2441         } else {
  2442             /* Now the meat, choose the blitter we want */
  2443             int a_need = NO_ALPHA;
  2444             if (dstfmt->Amask)
  2445                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2446             table = normal_blit[srcfmt->BytesPerPixel - 1];
  2447             for (which = 0; table[which].dstbpp; ++which) {
  2448                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2449                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2450                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2451                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2452                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2453                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2454                     dstfmt->BytesPerPixel == table[which].dstbpp &&
  2455                     (a_need & table[which].alpha) == a_need &&
  2456                     ((table[which].blit_features & GetBlitFeatures()) ==
  2457                      table[which].blit_features))
  2458                     break;
  2459             }
  2460             blitfun = table[which].blitfunc;
  2461 
  2462             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2463                 /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2464                 if (srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4
  2465                     && srcfmt->Rmask == dstfmt->Rmask
  2466                     && srcfmt->Gmask == dstfmt->Gmask
  2467                     && srcfmt->Bmask == dstfmt->Bmask) {
  2468                     blitfun = Blit4to4MaskAlpha;
  2469                 } else if (a_need == COPY_ALPHA) {
  2470                     blitfun = BlitNtoNCopyAlpha;
  2471                 }
  2472             }
  2473         }
  2474         return (blitfun);
  2475 
  2476     case SDL_COPY_COLORKEY:
  2477         /* colorkey blit: Here we don't have too many options, mostly
  2478            because RLE is the preferred fast way to deal with this.
  2479            If a particular case turns out to be useful we'll add it. */
  2480 
  2481         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
  2482             return Blit2to2Key;
  2483         else if (dstfmt->BytesPerPixel == 1)
  2484             return BlitNto1Key;
  2485         else {
  2486 #if SDL_ALTIVEC_BLITTERS
  2487             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
  2488                 && SDL_HasAltiVec()) {
  2489                 return Blit32to32KeyAltivec;
  2490             } else
  2491 #endif
  2492             if (srcfmt->Amask && dstfmt->Amask) {
  2493                 return BlitNtoNKeyCopyAlpha;
  2494             } else {
  2495                 return BlitNtoNKey;
  2496             }
  2497         }
  2498     }
  2499 
  2500     return NULL;
  2501 }
  2502 
  2503 /* vi: set ts=4 sw=4 expandtab: */