src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Sat, 07 Jan 2012 01:25:55 -0500
changeset 6165 b7347e716ab6
parent 6138 4c64952a58fb
child 6281 e46d6f4b469e
permissions -rwxr-xr-x
Fixed bug 1225 - Altivec blitters broken due to SDL_PixelFormat

bastien.bouclet@gmail.com 2011-06-13 05:50:58 PDT

Static pixel format initialization has not been updated to reflect header
changes in SDL_blit_N.c

The attached patch fixes Altivec support for me. altivec.h is needed for some
systems.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2012 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 #include "SDL_video.h"
    24 #include "SDL_endian.h"
    25 #include "SDL_cpuinfo.h"
    26 #include "SDL_blit.h"
    27 
    28 /* Functions to blit from N-bit surfaces to other surfaces */
    29 
    30 #if SDL_ALTIVEC_BLITTERS
    31 #ifdef HAVE_ALTIVEC_H
    32 #include <altivec.h>
    33 #endif
    34 #define assert(X)
    35 #ifdef __MACOSX__
    36 #include <sys/sysctl.h>
       /*
        * Return the L3 cache size reported by the "hw.l3cachesize" sysctl.
        *
        * Returns 0 if sysctlbyname() reports an error. On success the 64-bit
        * value filled in by the call is returned, implicitly converted to size_t
        * (presumably a byte count -- confirm against the sysctl documentation).
        */
    37 static size_t
    38 GetL3CacheSize(void)
    39 {
    40     const char key[] = "hw.l3cachesize";
    41     u_int64_t result = 0;
           /* in: size of the result buffer; sysctlbyname() may update it */
    42     size_t typeSize = sizeof(result);
    43 
    44 
    45     int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
    46     if (0 != err)
    47         return 0;
    48 
    49     return result;
    50 }
    51 #else
       /*
        * Fallback used when __MACOSX__ is not defined: nothing is queried and the
        * fixed value 2097152 (2 * 1024 * 1024) is returned as a guess.
        */
    52 static size_t
    53 GetL3CacheSize(void)
    54 {
    55     /* XXX: Just guess G4 */
    56     return 2097152;
    57 }
    58 #endif /* __MACOSX__ */
    59 
    60 #if (defined(__MACOSX__) && (__GNUC__ < 4))
       /*
        * Vector literal helpers. For __MACOSX__ builds with __GNUC__ < 4 the
        * element list is written in parentheses; every other configuration uses
        * the brace-initializer form.
        */
    61 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    62         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    63 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    64         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    65 #else
    66 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    67         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    68 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    69         (vector unsigned short) { a,b,c,d,e,f,g,h }
    70 #endif
    71 
       /*
        * Non-zero when x is not 16-byte aligned: evaluates to the low four bits
        * of the address. The argument is not parenthesized inside the cast, so
        * pass a simple expression.
        */
    72 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
       /*
        * Build a 16-byte vector in which the offsets (a,b,c,d) are repeated for
        * each of four 32-bit pixels (bases 0x00, 0x04, 0x08, 0x0C).
        * NOTE(review): this always uses the parenthesized literal form, unlike
        * the VECUINT*_LITERAL macros, and is not referenced in the part of the
        * file visible here.
        */
    73 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    74                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    75                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    76                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    77                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    78 
       /*
        * Assemble a 32-bit pixel from r, g, b, a using dstfmt's shifts and masks.
        * The arguments are used unparenthesized and dstfmt is evaluated several
        * times, so pass plain variables.
        */
    79 #define MAKE8888(dstfmt, r, g, b, a)  \
    80     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    81       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    82       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    83       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    84 
    85 /*
    86  * Data Stream Touch...Altivec cache prefetching.
    87  *
    88  *  Don't use this on a G5...however, the speed boost is very significant
    89  *   on a G4.
    90  */
    91 #define DST_CHAN_SRC 1
    92 #define DST_CHAN_DEST 2
    93 
    94 /* macro to set DST control word value... */
       /* Layout: size in bits 24 and up, count in bits 16-23, stride in the low bits. */
    95 #define DST_CTRL(size, count, stride) \
    96     (((size) << 24) | ((count) << 16) | (stride))
    97 
       /*
        * Permute vector used to extract the 16 bytes starting at src from two
        * consecutive aligned loads. A misaligned src uses vec_lvsl(0, src).
        * NOTE(review): for an aligned src the second form should evaluate to the
        * indices 0x10..0x1F, i.e. select the whole second vec_perm operand, which
        * the loops in this file load with vec_ld(15, src) -- confirm against the
        * AltiVec definition of vec_lvsl.
        */
    98 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    99     ? vec_lvsl(0, src) \
   100     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   101 
   102 /* Calculate the permute vector used for 32->32 swizzling.
        *
        * srcfmt, dstfmt: source and destination pixel formats. Either may be
        *   NULL, in which case the built-in ARGB layout below (Rshift 16,
        *   Gshift 8, Bshift 0, Ashift 24) is used in its place.
        *
        * Returns a 16-byte index vector intended for vec_perm(): each group of
        * four bytes describes one destination pixel. The value 0x10 is used for
        * a channel that has no source (it refers to the second vec_perm operand,
        * which callers fill with their alpha/zero vector).
        *
        * The arithmetic assumes every channel occupies one whole byte (shifts
        * that are multiples of 8).
        */
   103 static vector unsigned char
   104 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
   105 {
   106     /*
   107      * We have to assume that the bits that aren't used by other
   108      *  colors is alpha, and it's one complete byte, since some formats
   109      *  leave alpha with a zero mask, but we should still swizzle the bits.
   110      */
   111     /* ARGB */
           /* "static const": storage class first, qualifier given only once. */
   112     static const struct SDL_PixelFormat default_pixel_format = {
   113         0, NULL, 0, 0,
   114         {0, 0},
   115         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   116         0, 0, 0, 0,
   117         16, 8, 0, 24,
   118         0, NULL
   119     };
   120     if (!srcfmt) {
   121         srcfmt = &default_pixel_format;
   122     }
   123     if (!dstfmt) {
   124         dstfmt = &default_pixel_format;
   125     }
           /* Per-pixel base offsets: pixel n starts at byte 4*n of the vector. */
   126     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   127                                                        0x04, 0x04, 0x04, 0x04,
   128                                                        0x08, 0x08, 0x08, 0x08,
   129                                                        0x0C, 0x0C, 0x0C,
   130                                                        0x0C);
   131     vector unsigned char vswiz;
   132     vector unsigned int srcvec;
           /* Map a channel shift to a byte index: 24 -> 0, 16 -> 1, 8 -> 2, 0 -> 3. */
   133 #define RESHIFT(X) (3 - ((X) >> 3))
           /* Place each source byte index at the destination channel's position. */
   134     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   135     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   136     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   137     Uint32 amask;
   138     /* Use zero for alpha if either surface doesn't have alpha */
   139     if (dstfmt->Amask) {
   140         amask =
   141             ((srcfmt->Amask) ? RESHIFT(srcfmt->
   142                                        Ashift) : 0x10) << (dstfmt->Ashift);
   143     } else {
               /* Put 0x10 in every destination byte not covered by R, G or B. */
   144         amask =
   145             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   146                           0xFFFFFFFF);
   147     }
   148 #undef RESHIFT
           /* Only element 0 is written; vec_splat() replicates it to all four. */
   149     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   150     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   151     return (vswiz);
   152 }
   153 
   154 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
       /*
        * AltiVec blitter: convert 32-bit source pixels to 16-bit RGB565.
        *
        * info supplies the source/destination pointers, the per-row skips, the
        * destination width/height and the source pixel format.
        *
        * Each row is processed in three stages:
        *   1. a scalar loop until dst is 16-byte aligned,
        *   2. a vector loop that turns 8 source pixels (two 16-byte loads) into
        *      one 16-byte store,
        *   3. a scalar loop for the remaining (width % 8) pixels.
        * The vector stage masks its channels so that it produces the same bits
        * as the scalar formula in ONE_PIXEL_BLEND.
        */
   155 static void
   156 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   157 {
   158     int height = info->dst_h;
   159     Uint8 *src = (Uint8 *) info->src;
   160     int srcskip = info->src_skip;
   161     Uint8 *dst = (Uint8 *) info->dst;
   162     int dstskip = info->dst_skip;
   163     SDL_PixelFormat *srcfmt = info->src_fmt;
   164     vector unsigned char valpha = vec_splat_u8(0);
           /* Swizzle from the source layout to the default ARGB layout. */
   165     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
           /* Picks byte 2 (green in ARGB) of each of the 8 pixels into the low
              byte of a 16-bit lane; the high byte is masked off with vfc. */
   166     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   167                                                     0x00, 0x0a, 0x00, 0x0e,
   168                                                     0x00, 0x12, 0x00, 0x16,
   169                                                     0x00, 0x1a, 0x00, 0x1e);
   170     vector unsigned short v1 = vec_splat_u16(1);
   171     vector unsigned short v3 = vec_splat_u16(3);
           /* Blue mask for the 1555 value: only bits 0-4 are blue. Bit 5 is the
              low bit of the 5-bit green field and must not be merged, because the
              6-bit green comes from vgpixel. */
   172     vector unsigned short v1f =
   173         VECUINT16_LITERAL(0x001f, 0x001f, 0x001f, 0x001f,
   174                           0x001f, 0x001f, 0x001f, 0x001f);
   175     vector unsigned short vfc =
   176         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   177                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
           /* 0xF8 in every byte, then shifted: 0xF800 in every 16-bit lane. */
   178     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-8);
   179     vf800 = vec_sl(vf800, vec_splat_u16(8));
   180 
   181     while (height--) {
   182         vector unsigned char valigner;
   183         vector unsigned char voverflow;
   184         vector unsigned char vsrc;
   185 
   186         int width = info->dst_w;
   187         int extrawidth;
   188 
   189         /* do scalar until we can align... */
   190 #define ONE_PIXEL_BLEND(condition, widthvar) \
   191         while (condition) { \
   192             Uint32 Pixel; \
   193             unsigned sR, sG, sB, sA; \
   194             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   195                           sR, sG, sB, sA); \
   196             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   197                                 ((sG << 3) & 0x000007E0) | \
   198                                 ((sB >> 3) & 0x0000001F)); \
   199             dst += 2; \
   200             src += 4; \
   201             widthvar--; \
   202         }
   203 
   204         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   205 
   206         /* After all that work, here's the vector part! */
   207         extrawidth = (width % 8);       /* trailing unaligned stores */
   208         width -= extrawidth;
   209         vsrc = vec_ld(0, src);
   210         valigner = VEC_ALIGNER(src);
   211 
   212         while (width) {
   213             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   214             vector unsigned int vsrc1, vsrc2;
   215             vector unsigned char vdst;
   216 
                   /* first four source pixels, realigned and swizzled to ARGB */
   217             voverflow = vec_ld(15, src);
   218             vsrc = vec_perm(vsrc, voverflow, valigner);
   219             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   220             src += 16;
   221             vsrc = voverflow;
                   /* next four source pixels */
   222             voverflow = vec_ld(15, src);
   223             vsrc = vec_perm(vsrc, voverflow, valigner);
   224             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   225             /* 1555 */
   226             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
                   /* green: top 6 bits of the green byte, moved to bits 5-10 */
   227             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   228             vgpixel = vec_and(vgpixel, vfc);
   229             vgpixel = vec_sl(vgpixel, v3);
                   /* red: bits 10-14 of the 1555 value moved up to bits 11-15 */
   230             vrpixel = vec_sl(vpixel, v1);
   231             vrpixel = vec_and(vrpixel, vf800);
                   /* blue: bits 0-4 of the 1555 value, already in place */
   232             vbpixel = vec_and(vpixel, v1f);
   233             vdst =
   234                 vec_or((vector unsigned char) vrpixel,
   235                        (vector unsigned char) vgpixel);
   236             /* 565 */
   237             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   238             vec_st(vdst, 0, dst);
   239 
   240             width -= 8;
   241             src += 16;
   242             dst += 16;
   243             vsrc = voverflow;
   244         }
   245 
   246         assert(width == 0);
   247 
   248         /* finish the (width % 8) trailing pixels with the scalar loop */
   249         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   250 #undef ONE_PIXEL_BLEND
   251 
   252         src += srcskip;         /* move to next row, accounting for pitch. */
   253         dst += dstskip;
   254     }
   255 
   256 
   257 }
   258 
       /*
        * AltiVec blitter: expand 16-bit RGB565 source pixels to a 32-bit
        * destination format.
        *
        * info supplies the source/destination pointers, the per-row skips, the
        * destination width/height, the destination pixel format and the alpha
        * value info->a. The destination alpha is info->a when dstfmt has an
        * alpha mask and info->a is non-zero, otherwise 0.
        *
        * Each row is processed in three stages: a scalar loop until dst is
        * 16-byte aligned, a vector loop that expands 8 source pixels (16 bytes)
        * into two 16-byte stores, and a scalar loop for the remaining
        * (width % 8) pixels. The vector stage masks red to 5 bits and green to
        * 6 bits so that it writes the same channel values as the scalar stage.
        */
   259 static void
   260 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   261 {
   262     int height = info->dst_h;
   263     Uint8 *src = (Uint8 *) info->src;
   264     int srcskip = info->src_skip;
   265     Uint8 *dst = (Uint8 *) info->dst;
   266     int dstskip = info->dst_skip;
   267     SDL_PixelFormat *srcfmt = info->src_fmt;
   268     SDL_PixelFormat *dstfmt = info->dst_fmt;
   269     unsigned alpha;
   270     vector unsigned char valpha;
   271     vector unsigned char vpermute;
   272     vector unsigned short vf800;
   273     vector unsigned int v8 = vec_splat_u32(8);
   274     vector unsigned int v16 = vec_add(v8, v8);
   275     vector unsigned short v2 = vec_splat_u16(2);
   276     vector unsigned short v3 = vec_splat_u16(3);
   277     /* 
   278        0x10 - 0x1f is the alpha
   279        0x00 - 0x0e evens are the red
   280        0x01 - 0x0f odds are zero
   281      */
   282     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   283                                                        0x10, 0x02, 0x01, 0x01,
   284                                                        0x10, 0x04, 0x01, 0x01,
   285                                                        0x10, 0x06, 0x01,
   286                                                        0x01);
           /* Same pattern for pixels 4-7: adds 8 to the red index of each word. */
   287     vector unsigned char vredalpha2 =
   288         (vector unsigned
   289          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   290         );
   291     /*
   292        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   293        0x11 - 0x0f odds are blue
   294      */
   295     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   296                                                    0x04, 0x05, 0x06, 0x13,
   297                                                    0x08, 0x09, 0x0a, 0x15,
   298                                                    0x0c, 0x0d, 0x0e, 0x17);
   299     vector unsigned char vblue2 =
   300         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   301         );
   302     /*
   303        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   304        0x10 - 0x0e evens are green
   305      */
   306     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   307                                                     0x04, 0x05, 0x12, 0x07,
   308                                                     0x08, 0x09, 0x14, 0x0b,
   309                                                     0x0c, 0x0d, 0x16, 0x0f);
   310     vector unsigned char vgreen2 =
   311         (vector unsigned
   312          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   313         );
   314 
   315 
           /* assert() is defined to nothing earlier in this file; these two lines
              only document the expected formats. */
   316     assert(srcfmt->BytesPerPixel == 2);
   317     assert(dstfmt->BytesPerPixel == 4);
   318 
           /* 0xF8 in every byte, then shifted: 0xF800 in every 16-bit lane. */
   319     vf800 = (vector unsigned short) vec_splat_u8(-8);
   320     vf800 = vec_sl(vf800, vec_splat_u16(8));
   321 
   322     if (dstfmt->Amask && info->a) {
   323         ((unsigned char *) &valpha)[0] = alpha = info->a;
   324         valpha = vec_splat(valpha, 0);
   325     } else {
   326         alpha = 0;
   327         valpha = vec_splat_u8(0);
   328     }
   329 
           /* Swizzle from the default ARGB layout to the destination layout. */
   330     vpermute = calc_swizzle32(NULL, dstfmt);
   331     while (height--) {
   332         vector unsigned char valigner;
   333         vector unsigned char voverflow;
   334         vector unsigned char vsrc;
   335 
   336         int width = info->dst_w;
   337         int extrawidth;
   338 
   339         /* do scalar until we can align... */
   340 #define ONE_PIXEL_BLEND(condition, widthvar) \
   341         while (condition) { \
   342             unsigned sR, sG, sB; \
   343             unsigned short Pixel = *((unsigned short *)src); \
   344             sR = (Pixel >> 8) & 0xf8; \
   345             sG = (Pixel >> 3) & 0xfc; \
   346             sB = (Pixel << 3) & 0xf8; \
   347             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   348             src += 2; \
   349             dst += 4; \
   350             widthvar--; \
   351         }
   352         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   353 
   354         /* After all that work, here's the vector part! */
   355         extrawidth = (width % 8);       /* trailing unaligned stores */
   356         width -= extrawidth;
   357         vsrc = vec_ld(0, src);
   358         valigner = VEC_ALIGNER(src);
   359 
   360         while (width) {
   361             vector unsigned short vR, vG, vB;
   362             vector unsigned char vdst1, vdst2;
   363 
   364             voverflow = vec_ld(15, src);
   365             vsrc = vec_perm(vsrc, voverflow, valigner);
   366 
                   /* red: high byte RRRRR000, low byte 0 */
   367             vR = vec_and((vector unsigned short) vsrc, vf800);
                   /* blue: low byte BBBBB000 */
   368             vB = vec_sl((vector unsigned short) vsrc, v3);
                   /* green: high byte GGGGGG00; clear the two blue bits (mask
                      0x0300) that the shift would otherwise leave below it */
   369             vG = vec_andc(vec_sl(vB, v2), vec_sl(v3, vec_splat_u16(8)));
   370 
                   /* pixels 0-3: build ARGB, then swizzle to the dst layout */
   371             vdst1 =
   372                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   373                                                 valpha, vredalpha1);
   374             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   375             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   376             vdst1 = vec_perm(vdst1, valpha, vpermute);
   377             vec_st(vdst1, 0, dst);
   378 
                   /* pixels 4-7 */
   379             vdst2 =
   380                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   381                                                 valpha, vredalpha2);
   382             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   383             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   384             vdst2 = vec_perm(vdst2, valpha, vpermute);
   385             vec_st(vdst2, 16, dst);
   386 
   387             width -= 8;
   388             dst += 32;
   389             src += 16;
   390             vsrc = voverflow;
   391         }
   392 
   393         assert(width == 0);
   394 
   395 
   396         /* finish the (width % 8) trailing pixels with the scalar loop */
   397         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   398 #undef ONE_PIXEL_BLEND
   399 
   400         src += srcskip;         /* move to next row, accounting for pitch. */
   401         dst += dstskip;
   402     }
   403 
   404 }
   405 
   406 
       /*
        * AltiVec blitter: expand 16-bit RGB555 source pixels to a 32-bit
        * destination format.
        *
        * info supplies the source/destination pointers, the per-row skips, the
        * destination width/height, the destination pixel format and the alpha
        * value info->a. The destination alpha is info->a when dstfmt has an
        * alpha mask and info->a is non-zero, otherwise 0.
        *
        * Each row is processed in three stages: a scalar loop until dst is
        * 16-byte aligned, a vector loop that expands 8 source pixels (16 bytes)
        * into two 16-byte stores, and a scalar loop for the remaining
        * (width % 8) pixels. The vector stage masks every channel to 5 bits so
        * that it writes the same channel values as the scalar stage.
        */
   407 static void
   408 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   409 {
   410     int height = info->dst_h;
   411     Uint8 *src = (Uint8 *) info->src;
   412     int srcskip = info->src_skip;
   413     Uint8 *dst = (Uint8 *) info->dst;
   414     int dstskip = info->dst_skip;
   415     SDL_PixelFormat *srcfmt = info->src_fmt;
   416     SDL_PixelFormat *dstfmt = info->dst_fmt;
   417     unsigned alpha;
   418     vector unsigned char valpha;
   419     vector unsigned char vpermute;
   420     vector unsigned short vf800;
   421     vector unsigned int v8 = vec_splat_u32(8);
   422     vector unsigned int v16 = vec_add(v8, v8);
   423     vector unsigned short v1 = vec_splat_u16(1);
   424     vector unsigned short v3 = vec_splat_u16(3);
   425     /* 
   426        0x10 - 0x1f is the alpha
   427        0x00 - 0x0e evens are the red
   428        0x01 - 0x0f odds are zero
   429      */
   430     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   431                                                        0x10, 0x02, 0x01, 0x01,
   432                                                        0x10, 0x04, 0x01, 0x01,
   433                                                        0x10, 0x06, 0x01,
   434                                                        0x01);
           /* Same pattern for pixels 4-7: adds 8 to the red index of each word. */
   435     vector unsigned char vredalpha2 =
   436         (vector unsigned
   437          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   438         );
   439     /*
   440        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   441        0x11 - 0x0f odds are blue
   442      */
   443     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   444                                                    0x04, 0x05, 0x06, 0x13,
   445                                                    0x08, 0x09, 0x0a, 0x15,
   446                                                    0x0c, 0x0d, 0x0e, 0x17);
   447     vector unsigned char vblue2 =
   448         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   449         );
   450     /*
   451        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   452        0x10 - 0x0e evens are green
   453      */
   454     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   455                                                     0x04, 0x05, 0x12, 0x07,
   456                                                     0x08, 0x09, 0x14, 0x0b,
   457                                                     0x0c, 0x0d, 0x16, 0x0f);
   458     vector unsigned char vgreen2 =
   459         (vector unsigned
   460          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   461         );
   462 
   463 
           /* assert() is defined to nothing earlier in this file; these two lines
              only document the expected formats. */
   464     assert(srcfmt->BytesPerPixel == 2);
   465     assert(dstfmt->BytesPerPixel == 4);
   466 
           /* 0xF8 in every byte, then shifted: 0xF800 in every 16-bit lane. */
   467     vf800 = (vector unsigned short) vec_splat_u8(-8);
   468     vf800 = vec_sl(vf800, vec_splat_u16(8));
   469 
   470     if (dstfmt->Amask && info->a) {
   471         ((unsigned char *) &valpha)[0] = alpha = info->a;
   472         valpha = vec_splat(valpha, 0);
   473     } else {
   474         alpha = 0;
   475         valpha = vec_splat_u8(0);
   476     }
   477 
           /* Swizzle from the default ARGB layout to the destination layout. */
   478     vpermute = calc_swizzle32(NULL, dstfmt);
   479     while (height--) {
   480         vector unsigned char valigner;
   481         vector unsigned char voverflow;
   482         vector unsigned char vsrc;
   483 
   484         int width = info->dst_w;
   485         int extrawidth;
   486 
   487         /* do scalar until we can align... */
   488 #define ONE_PIXEL_BLEND(condition, widthvar) \
   489         while (condition) { \
   490             unsigned sR, sG, sB; \
   491             unsigned short Pixel = *((unsigned short *)src); \
   492             sR = (Pixel >> 7) & 0xf8; \
   493             sG = (Pixel >> 2) & 0xf8; \
   494             sB = (Pixel << 3) & 0xf8; \
   495             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   496             src += 2; \
   497             dst += 4; \
   498             widthvar--; \
   499         }
   500         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   501 
   502         /* After all that work, here's the vector part! */
   503         extrawidth = (width % 8);       /* trailing unaligned stores */
   504         width -= extrawidth;
   505         vsrc = vec_ld(0, src);
   506         valigner = VEC_ALIGNER(src);
   507 
   508         while (width) {
   509             vector unsigned short vR, vG, vB;
   510             vector unsigned char vdst1, vdst2;
   511 
   512             voverflow = vec_ld(15, src);
   513             vsrc = vec_perm(vsrc, voverflow, valigner);
   514 
                   /* red: high byte RRRRR000, low byte 0 */
   515             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
                   /* blue: low byte BBBBB000 */
   516             vB = vec_sl((vector unsigned short) vsrc, v3);
                   /* green: high byte GGGGG000; clear the three blue bits (mask
                      0x0700) that the shift would otherwise leave below it */
   517             vG = vec_andc(vec_sl(vB, v3), vec_sl(vec_splat_u16(7), vec_splat_u16(8)));
   518 
                   /* pixels 0-3: build ARGB, then swizzle to the dst layout */
   519             vdst1 =
   520                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   521                                                 valpha, vredalpha1);
   522             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   523             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   524             vdst1 = vec_perm(vdst1, valpha, vpermute);
   525             vec_st(vdst1, 0, dst);
   526 
                   /* pixels 4-7 */
   527             vdst2 =
   528                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   529                                                 valpha, vredalpha2);
   530             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   531             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   532             vdst2 = vec_perm(vdst2, valpha, vpermute);
   533             vec_st(vdst2, 16, dst);
   534 
   535             width -= 8;
   536             dst += 32;
   537             src += 16;
   538             vsrc = voverflow;
   539         }
   540 
   541         assert(width == 0);
   542 
   543 
   544         /* finish the (width % 8) trailing pixels with the scalar loop */
   545         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   546 #undef ONE_PIXEL_BLEND
   547 
   548         src += srcskip;         /* move to next row, accounting for pitch. */
   549         dst += dstskip;
   550     }
   551 
   552 }
   553 
   554 static void BlitNtoNKey(SDL_BlitInfo * info);
   555 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
       /*
        * AltiVec colorkey blit between two 32-bit surfaces.
        *
        * A source pixel is skipped (the destination pixel is left untouched)
        * when its R, G and B bits -- the pixel ANDed with the source
        * Rmask|Gmask|Bmask -- equal info->colorkey under the same mask. All
        * other pixels are converted to the destination format and stored.
        * When both formats have an alpha mask the source alpha is carried over;
        * otherwise the alpha is info->a if the destination has an alpha mask,
        * else 0.
        *
        * Blits narrower than 16 pixels are handed to BlitNtoNKeyCopyAlpha() or
        * BlitNtoNKey(). Wider rows use a scalar loop until dstp is 16-byte
        * aligned, a vector loop of 4 pixels per iteration, and a scalar loop for
        * the remaining (width % 4) pixels; all three apply the same masked key
        * comparison.
        */
   556 static void
   557 Blit32to32KeyAltivec(SDL_BlitInfo * info)
   558 {
   559     int height = info->dst_h;
   560     Uint32 *srcp = (Uint32 *) info->src;
   561     int srcskip = info->src_skip / 4;
   562     Uint32 *dstp = (Uint32 *) info->dst;
   563     int dstskip = info->dst_skip / 4;
   564     SDL_PixelFormat *srcfmt = info->src_fmt;
   565     int srcbpp = srcfmt->BytesPerPixel;
   566     SDL_PixelFormat *dstfmt = info->dst_fmt;
   567     int dstbpp = dstfmt->BytesPerPixel;
   568     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   569     unsigned alpha = dstfmt->Amask ? info->a : 0;
   570     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   571     Uint32 ckey = info->colorkey;
   572     vector unsigned int valpha;
   573     vector unsigned char vpermute;
   574     vector unsigned char vzero;
   575     vector unsigned int vckey;
   576     vector unsigned int vrgbmask;
   577     vpermute = calc_swizzle32(srcfmt, dstfmt);
           /* Narrow blits are not worth the vector setup: use the generic paths. */
   578     if (info->dst_w < 16) {
   579         if (copy_alpha) {
   580             BlitNtoNKeyCopyAlpha(info);
   581         } else {
   582             BlitNtoNKey(info);
   583         }
   584         return;
   585     }
   586     vzero = vec_splat_u8(0);
   587     if (alpha) {
   588         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   589         valpha =
   590             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   591     } else {
   592         valpha = (vector unsigned int) vzero;
   593     }
           /* From here on the key holds RGB bits only; every comparison below
              must therefore mask the pixel with rgbmask first. */
   594     ckey &= rgbmask;
   595     ((unsigned int *) (char *) &vckey)[0] = ckey;
   596     vckey = vec_splat(vckey, 0);
   597     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   598     vrgbmask = vec_splat(vrgbmask, 0);
   599 
   600     while (height--) {
   601 #define ONE_PIXEL_BLEND(condition, widthvar) \
   602         if (copy_alpha) { \
   603             while (condition) { \
   604                 Uint32 Pixel; \
   605                 unsigned sR, sG, sB, sA; \
   606                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   607                           sR, sG, sB, sA); \
   608                 if ( (Pixel & rgbmask) != ckey ) { \
   609                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   610                             sR, sG, sB, sA); \
   611                 } \
   612                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   613                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   614                 widthvar--; \
   615             } \
   616         } else { \
   617             while (condition) { \
   618                 Uint32 Pixel; \
   619                 unsigned sR, sG, sB; \
   620                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   621                 if ( (Pixel & rgbmask) != ckey ) { \
   622                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   623                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   624                               sR, sG, sB, alpha); \
   625                 } \
   626                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   627                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   628                 widthvar--; \
   629             } \
   630         }
   631         int width = info->dst_w;
               /* scalar pixels until the destination is 16-byte aligned */
   632         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   633         assert(width > 0);
   634         if (width > 0) {
   635             int extrawidth = (width % 4);
   636             vector unsigned char valigner = VEC_ALIGNER(srcp);
   637             vector unsigned int vs = vec_ld(0, srcp);
   638             width -= extrawidth;
   639             assert(width >= 4);
   640             while (width) {
   641                 vector unsigned char vsel;
   642                 vector unsigned int vd;
   643                 vector unsigned int voverflow = vec_ld(15, srcp);
   644                 /* load the source vec */
   645                 vs = vec_perm(vs, voverflow, valigner);
   646                 /* vsel is set for items that match the key */
                       /* compare the MASKED pixels, so alpha bits in the source
                          cannot prevent a match with the masked key */
   647                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   648                 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int) vsel, vckey);
   649                 /* permute the src vec to the dest format */
   650                 vs = vec_perm(vs, valpha, vpermute);
   651                 /* load the destination vec */
   652                 vd = vec_ld(0, dstp);
   653                 /* keep dest where the key matched, take source elsewhere */
   654                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   655                                                    (vector unsigned char) vd,
   656                                                    vsel);
   657 
   658                 vec_st(vd, 0, dstp);
   659                 srcp += 4;
   660                 width -= 4;
   661                 dstp += 4;
   662                 vs = voverflow;
   663             }
                   /* trailing (width % 4) pixels */
   664             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   665 #undef ONE_PIXEL_BLEND
   666             srcp += srcskip;
   667             dstp += dstskip;
   668         }
   669     }
   670 }
   671 
   672 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   673 /* Use this on a G5 */
       /*
        * info supplies the source/destination pointers, the per-row skips (in
        * bytes; divided by 4 here because the pointers are Uint32 *), the
        * destination width/height, both pixel formats and the alpha value
        * info->a.
        *
        * Each row is processed in three stages: a scalar loop until dst is
        * 16-byte aligned, a vector loop of 4 pixels per iteration, and a scalar
        * loop for the remaining (width % 4) pixels. Unlike the G4 variant named
        * in the next comment block of this file, this one issues no prefetch
        * instructions.
        */
   674 static void
   675 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
   676 {
   677     int height = info->dst_h;
   678     Uint32 *src = (Uint32 *) info->src;
   679     int srcskip = info->src_skip / 4;
   680     Uint32 *dst = (Uint32 *) info->dst;
   681     int dstskip = info->dst_skip / 4;
   682     SDL_PixelFormat *srcfmt = info->src_fmt;
   683     SDL_PixelFormat *dstfmt = info->dst_fmt;
   684     vector unsigned int vzero = vec_splat_u32(0);
   685     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
           /*
            * When only the destination has alpha and info->a is non-zero, vzero
            * is overwritten with info->a in every byte and then serves as the
            * alpha source of the vector swizzle.
            * NOTE(review): the scalar loops take alpha from RGBA_FROM_8888, which
            * is defined outside this view -- confirm both paths agree for this
            * case.
            */
   686     if (dstfmt->Amask && !srcfmt->Amask) {
   687         if (info->a) {
   688             vector unsigned char valpha;
   689             ((unsigned char *) &valpha)[0] = info->a;
   690             vzero = (vector unsigned int) vec_splat(valpha, 0);
   691         }
   692     }
   693 
           /* assert() is defined to nothing earlier in this file; these two lines
              only document the expected formats. */
   694     assert(srcfmt->BytesPerPixel == 4);
   695     assert(dstfmt->BytesPerPixel == 4);
   696 
   697     while (height--) {
   698         vector unsigned char valigner;
   699         vector unsigned int vbits;
   700         vector unsigned int voverflow;
   701         Uint32 bits;
   702         Uint8 r, g, b, a;
   703 
   704         int width = info->dst_w;
   705         int extrawidth;
   706 
   707         /* do scalar until we can align... */
   708         while ((UNALIGNED_PTR(dst)) && (width)) {
   709             bits = *(src++);
   710             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   711             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   712             width--;
   713         }
   714 
   715         /* After all that work, here's the vector part! */
   716         extrawidth = (width % 4);
   717         width -= extrawidth;
   718         valigner = VEC_ALIGNER(src);
   719         vbits = vec_ld(0, src);
   720 
   721         while (width) {
                   /* second aligned block covering the 16 bytes at src */
   722             voverflow = vec_ld(15, src);
   723             src += 4;
   724             width -= 4;
   725             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   726             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   727             vec_st(vbits, 0, dst);      /* store it back out. */
   728             dst += 4;
                   /* carry the block over as the first half of the next load */
   729             vbits = voverflow;
   730         }
   731 
   732         assert(width == 0);
   733 
   734         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   735         while (extrawidth) {
   736             bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
   737             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   738             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   739             extrawidth--;
   740         }
   741 
               /* move to the next row; the skips were converted to pixel units */
   742         src += srcskip;
   743         dst += dstskip;
   744     }
   745 
   746 }
   747 
   748 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   749 /* Use this on a G4 */
   750 static void
   751 ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
   752 {
   753     const int scalar_dst_lead = sizeof(Uint32) * 4;
   754     const int vector_dst_lead = sizeof(Uint32) * 16;
   755 
   756     int height = info->dst_h;
   757     Uint32 *src = (Uint32 *) info->src;
   758     int srcskip = info->src_skip / 4;
   759     Uint32 *dst = (Uint32 *) info->dst;
   760     int dstskip = info->dst_skip / 4;
   761     SDL_PixelFormat *srcfmt = info->src_fmt;
   762     SDL_PixelFormat *dstfmt = info->dst_fmt;
   763     vector unsigned int vzero = vec_splat_u32(0);
   764     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   765     if (dstfmt->Amask && !srcfmt->Amask) {
   766         if (info->a) {
   767             vector unsigned char valpha;
   768             ((unsigned char *) &valpha)[0] = info->a;
   769             vzero = (vector unsigned int) vec_splat(valpha, 0);
   770         }
   771     }
   772 
   773     assert(srcfmt->BytesPerPixel == 4);
   774     assert(dstfmt->BytesPerPixel == 4);
   775 
   776     while (height--) {
   777         vector unsigned char valigner;
   778         vector unsigned int vbits;
   779         vector unsigned int voverflow;
   780         Uint32 bits;
   781         Uint8 r, g, b, a;
   782 
   783         int width = info->dst_w;
   784         int extrawidth;
   785 
   786         /* do scalar until we can align... */
   787         while ((UNALIGNED_PTR(dst)) && (width)) {
   788             vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   789                      DST_CHAN_SRC);
   790             vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   791                       DST_CHAN_DEST);
   792             bits = *(src++);
   793             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   794             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   795             width--;
   796         }
   797 
   798         /* After all that work, here's the vector part! */
   799         extrawidth = (width % 4);
   800         width -= extrawidth;
   801         valigner = VEC_ALIGNER(src);
   802         vbits = vec_ld(0, src);
   803 
   804         while (width) {
   805             vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
   806                      DST_CHAN_SRC);
   807             vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
   808                       DST_CHAN_DEST);
   809             voverflow = vec_ld(15, src);
   810             src += 4;
   811             width -= 4;
   812             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   813             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   814             vec_st(vbits, 0, dst);      /* store it back out. */
   815             dst += 4;
   816             vbits = voverflow;
   817         }
   818 
   819         assert(width == 0);
   820 
   821         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   822         while (extrawidth) {
   823             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   824             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   825             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   826             extrawidth--;
   827         }
   828 
   829         src += srcskip;
   830         dst += dstskip;
   831     }
   832 
   833     vec_dss(DST_CHAN_SRC);
   834     vec_dss(DST_CHAN_DEST);
   835 }
   836 
   837 static Uint32
   838 GetBlitFeatures(void)
   839 {
   840     static Uint32 features = 0xffffffff;
   841     if (features == 0xffffffff) {
   842         /* Provide an override for testing .. */
   843         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   844         if (override) {
   845             features = 0;
   846             SDL_sscanf(override, "%u", &features);
   847         } else {
   848             features = (0
   849                         /* Feature 1 is has-MMX */
   850                         | ((SDL_HasMMX())? 1 : 0)
   851                         /* Feature 2 is has-AltiVec */
   852                         | ((SDL_HasAltiVec())? 2 : 0)
   853                         /* Feature 4 is dont-use-prefetch */
   854                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   855                         | ((GetL3CacheSize() == 0) ? 4 : 0)
   856                 );
   857         }
   858     }
   859     return features;
   860 }
   861 
   862 #if __MWERKS__
   863 #pragma altivec_model off
   864 #endif
   865 #else
   866 /* Feature 1 is has-MMX */
   867 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   868 #endif
   869 
   870 /* This is now endian dependent */
   871 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   872 #define HI	1
   873 #define LO	0
   874 #else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
   875 #define HI	0
   876 #define LO	1
   877 #endif
   878 
   879 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   880 #define RGB888_RGB332(dst, src) { \
   881 	dst = (Uint8)((((src)&0x00E00000)>>16)| \
   882 	              (((src)&0x0000E000)>>11)| \
   883 	              (((src)&0x000000C0)>>6)); \
   884 }
   885 static void
   886 Blit_RGB888_index8(SDL_BlitInfo * info)
   887 {
   888 #ifndef USE_DUFFS_LOOP
   889     int c;
   890 #endif
   891     int width, height;
   892     Uint32 *src;
   893     const Uint8 *map;
   894     Uint8 *dst;
   895     int srcskip, dstskip;
   896 
   897     /* Set up some basic variables */
   898     width = info->dst_w;
   899     height = info->dst_h;
   900     src = (Uint32 *) info->src;
   901     srcskip = info->src_skip / 4;
   902     dst = info->dst;
   903     dstskip = info->dst_skip;
   904     map = info->table;
   905 
   906     if (map == NULL) {
   907         while (height--) {
   908 #ifdef USE_DUFFS_LOOP
   909 			/* *INDENT-OFF* */
   910 			DUFFS_LOOP(
   911 				RGB888_RGB332(*dst++, *src);
   912 			, width);
   913 			/* *INDENT-ON* */
   914 #else
   915             for (c = width / 4; c; --c) {
   916                 /* Pack RGB into 8bit pixel */
   917                 ++src;
   918                 RGB888_RGB332(*dst++, *src);
   919                 ++src;
   920                 RGB888_RGB332(*dst++, *src);
   921                 ++src;
   922                 RGB888_RGB332(*dst++, *src);
   923                 ++src;
   924             }
   925             switch (width & 3) {
   926             case 3:
   927                 RGB888_RGB332(*dst++, *src);
   928                 ++src;
   929             case 2:
   930                 RGB888_RGB332(*dst++, *src);
   931                 ++src;
   932             case 1:
   933                 RGB888_RGB332(*dst++, *src);
   934                 ++src;
   935             }
   936 #endif /* USE_DUFFS_LOOP */
   937             src += srcskip;
   938             dst += dstskip;
   939         }
   940     } else {
   941         int Pixel;
   942 
   943         while (height--) {
   944 #ifdef USE_DUFFS_LOOP
   945 			/* *INDENT-OFF* */
   946 			DUFFS_LOOP(
   947 				RGB888_RGB332(Pixel, *src);
   948 				*dst++ = map[Pixel];
   949 				++src;
   950 			, width);
   951 			/* *INDENT-ON* */
   952 #else
   953             for (c = width / 4; c; --c) {
   954                 /* Pack RGB into 8bit pixel */
   955                 RGB888_RGB332(Pixel, *src);
   956                 *dst++ = map[Pixel];
   957                 ++src;
   958                 RGB888_RGB332(Pixel, *src);
   959                 *dst++ = map[Pixel];
   960                 ++src;
   961                 RGB888_RGB332(Pixel, *src);
   962                 *dst++ = map[Pixel];
   963                 ++src;
   964                 RGB888_RGB332(Pixel, *src);
   965                 *dst++ = map[Pixel];
   966                 ++src;
   967             }
   968             switch (width & 3) {
   969             case 3:
   970                 RGB888_RGB332(Pixel, *src);
   971                 *dst++ = map[Pixel];
   972                 ++src;
   973             case 2:
   974                 RGB888_RGB332(Pixel, *src);
   975                 *dst++ = map[Pixel];
   976                 ++src;
   977             case 1:
   978                 RGB888_RGB332(Pixel, *src);
   979                 *dst++ = map[Pixel];
   980                 ++src;
   981             }
   982 #endif /* USE_DUFFS_LOOP */
   983             src += srcskip;
   984             dst += dstskip;
   985         }
   986     }
   987 }
   988 
   989 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   990 #define RGB888_RGB555(dst, src) { \
   991 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
   992 	                            (((*src)&0x0000F800)>>6)| \
   993 	                            (((*src)&0x000000F8)>>3)); \
   994 }
   995 #define RGB888_RGB555_TWO(dst, src) { \
   996 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   997 	                     (((src[HI])&0x0000F800)>>6)| \
   998 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   999 	                     (((src[LO])&0x00F80000)>>9)| \
  1000 	                     (((src[LO])&0x0000F800)>>6)| \
  1001 	                     (((src[LO])&0x000000F8)>>3); \
  1002 }
  1003 static void
  1004 Blit_RGB888_RGB555(SDL_BlitInfo * info)
  1005 {
  1006 #ifndef USE_DUFFS_LOOP
  1007     int c;
  1008 #endif
  1009     int width, height;
  1010     Uint32 *src;
  1011     Uint16 *dst;
  1012     int srcskip, dstskip;
  1013 
  1014     /* Set up some basic variables */
  1015     width = info->dst_w;
  1016     height = info->dst_h;
  1017     src = (Uint32 *) info->src;
  1018     srcskip = info->src_skip / 4;
  1019     dst = (Uint16 *) info->dst;
  1020     dstskip = info->dst_skip / 2;
  1021 
  1022 #ifdef USE_DUFFS_LOOP
  1023     while (height--) {
  1024 		/* *INDENT-OFF* */
  1025 		DUFFS_LOOP(
  1026 			RGB888_RGB555(dst, src);
  1027 			++src;
  1028 			++dst;
  1029 		, width);
  1030 		/* *INDENT-ON* */
  1031         src += srcskip;
  1032         dst += dstskip;
  1033     }
  1034 #else
  1035     /* Memory align at 4-byte boundary, if necessary */
  1036     if ((long) dst & 0x03) {
  1037         /* Don't do anything if width is 0 */
  1038         if (width == 0) {
  1039             return;
  1040         }
  1041         --width;
  1042 
  1043         while (height--) {
  1044             /* Perform copy alignment */
  1045             RGB888_RGB555(dst, src);
  1046             ++src;
  1047             ++dst;
  1048 
  1049             /* Copy in 4 pixel chunks */
  1050             for (c = width / 4; c; --c) {
  1051                 RGB888_RGB555_TWO(dst, src);
  1052                 src += 2;
  1053                 dst += 2;
  1054                 RGB888_RGB555_TWO(dst, src);
  1055                 src += 2;
  1056                 dst += 2;
  1057             }
  1058             /* Get any leftovers */
  1059             switch (width & 3) {
  1060             case 3:
  1061                 RGB888_RGB555(dst, src);
  1062                 ++src;
  1063                 ++dst;
  1064             case 2:
  1065                 RGB888_RGB555_TWO(dst, src);
  1066                 src += 2;
  1067                 dst += 2;
  1068                 break;
  1069             case 1:
  1070                 RGB888_RGB555(dst, src);
  1071                 ++src;
  1072                 ++dst;
  1073                 break;
  1074             }
  1075             src += srcskip;
  1076             dst += dstskip;
  1077         }
  1078     } else {
  1079         while (height--) {
  1080             /* Copy in 4 pixel chunks */
  1081             for (c = width / 4; c; --c) {
  1082                 RGB888_RGB555_TWO(dst, src);
  1083                 src += 2;
  1084                 dst += 2;
  1085                 RGB888_RGB555_TWO(dst, src);
  1086                 src += 2;
  1087                 dst += 2;
  1088             }
  1089             /* Get any leftovers */
  1090             switch (width & 3) {
  1091             case 3:
  1092                 RGB888_RGB555(dst, src);
  1093                 ++src;
  1094                 ++dst;
  1095             case 2:
  1096                 RGB888_RGB555_TWO(dst, src);
  1097                 src += 2;
  1098                 dst += 2;
  1099                 break;
  1100             case 1:
  1101                 RGB888_RGB555(dst, src);
  1102                 ++src;
  1103                 ++dst;
  1104                 break;
  1105             }
  1106             src += srcskip;
  1107             dst += dstskip;
  1108         }
  1109     }
  1110 #endif /* USE_DUFFS_LOOP */
  1111 }
  1112 
  1113 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1114 #define RGB888_RGB565(dst, src) { \
  1115 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1116 	                            (((*src)&0x0000FC00)>>5)| \
  1117 	                            (((*src)&0x000000F8)>>3)); \
  1118 }
  1119 #define RGB888_RGB565_TWO(dst, src) { \
  1120 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1121 	                     (((src[HI])&0x0000FC00)>>5)| \
  1122 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1123 	                     (((src[LO])&0x00F80000)>>8)| \
  1124 	                     (((src[LO])&0x0000FC00)>>5)| \
  1125 	                     (((src[LO])&0x000000F8)>>3); \
  1126 }
  1127 static void
  1128 Blit_RGB888_RGB565(SDL_BlitInfo * info)
  1129 {
  1130 #ifndef USE_DUFFS_LOOP
  1131     int c;
  1132 #endif
  1133     int width, height;
  1134     Uint32 *src;
  1135     Uint16 *dst;
  1136     int srcskip, dstskip;
  1137 
  1138     /* Set up some basic variables */
  1139     width = info->dst_w;
  1140     height = info->dst_h;
  1141     src = (Uint32 *) info->src;
  1142     srcskip = info->src_skip / 4;
  1143     dst = (Uint16 *) info->dst;
  1144     dstskip = info->dst_skip / 2;
  1145 
  1146 #ifdef USE_DUFFS_LOOP
  1147     while (height--) {
  1148 		/* *INDENT-OFF* */
  1149 		DUFFS_LOOP(
  1150 			RGB888_RGB565(dst, src);
  1151 			++src;
  1152 			++dst;
  1153 		, width);
  1154 		/* *INDENT-ON* */
  1155         src += srcskip;
  1156         dst += dstskip;
  1157     }
  1158 #else
  1159     /* Memory align at 4-byte boundary, if necessary */
  1160     if ((long) dst & 0x03) {
  1161         /* Don't do anything if width is 0 */
  1162         if (width == 0) {
  1163             return;
  1164         }
  1165         --width;
  1166 
  1167         while (height--) {
  1168             /* Perform copy alignment */
  1169             RGB888_RGB565(dst, src);
  1170             ++src;
  1171             ++dst;
  1172 
  1173             /* Copy in 4 pixel chunks */
  1174             for (c = width / 4; c; --c) {
  1175                 RGB888_RGB565_TWO(dst, src);
  1176                 src += 2;
  1177                 dst += 2;
  1178                 RGB888_RGB565_TWO(dst, src);
  1179                 src += 2;
  1180                 dst += 2;
  1181             }
  1182             /* Get any leftovers */
  1183             switch (width & 3) {
  1184             case 3:
  1185                 RGB888_RGB565(dst, src);
  1186                 ++src;
  1187                 ++dst;
  1188             case 2:
  1189                 RGB888_RGB565_TWO(dst, src);
  1190                 src += 2;
  1191                 dst += 2;
  1192                 break;
  1193             case 1:
  1194                 RGB888_RGB565(dst, src);
  1195                 ++src;
  1196                 ++dst;
  1197                 break;
  1198             }
  1199             src += srcskip;
  1200             dst += dstskip;
  1201         }
  1202     } else {
  1203         while (height--) {
  1204             /* Copy in 4 pixel chunks */
  1205             for (c = width / 4; c; --c) {
  1206                 RGB888_RGB565_TWO(dst, src);
  1207                 src += 2;
  1208                 dst += 2;
  1209                 RGB888_RGB565_TWO(dst, src);
  1210                 src += 2;
  1211                 dst += 2;
  1212             }
  1213             /* Get any leftovers */
  1214             switch (width & 3) {
  1215             case 3:
  1216                 RGB888_RGB565(dst, src);
  1217                 ++src;
  1218                 ++dst;
  1219             case 2:
  1220                 RGB888_RGB565_TWO(dst, src);
  1221                 src += 2;
  1222                 dst += 2;
  1223                 break;
  1224             case 1:
  1225                 RGB888_RGB565(dst, src);
  1226                 ++src;
  1227                 ++dst;
  1228                 break;
  1229             }
  1230             src += srcskip;
  1231             dst += dstskip;
  1232         }
  1233     }
  1234 #endif /* USE_DUFFS_LOOP */
  1235 }
  1236 
  1237 
  1238 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1239 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1240 static void
  1241 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
  1242 {
  1243 #ifndef USE_DUFFS_LOOP
  1244     int c;
  1245 #endif
  1246     int width, height;
  1247     Uint8 *src;
  1248     Uint32 *dst;
  1249     int srcskip, dstskip;
  1250 
  1251     /* Set up some basic variables */
  1252     width = info->dst_w;
  1253     height = info->dst_h;
  1254     src = (Uint8 *) info->src;
  1255     srcskip = info->src_skip;
  1256     dst = (Uint32 *) info->dst;
  1257     dstskip = info->dst_skip / 4;
  1258 
  1259 #ifdef USE_DUFFS_LOOP
  1260     while (height--) {
  1261 		/* *INDENT-OFF* */
  1262 		DUFFS_LOOP(
  1263 		{
  1264 			*dst++ = RGB565_32(dst, src, map);
  1265 			src += 2;
  1266 		},
  1267 		width);
  1268 		/* *INDENT-ON* */
  1269         src += srcskip;
  1270         dst += dstskip;
  1271     }
  1272 #else
  1273     while (height--) {
  1274         /* Copy in 4 pixel chunks */
  1275         for (c = width / 4; c; --c) {
  1276             *dst++ = RGB565_32(dst, src, map);
  1277             src += 2;
  1278             *dst++ = RGB565_32(dst, src, map);
  1279             src += 2;
  1280             *dst++ = RGB565_32(dst, src, map);
  1281             src += 2;
  1282             *dst++ = RGB565_32(dst, src, map);
  1283             src += 2;
  1284         }
  1285         /* Get any leftovers */
  1286         switch (width & 3) {
  1287         case 3:
  1288             *dst++ = RGB565_32(dst, src, map);
  1289             src += 2;
  1290         case 2:
  1291             *dst++ = RGB565_32(dst, src, map);
  1292             src += 2;
  1293         case 1:
  1294             *dst++ = RGB565_32(dst, src, map);
  1295             src += 2;
  1296             break;
  1297         }
  1298         src += srcskip;
  1299         dst += dstskip;
  1300     }
  1301 #endif /* USE_DUFFS_LOOP */
  1302 }
  1303 
  1304 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
  1305 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1306     0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1307     0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1308     0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1309     0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1310     0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1311     0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1312     0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1313     0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1314     0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1315     0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1316     0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1317     0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1318     0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1319     0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1320     0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1321     0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1322     0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1323     0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1324     0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1325     0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1326     0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1327     0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1328     0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1329     0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1330     0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1331     0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1332     0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1333     0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1334     0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1335     0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1336     0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1337     0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1338     0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1339     0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1340     0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1341     0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1342     0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1343     0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1344     0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1345     0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1346     0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1347     0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1348     0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1349     0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1350     0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1351     0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1352     0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1353     0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1354     0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1355     0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1356     0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1357     0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1358     0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1359     0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1360     0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1361     0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1362     0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1363     0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1364     0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1365     0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1366     0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1367     0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1368     0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1369     0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1370     0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1371     0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1372     0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1373     0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1374     0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1375     0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1376     0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1377     0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1378     0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1379     0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1380     0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1381     0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1382     0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1383     0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1384     0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1385     0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1386     0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1387     0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1388     0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1389     0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1390     0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1391     0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1392     0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1393     0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1394     0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1395     0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1396     0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1397     0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1398     0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1399     0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1400     0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1401     0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1402     0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1403     0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1404     0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1405     0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1406     0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1407     0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1408     0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1409     0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1410     0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1411     0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1412     0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1413     0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1414     0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1415     0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1416     0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1417     0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1418     0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1419     0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1420     0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1421     0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1422     0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1423     0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1424     0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1425     0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1426     0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1427     0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1428     0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1429     0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1430     0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1431     0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1432     0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1433     0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1434 };
  1435 
  1436 static void
  1437 Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
  1438 {
  1439     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1440 }
  1441 
  1442 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1443 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1444     0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1445     0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1446     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1447     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1448     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1449     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1450     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1451     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1452     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1453     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1454     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1455     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1456     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1457     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1458     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1459     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1460     0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1461     0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1462     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1463     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1464     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1465     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1466     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1467     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1468     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1469     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1470     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1471     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1472     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1473     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1474     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1475     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1476     0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1477     0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1478     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1479     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1480     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1481     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1482     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1483     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1484     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1485     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1486     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1487     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1488     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1489     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1490     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1491     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1492     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1493     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1494     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1495     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1496     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1497     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1498     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1499     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1500     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1501     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1502     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1503     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1504     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1505     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1506     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1507     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1508     0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1509     0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1510     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1511     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1512     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1513     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1514     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1515     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1516     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1517     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1518     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1519     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1520     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1521     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1522     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1523     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1524     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1525     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1526     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1527     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1528     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1529     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1530     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1531     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1532     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1533     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1534     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1535     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1536     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1537     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1538     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1539     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1540     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1541     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1542     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1543     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1544     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1545     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1546     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1547     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1548     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1549     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1550     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1551     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1552     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1553     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1554     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1555     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1556     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1557     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1558     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1559     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1560     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1561     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1562     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1563     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1564     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1565     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1566     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1567     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1568     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1569     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1570     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1571     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1572 };
  1573 
/*
 * Blit RGB 5-6-5 pixels to ABGR 8-8-8-8 by handing the ABGR lookup table
 * to the shared Blit_RGB565_32() routine.
 */
static void
Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
}
  1579 
/*
 * Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
 *
 * Lookup table passed to Blit_RGB565_32() by Blit_RGB565_RGBA8888().
 * The 512 entries form 256 pairs (even index 2*k, odd index 2*k+1):
 *   - even entries: the low 5 bits of k are expanded into bits 15..8, the
 *     upper 3 bits of k step bits 23..16 by 0x04, and bits 7..0 are 0xff;
 *   - odd entries: the low 3 bits of k fill the upper part of bits 23..16
 *     (0x20 .. 0xe2) and the upper 5 bits of k are expanded into bits 31..24.
 * NOTE(review): presumably k is the low byte (even entries) / high byte (odd
 * entries) of the 16-bit source pixel and the two entries are combined per
 * pixel -- confirm against Blit_RGB565_32().
 */
static const Uint32 RGB565_RGBA8888_LUT[512] = {
    0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
    0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
    0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
    0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
    0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
    0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
    0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
    0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
    0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
    0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
    0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
    0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
    0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
    0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
    0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
    0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
    0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
    0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
    0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
    0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
    0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
    0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
    0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
    0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
    0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
    0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
    0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
    0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
    0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
    0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
    0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
    0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
    0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
    0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
    0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
    0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
    0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
    0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
    0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
    0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
    0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
    0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
    0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
    0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
    0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
    0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
    0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
    0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
    0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
    0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
    0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
    0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
    0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
    0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
    0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
    0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
    0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
    0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
    0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
    0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
    0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
    0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
    0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
    0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
    0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
    0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
    0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
    0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
    0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
    0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
    0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
    0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
    0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
    0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
    0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
    0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
    0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
    0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
    0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
    0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
    0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
    0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
    0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
    0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
    0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
    0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
    0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
    0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
    0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
    0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
    0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
    0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
    0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
    0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
    0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
    0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
    0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
    0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
    0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
    0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
    0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
    0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
    0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
    0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
    0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
    0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
    0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
    0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
    0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
    0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
    0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
    0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
    0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
    0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
    0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
    0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
    0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
    0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
    0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
    0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
    0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
    0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
    0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
    0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
    0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
    0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
    0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
    0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
};
  1711 
/*
 * Blit RGB 5-6-5 pixels to RGBA 8-8-8-8 by handing the RGBA lookup table
 * to the shared Blit_RGB565_32() routine.
 */
static void
Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
}
  1717 
/*
 * Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
 *
 * Lookup table passed to Blit_RGB565_32() by Blit_RGB565_BGRA8888().
 * The 512 entries form 256 pairs (even index 2*k, odd index 2*k+1):
 *   - even entries: the low 5 bits of k are expanded into bits 31..24 and
 *     the upper 3 bits of k step bits 23..16 by 0x04;
 *   - odd entries: the low 3 bits of k fill the upper part of bits 23..16
 *     (0x20 .. 0xe2), the upper 5 bits of k are expanded into bits 15..8,
 *     and bits 7..0 are 0xff.
 * NOTE(review): presumably k is the low byte (even entries) / high byte (odd
 * entries) of the 16-bit source pixel and the two entries are combined per
 * pixel -- confirm against Blit_RGB565_32().
 */
static const Uint32 RGB565_BGRA8888_LUT[512] = {
    0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
    0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
    0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
    0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
    0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
    0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
    0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
    0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
    0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
    0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
    0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
    0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
    0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
    0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
    0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
    0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
    0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
    0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
    0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
    0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
    0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
    0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
    0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
    0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
    0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
    0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
    0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
    0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
    0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
    0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
    0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
    0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
    0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
    0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
    0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
    0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
    0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
    0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
    0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
    0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
    0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
    0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
    0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
    0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
    0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
    0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
    0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
    0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
    0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
    0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
    0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
    0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
    0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
    0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
    0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
    0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
    0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
    0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
    0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
    0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
    0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
    0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
    0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
    0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
    0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
    0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
    0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
    0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
    0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
    0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
    0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
    0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
    0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
    0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
    0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
    0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
    0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
    0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
    0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
    0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
    0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
    0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
    0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
    0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
    0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
    0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
    0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
    0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
    0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
    0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
    0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
    0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
    0xc5140000, 0x0000bdff, 0x cd140000, 0x0020bdff,
    0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
    0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
    0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
    0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
    0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
    0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
    0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
    0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
    0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
    0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
    0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
    0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
    0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
    0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
    0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
    0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
    0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
    0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
    0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
    0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
    0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
    0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
    0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
    0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
    0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
    0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
    0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
    0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
    0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
    0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
    0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
    0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
    0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
    0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
    0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
};
  1849 
/*
 * Blit RGB 5-6-5 pixels to BGRA 8-8-8-8 by handing the BGRA lookup table
 * to the shared Blit_RGB565_32() routine.
 */
static void
Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
  1855 
  1856 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1857 #ifndef RGB888_RGB332
  1858 #define RGB888_RGB332(dst, src) { \
  1859 	dst = (((src)&0x00E00000)>>16)| \
  1860 	      (((src)&0x0000E000)>>11)| \
  1861 	      (((src)&0x000000C0)>>6); \
  1862 }
  1863 #endif
  1864 static void
  1865 Blit_RGB888_index8_map(SDL_BlitInfo * info)
  1866 {
  1867 #ifndef USE_DUFFS_LOOP
  1868     int c;
  1869 #endif
  1870     int Pixel;
  1871     int width, height;
  1872     Uint32 *src;
  1873     const Uint8 *map;
  1874     Uint8 *dst;
  1875     int srcskip, dstskip;
  1876 
  1877     /* Set up some basic variables */
  1878     width = info->dst_w;
  1879     height = info->dst_h;
  1880     src = (Uint32 *) info->src;
  1881     srcskip = info->src_skip / 4;
  1882     dst = info->dst;
  1883     dstskip = info->dst_skip;
  1884     map = info->table;
  1885 
  1886 #ifdef USE_DUFFS_LOOP
  1887     while (height--) {
  1888 		/* *INDENT-OFF* */
  1889 		DUFFS_LOOP(
  1890 			RGB888_RGB332(Pixel, *src);
  1891 			*dst++ = map[Pixel];
  1892 			++src;
  1893 		, width);
  1894 		/* *INDENT-ON* */
  1895         src += srcskip;
  1896         dst += dstskip;
  1897     }
  1898 #else
  1899     while (height--) {
  1900         for (c = width / 4; c; --c) {
  1901             /* Pack RGB into 8bit pixel */
  1902             RGB888_RGB332(Pixel, *src);
  1903             *dst++ = map[Pixel];
  1904             ++src;
  1905             RGB888_RGB332(Pixel, *src);
  1906             *dst++ = map[Pixel];
  1907             ++src;
  1908             RGB888_RGB332(Pixel, *src);
  1909             *dst++ = map[Pixel];
  1910             ++src;
  1911             RGB888_RGB332(Pixel, *src);
  1912             *dst++ = map[Pixel];
  1913             ++src;
  1914         }
  1915         switch (width & 3) {
  1916         case 3:
  1917             RGB888_RGB332(Pixel, *src);
  1918             *dst++ = map[Pixel];
  1919             ++src;
  1920         case 2:
  1921             RGB888_RGB332(Pixel, *src);
  1922             *dst++ = map[Pixel];
  1923             ++src;
  1924         case 1:
  1925             RGB888_RGB332(Pixel, *src);
  1926             *dst++ = map[Pixel];
  1927             ++src;
  1928         }
  1929         src += srcskip;
  1930         dst += dstskip;
  1931     }
  1932 #endif /* USE_DUFFS_LOOP */
  1933 }
  1934 
  1935 static void
  1936 BlitNto1(SDL_BlitInfo * info)
  1937 {
  1938 #ifndef USE_DUFFS_LOOP
  1939     int c;
  1940 #endif
  1941     int width, height;
  1942     Uint8 *src;
  1943     const Uint8 *map;
  1944     Uint8 *dst;
  1945     int srcskip, dstskip;
  1946     int srcbpp;
  1947     Uint32 Pixel;
  1948     int sR, sG, sB;
  1949     SDL_PixelFormat *srcfmt;
  1950 
  1951     /* Set up some basic variables */
  1952     width = info->dst_w;
  1953     height = info->dst_h;
  1954     src = info->src;
  1955     srcskip = info->src_skip;
  1956     dst = info->dst;
  1957     dstskip = info->dst_skip;
  1958     map = info->table;
  1959     srcfmt = info->src_fmt;
  1960     srcbpp = srcfmt->BytesPerPixel;
  1961 
  1962     if (map == NULL) {
  1963         while (height--) {
  1964 #ifdef USE_DUFFS_LOOP
  1965 			/* *INDENT-OFF* */
  1966 			DUFFS_LOOP(
  1967 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1968 								sR, sG, sB);
  1969 				if ( 1 ) {
  1970 				  	/* Pack RGB into 8bit pixel */
  1971 				  	*dst = ((sR>>5)<<(3+2))|
  1972 					        ((sG>>5)<<(2)) |
  1973 					        ((sB>>6)<<(0)) ;
  1974 				}
  1975 				dst++;
  1976 				src += srcbpp;
  1977 			, width);
  1978 			/* *INDENT-ON* */
  1979 #else
  1980             for (c = width; c; --c) {
  1981                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  1982                 if (1) {
  1983                     /* Pack RGB into 8bit pixel */
  1984                     *dst = ((sR >> 5) << (3 + 2)) |
  1985                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
  1986                 }
  1987                 dst++;
  1988                 src += srcbpp;
  1989             }
  1990 #endif
  1991             src += srcskip;
  1992             dst += dstskip;
  1993         }
  1994     } else {
  1995         while (height--) {
  1996 #ifdef USE_DUFFS_LOOP
  1997 			/* *INDENT-OFF* */
  1998 			DUFFS_LOOP(
  1999 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2000 								sR, sG, sB);
  2001 				if ( 1 ) {
  2002 				  	/* Pack RGB into 8bit pixel */
  2003 				  	*dst = map[((sR>>5)<<(3+2))|
  2004 						   ((sG>>5)<<(2))  |
  2005 						   ((sB>>6)<<(0))  ];
  2006 				}
  2007 				dst++;
  2008 				src += srcbpp;
  2009 			, width);
  2010 			/* *INDENT-ON* */
  2011 #else
  2012             for (c = width; c; --c) {
  2013                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2014                 if (1) {
  2015                     /* Pack RGB into 8bit pixel */
  2016                     *dst = map[((sR >> 5) << (3 + 2)) |
  2017                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
  2018                 }
  2019                 dst++;
  2020                 src += srcbpp;
  2021             }
  2022 #endif /* USE_DUFFS_LOOP */
  2023             src += srcskip;
  2024             dst += dstskip;
  2025         }
  2026     }
  2027 }
  2028 
  2029 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  2030 static void
  2031 Blit4to4MaskAlpha(SDL_BlitInfo * info)
  2032 {
  2033     int width = info->dst_w;
  2034     int height = info->dst_h;
  2035     Uint32 *src = (Uint32 *) info->src;
  2036     int srcskip = info->src_skip;
  2037     Uint32 *dst = (Uint32 *) info->dst;
  2038     int dstskip = info->dst_skip;
  2039     SDL_PixelFormat *srcfmt = info->src_fmt;
  2040     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2041 
  2042     if (dstfmt->Amask) {
  2043         /* RGB->RGBA, SET_ALPHA */
  2044         Uint32 mask = (info->a >> dstfmt->Aloss) << dstfmt->Ashift;
  2045 
  2046         while (height--) {
  2047 			/* *INDENT-OFF* */
  2048 			DUFFS_LOOP(
  2049 			{
  2050 				*dst = *src | mask;
  2051 				++dst;
  2052 				++src;
  2053 			},
  2054 			width);
  2055 			/* *INDENT-ON* */
  2056             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2057             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2058         }
  2059     } else {
  2060         /* RGBA->RGB, NO_ALPHA */
  2061         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2062 
  2063         while (height--) {
  2064 			/* *INDENT-OFF* */
  2065 			DUFFS_LOOP(
  2066 			{
  2067 				*dst = *src & mask;
  2068 				++dst;
  2069 				++src;
  2070 			},
  2071 			width);
  2072 			/* *INDENT-ON* */
  2073             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2074             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2075         }
  2076     }
  2077 }
  2078 
  2079 static void
  2080 BlitNtoN(SDL_BlitInfo * info)
  2081 {
  2082     int width = info->dst_w;
  2083     int height = info->dst_h;
  2084     Uint8 *src = info->src;
  2085     int srcskip = info->src_skip;
  2086     Uint8 *dst = info->dst;
  2087     int dstskip = info->dst_skip;
  2088     SDL_PixelFormat *srcfmt = info->src_fmt;
  2089     int srcbpp = srcfmt->BytesPerPixel;
  2090     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2091     int dstbpp = dstfmt->BytesPerPixel;
  2092     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2093 
  2094     while (height--) {
  2095 		/* *INDENT-OFF* */
  2096 		DUFFS_LOOP(
  2097 		{
  2098             Uint32 Pixel;
  2099 			unsigned sR;
  2100 			unsigned sG;
  2101 			unsigned sB;
  2102 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2103 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2104 			dst += dstbpp;
  2105 			src += srcbpp;
  2106 		},
  2107 		width);
  2108 		/* *INDENT-ON* */
  2109         src += srcskip;
  2110         dst += dstskip;
  2111     }
  2112 }
  2113 
  2114 static void
  2115 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
  2116 {
  2117     int width = info->dst_w;
  2118     int height = info->dst_h;
  2119     Uint8 *src = info->src;
  2120     int srcskip = info->src_skip;
  2121     Uint8 *dst = info->dst;
  2122     int dstskip = info->dst_skip;
  2123     SDL_PixelFormat *srcfmt = info->src_fmt;
  2124     int srcbpp = srcfmt->BytesPerPixel;
  2125     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2126     int dstbpp = dstfmt->BytesPerPixel;
  2127     int c;
  2128 
  2129     while (height--) {
  2130         for (c = width; c; --c) {
  2131             Uint32 Pixel;
  2132             unsigned sR, sG, sB, sA;
  2133             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2134             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2135             dst += dstbpp;
  2136             src += srcbpp;
  2137         }
  2138         src += srcskip;
  2139         dst += dstskip;
  2140     }
  2141 }
  2142 
  2143 static void
  2144 BlitNto1Key(SDL_BlitInfo * info)
  2145 {
  2146     int width = info->dst_w;
  2147     int height = info->dst_h;
  2148     Uint8 *src = info->src;
  2149     int srcskip = info->src_skip;
  2150     Uint8 *dst = info->dst;
  2151     int dstskip = info->dst_skip;
  2152     SDL_PixelFormat *srcfmt = info->src_fmt;
  2153     const Uint8 *palmap = info->table;
  2154     Uint32 ckey = info->colorkey;
  2155     Uint32 rgbmask = ~srcfmt->Amask;
  2156     int srcbpp;
  2157     Uint32 Pixel;
  2158     unsigned sR, sG, sB;
  2159 
  2160     /* Set up some basic variables */
  2161     srcbpp = srcfmt->BytesPerPixel;
  2162     ckey &= rgbmask;
  2163 
  2164     if (palmap == NULL) {
  2165         while (height--) {
  2166 			/* *INDENT-OFF* */
  2167 			DUFFS_LOOP(
  2168 			{
  2169 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2170 								sR, sG, sB);
  2171 				if ( (Pixel & rgbmask) != ckey ) {
  2172 				  	/* Pack RGB into 8bit pixel */
  2173 				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
  2174 						           ((sG>>5)<<(2)) |
  2175 						           ((sB>>6)<<(0)));
  2176 				}
  2177 				dst++;
  2178 				src += srcbpp;
  2179 			},
  2180 			width);
  2181 			/* *INDENT-ON* */
  2182             src += srcskip;
  2183             dst += dstskip;
  2184         }
  2185     } else {
  2186         while (height--) {
  2187 			/* *INDENT-OFF* */
  2188 			DUFFS_LOOP(
  2189 			{
  2190 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2191 								sR, sG, sB);
  2192 				if ( (Pixel & rgbmask) != ckey ) {
  2193 				  	/* Pack RGB into 8bit pixel */
  2194 				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2195 							             ((sG>>5)<<(2))  |
  2196 							             ((sB>>6)<<(0))  ];
  2197 				}
  2198 				dst++;
  2199 				src += srcbpp;
  2200 			},
  2201 			width);
  2202 			/* *INDENT-ON* */
  2203             src += srcskip;
  2204             dst += dstskip;
  2205         }
  2206     }
  2207 }
  2208 
  2209 static void
  2210 Blit2to2Key(SDL_BlitInfo * info)
  2211 {
  2212     int width = info->dst_w;
  2213     int height = info->dst_h;
  2214     Uint16 *srcp = (Uint16 *) info->src;
  2215     int srcskip = info->src_skip;
  2216     Uint16 *dstp = (Uint16 *) info->dst;
  2217     int dstskip = info->dst_skip;
  2218     Uint32 ckey = info->colorkey;
  2219     Uint32 rgbmask = ~info->src_fmt->Amask;
  2220 
  2221     /* Set up some basic variables */
  2222     srcskip /= 2;
  2223     dstskip /= 2;
  2224     ckey &= rgbmask;
  2225 
  2226     while (height--) {
  2227 		/* *INDENT-OFF* */
  2228 		DUFFS_LOOP(
  2229 		{
  2230 			if ( (*srcp & rgbmask) != ckey ) {
  2231 				*dstp = *srcp;
  2232 			}
  2233 			dstp++;
  2234 			srcp++;
  2235 		},
  2236 		width);
  2237 		/* *INDENT-ON* */
  2238         srcp += srcskip;
  2239         dstp += dstskip;
  2240     }
  2241 }
  2242 
  2243 static void
  2244 BlitNtoNKey(SDL_BlitInfo * info)
  2245 {
  2246     int width = info->dst_w;
  2247     int height = info->dst_h;
  2248     Uint8 *src = info->src;
  2249     int srcskip = info->src_skip;
  2250     Uint8 *dst = info->dst;
  2251     int dstskip = info->dst_skip;
  2252     Uint32 ckey = info->colorkey;
  2253     SDL_PixelFormat *srcfmt = info->src_fmt;
  2254     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2255     int srcbpp = srcfmt->BytesPerPixel;
  2256     int dstbpp = dstfmt->BytesPerPixel;
  2257     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2258     Uint32 rgbmask = ~srcfmt->Amask;
  2259 
  2260     /* Set up some basic variables */
  2261     ckey &= rgbmask;
  2262 
  2263     while (height--) {
  2264 		/* *INDENT-OFF* */
  2265 		DUFFS_LOOP(
  2266 		{
  2267             Uint32 Pixel;
  2268 			unsigned sR;
  2269 			unsigned sG;
  2270 			unsigned sB;
  2271 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2272 			if ( (Pixel & rgbmask) != ckey ) {
  2273                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2274 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2275 			}
  2276 			dst += dstbpp;
  2277 			src += srcbpp;
  2278 		},
  2279 		width);
  2280 		/* *INDENT-ON* */
  2281         src += srcskip;
  2282         dst += dstskip;
  2283     }
  2284 }
  2285 
  2286 static void
  2287 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
  2288 {
  2289     int width = info->dst_w;
  2290     int height = info->dst_h;
  2291     Uint8 *src = info->src;
  2292     int srcskip = info->src_skip;
  2293     Uint8 *dst = info->dst;
  2294     int dstskip = info->dst_skip;
  2295     Uint32 ckey = info->colorkey;
  2296     SDL_PixelFormat *srcfmt = info->src_fmt;
  2297     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2298     Uint32 rgbmask = ~srcfmt->Amask;
  2299 
  2300     Uint8 srcbpp;
  2301     Uint8 dstbpp;
  2302     Uint32 Pixel;
  2303     unsigned sR, sG, sB, sA;
  2304 
  2305     /* Set up some basic variables */
  2306     srcbpp = srcfmt->BytesPerPixel;
  2307     dstbpp = dstfmt->BytesPerPixel;
  2308     ckey &= rgbmask;
  2309 
  2310     while (height--) {
  2311 		/* *INDENT-OFF* */
  2312 		DUFFS_LOOP(
  2313 		{
  2314 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2315 			if ( (Pixel & rgbmask) != ckey ) {
  2316 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2317 			}
  2318 			dst += dstbpp;
  2319 			src += srcbpp;
  2320 		},
  2321 		width);
  2322 		/* *INDENT-ON* */
  2323         src += srcskip;
  2324         dst += dstskip;
  2325     }
  2326 }
  2327 
  2328 /* Normal N to N optimized blitters */
  2329 struct blit_table
  2330 {
  2331     Uint32 srcR, srcG, srcB;
  2332     int dstbpp;
  2333     Uint32 dstR, dstG, dstB;
  2334     Uint32 blit_features;
  2335     SDL_BlitFunc blitfunc;
  2336     enum
  2337     { NO_ALPHA = 1, SET_ALPHA = 2, COPY_ALPHA = 4 } alpha;
  2338 };
  2339 static const struct blit_table normal_blit_1[] = {
  2340     /* Default for 8-bit RGB source, an invalid combination */
  2341     {0, 0, 0, 0, 0, 0, 0, 0, NULL},
  2342 };
  2343 
  2344 static const struct blit_table normal_blit_2[] = {
  2345 #if SDL_ALTIVEC_BLITTERS
  2346     /* has-altivec */
  2347     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
  2348      0x00000000,
  2349      2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2350     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
  2351      0x00000000,
  2352      2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2353 #endif
  2354     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
  2355      0x000000FF,
  2356      0, Blit_RGB565_ARGB8888, SET_ALPHA},
  2357     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00,
  2358      0x00FF0000,
  2359      0, Blit_RGB565_ABGR8888, SET_ALPHA},
  2360     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000,
  2361      0x0000FF00,
  2362      0, Blit_RGB565_RGBA8888, SET_ALPHA},
  2363     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000,
  2364      0xFF000000,
  2365      0, Blit_RGB565_BGRA8888, SET_ALPHA},
  2366 
  2367     /* Default for 16-bit RGB source, used if no other blitter matches */
  2368     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2369 };
  2370 
  2371 static const struct blit_table normal_blit_3[] = {
  2372     /* Default for 24-bit RGB source, never optimized */
  2373     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2374 };
  2375 
  2376 static const struct blit_table normal_blit_4[] = {
  2377 #if SDL_ALTIVEC_BLITTERS
  2378     /* has-altivec | dont-use-prefetch */
  2379     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
  2380      0x00000000,
  2381      6, ConvertAltivec32to32_noprefetch,
  2382      NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2383     /* has-altivec */
  2384     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
  2385      0x00000000,
  2386      2, ConvertAltivec32to32_prefetch,
  2387      NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2388     /* has-altivec */
  2389     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
  2390      0x0000001F,
  2391      2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
  2392 #endif
  2393     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
  2394      0x0000001F,
  2395      0, Blit_RGB888_RGB565, NO_ALPHA},
  2396     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
  2397      0x0000001F,
  2398      0, Blit_RGB888_RGB555, NO_ALPHA},
  2399     /* Default for 32-bit RGB source, used if no other blitter matches */
  2400     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2401 };
  2402 
  2403 static const struct blit_table *const normal_blit[] = {
  2404     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2405 };
  2406 
  2407 /* Mask matches table, or table entry is zero */
  2408 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2409 
  2410 SDL_BlitFunc
  2411 SDL_CalculateBlitN(SDL_Surface * surface)
  2412 {
  2413     SDL_PixelFormat *srcfmt;
  2414     SDL_PixelFormat *dstfmt;
  2415     const struct blit_table *table;
  2416     int which;
  2417     SDL_BlitFunc blitfun;
  2418 
  2419     /* Set up data for choosing the blit */
  2420     srcfmt = surface->format;
  2421     dstfmt = surface->map->dst->format;
  2422 
  2423     /* We don't support destinations less than 8-bits */
  2424     if (dstfmt->BitsPerPixel < 8) {
  2425         return (NULL);
  2426     }
  2427 
  2428     switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
  2429     case 0:
  2430         blitfun = NULL;
  2431         if (dstfmt->BitsPerPixel == 8) {
  2432             /* We assume 8-bit destinations are palettized */
  2433             if ((srcfmt->BytesPerPixel == 4) &&
  2434                 (srcfmt->Rmask == 0x00FF0000) &&
  2435                 (srcfmt->Gmask == 0x0000FF00) &&
  2436                 (srcfmt->Bmask == 0x000000FF)) {
  2437                 if (surface->map->info.table) {
  2438                     blitfun = Blit_RGB888_index8_map;
  2439                 } else {
  2440                     blitfun = Blit_RGB888_index8;
  2441                 }
  2442             } else {
  2443                 blitfun = BlitNto1;
  2444             }
  2445         } else {
  2446             /* Now the meat, choose the blitter we want */
  2447             int a_need = NO_ALPHA;
  2448             if (dstfmt->Amask)
  2449                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2450             table = normal_blit[srcfmt->BytesPerPixel - 1];
  2451             for (which = 0; table[which].dstbpp; ++which) {
  2452                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2453                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2454                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2455                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2456                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2457                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2458                     dstfmt->BytesPerPixel == table[which].dstbpp &&
  2459                     (a_need & table[which].alpha) == a_need &&
  2460                     ((table[which].blit_features & GetBlitFeatures()) ==
  2461                      table[which].blit_features))
  2462                     break;
  2463             }
  2464             blitfun = table[which].blitfunc;
  2465 
  2466             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2467                 /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2468                 if (srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4
  2469                     && srcfmt->Rmask == dstfmt->Rmask
  2470                     && srcfmt->Gmask == dstfmt->Gmask
  2471                     && srcfmt->Bmask == dstfmt->Bmask) {
  2472                     blitfun = Blit4to4MaskAlpha;
  2473                 } else if (a_need == COPY_ALPHA) {
  2474                     blitfun = BlitNtoNCopyAlpha;
  2475                 }
  2476             }
  2477         }
  2478         return (blitfun);
  2479 
  2480     case SDL_COPY_COLORKEY:
  2481         /* colorkey blit: Here we don't have too many options, mostly
  2482            because RLE is the preferred fast way to deal with this.
  2483            If a particular case turns out to be useful we'll add it. */
  2484 
  2485         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
  2486             return Blit2to2Key;
  2487         else if (dstfmt->BytesPerPixel == 1)
  2488             return BlitNto1Key;
  2489         else {
  2490 #if SDL_ALTIVEC_BLITTERS
  2491             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
  2492                 && SDL_HasAltiVec()) {
  2493                 return Blit32to32KeyAltivec;
  2494             } else
  2495 #endif
  2496             if (srcfmt->Amask && dstfmt->Amask) {
  2497                 return BlitNtoNKeyCopyAlpha;
  2498             } else {
  2499                 return BlitNtoNKey;
  2500             }
  2501         }
  2502     }
  2503 
  2504     return NULL;
  2505 }
  2506 
  2507 /* vi: set ts=4 sw=4 expandtab: */