src/video/SDL_blit_N.c
author Philipp Wiesemann <philipp.wiesemann@arcor.de>
Sun, 30 Oct 2016 21:01:33 +0100
changeset 10563 e3d84016cb79
parent 10559 fbf9b0e3589a
child 10616 d17dd08640a4
permissions -rw-r--r--
Fixed outdated info in README.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2016 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 #include "SDL_video.h"
    24 #include "SDL_endian.h"
    25 #include "SDL_cpuinfo.h"
    26 #include "SDL_blit.h"
    27 
    28 #include "SDL_assert.h"
    29 
    30 /* Functions to blit from N-bit surfaces to other surfaces */
    31 
    32 #if SDL_ALTIVEC_BLITTERS
    33 #ifdef HAVE_ALTIVEC_H
    34 #include <altivec.h>
    35 #endif
    36 #ifdef __MACOSX__
    37 #include <sys/sysctl.h>
    38 static size_t
    39 GetL3CacheSize(void)
    40 {
    41     const char key[] = "hw.l3cachesize";
    42     u_int64_t result = 0;
    43     size_t typeSize = sizeof(result);
    44 
    45 
    46     int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
    47     if (0 != err)
    48         return 0;
    49 
    50     return result;
    51 }
    52 #else
    53 static size_t
    54 GetL3CacheSize(void)
    55 {
    56     /* XXX: Just guess G4 */
    57     return 2097152;
    58 }
    59 #endif /* __MACOSX__ */
    60 
    61 #if (defined(__MACOSX__) && (__GNUC__ < 4))
    62 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    63         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    64 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    65         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    66 #else
    67 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    68         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    69 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    70         (vector unsigned short) { a,b,c,d,e,f,g,h }
    71 #endif
    72 
    73 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    74 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    75                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    76                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    77                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    78                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    79 
    80 #define MAKE8888(dstfmt, r, g, b, a)  \
    81     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    82       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    83       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    84       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    85 
    86 /*
    87  * Data Stream Touch...Altivec cache prefetching.
    88  *
    89  *  Don't use this on a G5...however, the speed boost is very significant
    90  *   on a G4.
    91  */
    92 #define DST_CHAN_SRC 1
    93 #define DST_CHAN_DEST 2
    94 
    95 /* macro to set DST control word value... */
    96 #define DST_CTRL(size, count, stride) \
    97     (((size) << 24) | ((count) << 16) | (stride))
    98 
    99 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   100     ? vec_lvsl(0, src) \
   101     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   102 
   103 /* Calculate the permute vector used for 32->32 swizzling */
   104 static vector unsigned char
   105 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
   106 {
   107     /*
   108      * We have to assume that the bits that aren't used by other
   109      *  colors is alpha, and it's one complete byte, since some formats
   110      *  leave alpha with a zero mask, but we should still swizzle the bits.
   111      */
   112     /* ARGB */
   113     const static const struct SDL_PixelFormat default_pixel_format = {
   114         0, NULL, 0, 0,
   115         {0, 0},
   116         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   117         0, 0, 0, 0,
   118         16, 8, 0, 24,
   119         0, NULL
   120     };
   121     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   122                                                        0x04, 0x04, 0x04, 0x04,
   123                                                        0x08, 0x08, 0x08, 0x08,
   124                                                        0x0C, 0x0C, 0x0C,
   125                                                        0x0C);
   126     vector unsigned char vswiz;
   127     vector unsigned int srcvec;
   128     Uint32 rmask, gmask, bmask, amask;
   129 
   130     if (!srcfmt) {
   131         srcfmt = &default_pixel_format;
   132     }
   133     if (!dstfmt) {
   134         dstfmt = &default_pixel_format;
   135     }
   136 
   137 #define RESHIFT(X) (3 - ((X) >> 3))
   138     rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   139     gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   140     bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   141 
   142     /* Use zero for alpha if either surface doesn't have alpha */
   143     if (dstfmt->Amask) {
   144         amask =
   145             ((srcfmt->Amask) ? RESHIFT(srcfmt->
   146                                        Ashift) : 0x10) << (dstfmt->Ashift);
   147     } else {
   148         amask =
   149             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   150                           0xFFFFFFFF);
   151     }
   152 #undef RESHIFT
   153 
   154     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   155     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   156     return (vswiz);
   157 }
   158 
   159 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
   160 static void
   161 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   162 {
   163     int height = info->dst_h;
   164     Uint8 *src = (Uint8 *) info->src;
   165     int srcskip = info->src_skip;
   166     Uint8 *dst = (Uint8 *) info->dst;
   167     int dstskip = info->dst_skip;
   168     SDL_PixelFormat *srcfmt = info->src_fmt;
   169     vector unsigned char valpha = vec_splat_u8(0);
   170     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   171     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   172                                                     0x00, 0x0a, 0x00, 0x0e,
   173                                                     0x00, 0x12, 0x00, 0x16,
   174                                                     0x00, 0x1a, 0x00, 0x1e);
   175     vector unsigned short v1 = vec_splat_u16(1);
   176     vector unsigned short v3 = vec_splat_u16(3);
   177     vector unsigned short v3f =
   178         VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
   179                           0x003f, 0x003f, 0x003f, 0x003f);
   180     vector unsigned short vfc =
   181         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   182                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
   183     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
   184     vf800 = vec_sl(vf800, vec_splat_u16(8));
   185 
   186     while (height--) {
   187         vector unsigned char valigner;
   188         vector unsigned char voverflow;
   189         vector unsigned char vsrc;
   190 
   191         int width = info->dst_w;
   192         int extrawidth;
   193 
   194         /* do scalar until we can align... */
   195 #define ONE_PIXEL_BLEND(condition, widthvar) \
   196         while (condition) { \
   197             Uint32 Pixel; \
   198             unsigned sR, sG, sB, sA; \
   199             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   200                           sR, sG, sB, sA); \
   201             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   202                                 ((sG << 3) & 0x000007E0) | \
   203                                 ((sB >> 3) & 0x0000001F)); \
   204             dst += 2; \
   205             src += 4; \
   206             widthvar--; \
   207         }
   208 
   209         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   210 
   211         /* After all that work, here's the vector part! */
   212         extrawidth = (width % 8);       /* trailing unaligned stores */
   213         width -= extrawidth;
   214         vsrc = vec_ld(0, src);
   215         valigner = VEC_ALIGNER(src);
   216 
   217         while (width) {
   218             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   219             vector unsigned int vsrc1, vsrc2;
   220             vector unsigned char vdst;
   221 
   222             voverflow = vec_ld(15, src);
   223             vsrc = vec_perm(vsrc, voverflow, valigner);
   224             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   225             src += 16;
   226             vsrc = voverflow;
   227             voverflow = vec_ld(15, src);
   228             vsrc = vec_perm(vsrc, voverflow, valigner);
   229             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   230             /* 1555 */
   231             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
   232             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   233             vgpixel = vec_and(vgpixel, vfc);
   234             vgpixel = vec_sl(vgpixel, v3);
   235             vrpixel = vec_sl(vpixel, v1);
   236             vrpixel = vec_and(vrpixel, vf800);
   237             vbpixel = vec_and(vpixel, v3f);
   238             vdst =
   239                 vec_or((vector unsigned char) vrpixel,
   240                        (vector unsigned char) vgpixel);
   241             /* 565 */
   242             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   243             vec_st(vdst, 0, dst);
   244 
   245             width -= 8;
   246             src += 16;
   247             dst += 16;
   248             vsrc = voverflow;
   249         }
   250 
   251         SDL_assert(width == 0);
   252 
   253         /* do scalar until we can align... */
   254         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   255 #undef ONE_PIXEL_BLEND
   256 
   257         src += srcskip;         /* move to next row, accounting for pitch. */
   258         dst += dstskip;
   259     }
   260 
   261 
   262 }
   263 
   264 static void
   265 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   266 {
   267     int height = info->dst_h;
   268     Uint8 *src = (Uint8 *) info->src;
   269     int srcskip = info->src_skip;
   270     Uint8 *dst = (Uint8 *) info->dst;
   271     int dstskip = info->dst_skip;
   272     SDL_PixelFormat *srcfmt = info->src_fmt;
   273     SDL_PixelFormat *dstfmt = info->dst_fmt;
   274     unsigned alpha;
   275     vector unsigned char valpha;
   276     vector unsigned char vpermute;
   277     vector unsigned short vf800;
   278     vector unsigned int v8 = vec_splat_u32(8);
   279     vector unsigned int v16 = vec_add(v8, v8);
   280     vector unsigned short v2 = vec_splat_u16(2);
   281     vector unsigned short v3 = vec_splat_u16(3);
   282     /*
   283        0x10 - 0x1f is the alpha
   284        0x00 - 0x0e evens are the red
   285        0x01 - 0x0f odds are zero
   286      */
   287     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   288                                                        0x10, 0x02, 0x01, 0x01,
   289                                                        0x10, 0x04, 0x01, 0x01,
   290                                                        0x10, 0x06, 0x01,
   291                                                        0x01);
   292     vector unsigned char vredalpha2 =
   293         (vector unsigned
   294          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   295         );
   296     /*
   297        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   298        0x11 - 0x0f odds are blue
   299      */
   300     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   301                                                    0x04, 0x05, 0x06, 0x13,
   302                                                    0x08, 0x09, 0x0a, 0x15,
   303                                                    0x0c, 0x0d, 0x0e, 0x17);
   304     vector unsigned char vblue2 =
   305         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   306         );
   307     /*
   308        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   309        0x10 - 0x0e evens are green
   310      */
   311     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   312                                                     0x04, 0x05, 0x12, 0x07,
   313                                                     0x08, 0x09, 0x14, 0x0b,
   314                                                     0x0c, 0x0d, 0x16, 0x0f);
   315     vector unsigned char vgreen2 =
   316         (vector unsigned
   317          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   318         );
   319 
   320     SDL_assert(srcfmt->BytesPerPixel == 2);
   321     SDL_assert(dstfmt->BytesPerPixel == 4);
   322 
   323     vf800 = (vector unsigned short) vec_splat_u8(-7);
   324     vf800 = vec_sl(vf800, vec_splat_u16(8));
   325 
   326     if (dstfmt->Amask && info->a) {
   327         ((unsigned char *) &valpha)[0] = alpha = info->a;
   328         valpha = vec_splat(valpha, 0);
   329     } else {
   330         alpha = 0;
   331         valpha = vec_splat_u8(0);
   332     }
   333 
   334     vpermute = calc_swizzle32(NULL, dstfmt);
   335     while (height--) {
   336         vector unsigned char valigner;
   337         vector unsigned char voverflow;
   338         vector unsigned char vsrc;
   339 
   340         int width = info->dst_w;
   341         int extrawidth;
   342 
   343         /* do scalar until we can align... */
   344 #define ONE_PIXEL_BLEND(condition, widthvar) \
   345         while (condition) { \
   346             unsigned sR, sG, sB; \
   347             unsigned short Pixel = *((unsigned short *)src); \
   348             sR = (Pixel >> 8) & 0xf8; \
   349             sG = (Pixel >> 3) & 0xfc; \
   350             sB = (Pixel << 3) & 0xf8; \
   351             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   352             src += 2; \
   353             dst += 4; \
   354             widthvar--; \
   355         }
   356         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   357 
   358         /* After all that work, here's the vector part! */
   359         extrawidth = (width % 8);       /* trailing unaligned stores */
   360         width -= extrawidth;
   361         vsrc = vec_ld(0, src);
   362         valigner = VEC_ALIGNER(src);
   363 
   364         while (width) {
   365             vector unsigned short vR, vG, vB;
   366             vector unsigned char vdst1, vdst2;
   367 
   368             voverflow = vec_ld(15, src);
   369             vsrc = vec_perm(vsrc, voverflow, valigner);
   370 
   371             vR = vec_and((vector unsigned short) vsrc, vf800);
   372             vB = vec_sl((vector unsigned short) vsrc, v3);
   373             vG = vec_sl(vB, v2);
   374 
   375             vdst1 =
   376                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   377                                                 valpha, vredalpha1);
   378             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   379             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   380             vdst1 = vec_perm(vdst1, valpha, vpermute);
   381             vec_st(vdst1, 0, dst);
   382 
   383             vdst2 =
   384                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   385                                                 valpha, vredalpha2);
   386             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   387             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   388             vdst2 = vec_perm(vdst2, valpha, vpermute);
   389             vec_st(vdst2, 16, dst);
   390 
   391             width -= 8;
   392             dst += 32;
   393             src += 16;
   394             vsrc = voverflow;
   395         }
   396 
   397         SDL_assert(width == 0);
   398 
   399 
   400         /* do scalar until we can align... */
   401         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   402 #undef ONE_PIXEL_BLEND
   403 
   404         src += srcskip;         /* move to next row, accounting for pitch. */
   405         dst += dstskip;
   406     }
   407 
   408 }
   409 
   410 
   411 static void
   412 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   413 {
   414     int height = info->dst_h;
   415     Uint8 *src = (Uint8 *) info->src;
   416     int srcskip = info->src_skip;
   417     Uint8 *dst = (Uint8 *) info->dst;
   418     int dstskip = info->dst_skip;
   419     SDL_PixelFormat *srcfmt = info->src_fmt;
   420     SDL_PixelFormat *dstfmt = info->dst_fmt;
   421     unsigned alpha;
   422     vector unsigned char valpha;
   423     vector unsigned char vpermute;
   424     vector unsigned short vf800;
   425     vector unsigned int v8 = vec_splat_u32(8);
   426     vector unsigned int v16 = vec_add(v8, v8);
   427     vector unsigned short v1 = vec_splat_u16(1);
   428     vector unsigned short v3 = vec_splat_u16(3);
   429     /*
   430        0x10 - 0x1f is the alpha
   431        0x00 - 0x0e evens are the red
   432        0x01 - 0x0f odds are zero
   433      */
   434     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   435                                                        0x10, 0x02, 0x01, 0x01,
   436                                                        0x10, 0x04, 0x01, 0x01,
   437                                                        0x10, 0x06, 0x01,
   438                                                        0x01);
   439     vector unsigned char vredalpha2 =
   440         (vector unsigned
   441          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   442         );
   443     /*
   444        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   445        0x11 - 0x0f odds are blue
   446      */
   447     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   448                                                    0x04, 0x05, 0x06, 0x13,
   449                                                    0x08, 0x09, 0x0a, 0x15,
   450                                                    0x0c, 0x0d, 0x0e, 0x17);
   451     vector unsigned char vblue2 =
   452         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   453         );
   454     /*
   455        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   456        0x10 - 0x0e evens are green
   457      */
   458     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   459                                                     0x04, 0x05, 0x12, 0x07,
   460                                                     0x08, 0x09, 0x14, 0x0b,
   461                                                     0x0c, 0x0d, 0x16, 0x0f);
   462     vector unsigned char vgreen2 =
   463         (vector unsigned
   464          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   465         );
   466 
   467     SDL_assert(srcfmt->BytesPerPixel == 2);
   468     SDL_assert(dstfmt->BytesPerPixel == 4);
   469 
   470     vf800 = (vector unsigned short) vec_splat_u8(-7);
   471     vf800 = vec_sl(vf800, vec_splat_u16(8));
   472 
   473     if (dstfmt->Amask && info->a) {
   474         ((unsigned char *) &valpha)[0] = alpha = info->a;
   475         valpha = vec_splat(valpha, 0);
   476     } else {
   477         alpha = 0;
   478         valpha = vec_splat_u8(0);
   479     }
   480 
   481     vpermute = calc_swizzle32(NULL, dstfmt);
   482     while (height--) {
   483         vector unsigned char valigner;
   484         vector unsigned char voverflow;
   485         vector unsigned char vsrc;
   486 
   487         int width = info->dst_w;
   488         int extrawidth;
   489 
   490         /* do scalar until we can align... */
   491 #define ONE_PIXEL_BLEND(condition, widthvar) \
   492         while (condition) { \
   493             unsigned sR, sG, sB; \
   494             unsigned short Pixel = *((unsigned short *)src); \
   495             sR = (Pixel >> 7) & 0xf8; \
   496             sG = (Pixel >> 2) & 0xf8; \
   497             sB = (Pixel << 3) & 0xf8; \
   498             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   499             src += 2; \
   500             dst += 4; \
   501             widthvar--; \
   502         }
   503         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   504 
   505         /* After all that work, here's the vector part! */
   506         extrawidth = (width % 8);       /* trailing unaligned stores */
   507         width -= extrawidth;
   508         vsrc = vec_ld(0, src);
   509         valigner = VEC_ALIGNER(src);
   510 
   511         while (width) {
   512             vector unsigned short vR, vG, vB;
   513             vector unsigned char vdst1, vdst2;
   514 
   515             voverflow = vec_ld(15, src);
   516             vsrc = vec_perm(vsrc, voverflow, valigner);
   517 
   518             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
   519             vB = vec_sl((vector unsigned short) vsrc, v3);
   520             vG = vec_sl(vB, v3);
   521 
   522             vdst1 =
   523                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   524                                                 valpha, vredalpha1);
   525             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   526             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   527             vdst1 = vec_perm(vdst1, valpha, vpermute);
   528             vec_st(vdst1, 0, dst);
   529 
   530             vdst2 =
   531                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   532                                                 valpha, vredalpha2);
   533             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   534             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   535             vdst2 = vec_perm(vdst2, valpha, vpermute);
   536             vec_st(vdst2, 16, dst);
   537 
   538             width -= 8;
   539             dst += 32;
   540             src += 16;
   541             vsrc = voverflow;
   542         }
   543 
   544         SDL_assert(width == 0);
   545 
   546 
   547         /* do scalar until we can align... */
   548         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   549 #undef ONE_PIXEL_BLEND
   550 
   551         src += srcskip;         /* move to next row, accounting for pitch. */
   552         dst += dstskip;
   553     }
   554 
   555 }
   556 
   557 static void BlitNtoNKey(SDL_BlitInfo * info);
   558 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
   559 static void
   560 Blit32to32KeyAltivec(SDL_BlitInfo * info)
   561 {
   562     int height = info->dst_h;
   563     Uint32 *srcp = (Uint32 *) info->src;
   564     int srcskip = info->src_skip / 4;
   565     Uint32 *dstp = (Uint32 *) info->dst;
   566     int dstskip = info->dst_skip / 4;
   567     SDL_PixelFormat *srcfmt = info->src_fmt;
   568     int srcbpp = srcfmt->BytesPerPixel;
   569     SDL_PixelFormat *dstfmt = info->dst_fmt;
   570     int dstbpp = dstfmt->BytesPerPixel;
   571     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   572     unsigned alpha = dstfmt->Amask ? info->a : 0;
   573     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   574     Uint32 ckey = info->colorkey;
   575     vector unsigned int valpha;
   576     vector unsigned char vpermute;
   577     vector unsigned char vzero;
   578     vector unsigned int vckey;
   579     vector unsigned int vrgbmask;
   580     vpermute = calc_swizzle32(srcfmt, dstfmt);
   581     if (info->dst_w < 16) {
   582         if (copy_alpha) {
   583             BlitNtoNKeyCopyAlpha(info);
   584         } else {
   585             BlitNtoNKey(info);
   586         }
   587         return;
   588     }
   589     vzero = vec_splat_u8(0);
   590     if (alpha) {
   591         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   592         valpha =
   593             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   594     } else {
   595         valpha = (vector unsigned int) vzero;
   596     }
   597     ckey &= rgbmask;
   598     ((unsigned int *) (char *) &vckey)[0] = ckey;
   599     vckey = vec_splat(vckey, 0);
   600     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   601     vrgbmask = vec_splat(vrgbmask, 0);
   602 
   603     while (height--) {
   604 #define ONE_PIXEL_BLEND(condition, widthvar) \
   605         if (copy_alpha) { \
   606             while (condition) { \
   607                 Uint32 Pixel; \
   608                 unsigned sR, sG, sB, sA; \
   609                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   610                           sR, sG, sB, sA); \
   611                 if ( (Pixel & rgbmask) != ckey ) { \
   612                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   613                             sR, sG, sB, sA); \
   614                 } \
   615                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   616                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   617                 widthvar--; \
   618             } \
   619         } else { \
   620             while (condition) { \
   621                 Uint32 Pixel; \
   622                 unsigned sR, sG, sB; \
   623                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   624                 if ( Pixel != ckey ) { \
   625                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   626                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   627                               sR, sG, sB, alpha); \
   628                 } \
   629                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   630                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   631                 widthvar--; \
   632             } \
   633         }
   634         int width = info->dst_w;
   635         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   636         SDL_assert(width > 0);
   637         if (width > 0) {
   638             int extrawidth = (width % 4);
   639             vector unsigned char valigner = VEC_ALIGNER(srcp);
   640             vector unsigned int vs = vec_ld(0, srcp);
   641             width -= extrawidth;
   642             SDL_assert(width >= 4);
   643             while (width) {
   644                 vector unsigned char vsel;
   645                 vector unsigned int vd;
   646                 vector unsigned int voverflow = vec_ld(15, srcp);
   647                 /* load the source vec */
   648                 vs = vec_perm(vs, voverflow, valigner);
   649                 /* vsel is set for items that match the key */
   650                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   651                 vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
   652                 /* permute the src vec to the dest format */
   653                 vs = vec_perm(vs, valpha, vpermute);
   654                 /* load the destination vec */
   655                 vd = vec_ld(0, dstp);
   656                 /* select the source and dest into vs */
   657                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   658                                                    (vector unsigned char) vd,
   659                                                    vsel);
   660 
   661                 vec_st(vd, 0, dstp);
   662                 srcp += 4;
   663                 width -= 4;
   664                 dstp += 4;
   665                 vs = voverflow;
   666             }
   667             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   668 #undef ONE_PIXEL_BLEND
   669             srcp += srcskip;
   670             dstp += dstskip;
   671         }
   672     }
   673 }
   674 
   675 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   676 /* Use this on a G5 */
   677 static void
   678 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
   679 {
   680     int height = info->dst_h;
   681     Uint32 *src = (Uint32 *) info->src;
   682     int srcskip = info->src_skip / 4;
   683     Uint32 *dst = (Uint32 *) info->dst;
   684     int dstskip = info->dst_skip / 4;
   685     SDL_PixelFormat *srcfmt = info->src_fmt;
   686     SDL_PixelFormat *dstfmt = info->dst_fmt;
   687     vector unsigned int vzero = vec_splat_u32(0);
   688     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   689     if (dstfmt->Amask && !srcfmt->Amask) {
   690         if (info->a) {
   691             vector unsigned char valpha;
   692             ((unsigned char *) &valpha)[0] = info->a;
   693             vzero = (vector unsigned int) vec_splat(valpha, 0);
   694         }
   695     }
   696 
   697     SDL_assert(srcfmt->BytesPerPixel == 4);
   698     SDL_assert(dstfmt->BytesPerPixel == 4);
   699 
   700     while (height--) {
   701         vector unsigned char valigner;
   702         vector unsigned int vbits;
   703         vector unsigned int voverflow;
   704         Uint32 bits;
   705         Uint8 r, g, b, a;
   706 
   707         int width = info->dst_w;
   708         int extrawidth;
   709 
   710         /* do scalar until we can align... */
   711         while ((UNALIGNED_PTR(dst)) && (width)) {
   712             bits = *(src++);
   713             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   714             if(!srcfmt->Amask)
   715               a = info->a;
   716             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   717             width--;
   718         }
   719 
   720         /* After all that work, here's the vector part! */
   721         extrawidth = (width % 4);
   722         width -= extrawidth;
   723         valigner = VEC_ALIGNER(src);
   724         vbits = vec_ld(0, src);
   725 
   726         while (width) {
   727             voverflow = vec_ld(15, src);
   728             src += 4;
   729             width -= 4;
   730             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   731             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   732             vec_st(vbits, 0, dst);      /* store it back out. */
   733             dst += 4;
   734             vbits = voverflow;
   735         }
   736 
   737         SDL_assert(width == 0);
   738 
   739         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   740         while (extrawidth) {
   741             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   742             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   743             if(!srcfmt->Amask)
   744               a = info->a;
   745             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   746             extrawidth--;
   747         }
   748 
   749         src += srcskip;
   750         dst += dstskip;
   751     }
   752 
   753 }
   754 
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
/* Use this on a G4 */
/*
 * Converts info->dst_w x info->dst_h pixels from info->src to info->dst;
 * both formats must be 4 bytes per pixel (asserted below).  Every row is
 * processed in three phases:
 *   1. scalar pixels while UNALIGNED_PTR(dst) is true,
 *   2. four pixels per iteration using vector load / permute / store,
 *   3. the remaining (width % 4) pixels in scalar code again.
 * While converting, vec_dstt()/vec_dstst() prefetch hints are issued ahead
 * of src and dst; both streams are stopped with vec_dss() before returning.
 */
static void
ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
{
    /* NOTE(review): these leads are computed with sizeof (16 and 64) but are
       added to Uint32 pointers below, so the prefetch address is that many
       *pixels* ahead, not bytes -- confirm this is the intended distance. */
    const int scalar_dst_lead = sizeof(Uint32) * 4;
    const int vector_dst_lead = sizeof(Uint32) * 16;

    int height = info->dst_h;
    Uint32 *src = (Uint32 *) info->src;
    /* The skips are divided by 4 because src/dst advance as Uint32 pointers. */
    int srcskip = info->src_skip / 4;
    Uint32 *dst = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* Second operand of the swizzle permute: all zero by default. */
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (dstfmt->Amask && !srcfmt->Amask) {
        /* Destination has alpha but the source does not: fill every byte of
           vzero with info->a (only element 0 of valpha is written, and that
           is the element being splatted). */
        if (info->a) {
            vector unsigned char valpha;
            ((unsigned char *) &valpha)[0] = info->a;
            vzero = (vector unsigned int) vec_splat(valpha, 0);
        }
    }

    SDL_assert(srcfmt->BytesPerPixel == 4);
    SDL_assert(dstfmt->BytesPerPixel == 4);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
        while ((UNALIGNED_PTR(dst)) && (width)) {
            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            /* No alpha channel in the source: use the per-blit alpha. */
            if(!srcfmt->Amask)
              a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        /* After all that work, here's the vector part! */
        extrawidth = (width % 4);       /* pixels left for the scalar tail */
        width -= extrawidth;            /* now a multiple of 4 */
        valigner = VEC_ALIGNER(src);
        /* Prime the pipeline; this load happens even when width is 0. */
        vbits = vec_ld(0, src);

        while (width) {
            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            /* Second load of the pair; combined with vbits via valigner to
               form the four source pixels starting at src. */
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
            vec_st(vbits, 0, dst);      /* store it back out. */
            dst += 4;
            /* Reuse the second load as the first half of the next pair. */
            vbits = voverflow;
        }

        SDL_assert(width == 0);

        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
        while (extrawidth) {
            bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if(!srcfmt->Amask)
              a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        /* Step over the inter-row padding (already in pixel units). */
        src += srcskip;
        dst += dstskip;
    }

    /* Stop both prefetch streams started above. */
    vec_dss(DST_CHAN_SRC);
    vec_dss(DST_CHAN_DEST);
}
   847 
   848 static Uint32
   849 GetBlitFeatures(void)
   850 {
   851     static Uint32 features = 0xffffffff;
   852     if (features == 0xffffffff) {
   853         /* Provide an override for testing .. */
   854         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   855         if (override) {
   856             features = 0;
   857             SDL_sscanf(override, "%u", &features);
   858         } else {
   859             features = (0
   860                         /* Feature 1 is has-MMX */
   861                         | ((SDL_HasMMX())? 1 : 0)
   862                         /* Feature 2 is has-AltiVec */
   863                         | ((SDL_HasAltiVec())? 2 : 0)
   864                         /* Feature 4 is dont-use-prefetch */
   865                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   866                         | ((GetL3CacheSize() == 0) ? 4 : 0)
   867                 );
   868         }
   869     }
   870     return features;
   871 }
   872 
   873 #if __MWERKS__
   874 #pragma altivec_model off
   875 #endif
   876 #else
/* Feature 1 is has-MMX */
/* Macro form of GetBlitFeatures() for the #else branch of the conditional
   section above: the only bit it can report is 1, taken from SDL_HasMMX(). */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   879 #endif
   880 
/* This is now endian dependent */
/*
 * HI and LO are array indices chosen by byte order.
 *  - In the RGB888_*_TWO macros they pick which of two adjacent source
 *    pixels (src[HI], src[LO]) lands in the upper and lower 16 bits of the
 *    combined 32-bit store.
 *  - In RGB565_32() they pick the two bytes of a 16-bit source pixel.
 */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#define HI  1
#define LO  0
#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
#define HI  0
#define LO  1
#endif
   889 
   890 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   891 #define RGB888_RGB332(dst, src) { \
   892     dst = (Uint8)((((src)&0x00E00000)>>16)| \
   893                   (((src)&0x0000E000)>>11)| \
   894                   (((src)&0x000000C0)>>6)); \
   895 }
   896 static void
   897 Blit_RGB888_index8(SDL_BlitInfo * info)
   898 {
   899 #ifndef USE_DUFFS_LOOP
   900     int c;
   901 #endif
   902     int width, height;
   903     Uint32 *src;
   904     const Uint8 *map;
   905     Uint8 *dst;
   906     int srcskip, dstskip;
   907 
   908     /* Set up some basic variables */
   909     width = info->dst_w;
   910     height = info->dst_h;
   911     src = (Uint32 *) info->src;
   912     srcskip = info->src_skip / 4;
   913     dst = info->dst;
   914     dstskip = info->dst_skip;
   915     map = info->table;
   916 
   917     if (map == NULL) {
   918         while (height--) {
   919 #ifdef USE_DUFFS_LOOP
   920             /* *INDENT-OFF* */
   921             DUFFS_LOOP(
   922                 RGB888_RGB332(*dst++, *src);
   923             , width);
   924             /* *INDENT-ON* */
   925 #else
   926             for (c = width / 4; c; --c) {
   927                 /* Pack RGB into 8bit pixel */
   928                 ++src;
   929                 RGB888_RGB332(*dst++, *src);
   930                 ++src;
   931                 RGB888_RGB332(*dst++, *src);
   932                 ++src;
   933                 RGB888_RGB332(*dst++, *src);
   934                 ++src;
   935             }
   936             switch (width & 3) {
   937             case 3:
   938                 RGB888_RGB332(*dst++, *src);
   939                 ++src;
   940             case 2:
   941                 RGB888_RGB332(*dst++, *src);
   942                 ++src;
   943             case 1:
   944                 RGB888_RGB332(*dst++, *src);
   945                 ++src;
   946             }
   947 #endif /* USE_DUFFS_LOOP */
   948             src += srcskip;
   949             dst += dstskip;
   950         }
   951     } else {
   952         int Pixel;
   953 
   954         while (height--) {
   955 #ifdef USE_DUFFS_LOOP
   956             /* *INDENT-OFF* */
   957             DUFFS_LOOP(
   958                 RGB888_RGB332(Pixel, *src);
   959                 *dst++ = map[Pixel];
   960                 ++src;
   961             , width);
   962             /* *INDENT-ON* */
   963 #else
   964             for (c = width / 4; c; --c) {
   965                 /* Pack RGB into 8bit pixel */
   966                 RGB888_RGB332(Pixel, *src);
   967                 *dst++ = map[Pixel];
   968                 ++src;
   969                 RGB888_RGB332(Pixel, *src);
   970                 *dst++ = map[Pixel];
   971                 ++src;
   972                 RGB888_RGB332(Pixel, *src);
   973                 *dst++ = map[Pixel];
   974                 ++src;
   975                 RGB888_RGB332(Pixel, *src);
   976                 *dst++ = map[Pixel];
   977                 ++src;
   978             }
   979             switch (width & 3) {
   980             case 3:
   981                 RGB888_RGB332(Pixel, *src);
   982                 *dst++ = map[Pixel];
   983                 ++src;
   984             case 2:
   985                 RGB888_RGB332(Pixel, *src);
   986                 *dst++ = map[Pixel];
   987                 ++src;
   988             case 1:
   989                 RGB888_RGB332(Pixel, *src);
   990                 *dst++ = map[Pixel];
   991                 ++src;
   992             }
   993 #endif /* USE_DUFFS_LOOP */
   994             src += srcskip;
   995             dst += dstskip;
   996         }
   997     }
   998 }
   999 
  1000 /* Special optimized blit for RGB 10-10-10 --> RGB 3-3-2 */
  1001 #define RGB101010_RGB332(dst, src) { \
  1002     dst = (Uint8)((((src)&0x38000000)>>22)| \
  1003                   (((src)&0x000E0000)>>15)| \
  1004                   (((src)&0x00000300)>>8)); \
  1005 }
  1006 static void
  1007 Blit_RGB101010_index8(SDL_BlitInfo * info)
  1008 {
  1009 #ifndef USE_DUFFS_LOOP
  1010     int c;
  1011 #endif
  1012     int width, height;
  1013     Uint32 *src;
  1014     const Uint8 *map;
  1015     Uint8 *dst;
  1016     int srcskip, dstskip;
  1017 
  1018     /* Set up some basic variables */
  1019     width = info->dst_w;
  1020     height = info->dst_h;
  1021     src = (Uint32 *) info->src;
  1022     srcskip = info->src_skip / 4;
  1023     dst = info->dst;
  1024     dstskip = info->dst_skip;
  1025     map = info->table;
  1026 
  1027     if (map == NULL) {
  1028         while (height--) {
  1029 #ifdef USE_DUFFS_LOOP
  1030             /* *INDENT-OFF* */
  1031             DUFFS_LOOP(
  1032                 RGB101010_RGB332(*dst++, *src);
  1033             , width);
  1034             /* *INDENT-ON* */
  1035 #else
  1036             for (c = width / 4; c; --c) {
  1037                 /* Pack RGB into 8bit pixel */
  1038                 ++src;
  1039                 RGB101010_RGB332(*dst++, *src);
  1040                 ++src;
  1041                 RGB101010_RGB332(*dst++, *src);
  1042                 ++src;
  1043                 RGB101010_RGB332(*dst++, *src);
  1044                 ++src;
  1045             }
  1046             switch (width & 3) {
  1047             case 3:
  1048                 RGB101010_RGB332(*dst++, *src);
  1049                 ++src;
  1050             case 2:
  1051                 RGB101010_RGB332(*dst++, *src);
  1052                 ++src;
  1053             case 1:
  1054                 RGB101010_RGB332(*dst++, *src);
  1055                 ++src;
  1056             }
  1057 #endif /* USE_DUFFS_LOOP */
  1058             src += srcskip;
  1059             dst += dstskip;
  1060         }
  1061     } else {
  1062         int Pixel;
  1063 
  1064         while (height--) {
  1065 #ifdef USE_DUFFS_LOOP
  1066             /* *INDENT-OFF* */
  1067             DUFFS_LOOP(
  1068                 RGB101010_RGB332(Pixel, *src);
  1069                 *dst++ = map[Pixel];
  1070                 ++src;
  1071             , width);
  1072             /* *INDENT-ON* */
  1073 #else
  1074             for (c = width / 4; c; --c) {
  1075                 /* Pack RGB into 8bit pixel */
  1076                 RGB101010_RGB332(Pixel, *src);
  1077                 *dst++ = map[Pixel];
  1078                 ++src;
  1079                 RGB101010_RGB332(Pixel, *src);
  1080                 *dst++ = map[Pixel];
  1081                 ++src;
  1082                 RGB101010_RGB332(Pixel, *src);
  1083                 *dst++ = map[Pixel];
  1084                 ++src;
  1085                 RGB101010_RGB332(Pixel, *src);
  1086                 *dst++ = map[Pixel];
  1087                 ++src;
  1088             }
  1089             switch (width & 3) {
  1090             case 3:
  1091                 RGB101010_RGB332(Pixel, *src);
  1092                 *dst++ = map[Pixel];
  1093                 ++src;
  1094             case 2:
  1095                 RGB101010_RGB332(Pixel, *src);
  1096                 *dst++ = map[Pixel];
  1097                 ++src;
  1098             case 1:
  1099                 RGB101010_RGB332(Pixel, *src);
  1100                 *dst++ = map[Pixel];
  1101                 ++src;
  1102             }
  1103 #endif /* USE_DUFFS_LOOP */
  1104             src += srcskip;
  1105             dst += dstskip;
  1106         }
  1107     }
  1108 }
  1109 
  1110 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
  1111 #define RGB888_RGB555(dst, src) { \
  1112     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
  1113                                 (((*src)&0x0000F800)>>6)| \
  1114                                 (((*src)&0x000000F8)>>3)); \
  1115 }
  1116 #define RGB888_RGB555_TWO(dst, src) { \
  1117     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
  1118                          (((src[HI])&0x0000F800)>>6)| \
  1119                          (((src[HI])&0x000000F8)>>3))<<16)| \
  1120                          (((src[LO])&0x00F80000)>>9)| \
  1121                          (((src[LO])&0x0000F800)>>6)| \
  1122                          (((src[LO])&0x000000F8)>>3); \
  1123 }
  1124 static void
  1125 Blit_RGB888_RGB555(SDL_BlitInfo * info)
  1126 {
  1127 #ifndef USE_DUFFS_LOOP
  1128     int c;
  1129 #endif
  1130     int width, height;
  1131     Uint32 *src;
  1132     Uint16 *dst;
  1133     int srcskip, dstskip;
  1134 
  1135     /* Set up some basic variables */
  1136     width = info->dst_w;
  1137     height = info->dst_h;
  1138     src = (Uint32 *) info->src;
  1139     srcskip = info->src_skip / 4;
  1140     dst = (Uint16 *) info->dst;
  1141     dstskip = info->dst_skip / 2;
  1142 
  1143 #ifdef USE_DUFFS_LOOP
  1144     while (height--) {
  1145         /* *INDENT-OFF* */
  1146         DUFFS_LOOP(
  1147             RGB888_RGB555(dst, src);
  1148             ++src;
  1149             ++dst;
  1150         , width);
  1151         /* *INDENT-ON* */
  1152         src += srcskip;
  1153         dst += dstskip;
  1154     }
  1155 #else
  1156     /* Memory align at 4-byte boundary, if necessary */
  1157     if ((long) dst & 0x03) {
  1158         /* Don't do anything if width is 0 */
  1159         if (width == 0) {
  1160             return;
  1161         }
  1162         --width;
  1163 
  1164         while (height--) {
  1165             /* Perform copy alignment */
  1166             RGB888_RGB555(dst, src);
  1167             ++src;
  1168             ++dst;
  1169 
  1170             /* Copy in 4 pixel chunks */
  1171             for (c = width / 4; c; --c) {
  1172                 RGB888_RGB555_TWO(dst, src);
  1173                 src += 2;
  1174                 dst += 2;
  1175                 RGB888_RGB555_TWO(dst, src);
  1176                 src += 2;
  1177                 dst += 2;
  1178             }
  1179             /* Get any leftovers */
  1180             switch (width & 3) {
  1181             case 3:
  1182                 RGB888_RGB555(dst, src);
  1183                 ++src;
  1184                 ++dst;
  1185             case 2:
  1186                 RGB888_RGB555_TWO(dst, src);
  1187                 src += 2;
  1188                 dst += 2;
  1189                 break;
  1190             case 1:
  1191                 RGB888_RGB555(dst, src);
  1192                 ++src;
  1193                 ++dst;
  1194                 break;
  1195             }
  1196             src += srcskip;
  1197             dst += dstskip;
  1198         }
  1199     } else {
  1200         while (height--) {
  1201             /* Copy in 4 pixel chunks */
  1202             for (c = width / 4; c; --c) {
  1203                 RGB888_RGB555_TWO(dst, src);
  1204                 src += 2;
  1205                 dst += 2;
  1206                 RGB888_RGB555_TWO(dst, src);
  1207                 src += 2;
  1208                 dst += 2;
  1209             }
  1210             /* Get any leftovers */
  1211             switch (width & 3) {
  1212             case 3:
  1213                 RGB888_RGB555(dst, src);
  1214                 ++src;
  1215                 ++dst;
  1216             case 2:
  1217                 RGB888_RGB555_TWO(dst, src);
  1218                 src += 2;
  1219                 dst += 2;
  1220                 break;
  1221             case 1:
  1222                 RGB888_RGB555(dst, src);
  1223                 ++src;
  1224                 ++dst;
  1225                 break;
  1226             }
  1227             src += srcskip;
  1228             dst += dstskip;
  1229         }
  1230     }
  1231 #endif /* USE_DUFFS_LOOP */
  1232 }
  1233 
  1234 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1235 #define RGB888_RGB565(dst, src) { \
  1236     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1237                                 (((*src)&0x0000FC00)>>5)| \
  1238                                 (((*src)&0x000000F8)>>3)); \
  1239 }
  1240 #define RGB888_RGB565_TWO(dst, src) { \
  1241     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1242                          (((src[HI])&0x0000FC00)>>5)| \
  1243                          (((src[HI])&0x000000F8)>>3))<<16)| \
  1244                          (((src[LO])&0x00F80000)>>8)| \
  1245                          (((src[LO])&0x0000FC00)>>5)| \
  1246                          (((src[LO])&0x000000F8)>>3); \
  1247 }
  1248 static void
  1249 Blit_RGB888_RGB565(SDL_BlitInfo * info)
  1250 {
  1251 #ifndef USE_DUFFS_LOOP
  1252     int c;
  1253 #endif
  1254     int width, height;
  1255     Uint32 *src;
  1256     Uint16 *dst;
  1257     int srcskip, dstskip;
  1258 
  1259     /* Set up some basic variables */
  1260     width = info->dst_w;
  1261     height = info->dst_h;
  1262     src = (Uint32 *) info->src;
  1263     srcskip = info->src_skip / 4;
  1264     dst = (Uint16 *) info->dst;
  1265     dstskip = info->dst_skip / 2;
  1266 
  1267 #ifdef USE_DUFFS_LOOP
  1268     while (height--) {
  1269         /* *INDENT-OFF* */
  1270         DUFFS_LOOP(
  1271             RGB888_RGB565(dst, src);
  1272             ++src;
  1273             ++dst;
  1274         , width);
  1275         /* *INDENT-ON* */
  1276         src += srcskip;
  1277         dst += dstskip;
  1278     }
  1279 #else
  1280     /* Memory align at 4-byte boundary, if necessary */
  1281     if ((long) dst & 0x03) {
  1282         /* Don't do anything if width is 0 */
  1283         if (width == 0) {
  1284             return;
  1285         }
  1286         --width;
  1287 
  1288         while (height--) {
  1289             /* Perform copy alignment */
  1290             RGB888_RGB565(dst, src);
  1291             ++src;
  1292             ++dst;
  1293 
  1294             /* Copy in 4 pixel chunks */
  1295             for (c = width / 4; c; --c) {
  1296                 RGB888_RGB565_TWO(dst, src);
  1297                 src += 2;
  1298                 dst += 2;
  1299                 RGB888_RGB565_TWO(dst, src);
  1300                 src += 2;
  1301                 dst += 2;
  1302             }
  1303             /* Get any leftovers */
  1304             switch (width & 3) {
  1305             case 3:
  1306                 RGB888_RGB565(dst, src);
  1307                 ++src;
  1308                 ++dst;
  1309             case 2:
  1310                 RGB888_RGB565_TWO(dst, src);
  1311                 src += 2;
  1312                 dst += 2;
  1313                 break;
  1314             case 1:
  1315                 RGB888_RGB565(dst, src);
  1316                 ++src;
  1317                 ++dst;
  1318                 break;
  1319             }
  1320             src += srcskip;
  1321             dst += dstskip;
  1322         }
  1323     } else {
  1324         while (height--) {
  1325             /* Copy in 4 pixel chunks */
  1326             for (c = width / 4; c; --c) {
  1327                 RGB888_RGB565_TWO(dst, src);
  1328                 src += 2;
  1329                 dst += 2;
  1330                 RGB888_RGB565_TWO(dst, src);
  1331                 src += 2;
  1332                 dst += 2;
  1333             }
  1334             /* Get any leftovers */
  1335             switch (width & 3) {
  1336             case 3:
  1337                 RGB888_RGB565(dst, src);
  1338                 ++src;
  1339                 ++dst;
  1340             case 2:
  1341                 RGB888_RGB565_TWO(dst, src);
  1342                 src += 2;
  1343                 dst += 2;
  1344                 break;
  1345             case 1:
  1346                 RGB888_RGB565(dst, src);
  1347                 ++src;
  1348                 ++dst;
  1349                 break;
  1350             }
  1351             src += srcskip;
  1352             dst += dstskip;
  1353         }
  1354     }
  1355 #endif /* USE_DUFFS_LOOP */
  1356 }
  1357 
  1358 
  1359 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1360 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1361 static void
  1362 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
  1363 {
  1364 #ifndef USE_DUFFS_LOOP
  1365     int c;
  1366 #endif
  1367     int width, height;
  1368     Uint8 *src;
  1369     Uint32 *dst;
  1370     int srcskip, dstskip;
  1371 
  1372     /* Set up some basic variables */
  1373     width = info->dst_w;
  1374     height = info->dst_h;
  1375     src = (Uint8 *) info->src;
  1376     srcskip = info->src_skip;
  1377     dst = (Uint32 *) info->dst;
  1378     dstskip = info->dst_skip / 4;
  1379 
  1380 #ifdef USE_DUFFS_LOOP
  1381     while (height--) {
  1382         /* *INDENT-OFF* */
  1383         DUFFS_LOOP(
  1384         {
  1385             *dst++ = RGB565_32(dst, src, map);
  1386             src += 2;
  1387         },
  1388         width);
  1389         /* *INDENT-ON* */
  1390         src += srcskip;
  1391         dst += dstskip;
  1392     }
  1393 #else
  1394     while (height--) {
  1395         /* Copy in 4 pixel chunks */
  1396         for (c = width / 4; c; --c) {
  1397             *dst++ = RGB565_32(dst, src, map);
  1398             src += 2;
  1399             *dst++ = RGB565_32(dst, src, map);
  1400             src += 2;
  1401             *dst++ = RGB565_32(dst, src, map);
  1402             src += 2;
  1403             *dst++ = RGB565_32(dst, src, map);
  1404             src += 2;
  1405         }
  1406         /* Get any leftovers */
  1407         switch (width & 3) {
  1408         case 3:
  1409             *dst++ = RGB565_32(dst, src, map);
  1410             src += 2;
  1411         case 2:
  1412             *dst++ = RGB565_32(dst, src, map);
  1413             src += 2;
  1414         case 1:
  1415             *dst++ = RGB565_32(dst, src, map);
  1416             src += 2;
  1417             break;
  1418         }
  1419         src += srcskip;
  1420         dst += dstskip;
  1421     }
  1422 #endif /* USE_DUFFS_LOOP */
  1423 }
  1424 
/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/*
 * Lookup table consumed by RGB565_32(): 256 pairs of entries, one pair per
 * possible byte value v.
 *   [2*v]     contribution when v is the src[LO] byte of the 16-bit pixel:
 *             the expanded low-order colour field in bits 0-7 plus the low
 *             three green bits in bits 8-15.
 *   [2*v + 1] contribution when v is the src[HI] byte: 0xff in bits 24-31,
 *             the expanded high-order colour field in bits 16-23 plus the
 *             high three green bits in bits 8-15.
 * The two contributions are added to form the 32-bit output pixel.
 */
static const Uint32 RGB565_ARGB8888_LUT[512] = {
    0x00000000, 0xff000000, 0x00000008, 0xff002000,
    0x00000010, 0xff004000, 0x00000018, 0xff006100,
    0x00000020, 0xff008100, 0x00000029, 0xff00a100,
    0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
    0x00000041, 0xff080000, 0x0000004a, 0xff082000,
    0x00000052, 0xff084000, 0x0000005a, 0xff086100,
    0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
    0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
    0x00000083, 0xff100000, 0x0000008b, 0xff102000,
    0x00000094, 0xff104000, 0x0000009c, 0xff106100,
    0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
    0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
    0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
    0x000000d5, 0xff184000, 0x000000de, 0xff186100,
    0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
    0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
    0x00000400, 0xff200000, 0x00000408, 0xff202000,
    0x00000410, 0xff204000, 0x00000418, 0xff206100,
    0x00000420, 0xff208100, 0x00000429, 0xff20a100,
    0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
    0x00000441, 0xff290000, 0x0000044a, 0xff292000,
    0x00000452, 0xff294000, 0x0000045a, 0xff296100,
    0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
    0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
    0x00000483, 0xff310000, 0x0000048b, 0xff312000,
    0x00000494, 0xff314000, 0x0000049c, 0xff316100,
    0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
    0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
    0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
    0x000004d5, 0xff394000, 0x000004de, 0xff396100,
    0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
    0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
    0x00000800, 0xff410000, 0x00000808, 0xff412000,
    0x00000810, 0xff414000, 0x00000818, 0xff416100,
    0x00000820, 0xff418100, 0x00000829, 0xff41a100,
    0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
    0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
    0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
    0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
    0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
    0x00000883, 0xff520000, 0x0000088b, 0xff522000,
    0x00000894, 0xff524000, 0x0000089c, 0xff526100,
    0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
    0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
    0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
    0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
    0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
    0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
    0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
    0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
    0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
    0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
    0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
    0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
    0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
    0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
    0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
    0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
    0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
    0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
    0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
    0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
    0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
    0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
    0x00001000, 0xff830000, 0x00001008, 0xff832000,
    0x00001010, 0xff834000, 0x00001018, 0xff836100,
    0x00001020, 0xff838100, 0x00001029, 0xff83a100,
    0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
    0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
    0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
    0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
    0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
    0x00001083, 0xff940000, 0x0000108b, 0xff942000,
    0x00001094, 0xff944000, 0x0000109c, 0xff946100,
    0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
    0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
    0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
    0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
    0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
    0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
    0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
    0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
    0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
    0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
    0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
    0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
    0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
    0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
    0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
    0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
    0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
    0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
    0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
    0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
    0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
    0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
    0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
    0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
    0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
    0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
    0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
    0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
    0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
    0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
    0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
    0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
    0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
    0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
    0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
    0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
    0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
    0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
    0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
    0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
    0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
    0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
    0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
    0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
    0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
    0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
    0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
    0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
    0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
    0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
    0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
    0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
    0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
    0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
};
  1556 
  1557 static void
  1558 Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
  1559 {
  1560     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1561 }
  1562 
  1563 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1564 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1565     0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1566     0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1567     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1568     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1569     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1570     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1571     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1572     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1573     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1574     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1575     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1576     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1577     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1578     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1579     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1580     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1581     0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1582     0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1583     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1584     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1585     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1586     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1587     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1588     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1589     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1590     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1591     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1592     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1593     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1594     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1595     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1596     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1597     0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1598     0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1599     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1600     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1601     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1602     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1603     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1604     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1605     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1606     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1607     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1608     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1609     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1610     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1611     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1612     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1613     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1614     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1615     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1616     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1617     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1618     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1619     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1620     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1621     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1622     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1623     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1624     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1625     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1626     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1627     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1628     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1629     0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1630     0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1631     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1632     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1633     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1634     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1635     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1636     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1637     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1638     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1639     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1640     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1641     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1642     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1643     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1644     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1645     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1646     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1647     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1648     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1649     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1650     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1651     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1652     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1653     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1654     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1655     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1656     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1657     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1658     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1659     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1660     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1661     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1662     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1663     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1664     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1665     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1666     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1667     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1668     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1669     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1670     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1671     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1672     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1673     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1674     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1675     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1676     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1677     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1678     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1679     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1680     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1681     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1682     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1683     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1684     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1685     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1686     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1687     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1688     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1689     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1690     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1691     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1692     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1693 };
  1694 
  1695 static void
  1696 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
  1697 {
  1698     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1699 }
  1700 
  1701 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
  1702 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1703     0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1704     0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1705     0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1706     0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1707     0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1708     0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1709     0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1710     0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1711     0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1712     0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1713     0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1714     0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1715     0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1716     0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1717     0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1718     0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1719     0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1720     0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1721     0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1722     0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1723     0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1724     0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1725     0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1726     0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1727     0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1728     0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1729     0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1730     0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1731     0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1732     0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1733     0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1734     0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1735     0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1736     0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1737     0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1738     0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1739     0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1740     0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1741     0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1742     0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1743     0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1744     0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1745     0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1746     0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1747     0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1748     0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1749     0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1750     0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1751     0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1752     0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1753     0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1754     0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1755     0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1756     0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1757     0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1758     0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1759     0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1760     0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1761     0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1762     0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1763     0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1764     0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1765     0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1766     0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1767     0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1768     0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1769     0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1770     0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1771     0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1772     0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1773     0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1774     0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1775     0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1776     0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1777     0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1778     0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1779     0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1780     0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1781     0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1782     0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1783     0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1784     0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1785     0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1786     0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1787     0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1788     0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1789     0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1790     0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1791     0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1792     0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1793     0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1794     0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1795     0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1796     0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1797     0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1798     0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1799     0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1800     0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1801     0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1802     0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1803     0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1804     0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1805     0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1806     0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1807     0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1808     0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1809     0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1810     0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1811     0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1812     0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1813     0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1814     0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1815     0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1816     0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1817     0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1818     0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1819     0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1820     0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1821     0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1822     0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1823     0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1824     0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1825     0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1826     0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1827     0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1828     0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1829     0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1830     0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1831 };
  1832 
  1833 static void
  1834 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
  1835 {
  1836     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1837 }
  1838 
  1839 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */
  1840 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1841     0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1842     0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1843     0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1844     0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1845     0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1846     0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1847     0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1848     0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1849     0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1850     0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1851     0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1852     0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1853     0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1854     0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1855     0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1856     0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1857     0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1858     0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1859     0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1860     0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1861     0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1862     0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1863     0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1864     0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1865     0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1866     0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1867     0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1868     0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1869     0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1870     0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1871     0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1872     0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1873     0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1874     0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1875     0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1876     0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1877     0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1878     0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1879     0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1880     0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1881     0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1882     0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1883     0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1884     0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1885     0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1886     0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1887     0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1888     0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1889     0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1890     0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1891     0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1892     0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1893     0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1894     0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1895     0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1896     0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1897     0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1898     0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1899     0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1900     0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1901     0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1902     0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1903     0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1904     0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1905     0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1906     0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1907     0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1908     0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1909     0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1910     0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1911     0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1912     0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1913     0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1914     0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1915     0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1916     0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1917     0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1918     0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1919     0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1920     0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1921     0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1922     0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1923     0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1924     0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1925     0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1926     0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1927     0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1928     0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1929     0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1930     0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1931     0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1932     0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1933     0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1934     0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1935     0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1936     0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1937     0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1938     0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1939     0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1940     0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1941     0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1942     0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1943     0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1944     0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1945     0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1946     0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1947     0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1948     0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1949     0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1950     0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1951     0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1952     0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1953     0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1954     0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1955     0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1956     0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1957     0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1958     0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1959     0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1960     0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1961     0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1962     0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1963     0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1964     0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1965     0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1966     0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1967     0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1968     0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1969 };
  1970 
  1971 static void
  1972 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
  1973 {
  1974     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1975 }
  1976 
  1977 static void
  1978 BlitNto1(SDL_BlitInfo * info)
  1979 {
  1980 #ifndef USE_DUFFS_LOOP
  1981     int c;
  1982 #endif
  1983     int width, height;
  1984     Uint8 *src;
  1985     const Uint8 *map;
  1986     Uint8 *dst;
  1987     int srcskip, dstskip;
  1988     int srcbpp;
  1989     Uint32 Pixel;
  1990     int sR, sG, sB;
  1991     SDL_PixelFormat *srcfmt;
  1992 
  1993     /* Set up some basic variables */
  1994     width = info->dst_w;
  1995     height = info->dst_h;
  1996     src = info->src;
  1997     srcskip = info->src_skip;
  1998     dst = info->dst;
  1999     dstskip = info->dst_skip;
  2000     map = info->table;
  2001     srcfmt = info->src_fmt;
  2002     srcbpp = srcfmt->BytesPerPixel;
  2003 
  2004     if (map == NULL) {
  2005         while (height--) {
  2006 #ifdef USE_DUFFS_LOOP
  2007             /* *INDENT-OFF* */
  2008             DUFFS_LOOP(
  2009                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2010                                 sR, sG, sB);
  2011                 if ( 1 ) {
  2012                     /* Pack RGB into 8bit pixel */
  2013                     *dst = ((sR>>5)<<(3+2))|
  2014                             ((sG>>5)<<(2)) |
  2015                             ((sB>>6)<<(0)) ;
  2016                 }
  2017                 dst++;
  2018                 src += srcbpp;
  2019             , width);
  2020             /* *INDENT-ON* */
  2021 #else
  2022             for (c = width; c; --c) {
  2023                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2024                 if (1) {
  2025                     /* Pack RGB into 8bit pixel */
  2026                     *dst = ((sR >> 5) << (3 + 2)) |
  2027                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
  2028                 }
  2029                 dst++;
  2030                 src += srcbpp;
  2031             }
  2032 #endif
  2033             src += srcskip;
  2034             dst += dstskip;
  2035         }
  2036     } else {
  2037         while (height--) {
  2038 #ifdef USE_DUFFS_LOOP
  2039             /* *INDENT-OFF* */
  2040             DUFFS_LOOP(
  2041                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2042                                 sR, sG, sB);
  2043                 if ( 1 ) {
  2044                     /* Pack RGB into 8bit pixel */
  2045                     *dst = map[((sR>>5)<<(3+2))|
  2046                            ((sG>>5)<<(2))  |
  2047                            ((sB>>6)<<(0))  ];
  2048                 }
  2049                 dst++;
  2050                 src += srcbpp;
  2051             , width);
  2052             /* *INDENT-ON* */
  2053 #else
  2054             for (c = width; c; --c) {
  2055                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2056                 if (1) {
  2057                     /* Pack RGB into 8bit pixel */
  2058                     *dst = map[((sR >> 5) << (3 + 2)) |
  2059                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
  2060                 }
  2061                 dst++;
  2062                 src += srcbpp;
  2063             }
  2064 #endif /* USE_DUFFS_LOOP */
  2065             src += srcskip;
  2066             dst += dstskip;
  2067         }
  2068     }
  2069 }
  2070 
  2071 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  2072 static void
  2073 Blit4to4MaskAlpha(SDL_BlitInfo * info)
  2074 {
  2075     int width = info->dst_w;
  2076     int height = info->dst_h;
  2077     Uint32 *src = (Uint32 *) info->src;
  2078     int srcskip = info->src_skip;
  2079     Uint32 *dst = (Uint32 *) info->dst;
  2080     int dstskip = info->dst_skip;
  2081     SDL_PixelFormat *srcfmt = info->src_fmt;
  2082     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2083 
  2084     if (dstfmt->Amask) {
  2085         /* RGB->RGBA, SET_ALPHA */
  2086         Uint32 mask = (info->a >> dstfmt->Aloss) << dstfmt->Ashift;
  2087 
  2088         while (height--) {
  2089             /* *INDENT-OFF* */
  2090             DUFFS_LOOP(
  2091             {
  2092                 *dst = *src | mask;
  2093                 ++dst;
  2094                 ++src;
  2095             },
  2096             width);
  2097             /* *INDENT-ON* */
  2098             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2099             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2100         }
  2101     } else {
  2102         /* RGBA->RGB, NO_ALPHA */
  2103         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2104 
  2105         while (height--) {
  2106             /* *INDENT-OFF* */
  2107             DUFFS_LOOP(
  2108             {
  2109                 *dst = *src & mask;
  2110                 ++dst;
  2111                 ++src;
  2112             },
  2113             width);
  2114             /* *INDENT-ON* */
  2115             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2116             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2117         }
  2118     }
  2119 }
  2120 
  2121 /* blits 32 bit RGBA<->RGBA with both surfaces having the same R,G,B,A fields */
  2122 static void
  2123 Blit4to4CopyAlpha(SDL_BlitInfo * info)
  2124 {
  2125     int width = info->dst_w;
  2126     int height = info->dst_h;
  2127     Uint32 *src = (Uint32 *) info->src;
  2128     int srcskip = info->src_skip;
  2129     Uint32 *dst = (Uint32 *) info->dst;
  2130     int dstskip = info->dst_skip;
  2131 
  2132     /* RGBA->RGBA, COPY_ALPHA */
  2133     while (height--) {
  2134         /* *INDENT-OFF* */
  2135         DUFFS_LOOP(
  2136         {
  2137             *dst = *src;
  2138             ++dst;
  2139             ++src;
  2140         },
  2141         width);
  2142         /* *INDENT-ON* */
  2143         src = (Uint32 *) ((Uint8 *) src + srcskip);
  2144         dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2145     }
  2146 }
  2147 
  2148 static void
  2149 BlitNtoN(SDL_BlitInfo * info)
  2150 {
  2151     int width = info->dst_w;
  2152     int height = info->dst_h;
  2153     Uint8 *src = info->src;
  2154     int srcskip = info->src_skip;
  2155     Uint8 *dst = info->dst;
  2156     int dstskip = info->dst_skip;
  2157     SDL_PixelFormat *srcfmt = info->src_fmt;
  2158     int srcbpp = srcfmt->BytesPerPixel;
  2159     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2160     int dstbpp = dstfmt->BytesPerPixel;
  2161     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2162 
  2163     while (height--) {
  2164         /* *INDENT-OFF* */
  2165         DUFFS_LOOP(
  2166         {
  2167             Uint32 Pixel;
  2168             unsigned sR;
  2169             unsigned sG;
  2170             unsigned sB;
  2171             DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2172             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2173             dst += dstbpp;
  2174             src += srcbpp;
  2175         },
  2176         width);
  2177         /* *INDENT-ON* */
  2178         src += srcskip;
  2179         dst += dstskip;
  2180     }
  2181 }
  2182 
  2183 static void
  2184 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
  2185 {
  2186     int width = info->dst_w;
  2187     int height = info->dst_h;
  2188     Uint8 *src = info->src;
  2189     int srcskip = info->src_skip;
  2190     Uint8 *dst = info->dst;
  2191     int dstskip = info->dst_skip;
  2192     SDL_PixelFormat *srcfmt = info->src_fmt;
  2193     int srcbpp = srcfmt->BytesPerPixel;
  2194     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2195     int dstbpp = dstfmt->BytesPerPixel;
  2196     int c;
  2197 
  2198     while (height--) {
  2199         for (c = width; c; --c) {
  2200             Uint32 Pixel;
  2201             unsigned sR, sG, sB, sA;
  2202             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2203             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2204             dst += dstbpp;
  2205             src += srcbpp;
  2206         }
  2207         src += srcskip;
  2208         dst += dstskip;
  2209     }
  2210 }
  2211 
  2212 static void
  2213 BlitNto1Key(SDL_BlitInfo * info)
  2214 {
  2215     int width = info->dst_w;
  2216     int height = info->dst_h;
  2217     Uint8 *src = info->src;
  2218     int srcskip = info->src_skip;
  2219     Uint8 *dst = info->dst;
  2220     int dstskip = info->dst_skip;
  2221     SDL_PixelFormat *srcfmt = info->src_fmt;
  2222     const Uint8 *palmap = info->table;
  2223     Uint32 ckey = info->colorkey;
  2224     Uint32 rgbmask = ~srcfmt->Amask;
  2225     int srcbpp;
  2226     Uint32 Pixel;
  2227     unsigned sR, sG, sB;
  2228 
  2229     /* Set up some basic variables */
  2230     srcbpp = srcfmt->BytesPerPixel;
  2231     ckey &= rgbmask;
  2232 
  2233     if (palmap == NULL) {
  2234         while (height--) {
  2235             /* *INDENT-OFF* */
  2236             DUFFS_LOOP(
  2237             {
  2238                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2239                                 sR, sG, sB);
  2240                 if ( (Pixel & rgbmask) != ckey ) {
  2241                     /* Pack RGB into 8bit pixel */
  2242                     *dst = (Uint8)(((sR>>5)<<(3+2))|
  2243                                    ((sG>>5)<<(2)) |
  2244                                    ((sB>>6)<<(0)));
  2245                 }
  2246                 dst++;
  2247                 src += srcbpp;
  2248             },
  2249             width);
  2250             /* *INDENT-ON* */
  2251             src += srcskip;
  2252             dst += dstskip;
  2253         }
  2254     } else {
  2255         while (height--) {
  2256             /* *INDENT-OFF* */
  2257             DUFFS_LOOP(
  2258             {
  2259                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2260                                 sR, sG, sB);
  2261                 if ( (Pixel & rgbmask) != ckey ) {
  2262                     /* Pack RGB into 8bit pixel */
  2263                     *dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2264                                          ((sG>>5)<<(2))  |
  2265                                          ((sB>>6)<<(0))  ];
  2266                 }
  2267                 dst++;
  2268                 src += srcbpp;
  2269             },
  2270             width);
  2271             /* *INDENT-ON* */
  2272             src += srcskip;
  2273             dst += dstskip;
  2274         }
  2275     }
  2276 }
  2277 
  2278 static void
  2279 Blit2to2Key(SDL_BlitInfo * info)
  2280 {
  2281     int width = info->dst_w;
  2282     int height = info->dst_h;
  2283     Uint16 *srcp = (Uint16 *) info->src;
  2284     int srcskip = info->src_skip;
  2285     Uint16 *dstp = (Uint16 *) info->dst;
  2286     int dstskip = info->dst_skip;
  2287     Uint32 ckey = info->colorkey;
  2288     Uint32 rgbmask = ~info->src_fmt->Amask;
  2289 
  2290     /* Set up some basic variables */
  2291     srcskip /= 2;
  2292     dstskip /= 2;
  2293     ckey &= rgbmask;
  2294 
  2295     while (height--) {
  2296         /* *INDENT-OFF* */
  2297         DUFFS_LOOP(
  2298         {
  2299             if ( (*srcp & rgbmask) != ckey ) {
  2300                 *dstp = *srcp;
  2301             }
  2302             dstp++;
  2303             srcp++;
  2304         },
  2305         width);
  2306         /* *INDENT-ON* */
  2307         srcp += srcskip;
  2308         dstp += dstskip;
  2309     }
  2310 }
  2311 
  2312 static void
  2313 BlitNtoNKey(SDL_BlitInfo * info)
  2314 {
  2315     int width = info->dst_w;
  2316     int height = info->dst_h;
  2317     Uint8 *src = info->src;
  2318     int srcskip = info->src_skip;
  2319     Uint8 *dst = info->dst;
  2320     int dstskip = info->dst_skip;
  2321     Uint32 ckey = info->colorkey;
  2322     SDL_PixelFormat *srcfmt = info->src_fmt;
  2323     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2324     int srcbpp = srcfmt->BytesPerPixel;
  2325     int dstbpp = dstfmt->BytesPerPixel;
  2326     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2327     Uint32 rgbmask = ~srcfmt->Amask;
  2328 
  2329     /* Set up some basic variables */
  2330     ckey &= rgbmask;
  2331 
  2332     while (height--) {
  2333         /* *INDENT-OFF* */
  2334         DUFFS_LOOP(
  2335         {
  2336             Uint32 Pixel;
  2337             unsigned sR;
  2338             unsigned sG;
  2339             unsigned sB;
  2340             RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2341             if ( (Pixel & rgbmask) != ckey ) {
  2342                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2343                 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2344             }
  2345             dst += dstbpp;
  2346             src += srcbpp;
  2347         },
  2348         width);
  2349         /* *INDENT-ON* */
  2350         src += srcskip;
  2351         dst += dstskip;
  2352     }
  2353 }
  2354 
  2355 static void
  2356 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
  2357 {
  2358     int width = info->dst_w;
  2359     int height = info->dst_h;
  2360     Uint8 *src = info->src;
  2361     int srcskip = info->src_skip;
  2362     Uint8 *dst = info->dst;
  2363     int dstskip = info->dst_skip;
  2364     Uint32 ckey = info->colorkey;
  2365     SDL_PixelFormat *srcfmt = info->src_fmt;
  2366     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2367     Uint32 rgbmask = ~srcfmt->Amask;
  2368 
  2369     Uint8 srcbpp;
  2370     Uint8 dstbpp;
  2371     Uint32 Pixel;
  2372     unsigned sR, sG, sB, sA;
  2373 
  2374     /* Set up some basic variables */
  2375     srcbpp = srcfmt->BytesPerPixel;
  2376     dstbpp = dstfmt->BytesPerPixel;
  2377     ckey &= rgbmask;
  2378 
  2379     while (height--) {
  2380         /* *INDENT-OFF* */
  2381         DUFFS_LOOP(
  2382         {
  2383             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2384             if ( (Pixel & rgbmask) != ckey ) {
  2385                   ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2386             }
  2387             dst += dstbpp;
  2388             src += srcbpp;
  2389         },
  2390         width);
  2391         /* *INDENT-ON* */
  2392         src += srcskip;
  2393         dst += dstskip;
  2394     }
  2395 }
  2396 
  2397 /* Special optimized blit for ARGB 2-10-10-10 --> RGBA */
  2398 static void
  2399 Blit2101010toN(SDL_BlitInfo * info)
  2400 {
  2401     int width = info->dst_w;
  2402     int height = info->dst_h;
  2403     Uint8 *src = info->src;
  2404     int srcskip = info->src_skip;
  2405     Uint8 *dst = info->dst;
  2406     int dstskip = info->dst_skip;
  2407     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2408     int dstbpp = dstfmt->BytesPerPixel;
  2409     Uint32 Pixel;
  2410     unsigned sR, sG, sB, sA;
  2411 
  2412     while (height--) {
  2413         /* *INDENT-OFF* */
  2414         DUFFS_LOOP(
  2415         {
  2416             Pixel = *(Uint32 *)src;
  2417             RGBA_FROM_ARGB2101010(Pixel, sR, sG, sB, sA);
  2418             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2419             dst += dstbpp;
  2420             src += 4;
  2421         },
  2422         width);
  2423         /* *INDENT-ON* */
  2424         src += srcskip;
  2425         dst += dstskip;
  2426     }
  2427 }
  2428 
  2429 /* Special optimized blit for RGBA --> ARGB 2-10-10-10 */
  2430 static void
  2431 BlitNto2101010(SDL_BlitInfo * info)
  2432 {
  2433     int width = info->dst_w;
  2434     int height = info->dst_h;
  2435     Uint8 *src = info->src;
  2436     int srcskip = info->src_skip;
  2437     Uint8 *dst = info->dst;
  2438     int dstskip = info->dst_skip;
  2439     SDL_PixelFormat *srcfmt = info->src_fmt;
  2440     int srcbpp = srcfmt->BytesPerPixel;
  2441     Uint32 Pixel;
  2442     unsigned sR, sG, sB, sA;
  2443 
  2444     while (height--) {
  2445         /* *INDENT-OFF* */
  2446         DUFFS_LOOP(
  2447         {
  2448             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2449             ARGB2101010_FROM_RGBA(Pixel, sR, sG, sB, sA);
  2450             *(Uint32 *)dst = Pixel;
  2451             dst += 4;
  2452             src += srcbpp;
  2453         },
  2454         width);
  2455         /* *INDENT-ON* */
  2456         src += srcskip;
  2457         dst += dstskip;
  2458     }
  2459 }
  2460 
  2461 /* Normal N to N optimized blitters */
  2462 struct blit_table
  2463 {
  2464     Uint32 srcR, srcG, srcB;
  2465     int dstbpp;
  2466     Uint32 dstR, dstG, dstB;
  2467     Uint32 blit_features;
  2468     SDL_BlitFunc blitfunc;
  2469     enum
  2470     { NO_ALPHA = 1, SET_ALPHA = 2, COPY_ALPHA = 4 } alpha;
  2471 };
  2472 static const struct blit_table normal_blit_1[] = {
  2473     /* Default for 8-bit RGB source, never optimized */
  2474     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2475 };
  2476 
  2477 static const struct blit_table normal_blit_2[] = {
  2478 #if SDL_ALTIVEC_BLITTERS
  2479     /* has-altivec */
  2480     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
  2481      2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2482     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
  2483      2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2484 #endif
  2485     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
  2486      0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2487     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
  2488      0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2489     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
  2490      0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2491     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
  2492      0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2493 
  2494     /* Default for 16-bit RGB source, used if no other blitter matches */
  2495     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2496 };
  2497 
  2498 static const struct blit_table normal_blit_3[] = {
  2499     /* Default for 24-bit RGB source, never optimized */
  2500     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2501 };
  2502 
  2503 static const struct blit_table normal_blit_4[] = {
  2504 #if SDL_ALTIVEC_BLITTERS
  2505     /* has-altivec | dont-use-prefetch */
  2506     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
  2507      6, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2508     /* has-altivec */
  2509     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
  2510      2, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2511     /* has-altivec */
  2512     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
  2513      2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
  2514 #endif
  2515     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
  2516      0, Blit_RGB888_RGB565, NO_ALPHA},
  2517     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
  2518      0, Blit_RGB888_RGB555, NO_ALPHA},
  2519     /* Default for 32-bit RGB source, used if no other blitter matches */
  2520     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2521 };
  2522 
  2523 static const struct blit_table *const normal_blit[] = {
  2524     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2525 };
  2526 
  2527 /* Mask matches table, or table entry is zero */
  2528 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2529 
  2530 SDL_BlitFunc
  2531 SDL_CalculateBlitN(SDL_Surface * surface)
  2532 {
  2533     SDL_PixelFormat *srcfmt;
  2534     SDL_PixelFormat *dstfmt;
  2535     const struct blit_table *table;
  2536     int which;
  2537     SDL_BlitFunc blitfun;
  2538 
  2539     /* Set up data for choosing the blit */
  2540     srcfmt = surface->format;
  2541     dstfmt = surface->map->dst->format;
  2542 
  2543     /* We don't support destinations less than 8-bits */
  2544     if (dstfmt->BitsPerPixel < 8) {
  2545         return (NULL);
  2546     }
  2547 
  2548     switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
  2549     case 0:
  2550         blitfun = NULL;
  2551         if (dstfmt->BitsPerPixel == 8) {
  2552             if ((srcfmt->BytesPerPixel == 4) &&
  2553                 (srcfmt->Rmask == 0x00FF0000) &&
  2554                 (srcfmt->Gmask == 0x0000FF00) &&
  2555                 (srcfmt->Bmask == 0x000000FF)) {
  2556                 blitfun = Blit_RGB888_index8;
  2557             } else if ((srcfmt->BytesPerPixel == 4) &&
  2558                 (srcfmt->Rmask == 0x3FF00000) &&
  2559                 (srcfmt->Gmask == 0x000FFC00) &&
  2560                 (srcfmt->Bmask == 0x000003FF)) {
  2561                 blitfun = Blit_RGB101010_index8;
  2562             } else {
  2563                 blitfun = BlitNto1;
  2564             }
  2565         } else {
  2566             /* Now the meat, choose the blitter we want */
  2567             int a_need = NO_ALPHA;
  2568             if (dstfmt->Amask)
  2569                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2570             table = normal_blit[srcfmt->BytesPerPixel - 1];
  2571             for (which = 0; table[which].dstbpp; ++which) {
  2572                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2573                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2574                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2575                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2576                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2577                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2578                     dstfmt->BytesPerPixel == table[which].dstbpp &&
  2579                     (a_need & table[which].alpha) == a_need &&
  2580                     ((table[which].blit_features & GetBlitFeatures()) ==
  2581                      table[which].blit_features))
  2582                     break;
  2583             }
  2584             blitfun = table[which].blitfunc;
  2585 
  2586             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2587                 if (srcfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
  2588                     blitfun = Blit2101010toN;
  2589                 } else if (dstfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
  2590                     blitfun = BlitNto2101010;
  2591                 } else if (srcfmt->BytesPerPixel == 4 &&
  2592                             dstfmt->BytesPerPixel == 4 &&
  2593                             srcfmt->Rmask == dstfmt->Rmask &&
  2594                             srcfmt->Gmask == dstfmt->Gmask &&
  2595                             srcfmt->Bmask == dstfmt->Bmask) {
  2596                     if (a_need == COPY_ALPHA) {
  2597                         if (srcfmt->Amask == dstfmt->Amask) {
  2598                             /* Fastpath C fallback: 32bit RGBA<->RGBA blit with matching RGBA */
  2599                             blitfun = Blit4to4CopyAlpha;
  2600                         } else {
  2601                             blitfun = BlitNtoNCopyAlpha;
  2602                         }
  2603                     } else {
  2604                         /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2605                         blitfun = Blit4to4MaskAlpha;
  2606                     }
  2607                 } else if (a_need == COPY_ALPHA) {
  2608                     blitfun = BlitNtoNCopyAlpha;
  2609                 }
  2610             }
  2611         }
  2612         return (blitfun);
  2613 
  2614     case SDL_COPY_COLORKEY:
  2615         /* colorkey blit: Here we don't have too many options, mostly
  2616            because RLE is the preferred fast way to deal with this.
  2617            If a particular case turns out to be useful we'll add it. */
  2618 
  2619         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
  2620             return Blit2to2Key;
  2621         else if (dstfmt->BytesPerPixel == 1)
  2622             return BlitNto1Key;
  2623         else {
  2624 #if SDL_ALTIVEC_BLITTERS
  2625             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
  2626                 && SDL_HasAltiVec()) {
  2627                 return Blit32to32KeyAltivec;
  2628             } else
  2629 #endif
  2630             if (srcfmt->Amask && dstfmt->Amask) {
  2631                 return BlitNtoNKeyCopyAlpha;
  2632             } else {
  2633                 return BlitNtoNKey;
  2634             }
  2635         }
  2636     }
  2637 
  2638     return NULL;
  2639 }
  2640 
  2641 /* vi: set ts=4 sw=4 expandtab: */