src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 04 May 2015 21:47:40 -0700
changeset 9583 0bd764eb2b99
parent 8149 681eb46b8ac4
child 9619 b94b6d0bff0f
permissions -rw-r--r--
Fixed bug 2976 - Fix RGBA<->RGBA blit that was broken with the optimization from Bug 11

id.zeta

The optimization from Bug 11 added a code branch for cases where the source RGB masks match the destination RGB masks, in which an optimized blit function, Blit4to4MaskAlpha, that always overrides the source alpha info would be chosen. Unfortunately, the branch also erroneously took over the RGBA<->RGBA blitting cases where the source alpha info should be copied; in those cases the alpha would instead get overridden in Blit4to4MaskAlpha.

The attached patch fixes that by handling the RGBA<->RGBA cases correctly in that branch with the original BlitNtoNCopyAlpha, and by adding an optimized Blit4to4CopyAlpha in the same vein.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 #include "SDL_video.h"
    24 #include "SDL_endian.h"
    25 #include "SDL_cpuinfo.h"
    26 #include "SDL_blit.h"
    27 
    28 #include "SDL_assert.h"
    29 
    30 /* Functions to blit from N-bit surfaces to other surfaces */
    31 
    32 #if SDL_ALTIVEC_BLITTERS
    33 #ifdef HAVE_ALTIVEC_H
    34 #include <altivec.h>
    35 #endif
    36 #ifdef __MACOSX__
    37 #include <sys/sysctl.h>
    38 static size_t
    39 GetL3CacheSize(void)
    40 {
    41     const char key[] = "hw.l3cachesize";
    42     u_int64_t result = 0;
    43     size_t typeSize = sizeof(result);
    44 
    45 
    46     int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
    47     if (0 != err)
    48         return 0;
    49 
    50     return result;
    51 }
    52 #else
    53 static size_t
    54 GetL3CacheSize(void)
    55 {
    56     /* XXX: Just guess G4 */
    57     return 2097152;
    58 }
    59 #endif /* __MACOSX__ */
    60 
    61 #if (defined(__MACOSX__) && (__GNUC__ < 4))
    62 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    63         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    64 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    65         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    66 #else
    67 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    68         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    69 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    70         (vector unsigned short) { a,b,c,d,e,f,g,h }
    71 #endif
    72 
    73 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    74 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    75                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    76                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    77                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    78                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    79 
    80 #define MAKE8888(dstfmt, r, g, b, a)  \
    81     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    82       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    83       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    84       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    85 
    86 /*
    87  * Data Stream Touch...Altivec cache prefetching.
    88  *
    89  *  Don't use this on a G5...however, the speed boost is very significant
    90  *   on a G4.
    91  */
    92 #define DST_CHAN_SRC 1
    93 #define DST_CHAN_DEST 2
    94 
    95 /* macro to set DST control word value... */
    96 #define DST_CTRL(size, count, stride) \
    97     (((size) << 24) | ((count) << 16) | (stride))
    98 
    99 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   100     ? vec_lvsl(0, src) \
   101     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   102 
   103 /* Calculate the permute vector used for 32->32 swizzling */
   104 static vector unsigned char
   105 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
   106 {
   107     /*
   108      * We have to assume that the bits that aren't used by other
   109      *  colors is alpha, and it's one complete byte, since some formats
   110      *  leave alpha with a zero mask, but we should still swizzle the bits.
   111      */
   112     /* ARGB */
   113     const static const struct SDL_PixelFormat default_pixel_format = {
   114         0, NULL, 0, 0,
   115         {0, 0},
   116         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   117         0, 0, 0, 0,
   118         16, 8, 0, 24,
   119         0, NULL
   120     };
   121     if (!srcfmt) {
   122         srcfmt = &default_pixel_format;
   123     }
   124     if (!dstfmt) {
   125         dstfmt = &default_pixel_format;
   126     }
   127     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   128                                                        0x04, 0x04, 0x04, 0x04,
   129                                                        0x08, 0x08, 0x08, 0x08,
   130                                                        0x0C, 0x0C, 0x0C,
   131                                                        0x0C);
   132     vector unsigned char vswiz;
   133     vector unsigned int srcvec;
   134 #define RESHIFT(X) (3 - ((X) >> 3))
   135     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   136     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   137     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   138     Uint32 amask;
   139     /* Use zero for alpha if either surface doesn't have alpha */
   140     if (dstfmt->Amask) {
   141         amask =
   142             ((srcfmt->Amask) ? RESHIFT(srcfmt->
   143                                        Ashift) : 0x10) << (dstfmt->Ashift);
   144     } else {
   145         amask =
   146             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   147                           0xFFFFFFFF);
   148     }
   149 #undef RESHIFT
   150     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   151     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   152     return (vswiz);
   153 }
   154 
   155 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
   156 static void
   157 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   158 {
   159     int height = info->dst_h;
   160     Uint8 *src = (Uint8 *) info->src;
   161     int srcskip = info->src_skip;
   162     Uint8 *dst = (Uint8 *) info->dst;
   163     int dstskip = info->dst_skip;
   164     SDL_PixelFormat *srcfmt = info->src_fmt;
   165     vector unsigned char valpha = vec_splat_u8(0);
   166     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   167     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   168                                                     0x00, 0x0a, 0x00, 0x0e,
   169                                                     0x00, 0x12, 0x00, 0x16,
   170                                                     0x00, 0x1a, 0x00, 0x1e);
   171     vector unsigned short v1 = vec_splat_u16(1);
   172     vector unsigned short v3 = vec_splat_u16(3);
   173     vector unsigned short v3f =
   174         VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
   175                           0x003f, 0x003f, 0x003f, 0x003f);
   176     vector unsigned short vfc =
   177         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   178                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
   179     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
   180     vf800 = vec_sl(vf800, vec_splat_u16(8));
   181 
   182     while (height--) {
   183         vector unsigned char valigner;
   184         vector unsigned char voverflow;
   185         vector unsigned char vsrc;
   186 
   187         int width = info->dst_w;
   188         int extrawidth;
   189 
   190         /* do scalar until we can align... */
   191 #define ONE_PIXEL_BLEND(condition, widthvar) \
   192         while (condition) { \
   193             Uint32 Pixel; \
   194             unsigned sR, sG, sB, sA; \
   195             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   196                           sR, sG, sB, sA); \
   197             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   198                                 ((sG << 3) & 0x000007E0) | \
   199                                 ((sB >> 3) & 0x0000001F)); \
   200             dst += 2; \
   201             src += 4; \
   202             widthvar--; \
   203         }
   204 
   205         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   206 
   207         /* After all that work, here's the vector part! */
   208         extrawidth = (width % 8);       /* trailing unaligned stores */
   209         width -= extrawidth;
   210         vsrc = vec_ld(0, src);
   211         valigner = VEC_ALIGNER(src);
   212 
   213         while (width) {
   214             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   215             vector unsigned int vsrc1, vsrc2;
   216             vector unsigned char vdst;
   217 
   218             voverflow = vec_ld(15, src);
   219             vsrc = vec_perm(vsrc, voverflow, valigner);
   220             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   221             src += 16;
   222             vsrc = voverflow;
   223             voverflow = vec_ld(15, src);
   224             vsrc = vec_perm(vsrc, voverflow, valigner);
   225             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   226             /* 1555 */
   227             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
   228             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   229             vgpixel = vec_and(vgpixel, vfc);
   230             vgpixel = vec_sl(vgpixel, v3);
   231             vrpixel = vec_sl(vpixel, v1);
   232             vrpixel = vec_and(vrpixel, vf800);
   233             vbpixel = vec_and(vpixel, v3f);
   234             vdst =
   235                 vec_or((vector unsigned char) vrpixel,
   236                        (vector unsigned char) vgpixel);
   237             /* 565 */
   238             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   239             vec_st(vdst, 0, dst);
   240 
   241             width -= 8;
   242             src += 16;
   243             dst += 16;
   244             vsrc = voverflow;
   245         }
   246 
   247         SDL_assert(width == 0);
   248 
   249         /* do scalar until we can align... */
   250         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   251 #undef ONE_PIXEL_BLEND
   252 
   253         src += srcskip;         /* move to next row, accounting for pitch. */
   254         dst += dstskip;
   255     }
   256 
   257 
   258 }
   259 
   260 static void
   261 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   262 {
   263     int height = info->dst_h;
   264     Uint8 *src = (Uint8 *) info->src;
   265     int srcskip = info->src_skip;
   266     Uint8 *dst = (Uint8 *) info->dst;
   267     int dstskip = info->dst_skip;
   268     SDL_PixelFormat *srcfmt = info->src_fmt;
   269     SDL_PixelFormat *dstfmt = info->dst_fmt;
   270     unsigned alpha;
   271     vector unsigned char valpha;
   272     vector unsigned char vpermute;
   273     vector unsigned short vf800;
   274     vector unsigned int v8 = vec_splat_u32(8);
   275     vector unsigned int v16 = vec_add(v8, v8);
   276     vector unsigned short v2 = vec_splat_u16(2);
   277     vector unsigned short v3 = vec_splat_u16(3);
   278     /*
   279        0x10 - 0x1f is the alpha
   280        0x00 - 0x0e evens are the red
   281        0x01 - 0x0f odds are zero
   282      */
   283     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   284                                                        0x10, 0x02, 0x01, 0x01,
   285                                                        0x10, 0x04, 0x01, 0x01,
   286                                                        0x10, 0x06, 0x01,
   287                                                        0x01);
   288     vector unsigned char vredalpha2 =
   289         (vector unsigned
   290          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   291         );
   292     /*
   293        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   294        0x11 - 0x0f odds are blue
   295      */
   296     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   297                                                    0x04, 0x05, 0x06, 0x13,
   298                                                    0x08, 0x09, 0x0a, 0x15,
   299                                                    0x0c, 0x0d, 0x0e, 0x17);
   300     vector unsigned char vblue2 =
   301         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   302         );
   303     /*
   304        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   305        0x10 - 0x0e evens are green
   306      */
   307     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   308                                                     0x04, 0x05, 0x12, 0x07,
   309                                                     0x08, 0x09, 0x14, 0x0b,
   310                                                     0x0c, 0x0d, 0x16, 0x0f);
   311     vector unsigned char vgreen2 =
   312         (vector unsigned
   313          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   314         );
   315 
   316     SDL_assert(srcfmt->BytesPerPixel == 2);
   317     SDL_assert(dstfmt->BytesPerPixel == 4);
   318 
   319     vf800 = (vector unsigned short) vec_splat_u8(-7);
   320     vf800 = vec_sl(vf800, vec_splat_u16(8));
   321 
   322     if (dstfmt->Amask && info->a) {
   323         ((unsigned char *) &valpha)[0] = alpha = info->a;
   324         valpha = vec_splat(valpha, 0);
   325     } else {
   326         alpha = 0;
   327         valpha = vec_splat_u8(0);
   328     }
   329 
   330     vpermute = calc_swizzle32(NULL, dstfmt);
   331     while (height--) {
   332         vector unsigned char valigner;
   333         vector unsigned char voverflow;
   334         vector unsigned char vsrc;
   335 
   336         int width = info->dst_w;
   337         int extrawidth;
   338 
   339         /* do scalar until we can align... */
   340 #define ONE_PIXEL_BLEND(condition, widthvar) \
   341         while (condition) { \
   342             unsigned sR, sG, sB; \
   343             unsigned short Pixel = *((unsigned short *)src); \
   344             sR = (Pixel >> 8) & 0xf8; \
   345             sG = (Pixel >> 3) & 0xfc; \
   346             sB = (Pixel << 3) & 0xf8; \
   347             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   348             src += 2; \
   349             dst += 4; \
   350             widthvar--; \
   351         }
   352         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   353 
   354         /* After all that work, here's the vector part! */
   355         extrawidth = (width % 8);       /* trailing unaligned stores */
   356         width -= extrawidth;
   357         vsrc = vec_ld(0, src);
   358         valigner = VEC_ALIGNER(src);
   359 
   360         while (width) {
   361             vector unsigned short vR, vG, vB;
   362             vector unsigned char vdst1, vdst2;
   363 
   364             voverflow = vec_ld(15, src);
   365             vsrc = vec_perm(vsrc, voverflow, valigner);
   366 
   367             vR = vec_and((vector unsigned short) vsrc, vf800);
   368             vB = vec_sl((vector unsigned short) vsrc, v3);
   369             vG = vec_sl(vB, v2);
   370 
   371             vdst1 =
   372                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   373                                                 valpha, vredalpha1);
   374             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   375             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   376             vdst1 = vec_perm(vdst1, valpha, vpermute);
   377             vec_st(vdst1, 0, dst);
   378 
   379             vdst2 =
   380                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   381                                                 valpha, vredalpha2);
   382             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   383             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   384             vdst2 = vec_perm(vdst2, valpha, vpermute);
   385             vec_st(vdst2, 16, dst);
   386 
   387             width -= 8;
   388             dst += 32;
   389             src += 16;
   390             vsrc = voverflow;
   391         }
   392 
   393         SDL_assert(width == 0);
   394 
   395 
   396         /* do scalar until we can align... */
   397         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   398 #undef ONE_PIXEL_BLEND
   399 
   400         src += srcskip;         /* move to next row, accounting for pitch. */
   401         dst += dstskip;
   402     }
   403 
   404 }
   405 
   406 
   407 static void
   408 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   409 {
   410     int height = info->dst_h;
   411     Uint8 *src = (Uint8 *) info->src;
   412     int srcskip = info->src_skip;
   413     Uint8 *dst = (Uint8 *) info->dst;
   414     int dstskip = info->dst_skip;
   415     SDL_PixelFormat *srcfmt = info->src_fmt;
   416     SDL_PixelFormat *dstfmt = info->dst_fmt;
   417     unsigned alpha;
   418     vector unsigned char valpha;
   419     vector unsigned char vpermute;
   420     vector unsigned short vf800;
   421     vector unsigned int v8 = vec_splat_u32(8);
   422     vector unsigned int v16 = vec_add(v8, v8);
   423     vector unsigned short v1 = vec_splat_u16(1);
   424     vector unsigned short v3 = vec_splat_u16(3);
   425     /*
   426        0x10 - 0x1f is the alpha
   427        0x00 - 0x0e evens are the red
   428        0x01 - 0x0f odds are zero
   429      */
   430     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   431                                                        0x10, 0x02, 0x01, 0x01,
   432                                                        0x10, 0x04, 0x01, 0x01,
   433                                                        0x10, 0x06, 0x01,
   434                                                        0x01);
   435     vector unsigned char vredalpha2 =
   436         (vector unsigned
   437          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   438         );
   439     /*
   440        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   441        0x11 - 0x0f odds are blue
   442      */
   443     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   444                                                    0x04, 0x05, 0x06, 0x13,
   445                                                    0x08, 0x09, 0x0a, 0x15,
   446                                                    0x0c, 0x0d, 0x0e, 0x17);
   447     vector unsigned char vblue2 =
   448         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   449         );
   450     /*
   451        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   452        0x10 - 0x0e evens are green
   453      */
   454     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   455                                                     0x04, 0x05, 0x12, 0x07,
   456                                                     0x08, 0x09, 0x14, 0x0b,
   457                                                     0x0c, 0x0d, 0x16, 0x0f);
   458     vector unsigned char vgreen2 =
   459         (vector unsigned
   460          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   461         );
   462 
   463     SDL_assert(srcfmt->BytesPerPixel == 2);
   464     SDL_assert(dstfmt->BytesPerPixel == 4);
   465 
   466     vf800 = (vector unsigned short) vec_splat_u8(-7);
   467     vf800 = vec_sl(vf800, vec_splat_u16(8));
   468 
   469     if (dstfmt->Amask && info->a) {
   470         ((unsigned char *) &valpha)[0] = alpha = info->a;
   471         valpha = vec_splat(valpha, 0);
   472     } else {
   473         alpha = 0;
   474         valpha = vec_splat_u8(0);
   475     }
   476 
   477     vpermute = calc_swizzle32(NULL, dstfmt);
   478     while (height--) {
   479         vector unsigned char valigner;
   480         vector unsigned char voverflow;
   481         vector unsigned char vsrc;
   482 
   483         int width = info->dst_w;
   484         int extrawidth;
   485 
   486         /* do scalar until we can align... */
   487 #define ONE_PIXEL_BLEND(condition, widthvar) \
   488         while (condition) { \
   489             unsigned sR, sG, sB; \
   490             unsigned short Pixel = *((unsigned short *)src); \
   491             sR = (Pixel >> 7) & 0xf8; \
   492             sG = (Pixel >> 2) & 0xf8; \
   493             sB = (Pixel << 3) & 0xf8; \
   494             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   495             src += 2; \
   496             dst += 4; \
   497             widthvar--; \
   498         }
   499         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   500 
   501         /* After all that work, here's the vector part! */
   502         extrawidth = (width % 8);       /* trailing unaligned stores */
   503         width -= extrawidth;
   504         vsrc = vec_ld(0, src);
   505         valigner = VEC_ALIGNER(src);
   506 
   507         while (width) {
   508             vector unsigned short vR, vG, vB;
   509             vector unsigned char vdst1, vdst2;
   510 
   511             voverflow = vec_ld(15, src);
   512             vsrc = vec_perm(vsrc, voverflow, valigner);
   513 
   514             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
   515             vB = vec_sl((vector unsigned short) vsrc, v3);
   516             vG = vec_sl(vB, v3);
   517 
   518             vdst1 =
   519                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   520                                                 valpha, vredalpha1);
   521             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   522             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   523             vdst1 = vec_perm(vdst1, valpha, vpermute);
   524             vec_st(vdst1, 0, dst);
   525 
   526             vdst2 =
   527                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   528                                                 valpha, vredalpha2);
   529             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   530             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   531             vdst2 = vec_perm(vdst2, valpha, vpermute);
   532             vec_st(vdst2, 16, dst);
   533 
   534             width -= 8;
   535             dst += 32;
   536             src += 16;
   537             vsrc = voverflow;
   538         }
   539 
   540         SDL_assert(width == 0);
   541 
   542 
   543         /* do scalar until we can align... */
   544         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   545 #undef ONE_PIXEL_BLEND
   546 
   547         src += srcskip;         /* move to next row, accounting for pitch. */
   548         dst += dstskip;
   549     }
   550 
   551 }
   552 
   553 static void BlitNtoNKey(SDL_BlitInfo * info);
   554 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
   555 static void
   556 Blit32to32KeyAltivec(SDL_BlitInfo * info)
   557 {
   558     int height = info->dst_h;
   559     Uint32 *srcp = (Uint32 *) info->src;
   560     int srcskip = info->src_skip / 4;
   561     Uint32 *dstp = (Uint32 *) info->dst;
   562     int dstskip = info->dst_skip / 4;
   563     SDL_PixelFormat *srcfmt = info->src_fmt;
   564     int srcbpp = srcfmt->BytesPerPixel;
   565     SDL_PixelFormat *dstfmt = info->dst_fmt;
   566     int dstbpp = dstfmt->BytesPerPixel;
   567     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   568     unsigned alpha = dstfmt->Amask ? info->a : 0;
   569     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   570     Uint32 ckey = info->colorkey;
   571     vector unsigned int valpha;
   572     vector unsigned char vpermute;
   573     vector unsigned char vzero;
   574     vector unsigned int vckey;
   575     vector unsigned int vrgbmask;
   576     vpermute = calc_swizzle32(srcfmt, dstfmt);
   577     if (info->dst_w < 16) {
   578         if (copy_alpha) {
   579             BlitNtoNKeyCopyAlpha(info);
   580         } else {
   581             BlitNtoNKey(info);
   582         }
   583         return;
   584     }
   585     vzero = vec_splat_u8(0);
   586     if (alpha) {
   587         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   588         valpha =
   589             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   590     } else {
   591         valpha = (vector unsigned int) vzero;
   592     }
   593     ckey &= rgbmask;
   594     ((unsigned int *) (char *) &vckey)[0] = ckey;
   595     vckey = vec_splat(vckey, 0);
   596     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   597     vrgbmask = vec_splat(vrgbmask, 0);
   598 
   599     while (height--) {
   600 #define ONE_PIXEL_BLEND(condition, widthvar) \
   601         if (copy_alpha) { \
   602             while (condition) { \
   603                 Uint32 Pixel; \
   604                 unsigned sR, sG, sB, sA; \
   605                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   606                           sR, sG, sB, sA); \
   607                 if ( (Pixel & rgbmask) != ckey ) { \
   608                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   609                             sR, sG, sB, sA); \
   610                 } \
   611                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   612                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   613                 widthvar--; \
   614             } \
   615         } else { \
   616             while (condition) { \
   617                 Uint32 Pixel; \
   618                 unsigned sR, sG, sB; \
   619                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   620                 if ( Pixel != ckey ) { \
   621                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   622                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   623                               sR, sG, sB, alpha); \
   624                 } \
   625                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   626                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   627                 widthvar--; \
   628             } \
   629         }
   630         int width = info->dst_w;
   631         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   632         SDL_assert(width > 0);
   633         if (width > 0) {
   634             int extrawidth = (width % 4);
   635             vector unsigned char valigner = VEC_ALIGNER(srcp);
   636             vector unsigned int vs = vec_ld(0, srcp);
   637             width -= extrawidth;
   638             SDL_assert(width >= 4);
   639             while (width) {
   640                 vector unsigned char vsel;
   641                 vector unsigned int vd;
   642                 vector unsigned int voverflow = vec_ld(15, srcp);
   643                 /* load the source vec */
   644                 vs = vec_perm(vs, voverflow, valigner);
   645                 /* vsel is set for items that match the key */
   646                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   647                 vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
   648                 /* permute the src vec to the dest format */
   649                 vs = vec_perm(vs, valpha, vpermute);
   650                 /* load the destination vec */
   651                 vd = vec_ld(0, dstp);
   652                 /* select the source and dest into vs */
   653                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   654                                                    (vector unsigned char) vd,
   655                                                    vsel);
   656 
   657                 vec_st(vd, 0, dstp);
   658                 srcp += 4;
   659                 width -= 4;
   660                 dstp += 4;
   661                 vs = voverflow;
   662             }
   663             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   664 #undef ONE_PIXEL_BLEND
   665             srcp += srcskip;
   666             dstp += dstskip;
   667         }
   668     }
   669 }
   670 
   671 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
/* Use this on a G5 */
/*
 * Convert a rectangle of 32-bit pixels from info->src_fmt to info->dst_fmt
 * without issuing any prefetch hints.
 *
 * Every row is processed in three phases:
 *   1. a scalar loop that runs while dst still fails the UNALIGNED_PTR() test,
 *   2. a vector loop that converts four pixels per iteration,
 *   3. a scalar loop for the remaining (width % 4) pixels.
 * When the source format has no alpha mask, info->a is used as the alpha
 * value of the converted pixels.
 */
static void
ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint32 *src = (Uint32 *) info->src;
    /* src and dst advance as Uint32 pointers, so the skips are kept in
       Uint32 units (skip / 4). */
    int srcskip = info->src_skip / 4;
    Uint32 *dst = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* Second source operand of the swizzling vec_perm below; all zero unless
       a constant alpha has to be supplied. */
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (dstfmt->Amask && !srcfmt->Amask) {
        /* Destination has alpha but the source does not: fill every byte of
           vzero with info->a.  NOTE(review): presumably vpermute picks the
           destination alpha byte from this operand - confirm against
           calc_swizzle32(). */
        if (info->a) {
            vector unsigned char valpha;
            /* Only element 0 is written; vec_splat() replicates it. */
            ((unsigned char *) &valpha)[0] = info->a;
            vzero = (vector unsigned int) vec_splat(valpha, 0);
        }
    }

    SDL_assert(srcfmt->BytesPerPixel == 4);
    SDL_assert(dstfmt->BytesPerPixel == 4);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
        while ((UNALIGNED_PTR(dst)) && (width)) {
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if(!srcfmt->Amask)
              a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        /* After all that work, here's the vector part! */
        extrawidth = (width % 4);
        width -= extrawidth;
        /* Permute control used to realign the (possibly unaligned) source
           data; VEC_ALIGNER is defined elsewhere in this file. */
        valigner = VEC_ALIGNER(src);
        /* Prime the pipeline with the first source block. */
        vbits = vec_ld(0, src);

        while (width) {
            /* Load the block holding the last byte of the 16 bytes at src. */
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
            vec_st(vbits, 0, dst);      /* store it back out. */
            dst += 4;
            /* The second block becomes the first block of the next round. */
            vbits = voverflow;
        }

        SDL_assert(width == 0);

        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
        while (extrawidth) {
            bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if(!srcfmt->Amask)
              a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip;
        dst += dstskip;
    }

}
   750 
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
/* Use this on a G4 */
/*
 * Same conversion as the no-prefetch variant (scalar lead-in until dst passes
 * the UNALIGNED_PTR() test, four-pixel vector loop, scalar tail), but every
 * scalar lead-in and vector iteration first issues data-stream hints for the
 * source (vec_dstt) and destination (vec_dstst).  Both streams are stopped
 * with vec_dss() before returning.
 * When the source format has no alpha mask, info->a is used as the alpha
 * value of the converted pixels.
 */
static void
ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
{
    /* How far ahead of src/dst the stream hints point.
       NOTE(review): the values are computed with sizeof(Uint32) as if they
       were byte counts, but they are added to Uint32 pointers below, so the
       effective lead is that many pixels - confirm this is intended. */
    const int scalar_dst_lead = sizeof(Uint32) * 4;
    const int vector_dst_lead = sizeof(Uint32) * 16;

    int height = info->dst_h;
    Uint32 *src = (Uint32 *) info->src;
    /* src and dst advance as Uint32 pointers, so the skips are kept in
       Uint32 units (skip / 4). */
    int srcskip = info->src_skip / 4;
    Uint32 *dst = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* Second source operand of the swizzling vec_perm below; all zero unless
       a constant alpha has to be supplied. */
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    if (dstfmt->Amask && !srcfmt->Amask) {
        /* Destination has alpha but the source does not: fill every byte of
           vzero with info->a.  NOTE(review): presumably vpermute picks the
           destination alpha byte from this operand - confirm against
           calc_swizzle32(). */
        if (info->a) {
            vector unsigned char valpha;
            /* Only element 0 is written; vec_splat() replicates it. */
            ((unsigned char *) &valpha)[0] = info->a;
            vzero = (vector unsigned int) vec_splat(valpha, 0);
        }
    }

    SDL_assert(srcfmt->BytesPerPixel == 4);
    SDL_assert(dstfmt->BytesPerPixel == 4);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
        while ((UNALIGNED_PTR(dst)) && (width)) {
            /* Hint the upcoming source reads and destination writes. */
            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if(!srcfmt->Amask)
              a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        /* After all that work, here's the vector part! */
        extrawidth = (width % 4);
        width -= extrawidth;
        /* Permute control used to realign the (possibly unaligned) source
           data; VEC_ALIGNER is defined elsewhere in this file. */
        valigner = VEC_ALIGNER(src);
        /* Prime the pipeline with the first source block. */
        vbits = vec_ld(0, src);

        while (width) {
            /* Hint the upcoming source reads and destination writes. */
            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            /* Load the block holding the last byte of the 16 bytes at src. */
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
            vec_st(vbits, 0, dst);      /* store it back out. */
            dst += 4;
            /* The second block becomes the first block of the next round. */
            vbits = voverflow;
        }

        SDL_assert(width == 0);

        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
        while (extrawidth) {
            bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if(!srcfmt->Amask)
              a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip;
        dst += dstskip;
    }

    /* Stop both data streams started by the hints above. */
    vec_dss(DST_CHAN_SRC);
    vec_dss(DST_CHAN_DEST);
}
   843 
   844 static Uint32
   845 GetBlitFeatures(void)
   846 {
   847     static Uint32 features = 0xffffffff;
   848     if (features == 0xffffffff) {
   849         /* Provide an override for testing .. */
   850         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   851         if (override) {
   852             features = 0;
   853             SDL_sscanf(override, "%u", &features);
   854         } else {
   855             features = (0
   856                         /* Feature 1 is has-MMX */
   857                         | ((SDL_HasMMX())? 1 : 0)
   858                         /* Feature 2 is has-AltiVec */
   859                         | ((SDL_HasAltiVec())? 2 : 0)
   860                         /* Feature 4 is dont-use-prefetch */
   861                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   862                         | ((GetL3CacheSize() == 0) ? 4 : 0)
   863                 );
   864         }
   865     }
   866     return features;
   867 }
   868 
   869 #if __MWERKS__
   870 #pragma altivec_model off
   871 #endif
   872 #else
/* Fallback used when the preceding conditional section is not compiled:
   only feature bit 1 can be reported. */
/* Feature 1 is has-MMX */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   875 #endif
   876 
/* This is now endian dependent */
/*
 * HI and LO are array indices used by the packing macros below:
 *  - RGB888_RGB555_TWO / RGB888_RGB565_TWO place the pixel at src[HI] in the
 *    upper 16 bits of the 32-bit store and the pixel at src[LO] in the lower
 *    16 bits;
 *  - RGB565_32 uses src[LO] / src[HI] to index the two bytes of a 16-bit
 *    pixel.
 * On little-endian builds HI is 1 and LO is 0; on big-endian builds the two
 * values are swapped.
 */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#define HI  1
#define LO  0
#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
#define HI  0
#define LO  1
#endif
   885 
   886 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   887 #define RGB888_RGB332(dst, src) { \
   888     dst = (Uint8)((((src)&0x00E00000)>>16)| \
   889                   (((src)&0x0000E000)>>11)| \
   890                   (((src)&0x000000C0)>>6)); \
   891 }
   892 static void
   893 Blit_RGB888_index8(SDL_BlitInfo * info)
   894 {
   895 #ifndef USE_DUFFS_LOOP
   896     int c;
   897 #endif
   898     int width, height;
   899     Uint32 *src;
   900     const Uint8 *map;
   901     Uint8 *dst;
   902     int srcskip, dstskip;
   903 
   904     /* Set up some basic variables */
   905     width = info->dst_w;
   906     height = info->dst_h;
   907     src = (Uint32 *) info->src;
   908     srcskip = info->src_skip / 4;
   909     dst = info->dst;
   910     dstskip = info->dst_skip;
   911     map = info->table;
   912 
   913     if (map == NULL) {
   914         while (height--) {
   915 #ifdef USE_DUFFS_LOOP
   916             /* *INDENT-OFF* */
   917             DUFFS_LOOP(
   918                 RGB888_RGB332(*dst++, *src);
   919             , width);
   920             /* *INDENT-ON* */
   921 #else
   922             for (c = width / 4; c; --c) {
   923                 /* Pack RGB into 8bit pixel */
   924                 ++src;
   925                 RGB888_RGB332(*dst++, *src);
   926                 ++src;
   927                 RGB888_RGB332(*dst++, *src);
   928                 ++src;
   929                 RGB888_RGB332(*dst++, *src);
   930                 ++src;
   931             }
   932             switch (width & 3) {
   933             case 3:
   934                 RGB888_RGB332(*dst++, *src);
   935                 ++src;
   936             case 2:
   937                 RGB888_RGB332(*dst++, *src);
   938                 ++src;
   939             case 1:
   940                 RGB888_RGB332(*dst++, *src);
   941                 ++src;
   942             }
   943 #endif /* USE_DUFFS_LOOP */
   944             src += srcskip;
   945             dst += dstskip;
   946         }
   947     } else {
   948         int Pixel;
   949 
   950         while (height--) {
   951 #ifdef USE_DUFFS_LOOP
   952             /* *INDENT-OFF* */
   953             DUFFS_LOOP(
   954                 RGB888_RGB332(Pixel, *src);
   955                 *dst++ = map[Pixel];
   956                 ++src;
   957             , width);
   958             /* *INDENT-ON* */
   959 #else
   960             for (c = width / 4; c; --c) {
   961                 /* Pack RGB into 8bit pixel */
   962                 RGB888_RGB332(Pixel, *src);
   963                 *dst++ = map[Pixel];
   964                 ++src;
   965                 RGB888_RGB332(Pixel, *src);
   966                 *dst++ = map[Pixel];
   967                 ++src;
   968                 RGB888_RGB332(Pixel, *src);
   969                 *dst++ = map[Pixel];
   970                 ++src;
   971                 RGB888_RGB332(Pixel, *src);
   972                 *dst++ = map[Pixel];
   973                 ++src;
   974             }
   975             switch (width & 3) {
   976             case 3:
   977                 RGB888_RGB332(Pixel, *src);
   978                 *dst++ = map[Pixel];
   979                 ++src;
   980             case 2:
   981                 RGB888_RGB332(Pixel, *src);
   982                 *dst++ = map[Pixel];
   983                 ++src;
   984             case 1:
   985                 RGB888_RGB332(Pixel, *src);
   986                 *dst++ = map[Pixel];
   987                 ++src;
   988             }
   989 #endif /* USE_DUFFS_LOOP */
   990             src += srcskip;
   991             dst += dstskip;
   992         }
   993     }
   994 }
   995 
   996 /* Special optimized blit for RGB 10-10-10 --> RGB 3-3-2 */
   997 #define RGB101010_RGB332(dst, src) { \
   998     dst = (Uint8)((((src)&0x38000000)>>22)| \
   999                   (((src)&0x000E0000)>>15)| \
  1000                   (((src)&0x00000300)>>8)); \
  1001 }
  1002 static void
  1003 Blit_RGB101010_index8(SDL_BlitInfo * info)
  1004 {
  1005 #ifndef USE_DUFFS_LOOP
  1006     int c;
  1007 #endif
  1008     int width, height;
  1009     Uint32 *src;
  1010     const Uint8 *map;
  1011     Uint8 *dst;
  1012     int srcskip, dstskip;
  1013 
  1014     /* Set up some basic variables */
  1015     width = info->dst_w;
  1016     height = info->dst_h;
  1017     src = (Uint32 *) info->src;
  1018     srcskip = info->src_skip / 4;
  1019     dst = info->dst;
  1020     dstskip = info->dst_skip;
  1021     map = info->table;
  1022 
  1023     if (map == NULL) {
  1024         while (height--) {
  1025 #ifdef USE_DUFFS_LOOP
  1026             /* *INDENT-OFF* */
  1027             DUFFS_LOOP(
  1028                 RGB101010_RGB332(*dst++, *src);
  1029             , width);
  1030             /* *INDENT-ON* */
  1031 #else
  1032             for (c = width / 4; c; --c) {
  1033                 /* Pack RGB into 8bit pixel */
  1034                 ++src;
  1035                 RGB101010_RGB332(*dst++, *src);
  1036                 ++src;
  1037                 RGB101010_RGB332(*dst++, *src);
  1038                 ++src;
  1039                 RGB101010_RGB332(*dst++, *src);
  1040                 ++src;
  1041             }
  1042             switch (width & 3) {
  1043             case 3:
  1044                 RGB101010_RGB332(*dst++, *src);
  1045                 ++src;
  1046             case 2:
  1047                 RGB101010_RGB332(*dst++, *src);
  1048                 ++src;
  1049             case 1:
  1050                 RGB101010_RGB332(*dst++, *src);
  1051                 ++src;
  1052             }
  1053 #endif /* USE_DUFFS_LOOP */
  1054             src += srcskip;
  1055             dst += dstskip;
  1056         }
  1057     } else {
  1058         int Pixel;
  1059 
  1060         while (height--) {
  1061 #ifdef USE_DUFFS_LOOP
  1062             /* *INDENT-OFF* */
  1063             DUFFS_LOOP(
  1064                 RGB101010_RGB332(Pixel, *src);
  1065                 *dst++ = map[Pixel];
  1066                 ++src;
  1067             , width);
  1068             /* *INDENT-ON* */
  1069 #else
  1070             for (c = width / 4; c; --c) {
  1071                 /* Pack RGB into 8bit pixel */
  1072                 RGB101010_RGB332(Pixel, *src);
  1073                 *dst++ = map[Pixel];
  1074                 ++src;
  1075                 RGB101010_RGB332(Pixel, *src);
  1076                 *dst++ = map[Pixel];
  1077                 ++src;
  1078                 RGB101010_RGB332(Pixel, *src);
  1079                 *dst++ = map[Pixel];
  1080                 ++src;
  1081                 RGB101010_RGB332(Pixel, *src);
  1082                 *dst++ = map[Pixel];
  1083                 ++src;
  1084             }
  1085             switch (width & 3) {
  1086             case 3:
  1087                 RGB101010_RGB332(Pixel, *src);
  1088                 *dst++ = map[Pixel];
  1089                 ++src;
  1090             case 2:
  1091                 RGB101010_RGB332(Pixel, *src);
  1092                 *dst++ = map[Pixel];
  1093                 ++src;
  1094             case 1:
  1095                 RGB101010_RGB332(Pixel, *src);
  1096                 *dst++ = map[Pixel];
  1097                 ++src;
  1098             }
  1099 #endif /* USE_DUFFS_LOOP */
  1100             src += srcskip;
  1101             dst += dstskip;
  1102         }
  1103     }
  1104 }
  1105 
  1106 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
  1107 #define RGB888_RGB555(dst, src) { \
  1108     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
  1109                                 (((*src)&0x0000F800)>>6)| \
  1110                                 (((*src)&0x000000F8)>>3)); \
  1111 }
  1112 #define RGB888_RGB555_TWO(dst, src) { \
  1113     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
  1114                          (((src[HI])&0x0000F800)>>6)| \
  1115                          (((src[HI])&0x000000F8)>>3))<<16)| \
  1116                          (((src[LO])&0x00F80000)>>9)| \
  1117                          (((src[LO])&0x0000F800)>>6)| \
  1118                          (((src[LO])&0x000000F8)>>3); \
  1119 }
  1120 static void
  1121 Blit_RGB888_RGB555(SDL_BlitInfo * info)
  1122 {
  1123 #ifndef USE_DUFFS_LOOP
  1124     int c;
  1125 #endif
  1126     int width, height;
  1127     Uint32 *src;
  1128     Uint16 *dst;
  1129     int srcskip, dstskip;
  1130 
  1131     /* Set up some basic variables */
  1132     width = info->dst_w;
  1133     height = info->dst_h;
  1134     src = (Uint32 *) info->src;
  1135     srcskip = info->src_skip / 4;
  1136     dst = (Uint16 *) info->dst;
  1137     dstskip = info->dst_skip / 2;
  1138 
  1139 #ifdef USE_DUFFS_LOOP
  1140     while (height--) {
  1141         /* *INDENT-OFF* */
  1142         DUFFS_LOOP(
  1143             RGB888_RGB555(dst, src);
  1144             ++src;
  1145             ++dst;
  1146         , width);
  1147         /* *INDENT-ON* */
  1148         src += srcskip;
  1149         dst += dstskip;
  1150     }
  1151 #else
  1152     /* Memory align at 4-byte boundary, if necessary */
  1153     if ((long) dst & 0x03) {
  1154         /* Don't do anything if width is 0 */
  1155         if (width == 0) {
  1156             return;
  1157         }
  1158         --width;
  1159 
  1160         while (height--) {
  1161             /* Perform copy alignment */
  1162             RGB888_RGB555(dst, src);
  1163             ++src;
  1164             ++dst;
  1165 
  1166             /* Copy in 4 pixel chunks */
  1167             for (c = width / 4; c; --c) {
  1168                 RGB888_RGB555_TWO(dst, src);
  1169                 src += 2;
  1170                 dst += 2;
  1171                 RGB888_RGB555_TWO(dst, src);
  1172                 src += 2;
  1173                 dst += 2;
  1174             }
  1175             /* Get any leftovers */
  1176             switch (width & 3) {
  1177             case 3:
  1178                 RGB888_RGB555(dst, src);
  1179                 ++src;
  1180                 ++dst;
  1181             case 2:
  1182                 RGB888_RGB555_TWO(dst, src);
  1183                 src += 2;
  1184                 dst += 2;
  1185                 break;
  1186             case 1:
  1187                 RGB888_RGB555(dst, src);
  1188                 ++src;
  1189                 ++dst;
  1190                 break;
  1191             }
  1192             src += srcskip;
  1193             dst += dstskip;
  1194         }
  1195     } else {
  1196         while (height--) {
  1197             /* Copy in 4 pixel chunks */
  1198             for (c = width / 4; c; --c) {
  1199                 RGB888_RGB555_TWO(dst, src);
  1200                 src += 2;
  1201                 dst += 2;
  1202                 RGB888_RGB555_TWO(dst, src);
  1203                 src += 2;
  1204                 dst += 2;
  1205             }
  1206             /* Get any leftovers */
  1207             switch (width & 3) {
  1208             case 3:
  1209                 RGB888_RGB555(dst, src);
  1210                 ++src;
  1211                 ++dst;
  1212             case 2:
  1213                 RGB888_RGB555_TWO(dst, src);
  1214                 src += 2;
  1215                 dst += 2;
  1216                 break;
  1217             case 1:
  1218                 RGB888_RGB555(dst, src);
  1219                 ++src;
  1220                 ++dst;
  1221                 break;
  1222             }
  1223             src += srcskip;
  1224             dst += dstskip;
  1225         }
  1226     }
  1227 #endif /* USE_DUFFS_LOOP */
  1228 }
  1229 
  1230 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1231 #define RGB888_RGB565(dst, src) { \
  1232     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1233                                 (((*src)&0x0000FC00)>>5)| \
  1234                                 (((*src)&0x000000F8)>>3)); \
  1235 }
  1236 #define RGB888_RGB565_TWO(dst, src) { \
  1237     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1238                          (((src[HI])&0x0000FC00)>>5)| \
  1239                          (((src[HI])&0x000000F8)>>3))<<16)| \
  1240                          (((src[LO])&0x00F80000)>>8)| \
  1241                          (((src[LO])&0x0000FC00)>>5)| \
  1242                          (((src[LO])&0x000000F8)>>3); \
  1243 }
  1244 static void
  1245 Blit_RGB888_RGB565(SDL_BlitInfo * info)
  1246 {
  1247 #ifndef USE_DUFFS_LOOP
  1248     int c;
  1249 #endif
  1250     int width, height;
  1251     Uint32 *src;
  1252     Uint16 *dst;
  1253     int srcskip, dstskip;
  1254 
  1255     /* Set up some basic variables */
  1256     width = info->dst_w;
  1257     height = info->dst_h;
  1258     src = (Uint32 *) info->src;
  1259     srcskip = info->src_skip / 4;
  1260     dst = (Uint16 *) info->dst;
  1261     dstskip = info->dst_skip / 2;
  1262 
  1263 #ifdef USE_DUFFS_LOOP
  1264     while (height--) {
  1265         /* *INDENT-OFF* */
  1266         DUFFS_LOOP(
  1267             RGB888_RGB565(dst, src);
  1268             ++src;
  1269             ++dst;
  1270         , width);
  1271         /* *INDENT-ON* */
  1272         src += srcskip;
  1273         dst += dstskip;
  1274     }
  1275 #else
  1276     /* Memory align at 4-byte boundary, if necessary */
  1277     if ((long) dst & 0x03) {
  1278         /* Don't do anything if width is 0 */
  1279         if (width == 0) {
  1280             return;
  1281         }
  1282         --width;
  1283 
  1284         while (height--) {
  1285             /* Perform copy alignment */
  1286             RGB888_RGB565(dst, src);
  1287             ++src;
  1288             ++dst;
  1289 
  1290             /* Copy in 4 pixel chunks */
  1291             for (c = width / 4; c; --c) {
  1292                 RGB888_RGB565_TWO(dst, src);
  1293                 src += 2;
  1294                 dst += 2;
  1295                 RGB888_RGB565_TWO(dst, src);
  1296                 src += 2;
  1297                 dst += 2;
  1298             }
  1299             /* Get any leftovers */
  1300             switch (width & 3) {
  1301             case 3:
  1302                 RGB888_RGB565(dst, src);
  1303                 ++src;
  1304                 ++dst;
  1305             case 2:
  1306                 RGB888_RGB565_TWO(dst, src);
  1307                 src += 2;
  1308                 dst += 2;
  1309                 break;
  1310             case 1:
  1311                 RGB888_RGB565(dst, src);
  1312                 ++src;
  1313                 ++dst;
  1314                 break;
  1315             }
  1316             src += srcskip;
  1317             dst += dstskip;
  1318         }
  1319     } else {
  1320         while (height--) {
  1321             /* Copy in 4 pixel chunks */
  1322             for (c = width / 4; c; --c) {
  1323                 RGB888_RGB565_TWO(dst, src);
  1324                 src += 2;
  1325                 dst += 2;
  1326                 RGB888_RGB565_TWO(dst, src);
  1327                 src += 2;
  1328                 dst += 2;
  1329             }
  1330             /* Get any leftovers */
  1331             switch (width & 3) {
  1332             case 3:
  1333                 RGB888_RGB565(dst, src);
  1334                 ++src;
  1335                 ++dst;
  1336             case 2:
  1337                 RGB888_RGB565_TWO(dst, src);
  1338                 src += 2;
  1339                 dst += 2;
  1340                 break;
  1341             case 1:
  1342                 RGB888_RGB565(dst, src);
  1343                 ++src;
  1344                 ++dst;
  1345                 break;
  1346             }
  1347             src += srcskip;
  1348             dst += dstskip;
  1349         }
  1350     }
  1351 #endif /* USE_DUFFS_LOOP */
  1352 }
  1353 
  1354 
  1355 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1356 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1357 static void
  1358 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
  1359 {
  1360 #ifndef USE_DUFFS_LOOP
  1361     int c;
  1362 #endif
  1363     int width, height;
  1364     Uint8 *src;
  1365     Uint32 *dst;
  1366     int srcskip, dstskip;
  1367 
  1368     /* Set up some basic variables */
  1369     width = info->dst_w;
  1370     height = info->dst_h;
  1371     src = (Uint8 *) info->src;
  1372     srcskip = info->src_skip;
  1373     dst = (Uint32 *) info->dst;
  1374     dstskip = info->dst_skip / 4;
  1375 
  1376 #ifdef USE_DUFFS_LOOP
  1377     while (height--) {
  1378         /* *INDENT-OFF* */
  1379         DUFFS_LOOP(
  1380         {
  1381             *dst++ = RGB565_32(dst, src, map);
  1382             src += 2;
  1383         },
  1384         width);
  1385         /* *INDENT-ON* */
  1386         src += srcskip;
  1387         dst += dstskip;
  1388     }
  1389 #else
  1390     while (height--) {
  1391         /* Copy in 4 pixel chunks */
  1392         for (c = width / 4; c; --c) {
  1393             *dst++ = RGB565_32(dst, src, map);
  1394             src += 2;
  1395             *dst++ = RGB565_32(dst, src, map);
  1396             src += 2;
  1397             *dst++ = RGB565_32(dst, src, map);
  1398             src += 2;
  1399             *dst++ = RGB565_32(dst, src, map);
  1400             src += 2;
  1401         }
  1402         /* Get any leftovers */
  1403         switch (width & 3) {
  1404         case 3:
  1405             *dst++ = RGB565_32(dst, src, map);
  1406             src += 2;
  1407         case 2:
  1408             *dst++ = RGB565_32(dst, src, map);
  1409             src += 2;
  1410         case 1:
  1411             *dst++ = RGB565_32(dst, src, map);
  1412             src += 2;
  1413             break;
  1414         }
  1415         src += srcskip;
  1416         dst += dstskip;
  1417     }
  1418 #endif /* USE_DUFFS_LOOP */
  1419 }
  1420 
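/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8.
 *
 * Lookup table read by RGB565_32() as map[low_byte * 2] + map[high_byte * 2 + 1]:
 * even entries hold the contribution of the pixel's low byte, odd entries the
 * contribution of its high byte with the alpha byte already set to 0xff.
 */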
  1422 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1423     0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1424     0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1425     0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1426     0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1427     0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1428     0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1429     0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1430     0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1431     0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1432     0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1433     0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1434     0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1435     0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1436     0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1437     0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1438     0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1439     0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1440     0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1441     0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1442     0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1443     0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1444     0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1445     0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1446     0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1447     0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1448     0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1449     0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1450     0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1451     0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1452     0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1453     0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1454     0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1455     0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1456     0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1457     0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1458     0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1459     0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1460     0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1461     0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1462     0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1463     0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1464     0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1465     0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1466     0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1467     0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1468     0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1469     0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1470     0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1471     0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1472     0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1473     0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1474     0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1475     0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1476     0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1477     0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1478     0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1479     0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1480     0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1481     0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1482     0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1483     0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1484     0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1485     0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1486     0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1487     0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1488     0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1489     0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1490     0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1491     0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1492     0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1493     0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1494     0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1495     0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1496     0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1497     0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1498     0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1499     0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1500     0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1501     0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1502     0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1503     0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1504     0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1505     0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1506     0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1507     0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1508     0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1509     0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1510     0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1511     0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1512     0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1513     0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1514     0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1515     0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1516     0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1517     0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1518     0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1519     0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1520     0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1521     0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1522     0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1523     0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1524     0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1525     0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1526     0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1527     0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1528     0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1529     0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1530     0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1531     0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1532     0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1533     0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1534     0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1535     0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1536     0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1537     0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1538     0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1539     0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1540     0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1541     0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1542     0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1543     0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1544     0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1545     0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1546     0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1547     0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1548     0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1549     0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1550     0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1551 };
  1552 
  1553 static void
  1554 Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
  1555 {
  1556     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1557 }
  1558 
/* Lookup table for the special optimized RGB 5-6-5 --> ABGR 8-8-8-8 blit
   (handed to Blit_RGB565_32 by Blit_RGB565_ABGR8888) */
  1560 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1561     0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1562     0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1563     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1564     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1565     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1566     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1567     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1568     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1569     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1570     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1571     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1572     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1573     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1574     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1575     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1576     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1577     0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1578     0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1579     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1580     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1581     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1582     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1583     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1584     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1585     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1586     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1587     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1588     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1589     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1590     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1591     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1592     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1593     0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1594     0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1595     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1596     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1597     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1598     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1599     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1600     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1601     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1602     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1603     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1604     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1605     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1606     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1607     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1608     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1609     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1610     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1611     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1612     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1613     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1614     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1615     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1616     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1617     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1618     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1619     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1620     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1621     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1622     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1623     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1624     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1625     0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1626     0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1627     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1628     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1629     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1630     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1631     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1632     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1633     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1634     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1635     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1636     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1637     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1638     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1639     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1640     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1641     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1642     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1643     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1644     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1645     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1646     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1647     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1648     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1649     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1650     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1651     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1652     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1653     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1654     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1655     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1656     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1657     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1658     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1659     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1660     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1661     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1662     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1663     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1664     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1665     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1666     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1667     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1668     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1669     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1670     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1671     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1672     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1673     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1674     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1675     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1676     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1677     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1678     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1679     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1680     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1681     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1682     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1683     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1684     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1685     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1686     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1687     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1688     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1689 };
  1690 
  1691 static void
  1692 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
  1693 {
  1694     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1695 }
  1696 
/* Lookup table for the special optimized RGB 5-6-5 --> RGBA 8-8-8-8 blit
   (handed to Blit_RGB565_32 by Blit_RGB565_RGBA8888) */
  1698 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1699     0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1700     0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1701     0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1702     0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1703     0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1704     0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1705     0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1706     0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1707     0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1708     0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1709     0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1710     0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1711     0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1712     0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1713     0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1714     0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1715     0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1716     0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1717     0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1718     0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1719     0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1720     0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1721     0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1722     0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1723     0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1724     0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1725     0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1726     0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1727     0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1728     0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1729     0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1730     0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1731     0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1732     0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1733     0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1734     0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1735     0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1736     0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1737     0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1738     0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1739     0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1740     0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1741     0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1742     0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1743     0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1744     0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1745     0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1746     0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1747     0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1748     0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1749     0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1750     0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1751     0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1752     0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1753     0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1754     0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1755     0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1756     0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1757     0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1758     0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1759     0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1760     0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1761     0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1762     0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1763     0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1764     0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1765     0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1766     0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1767     0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1768     0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1769     0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1770     0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1771     0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1772     0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1773     0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1774     0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1775     0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1776     0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1777     0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1778     0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1779     0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1780     0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1781     0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1782     0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1783     0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1784     0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1785     0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1786     0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1787     0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1788     0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1789     0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1790     0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1791     0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1792     0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1793     0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1794     0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1795     0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1796     0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1797     0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1798     0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1799     0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1800     0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1801     0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1802     0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1803     0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1804     0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1805     0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1806     0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1807     0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1808     0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1809     0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1810     0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1811     0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1812     0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1813     0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1814     0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1815     0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1816     0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1817     0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1818     0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1819     0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1820     0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1821     0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1822     0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1823     0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1824     0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1825     0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1826     0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1827 };
  1828 
  1829 static void
  1830 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
  1831 {
  1832     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1833 }
  1834 
/* Lookup table for the special optimized RGB 5-6-5 --> BGRA 8-8-8-8 blit
   (handed to Blit_RGB565_32 by Blit_RGB565_BGRA8888) */
  1836 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1837     0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1838     0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1839     0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1840     0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1841     0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1842     0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1843     0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1844     0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1845     0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1846     0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1847     0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1848     0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1849     0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1850     0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1851     0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1852     0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1853     0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1854     0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1855     0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1856     0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1857     0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1858     0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1859     0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1860     0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1861     0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1862     0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1863     0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1864     0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1865     0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1866     0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1867     0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1868     0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1869     0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1870     0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1871     0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1872     0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1873     0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1874     0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1875     0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1876     0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1877     0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1878     0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1879     0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1880     0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1881     0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1882     0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1883     0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1884     0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1885     0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1886     0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1887     0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1888     0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1889     0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1890     0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1891     0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1892     0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1893     0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1894     0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1895     0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1896     0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1897     0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1898     0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1899     0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1900     0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1901     0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1902     0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1903     0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1904     0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1905     0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1906     0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1907     0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1908     0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1909     0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1910     0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1911     0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1912     0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1913     0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1914     0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1915     0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1916     0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1917     0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1918     0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1919     0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1920     0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1921     0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1922     0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1923     0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1924     0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1925     0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1926     0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1927     0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1928     0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1929     0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1930     0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1931     0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1932     0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1933     0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1934     0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1935     0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1936     0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1937     0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1938     0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1939     0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1940     0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1941     0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1942     0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1943     0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1944     0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1945     0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1946     0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1947     0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1948     0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1949     0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1950     0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1951     0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1952     0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1953     0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1954     0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1955     0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1956     0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1957     0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1958     0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1959     0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1960     0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1961     0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1962     0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1963     0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1964     0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1965 };
  1966 
  1967 static void
  1968 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
  1969 {
  1970     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1971 }
  1972 
  1973 static void
  1974 BlitNto1(SDL_BlitInfo * info)
  1975 {
  1976 #ifndef USE_DUFFS_LOOP
  1977     int c;
  1978 #endif
  1979     int width, height;
  1980     Uint8 *src;
  1981     const Uint8 *map;
  1982     Uint8 *dst;
  1983     int srcskip, dstskip;
  1984     int srcbpp;
  1985     Uint32 Pixel;
  1986     int sR, sG, sB;
  1987     SDL_PixelFormat *srcfmt;
  1988 
  1989     /* Set up some basic variables */
  1990     width = info->dst_w;
  1991     height = info->dst_h;
  1992     src = info->src;
  1993     srcskip = info->src_skip;
  1994     dst = info->dst;
  1995     dstskip = info->dst_skip;
  1996     map = info->table;
  1997     srcfmt = info->src_fmt;
  1998     srcbpp = srcfmt->BytesPerPixel;
  1999 
  2000     if (map == NULL) {
  2001         while (height--) {
  2002 #ifdef USE_DUFFS_LOOP
  2003             /* *INDENT-OFF* */
  2004             DUFFS_LOOP(
  2005                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2006                                 sR, sG, sB);
  2007                 if ( 1 ) {
  2008                     /* Pack RGB into 8bit pixel */
  2009                     *dst = ((sR>>5)<<(3+2))|
  2010                             ((sG>>5)<<(2)) |
  2011                             ((sB>>6)<<(0)) ;
  2012                 }
  2013                 dst++;
  2014                 src += srcbpp;
  2015             , width);
  2016             /* *INDENT-ON* */
  2017 #else
  2018             for (c = width; c; --c) {
  2019                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2020                 if (1) {
  2021                     /* Pack RGB into 8bit pixel */
  2022                     *dst = ((sR >> 5) << (3 + 2)) |
  2023                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
  2024                 }
  2025                 dst++;
  2026                 src += srcbpp;
  2027             }
  2028 #endif
  2029             src += srcskip;
  2030             dst += dstskip;
  2031         }
  2032     } else {
  2033         while (height--) {
  2034 #ifdef USE_DUFFS_LOOP
  2035             /* *INDENT-OFF* */
  2036             DUFFS_LOOP(
  2037                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2038                                 sR, sG, sB);
  2039                 if ( 1 ) {
  2040                     /* Pack RGB into 8bit pixel */
  2041                     *dst = map[((sR>>5)<<(3+2))|
  2042                            ((sG>>5)<<(2))  |
  2043                            ((sB>>6)<<(0))  ];
  2044                 }
  2045                 dst++;
  2046                 src += srcbpp;
  2047             , width);
  2048             /* *INDENT-ON* */
  2049 #else
  2050             for (c = width; c; --c) {
  2051                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2052                 if (1) {
  2053                     /* Pack RGB into 8bit pixel */
  2054                     *dst = map[((sR >> 5) << (3 + 2)) |
  2055                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
  2056                 }
  2057                 dst++;
  2058                 src += srcbpp;
  2059             }
  2060 #endif /* USE_DUFFS_LOOP */
  2061             src += srcskip;
  2062             dst += dstskip;
  2063         }
  2064     }
  2065 }
  2066 
  2067 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  2068 static void
  2069 Blit4to4MaskAlpha(SDL_BlitInfo * info)
  2070 {
  2071     int width = info->dst_w;
  2072     int height = info->dst_h;
  2073     Uint32 *src = (Uint32 *) info->src;
  2074     int srcskip = info->src_skip;
  2075     Uint32 *dst = (Uint32 *) info->dst;
  2076     int dstskip = info->dst_skip;
  2077     SDL_PixelFormat *srcfmt = info->src_fmt;
  2078     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2079 
  2080     if (dstfmt->Amask) {
  2081         /* RGB->RGBA, SET_ALPHA */
  2082         Uint32 mask = (info->a >> dstfmt->Aloss) << dstfmt->Ashift;
  2083 
  2084         while (height--) {
  2085             /* *INDENT-OFF* */
  2086             DUFFS_LOOP(
  2087             {
  2088                 *dst = *src | mask;
  2089                 ++dst;
  2090                 ++src;
  2091             },
  2092             width);
  2093             /* *INDENT-ON* */
  2094             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2095             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2096         }
  2097     } else {
  2098         /* RGBA->RGB, NO_ALPHA */
  2099         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2100 
  2101         while (height--) {
  2102             /* *INDENT-OFF* */
  2103             DUFFS_LOOP(
  2104             {
  2105                 *dst = *src & mask;
  2106                 ++dst;
  2107                 ++src;
  2108             },
  2109             width);
  2110             /* *INDENT-ON* */
  2111             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2112             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2113         }
  2114     }
  2115 }
  2116 
  2117 /* blits 32 bit RGBA<->RGBA with both surfaces having the same R,G,B,A fields */
  2118 static void
  2119 Blit4to4CopyAlpha(SDL_BlitInfo * info)
  2120 {
  2121     int width = info->dst_w;
  2122     int height = info->dst_h;
  2123     Uint32 *src = (Uint32 *) info->src;
  2124     int srcskip = info->src_skip;
  2125     Uint32 *dst = (Uint32 *) info->dst;
  2126     int dstskip = info->dst_skip;
  2127 
  2128     /* RGBA->RGBA, COPY_ALPHA */
  2129     while (height--) {
  2130         /* *INDENT-OFF* */
  2131         DUFFS_LOOP(
  2132         {
  2133             *dst = *src;
  2134             ++dst;
  2135             ++src;
  2136         },
  2137         width);
  2138         /* *INDENT-ON* */
  2139         src = (Uint32 *) ((Uint8 *) src + srcskip);
  2140         dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2141     }
  2142 }
  2143 
  2144 static void
  2145 BlitNtoN(SDL_BlitInfo * info)
  2146 {
  2147     int width = info->dst_w;
  2148     int height = info->dst_h;
  2149     Uint8 *src = info->src;
  2150     int srcskip = info->src_skip;
  2151     Uint8 *dst = info->dst;
  2152     int dstskip = info->dst_skip;
  2153     SDL_PixelFormat *srcfmt = info->src_fmt;
  2154     int srcbpp = srcfmt->BytesPerPixel;
  2155     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2156     int dstbpp = dstfmt->BytesPerPixel;
  2157     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2158 
  2159     while (height--) {
  2160         /* *INDENT-OFF* */
  2161         DUFFS_LOOP(
  2162         {
  2163             Uint32 Pixel;
  2164             unsigned sR;
  2165             unsigned sG;
  2166             unsigned sB;
  2167             DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2168             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2169             dst += dstbpp;
  2170             src += srcbpp;
  2171         },
  2172         width);
  2173         /* *INDENT-ON* */
  2174         src += srcskip;
  2175         dst += dstskip;
  2176     }
  2177 }
  2178 
  2179 static void
  2180 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
  2181 {
  2182     int width = info->dst_w;
  2183     int height = info->dst_h;
  2184     Uint8 *src = info->src;
  2185     int srcskip = info->src_skip;
  2186     Uint8 *dst = info->dst;
  2187     int dstskip = info->dst_skip;
  2188     SDL_PixelFormat *srcfmt = info->src_fmt;
  2189     int srcbpp = srcfmt->BytesPerPixel;
  2190     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2191     int dstbpp = dstfmt->BytesPerPixel;
  2192     int c;
  2193 
  2194     while (height--) {
  2195         for (c = width; c; --c) {
  2196             Uint32 Pixel;
  2197             unsigned sR, sG, sB, sA;
  2198             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2199             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2200             dst += dstbpp;
  2201             src += srcbpp;
  2202         }
  2203         src += srcskip;
  2204         dst += dstskip;
  2205     }
  2206 }
  2207 
  2208 static void
  2209 BlitNto1Key(SDL_BlitInfo * info)
  2210 {
  2211     int width = info->dst_w;
  2212     int height = info->dst_h;
  2213     Uint8 *src = info->src;
  2214     int srcskip = info->src_skip;
  2215     Uint8 *dst = info->dst;
  2216     int dstskip = info->dst_skip;
  2217     SDL_PixelFormat *srcfmt = info->src_fmt;
  2218     const Uint8 *palmap = info->table;
  2219     Uint32 ckey = info->colorkey;
  2220     Uint32 rgbmask = ~srcfmt->Amask;
  2221     int srcbpp;
  2222     Uint32 Pixel;
  2223     unsigned sR, sG, sB;
  2224 
  2225     /* Set up some basic variables */
  2226     srcbpp = srcfmt->BytesPerPixel;
  2227     ckey &= rgbmask;
  2228 
  2229     if (palmap == NULL) {
  2230         while (height--) {
  2231             /* *INDENT-OFF* */
  2232             DUFFS_LOOP(
  2233             {
  2234                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2235                                 sR, sG, sB);
  2236                 if ( (Pixel & rgbmask) != ckey ) {
  2237                     /* Pack RGB into 8bit pixel */
  2238                     *dst = (Uint8)(((sR>>5)<<(3+2))|
  2239                                    ((sG>>5)<<(2)) |
  2240                                    ((sB>>6)<<(0)));
  2241                 }
  2242                 dst++;
  2243                 src += srcbpp;
  2244             },
  2245             width);
  2246             /* *INDENT-ON* */
  2247             src += srcskip;
  2248             dst += dstskip;
  2249         }
  2250     } else {
  2251         while (height--) {
  2252             /* *INDENT-OFF* */
  2253             DUFFS_LOOP(
  2254             {
  2255                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2256                                 sR, sG, sB);
  2257                 if ( (Pixel & rgbmask) != ckey ) {
  2258                     /* Pack RGB into 8bit pixel */
  2259                     *dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2260                                          ((sG>>5)<<(2))  |
  2261                                          ((sB>>6)<<(0))  ];
  2262                 }
  2263                 dst++;
  2264                 src += srcbpp;
  2265             },
  2266             width);
  2267             /* *INDENT-ON* */
  2268             src += srcskip;
  2269             dst += dstskip;
  2270         }
  2271     }
  2272 }
  2273 
  2274 static void
  2275 Blit2to2Key(SDL_BlitInfo * info)
  2276 {
  2277     int width = info->dst_w;
  2278     int height = info->dst_h;
  2279     Uint16 *srcp = (Uint16 *) info->src;
  2280     int srcskip = info->src_skip;
  2281     Uint16 *dstp = (Uint16 *) info->dst;
  2282     int dstskip = info->dst_skip;
  2283     Uint32 ckey = info->colorkey;
  2284     Uint32 rgbmask = ~info->src_fmt->Amask;
  2285 
  2286     /* Set up some basic variables */
  2287     srcskip /= 2;
  2288     dstskip /= 2;
  2289     ckey &= rgbmask;
  2290 
  2291     while (height--) {
  2292         /* *INDENT-OFF* */
  2293         DUFFS_LOOP(
  2294         {
  2295             if ( (*srcp & rgbmask) != ckey ) {
  2296                 *dstp = *srcp;
  2297             }
  2298             dstp++;
  2299             srcp++;
  2300         },
  2301         width);
  2302         /* *INDENT-ON* */
  2303         srcp += srcskip;
  2304         dstp += dstskip;
  2305     }
  2306 }
  2307 
  2308 static void
  2309 BlitNtoNKey(SDL_BlitInfo * info)
  2310 {
  2311     int width = info->dst_w;
  2312     int height = info->dst_h;
  2313     Uint8 *src = info->src;
  2314     int srcskip = info->src_skip;
  2315     Uint8 *dst = info->dst;
  2316     int dstskip = info->dst_skip;
  2317     Uint32 ckey = info->colorkey;
  2318     SDL_PixelFormat *srcfmt = info->src_fmt;
  2319     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2320     int srcbpp = srcfmt->BytesPerPixel;
  2321     int dstbpp = dstfmt->BytesPerPixel;
  2322     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2323     Uint32 rgbmask = ~srcfmt->Amask;
  2324 
  2325     /* Set up some basic variables */
  2326     ckey &= rgbmask;
  2327 
  2328     while (height--) {
  2329         /* *INDENT-OFF* */
  2330         DUFFS_LOOP(
  2331         {
  2332             Uint32 Pixel;
  2333             unsigned sR;
  2334             unsigned sG;
  2335             unsigned sB;
  2336             RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2337             if ( (Pixel & rgbmask) != ckey ) {
  2338                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2339                 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2340             }
  2341             dst += dstbpp;
  2342             src += srcbpp;
  2343         },
  2344         width);
  2345         /* *INDENT-ON* */
  2346         src += srcskip;
  2347         dst += dstskip;
  2348     }
  2349 }
  2350 
  2351 static void
  2352 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
  2353 {
  2354     int width = info->dst_w;
  2355     int height = info->dst_h;
  2356     Uint8 *src = info->src;
  2357     int srcskip = info->src_skip;
  2358     Uint8 *dst = info->dst;
  2359     int dstskip = info->dst_skip;
  2360     Uint32 ckey = info->colorkey;
  2361     SDL_PixelFormat *srcfmt = info->src_fmt;
  2362     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2363     Uint32 rgbmask = ~srcfmt->Amask;
  2364 
  2365     Uint8 srcbpp;
  2366     Uint8 dstbpp;
  2367     Uint32 Pixel;
  2368     unsigned sR, sG, sB, sA;
  2369 
  2370     /* Set up some basic variables */
  2371     srcbpp = srcfmt->BytesPerPixel;
  2372     dstbpp = dstfmt->BytesPerPixel;
  2373     ckey &= rgbmask;
  2374 
  2375     while (height--) {
  2376         /* *INDENT-OFF* */
  2377         DUFFS_LOOP(
  2378         {
  2379             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2380             if ( (Pixel & rgbmask) != ckey ) {
  2381                   ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2382             }
  2383             dst += dstbpp;
  2384             src += srcbpp;
  2385         },
  2386         width);
  2387         /* *INDENT-ON* */
  2388         src += srcskip;
  2389         dst += dstskip;
  2390     }
  2391 }
  2392 
  2393 /* Special optimized blit for ARGB 2-10-10-10 --> RGBA */
  2394 static void
  2395 Blit2101010toN(SDL_BlitInfo * info)
  2396 {
  2397     int width = info->dst_w;
  2398     int height = info->dst_h;
  2399     Uint8 *src = info->src;
  2400     int srcskip = info->src_skip;
  2401     Uint8 *dst = info->dst;
  2402     int dstskip = info->dst_skip;
  2403     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2404     int dstbpp = dstfmt->BytesPerPixel;
  2405     Uint32 Pixel;
  2406     unsigned sR, sG, sB, sA;
  2407 
  2408     while (height--) {
  2409         /* *INDENT-OFF* */
  2410         DUFFS_LOOP(
  2411         {
  2412             Pixel = *(Uint32 *)src;
  2413             RGBA_FROM_ARGB2101010(Pixel, sR, sG, sB, sA);
  2414             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2415             dst += dstbpp;
  2416             src += 4;
  2417         },
  2418         width);
  2419         /* *INDENT-ON* */
  2420         src += srcskip;
  2421         dst += dstskip;
  2422     }
  2423 }
  2424 
  2425 /* Special optimized blit for RGBA --> ARGB 2-10-10-10 */
  2426 static void
  2427 BlitNto2101010(SDL_BlitInfo * info)
  2428 {
  2429     int width = info->dst_w;
  2430     int height = info->dst_h;
  2431     Uint8 *src = info->src;
  2432     int srcskip = info->src_skip;
  2433     Uint8 *dst = info->dst;
  2434     int dstskip = info->dst_skip;
  2435     SDL_PixelFormat *srcfmt = info->src_fmt;
  2436     int srcbpp = srcfmt->BytesPerPixel;
  2437     Uint32 Pixel;
  2438     unsigned sR, sG, sB, sA;
  2439 
  2440     while (height--) {
  2441         /* *INDENT-OFF* */
  2442         DUFFS_LOOP(
  2443         {
  2444             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2445             ARGB2101010_FROM_RGBA(Pixel, sR, sG, sB, sA);
  2446             *(Uint32 *)dst = Pixel;
  2447             dst += 4;
  2448             src += srcbpp;
  2449         },
  2450         width);
  2451         /* *INDENT-ON* */
  2452         src += srcskip;
  2453         dst += dstskip;
  2454     }
  2455 }
  2456 
  2457 /* Normal N to N optimized blitters */
  2458 struct blit_table
  2459 {
  2460     Uint32 srcR, srcG, srcB;
  2461     int dstbpp;
  2462     Uint32 dstR, dstG, dstB;
  2463     Uint32 blit_features;
  2464     SDL_BlitFunc blitfunc;
  2465     enum
  2466     { NO_ALPHA = 1, SET_ALPHA = 2, COPY_ALPHA = 4 } alpha;
  2467 };
  2468 static const struct blit_table normal_blit_1[] = {
  2469     /* Default for 8-bit RGB source, never optimized */
  2470     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2471 };
  2472 
  2473 static const struct blit_table normal_blit_2[] = {
  2474 #if SDL_ALTIVEC_BLITTERS
  2475     /* has-altivec */
  2476     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
  2477      2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2478     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
  2479      2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2480 #endif
  2481     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
  2482      0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2483     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
  2484      0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2485     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
  2486      0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2487     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
  2488      0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2489 
  2490     /* Default for 16-bit RGB source, used if no other blitter matches */
  2491     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2492 };
  2493 
  2494 static const struct blit_table normal_blit_3[] = {
  2495     /* Default for 24-bit RGB source, never optimized */
  2496     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2497 };
  2498 
  2499 static const struct blit_table normal_blit_4[] = {
  2500 #if SDL_ALTIVEC_BLITTERS
  2501     /* has-altivec | dont-use-prefetch */
  2502     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
  2503      6, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2504     /* has-altivec */
  2505     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
  2506      2, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2507     /* has-altivec */
  2508     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
  2509      2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
  2510 #endif
  2511     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
  2512      0, Blit_RGB888_RGB565, NO_ALPHA},
  2513     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
  2514      0, Blit_RGB888_RGB555, NO_ALPHA},
  2515     /* Default for 32-bit RGB source, used if no other blitter matches */
  2516     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2517 };
  2518 
  2519 static const struct blit_table *const normal_blit[] = {
  2520     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2521 };
  2522 
  2523 /* Mask matches table, or table entry is zero */
  2524 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2525 
  2526 SDL_BlitFunc
  2527 SDL_CalculateBlitN(SDL_Surface * surface)
  2528 {
  2529     SDL_PixelFormat *srcfmt;
  2530     SDL_PixelFormat *dstfmt;
  2531     const struct blit_table *table;
  2532     int which;
  2533     SDL_BlitFunc blitfun;
  2534 
  2535     /* Set up data for choosing the blit */
  2536     srcfmt = surface->format;
  2537     dstfmt = surface->map->dst->format;
  2538 
  2539     /* We don't support destinations less than 8-bits */
  2540     if (dstfmt->BitsPerPixel < 8) {
  2541         return (NULL);
  2542     }
  2543 
  2544     switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
  2545     case 0:
  2546         blitfun = NULL;
  2547         if (dstfmt->BitsPerPixel == 8) {
  2548             if ((srcfmt->BytesPerPixel == 4) &&
  2549                 (srcfmt->Rmask == 0x00FF0000) &&
  2550                 (srcfmt->Gmask == 0x0000FF00) &&
  2551                 (srcfmt->Bmask == 0x000000FF)) {
  2552                 blitfun = Blit_RGB888_index8;
  2553             } else if ((srcfmt->BytesPerPixel == 4) &&
  2554                 (srcfmt->Rmask == 0x3FF00000) &&
  2555                 (srcfmt->Gmask == 0x000FFC00) &&
  2556                 (srcfmt->Bmask == 0x000003FF)) {
  2557                 blitfun = Blit_RGB101010_index8;
  2558             } else {
  2559                 blitfun = BlitNto1;
  2560             }
  2561         } else {
  2562             /* Now the meat, choose the blitter we want */
  2563             int a_need = NO_ALPHA;
  2564             if (dstfmt->Amask)
  2565                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2566             table = normal_blit[srcfmt->BytesPerPixel - 1];
  2567             for (which = 0; table[which].dstbpp; ++which) {
  2568                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2569                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2570                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2571                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2572                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2573                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2574                     dstfmt->BytesPerPixel == table[which].dstbpp &&
  2575                     (a_need & table[which].alpha) == a_need &&
  2576                     ((table[which].blit_features & GetBlitFeatures()) ==
  2577                      table[which].blit_features))
  2578                     break;
  2579             }
  2580             blitfun = table[which].blitfunc;
  2581 
  2582             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2583                 if (srcfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
  2584                     blitfun = Blit2101010toN;
  2585                 } else if (dstfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
  2586                     blitfun = BlitNto2101010;
  2587                 } else if (srcfmt->BytesPerPixel == 4 &&
  2588                             dstfmt->BytesPerPixel == 4 &&
  2589                             srcfmt->Rmask == dstfmt->Rmask &&
  2590                             srcfmt->Gmask == dstfmt->Gmask &&
  2591                             srcfmt->Bmask == dstfmt->Bmask) {
  2592                     if (a_need == COPY_ALPHA) {
  2593                         if (srcfmt->Amask == dstfmt->Amask) {
  2594                             /* Fastpath C fallback: 32bit RGBA<->RGBA blit with matching RGBA */
  2595                             blitfun = Blit4to4CopyAlpha;
  2596                         } else {
  2597                             blitfun = BlitNtoNCopyAlpha;
  2598                         }
  2599                     } else {
  2600                         /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2601                         blitfun = Blit4to4MaskAlpha;
  2602                     }
  2603                 } else if (a_need == COPY_ALPHA) {
  2604                     blitfun = BlitNtoNCopyAlpha;
  2605                 }
  2606             }
  2607         }
  2608         return (blitfun);
  2609 
  2610     case SDL_COPY_COLORKEY:
  2611         /* colorkey blit: Here we don't have too many options, mostly
  2612            because RLE is the preferred fast way to deal with this.
  2613            If a particular case turns out to be useful we'll add it. */
  2614 
  2615         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
  2616             return Blit2to2Key;
  2617         else if (dstfmt->BytesPerPixel == 1)
  2618             return BlitNto1Key;
  2619         else {
  2620 #if SDL_ALTIVEC_BLITTERS
  2621             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
  2622                 && SDL_HasAltiVec()) {
  2623                 return Blit32to32KeyAltivec;
  2624             } else
  2625 #endif
  2626             if (srcfmt->Amask && dstfmt->Amask) {
  2627                 return BlitNtoNKeyCopyAlpha;
  2628             } else {
  2629                 return BlitNtoNKey;
  2630             }
  2631         }
  2632     }
  2633 
  2634     return NULL;
  2635 }
  2636 
  2637 /* vi: set ts=4 sw=4 expandtab: */