src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Sat, 19 Oct 2019 01:54:02 -0700
changeset 13150 29acbbbb41b2
parent 13064 9aa7aea27dec
permissions -rw-r--r--
Don't try to use the Xbox HID protocol with the NVIDIA Shield controllers
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 #include "SDL_video.h"
    24 #include "SDL_endian.h"
    25 #include "SDL_cpuinfo.h"
    26 #include "SDL_blit.h"
    27 
    28 #include "SDL_assert.h"
    29 
    30 /* General optimized routines that write char by char */
    31 #define HAVE_FAST_WRITE_INT8 1
    32 
    33 /* On some CPUs, that is slower than combining the bytes and writing a word */
    34 #if defined(__MIPS__) 
    35 #  undef  HAVE_FAST_WRITE_INT8
    36 #  define HAVE_FAST_WRITE_INT8 0
    37 #endif
    38 
    39 /* Functions to blit from N-bit surfaces to other surfaces */
    40 
    41 #if SDL_ALTIVEC_BLITTERS
    42 #ifdef HAVE_ALTIVEC_H
    43 #include <altivec.h>
    44 #endif
    45 #ifdef __MACOSX__
    46 #include <sys/sysctl.h>
    47 static size_t
    48 GetL3CacheSize(void)
    49 {
           /*
            * Ask sysctlbyname() for the "hw.l3cachesize" value.
            * Returns the reported value, or 0 when the sysctl call fails.
            */
    51     u_int64_t cacheBytes = 0;
    52     size_t valueLen = sizeof(cacheBytes);
    55     const int failed =
               sysctlbyname("hw.l3cachesize", &cacheBytes, &valueLen, NULL, 0);
    58 
    59     return failed ? 0 : cacheBytes;
    60 }
    61 #else
    62 static size_t
    63 GetL3CacheSize(void)
    64 {
    65     /* XXX: nothing is queried here, just guess a G4 (2 MiB of L3 cache) */
    66     const size_t guessedBytes = (size_t) 2 * 1024 * 1024;
           return guessedBytes;
    67 }
    68 #endif /* __MACOSX__ */
    69 
    70 #if (defined(__MACOSX__) && (__GNUC__ < 4))
       /* Mac OS X with GCC older than 4: vector literals are written with parentheses */
    71 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    72         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    73 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    74         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    75 #else
       /* Every other configuration: vector literals are written with braces */
    76 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    77         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    78 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    79         (vector unsigned short) { a,b,c,d,e,f,g,h }
    80 #endif
    81 
    82 #define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)  /* low 4 address bits: non-zero when x is not 16-byte aligned */
       /* Byte-index literal that applies the same (a,b,c,d) byte order to each of four 32-bit pixels */
    83 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    84                                ( 0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
    85                                  0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
    86                                  0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
    87                                  0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )
    88 
       /*
        * Pack r, g, b, a into one 32-bit pixel using the shifts and masks of dstfmt.
        * Each component is widened to Uint32 before shifting so that a shift of an
        * 8-bit value by 24 is not performed on a (signed) int.
        */
    89 #define MAKE8888(dstfmt, r, g, b, a)  \
    90     ( ((((Uint32) (r)) << (dstfmt)->Rshift) & (dstfmt)->Rmask) | \
    91       ((((Uint32) (g)) << (dstfmt)->Gshift) & (dstfmt)->Gmask) | \
    92       ((((Uint32) (b)) << (dstfmt)->Bshift) & (dstfmt)->Bmask) | \
    93       ((((Uint32) (a)) << (dstfmt)->Ashift) & (dstfmt)->Amask) )
    94 
    95 /*
    96  * Data Stream Touch...Altivec cache prefetching.
    97  *
    98  *  Don't use this on a G5...however, the speed boost is very significant
    99  *   on a G4.
   100  */
   101 #define DST_CHAN_SRC 1
   102 #define DST_CHAN_DEST 2
   103 
   104 /* macro to set DST control word value: size from bit 24 up, count from bit 16 up, stride in the low bits */
   105 #define DST_CTRL(size, count, stride) \
   106     (((size) << 24) | ((count) << 16) | (stride))
   107 
       /*
        * Build the permute control vector the blitters pass to vec_perm() to
        * realign data read from src.  An unaligned src uses vec_lvsl(0, src);
        * an aligned src uses vec_lvsl(8, src) with 8 added to every byte.
        * NOTE(review): for an aligned pointer that presumably yields indices
        * 16..31, i.e. vec_perm() then takes everything from its second operand
        * (the vector the callers load with vec_ld(15, src)) -- confirm against
        * the AltiVec documentation.
        */
   108 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   109     ? vec_lvsl(0, src) \
   110     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   111 
   112 /* Calculate the permute vector used for 32->32 swizzling */
       /*
        * srcfmt / dstfmt: source and destination pixel formats.  A NULL pointer
        * selects the built-in ARGB layout (default_pixel_format) for that side.
        *
        * Returns a control vector for vec_perm().  One 32-bit control word is
        * built in which every destination channel byte holds the index of the
        * source byte that feeds it (RESHIFT turns a bit shift of 24/16/8/0 into
        * byte index 0/1/2/3); index 0x10 is used where alpha has no source byte.
        * The word is splatted to all four lanes and 0/4/8/12 is added per lane
        * so that each of the four pixels addresses its own bytes.
        */
   113 static vector unsigned char
   114 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
   115 {
   116     /*
   117      * We have to assume that the bits that aren't used by other
   118      *  colors is alpha, and it's one complete byte, since some formats
   119      *  leave alpha with a zero mask, but we should still swizzle the bits.
   120      */
   121     /* ARGB */
   122     static const struct SDL_PixelFormat default_pixel_format = {
   123         0, NULL, 0, 0,
   124         {0, 0},
   125         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   126         0, 0, 0, 0,
   127         16, 8, 0, 24,
   128         0, NULL
   129     };
           /* per-pixel byte offsets added to the splatted control word */
   130     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   131                                                        0x04, 0x04, 0x04, 0x04,
   132                                                        0x08, 0x08, 0x08, 0x08,
   133                                                        0x0C, 0x0C, 0x0C,
   134                                                        0x0C);
   135     vector unsigned char vswiz;
   136     vector unsigned int srcvec;
   137     Uint32 rmask, gmask, bmask, amask;
   138 
   139     if (!srcfmt) {
   140         srcfmt = &default_pixel_format;
   141     }
   142     if (!dstfmt) {
   143         dstfmt = &default_pixel_format;
   144     }
   145 
           /* source byte index of a channel, placed at the destination channel position */
   146 #define RESHIFT(X) (3 - ((X) >> 3))
   147     rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   148     gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   149     bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   150 
   151     /* Use zero for alpha if either surface doesn't have alpha */
   152     if (dstfmt->Amask) {
   153         amask =
   154             ((srcfmt->Amask) ? RESHIFT(srcfmt->
   155                                        Ashift) : 0x10) << (dstfmt->Ashift);
   156     } else {
               /* put index 0x10 into every byte that no destination RGB mask covers */
   157         amask =
   158             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   159                           0xFFFFFFFF);
   160     }
   161 #undef RESHIFT
   162 
           /* only element 0 is written; vec_splat() below copies it to all four lanes */
   163     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   164     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   165     return (vswiz);
   166 }
   167 
   168 #if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
   169 /* reorder bytes for PowerPC little endian */
   170 static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
   171 {
   172     /* calc_swizzle32 produces a control vector that vec_perm uses to
   173        reorder bytes.  The little-endian transformation applied to
   174        vec_perm implicitly assumes that the permutation reorders whole
   175        vector elements, not bytes inside those elements.
   176        Unfortunately, on little-endian PowerPC the result order is not the
   177        expected one when the first two vec_perm arguments are not of type
   178        'vector char': numbering from the left (BE) versus numbering from
   179        the right (LE) gives a different interpretation of which lanes are
   180        odd and which are even.  This helper swaps every adjacent byte pair.
   181        Refer to fedora bug 1392465
   182      */
   183 
   184     const vector unsigned char swapAdjacentBytes = VECUINT8_LITERAL(
   185                                       0x01, 0x00, 0x03, 0x02,
   186                                       0x05, 0x04, 0x07, 0x06,
   187                                       0x09, 0x08, 0x0B, 0x0A,
   188                                       0x0D, 0x0C, 0x0F, 0x0E );
   189 
   192     return vec_perm(vpermute, vpermute, swapAdjacentBytes);
   193 }
   194 #endif
   195 
   196 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
       /*
        * AltiVec blit from a 32-bit source (4 bytes per pixel, layout taken from
        * info->src_fmt) to a 16-bit RGB565 destination.
        *
        * For every row: pixels are converted one at a time until dst is 16-byte
        * aligned, then eight pixels per iteration with vector code, and finally
        * the remaining (width % 8) pixels one at a time again.
        *
        * The vector path produces the same bits as the scalar path:
        * red in 0xF800, green in 0x07E0, blue in 0x001F.
        */
   197 static void
   198 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   199 {
   200     int height = info->dst_h;
   201     Uint8 *src = (Uint8 *) info->src;
   202     int srcskip = info->src_skip;
   203     Uint8 *dst = (Uint8 *) info->dst;
   204     int dstskip = info->dst_skip;
   205     SDL_PixelFormat *srcfmt = info->src_fmt;
   206     vector unsigned char valpha = vec_splat_u8(0);
           /* swizzle the source pixels into the default ARGB byte order */
   207     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
           /* picks bytes 2, 6, 10, ... (green in ARGB order) into the low byte of each 16-bit lane */
   208     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   209                                                     0x00, 0x0a, 0x00, 0x0e,
   210                                                     0x00, 0x12, 0x00, 0x16,
   211                                                     0x00, 0x1a, 0x00, 0x1e);
   212     vector unsigned short v1 = vec_splat_u16(1);
   213     vector unsigned short v3 = vec_splat_u16(3);
           /* blue mask: only the 5 blue bits (0x003f also let the low green bit of the 1555 pixel through) */
   214     vector unsigned short v1f =
   215         VECUINT16_LITERAL(0x001f, 0x001f, 0x001f, 0x001f,
   216                           0x001f, 0x001f, 0x001f, 0x001f);
   217     vector unsigned short vfc =
   218         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   219                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
           /* 0xF8 in every byte, shifted left by 8 per 16-bit lane: 0xF800 (-7 would give 0xF900) */
   220     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-8);
   221     vf800 = vec_sl(vf800, vec_splat_u16(8));
   222 
   223     while (height--) {
   224         vector unsigned char valigner;
   225         vector unsigned char voverflow;
   226         vector unsigned char vsrc;
   227 
   228         int width = info->dst_w;
   229         int extrawidth;
   230 
   231         /* do scalar until we can align... */
   232 #define ONE_PIXEL_BLEND(condition, widthvar) \
   233         while (condition) { \
   234             Uint32 Pixel; \
   235             unsigned sR, sG, sB, sA; \
   236             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   237                           sR, sG, sB, sA); \
   238             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   239                                 ((sG << 3) & 0x000007E0) | \
   240                                 ((sB >> 3) & 0x0000001F)); \
   241             dst += 2; \
   242             src += 4; \
   243             widthvar--; \
   244         }
   245 
   246         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   247 
   248         /* After all that work, here's the vector part! */
   249         extrawidth = (width % 8);       /* trailing unaligned stores */
   250         width -= extrawidth;
   251         vsrc = vec_ld(0, src);
   252         valigner = VEC_ALIGNER(src);
   253 
   254         while (width) {
   255             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   256             vector unsigned int vsrc1, vsrc2;
   257             vector unsigned char vdst;
   258 
                   /* first four source pixels: realign, then swizzle to ARGB */
   259             voverflow = vec_ld(15, src);
   260             vsrc = vec_perm(vsrc, voverflow, valigner);
   261             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   262             src += 16;
   263             vsrc = voverflow;
                   /* next four source pixels */
   264             voverflow = vec_ld(15, src);
   265             vsrc = vec_perm(vsrc, voverflow, valigner);
   266             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   267             /* 1555 */
   268             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
                   /* green is taken from the full 8-bit value to keep 6 bits of it */
   269             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   270             vgpixel = vec_and(vgpixel, vfc);
   271             vgpixel = vec_sl(vgpixel, v3);
                   /* red: move the 5 red bits of the 1555 pixel up to bits 15..11 */
   272             vrpixel = vec_sl(vpixel, v1);
   273             vrpixel = vec_and(vrpixel, vf800);
                   /* blue: the 5 low bits of the 1555 pixel */
   274             vbpixel = vec_and(vpixel, v1f);
   275             vdst =
   276                 vec_or((vector unsigned char) vrpixel,
   277                        (vector unsigned char) vgpixel);
   278             /* 565 */
   279             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   280             vec_st(vdst, 0, dst);
   281 
   282             width -= 8;
   283             src += 16;
   284             dst += 16;
   285             vsrc = voverflow;
   286         }
   287 
   288         SDL_assert(width == 0);
   289 
   290         /* do scalar for the pixels left over after the vector loop */
   291         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   292 #undef ONE_PIXEL_BLEND
   293 
   294         src += srcskip;         /* move to next row, accounting for pitch. */
   295         dst += dstskip;
   296     }
   297 
   298 
   299 }
   300 
   301 static void
   302 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   303 {
           /*
            * AltiVec blit from a 16-bit RGB565 source to a 32-bit destination
            * whose layout is taken from info->dst_fmt.
            *
            * For every row: pixels are converted one at a time until dst is
            * 16-byte aligned, then eight pixels per iteration with vector code,
            * and finally the remaining (width % 8) pixels one at a time again.
            * The alpha byte is info->a when the destination has an alpha mask and
            * info->a is non-zero, otherwise 0.
            *
            * The vector path masks red with 0xF800 and green with 0xFC00 so that
            * it yields the same component values as the scalar path
            * (0xf8 / 0xfc / 0xf8).
            */
   304     int height = info->dst_h;
   305     Uint8 *src = (Uint8 *) info->src;
   306     int srcskip = info->src_skip;
   307     Uint8 *dst = (Uint8 *) info->dst;
   308     int dstskip = info->dst_skip;
   309     SDL_PixelFormat *srcfmt = info->src_fmt;
   310     SDL_PixelFormat *dstfmt = info->dst_fmt;
   311     unsigned alpha;
   312     vector unsigned char valpha;
   313     vector unsigned char vpermute;
   314     vector unsigned short vf800;
           vector unsigned short vfc00;
   315     vector unsigned int v8 = vec_splat_u32(8);
   316     vector unsigned int v16 = vec_add(v8, v8);
   317     vector unsigned short v2 = vec_splat_u16(2);
   318     vector unsigned short v3 = vec_splat_u16(3);
   319     /*
   320        0x10 - 0x1f is the alpha
   321        0x00 - 0x0e evens are the red
   322        0x01 - 0x0f odds are zero
   323      */
   324     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   325                                                        0x10, 0x02, 0x01, 0x01,
   326                                                        0x10, 0x04, 0x01, 0x01,
   327                                                        0x10, 0x06, 0x01,
   328                                                        0x01);
           /* same pattern for pixels 4..7: adds 8 to the red index of every pixel */
   329     vector unsigned char vredalpha2 =
   330         (vector unsigned
   331          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   332         );
   333     /*
   334        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   335        0x11 - 0x1f odds are blue
   336      */
   337     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   338                                                    0x04, 0x05, 0x06, 0x13,
   339                                                    0x08, 0x09, 0x0a, 0x15,
   340                                                    0x0c, 0x0d, 0x0e, 0x17);
   341     vector unsigned char vblue2 =
   342         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   343         );
   344     /*
   345        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   346        0x10 - 0x1e evens are green
   347      */
   348     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   349                                                     0x04, 0x05, 0x12, 0x07,
   350                                                     0x08, 0x09, 0x14, 0x0b,
   351                                                     0x0c, 0x0d, 0x16, 0x0f);
   352     vector unsigned char vgreen2 =
   353         (vector unsigned
   354          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   355         );
   356 
   357     SDL_assert(srcfmt->BytesPerPixel == 2);
   358     SDL_assert(dstfmt->BytesPerPixel == 4);
   359 
           /* 0xF8 in every byte, shifted left by 8 per 16-bit lane: 0xF800 (-7 would give 0xF900) */
   360     vf800 = (vector unsigned short) vec_splat_u8(-8);
   361     vf800 = vec_sl(vf800, vec_splat_u16(8));
           /* 0xFC in every byte, shifted left by 8 per 16-bit lane: 0xFC00 */
           vfc00 = (vector unsigned short) vec_splat_u8(-4);
           vfc00 = vec_sl(vfc00, vec_splat_u16(8));
   362 
   363     if (dstfmt->Amask && info->a) {
   364         ((unsigned char *) &valpha)[0] = alpha = info->a;
   365         valpha = vec_splat(valpha, 0);
   366     } else {
   367         alpha = 0;
   368         valpha = vec_splat_u8(0);
   369     }
   370 
           /* final swizzle from the intermediate ARGB order to the destination format */
   371     vpermute = calc_swizzle32(NULL, dstfmt);
   372     while (height--) {
   373         vector unsigned char valigner;
   374         vector unsigned char voverflow;
   375         vector unsigned char vsrc;
   376 
   377         int width = info->dst_w;
   378         int extrawidth;
   379 
   380         /* do scalar until we can align... */
   381 #define ONE_PIXEL_BLEND(condition, widthvar) \
   382         while (condition) { \
   383             unsigned sR, sG, sB; \
   384             unsigned short Pixel = *((unsigned short *)src); \
   385             sR = (Pixel >> 8) & 0xf8; \
   386             sG = (Pixel >> 3) & 0xfc; \
   387             sB = (Pixel << 3) & 0xf8; \
   388             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   389             src += 2; \
   390             dst += 4; \
   391             widthvar--; \
   392         }
   393         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   394 
   395         /* After all that work, here's the vector part! */
   396         extrawidth = (width % 8);       /* trailing unaligned stores */
   397         width -= extrawidth;
   398         vsrc = vec_ld(0, src);
   399         valigner = VEC_ALIGNER(src);
   400 
   401         while (width) {
   402             vector unsigned short vR, vG, vB;
   403             vector unsigned char vdst1, vdst2;
   404 
   405             voverflow = vec_ld(15, src);
   406             vsrc = vec_perm(vsrc, voverflow, valigner);
   407 
                   /* red: top 5 bits stay in the high byte of each lane */
   408             vR = vec_and((vector unsigned short) vsrc, vf800);
                   /* blue: shifted so it fills the top 5 bits of the low byte */
   409             vB = vec_sl((vector unsigned short) vsrc, v3);
                   /* green: shifted into the high byte, blue bits below it masked off */
   410             vG = vec_and(vec_sl(vB, v2), vfc00);
   411 
                   /* pixels 0..3 */
   412             vdst1 =
   413                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   414                                                 valpha, vredalpha1);
   415             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   416             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   417             vdst1 = vec_perm(vdst1, valpha, vpermute);
   418             vec_st(vdst1, 0, dst);
   419 
                   /* pixels 4..7 */
   420             vdst2 =
   421                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   422                                                 valpha, vredalpha2);
   423             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   424             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   425             vdst2 = vec_perm(vdst2, valpha, vpermute);
   426             vec_st(vdst2, 16, dst);
   427 
   428             width -= 8;
   429             dst += 32;
   430             src += 16;
   431             vsrc = voverflow;
   432         }
   433 
   434         SDL_assert(width == 0);
   435 
   436 
   437         /* do scalar for the pixels left over after the vector loop */
   438         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   439 #undef ONE_PIXEL_BLEND
   440 
   441         src += srcskip;         /* move to next row, accounting for pitch. */
   442         dst += dstskip;
   443     }
   444 
   445 }
   446 
   447 
   448 static void
   449 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   450 {
           /*
            * AltiVec blit from a 16-bit RGB555 source to a 32-bit destination
            * whose layout is taken from info->dst_fmt.
            *
            * For every row: pixels are converted one at a time until dst is
            * 16-byte aligned, then eight pixels per iteration with vector code,
            * and finally the remaining (width % 8) pixels one at a time again.
            * The alpha byte is info->a when the destination has an alpha mask and
            * info->a is non-zero, otherwise 0.
            *
            * The vector path masks red and green with 0xF800 so that it yields
            * the same component values as the scalar path (0xf8 for each).
            */
   451     int height = info->dst_h;
   452     Uint8 *src = (Uint8 *) info->src;
   453     int srcskip = info->src_skip;
   454     Uint8 *dst = (Uint8 *) info->dst;
   455     int dstskip = info->dst_skip;
   456     SDL_PixelFormat *srcfmt = info->src_fmt;
   457     SDL_PixelFormat *dstfmt = info->dst_fmt;
   458     unsigned alpha;
   459     vector unsigned char valpha;
   460     vector unsigned char vpermute;
   461     vector unsigned short vf800;
   462     vector unsigned int v8 = vec_splat_u32(8);
   463     vector unsigned int v16 = vec_add(v8, v8);
   464     vector unsigned short v1 = vec_splat_u16(1);
   465     vector unsigned short v3 = vec_splat_u16(3);
   466     /*
   467        0x10 - 0x1f is the alpha
   468        0x00 - 0x0e evens are the red
   469        0x01 - 0x0f odds are zero
   470      */
   471     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   472                                                        0x10, 0x02, 0x01, 0x01,
   473                                                        0x10, 0x04, 0x01, 0x01,
   474                                                        0x10, 0x06, 0x01,
   475                                                        0x01);
           /* same pattern for pixels 4..7: adds 8 to the red index of every pixel */
   476     vector unsigned char vredalpha2 =
   477         (vector unsigned
   478          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   479         );
   480     /*
   481        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   482        0x11 - 0x1f odds are blue
   483      */
   484     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   485                                                    0x04, 0x05, 0x06, 0x13,
   486                                                    0x08, 0x09, 0x0a, 0x15,
   487                                                    0x0c, 0x0d, 0x0e, 0x17);
   488     vector unsigned char vblue2 =
   489         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   490         );
   491     /*
   492        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   493        0x10 - 0x1e evens are green
   494      */
   495     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   496                                                     0x04, 0x05, 0x12, 0x07,
   497                                                     0x08, 0x09, 0x14, 0x0b,
   498                                                     0x0c, 0x0d, 0x16, 0x0f);
   499     vector unsigned char vgreen2 =
   500         (vector unsigned
   501          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   502         );
   503 
   504     SDL_assert(srcfmt->BytesPerPixel == 2);
   505     SDL_assert(dstfmt->BytesPerPixel == 4);
   506 
           /* 0xF8 in every byte, shifted left by 8 per 16-bit lane: 0xF800 (-7 would give 0xF900) */
   507     vf800 = (vector unsigned short) vec_splat_u8(-8);
   508     vf800 = vec_sl(vf800, vec_splat_u16(8));
   509 
   510     if (dstfmt->Amask && info->a) {
   511         ((unsigned char *) &valpha)[0] = alpha = info->a;
   512         valpha = vec_splat(valpha, 0);
   513     } else {
   514         alpha = 0;
   515         valpha = vec_splat_u8(0);
   516     }
   517 
           /* final swizzle from the intermediate ARGB order to the destination format */
   518     vpermute = calc_swizzle32(NULL, dstfmt);
   519     while (height--) {
   520         vector unsigned char valigner;
   521         vector unsigned char voverflow;
   522         vector unsigned char vsrc;
   523 
   524         int width = info->dst_w;
   525         int extrawidth;
   526 
   527         /* do scalar until we can align... */
   528 #define ONE_PIXEL_BLEND(condition, widthvar) \
   529         while (condition) { \
   530             unsigned sR, sG, sB; \
   531             unsigned short Pixel = *((unsigned short *)src); \
   532             sR = (Pixel >> 7) & 0xf8; \
   533             sG = (Pixel >> 2) & 0xf8; \
   534             sB = (Pixel << 3) & 0xf8; \
   535             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   536             src += 2; \
   537             dst += 4; \
   538             widthvar--; \
   539         }
   540         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   541 
   542         /* After all that work, here's the vector part! */
   543         extrawidth = (width % 8);       /* trailing unaligned stores */
   544         width -= extrawidth;
   545         vsrc = vec_ld(0, src);
   546         valigner = VEC_ALIGNER(src);
   547 
   548         while (width) {
   549             vector unsigned short vR, vG, vB;
   550             vector unsigned char vdst1, vdst2;
   551 
   552             voverflow = vec_ld(15, src);
   553             vsrc = vec_perm(vsrc, voverflow, valigner);
   554 
                   /* red: shifted up by one so its 5 bits sit at the top of the high byte */
   555             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
                   /* blue: shifted so it fills the top 5 bits of the low byte */
   556             vB = vec_sl((vector unsigned short) vsrc, v3);
                   /* green: shifted into the high byte, blue bits below it masked off */
   557             vG = vec_and(vec_sl(vB, v3), vf800);
   558 
                   /* pixels 0..3 */
   559             vdst1 =
   560                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   561                                                 valpha, vredalpha1);
   562             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   563             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   564             vdst1 = vec_perm(vdst1, valpha, vpermute);
   565             vec_st(vdst1, 0, dst);
   566 
                   /* pixels 4..7 */
   567             vdst2 =
   568                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   569                                                 valpha, vredalpha2);
   570             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   571             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   572             vdst2 = vec_perm(vdst2, valpha, vpermute);
   573             vec_st(vdst2, 16, dst);
   574 
   575             width -= 8;
   576             dst += 32;
   577             src += 16;
   578             vsrc = voverflow;
   579         }
   580 
   581         SDL_assert(width == 0);
   582 
   583 
   584         /* do scalar for the pixels left over after the vector loop */
   585         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   586 #undef ONE_PIXEL_BLEND
   587 
   588         src += srcskip;         /* move to next row, accounting for pitch. */
   589         dst += dstskip;
   590     }
   591 
   592 }
   593 
   594 static void BlitNtoNKey(SDL_BlitInfo * info);
   595 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
       /*
        * Color-keyed AltiVec blit between two 32-bit surfaces.
        *
        * Source pixels whose RGB bits (srcfmt R/G/B masks) equal info->colorkey
        * leave the destination untouched; every other pixel is converted to
        * dstfmt.  Alpha is copied from the source when both formats have an
        * alpha mask; otherwise info->a is used when the destination has an alpha
        * mask, and 0 when it does not.
        *
        * Blits narrower than 16 pixels are handed to BlitNtoNKey() or
        * BlitNtoNKeyCopyAlpha().  Otherwise each row is processed with scalar
        * code until dstp is 16-byte aligned, then four pixels per iteration with
        * vector code, then scalar code for the remaining (width % 4) pixels.
        *
        * The key comparison masks the pixel with the RGB mask in the vector path
        * and in both scalar paths, so all three agree.
        */
   596 static void
   597 Blit32to32KeyAltivec(SDL_BlitInfo * info)
   598 {
   599     int height = info->dst_h;
   600     Uint32 *srcp = (Uint32 *) info->src;
   601     int srcskip = info->src_skip / 4;
   602     Uint32 *dstp = (Uint32 *) info->dst;
   603     int dstskip = info->dst_skip / 4;
   604     SDL_PixelFormat *srcfmt = info->src_fmt;
   605     int srcbpp = srcfmt->BytesPerPixel;
   606     SDL_PixelFormat *dstfmt = info->dst_fmt;
   607     int dstbpp = dstfmt->BytesPerPixel;
   608     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   609     unsigned alpha = dstfmt->Amask ? info->a : 0;
   610     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   611     Uint32 ckey = info->colorkey;
   612     vector unsigned int valpha;
   613     vector unsigned char vpermute;
   614     vector unsigned char vzero;
   615     vector unsigned int vckey;
   616     vector unsigned int vrgbmask;
   617     vpermute = calc_swizzle32(srcfmt, dstfmt);
   618     if (info->dst_w < 16) {
   619         if (copy_alpha) {
   620             BlitNtoNKeyCopyAlpha(info);
   621         } else {
   622             BlitNtoNKey(info);
   623         }
   624         return;
   625     }
       #if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
           /* reorder bytes for PowerPC little endian; done once, because the
              byte-pair swap would undo itself if it were applied a second time */
           vpermute = reorder_ppc64le_vec(vpermute);
       #endif
   626     vzero = vec_splat_u8(0);
   627     if (alpha) {
   628         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   629         valpha =
   630             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   631     } else {
   632         valpha = (vector unsigned int) vzero;
   633     }
           /* only the RGB bits of the key take part in the comparison */
   634     ckey &= rgbmask;
   635     ((unsigned int *) (char *) &vckey)[0] = ckey;
   636     vckey = vec_splat(vckey, 0);
   637     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   638     vrgbmask = vec_splat(vrgbmask, 0);
   639 
   640     while (height--) {
   641 #define ONE_PIXEL_BLEND(condition, widthvar) \
   642         if (copy_alpha) { \
   643             while (condition) { \
   644                 Uint32 Pixel; \
   645                 unsigned sR, sG, sB, sA; \
   646                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   647                           sR, sG, sB, sA); \
   648                 if ( (Pixel & rgbmask) != ckey ) { \
   649                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   650                             sR, sG, sB, sA); \
   651                 } \
   652                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   653                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   654                 widthvar--; \
   655             } \
   656         } else { \
   657             while (condition) { \
   658                 Uint32 Pixel; \
   659                 unsigned sR, sG, sB; \
   660                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   661                 if ( (Pixel & rgbmask) != ckey ) { \
   662                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   663                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   664                               sR, sG, sB, alpha); \
   665                 } \
   666                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   667                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   668                 widthvar--; \
   669             } \
   670         }
   671         int width = info->dst_w;
               /* scalar pixels until the destination pointer is 16-byte aligned */
   672         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   673         SDL_assert(width > 0);
   674         if (width > 0) {
   675             int extrawidth = (width % 4);
   676             vector unsigned char valigner = VEC_ALIGNER(srcp);
   677             vector unsigned int vs = vec_ld(0, srcp);
   678             width -= extrawidth;
   679             SDL_assert(width >= 4);
   680             while (width) {
   681                 vector unsigned char vsel;
   682                 vector unsigned int vd;
   683                 vector unsigned int voverflow = vec_ld(15, srcp);
   684                 /* load the source vec */
   685                 vs = vec_perm(vs, voverflow, valigner);
   686                 /* vsel is set for items whose RGB bits match the key */
   687                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   688                 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int) vsel, vckey);
   693                 /* permute the src vec to the dest format */
   694                 vs = vec_perm(vs, valpha, vpermute);
   695                 /* load the destination vec */
   696                 vd = vec_ld(0, dstp);
   697                 /* keep the destination where vsel is set, take the source elsewhere */
   698                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   699                                                    (vector unsigned char) vd,
   700                                                    vsel);
   701 
   702                 vec_st(vd, 0, dstp);
   703                 srcp += 4;
   704                 width -= 4;
   705                 dstp += 4;
   706                 vs = voverflow;
   707             }
                   /* scalar pixels left over after the vector loop */
   708             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   709 #undef ONE_PIXEL_BLEND
   710             srcp += srcskip;
   711             dstp += dstskip;
   712         }
   713     }
   714 }
   715 
   716 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   717 /* Use this on a G5 */
   718 static void
   719 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
   720 {
   721     int height = info->dst_h;
   722     Uint32 *src = (Uint32 *) info->src;
   723     int srcskip = info->src_skip / 4;
   724     Uint32 *dst = (Uint32 *) info->dst;
   725     int dstskip = info->dst_skip / 4;
   726     SDL_PixelFormat *srcfmt = info->src_fmt;
   727     SDL_PixelFormat *dstfmt = info->dst_fmt;
   728     vector unsigned int vzero = vec_splat_u32(0);
   729     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   730     if (dstfmt->Amask && !srcfmt->Amask) {
   731         if (info->a) {
   732             vector unsigned char valpha;
   733             ((unsigned char *) &valpha)[0] = info->a;
   734             vzero = (vector unsigned int) vec_splat(valpha, 0);
   735         }
   736     }
   737 
   738     SDL_assert(srcfmt->BytesPerPixel == 4);
   739     SDL_assert(dstfmt->BytesPerPixel == 4);
   740 
   741     while (height--) {
   742         vector unsigned char valigner;
   743         vector unsigned int vbits;
   744         vector unsigned int voverflow;
   745         Uint32 bits;
   746         Uint8 r, g, b, a;
   747 
   748         int width = info->dst_w;
   749         int extrawidth;
   750 
   751         /* do scalar until we can align... */
   752         while ((UNALIGNED_PTR(dst)) && (width)) {
   753             bits = *(src++);
   754             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   755             if(!srcfmt->Amask)
   756               a = info->a;
   757             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   758             width--;
   759         }
   760 
   761         /* After all that work, here's the vector part! */
   762         extrawidth = (width % 4);
   763         width -= extrawidth;
   764         valigner = VEC_ALIGNER(src);
   765         vbits = vec_ld(0, src);
   766 
   767         while (width) {
   768             voverflow = vec_ld(15, src);
   769             src += 4;
   770             width -= 4;
   771             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   772 #if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
   773             /* reorder bytes for PowerPC little endian */
   774             vpermute = reorder_ppc64le_vec(vpermute);
   775 #endif
   776             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   777             vec_st(vbits, 0, dst);      /* store it back out. */
   778             dst += 4;
   779             vbits = voverflow;
   780         }
   781 
   782         SDL_assert(width == 0);
   783 
   784         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   785         while (extrawidth) {
   786             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   787             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   788             if(!srcfmt->Amask)
   789               a = info->a;
   790             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   791             extrawidth--;
   792         }
   793 
   794         src += srcskip;
   795         dst += dstskip;
   796     }
   797 
   798 }
   799 
   800 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   801 /* Use this on a G4 */
   802 static void
   803 ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
   804 {
   805     const int scalar_dst_lead = sizeof(Uint32) * 4;
   806     const int vector_dst_lead = sizeof(Uint32) * 16;
   807 
   808     int height = info->dst_h;
   809     Uint32 *src = (Uint32 *) info->src;
   810     int srcskip = info->src_skip / 4;
   811     Uint32 *dst = (Uint32 *) info->dst;
   812     int dstskip = info->dst_skip / 4;
   813     SDL_PixelFormat *srcfmt = info->src_fmt;
   814     SDL_PixelFormat *dstfmt = info->dst_fmt;
   815     vector unsigned int vzero = vec_splat_u32(0);
   816     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   817     if (dstfmt->Amask && !srcfmt->Amask) {
   818         if (info->a) {
   819             vector unsigned char valpha;
   820             ((unsigned char *) &valpha)[0] = info->a;
   821             vzero = (vector unsigned int) vec_splat(valpha, 0);
   822         }
   823     }
   824 
   825     SDL_assert(srcfmt->BytesPerPixel == 4);
   826     SDL_assert(dstfmt->BytesPerPixel == 4);
   827 
   828     while (height--) {
   829         vector unsigned char valigner;
   830         vector unsigned int vbits;
   831         vector unsigned int voverflow;
   832         Uint32 bits;
   833         Uint8 r, g, b, a;
   834 
   835         int width = info->dst_w;
   836         int extrawidth;
   837 
   838         /* do scalar until we can align... */
   839         while ((UNALIGNED_PTR(dst)) && (width)) {
   840             vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   841                      DST_CHAN_SRC);
   842             vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   843                       DST_CHAN_DEST);
   844             bits = *(src++);
   845             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   846             if(!srcfmt->Amask)
   847               a = info->a;
   848             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   849             width--;
   850         }
   851 
   852         /* After all that work, here's the vector part! */
   853         extrawidth = (width % 4);
   854         width -= extrawidth;
   855         valigner = VEC_ALIGNER(src);
   856         vbits = vec_ld(0, src);
   857 
   858         while (width) {
   859             vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
   860                      DST_CHAN_SRC);
   861             vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
   862                       DST_CHAN_DEST);
   863             voverflow = vec_ld(15, src);
   864             src += 4;
   865             width -= 4;
   866             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   867 #if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
   868             /* reorder bytes for PowerPC little endian */
   869             vpermute = reorder_ppc64le_vec(vpermute);
   870 #endif
   871             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   872             vec_st(vbits, 0, dst);      /* store it back out. */
   873             dst += 4;
   874             vbits = voverflow;
   875         }
   876 
   877         SDL_assert(width == 0);
   878 
   879         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   880         while (extrawidth) {
   881             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   882             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   883             if(!srcfmt->Amask)
   884               a = info->a;
   885             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   886             extrawidth--;
   887         }
   888 
   889         src += srcskip;
   890         dst += dstskip;
   891     }
   892 
   893     vec_dss(DST_CHAN_SRC);
   894     vec_dss(DST_CHAN_DEST);
   895 }
   896 
   897 static Uint32
   898 GetBlitFeatures(void)
   899 {
   900     static Uint32 features = 0xffffffff;
   901     if (features == 0xffffffff) {
   902         /* Provide an override for testing .. */
   903         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   904         if (override) {
   905             features = 0;
   906             SDL_sscanf(override, "%u", &features);
   907         } else {
   908             features = (0
   909                         /* Feature 1 is has-MMX */
   910                         | ((SDL_HasMMX())? 1 : 0)
   911                         /* Feature 2 is has-AltiVec */
   912                         | ((SDL_HasAltiVec())? 2 : 0)
   913                         /* Feature 4 is dont-use-prefetch */
   914                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   915                         | ((GetL3CacheSize() == 0) ? 4 : 0)
   916                 );
   917         }
   918     }
   919     return features;
   920 }
   921 
   922 #if __MWERKS__
   923 #pragma altivec_model off
   924 #endif
   925 #else
   926 /* Feature 1 is has-MMX */
   927 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   928 #endif
   929 
   930 /* This is now endian dependent */
   931 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   932 #define HI  1
   933 #define LO  0
   934 #else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
   935 #define HI  0
   936 #define LO  1
   937 #endif
   938 
   939 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   940 #define RGB888_RGB332(dst, src) { \
   941     dst = (Uint8)((((src)&0x00E00000)>>16)| \
   942                   (((src)&0x0000E000)>>11)| \
   943                   (((src)&0x000000C0)>>6)); \
   944 }
   945 static void
   946 Blit_RGB888_index8(SDL_BlitInfo * info)
   947 {
   948 #ifndef USE_DUFFS_LOOP
   949     int c;
   950 #endif
   951     int width, height;
   952     Uint32 *src;
   953     const Uint8 *map;
   954     Uint8 *dst;
   955     int srcskip, dstskip;
   956 
   957     /* Set up some basic variables */
   958     width = info->dst_w;
   959     height = info->dst_h;
   960     src = (Uint32 *) info->src;
   961     srcskip = info->src_skip / 4;
   962     dst = info->dst;
   963     dstskip = info->dst_skip;
   964     map = info->table;
   965 
   966     if (map == NULL) {
   967         while (height--) {
   968 #ifdef USE_DUFFS_LOOP
   969             /* *INDENT-OFF* */
   970             DUFFS_LOOP(
   971                 RGB888_RGB332(*dst++, *src);
   972             , width);
   973             /* *INDENT-ON* */
   974 #else
   975             for (c = width / 4; c; --c) {
   976                 /* Pack RGB into 8bit pixel */
   977                 ++src;
   978                 RGB888_RGB332(*dst++, *src);
   979                 ++src;
   980                 RGB888_RGB332(*dst++, *src);
   981                 ++src;
   982                 RGB888_RGB332(*dst++, *src);
   983                 ++src;
   984             }
   985             switch (width & 3) {
   986             case 3:
   987                 RGB888_RGB332(*dst++, *src);
   988                 ++src;
   989             case 2:
   990                 RGB888_RGB332(*dst++, *src);
   991                 ++src;
   992             case 1:
   993                 RGB888_RGB332(*dst++, *src);
   994                 ++src;
   995             }
   996 #endif /* USE_DUFFS_LOOP */
   997             src += srcskip;
   998             dst += dstskip;
   999         }
  1000     } else {
  1001         int Pixel;
  1002 
  1003         while (height--) {
  1004 #ifdef USE_DUFFS_LOOP
  1005             /* *INDENT-OFF* */
  1006             DUFFS_LOOP(
  1007                 RGB888_RGB332(Pixel, *src);
  1008                 *dst++ = map[Pixel];
  1009                 ++src;
  1010             , width);
  1011             /* *INDENT-ON* */
  1012 #else
  1013             for (c = width / 4; c; --c) {
  1014                 /* Pack RGB into 8bit pixel */
  1015                 RGB888_RGB332(Pixel, *src);
  1016                 *dst++ = map[Pixel];
  1017                 ++src;
  1018                 RGB888_RGB332(Pixel, *src);
  1019                 *dst++ = map[Pixel];
  1020                 ++src;
  1021                 RGB888_RGB332(Pixel, *src);
  1022                 *dst++ = map[Pixel];
  1023                 ++src;
  1024                 RGB888_RGB332(Pixel, *src);
  1025                 *dst++ = map[Pixel];
  1026                 ++src;
  1027             }
  1028             switch (width & 3) {
  1029             case 3:
  1030                 RGB888_RGB332(Pixel, *src);
  1031                 *dst++ = map[Pixel];
  1032                 ++src;
  1033             case 2:
  1034                 RGB888_RGB332(Pixel, *src);
  1035                 *dst++ = map[Pixel];
  1036                 ++src;
  1037             case 1:
  1038                 RGB888_RGB332(Pixel, *src);
  1039                 *dst++ = map[Pixel];
  1040                 ++src;
  1041             }
  1042 #endif /* USE_DUFFS_LOOP */
  1043             src += srcskip;
  1044             dst += dstskip;
  1045         }
  1046     }
  1047 }
  1048 
  1049 /* Special optimized blit for RGB 10-10-10 --> RGB 3-3-2 */
  1050 #define RGB101010_RGB332(dst, src) { \
  1051     dst = (Uint8)((((src)&0x38000000)>>22)| \
  1052                   (((src)&0x000E0000)>>15)| \
  1053                   (((src)&0x00000300)>>8)); \
  1054 }
  1055 static void
  1056 Blit_RGB101010_index8(SDL_BlitInfo * info)
  1057 {
  1058 #ifndef USE_DUFFS_LOOP
  1059     int c;
  1060 #endif
  1061     int width, height;
  1062     Uint32 *src;
  1063     const Uint8 *map;
  1064     Uint8 *dst;
  1065     int srcskip, dstskip;
  1066 
  1067     /* Set up some basic variables */
  1068     width = info->dst_w;
  1069     height = info->dst_h;
  1070     src = (Uint32 *) info->src;
  1071     srcskip = info->src_skip / 4;
  1072     dst = info->dst;
  1073     dstskip = info->dst_skip;
  1074     map = info->table;
  1075 
  1076     if (map == NULL) {
  1077         while (height--) {
  1078 #ifdef USE_DUFFS_LOOP
  1079             /* *INDENT-OFF* */
  1080             DUFFS_LOOP(
  1081                 RGB101010_RGB332(*dst++, *src);
  1082             , width);
  1083             /* *INDENT-ON* */
  1084 #else
  1085             for (c = width / 4; c; --c) {
  1086                 /* Pack RGB into 8bit pixel */
  1087                 ++src;
  1088                 RGB101010_RGB332(*dst++, *src);
  1089                 ++src;
  1090                 RGB101010_RGB332(*dst++, *src);
  1091                 ++src;
  1092                 RGB101010_RGB332(*dst++, *src);
  1093                 ++src;
  1094             }
  1095             switch (width & 3) {
  1096             case 3:
  1097                 RGB101010_RGB332(*dst++, *src);
  1098                 ++src;
  1099             case 2:
  1100                 RGB101010_RGB332(*dst++, *src);
  1101                 ++src;
  1102             case 1:
  1103                 RGB101010_RGB332(*dst++, *src);
  1104                 ++src;
  1105             }
  1106 #endif /* USE_DUFFS_LOOP */
  1107             src += srcskip;
  1108             dst += dstskip;
  1109         }
  1110     } else {
  1111         int Pixel;
  1112 
  1113         while (height--) {
  1114 #ifdef USE_DUFFS_LOOP
  1115             /* *INDENT-OFF* */
  1116             DUFFS_LOOP(
  1117                 RGB101010_RGB332(Pixel, *src);
  1118                 *dst++ = map[Pixel];
  1119                 ++src;
  1120             , width);
  1121             /* *INDENT-ON* */
  1122 #else
  1123             for (c = width / 4; c; --c) {
  1124                 /* Pack RGB into 8bit pixel */
  1125                 RGB101010_RGB332(Pixel, *src);
  1126                 *dst++ = map[Pixel];
  1127                 ++src;
  1128                 RGB101010_RGB332(Pixel, *src);
  1129                 *dst++ = map[Pixel];
  1130                 ++src;
  1131                 RGB101010_RGB332(Pixel, *src);
  1132                 *dst++ = map[Pixel];
  1133                 ++src;
  1134                 RGB101010_RGB332(Pixel, *src);
  1135                 *dst++ = map[Pixel];
  1136                 ++src;
  1137             }
  1138             switch (width & 3) {
  1139             case 3:
  1140                 RGB101010_RGB332(Pixel, *src);
  1141                 *dst++ = map[Pixel];
  1142                 ++src;
  1143             case 2:
  1144                 RGB101010_RGB332(Pixel, *src);
  1145                 *dst++ = map[Pixel];
  1146                 ++src;
  1147             case 1:
  1148                 RGB101010_RGB332(Pixel, *src);
  1149                 *dst++ = map[Pixel];
  1150                 ++src;
  1151             }
  1152 #endif /* USE_DUFFS_LOOP */
  1153             src += srcskip;
  1154             dst += dstskip;
  1155         }
  1156     }
  1157 }
  1158 
  1159 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
  1160 #define RGB888_RGB555(dst, src) { \
  1161     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
  1162                                 (((*src)&0x0000F800)>>6)| \
  1163                                 (((*src)&0x000000F8)>>3)); \
  1164 }
  1165 #ifndef USE_DUFFS_LOOP
  1166 #define RGB888_RGB555_TWO(dst, src) { \
  1167     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
  1168                          (((src[HI])&0x0000F800)>>6)| \
  1169                          (((src[HI])&0x000000F8)>>3))<<16)| \
  1170                          (((src[LO])&0x00F80000)>>9)| \
  1171                          (((src[LO])&0x0000F800)>>6)| \
  1172                          (((src[LO])&0x000000F8)>>3); \
  1173 }
  1174 #endif
  1175 static void
  1176 Blit_RGB888_RGB555(SDL_BlitInfo * info)
  1177 {
  1178 #ifndef USE_DUFFS_LOOP
  1179     int c;
  1180 #endif
  1181     int width, height;
  1182     Uint32 *src;
  1183     Uint16 *dst;
  1184     int srcskip, dstskip;
  1185 
  1186     /* Set up some basic variables */
  1187     width = info->dst_w;
  1188     height = info->dst_h;
  1189     src = (Uint32 *) info->src;
  1190     srcskip = info->src_skip / 4;
  1191     dst = (Uint16 *) info->dst;
  1192     dstskip = info->dst_skip / 2;
  1193 
  1194 #ifdef USE_DUFFS_LOOP
  1195     while (height--) {
  1196         /* *INDENT-OFF* */
  1197         DUFFS_LOOP(
  1198             RGB888_RGB555(dst, src);
  1199             ++src;
  1200             ++dst;
  1201         , width);
  1202         /* *INDENT-ON* */
  1203         src += srcskip;
  1204         dst += dstskip;
  1205     }
  1206 #else
  1207     /* Memory align at 4-byte boundary, if necessary */
  1208     if ((long) dst & 0x03) {
  1209         /* Don't do anything if width is 0 */
  1210         if (width == 0) {
  1211             return;
  1212         }
  1213         --width;
  1214 
  1215         while (height--) {
  1216             /* Perform copy alignment */
  1217             RGB888_RGB555(dst, src);
  1218             ++src;
  1219             ++dst;
  1220 
  1221             /* Copy in 4 pixel chunks */
  1222             for (c = width / 4; c; --c) {
  1223                 RGB888_RGB555_TWO(dst, src);
  1224                 src += 2;
  1225                 dst += 2;
  1226                 RGB888_RGB555_TWO(dst, src);
  1227                 src += 2;
  1228                 dst += 2;
  1229             }
  1230             /* Get any leftovers */
  1231             switch (width & 3) {
  1232             case 3:
  1233                 RGB888_RGB555(dst, src);
  1234                 ++src;
  1235                 ++dst;
  1236             case 2:
  1237                 RGB888_RGB555_TWO(dst, src);
  1238                 src += 2;
  1239                 dst += 2;
  1240                 break;
  1241             case 1:
  1242                 RGB888_RGB555(dst, src);
  1243                 ++src;
  1244                 ++dst;
  1245                 break;
  1246             }
  1247             src += srcskip;
  1248             dst += dstskip;
  1249         }
  1250     } else {
  1251         while (height--) {
  1252             /* Copy in 4 pixel chunks */
  1253             for (c = width / 4; c; --c) {
  1254                 RGB888_RGB555_TWO(dst, src);
  1255                 src += 2;
  1256                 dst += 2;
  1257                 RGB888_RGB555_TWO(dst, src);
  1258                 src += 2;
  1259                 dst += 2;
  1260             }
  1261             /* Get any leftovers */
  1262             switch (width & 3) {
  1263             case 3:
  1264                 RGB888_RGB555(dst, src);
  1265                 ++src;
  1266                 ++dst;
  1267             case 2:
  1268                 RGB888_RGB555_TWO(dst, src);
  1269                 src += 2;
  1270                 dst += 2;
  1271                 break;
  1272             case 1:
  1273                 RGB888_RGB555(dst, src);
  1274                 ++src;
  1275                 ++dst;
  1276                 break;
  1277             }
  1278             src += srcskip;
  1279             dst += dstskip;
  1280         }
  1281     }
  1282 #endif /* USE_DUFFS_LOOP */
  1283 }
  1284 
  1285 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1286 #define RGB888_RGB565(dst, src) { \
  1287     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1288                                 (((*src)&0x0000FC00)>>5)| \
  1289                                 (((*src)&0x000000F8)>>3)); \
  1290 }
  1291 #ifndef USE_DUFFS_LOOP
  1292 #define RGB888_RGB565_TWO(dst, src) { \
  1293     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1294                          (((src[HI])&0x0000FC00)>>5)| \
  1295                          (((src[HI])&0x000000F8)>>3))<<16)| \
  1296                          (((src[LO])&0x00F80000)>>8)| \
  1297                          (((src[LO])&0x0000FC00)>>5)| \
  1298                          (((src[LO])&0x000000F8)>>3); \
  1299 }
  1300 #endif
  1301 static void
  1302 Blit_RGB888_RGB565(SDL_BlitInfo * info)
  1303 {
  1304 #ifndef USE_DUFFS_LOOP
  1305     int c;
  1306 #endif
  1307     int width, height;
  1308     Uint32 *src;
  1309     Uint16 *dst;
  1310     int srcskip, dstskip;
  1311 
  1312     /* Set up some basic variables */
  1313     width = info->dst_w;
  1314     height = info->dst_h;
  1315     src = (Uint32 *) info->src;
  1316     srcskip = info->src_skip / 4;
  1317     dst = (Uint16 *) info->dst;
  1318     dstskip = info->dst_skip / 2;
  1319 
  1320 #ifdef USE_DUFFS_LOOP
  1321     while (height--) {
  1322         /* *INDENT-OFF* */
  1323         DUFFS_LOOP(
  1324             RGB888_RGB565(dst, src);
  1325             ++src;
  1326             ++dst;
  1327         , width);
  1328         /* *INDENT-ON* */
  1329         src += srcskip;
  1330         dst += dstskip;
  1331     }
  1332 #else
  1333     /* Memory align at 4-byte boundary, if necessary */
  1334     if ((long) dst & 0x03) {
  1335         /* Don't do anything if width is 0 */
  1336         if (width == 0) {
  1337             return;
  1338         }
  1339         --width;
  1340 
  1341         while (height--) {
  1342             /* Perform copy alignment */
  1343             RGB888_RGB565(dst, src);
  1344             ++src;
  1345             ++dst;
  1346 
  1347             /* Copy in 4 pixel chunks */
  1348             for (c = width / 4; c; --c) {
  1349                 RGB888_RGB565_TWO(dst, src);
  1350                 src += 2;
  1351                 dst += 2;
  1352                 RGB888_RGB565_TWO(dst, src);
  1353                 src += 2;
  1354                 dst += 2;
  1355             }
  1356             /* Get any leftovers */
  1357             switch (width & 3) {
  1358             case 3:
  1359                 RGB888_RGB565(dst, src);
  1360                 ++src;
  1361                 ++dst;
  1362             case 2:
  1363                 RGB888_RGB565_TWO(dst, src);
  1364                 src += 2;
  1365                 dst += 2;
  1366                 break;
  1367             case 1:
  1368                 RGB888_RGB565(dst, src);
  1369                 ++src;
  1370                 ++dst;
  1371                 break;
  1372             }
  1373             src += srcskip;
  1374             dst += dstskip;
  1375         }
  1376     } else {
  1377         while (height--) {
  1378             /* Copy in 4 pixel chunks */
  1379             for (c = width / 4; c; --c) {
  1380                 RGB888_RGB565_TWO(dst, src);
  1381                 src += 2;
  1382                 dst += 2;
  1383                 RGB888_RGB565_TWO(dst, src);
  1384                 src += 2;
  1385                 dst += 2;
  1386             }
  1387             /* Get any leftovers */
  1388             switch (width & 3) {
  1389             case 3:
  1390                 RGB888_RGB565(dst, src);
  1391                 ++src;
  1392                 ++dst;
  1393             case 2:
  1394                 RGB888_RGB565_TWO(dst, src);
  1395                 src += 2;
  1396                 dst += 2;
  1397                 break;
  1398             case 1:
  1399                 RGB888_RGB565(dst, src);
  1400                 ++src;
  1401                 ++dst;
  1402                 break;
  1403             }
  1404             src += srcskip;
  1405             dst += dstskip;
  1406         }
  1407     }
  1408 #endif /* USE_DUFFS_LOOP */
  1409 }
  1410 
  1411 
  1412 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1413 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1414 static void
  1415 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
  1416 {
  1417 #ifndef USE_DUFFS_LOOP
  1418     int c;
  1419 #endif
  1420     int width, height;
  1421     Uint8 *src;
  1422     Uint32 *dst;
  1423     int srcskip, dstskip;
  1424 
  1425     /* Set up some basic variables */
  1426     width = info->dst_w;
  1427     height = info->dst_h;
  1428     src = (Uint8 *) info->src;
  1429     srcskip = info->src_skip;
  1430     dst = (Uint32 *) info->dst;
  1431     dstskip = info->dst_skip / 4;
  1432 
  1433 #ifdef USE_DUFFS_LOOP
  1434     while (height--) {
  1435         /* *INDENT-OFF* */
  1436         DUFFS_LOOP(
  1437         {
  1438             *dst++ = RGB565_32(dst, src, map);
  1439             src += 2;
  1440         },
  1441         width);
  1442         /* *INDENT-ON* */
  1443         src += srcskip;
  1444         dst += dstskip;
  1445     }
  1446 #else
  1447     while (height--) {
  1448         /* Copy in 4 pixel chunks */
  1449         for (c = width / 4; c; --c) {
  1450             *dst++ = RGB565_32(dst, src, map);
  1451             src += 2;
  1452             *dst++ = RGB565_32(dst, src, map);
  1453             src += 2;
  1454             *dst++ = RGB565_32(dst, src, map);
  1455             src += 2;
  1456             *dst++ = RGB565_32(dst, src, map);
  1457             src += 2;
  1458         }
  1459         /* Get any leftovers */
  1460         switch (width & 3) {
  1461         case 3:
  1462             *dst++ = RGB565_32(dst, src, map);
  1463             src += 2;
  1464         case 2:
  1465             *dst++ = RGB565_32(dst, src, map);
  1466             src += 2;
  1467         case 1:
  1468             *dst++ = RGB565_32(dst, src, map);
  1469             src += 2;
  1470             break;
  1471         }
  1472         src += srcskip;
  1473         dst += dstskip;
  1474     }
  1475 #endif /* USE_DUFFS_LOOP */
  1476 }
  1477 
  1478 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
  1479 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1480     0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1481     0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1482     0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1483     0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1484     0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1485     0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1486     0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1487     0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1488     0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1489     0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1490     0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1491     0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1492     0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1493     0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1494     0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1495     0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1496     0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1497     0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1498     0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1499     0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1500     0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1501     0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1502     0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1503     0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1504     0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1505     0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1506     0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1507     0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1508     0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1509     0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1510     0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1511     0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1512     0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1513     0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1514     0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1515     0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1516     0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1517     0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1518     0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1519     0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1520     0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1521     0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1522     0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1523     0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1524     0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1525     0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1526     0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1527     0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1528     0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1529     0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1530     0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1531     0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1532     0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1533     0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1534     0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1535     0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1536     0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1537     0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1538     0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1539     0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1540     0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1541     0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1542     0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1543     0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1544     0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1545     0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1546     0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1547     0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1548     0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1549     0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1550     0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1551     0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1552     0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1553     0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1554     0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1555     0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1556     0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1557     0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1558     0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1559     0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1560     0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1561     0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1562     0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1563     0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1564     0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1565     0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1566     0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1567     0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1568     0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1569     0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1570     0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1571     0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1572     0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1573     0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1574     0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1575     0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1576     0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1577     0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1578     0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1579     0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1580     0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1581     0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1582     0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1583     0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1584     0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1585     0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1586     0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1587     0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1588     0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1589     0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1590     0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1591     0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1592     0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1593     0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1594     0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1595     0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1596     0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1597     0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1598     0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1599     0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1600     0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1601     0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1602     0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1603     0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1604     0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1605     0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1606     0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1607     0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1608 };
  1609 
  1610 static void
  1611 Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
  1612 {
           /* RGB 5-6-5 --> ARGB 8-8-8-8: forward to the shared table-driven
            * helper Blit_RGB565_32, supplying the ARGB8888 lookup table. */
  1613     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1614 }
  1615 
  1616 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8
        *
        * Layout of the 512 words: 256 (even, odd) pairs, pair k for byte value k.
        *  - even word k: 0xff in bits 24-31, the 5-bit value (k & 31) expanded
        *    to 8 bits in bits 16-23, and (k >> 5) * 4 in bits 8-15;
        *  - odd word k: the 3-bit value (k & 7) as the upper green bits in
        *    bits 8-15 (0x00, 0x20, 0x40, 0x61, ...), and the 5-bit value
        *    (k >> 3) expanded to 8 bits in bits 0-7.
        * The even word for a pixel's low byte plus the odd word for its high
        * byte therefore gives the complete 32-bit pixel; presumably that is
        * how Blit_RGB565_32 consumes the table -- confirm against that helper.
        */
  1617 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1618     0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1619     0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1620     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1621     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1622     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1623     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1624     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1625     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1626     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1627     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1628     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1629     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1630     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1631     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1632     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1633     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1634     0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1635     0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1636     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1637     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1638     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1639     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1640     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1641     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1642     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1643     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1644     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1645     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1646     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1647     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1648     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1649     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1650     0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1651     0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1652     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1653     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1654     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1655     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1656     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1657     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1658     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1659     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1660     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1661     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1662     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1663     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1664     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1665     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1666     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1667     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1668     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1669     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1670     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1671     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1672     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1673     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1674     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1675     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1676     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1677     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1678     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1679     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1680     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1681     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1682     0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1683     0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1684     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1685     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1686     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1687     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1688     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1689     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1690     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1691     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1692     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1693     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1694     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1695     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1696     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1697     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1698     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1699     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1700     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1701     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1702     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1703     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1704     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1705     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1706     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1707     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1708     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1709     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1710     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1711     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1712     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1713     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1714     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1715     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1716     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1717     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1718     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1719     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1720     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1721     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1722     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1723     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1724     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1725     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1726     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1727     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1728     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1729     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1730     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1731     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1732     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1733     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1734     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1735     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1736     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1737     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1738     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1739     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1740     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1741     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1742     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1743     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1744     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1745     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1746 };
  1747 
  1748 static void
  1749 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
  1750 {
           /* RGB 5-6-5 --> ABGR 8-8-8-8: forward to the shared table-driven
            * helper Blit_RGB565_32, supplying the ABGR8888 lookup table. */
  1751     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1752 }
  1753 
  1754 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
        *
        * Layout of the 512 words: 256 (even, odd) pairs, pair k for byte value k.
        *  - even word k: 0xff in bits 0-7, the 5-bit value (k & 31) expanded
        *    to 8 bits in bits 8-15, and (k >> 5) * 4 in bits 16-23;
        *  - odd word k: the 3-bit value (k & 7) as the upper green bits in
        *    bits 16-23 (0x00, 0x20, 0x40, 0x61, ...), and the 5-bit value
        *    (k >> 3) expanded to 8 bits in bits 24-31.
        * The even word for a pixel's low byte plus the odd word for its high
        * byte therefore gives the complete 32-bit pixel; presumably that is
        * how Blit_RGB565_32 consumes the table -- confirm against that helper.
        */
  1755 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1756     0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1757     0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1758     0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1759     0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1760     0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1761     0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1762     0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1763     0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1764     0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1765     0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1766     0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1767     0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1768     0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1769     0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1770     0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1771     0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1772     0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1773     0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1774     0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1775     0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1776     0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1777     0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1778     0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1779     0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1780     0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1781     0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1782     0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1783     0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1784     0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1785     0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1786     0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1787     0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1788     0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1789     0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1790     0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1791     0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1792     0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1793     0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1794     0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1795     0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1796     0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1797     0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1798     0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1799     0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1800     0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1801     0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1802     0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1803     0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1804     0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1805     0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1806     0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1807     0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1808     0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1809     0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1810     0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1811     0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1812     0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1813     0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1814     0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1815     0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1816     0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1817     0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1818     0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1819     0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1820     0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1821     0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1822     0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1823     0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1824     0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1825     0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1826     0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1827     0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1828     0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1829     0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1830     0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1831     0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1832     0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1833     0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1834     0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1835     0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1836     0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1837     0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1838     0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1839     0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1840     0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1841     0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1842     0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1843     0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1844     0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1845     0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1846     0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1847     0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1848     0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1849     0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1850     0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1851     0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1852     0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1853     0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1854     0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1855     0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1856     0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1857     0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1858     0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1859     0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1860     0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1861     0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1862     0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1863     0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1864     0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1865     0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1866     0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1867     0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1868     0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1869     0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1870     0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1871     0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1872     0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1873     0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1874     0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1875     0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1876     0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1877     0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1878     0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1879     0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1880     0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1881     0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1882     0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1883     0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1884 };
  1885 
  1886 static void
  1887 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
  1888 {
           /* RGB 5-6-5 --> RGBA 8-8-8-8: forward to the shared table-driven
            * helper Blit_RGB565_32, supplying the RGBA8888 lookup table. */
  1889     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1890 }
  1891 
  1892 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
        *
        * Layout of the 512 words: 256 (even, odd) pairs, pair k for byte value k.
        *  - even word k: the 5-bit value (k & 31) expanded to 8 bits in
        *    bits 24-31, and (k >> 5) * 4 in bits 16-23 (no alpha bits here);
        *  - odd word k: 0xff in bits 0-7, the 3-bit value (k & 7) as the upper
        *    green bits in bits 16-23 (0x00, 0x20, 0x40, 0x61, ...), and the
        *    5-bit value (k >> 3) expanded to 8 bits in bits 8-15.
        * The even word for a pixel's low byte plus the odd word for its high
        * byte therefore gives the complete 32-bit pixel; presumably that is
        * how Blit_RGB565_32 consumes the table -- confirm against that helper.
        */
  1893 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1894     0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1895     0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1896     0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1897     0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1898     0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1899     0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1900     0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1901     0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1902     0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1903     0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1904     0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1905     0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1906     0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1907     0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1908     0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1909     0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1910     0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1911     0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1912     0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1913     0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1914     0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1915     0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1916     0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1917     0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1918     0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1919     0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1920     0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1921     0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1922     0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1923     0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1924     0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1925     0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1926     0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1927     0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1928     0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1929     0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1930     0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1931     0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1932     0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1933     0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1934     0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1935     0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1936     0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1937     0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1938     0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1939     0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1940     0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1941     0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1942     0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1943     0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1944     0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1945     0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1946     0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1947     0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1948     0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1949     0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1950     0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1951     0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1952     0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1953     0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1954     0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1955     0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1956     0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1957     0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1958     0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1959     0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1960     0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1961     0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1962     0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1963     0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1964     0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1965     0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1966     0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1967     0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1968     0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1969     0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1970     0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1971     0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1972     0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1973     0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1974     0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1975     0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1976     0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1977     0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1978     0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1979     0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1980     0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1981     0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1982     0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1983     0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1984     0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1985     0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1986     0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1987     0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1988     0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1989     0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1990     0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1991     0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1992     0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1993     0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1994     0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1995     0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1996     0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1997     0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1998     0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1999     0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  2000     0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  2001     0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  2002     0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  2003     0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  2004     0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  2005     0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  2006     0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  2007     0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  2008     0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  2009     0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  2010     0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  2011     0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  2012     0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  2013     0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  2014     0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  2015     0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  2016     0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  2017     0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  2018     0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  2019     0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  2020     0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  2021     0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  2022 };
  2023 
  2024 static void
  2025 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
  2026 {
           /* RGB 5-6-5 --> BGRA 8-8-8-8: forward to the shared table-driven
            * helper Blit_RGB565_32, supplying the BGRA8888 lookup table. */
  2027     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  2028 }
  2029 
  2030 static void
  2031 BlitNto1(SDL_BlitInfo * info)
  2032 {
           /*
            * Blit a multi-byte-per-pixel source onto a one-byte-per-pixel
            * destination.
            *
            * For every pixel, DISEMBLE_RGB (a macro defined outside this
            * function) fills sR/sG/sB from the source pixel, and the three
            * values are packed into one byte as
            *     (sR >> 5) << 5 | (sG >> 5) << 2 | (sB >> 6)
            * i.e. a 3-3-2 layout, assuming the components are 8-bit values
            * (TODO confirm against DISEMBLE_RGB).  When info->table is NULL
            * that byte is written to the destination as-is; otherwise it is
            * used as an index into info->table and the looked-up byte is
            * written instead.
            *
            * Each branch has two equivalent loop bodies: the DUFFS_LOOP macro
            * form when USE_DUFFS_LOOP is defined, a plain counted loop
            * otherwise.
            */
  2033 #ifndef USE_DUFFS_LOOP
           /* Column counter; only the plain-loop variant needs it. */
  2034     int c;
  2035 #endif
  2036     int width, height;
  2037     Uint8 *src;
  2038     const Uint8 *map;
  2039     Uint8 *dst;
  2040     int srcskip, dstskip;
  2041     int srcbpp;
  2042     Uint32 Pixel;
  2043     int sR, sG, sB;
  2044     SDL_PixelFormat *srcfmt;
  2045 
  2046     /* Set up some basic variables */
  2047     width = info->dst_w;
  2048     height = info->dst_h;
  2049     src = info->src;
  2050     srcskip = info->src_skip;
  2051     dst = info->dst;
  2052     dstskip = info->dst_skip;
  2053     map = info->table;
  2054     srcfmt = info->src_fmt;
  2055     srcbpp = srcfmt->BytesPerPixel;
  2056 
           /* No table: store the packed 3-3-2 byte directly. */
  2057     if (map == NULL) {
  2058         while (height--) {
  2059 #ifdef USE_DUFFS_LOOP
  2060             /* *INDENT-OFF* */
  2061             DUFFS_LOOP(
  2062                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2063                                 sR, sG, sB);
  2064                 if ( 1 ) {
  2065                     /* Pack RGB into 8bit pixel */
  2066                     *dst = ((sR>>5)<<(3+2))|
  2067                             ((sG>>5)<<(2)) |
  2068                             ((sB>>6)<<(0)) ;
  2069                 }
  2070                 dst++;
  2071                 src += srcbpp;
  2072             , width);
  2073             /* *INDENT-ON* */
  2074 #else
  2075             for (c = width; c; --c) {
  2076                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
                       /* The condition is constant, so the store always happens. */
  2077                 if (1) {
  2078                     /* Pack RGB into 8bit pixel */
  2079                     *dst = ((sR >> 5) << (3 + 2)) |
  2080                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
  2081                 }
  2082                 dst++;
  2083                 src += srcbpp;
  2084             }
  2085 #endif
                   /* End of row: apply the per-row skips from info. */
  2086             src += srcskip;
  2087             dst += dstskip;
  2088         }
  2089     } else {
               /* Table present: the packed 3-3-2 byte indexes into map. */
  2090         while (height--) {
  2091 #ifdef USE_DUFFS_LOOP
  2092             /* *INDENT-OFF* */
  2093             DUFFS_LOOP(
  2094                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2095                                 sR, sG, sB);
  2096                 if ( 1 ) {
  2097                     /* Pack RGB into 8bit pixel */
  2098                     *dst = map[((sR>>5)<<(3+2))|
  2099                            ((sG>>5)<<(2))  |
  2100                            ((sB>>6)<<(0))  ];
  2101                 }
  2102                 dst++;
  2103                 src += srcbpp;
  2104             , width);
  2105             /* *INDENT-ON* */
  2106 #else
  2107             for (c = width; c; --c) {
  2108                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2109                 if (1) {
  2110                     /* Pack RGB into 8bit pixel */
  2111                     *dst = map[((sR >> 5) << (3 + 2)) |
  2112                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
  2113                 }
  2114                 dst++;
  2115                 src += srcbpp;
  2116             }
  2117 #endif /* USE_DUFFS_LOOP */
  2118             src += srcskip;
  2119             dst += dstskip;
  2120         }
  2121     }
  2122 }
  2123 
  2124 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields
        *
        * Copies info->dst_w x info->dst_h 32-bit pixels from info->src to
        * info->dst, touching only the non-RGB bits:
        *  - destination format has an alpha mask: the constant alpha info->a,
        *    positioned with the destination's Aloss/Ashift, is OR-ed into
        *    every pixel;
        *  - otherwise: every pixel is AND-ed with the source R|G|B masks, so
        *    all other bits are cleared.
        * info->src_skip / info->dst_skip are added as byte offsets after each
        * row.
        */
  2125 static void
  2126 Blit4to4MaskAlpha(SDL_BlitInfo * info)
  2127 {
  2128     int width = info->dst_w;
  2129     int height = info->dst_h;
  2130     Uint32 *src = (Uint32 *) info->src;
  2131     int srcskip = info->src_skip;
  2132     Uint32 *dst = (Uint32 *) info->dst;
  2133     int dstskip = info->dst_skip;
  2134     SDL_PixelFormat *srcfmt = info->src_fmt;
  2135     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2136 
  2137     if (dstfmt->Amask) {
  2138         /* RGB->RGBA, SET_ALPHA */
               /* Shift in Uint32.  If info->a is narrower than int (an 8-bit
                * alpha, presumably -- confirm against SDL_BlitInfo) it would be
                * promoted to signed int, and e.g. 0xff << 24 overflows int,
                * which is undefined behaviour. */
  2139         Uint32 mask = ((Uint32) info->a >> dstfmt->Aloss) << dstfmt->Ashift;
  2140 
  2141         while (height--) {
  2142             /* *INDENT-OFF* */
  2143             DUFFS_LOOP(
  2144             {
  2145                 *dst = *src | mask;
  2146                 ++dst;
  2147                 ++src;
  2148             },
  2149             width);
  2150             /* *INDENT-ON* */
                   /* The skips are byte counts, hence the detour via Uint8 *. */
  2151             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2152             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2153         }
  2154     } else {
  2155         /* RGBA->RGB, NO_ALPHA */
  2156         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2157 
  2158         while (height--) {
  2159             /* *INDENT-OFF* */
  2160             DUFFS_LOOP(
  2161             {
  2162                 *dst = *src & mask;
  2163                 ++dst;
  2164                 ++src;
  2165             },
  2166             width);
  2167             /* *INDENT-ON* */
  2168             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2169             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2170         }
  2171     }
  2172 }
  2173 
  2174 /* blits 32 bit RGBA<->RGBA with both surfaces having the same R,G,B,A fields
        *
        * Plain copy: every 32-bit pixel of the info->dst_w x info->dst_h area
        * is stored to the destination unchanged.  info->src_skip and
        * info->dst_skip are added as byte offsets after each row.
        */
  2175 static void
  2176 Blit4to4CopyAlpha(SDL_BlitInfo * info)
  2177 {
  2178     int width = info->dst_w;
  2179     int height = info->dst_h;
  2180     Uint32 *src = (Uint32 *) info->src;
  2181     int srcskip = info->src_skip;
  2182     Uint32 *dst = (Uint32 *) info->dst;
  2183     int dstskip = info->dst_skip;
  2184 
  2185     /* RGBA->RGBA, COPY_ALPHA */
  2186     while (height--) {
  2187         /* *INDENT-OFF* */
  2188         DUFFS_LOOP(
  2189         {
  2190             *dst = *src;
  2191             ++dst;
  2192             ++src;
  2193         },
  2194         width);
  2195         /* *INDENT-ON* */
               /* The skips are byte counts, hence the detour via Uint8 *. */
  2196         src = (Uint32 *) ((Uint8 *) src + srcskip);
  2197         dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2198     }
  2199 }
  2200 
  2201 /* permutation for mapping srcfmt to dstfmt, overloading or not the alpha channel */
  2202 static void
  2203 get_permutation(SDL_PixelFormat *srcfmt, SDL_PixelFormat *dstfmt,
  2204         int *_p0 , int *_p1, int *_p2, int *_p3, int *_alpha_channel)
  2205 {
  2206     int alpha_channel = 0, p0, p1, p2, p3;
  2207 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  2208     int Pixel = 0x04030201; /* identity permutation */
  2209 #else
  2210     int Pixel = 0x01020304; /* identity permutation */
  2211     int srcbpp = srcfmt->BytesPerPixel;
  2212     int dstbpp = dstfmt->BytesPerPixel;
  2213 #endif
  2214 
  2215     if (srcfmt->Amask) {
  2216         RGBA_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2, p3);
  2217     } else {
  2218         RGB_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2);
  2219         p3 = 0;
  2220     }
  2221 
  2222     if (dstfmt->Amask) {
  2223         if (srcfmt->Amask) {
  2224             PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, p3);
  2225         } else {
  2226             PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, 0);
  2227         }
  2228     } else {
  2229         PIXEL_FROM_RGB(Pixel, dstfmt, p0, p1, p2);
  2230     }
  2231 
  2232 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  2233     p0 = Pixel & 0xFF;
  2234     p1 = (Pixel >> 8) & 0xFF;
  2235     p2 = (Pixel >> 16) & 0xFF;
  2236     p3 = (Pixel >> 24) & 0xFF;
  2237 #else
  2238     p3 = Pixel & 0xFF;
  2239     p2 = (Pixel >> 8) & 0xFF;
  2240     p1 = (Pixel >> 16) & 0xFF;
  2241     p0 = (Pixel >> 24) & 0xFF;
  2242 #endif
  2243 
  2244     if (p0 == 0) {
  2245         p0 = 1;
  2246         alpha_channel = 0;
  2247     } else if (p1 == 0) {
  2248         p1 = 1;
  2249         alpha_channel = 1;
  2250     } else if (p2 == 0) {
  2251         p2 = 1;
  2252         alpha_channel = 2;
  2253     } else if (p3 == 0) {
  2254         p3 = 1;
  2255         alpha_channel = 3;
  2256     }
  2257 
  2258 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  2259 #else
  2260     if (srcbpp == 3 && dstbpp == 4) {
  2261         if (p0 != 1) p0--;
  2262         if (p1 != 1) p1--;
  2263         if (p2 != 1) p2--;
  2264         if (p3 != 1) p3--;
  2265     } else if (srcbpp == 4 && dstbpp == 3) {
  2266         p0 = p1;
  2267         p1 = p2;
  2268         p2 = p3;
  2269     }
  2270 #endif
  2271     *_p0 = p0 - 1;
  2272     *_p1 = p1 - 1;
  2273     *_p2 = p2 - 1;
  2274     *_p3 = p3 - 1;
  2275 
  2276     if (_alpha_channel) {
  2277         *_alpha_channel = alpha_channel;
  2278     }
  2279     return;
  2280 }
  2281 
  2282 
  2283 static void
  2284 BlitNtoN(SDL_BlitInfo * info)
  2285 {
  2286     int width = info->dst_w;
  2287     int height = info->dst_h;
  2288     Uint8 *src = info->src;
  2289     int srcskip = info->src_skip;
  2290     Uint8 *dst = info->dst;
  2291     int dstskip = info->dst_skip;
  2292     SDL_PixelFormat *srcfmt = info->src_fmt;
  2293     int srcbpp = srcfmt->BytesPerPixel;
  2294     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2295     int dstbpp = dstfmt->BytesPerPixel;
  2296     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2297 
  2298 #if HAVE_FAST_WRITE_INT8
  2299     /* Blit with permutation: 4->4 */
  2300     if (srcbpp == 4 && dstbpp == 4 &&
  2301         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
  2302         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
  2303 
  2304         /* Find the appropriate permutation */
  2305         int alpha_channel, p0, p1, p2, p3;
  2306         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
  2307 
  2308         while (height--) {
  2309             /* *INDENT-OFF* */
  2310             DUFFS_LOOP(
  2311             {
  2312                 dst[0] = src[p0];
  2313                 dst[1] = src[p1];
  2314                 dst[2] = src[p2];
  2315                 dst[3] = src[p3];
  2316                 dst[alpha_channel] = alpha;
  2317                 src += 4;
  2318                 dst += 4;
  2319             }, width);
  2320             /* *INDENT-ON* */
  2321             src += srcskip;
  2322             dst += dstskip;
  2323         }
  2324         return;
  2325     }
  2326 #endif
  2327 
  2328     /* Blit with permutation: 4->3 */
  2329     if (srcbpp == 4 && dstbpp == 3 &&
  2330         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
  2331 
  2332         /* Find the appropriate permutation */
  2333         int p0, p1, p2, p3;
  2334         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
  2335 
  2336         while (height--) {
  2337             /* *INDENT-OFF* */
  2338             DUFFS_LOOP(
  2339             {
  2340                 dst[0] = src[p0];
  2341                 dst[1] = src[p1];
  2342                 dst[2] = src[p2];
  2343                 src += 4;
  2344                 dst += 3;
  2345             }, width);
  2346             /* *INDENT-ON* */
  2347             src += srcskip;
  2348             dst += dstskip;
  2349         }
  2350         return;
  2351     }
  2352 
  2353 #if HAVE_FAST_WRITE_INT8
  2354     /* Blit with permutation: 3->4 */
  2355     if (srcbpp == 3 && dstbpp == 4 &&
  2356         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
  2357 
  2358         /* Find the appropriate permutation */
  2359         int alpha_channel, p0, p1, p2, p3;
  2360         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
  2361 
  2362         while (height--) {
  2363             /* *INDENT-OFF* */
  2364             DUFFS_LOOP(
  2365             {
  2366                 dst[0] = src[p0];
  2367                 dst[1] = src[p1];
  2368                 dst[2] = src[p2];
  2369                 dst[3] = src[p3];
  2370                 dst[alpha_channel] = alpha;
  2371                 src += 3;
  2372                 dst += 4;
  2373             }, width);
  2374             /* *INDENT-ON* */
  2375             src += srcskip;
  2376             dst += dstskip;
  2377         }
  2378         return;
  2379     }
  2380 #endif
  2381 
  2382     while (height--) {
  2383         /* *INDENT-OFF* */
  2384         DUFFS_LOOP(
  2385         {
  2386             Uint32 Pixel;
  2387             unsigned sR;
  2388             unsigned sG;
  2389             unsigned sB;
  2390             DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2391             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2392             dst += dstbpp;
  2393             src += srcbpp;
  2394         },
  2395         width);
  2396         /* *INDENT-ON* */
  2397         src += srcskip;
  2398         dst += dstskip;
  2399     }
  2400 }
  2401 
  2402 static void
  2403 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
  2404 {
  2405     int width = info->dst_w;
  2406     int height = info->dst_h;
  2407     Uint8 *src = info->src;
  2408     int srcskip = info->src_skip;
  2409     Uint8 *dst = info->dst;
  2410     int dstskip = info->dst_skip;
  2411     SDL_PixelFormat *srcfmt = info->src_fmt;
  2412     int srcbpp = srcfmt->BytesPerPixel;
  2413     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2414     int dstbpp = dstfmt->BytesPerPixel;
  2415     int c;
  2416 
  2417 #if HAVE_FAST_WRITE_INT8
  2418     /* Blit with permutation: 4->4 */
  2419     if (srcbpp == 4 && dstbpp == 4 &&
  2420         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
  2421         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
  2422 
  2423         /* Find the appropriate permutation */
  2424         int p0, p1, p2, p3;
  2425         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
  2426 
  2427         while (height--) {
  2428             /* *INDENT-OFF* */
  2429             DUFFS_LOOP(
  2430             {
  2431                 dst[0] = src[p0];
  2432                 dst[1] = src[p1];
  2433                 dst[2] = src[p2];
  2434                 dst[3] = src[p3];
  2435                 src += 4;
  2436                 dst += 4;
  2437             }, width);
  2438             /* *INDENT-ON* */
  2439             src += srcskip;
  2440             dst += dstskip;
  2441         }
  2442         return;
  2443     }
  2444 #endif
  2445 
  2446     while (height--) {
  2447         for (c = width; c; --c) {
  2448             Uint32 Pixel;
  2449             unsigned sR, sG, sB, sA;
  2450             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2451             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2452             dst += dstbpp;
  2453             src += srcbpp;
  2454         }
  2455         src += srcskip;
  2456         dst += dstskip;
  2457     }
  2458 }
  2459 
  2460 static void
  2461 BlitNto1Key(SDL_BlitInfo * info)
  2462 {
  2463     int width = info->dst_w;
  2464     int height = info->dst_h;
  2465     Uint8 *src = info->src;
  2466     int srcskip = info->src_skip;
  2467     Uint8 *dst = info->dst;
  2468     int dstskip = info->dst_skip;
  2469     SDL_PixelFormat *srcfmt = info->src_fmt;
  2470     const Uint8 *palmap = info->table;
  2471     Uint32 ckey = info->colorkey;
  2472     Uint32 rgbmask = ~srcfmt->Amask;
  2473     int srcbpp;
  2474     Uint32 Pixel;
  2475     unsigned sR, sG, sB;
  2476 
  2477     /* Set up some basic variables */
  2478     srcbpp = srcfmt->BytesPerPixel;
  2479     ckey &= rgbmask;
  2480 
  2481     if (palmap == NULL) {
  2482         while (height--) {
  2483             /* *INDENT-OFF* */
  2484             DUFFS_LOOP(
  2485             {
  2486                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2487                                 sR, sG, sB);
  2488                 if ( (Pixel & rgbmask) != ckey ) {
  2489                     /* Pack RGB into 8bit pixel */
  2490                     *dst = (Uint8)(((sR>>5)<<(3+2))|
  2491                                    ((sG>>5)<<(2)) |
  2492                                    ((sB>>6)<<(0)));
  2493                 }
  2494                 dst++;
  2495                 src += srcbpp;
  2496             },
  2497             width);
  2498             /* *INDENT-ON* */
  2499             src += srcskip;
  2500             dst += dstskip;
  2501         }
  2502     } else {
  2503         while (height--) {
  2504             /* *INDENT-OFF* */
  2505             DUFFS_LOOP(
  2506             {
  2507                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2508                                 sR, sG, sB);
  2509                 if ( (Pixel & rgbmask) != ckey ) {
  2510                     /* Pack RGB into 8bit pixel */
  2511                     *dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2512                                          ((sG>>5)<<(2))  |
  2513                                          ((sB>>6)<<(0))  ];
  2514                 }
  2515                 dst++;
  2516                 src += srcbpp;
  2517             },
  2518             width);
  2519             /* *INDENT-ON* */
  2520             src += srcskip;
  2521             dst += dstskip;
  2522         }
  2523     }
  2524 }
  2525 
  2526 static void
  2527 Blit2to2Key(SDL_BlitInfo * info)
  2528 {
  2529     int width = info->dst_w;
  2530     int height = info->dst_h;
  2531     Uint16 *srcp = (Uint16 *) info->src;
  2532     int srcskip = info->src_skip;
  2533     Uint16 *dstp = (Uint16 *) info->dst;
  2534     int dstskip = info->dst_skip;
  2535     Uint32 ckey = info->colorkey;
  2536     Uint32 rgbmask = ~info->src_fmt->Amask;
  2537 
  2538     /* Set up some basic variables */
  2539     srcskip /= 2;
  2540     dstskip /= 2;
  2541     ckey &= rgbmask;
  2542 
  2543     while (height--) {
  2544         /* *INDENT-OFF* */
  2545         DUFFS_LOOP(
  2546         {
  2547             if ( (*srcp & rgbmask) != ckey ) {
  2548                 *dstp = *srcp;
  2549             }
  2550             dstp++;
  2551             srcp++;
  2552         },
  2553         width);
  2554         /* *INDENT-ON* */
  2555         srcp += srcskip;
  2556         dstp += dstskip;
  2557     }
  2558 }
  2559 
  2560 static void
  2561 BlitNtoNKey(SDL_BlitInfo * info)
  2562 {
  2563     int width = info->dst_w;
  2564     int height = info->dst_h;
  2565     Uint8 *src = info->src;
  2566     int srcskip = info->src_skip;
  2567     Uint8 *dst = info->dst;
  2568     int dstskip = info->dst_skip;
  2569     Uint32 ckey = info->colorkey;
  2570     SDL_PixelFormat *srcfmt = info->src_fmt;
  2571     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2572     int srcbpp = srcfmt->BytesPerPixel;
  2573     int dstbpp = dstfmt->BytesPerPixel;
  2574     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2575     Uint32 rgbmask = ~srcfmt->Amask;
  2576     int sfmt = srcfmt->format;
  2577     int dfmt = dstfmt->format;
  2578 
  2579     /* Set up some basic variables */
  2580     ckey &= rgbmask;
  2581 
  2582     /* BPP 4, same rgb */
  2583     if (srcbpp == 4 && dstbpp == 4 && srcfmt->Rmask == dstfmt->Rmask && srcfmt->Gmask == dstfmt->Gmask && srcfmt->Bmask == dstfmt->Bmask) {
  2584         Uint32 *src32 = (Uint32*)src;
  2585         Uint32 *dst32 = (Uint32*)dst;
  2586 
  2587         if (dstfmt->Amask) {
  2588             /* RGB->RGBA, SET_ALPHA */
  2589             Uint32 mask = info->a << dstfmt->Ashift;
  2590             while (height--) {
  2591                 /* *INDENT-OFF* */
  2592                 DUFFS_LOOP(
  2593                 {
  2594                     if ((*src32 & rgbmask) != ckey) {
  2595                         *dst32 = *src32 | mask;
  2596                     }
  2597                     ++dst32;
  2598                     ++src32;
  2599                 }, width);
  2600                 /* *INDENT-ON* */
  2601                 src32 = (Uint32 *) ((Uint8 *) src32 + srcskip);
  2602                 dst32 = (Uint32 *) ((Uint8 *) dst32 + dstskip);
  2603             }
  2604             return;
  2605         } else {
  2606             /* RGBA->RGB, NO_ALPHA */
  2607             Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2608             while (height--) {
  2609                 /* *INDENT-OFF* */
  2610                 DUFFS_LOOP(
  2611                 {
  2612                     if ((*src32 & rgbmask) != ckey) {
  2613                         *dst32 = *src32 & mask;
  2614                     }
  2615                     ++dst32;
  2616                     ++src32;
  2617                 }, width);
  2618                 /* *INDENT-ON* */
  2619                 src32 = (Uint32 *) ((Uint8 *) src32 + srcskip);
  2620                 dst32 = (Uint32 *) ((Uint8 *) dst32 + dstskip);
  2621             }
  2622             return;
  2623         }
  2624     }
  2625 
  2626 #if HAVE_FAST_WRITE_INT8
  2627     /* Blit with permutation: 4->4 */
  2628     if (srcbpp == 4 && dstbpp == 4 &&
  2629         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
  2630         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
  2631 
  2632         /* Find the appropriate permutation */
  2633         int alpha_channel, p0, p1, p2, p3;
  2634         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
  2635 
  2636         while (height--) {
  2637             /* *INDENT-OFF* */
  2638             DUFFS_LOOP(
  2639             {
  2640                 Uint32 *src32 = (Uint32*)src;
  2641 
  2642                 if ((*src32 & rgbmask) != ckey) {
  2643                     dst[0] = src[p0];
  2644                     dst[1] = src[p1];
  2645                     dst[2] = src[p2];
  2646                     dst[3] = src[p3];
  2647                     dst[alpha_channel] = alpha;
  2648                 }
  2649                 src += 4;
  2650                 dst += 4;
  2651             }, width);
  2652             /* *INDENT-ON* */
  2653             src += srcskip;
  2654             dst += dstskip;
  2655         }
  2656         return;
  2657     }
  2658 #endif
  2659 
  2660     /* BPP 3, same rgb triplet */
  2661     if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) ||
  2662         (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {
  2663 
  2664 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  2665         Uint8 k0 = ckey & 0xFF;
  2666         Uint8 k1 = (ckey >> 8)  & 0xFF;
  2667         Uint8 k2 = (ckey >> 16) & 0xFF;
  2668 #else
  2669         Uint8 k0 = (ckey >> 16) & 0xFF;
  2670         Uint8 k1 = (ckey >> 8) & 0xFF;
  2671         Uint8 k2 = ckey & 0xFF;
  2672 #endif
  2673 
  2674         while (height--) {
  2675             /* *INDENT-OFF* */
  2676             DUFFS_LOOP(
  2677             {
  2678                 Uint8 s0 = src[0];
  2679                 Uint8 s1 = src[1];
  2680                 Uint8 s2 = src[2];
  2681 
  2682                 if (k0 != s0 || k1 != s1 || k2 != s2) {
  2683                     dst[0] = s0;
  2684                     dst[1] = s1;
  2685                     dst[2] = s2;
  2686                 }
  2687                 src += 3;
  2688                 dst += 3;
  2689             },
  2690             width);
  2691             /* *INDENT-ON* */
  2692             src += srcskip;
  2693             dst += dstskip;
  2694         }
  2695         return;
  2696     }
  2697 
  2698     /* BPP 3, inversed rgb triplet */
  2699     if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_BGR24) ||
  2700         (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_RGB24)) {
  2701 
  2702 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  2703         Uint8 k0 = ckey & 0xFF;
  2704         Uint8 k1 = (ckey >> 8)  & 0xFF;
  2705         Uint8 k2 = (ckey >> 16) & 0xFF;
  2706 #else
  2707         Uint8 k0 = (ckey >> 16) & 0xFF;
  2708         Uint8 k1 = (ckey >> 8) & 0xFF;
  2709         Uint8 k2 = ckey & 0xFF;
  2710 #endif
  2711 
  2712         while (height--) {
  2713             /* *INDENT-OFF* */
  2714             DUFFS_LOOP(
  2715             {
  2716                 Uint8 s0 = src[0];
  2717                 Uint8 s1 = src[1];
  2718                 Uint8 s2 = src[2];
  2719                 if (k0 != s0 || k1 != s1 || k2 != s2) {
  2720                     /* Inversed RGB */
  2721                     dst[0] = s2;
  2722                     dst[1] = s1;
  2723                     dst[2] = s0;
  2724                 }
  2725                 src += 3;
  2726                 dst += 3;
  2727             },
  2728             width);
  2729             /* *INDENT-ON* */
  2730             src += srcskip;
  2731             dst += dstskip;
  2732         }
  2733         return;
  2734     }
  2735 
  2736     /* Blit with permutation: 4->3 */
  2737     if (srcbpp == 4 && dstbpp == 3 &&
  2738         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
  2739 
  2740         /* Find the appropriate permutation */
  2741         int p0, p1, p2, p3;
  2742         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
  2743 
  2744         while (height--) {
  2745             /* *INDENT-OFF* */
  2746             DUFFS_LOOP(
  2747             {
  2748                 Uint32 *src32 = (Uint32*)src;
  2749                 if ((*src32 & rgbmask) != ckey) {
  2750                     dst[0] = src[p0];
  2751                     dst[1] = src[p1];
  2752                     dst[2] = src[p2];
  2753                 }
  2754                 src += 4;
  2755                 dst += 3;
  2756             }, width);
  2757             /* *INDENT-ON* */
  2758             src += srcskip;
  2759             dst += dstskip;
  2760         }
  2761         return;
  2762     }
  2763 
  2764 #if HAVE_FAST_WRITE_INT8
  2765     /* Blit with permutation: 3->4 */
  2766     if (srcbpp == 3 && dstbpp == 4 &&
  2767         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
  2768 
  2769 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  2770         Uint8 k0 = ckey & 0xFF;
  2771         Uint8 k1 = (ckey >> 8)  & 0xFF;
  2772         Uint8 k2 = (ckey >> 16) & 0xFF;
  2773 #else
  2774         Uint8 k0 = (ckey >> 16) & 0xFF;
  2775         Uint8 k1 = (ckey >> 8) & 0xFF;
  2776         Uint8 k2 = ckey  & 0xFF;
  2777 #endif
  2778 
  2779         /* Find the appropriate permutation */
  2780         int alpha_channel, p0, p1, p2, p3;
  2781         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
  2782 
  2783         while (height--) {
  2784             /* *INDENT-OFF* */
  2785             DUFFS_LOOP(
  2786             {
  2787                 Uint8 s0 = src[0];
  2788                 Uint8 s1 = src[1];
  2789                 Uint8 s2 = src[2];
  2790 
  2791                 if (k0 != s0 || k1 != s1 || k2 != s2) {
  2792                     dst[0] = src[p0];
  2793                     dst[1] = src[p1];
  2794                     dst[2] = src[p2];
  2795                     dst[3] = src[p3];
  2796                     dst[alpha_channel] = alpha;
  2797                 }
  2798                 src += 3;
  2799                 dst += 4;
  2800             }, width);
  2801             /* *INDENT-ON* */
  2802             src += srcskip;
  2803             dst += dstskip;
  2804         }
  2805         return;
  2806     }
  2807 #endif
  2808 
  2809     while (height--) {
  2810         /* *INDENT-OFF* */
  2811         DUFFS_LOOP(
  2812         {
  2813             Uint32 Pixel;
  2814             unsigned sR;
  2815             unsigned sG;
  2816             unsigned sB;
  2817             RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2818             if ( (Pixel & rgbmask) != ckey ) {
  2819                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2820                 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2821             }
  2822             dst += dstbpp;
  2823             src += srcbpp;
  2824         },
  2825         width);
  2826         /* *INDENT-ON* */
  2827         src += srcskip;
  2828         dst += dstskip;
  2829     }
  2830 }
  2831 
  2832 static void
  2833 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
  2834 {
  2835     int width = info->dst_w;
  2836     int height = info->dst_h;
  2837     Uint8 *src = info->src;
  2838     int srcskip = info->src_skip;
  2839     Uint8 *dst = info->dst;
  2840     int dstskip = info->dst_skip;
  2841     Uint32 ckey = info->colorkey;
  2842     SDL_PixelFormat *srcfmt = info->src_fmt;
  2843     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2844     Uint32 rgbmask = ~srcfmt->Amask;
  2845 
  2846     Uint8 srcbpp;
  2847     Uint8 dstbpp;
  2848     Uint32 Pixel;
  2849     unsigned sR, sG, sB, sA;
  2850 
  2851     /* Set up some basic variables */
  2852     srcbpp = srcfmt->BytesPerPixel;
  2853     dstbpp = dstfmt->BytesPerPixel;
  2854     ckey &= rgbmask;
  2855 
  2856     /* Fastpath: same source/destination format, with Amask, bpp 32, loop is vectorized. ~10x faster */
  2857     if (srcfmt->format == dstfmt->format) {
  2858 
  2859         if (srcfmt->format == SDL_PIXELFORMAT_ARGB8888 ||
  2860             srcfmt->format == SDL_PIXELFORMAT_ABGR8888 ||
  2861             srcfmt->format == SDL_PIXELFORMAT_BGRA8888 ||
  2862             srcfmt->format == SDL_PIXELFORMAT_RGBA8888) {
  2863 
  2864             Uint32 *src32 = (Uint32*)src;
  2865             Uint32 *dst32 = (Uint32*)dst;
  2866             while (height--) {
  2867                 /* *INDENT-OFF* */
  2868                 DUFFS_LOOP(
  2869                 {
  2870                     if ((*src32 & rgbmask) != ckey) {
  2871                         *dst32 = *src32;
  2872                     }
  2873                     ++src32;
  2874                     ++dst32;
  2875                 },
  2876                 width);
  2877                 /* *INDENT-ON* */
  2878                 src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
  2879                 dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
  2880             }
  2881         }
  2882         return;
  2883     }
  2884 
  2885 #if HAVE_FAST_WRITE_INT8
  2886     /* Blit with permutation: 4->4 */
  2887     if (srcbpp == 4 && dstbpp == 4 &&
  2888         srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
  2889         dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
  2890 
  2891         /* Find the appropriate permutation */
  2892         int p0, p1, p2, p3;
  2893         get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
  2894 
  2895         while (height--) {
  2896             /* *INDENT-OFF* */
  2897             DUFFS_LOOP(
  2898             {
  2899                 Uint32 *src32 = (Uint32*)src;
  2900                 if ((*src32 & rgbmask) != ckey) {
  2901                     dst[0] = src[p0];
  2902                     dst[1] = src[p1];
  2903                     dst[2] = src[p2];
  2904                     dst[3] = src[p3];
  2905                 }
  2906                 src += 4;
  2907                 dst += 4;
  2908             }, width);
  2909             /* *INDENT-ON* */
  2910             src += srcskip;
  2911             dst += dstskip;
  2912         }
  2913         return;
  2914     }
  2915 #endif
  2916 
  2917     while (height--) {
  2918         /* *INDENT-OFF* */
  2919         DUFFS_LOOP(
  2920         {
  2921             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2922             if ( (Pixel & rgbmask) != ckey ) {
  2923                   ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2924             }
  2925             dst += dstbpp;
  2926             src += srcbpp;
  2927         },
  2928         width);
  2929         /* *INDENT-ON* */
  2930         src += srcskip;
  2931         dst += dstskip;
  2932     }
  2933 }
  2934 
  2935 /* Special optimized blit for ARGB 2-10-10-10 --> RGBA */
  2936 static void
  2937 Blit2101010toN(SDL_BlitInfo * info)
  2938 {
  2939     int width = info->dst_w;
  2940     int height = info->dst_h;
  2941     Uint8 *src = info->src;
  2942     int srcskip = info->src_skip;
  2943     Uint8 *dst = info->dst;
  2944     int dstskip = info->dst_skip;
  2945     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2946     int dstbpp = dstfmt->BytesPerPixel;
  2947     Uint32 Pixel;
  2948     unsigned sR, sG, sB, sA;
  2949 
  2950     while (height--) {
  2951         /* *INDENT-OFF* */
  2952         DUFFS_LOOP(
  2953         {
  2954             Pixel = *(Uint32 *)src;
  2955             RGBA_FROM_ARGB2101010(Pixel, sR, sG, sB, sA);
  2956             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2957             dst += dstbpp;
  2958             src += 4;
  2959         },
  2960         width);
  2961         /* *INDENT-ON* */
  2962         src += srcskip;
  2963         dst += dstskip;
  2964     }
  2965 }
  2966 
  2967 /* Special optimized blit for RGBA --> ARGB 2-10-10-10 */
  2968 static void
  2969 BlitNto2101010(SDL_BlitInfo * info)
  2970 {
  2971     int width = info->dst_w;
  2972     int height = info->dst_h;
  2973     Uint8 *src = info->src;
  2974     int srcskip = info->src_skip;
  2975     Uint8 *dst = info->dst;
  2976     int dstskip = info->dst_skip;
  2977     SDL_PixelFormat *srcfmt = info->src_fmt;
  2978     int srcbpp = srcfmt->BytesPerPixel;
  2979     Uint32 Pixel;
  2980     unsigned sR, sG, sB, sA;
  2981 
  2982     while (height--) {
  2983         /* *INDENT-OFF* */
  2984         DUFFS_LOOP(
  2985         {
  2986             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2987             ARGB2101010_FROM_RGBA(Pixel, sR, sG, sB, sA);
  2988             *(Uint32 *)dst = Pixel;
  2989             dst += 4;
  2990             src += srcbpp;
  2991         },
  2992         width);
  2993         /* *INDENT-ON* */
  2994         src += srcskip;
  2995         dst += dstskip;
  2996     }
  2997 }
  2998 
  2999 /* Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet */
  3000 static void
  3001 Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
  3002 {
  3003     int width = info->dst_w;
  3004     int height = info->dst_h;
  3005     Uint8 *src = info->src;
  3006     int srcskip = info->src_skip;
  3007     Uint8 *dst = info->dst;
  3008     int dstskip = info->dst_skip;
  3009     SDL_PixelFormat *srcfmt = info->src_fmt;
  3010     int srcbpp = srcfmt->BytesPerPixel;
  3011     SDL_PixelFormat *dstfmt = info->dst_fmt;
  3012     int dstbpp = dstfmt->BytesPerPixel;
  3013 
  3014     if (dstfmt->Amask) {
  3015         /* SET_ALPHA */
  3016         Uint32 mask = info->a << dstfmt->Ashift;
  3017 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  3018         int i0 = 0, i1 = 1, i2 = 2;
  3019 #else
  3020         int i0 = srcbpp - 1 - 0;
  3021         int i1 = srcbpp - 1 - 1;
  3022         int i2 = srcbpp - 1 - 2;
  3023 #endif
  3024         while (height--) {
  3025             /* *INDENT-OFF* */
  3026             DUFFS_LOOP(
  3027             {
  3028                 Uint32 *dst32 = (Uint32*)dst;
  3029                 Uint8 s0 = src[i0];
  3030                 Uint8 s1 = src[i1];
  3031                 Uint8 s2 = src[i2];
  3032                 *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
  3033                 dst += 4;
  3034                 src += srcbpp;
  3035             }, width);
  3036             /* *INDENT-ON* */
  3037             src += srcskip;
  3038             dst += dstskip;
  3039         }
  3040     } else {
  3041         /* NO_ALPHA */
  3042 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  3043         int i0 = 0, i1 = 1, i2 = 2;
  3044         int j0 = 0, j1 = 1, j2 = 2;
  3045 #else
  3046         int i0 = srcbpp - 1 - 0;
  3047         int i1 = srcbpp - 1 - 1;
  3048         int i2 = srcbpp - 1 - 2;
  3049         int j0 = dstbpp - 1 - 0;
  3050         int j1 = dstbpp - 1 - 1;
  3051         int j2 = dstbpp - 1 - 2;
  3052 #endif
  3053         while (height--) {
  3054             /* *INDENT-OFF* */
  3055             DUFFS_LOOP(
  3056             {
  3057                 Uint8 s0 = src[i0];
  3058                 Uint8 s1 = src[i1];
  3059                 Uint8 s2 = src[i2];
  3060                 dst[j0] = s0;
  3061                 dst[j1] = s1;
  3062                 dst[j2] = s2;
  3063                 dst += dstbpp;
  3064                 src += srcbpp;
  3065             }, width);
  3066             /* *INDENT-ON* */
  3067             src += srcskip;
  3068             dst += dstskip;
  3069         }
  3070     }
  3071     return;
  3072 }
  3073 
  3074 /* Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet */
  3075 static void
  3076 Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
  3077 {
  3078     int width = info->dst_w;
  3079     int height = info->dst_h;
  3080     Uint8 *src = info->src;
  3081     int srcskip = info->src_skip;
  3082     Uint8 *dst = info->dst;
  3083     int dstskip = info->dst_skip;
  3084     SDL_PixelFormat *srcfmt = info->src_fmt;
  3085     int srcbpp = srcfmt->BytesPerPixel;
  3086     SDL_PixelFormat *dstfmt = info->dst_fmt;
  3087     int dstbpp = dstfmt->BytesPerPixel;
  3088 
  3089     if (dstfmt->Amask) {
  3090         if (srcfmt->Amask) {
  3091             /* COPY_ALPHA */
  3092             /* Only to switch ABGR8888 <-> ARGB8888 */
  3093             while (height--) {
  3094 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  3095                 int i0 = 0, i1 = 1, i2 = 2, i3 = 3;
  3096 #else
  3097                 int i0 = 3, i1 = 2, i2 = 1, i3 = 0;
  3098 #endif
  3099                 /* *INDENT-OFF* */
  3100                 DUFFS_LOOP(
  3101                 {
  3102                     Uint32 *dst32 = (Uint32*)dst;
  3103                     Uint8 s0 = src[i0];
  3104                     Uint8 s1 = src[i1];
  3105                     Uint8 s2 = src[i2];
  3106                     Uint32 alphashift = src[i3] << dstfmt->Ashift;
  3107                     /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
  3108                     *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
  3109                     dst += 4;
  3110                     src += 4;
  3111                 }, width);
  3112                 /* *INDENT-ON* */
  3113                 src += srcskip;
  3114                 dst += dstskip;
  3115             }
  3116         } else {
  3117             /* SET_ALPHA */
  3118             Uint32 mask = info->a << dstfmt->Ashift;
  3119 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  3120             int i0 = 0, i1 = 1, i2 = 2;
  3121 #else
  3122             int i0 = srcbpp - 1 - 0;
  3123             int i1 = srcbpp - 1 - 1;
  3124             int i2 = srcbpp - 1 - 2;
  3125 #endif
  3126             while (height--) {
  3127                 /* *INDENT-OFF* */
  3128                 DUFFS_LOOP(
  3129                 {
  3130                     Uint32 *dst32 = (Uint32*)dst;
  3131                     Uint8 s0 = src[i0];
  3132                     Uint8 s1 = src[i1];
  3133                     Uint8 s2 = src[i2];
  3134                     /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
  3135                     *dst32 = (s0 << 16) | (s1 << 8) | (s2) | mask;
  3136                     dst += 4;
  3137                     src += srcbpp;
  3138                 }, width);
  3139                 /* *INDENT-ON* */
  3140                 src += srcskip;
  3141                 dst += dstskip;
  3142             }
  3143         }
  3144     } else {
  3145         /* NO_ALPHA */
  3146 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  3147         int i0 = 0, i1 = 1, i2 = 2;
  3148         int j0 = 2, j1 = 1, j2 = 0;
  3149 #else
  3150         int i0 = srcbpp - 1 - 0;
  3151         int i1 = srcbpp - 1 - 1;
  3152         int i2 = srcbpp - 1 - 2;
  3153         int j0 = dstbpp - 1 - 2;
  3154         int j1 = dstbpp - 1 - 1;
  3155         int j2 = dstbpp - 1 - 0;
  3156 #endif
  3157         while (height--) {
  3158             /* *INDENT-OFF* */
  3159             DUFFS_LOOP(
  3160             {
  3161                 Uint8 s0 = src[i0];
  3162                 Uint8 s1 = src[i1];
  3163                 Uint8 s2 = src[i2];
  3164                 /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
  3165                 dst[j0] = s0;
  3166                 dst[j1] = s1;
  3167                 dst[j2] = s2;
  3168                 dst += dstbpp;
  3169                 src += srcbpp;
  3170             }, width);
  3171             /* *INDENT-ON* */
  3172             src += srcskip;
  3173             dst += dstskip;
  3174         }
  3175     }
  3176     return;
  3177 }
  3178 
  3179 /* Normal N to N optimized blitters */
  3180 #define NO_ALPHA   1
  3181 #define SET_ALPHA  2
  3182 #define COPY_ALPHA 4
  3183 struct blit_table
  3184 {
  3185     Uint32 srcR, srcG, srcB;
  3186     int dstbpp;
  3187     Uint32 dstR, dstG, dstB;
  3188     Uint32 blit_features;
  3189     SDL_BlitFunc blitfunc;
  3190     Uint32 alpha;  /* bitwise NO_ALPHA, SET_ALPHA, COPY_ALPHA */
  3191 };
  3192 static const struct blit_table normal_blit_1[] = {
  3193     /* Default for 8-bit RGB source, never optimized */
  3194     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  3195 };
  3196 
  3197 static const struct blit_table normal_blit_2[] = {
  3198 #if SDL_ALTIVEC_BLITTERS
  3199     /* has-altivec */
  3200     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
  3201      2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  3202     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
  3203      2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  3204 #endif
  3205     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
  3206      0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  3207     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
  3208      0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  3209     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
  3210      0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  3211     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
  3212      0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  3213 
  3214     /* Default for 16-bit RGB source, used if no other blitter matches */
  3215     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  3216 };
  3217 
  3218 static const struct blit_table normal_blit_3[] = {
  3219     /* 3->4 with same rgb triplet */
  3220     {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
  3221      0, Blit_3or4_to_3or4__same_rgb,
  3222 #if HAVE_FAST_WRITE_INT8
  3223         NO_ALPHA |
  3224 #endif
  3225         SET_ALPHA},
  3226     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
  3227      0, Blit_3or4_to_3or4__same_rgb,
  3228 #if HAVE_FAST_WRITE_INT8
  3229         NO_ALPHA |
  3230 #endif
  3231         SET_ALPHA},
  3232     /* 3->4 with inversed rgb triplet */
  3233     {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
  3234      0, Blit_3or4_to_3or4__inversed_rgb,
  3235 #if HAVE_FAST_WRITE_INT8
  3236         NO_ALPHA |
  3237 #endif
  3238         SET_ALPHA},
  3239     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
  3240      0, Blit_3or4_to_3or4__inversed_rgb,
  3241 #if HAVE_FAST_WRITE_INT8
  3242         NO_ALPHA |
  3243 #endif
  3244         SET_ALPHA},
  3245     /* 3->3 to switch RGB 24 <-> BGR 24 */
  3246     {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
  3247      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
  3248     {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
  3249      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
  3250     /* Default for 24-bit RGB source, never optimized */
  3251     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  3252 };
  3253 
  3254 static const struct blit_table normal_blit_4[] = {
  3255 #if SDL_ALTIVEC_BLITTERS
  3256     /* has-altivec | dont-use-prefetch */
  3257     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
  3258      6, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  3259     /* has-altivec */
  3260     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
  3261      2, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  3262     /* has-altivec */
  3263     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
  3264      2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
  3265 #endif
  3266     /* 4->3 with same rgb triplet */
  3267     {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
  3268      0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
  3269     {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
  3270      0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
  3271     /* 4->3 with inversed rgb triplet */
  3272     {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
  3273      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
  3274     {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
  3275      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
  3276     /* 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 */
  3277     {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
  3278      0, Blit_3or4_to_3or4__inversed_rgb,
  3279 #if HAVE_FAST_WRITE_INT8
  3280         NO_ALPHA |
  3281 #endif
  3282         SET_ALPHA | COPY_ALPHA},
  3283     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
  3284      0, Blit_3or4_to_3or4__inversed_rgb,
  3285 #if HAVE_FAST_WRITE_INT8
  3286         NO_ALPHA |
  3287 #endif
  3288         SET_ALPHA | COPY_ALPHA},
  3289     /* RGB 888 and RGB 565 */
  3290     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
  3291      0, Blit_RGB888_RGB565, NO_ALPHA},
  3292     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
  3293      0, Blit_RGB888_RGB555, NO_ALPHA},
  3294     /* Default for 32-bit RGB source, used if no other blitter matches */
  3295     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  3296 };
  3297 
  3298 static const struct blit_table *const normal_blit[] = {
  3299     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  3300 };
  3301 
  3302 /* Mask matches table, or table entry is zero */
  3303 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  3304 
  3305 SDL_BlitFunc
  3306 SDL_CalculateBlitN(SDL_Surface * surface)
  3307 {
  3308     SDL_PixelFormat *srcfmt;
  3309     SDL_PixelFormat *dstfmt;
  3310     const struct blit_table *table;
  3311     int which;
  3312     SDL_BlitFunc blitfun;
  3313 
  3314     /* Set up data for choosing the blit */
  3315     srcfmt = surface->format;
  3316     dstfmt = surface->map->dst->format;
  3317 
  3318     /* We don't support destinations less than 8-bits */
  3319     if (dstfmt->BitsPerPixel < 8) {
  3320         return (NULL);
  3321     }
  3322 
  3323     switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
  3324     case 0:
  3325         blitfun = NULL;
  3326         if (dstfmt->BitsPerPixel == 8) {
  3327             if ((srcfmt->BytesPerPixel == 4) &&
  3328                 (srcfmt->Rmask == 0x00FF0000) &&
  3329                 (srcfmt->Gmask == 0x0000FF00) &&
  3330                 (srcfmt->Bmask == 0x000000FF)) {
  3331                 blitfun = Blit_RGB888_index8;
  3332             } else if ((srcfmt->BytesPerPixel == 4) &&
  3333                 (srcfmt->Rmask == 0x3FF00000) &&
  3334                 (srcfmt->Gmask == 0x000FFC00) &&
  3335                 (srcfmt->Bmask == 0x000003FF)) {
  3336                 blitfun = Blit_RGB101010_index8;
  3337             } else {
  3338                 blitfun = BlitNto1;
  3339             }
  3340         } else {
  3341             /* Now the meat, choose the blitter we want */
  3342             Uint32 a_need = NO_ALPHA;
  3343             if (dstfmt->Amask)
  3344                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  3345             table = normal_blit[srcfmt->BytesPerPixel - 1];
  3346             for (which = 0; table[which].dstbpp; ++which) {
  3347                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
  3348                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
  3349                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
  3350                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
  3351                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
  3352                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
  3353                     dstfmt->BytesPerPixel == table[which].dstbpp &&
  3354                     (a_need & table[which].alpha) == a_need &&
  3355                     ((table[which].blit_features & GetBlitFeatures()) ==
  3356                      table[which].blit_features))
  3357                     break;
  3358             }
  3359             blitfun = table[which].blitfunc;
  3360 
  3361             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  3362                 if (srcfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
  3363                     blitfun = Blit2101010toN;
  3364                 } else if (dstfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
  3365                     blitfun = BlitNto2101010;
  3366                 } else if (srcfmt->BytesPerPixel == 4 &&
  3367                             dstfmt->BytesPerPixel == 4 &&
  3368                             srcfmt->Rmask == dstfmt->Rmask &&
  3369                             srcfmt->Gmask == dstfmt->Gmask &&
  3370                             srcfmt->Bmask == dstfmt->Bmask) {
  3371                     if (a_need == COPY_ALPHA) {
  3372                         if (srcfmt->Amask == dstfmt->Amask) {
  3373                             /* Fastpath C fallback: 32bit RGBA<->RGBA blit with matching RGBA */
  3374                             blitfun = Blit4to4CopyAlpha;
  3375                         } else {
  3376                             blitfun = BlitNtoNCopyAlpha;
  3377                         }
  3378                     } else {
  3379                         /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  3380                         blitfun = Blit4to4MaskAlpha;
  3381                     }
  3382                 } else if (a_need == COPY_ALPHA) {
  3383                     blitfun = BlitNtoNCopyAlpha;
  3384                 }
  3385             }
  3386         }
  3387         return (blitfun);
  3388 
  3389     case SDL_COPY_COLORKEY:
  3390         /* colorkey blit: Here we don't have too many options, mostly
  3391            because RLE is the preferred fast way to deal with this.
  3392            If a particular case turns out to be useful we'll add it. */
  3393 
  3394         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
  3395             return Blit2to2Key;
  3396         else if (dstfmt->BytesPerPixel == 1)
  3397             return BlitNto1Key;
  3398         else {
  3399 #if SDL_ALTIVEC_BLITTERS
  3400             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
  3401                 && SDL_HasAltiVec()) {
  3402                 return Blit32to32KeyAltivec;
  3403             } else
  3404 #endif
  3405             if (srcfmt->Amask && dstfmt->Amask) {
  3406                 return BlitNtoNKeyCopyAlpha;
  3407             } else {
  3408                 return BlitNtoNKey;
  3409             }
  3410         }
  3411     }
  3412 
  3413     return NULL;
  3414 }
  3415 
  3416 /* vi: set ts=4 sw=4 expandtab: */