src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Wed, 03 Jun 2009 04:37:27 +0000
changeset 3162 dc1eb82ffdaa
parent 3050 e763aa6db88b
child 3697 f7b03b6838cb
permissions -rw-r--r--
Von: Thomas Zimmermann
Betreff: [SDL] [PATCH] Make static variables const
Datum: Tue, 19 May 2009 19:45:37 +0200

Hi,

this is a set of simple changes which make some of SDL's internal static
arrays constant. The purpose is to shrink the number of write-able
static bytes and thus increase the number of memory pages shared between
SDL applications.

The patch set is against trunk@4513. Each of the attached patch files is
specific to a sub-system. The set is completed by a second mail, because
of the list's 40 KiB limit.

The files readelf-r4513.txt and readelf-const-patch.txt were made by
calling 'readelf -S libSDL.so'. They show the difference in ELF sections
without and with the patch. Some numbers measured on my x86-64:

Before

[13] .rodata PROGBITS 00000000000eaaa0 000eaaa0
0000000000008170 0000000000000000 A 0 0 32
[19] .data.rel.ro PROGBITS 00000000003045e0 001045e0
00000000000023d0 0000000000000000 WA 0 0 32
[23] .data PROGBITS 00000000003076e0 001076e0
0000000000004988 0000000000000000 WA 0 0 32

After

[13] .rodata PROGBITS 00000000000eaaa0 000eaaa0
0000000000009a50 0000000000000000 A 0 0 32
[19] .data.rel.ro PROGBITS 0000000000306040 00106040
0000000000002608 0000000000000000 WA 0 0 32
[23] .data PROGBITS 0000000000309360 00109360
0000000000002e88 0000000000000000 WA 0 0 32

The size of the write-able data section decreased considerably. Some
entries became const-after-relocation, while most of its content went
straight into the read-only data section.

Best regards, Thomas
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2009 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_endian.h"
    26 #include "SDL_cpuinfo.h"
    27 #include "SDL_blit.h"
    28 
    29 /* Functions to blit from N-bit surfaces to other surfaces */
    30 
    31 #if SDL_ALTIVEC_BLITTERS
    32 #if __MWERKS__
    33 #pragma altivec_model on
    34 #endif
    35 #ifdef HAVE_ALTIVEC_H
    36 #include <altivec.h>
    37 #endif
    38 #define assert(X)
    39 #ifdef __MACOSX__
    40 #include <sys/sysctl.h>
    41 static size_t
    42 GetL3CacheSize(void)
    43 {
    44     const char key[] = "hw.l3cachesize";
    45     u_int64_t result = 0;
    46     size_t typeSize = sizeof(result);
    47 
    48 
    49     int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
    50     if (0 != err)
    51         return 0;
    52 
    53     return result;
    54 }
    55 #else
    56 static size_t
    57 GetL3CacheSize(void)
    58 {
    59     /* XXX: Just guess G4 */
    60     return 2097152;
    61 }
    62 #endif /* __MACOSX__ */
    63 
    64 #if (defined(__MACOSX__) && (__GNUC__ < 4))
    65 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    66         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    67 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    68         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    69 #else
    70 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    71         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    72 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    73         (vector unsigned short) { a,b,c,d,e,f,g,h }
    74 #endif
    75 
    76 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    77 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    78                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    79                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    80                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    81                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    82 
    83 #define MAKE8888(dstfmt, r, g, b, a)  \
    84     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    85       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    86       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    87       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    88 
    89 /*
    90  * Data Stream Touch...Altivec cache prefetching.
    91  *
    92  *  Don't use this on a G5...however, the speed boost is very significant
    93  *   on a G4.
    94  */
    95 #define DST_CHAN_SRC 1
    96 #define DST_CHAN_DEST 2
    97 
    98 /* macro to set DST control word value... */
    99 #define DST_CTRL(size, count, stride) \
   100     (((size) << 24) | ((count) << 16) | (stride))
   101 
   102 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   103     ? vec_lvsl(0, src) \
   104     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   105 
   106 /* Calculate the permute vector used for 32->32 swizzling */
   107 static vector unsigned char
   108 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
   109 {
   110     /*
   111      * We have to assume that the bits that aren't used by other
   112      *  colors is alpha, and it's one complete byte, since some formats
   113      *  leave alpha with a zero mask, but we should still swizzle the bits.
   114      */
   115     /* ARGB */
   116     const static const struct SDL_PixelFormat default_pixel_format = {
   117         NULL, 32, 4,
   118         0, 0, 0, 0,
   119         16, 8, 0, 24,
   120         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
   121     };
   122     if (!srcfmt) {
   123         srcfmt = &default_pixel_format;
   124     }
   125     if (!dstfmt) {
   126         dstfmt = &default_pixel_format;
   127     }
   128     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   129                                                        0x04, 0x04, 0x04, 0x04,
   130                                                        0x08, 0x08, 0x08, 0x08,
   131                                                        0x0C, 0x0C, 0x0C,
   132                                                        0x0C);
   133     vector unsigned char vswiz;
   134     vector unsigned int srcvec;
   135 #define RESHIFT(X) (3 - ((X) >> 3))
   136     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   137     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   138     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   139     Uint32 amask;
   140     /* Use zero for alpha if either surface doesn't have alpha */
   141     if (dstfmt->Amask) {
   142         amask =
   143             ((srcfmt->Amask) ? RESHIFT(srcfmt->
   144                                        Ashift) : 0x10) << (dstfmt->Ashift);
   145     } else {
   146         amask =
   147             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   148                           0xFFFFFFFF);
   149     }
   150 #undef RESHIFT
   151     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   152     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   153     return (vswiz);
   154 }
   155 
   156 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
   157 static void
   158 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   159 {
   160     int height = info->dst_h;
   161     Uint8 *src = (Uint8 *) info->src;
   162     int srcskip = info->src_skip;
   163     Uint8 *dst = (Uint8 *) info->dst;
   164     int dstskip = info->dst_skip;
   165     SDL_PixelFormat *srcfmt = info->src_fmt;
   166     vector unsigned char valpha = vec_splat_u8(0);
   167     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   168     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   169                                                     0x00, 0x0a, 0x00, 0x0e,
   170                                                     0x00, 0x12, 0x00, 0x16,
   171                                                     0x00, 0x1a, 0x00, 0x1e);
   172     vector unsigned short v1 = vec_splat_u16(1);
   173     vector unsigned short v3 = vec_splat_u16(3);
   174     vector unsigned short v3f =
   175         VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
   176                           0x003f, 0x003f, 0x003f, 0x003f);
   177     vector unsigned short vfc =
   178         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   179                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
   180     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
   181     vf800 = vec_sl(vf800, vec_splat_u16(8));
   182 
   183     while (height--) {
   184         vector unsigned char valigner;
   185         vector unsigned char voverflow;
   186         vector unsigned char vsrc;
   187 
   188         int width = info->dst_w;
   189         int extrawidth;
   190 
   191         /* do scalar until we can align... */
   192 #define ONE_PIXEL_BLEND(condition, widthvar) \
   193         while (condition) { \
   194             Uint32 Pixel; \
   195             unsigned sR, sG, sB, sA; \
   196             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   197                           sR, sG, sB, sA); \
   198             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   199                                 ((sG << 3) & 0x000007E0) | \
   200                                 ((sB >> 3) & 0x0000001F)); \
   201             dst += 2; \
   202             src += 4; \
   203             widthvar--; \
   204         }
   205 
   206         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   207 
   208         /* After all that work, here's the vector part! */
   209         extrawidth = (width % 8);       /* trailing unaligned stores */
   210         width -= extrawidth;
   211         vsrc = vec_ld(0, src);
   212         valigner = VEC_ALIGNER(src);
   213 
   214         while (width) {
   215             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   216             vector unsigned int vsrc1, vsrc2;
   217             vector unsigned char vdst;
   218 
   219             voverflow = vec_ld(15, src);
   220             vsrc = vec_perm(vsrc, voverflow, valigner);
   221             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   222             src += 16;
   223             vsrc = voverflow;
   224             voverflow = vec_ld(15, src);
   225             vsrc = vec_perm(vsrc, voverflow, valigner);
   226             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   227             /* 1555 */
   228             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
   229             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   230             vgpixel = vec_and(vgpixel, vfc);
   231             vgpixel = vec_sl(vgpixel, v3);
   232             vrpixel = vec_sl(vpixel, v1);
   233             vrpixel = vec_and(vrpixel, vf800);
   234             vbpixel = vec_and(vpixel, v3f);
   235             vdst =
   236                 vec_or((vector unsigned char) vrpixel,
   237                        (vector unsigned char) vgpixel);
   238             /* 565 */
   239             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   240             vec_st(vdst, 0, dst);
   241 
   242             width -= 8;
   243             src += 16;
   244             dst += 16;
   245             vsrc = voverflow;
   246         }
   247 
   248         assert(width == 0);
   249 
   250         /* do scalar until we can align... */
   251         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   252 #undef ONE_PIXEL_BLEND
   253 
   254         src += srcskip;         /* move to next row, accounting for pitch. */
   255         dst += dstskip;
   256     }
   257 
   258 
   259 }
   260 
   261 static void
   262 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   263 {
   264     int height = info->dst_h;
   265     Uint8 *src = (Uint8 *) info->src;
   266     int srcskip = info->src_skip;
   267     Uint8 *dst = (Uint8 *) info->dst;
   268     int dstskip = info->dst_skip;
   269     SDL_PixelFormat *srcfmt = info->src_fmt;
   270     SDL_PixelFormat *dstfmt = info->dst_fmt;
   271     unsigned alpha;
   272     vector unsigned char valpha;
   273     vector unsigned char vpermute;
   274     vector unsigned short vf800;
   275     vector unsigned int v8 = vec_splat_u32(8);
   276     vector unsigned int v16 = vec_add(v8, v8);
   277     vector unsigned short v2 = vec_splat_u16(2);
   278     vector unsigned short v3 = vec_splat_u16(3);
   279     /* 
   280        0x10 - 0x1f is the alpha
   281        0x00 - 0x0e evens are the red
   282        0x01 - 0x0f odds are zero
   283      */
   284     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   285                                                        0x10, 0x02, 0x01, 0x01,
   286                                                        0x10, 0x04, 0x01, 0x01,
   287                                                        0x10, 0x06, 0x01,
   288                                                        0x01);
   289     vector unsigned char vredalpha2 =
   290         (vector unsigned
   291          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   292         );
   293     /*
   294        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   295        0x11 - 0x0f odds are blue
   296      */
   297     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   298                                                    0x04, 0x05, 0x06, 0x13,
   299                                                    0x08, 0x09, 0x0a, 0x15,
   300                                                    0x0c, 0x0d, 0x0e, 0x17);
   301     vector unsigned char vblue2 =
   302         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   303         );
   304     /*
   305        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   306        0x10 - 0x0e evens are green
   307      */
   308     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   309                                                     0x04, 0x05, 0x12, 0x07,
   310                                                     0x08, 0x09, 0x14, 0x0b,
   311                                                     0x0c, 0x0d, 0x16, 0x0f);
   312     vector unsigned char vgreen2 =
   313         (vector unsigned
   314          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   315         );
   316 
   317 
   318     assert(srcfmt->BytesPerPixel == 2);
   319     assert(dstfmt->BytesPerPixel == 4);
   320 
   321     vf800 = (vector unsigned short) vec_splat_u8(-7);
   322     vf800 = vec_sl(vf800, vec_splat_u16(8));
   323 
   324     if (dstfmt->Amask && info->a) {
   325         ((unsigned char *) &valpha)[0] = alpha = info->a;
   326         valpha = vec_splat(valpha, 0);
   327     } else {
   328         alpha = 0;
   329         valpha = vec_splat_u8(0);
   330     }
   331 
   332     vpermute = calc_swizzle32(NULL, dstfmt);
   333     while (height--) {
   334         vector unsigned char valigner;
   335         vector unsigned char voverflow;
   336         vector unsigned char vsrc;
   337 
   338         int width = info->dst_w;
   339         int extrawidth;
   340 
   341         /* do scalar until we can align... */
   342 #define ONE_PIXEL_BLEND(condition, widthvar) \
   343         while (condition) { \
   344             unsigned sR, sG, sB; \
   345             unsigned short Pixel = *((unsigned short *)src); \
   346             sR = (Pixel >> 8) & 0xf8; \
   347             sG = (Pixel >> 3) & 0xfc; \
   348             sB = (Pixel << 3) & 0xf8; \
   349             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   350             src += 2; \
   351             dst += 4; \
   352             widthvar--; \
   353         }
   354         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   355 
   356         /* After all that work, here's the vector part! */
   357         extrawidth = (width % 8);       /* trailing unaligned stores */
   358         width -= extrawidth;
   359         vsrc = vec_ld(0, src);
   360         valigner = VEC_ALIGNER(src);
   361 
   362         while (width) {
   363             vector unsigned short vR, vG, vB;
   364             vector unsigned char vdst1, vdst2;
   365 
   366             voverflow = vec_ld(15, src);
   367             vsrc = vec_perm(vsrc, voverflow, valigner);
   368 
   369             vR = vec_and((vector unsigned short) vsrc, vf800);
   370             vB = vec_sl((vector unsigned short) vsrc, v3);
   371             vG = vec_sl(vB, v2);
   372 
   373             vdst1 =
   374                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   375                                                 valpha, vredalpha1);
   376             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   377             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   378             vdst1 = vec_perm(vdst1, valpha, vpermute);
   379             vec_st(vdst1, 0, dst);
   380 
   381             vdst2 =
   382                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   383                                                 valpha, vredalpha2);
   384             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   385             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   386             vdst2 = vec_perm(vdst2, valpha, vpermute);
   387             vec_st(vdst2, 16, dst);
   388 
   389             width -= 8;
   390             dst += 32;
   391             src += 16;
   392             vsrc = voverflow;
   393         }
   394 
   395         assert(width == 0);
   396 
   397 
   398         /* do scalar until we can align... */
   399         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   400 #undef ONE_PIXEL_BLEND
   401 
   402         src += srcskip;         /* move to next row, accounting for pitch. */
   403         dst += dstskip;
   404     }
   405 
   406 }
   407 
   408 
   409 static void
   410 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   411 {
   412     int height = info->dst_h;
   413     Uint8 *src = (Uint8 *) info->src;
   414     int srcskip = info->src_skip;
   415     Uint8 *dst = (Uint8 *) info->dst;
   416     int dstskip = info->dst_skip;
   417     SDL_PixelFormat *srcfmt = info->src_fmt;
   418     SDL_PixelFormat *dstfmt = info->dst_fmt;
   419     unsigned alpha;
   420     vector unsigned char valpha;
   421     vector unsigned char vpermute;
   422     vector unsigned short vf800;
   423     vector unsigned int v8 = vec_splat_u32(8);
   424     vector unsigned int v16 = vec_add(v8, v8);
   425     vector unsigned short v1 = vec_splat_u16(1);
   426     vector unsigned short v3 = vec_splat_u16(3);
   427     /* 
   428        0x10 - 0x1f is the alpha
   429        0x00 - 0x0e evens are the red
   430        0x01 - 0x0f odds are zero
   431      */
   432     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   433                                                        0x10, 0x02, 0x01, 0x01,
   434                                                        0x10, 0x04, 0x01, 0x01,
   435                                                        0x10, 0x06, 0x01,
   436                                                        0x01);
   437     vector unsigned char vredalpha2 =
   438         (vector unsigned
   439          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   440         );
   441     /*
   442        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   443        0x11 - 0x0f odds are blue
   444      */
   445     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   446                                                    0x04, 0x05, 0x06, 0x13,
   447                                                    0x08, 0x09, 0x0a, 0x15,
   448                                                    0x0c, 0x0d, 0x0e, 0x17);
   449     vector unsigned char vblue2 =
   450         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   451         );
   452     /*
   453        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   454        0x10 - 0x0e evens are green
   455      */
   456     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   457                                                     0x04, 0x05, 0x12, 0x07,
   458                                                     0x08, 0x09, 0x14, 0x0b,
   459                                                     0x0c, 0x0d, 0x16, 0x0f);
   460     vector unsigned char vgreen2 =
   461         (vector unsigned
   462          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   463         );
   464 
   465 
   466     assert(srcfmt->BytesPerPixel == 2);
   467     assert(dstfmt->BytesPerPixel == 4);
   468 
   469     vf800 = (vector unsigned short) vec_splat_u8(-7);
   470     vf800 = vec_sl(vf800, vec_splat_u16(8));
   471 
   472     if (dstfmt->Amask && info->a) {
   473         ((unsigned char *) &valpha)[0] = alpha = info->a;
   474         valpha = vec_splat(valpha, 0);
   475     } else {
   476         alpha = 0;
   477         valpha = vec_splat_u8(0);
   478     }
   479 
   480     vpermute = calc_swizzle32(NULL, dstfmt);
   481     while (height--) {
   482         vector unsigned char valigner;
   483         vector unsigned char voverflow;
   484         vector unsigned char vsrc;
   485 
   486         int width = info->dst_w;
   487         int extrawidth;
   488 
   489         /* do scalar until we can align... */
   490 #define ONE_PIXEL_BLEND(condition, widthvar) \
   491         while (condition) { \
   492             unsigned sR, sG, sB; \
   493             unsigned short Pixel = *((unsigned short *)src); \
   494             sR = (Pixel >> 7) & 0xf8; \
   495             sG = (Pixel >> 2) & 0xf8; \
   496             sB = (Pixel << 3) & 0xf8; \
   497             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   498             src += 2; \
   499             dst += 4; \
   500             widthvar--; \
   501         }
   502         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   503 
   504         /* After all that work, here's the vector part! */
   505         extrawidth = (width % 8);       /* trailing unaligned stores */
   506         width -= extrawidth;
   507         vsrc = vec_ld(0, src);
   508         valigner = VEC_ALIGNER(src);
   509 
   510         while (width) {
   511             vector unsigned short vR, vG, vB;
   512             vector unsigned char vdst1, vdst2;
   513 
   514             voverflow = vec_ld(15, src);
   515             vsrc = vec_perm(vsrc, voverflow, valigner);
   516 
   517             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
   518             vB = vec_sl((vector unsigned short) vsrc, v3);
   519             vG = vec_sl(vB, v3);
   520 
   521             vdst1 =
   522                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   523                                                 valpha, vredalpha1);
   524             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   525             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   526             vdst1 = vec_perm(vdst1, valpha, vpermute);
   527             vec_st(vdst1, 0, dst);
   528 
   529             vdst2 =
   530                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   531                                                 valpha, vredalpha2);
   532             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   533             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   534             vdst2 = vec_perm(vdst2, valpha, vpermute);
   535             vec_st(vdst2, 16, dst);
   536 
   537             width -= 8;
   538             dst += 32;
   539             src += 16;
   540             vsrc = voverflow;
   541         }
   542 
   543         assert(width == 0);
   544 
   545 
   546         /* do scalar until we can align... */
   547         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   548 #undef ONE_PIXEL_BLEND
   549 
   550         src += srcskip;         /* move to next row, accounting for pitch. */
   551         dst += dstskip;
   552     }
   553 
   554 }
   555 
   556 static void BlitNtoNKey(SDL_BlitInfo * info);
   557 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
   558 static void
   559 Blit32to32KeyAltivec(SDL_BlitInfo * info)
   560 {
   561     int height = info->dst_h;
   562     Uint32 *srcp = (Uint32 *) info->src;
   563     int srcskip = info->src_skip / 4;
   564     Uint32 *dstp = (Uint32 *) info->dst;
   565     int dstskip = info->dst_skip / 4;
   566     SDL_PixelFormat *srcfmt = info->src_fmt;
   567     int srcbpp = srcfmt->BytesPerPixel;
   568     SDL_PixelFormat *dstfmt = info->dst_fmt;
   569     int dstbpp = dstfmt->BytesPerPixel;
   570     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   571     unsigned alpha = dstfmt->Amask ? info->a : 0;
   572     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   573     Uint32 ckey = info->colorkey;
   574     vector unsigned int valpha;
   575     vector unsigned char vpermute;
   576     vector unsigned char vzero;
   577     vector unsigned int vckey;
   578     vector unsigned int vrgbmask;
   579     vpermute = calc_swizzle32(srcfmt, dstfmt);
   580     if (info->dst_w < 16) {
   581         if (copy_alpha) {
   582             BlitNtoNKeyCopyAlpha(info);
   583         } else {
   584             BlitNtoNKey(info);
   585         }
   586         return;
   587     }
   588     vzero = vec_splat_u8(0);
   589     if (alpha) {
   590         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   591         valpha =
   592             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   593     } else {
   594         valpha = (vector unsigned int) vzero;
   595     }
   596     ckey &= rgbmask;
   597     ((unsigned int *) (char *) &vckey)[0] = ckey;
   598     vckey = vec_splat(vckey, 0);
   599     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   600     vrgbmask = vec_splat(vrgbmask, 0);
   601 
   602     while (height--) {
   603 #define ONE_PIXEL_BLEND(condition, widthvar) \
   604         if (copy_alpha) { \
   605             while (condition) { \
   606                 Uint32 Pixel; \
   607                 unsigned sR, sG, sB, sA; \
   608                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   609                           sR, sG, sB, sA); \
   610                 if ( (Pixel & rgbmask) != ckey ) { \
   611                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   612                             sR, sG, sB, sA); \
   613                 } \
   614                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   615                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   616                 widthvar--; \
   617             } \
   618         } else { \
   619             while (condition) { \
   620                 Uint32 Pixel; \
   621                 unsigned sR, sG, sB; \
   622                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   623                 if ( Pixel != ckey ) { \
   624                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   625                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   626                               sR, sG, sB, alpha); \
   627                 } \
   628                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   629                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   630                 widthvar--; \
   631             } \
   632         }
   633         int width = info->dst_w;
   634         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   635         assert(width > 0);
   636         if (width > 0) {
   637             int extrawidth = (width % 4);
   638             vector unsigned char valigner = VEC_ALIGNER(srcp);
   639             vector unsigned int vs = vec_ld(0, srcp);
   640             width -= extrawidth;
   641             assert(width >= 4);
   642             while (width) {
   643                 vector unsigned char vsel;
   644                 vector unsigned int vd;
   645                 vector unsigned int voverflow = vec_ld(15, srcp);
   646                 /* load the source vec */
   647                 vs = vec_perm(vs, voverflow, valigner);
   648                 /* vsel is set for items that match the key */
   649                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   650                 vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
   651                 /* permute the src vec to the dest format */
   652                 vs = vec_perm(vs, valpha, vpermute);
   653                 /* load the destination vec */
   654                 vd = vec_ld(0, dstp);
   655                 /* select the source and dest into vs */
   656                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   657                                                    (vector unsigned char) vd,
   658                                                    vsel);
   659 
   660                 vec_st(vd, 0, dstp);
   661                 srcp += 4;
   662                 width -= 4;
   663                 dstp += 4;
   664                 vs = voverflow;
   665             }
   666             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   667 #undef ONE_PIXEL_BLEND
   668             srcp += srcskip;
   669             dstp += dstskip;
   670         }
   671     }
   672 }
   673 
/*
 * Altivec code to swizzle one 32-bit surface to a different 32-bit format.
 * Use this on a G5: this variant issues no data-stream prefetch hints.
 *
 * Each row is handled in three phases: a scalar loop that runs while dst is
 * still UNALIGNED_PTR, a vector loop that converts four pixels per pass, and
 * a scalar loop for the (width % 4) pixels that are left over.
 */
static void
ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint32 *src = (Uint32 *) info->src;
    int srcskip = info->src_skip / 4;   /* skips are divided by 4 to step Uint32 pointers */
    Uint32 *dst = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* Second operand of the swizzle permute below; all zero by default. */
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    /* Destination has an alpha mask but the source does not: when info->a is
       nonzero, fill the second permute operand with that byte instead of 0.
       NOTE(review): presumably vpermute pulls the alpha lane from this
       operand; calc_swizzle32 is not visible here -- confirm. */
    if (dstfmt->Amask && !srcfmt->Amask) {
        if (info->a) {
            vector unsigned char valpha;
            ((unsigned char *) &valpha)[0] = info->a;
            vzero = (vector unsigned int) vec_splat(valpha, 0);
        }
    }

    assert(srcfmt->BytesPerPixel == 4);
    assert(dstfmt->BytesPerPixel == 4);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
        while ((UNALIGNED_PTR(dst)) && (width)) {
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        /* After all that work, here's the vector part! */
        extrawidth = (width % 4);
        width -= extrawidth;
        /* NOTE(review): VEC_ALIGNER presumably yields the permute mask that
           realigns data loaded from a possibly unaligned src -- confirm. */
        valigner = VEC_ALIGNER(src);
        vbits = vec_ld(0, src);

        while (width) {
            /* vec_ld ignores the low four address bits, so this fetches the
               aligned 16-byte block that contains byte src + 15. */
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
            vec_st(vbits, 0, dst);      /* store it back out. */
            dst += 4;
            vbits = voverflow;  /* carry the second block into the next pass */
        }

        assert(width == 0);

        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
        while (extrawidth) {
            bits = *(src++);    /* at most 3 pixels (width % 4). */
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip;
        dst += dstskip;
    }

}
   749 
/*
 * Altivec code to swizzle one 32-bit surface to a different 32-bit format.
 * Use this on a G4: same conversion as the no-prefetch variant, but the
 * scalar alignment loop and the vector loop also issue data-stream touch
 * hints (vec_dstt for the source, vec_dstst for the destination) ahead of
 * the current position. Both streams are stopped with vec_dss on exit.
 */
static void
ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
{
    /* NOTE(review): these leads are added to Uint32 pointers below, so they
       count pixels rather than bytes (16 and 64 elements with a 4-byte
       Uint32) -- confirm that is the intended prefetch distance. */
    const int scalar_dst_lead = sizeof(Uint32) * 4;
    const int vector_dst_lead = sizeof(Uint32) * 16;

    int height = info->dst_h;
    Uint32 *src = (Uint32 *) info->src;
    int srcskip = info->src_skip / 4;   /* skips are divided by 4 to step Uint32 pointers */
    Uint32 *dst = (Uint32 *) info->dst;
    int dstskip = info->dst_skip / 4;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* Second operand of the swizzle permute below; all zero by default. */
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    /* Destination has an alpha mask but the source does not: when info->a is
       nonzero, fill the second permute operand with that byte instead of 0.
       NOTE(review): presumably vpermute pulls the alpha lane from this
       operand; calc_swizzle32 is not visible here -- confirm. */
    if (dstfmt->Amask && !srcfmt->Amask) {
        if (info->a) {
            vector unsigned char valpha;
            ((unsigned char *) &valpha)[0] = info->a;
            vzero = (vector unsigned int) vec_splat(valpha, 0);
        }
    }

    assert(srcfmt->BytesPerPixel == 4);
    assert(dstfmt->BytesPerPixel == 4);

    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
        while ((UNALIGNED_PTR(dst)) && (width)) {
            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }

        /* After all that work, here's the vector part! */
        extrawidth = (width % 4);
        width -= extrawidth;
        /* NOTE(review): VEC_ALIGNER presumably yields the permute mask that
           realigns data loaded from a possibly unaligned src -- confirm. */
        valigner = VEC_ALIGNER(src);
        vbits = vec_ld(0, src);

        while (width) {
            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            /* vec_ld ignores the low four address bits, so this fetches the
               aligned 16-byte block that contains byte src + 15. */
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
            vec_st(vbits, 0, dst);      /* store it back out. */
            dst += 4;
            vbits = voverflow;  /* carry the second block into the next pass */
        }

        assert(width == 0);

        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
        while (extrawidth) {
            bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }

        src += srcskip;
        dst += dstskip;
    }

    /* Stop both data streams started by the touch hints above. */
    vec_dss(DST_CHAN_SRC);
    vec_dss(DST_CHAN_DEST);
}
   838 
   839 static Uint32
   840 GetBlitFeatures(void)
   841 {
   842     static Uint32 features = 0xffffffff;
   843     if (features == 0xffffffff) {
   844         /* Provide an override for testing .. */
   845         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   846         if (override) {
   847             features = 0;
   848             SDL_sscanf(override, "%u", &features);
   849         } else {
   850             features = (0
   851                         /* Feature 1 is has-MMX */
   852                         | ((SDL_HasMMX())? 1 : 0)
   853                         /* Feature 2 is has-AltiVec */
   854                         | ((SDL_HasAltiVec())? 2 : 0)
   855                         /* Feature 4 is dont-use-prefetch */
   856                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   857                         | ((GetL3CacheSize() == 0) ? 4 : 0)
   858                 );
   859         }
   860     }
   861     return features;
   862 }
   863 
   864 #if __MWERKS__
   865 #pragma altivec_model off
   866 #endif
   867 #else
   868 /* Feature 1 is has-MMX */
   869 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   870 #endif
   871 
/*
 * HI and LO are endian-dependent array indexes (0 or 1).
 * The RGB888_RGB555_TWO / RGB888_RGB565_TWO macros below use src[HI] for the
 * pixel placed in the upper 16 bits of their combined 32-bit store and
 * src[LO] for the pixel placed in the lower 16 bits; RGB565_32 uses them to
 * pick the two bytes of a 16-bit source pixel.
 */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#define HI	1
#define LO	0
#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
#define HI	0
#define LO	1
#endif
   880 
   881 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   882 #define RGB888_RGB332(dst, src) { \
   883 	dst = (Uint8)((((src)&0x00E00000)>>16)| \
   884 	              (((src)&0x0000E000)>>11)| \
   885 	              (((src)&0x000000C0)>>6)); \
   886 }
   887 static void
   888 Blit_RGB888_index8(SDL_BlitInfo * info)
   889 {
   890 #ifndef USE_DUFFS_LOOP
   891     int c;
   892 #endif
   893     int width, height;
   894     Uint32 *src;
   895     const Uint8 *map;
   896     Uint8 *dst;
   897     int srcskip, dstskip;
   898 
   899     /* Set up some basic variables */
   900     width = info->dst_w;
   901     height = info->dst_h;
   902     src = (Uint32 *) info->src;
   903     srcskip = info->src_skip / 4;
   904     dst = info->dst;
   905     dstskip = info->dst_skip;
   906     map = info->table;
   907 
   908     if (map == NULL) {
   909         while (height--) {
   910 #ifdef USE_DUFFS_LOOP
   911 			/* *INDENT-OFF* */
   912 			DUFFS_LOOP(
   913 				RGB888_RGB332(*dst++, *src);
   914 			, width);
   915 			/* *INDENT-ON* */
   916 #else
   917             for (c = width / 4; c; --c) {
   918                 /* Pack RGB into 8bit pixel */
   919                 ++src;
   920                 RGB888_RGB332(*dst++, *src);
   921                 ++src;
   922                 RGB888_RGB332(*dst++, *src);
   923                 ++src;
   924                 RGB888_RGB332(*dst++, *src);
   925                 ++src;
   926             }
   927             switch (width & 3) {
   928             case 3:
   929                 RGB888_RGB332(*dst++, *src);
   930                 ++src;
   931             case 2:
   932                 RGB888_RGB332(*dst++, *src);
   933                 ++src;
   934             case 1:
   935                 RGB888_RGB332(*dst++, *src);
   936                 ++src;
   937             }
   938 #endif /* USE_DUFFS_LOOP */
   939             src += srcskip;
   940             dst += dstskip;
   941         }
   942     } else {
   943         int Pixel;
   944 
   945         while (height--) {
   946 #ifdef USE_DUFFS_LOOP
   947 			/* *INDENT-OFF* */
   948 			DUFFS_LOOP(
   949 				RGB888_RGB332(Pixel, *src);
   950 				*dst++ = map[Pixel];
   951 				++src;
   952 			, width);
   953 			/* *INDENT-ON* */
   954 #else
   955             for (c = width / 4; c; --c) {
   956                 /* Pack RGB into 8bit pixel */
   957                 RGB888_RGB332(Pixel, *src);
   958                 *dst++ = map[Pixel];
   959                 ++src;
   960                 RGB888_RGB332(Pixel, *src);
   961                 *dst++ = map[Pixel];
   962                 ++src;
   963                 RGB888_RGB332(Pixel, *src);
   964                 *dst++ = map[Pixel];
   965                 ++src;
   966                 RGB888_RGB332(Pixel, *src);
   967                 *dst++ = map[Pixel];
   968                 ++src;
   969             }
   970             switch (width & 3) {
   971             case 3:
   972                 RGB888_RGB332(Pixel, *src);
   973                 *dst++ = map[Pixel];
   974                 ++src;
   975             case 2:
   976                 RGB888_RGB332(Pixel, *src);
   977                 *dst++ = map[Pixel];
   978                 ++src;
   979             case 1:
   980                 RGB888_RGB332(Pixel, *src);
   981                 *dst++ = map[Pixel];
   982                 ++src;
   983             }
   984 #endif /* USE_DUFFS_LOOP */
   985             src += srcskip;
   986             dst += dstskip;
   987         }
   988     }
   989 }
   990 
   991 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   992 #define RGB888_RGB555(dst, src) { \
   993 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
   994 	                            (((*src)&0x0000F800)>>6)| \
   995 	                            (((*src)&0x000000F8)>>3)); \
   996 }
   997 #define RGB888_RGB555_TWO(dst, src) { \
   998 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   999 	                     (((src[HI])&0x0000F800)>>6)| \
  1000 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1001 	                     (((src[LO])&0x00F80000)>>9)| \
  1002 	                     (((src[LO])&0x0000F800)>>6)| \
  1003 	                     (((src[LO])&0x000000F8)>>3); \
  1004 }
  1005 static void
  1006 Blit_RGB888_RGB555(SDL_BlitInfo * info)
  1007 {
  1008 #ifndef USE_DUFFS_LOOP
  1009     int c;
  1010 #endif
  1011     int width, height;
  1012     Uint32 *src;
  1013     Uint16 *dst;
  1014     int srcskip, dstskip;
  1015 
  1016     /* Set up some basic variables */
  1017     width = info->dst_w;
  1018     height = info->dst_h;
  1019     src = (Uint32 *) info->src;
  1020     srcskip = info->src_skip / 4;
  1021     dst = (Uint16 *) info->dst;
  1022     dstskip = info->dst_skip / 2;
  1023 
  1024 #ifdef USE_DUFFS_LOOP
  1025     while (height--) {
  1026 		/* *INDENT-OFF* */
  1027 		DUFFS_LOOP(
  1028 			RGB888_RGB555(dst, src);
  1029 			++src;
  1030 			++dst;
  1031 		, width);
  1032 		/* *INDENT-ON* */
  1033         src += srcskip;
  1034         dst += dstskip;
  1035     }
  1036 #else
  1037     /* Memory align at 4-byte boundary, if necessary */
  1038     if ((long) dst & 0x03) {
  1039         /* Don't do anything if width is 0 */
  1040         if (width == 0) {
  1041             return;
  1042         }
  1043         --width;
  1044 
  1045         while (height--) {
  1046             /* Perform copy alignment */
  1047             RGB888_RGB555(dst, src);
  1048             ++src;
  1049             ++dst;
  1050 
  1051             /* Copy in 4 pixel chunks */
  1052             for (c = width / 4; c; --c) {
  1053                 RGB888_RGB555_TWO(dst, src);
  1054                 src += 2;
  1055                 dst += 2;
  1056                 RGB888_RGB555_TWO(dst, src);
  1057                 src += 2;
  1058                 dst += 2;
  1059             }
  1060             /* Get any leftovers */
  1061             switch (width & 3) {
  1062             case 3:
  1063                 RGB888_RGB555(dst, src);
  1064                 ++src;
  1065                 ++dst;
  1066             case 2:
  1067                 RGB888_RGB555_TWO(dst, src);
  1068                 src += 2;
  1069                 dst += 2;
  1070                 break;
  1071             case 1:
  1072                 RGB888_RGB555(dst, src);
  1073                 ++src;
  1074                 ++dst;
  1075                 break;
  1076             }
  1077             src += srcskip;
  1078             dst += dstskip;
  1079         }
  1080     } else {
  1081         while (height--) {
  1082             /* Copy in 4 pixel chunks */
  1083             for (c = width / 4; c; --c) {
  1084                 RGB888_RGB555_TWO(dst, src);
  1085                 src += 2;
  1086                 dst += 2;
  1087                 RGB888_RGB555_TWO(dst, src);
  1088                 src += 2;
  1089                 dst += 2;
  1090             }
  1091             /* Get any leftovers */
  1092             switch (width & 3) {
  1093             case 3:
  1094                 RGB888_RGB555(dst, src);
  1095                 ++src;
  1096                 ++dst;
  1097             case 2:
  1098                 RGB888_RGB555_TWO(dst, src);
  1099                 src += 2;
  1100                 dst += 2;
  1101                 break;
  1102             case 1:
  1103                 RGB888_RGB555(dst, src);
  1104                 ++src;
  1105                 ++dst;
  1106                 break;
  1107             }
  1108             src += srcskip;
  1109             dst += dstskip;
  1110         }
  1111     }
  1112 #endif /* USE_DUFFS_LOOP */
  1113 }
  1114 
  1115 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1116 #define RGB888_RGB565(dst, src) { \
  1117 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1118 	                            (((*src)&0x0000FC00)>>5)| \
  1119 	                            (((*src)&0x000000F8)>>3)); \
  1120 }
  1121 #define RGB888_RGB565_TWO(dst, src) { \
  1122 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1123 	                     (((src[HI])&0x0000FC00)>>5)| \
  1124 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1125 	                     (((src[LO])&0x00F80000)>>8)| \
  1126 	                     (((src[LO])&0x0000FC00)>>5)| \
  1127 	                     (((src[LO])&0x000000F8)>>3); \
  1128 }
  1129 static void
  1130 Blit_RGB888_RGB565(SDL_BlitInfo * info)
  1131 {
  1132 #ifndef USE_DUFFS_LOOP
  1133     int c;
  1134 #endif
  1135     int width, height;
  1136     Uint32 *src;
  1137     Uint16 *dst;
  1138     int srcskip, dstskip;
  1139 
  1140     /* Set up some basic variables */
  1141     width = info->dst_w;
  1142     height = info->dst_h;
  1143     src = (Uint32 *) info->src;
  1144     srcskip = info->src_skip / 4;
  1145     dst = (Uint16 *) info->dst;
  1146     dstskip = info->dst_skip / 2;
  1147 
  1148 #ifdef USE_DUFFS_LOOP
  1149     while (height--) {
  1150 		/* *INDENT-OFF* */
  1151 		DUFFS_LOOP(
  1152 			RGB888_RGB565(dst, src);
  1153 			++src;
  1154 			++dst;
  1155 		, width);
  1156 		/* *INDENT-ON* */
  1157         src += srcskip;
  1158         dst += dstskip;
  1159     }
  1160 #else
  1161     /* Memory align at 4-byte boundary, if necessary */
  1162     if ((long) dst & 0x03) {
  1163         /* Don't do anything if width is 0 */
  1164         if (width == 0) {
  1165             return;
  1166         }
  1167         --width;
  1168 
  1169         while (height--) {
  1170             /* Perform copy alignment */
  1171             RGB888_RGB565(dst, src);
  1172             ++src;
  1173             ++dst;
  1174 
  1175             /* Copy in 4 pixel chunks */
  1176             for (c = width / 4; c; --c) {
  1177                 RGB888_RGB565_TWO(dst, src);
  1178                 src += 2;
  1179                 dst += 2;
  1180                 RGB888_RGB565_TWO(dst, src);
  1181                 src += 2;
  1182                 dst += 2;
  1183             }
  1184             /* Get any leftovers */
  1185             switch (width & 3) {
  1186             case 3:
  1187                 RGB888_RGB565(dst, src);
  1188                 ++src;
  1189                 ++dst;
  1190             case 2:
  1191                 RGB888_RGB565_TWO(dst, src);
  1192                 src += 2;
  1193                 dst += 2;
  1194                 break;
  1195             case 1:
  1196                 RGB888_RGB565(dst, src);
  1197                 ++src;
  1198                 ++dst;
  1199                 break;
  1200             }
  1201             src += srcskip;
  1202             dst += dstskip;
  1203         }
  1204     } else {
  1205         while (height--) {
  1206             /* Copy in 4 pixel chunks */
  1207             for (c = width / 4; c; --c) {
  1208                 RGB888_RGB565_TWO(dst, src);
  1209                 src += 2;
  1210                 dst += 2;
  1211                 RGB888_RGB565_TWO(dst, src);
  1212                 src += 2;
  1213                 dst += 2;
  1214             }
  1215             /* Get any leftovers */
  1216             switch (width & 3) {
  1217             case 3:
  1218                 RGB888_RGB565(dst, src);
  1219                 ++src;
  1220                 ++dst;
  1221             case 2:
  1222                 RGB888_RGB565_TWO(dst, src);
  1223                 src += 2;
  1224                 dst += 2;
  1225                 break;
  1226             case 1:
  1227                 RGB888_RGB565(dst, src);
  1228                 ++src;
  1229                 ++dst;
  1230                 break;
  1231             }
  1232             src += srcskip;
  1233             dst += dstskip;
  1234         }
  1235     }
  1236 #endif /* USE_DUFFS_LOOP */
  1237 }
  1238 
  1239 
  1240 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1241 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1242 static void
  1243 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
  1244 {
  1245 #ifndef USE_DUFFS_LOOP
  1246     int c;
  1247 #endif
  1248     int width, height;
  1249     Uint8 *src;
  1250     Uint32 *dst;
  1251     int srcskip, dstskip;
  1252 
  1253     /* Set up some basic variables */
  1254     width = info->dst_w;
  1255     height = info->dst_h;
  1256     src = (Uint8 *) info->src;
  1257     srcskip = info->src_skip;
  1258     dst = (Uint32 *) info->dst;
  1259     dstskip = info->dst_skip / 4;
  1260 
  1261 #ifdef USE_DUFFS_LOOP
  1262     while (height--) {
  1263 		/* *INDENT-OFF* */
  1264 		DUFFS_LOOP(
  1265 		{
  1266 			*dst++ = RGB565_32(dst, src, map);
  1267 			src += 2;
  1268 		},
  1269 		width);
  1270 		/* *INDENT-ON* */
  1271         src += srcskip;
  1272         dst += dstskip;
  1273     }
  1274 #else
  1275     while (height--) {
  1276         /* Copy in 4 pixel chunks */
  1277         for (c = width / 4; c; --c) {
  1278             *dst++ = RGB565_32(dst, src, map);
  1279             src += 2;
  1280             *dst++ = RGB565_32(dst, src, map);
  1281             src += 2;
  1282             *dst++ = RGB565_32(dst, src, map);
  1283             src += 2;
  1284             *dst++ = RGB565_32(dst, src, map);
  1285             src += 2;
  1286         }
  1287         /* Get any leftovers */
  1288         switch (width & 3) {
  1289         case 3:
  1290             *dst++ = RGB565_32(dst, src, map);
  1291             src += 2;
  1292         case 2:
  1293             *dst++ = RGB565_32(dst, src, map);
  1294             src += 2;
  1295         case 1:
  1296             *dst++ = RGB565_32(dst, src, map);
  1297             src += 2;
  1298             break;
  1299         }
  1300         src += srcskip;
  1301         dst += dstskip;
  1302     }
  1303 #endif /* USE_DUFFS_LOOP */
  1304 }
  1305 
/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/*
 * Lookup table consumed by Blit_RGB565_32 through the RGB565_32 macro.
 * It holds 256 pairs of entries: the even entry of pair N is the
 * contribution of a source byte with value N selected via src[LO], and the
 * odd entry is the contribution of a byte with value N selected via
 * src[HI]. The two contributions are added to form the output pixel; every
 * odd entry carries 0xff in its top byte.
 */
static const Uint32 RGB565_ARGB8888_LUT[512] = {
    0x00000000, 0xff000000, 0x00000008, 0xff002000,
    0x00000010, 0xff004000, 0x00000018, 0xff006100,
    0x00000020, 0xff008100, 0x00000029, 0xff00a100,
    0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
    0x00000041, 0xff080000, 0x0000004a, 0xff082000,
    0x00000052, 0xff084000, 0x0000005a, 0xff086100,
    0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
    0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
    0x00000083, 0xff100000, 0x0000008b, 0xff102000,
    0x00000094, 0xff104000, 0x0000009c, 0xff106100,
    0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
    0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
    0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
    0x000000d5, 0xff184000, 0x000000de, 0xff186100,
    0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
    0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
    0x00000400, 0xff200000, 0x00000408, 0xff202000,
    0x00000410, 0xff204000, 0x00000418, 0xff206100,
    0x00000420, 0xff208100, 0x00000429, 0xff20a100,
    0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
    0x00000441, 0xff290000, 0x0000044a, 0xff292000,
    0x00000452, 0xff294000, 0x0000045a, 0xff296100,
    0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
    0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
    0x00000483, 0xff310000, 0x0000048b, 0xff312000,
    0x00000494, 0xff314000, 0x0000049c, 0xff316100,
    0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
    0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
    0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
    0x000004d5, 0xff394000, 0x000004de, 0xff396100,
    0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
    0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
    0x00000800, 0xff410000, 0x00000808, 0xff412000,
    0x00000810, 0xff414000, 0x00000818, 0xff416100,
    0x00000820, 0xff418100, 0x00000829, 0xff41a100,
    0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
    0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
    0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
    0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
    0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
    0x00000883, 0xff520000, 0x0000088b, 0xff522000,
    0x00000894, 0xff524000, 0x0000089c, 0xff526100,
    0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
    0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
    0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
    0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
    0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
    0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
    0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
    0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
    0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
    0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
    0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
    0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
    0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
    0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
    0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
    0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
    0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
    0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
    0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
    0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
    0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
    0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
    0x00001000, 0xff830000, 0x00001008, 0xff832000,
    0x00001010, 0xff834000, 0x00001018, 0xff836100,
    0x00001020, 0xff838100, 0x00001029, 0xff83a100,
    0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
    0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
    0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
    0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
    0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
    0x00001083, 0xff940000, 0x0000108b, 0xff942000,
    0x00001094, 0xff944000, 0x0000109c, 0xff946100,
    0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
    0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
    0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
    0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
    0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
    0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
    0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
    0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
    0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
    0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
    0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
    0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
    0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
    0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
    0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
    0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
    0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
    0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
    0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
    0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
    0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
    0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
    0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
    0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
    0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
    0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
    0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
    0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
    0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
    0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
    0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
    0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
    0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
    0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
    0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
    0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
    0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
    0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
    0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
    0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
    0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
    0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
    0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
    0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
    0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
    0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
    0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
    0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
    0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
    0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
    0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
    0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
    0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
    0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
};
  1437 
/*
 * Blit entry point for RGB 5-6-5 --> ARGB 8-8-8-8: forwards to the generic
 * table-driven Blit_RGB565_32 with RGB565_ARGB8888_LUT as the table.
 */
static void
Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
  1443 
  1444 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1445 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1446     0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1447     0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1448     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1449     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1450     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1451     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1452     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1453     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1454     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1455     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1456     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1457     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1458     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1459     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1460     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1461     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1462     0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1463     0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1464     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1465     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1466     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1467     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1468     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1469     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1470     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1471     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1472     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1473     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1474     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1475     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1476     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1477     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1478     0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1479     0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1480     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1481     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1482     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1483     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1484     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1485     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1486     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1487     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1488     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1489     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1490     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1491     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1492     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1493     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1494     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1495     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1496     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1497     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1498     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1499     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1500     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1501     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1502     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1503     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1504     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1505     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1506     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1507     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1508     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1509     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1510     0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1511     0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1512     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1513     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1514     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1515     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1516     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1517     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1518     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1519     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1520     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1521     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1522     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1523     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1524     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1525     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1526     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1527     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1528     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1529     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1530     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1531     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1532     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1533     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1534     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1535     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1536     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1537     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1538     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1539     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1540     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1541     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1542     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1543     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1544     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1545     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1546     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1547     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1548     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1549     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1550     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1551     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1552     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1553     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1554     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1555     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1556     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1557     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1558     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1559     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1560     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1561     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1562     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1563     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1564     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1565     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1566     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1567     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1568     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1569     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1570     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1571     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1572     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1573     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1574 };
  1575 
  1576 static void
  1577 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
  1578 {
  1579     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1580 }
  1581 
  1582 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
        *
        * Lookup table handed to Blit_RGB565_32() by Blit_RGB565_RGBA8888().
        * It holds 256 pairs of 32-bit words.  In pair k:
        *   - the first word has 0xff in bits 0-7 (the alpha byte), the 5-bit
        *     value (k & 0x1f) expanded to 8 bits in bits 8-15 (blue) and
        *     (k >> 5) * 4 in bits 16-23 (low part of green);
        *   - the second word has the 5-bit value (k >> 3) expanded to 8 bits
        *     in bits 24-31 (red) and the contribution of (k & 7) in
        *     bits 16-23 (high part of green).
        * NOTE(review): this layout suggests the first word is selected by the
        * low byte and the second word by the high byte of a 16-bit source
        * pixel, the two being combined into one destination pixel -- confirm
        * against Blit_RGB565_32().
        */
  1583 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1584     0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1585     0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1586     0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1587     0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1588     0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1589     0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1590     0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1591     0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1592     0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1593     0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1594     0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1595     0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1596     0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1597     0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1598     0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1599     0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1600     0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1601     0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1602     0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1603     0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1604     0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1605     0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1606     0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1607     0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1608     0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1609     0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1610     0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1611     0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1612     0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1613     0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1614     0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1615     0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1616     0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1617     0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1618     0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1619     0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1620     0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1621     0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1622     0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1623     0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1624     0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1625     0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1626     0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1627     0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1628     0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1629     0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1630     0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1631     0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1632     0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1633     0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1634     0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1635     0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1636     0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1637     0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1638     0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1639     0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1640     0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1641     0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1642     0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1643     0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1644     0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1645     0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1646     0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1647     0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1648     0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1649     0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1650     0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1651     0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1652     0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1653     0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1654     0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1655     0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1656     0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1657     0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1658     0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1659     0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1660     0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1661     0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1662     0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1663     0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1664     0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1665     0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1666     0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1667     0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1668     0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1669     0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1670     0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1671     0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1672     0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1673     0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1674     0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1675     0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1676     0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1677     0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1678     0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1679     0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1680     0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1681     0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1682     0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1683     0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1684     0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1685     0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1686     0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1687     0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1688     0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1689     0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1690     0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1691     0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1692     0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1693     0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1694     0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1695     0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1696     0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1697     0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1698     0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1699     0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1700     0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1701     0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1702     0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1703     0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1704     0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1705     0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1706     0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1707     0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1708     0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1709     0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1710     0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1711     0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1712 };
  1713 
  1714 static void
  1715 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
  1716 {
  1717     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1718 }
  1719 
  1720 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
        *
        * Lookup table handed to Blit_RGB565_32() by Blit_RGB565_BGRA8888().
        * It holds 256 pairs of 32-bit words.  In pair k:
        *   - the first word has the 5-bit value (k & 0x1f) expanded to 8 bits
        *     in bits 24-31 (blue) and (k >> 5) * 4 in bits 16-23 (low part
        *     of green);
        *   - the second word has 0xff in bits 0-7 (the alpha byte), the 5-bit
        *     value (k >> 3) expanded to 8 bits in bits 8-15 (red) and the
        *     contribution of (k & 7) in bits 16-23 (high part of green).
        * NOTE(review): this layout suggests the first word is selected by the
        * low byte and the second word by the high byte of a 16-bit source
        * pixel, the two being combined into one destination pixel -- confirm
        * against Blit_RGB565_32().
        */
  1721 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1722     0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1723     0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1724     0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1725     0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1726     0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1727     0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1728     0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1729     0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1730     0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1731     0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1732     0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1733     0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1734     0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1735     0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1736     0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1737     0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1738     0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1739     0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1740     0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1741     0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1742     0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1743     0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1744     0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1745     0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1746     0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1747     0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1748     0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1749     0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1750     0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1751     0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1752     0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1753     0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1754     0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1755     0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1756     0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1757     0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1758     0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1759     0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1760     0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1761     0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1762     0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1763     0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1764     0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1765     0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1766     0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1767     0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1768     0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1769     0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1770     0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1771     0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1772     0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1773     0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1774     0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1775     0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1776     0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1777     0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1778     0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1779     0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1780     0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1781     0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1782     0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1783     0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1784     0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1785     0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1786     0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1787     0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1788     0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1789     0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1790     0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1791     0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1792     0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1793     0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1794     0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1795     0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1796     0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1797     0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1798     0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1799     0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1800     0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1801     0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1802     0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1803     0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1804     0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1805     0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1806     0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1807     0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1808     0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1809     0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1810     0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1811     0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1812     0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1813     0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1814     0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1815     0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1816     0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1817     0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1818     0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1819     0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1820     0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1821     0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1822     0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1823     0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1824     0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1825     0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1826     0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1827     0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1828     0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1829     0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1830     0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1831     0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1832     0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1833     0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1834     0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1835     0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1836     0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1837     0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1838     0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1839     0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1840     0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1841     0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1842     0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1843     0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1844     0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1845     0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1846     0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1847     0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1848     0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1849     0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1850 };
  1851 
  1852 static void
  1853 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
  1854 {
  1855     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1856 }
  1857 
  1858 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  1859 #ifndef RGB888_RGB332
  1860 #define RGB888_RGB332(dst, src) { \
  1861 	dst = (((src)&0x00E00000)>>16)| \
  1862 	      (((src)&0x0000E000)>>11)| \
  1863 	      (((src)&0x000000C0)>>6); \
  1864 }
  1865 #endif
  1866 static void
  1867 Blit_RGB888_index8_map(SDL_BlitInfo * info)
  1868 {
  1869 #ifndef USE_DUFFS_LOOP
  1870     int c;
  1871 #endif
  1872     int Pixel;
  1873     int width, height;
  1874     Uint32 *src;
  1875     const Uint8 *map;
  1876     Uint8 *dst;
  1877     int srcskip, dstskip;
  1878 
  1879     /* Set up some basic variables */
  1880     width = info->dst_w;
  1881     height = info->dst_h;
  1882     src = (Uint32 *) info->src;
  1883     srcskip = info->src_skip / 4;
  1884     dst = info->dst;
  1885     dstskip = info->dst_skip;
  1886     map = info->table;
  1887 
  1888 #ifdef USE_DUFFS_LOOP
  1889     while (height--) {
  1890 		/* *INDENT-OFF* */
  1891 		DUFFS_LOOP(
  1892 			RGB888_RGB332(Pixel, *src);
  1893 			*dst++ = map[Pixel];
  1894 			++src;
  1895 		, width);
  1896 		/* *INDENT-ON* */
  1897         src += srcskip;
  1898         dst += dstskip;
  1899     }
  1900 #else
  1901     while (height--) {
  1902         for (c = width / 4; c; --c) {
  1903             /* Pack RGB into 8bit pixel */
  1904             RGB888_RGB332(Pixel, *src);
  1905             *dst++ = map[Pixel];
  1906             ++src;
  1907             RGB888_RGB332(Pixel, *src);
  1908             *dst++ = map[Pixel];
  1909             ++src;
  1910             RGB888_RGB332(Pixel, *src);
  1911             *dst++ = map[Pixel];
  1912             ++src;
  1913             RGB888_RGB332(Pixel, *src);
  1914             *dst++ = map[Pixel];
  1915             ++src;
  1916         }
  1917         switch (width & 3) {
  1918         case 3:
  1919             RGB888_RGB332(Pixel, *src);
  1920             *dst++ = map[Pixel];
  1921             ++src;
  1922         case 2:
  1923             RGB888_RGB332(Pixel, *src);
  1924             *dst++ = map[Pixel];
  1925             ++src;
  1926         case 1:
  1927             RGB888_RGB332(Pixel, *src);
  1928             *dst++ = map[Pixel];
  1929             ++src;
  1930         }
  1931         src += srcskip;
  1932         dst += dstskip;
  1933     }
  1934 #endif /* USE_DUFFS_LOOP */
  1935 }
  1936 
  1937 static void
  1938 BlitNto1(SDL_BlitInfo * info)
  1939 {
  1940 #ifndef USE_DUFFS_LOOP
  1941     int c;
  1942 #endif
  1943     int width, height;
  1944     Uint8 *src;
  1945     const Uint8 *map;
  1946     Uint8 *dst;
  1947     int srcskip, dstskip;
  1948     int srcbpp;
  1949     Uint32 Pixel;
  1950     int sR, sG, sB;
  1951     SDL_PixelFormat *srcfmt;
  1952 
  1953     /* Set up some basic variables */
  1954     width = info->dst_w;
  1955     height = info->dst_h;
  1956     src = info->src;
  1957     srcskip = info->src_skip;
  1958     dst = info->dst;
  1959     dstskip = info->dst_skip;
  1960     map = info->table;
  1961     srcfmt = info->src_fmt;
  1962     srcbpp = srcfmt->BytesPerPixel;
  1963 
  1964     if (map == NULL) {
  1965         while (height--) {
  1966 #ifdef USE_DUFFS_LOOP
  1967 			/* *INDENT-OFF* */
  1968 			DUFFS_LOOP(
  1969 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1970 								sR, sG, sB);
  1971 				if ( 1 ) {
  1972 				  	/* Pack RGB into 8bit pixel */
  1973 				  	*dst = ((sR>>5)<<(3+2))|
  1974 					        ((sG>>5)<<(2)) |
  1975 					        ((sB>>6)<<(0)) ;
  1976 				}
  1977 				dst++;
  1978 				src += srcbpp;
  1979 			, width);
  1980 			/* *INDENT-ON* */
  1981 #else
  1982             for (c = width; c; --c) {
  1983                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  1984                 if (1) {
  1985                     /* Pack RGB into 8bit pixel */
  1986                     *dst = ((sR >> 5) << (3 + 2)) |
  1987                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
  1988                 }
  1989                 dst++;
  1990                 src += srcbpp;
  1991             }
  1992 #endif
  1993             src += srcskip;
  1994             dst += dstskip;
  1995         }
  1996     } else {
  1997         while (height--) {
  1998 #ifdef USE_DUFFS_LOOP
  1999 			/* *INDENT-OFF* */
  2000 			DUFFS_LOOP(
  2001 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2002 								sR, sG, sB);
  2003 				if ( 1 ) {
  2004 				  	/* Pack RGB into 8bit pixel */
  2005 				  	*dst = map[((sR>>5)<<(3+2))|
  2006 						   ((sG>>5)<<(2))  |
  2007 						   ((sB>>6)<<(0))  ];
  2008 				}
  2009 				dst++;
  2010 				src += srcbpp;
  2011 			, width);
  2012 			/* *INDENT-ON* */
  2013 #else
  2014             for (c = width; c; --c) {
  2015                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2016                 if (1) {
  2017                     /* Pack RGB into 8bit pixel */
  2018                     *dst = map[((sR >> 5) << (3 + 2)) |
  2019                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
  2020                 }
  2021                 dst++;
  2022                 src += srcbpp;
  2023             }
  2024 #endif /* USE_DUFFS_LOOP */
  2025             src += srcskip;
  2026             dst += dstskip;
  2027         }
  2028     }
  2029 }
  2030 
  2031 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  2032 static void
  2033 Blit4to4MaskAlpha(SDL_BlitInfo * info)
  2034 {
  2035     int width = info->dst_w;
  2036     int height = info->dst_h;
  2037     Uint32 *src = (Uint32 *) info->src;
  2038     int srcskip = info->src_skip;
  2039     Uint32 *dst = (Uint32 *) info->dst;
  2040     int dstskip = info->dst_skip;
  2041     SDL_PixelFormat *srcfmt = info->src_fmt;
  2042     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2043 
  2044     if (dstfmt->Amask) {
  2045         /* RGB->RGBA, SET_ALPHA */
  2046         Uint32 mask = (info->a >> dstfmt->Aloss) << dstfmt->Ashift;
  2047 
  2048         while (height--) {
  2049 			/* *INDENT-OFF* */
  2050 			DUFFS_LOOP(
  2051 			{
  2052 				*dst = *src | mask;
  2053 				++dst;
  2054 				++src;
  2055 			},
  2056 			width);
  2057 			/* *INDENT-ON* */
  2058             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2059             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2060         }
  2061     } else {
  2062         /* RGBA->RGB, NO_ALPHA */
  2063         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2064 
  2065         while (height--) {
  2066 			/* *INDENT-OFF* */
  2067 			DUFFS_LOOP(
  2068 			{
  2069 				*dst = *src & mask;
  2070 				++dst;
  2071 				++src;
  2072 			},
  2073 			width);
  2074 			/* *INDENT-ON* */
  2075             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2076             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2077         }
  2078     }
  2079 }
  2080 
  2081 static void
  2082 BlitNtoN(SDL_BlitInfo * info)
  2083 {
  2084     int width = info->dst_w;
  2085     int height = info->dst_h;
  2086     Uint8 *src = info->src;
  2087     int srcskip = info->src_skip;
  2088     Uint8 *dst = info->dst;
  2089     int dstskip = info->dst_skip;
  2090     SDL_PixelFormat *srcfmt = info->src_fmt;
  2091     int srcbpp = srcfmt->BytesPerPixel;
  2092     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2093     int dstbpp = dstfmt->BytesPerPixel;
  2094     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2095 
  2096     while (height--) {
  2097 		/* *INDENT-OFF* */
  2098 		DUFFS_LOOP(
  2099 		{
  2100             Uint32 Pixel;
  2101 			unsigned sR;
  2102 			unsigned sG;
  2103 			unsigned sB;
  2104 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2105 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2106 			dst += dstbpp;
  2107 			src += srcbpp;
  2108 		},
  2109 		width);
  2110 		/* *INDENT-ON* */
  2111         src += srcskip;
  2112         dst += dstskip;
  2113     }
  2114 }
  2115 
  2116 static void
  2117 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
  2118 {
  2119     int width = info->dst_w;
  2120     int height = info->dst_h;
  2121     Uint8 *src = info->src;
  2122     int srcskip = info->src_skip;
  2123     Uint8 *dst = info->dst;
  2124     int dstskip = info->dst_skip;
  2125     SDL_PixelFormat *srcfmt = info->src_fmt;
  2126     int srcbpp = srcfmt->BytesPerPixel;
  2127     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2128     int dstbpp = dstfmt->BytesPerPixel;
  2129     int c;
  2130 
  2131     /* FIXME: should map alpha to [0..255] correctly! */
  2132     while (height--) {
  2133         for (c = width; c; --c) {
  2134             Uint32 Pixel;
  2135             unsigned sR, sG, sB, sA;
  2136             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2137             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2138             dst += dstbpp;
  2139             src += srcbpp;
  2140         }
  2141         src += srcskip;
  2142         dst += dstskip;
  2143     }
  2144 }
  2145 
  2146 static void
  2147 BlitNto1Key(SDL_BlitInfo * info)
  2148 {
  2149     int width = info->dst_w;
  2150     int height = info->dst_h;
  2151     Uint8 *src = info->src;
  2152     int srcskip = info->src_skip;
  2153     Uint8 *dst = info->dst;
  2154     int dstskip = info->dst_skip;
  2155     SDL_PixelFormat *srcfmt = info->src_fmt;
  2156     const Uint8 *palmap = info->table;
  2157     Uint32 ckey = info->colorkey;
  2158     Uint32 rgbmask = ~srcfmt->Amask;
  2159     int srcbpp;
  2160     Uint32 Pixel;
  2161     unsigned sR, sG, sB;
  2162 
  2163     /* Set up some basic variables */
  2164     srcbpp = srcfmt->BytesPerPixel;
  2165     ckey &= rgbmask;
  2166 
  2167     if (palmap == NULL) {
  2168         while (height--) {
  2169 			/* *INDENT-OFF* */
  2170 			DUFFS_LOOP(
  2171 			{
  2172 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2173 								sR, sG, sB);
  2174 				if ( (Pixel & rgbmask) != ckey ) {
  2175 				  	/* Pack RGB into 8bit pixel */
  2176 				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
  2177 						           ((sG>>5)<<(2)) |
  2178 						           ((sB>>6)<<(0)));
  2179 				}
  2180 				dst++;
  2181 				src += srcbpp;
  2182 			},
  2183 			width);
  2184 			/* *INDENT-ON* */
  2185             src += srcskip;
  2186             dst += dstskip;
  2187         }
  2188     } else {
  2189         while (height--) {
  2190 			/* *INDENT-OFF* */
  2191 			DUFFS_LOOP(
  2192 			{
  2193 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2194 								sR, sG, sB);
  2195 				if ( (Pixel & rgbmask) != ckey ) {
  2196 				  	/* Pack RGB into 8bit pixel */
  2197 				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2198 							             ((sG>>5)<<(2))  |
  2199 							             ((sB>>6)<<(0))  ];
  2200 				}
  2201 				dst++;
  2202 				src += srcbpp;
  2203 			},
  2204 			width);
  2205 			/* *INDENT-ON* */
  2206             src += srcskip;
  2207             dst += dstskip;
  2208         }
  2209     }
  2210 }
  2211 
  2212 static void
  2213 Blit2to2Key(SDL_BlitInfo * info)
  2214 {
  2215     int width = info->dst_w;
  2216     int height = info->dst_h;
  2217     Uint16 *srcp = (Uint16 *) info->src;
  2218     int srcskip = info->src_skip;
  2219     Uint16 *dstp = (Uint16 *) info->dst;
  2220     int dstskip = info->dst_skip;
  2221     Uint32 ckey = info->colorkey;
  2222     Uint32 rgbmask = ~info->src_fmt->Amask;
  2223 
  2224     /* Set up some basic variables */
  2225     srcskip /= 2;
  2226     dstskip /= 2;
  2227     ckey &= rgbmask;
  2228 
  2229     while (height--) {
  2230 		/* *INDENT-OFF* */
  2231 		DUFFS_LOOP(
  2232 		{
  2233 			if ( (*srcp & rgbmask) != ckey ) {
  2234 				*dstp = *srcp;
  2235 			}
  2236 			dstp++;
  2237 			srcp++;
  2238 		},
  2239 		width);
  2240 		/* *INDENT-ON* */
  2241         srcp += srcskip;
  2242         dstp += dstskip;
  2243     }
  2244 }
  2245 
  2246 static void
  2247 BlitNtoNKey(SDL_BlitInfo * info)
  2248 {
  2249     int width = info->dst_w;
  2250     int height = info->dst_h;
  2251     Uint8 *src = info->src;
  2252     int srcskip = info->src_skip;
  2253     Uint8 *dst = info->dst;
  2254     int dstskip = info->dst_skip;
  2255     Uint32 ckey = info->colorkey;
  2256     SDL_PixelFormat *srcfmt = info->src_fmt;
  2257     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2258     int srcbpp = srcfmt->BytesPerPixel;
  2259     int dstbpp = dstfmt->BytesPerPixel;
  2260     unsigned alpha = dstfmt->Amask ? info->a : 0;
  2261     Uint32 rgbmask = ~srcfmt->Amask;
  2262 
  2263     /* Set up some basic variables */
  2264     ckey &= rgbmask;
  2265 
  2266     while (height--) {
  2267 		/* *INDENT-OFF* */
  2268 		DUFFS_LOOP(
  2269 		{
  2270             Uint32 Pixel;
  2271 			unsigned sR;
  2272 			unsigned sG;
  2273 			unsigned sB;
  2274 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2275 			if ( (Pixel & rgbmask) != ckey ) {
  2276                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2277 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2278 			}
  2279 			dst += dstbpp;
  2280 			src += srcbpp;
  2281 		},
  2282 		width);
  2283 		/* *INDENT-ON* */
  2284         src += srcskip;
  2285         dst += dstskip;
  2286     }
  2287 }
  2288 
  2289 static void
  2290 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
  2291 {
  2292     int width = info->dst_w;
  2293     int height = info->dst_h;
  2294     Uint8 *src = info->src;
  2295     int srcskip = info->src_skip;
  2296     Uint8 *dst = info->dst;
  2297     int dstskip = info->dst_skip;
  2298     Uint32 ckey = info->colorkey;
  2299     SDL_PixelFormat *srcfmt = info->src_fmt;
  2300     SDL_PixelFormat *dstfmt = info->dst_fmt;
  2301     Uint32 rgbmask = ~srcfmt->Amask;
  2302 
  2303     Uint8 srcbpp;
  2304     Uint8 dstbpp;
  2305     Uint32 Pixel;
  2306     unsigned sR, sG, sB, sA;
  2307 
  2308     /* Set up some basic variables */
  2309     srcbpp = srcfmt->BytesPerPixel;
  2310     dstbpp = dstfmt->BytesPerPixel;
  2311     ckey &= rgbmask;
  2312 
  2313     /* FIXME: should map alpha to [0..255] correctly! */
  2314     while (height--) {
  2315 		/* *INDENT-OFF* */
  2316 		DUFFS_LOOP(
  2317 		{
  2318 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2319 			if ( (Pixel & rgbmask) != ckey ) {
  2320 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2321 			}
  2322 			dst += dstbpp;
  2323 			src += srcbpp;
  2324 		},
  2325 		width);
  2326 		/* *INDENT-ON* */
  2327         src += srcskip;
  2328         dst += dstskip;
  2329     }
  2330 }
  2331 
  2332 /* Normal N to N optimized blitters */
  2333 struct blit_table
  2334 {
  2335     Uint32 srcR, srcG, srcB;
  2336     int dstbpp;
  2337     Uint32 dstR, dstG, dstB;
  2338     Uint32 blit_features;
  2339     SDL_BlitFunc blitfunc;
  2340     enum
  2341     { NO_ALPHA = 1, SET_ALPHA = 2, COPY_ALPHA = 4 } alpha;
  2342 };
  2343 static const struct blit_table normal_blit_1[] = {
  2344     /* Default for 8-bit RGB source, an invalid combination */
  2345     {0, 0, 0, 0, 0, 0, 0, 0, NULL},
  2346 };
  2347 
  2348 static const struct blit_table normal_blit_2[] = {
  2349 #if SDL_ALTIVEC_BLITTERS
  2350     /* has-altivec */
  2351     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
  2352      0x00000000,
  2353      2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2354     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
  2355      0x00000000,
  2356      2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2357 #endif
  2358     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
  2359      0x000000FF,
  2360      0, Blit_RGB565_ARGB8888, SET_ALPHA},
  2361     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00,
  2362      0x00FF0000,
  2363      0, Blit_RGB565_ABGR8888, SET_ALPHA},
  2364     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000,
  2365      0x0000FF00,
  2366      0, Blit_RGB565_RGBA8888, SET_ALPHA},
  2367     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000,
  2368      0xFF000000,
  2369      0, Blit_RGB565_BGRA8888, SET_ALPHA},
  2370 
  2371     /* Default for 16-bit RGB source, used if no other blitter matches */
  2372     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2373 };
  2374 
  2375 static const struct blit_table normal_blit_3[] = {
  2376     /* Default for 24-bit RGB source, never optimized */
  2377     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2378 };
  2379 
  2380 static const struct blit_table normal_blit_4[] = {
  2381 #if SDL_ALTIVEC_BLITTERS
  2382     /* has-altivec | dont-use-prefetch */
  2383     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
  2384      0x00000000,
  2385      6, ConvertAltivec32to32_noprefetch,
  2386      NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2387     /* has-altivec */
  2388     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
  2389      0x00000000,
  2390      2, ConvertAltivec32to32_prefetch,
  2391      NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2392     /* has-altivec */
  2393     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
  2394      0x0000001F,
  2395      2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
  2396 #endif
  2397     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
  2398      0x0000001F,
  2399      0, Blit_RGB888_RGB565, NO_ALPHA},
  2400     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
  2401      0x0000001F,
  2402      0, Blit_RGB888_RGB555, NO_ALPHA},
  2403     /* Default for 32-bit RGB source, used if no other blitter matches */
  2404     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
  2405 };
  2406 
  2407 static const struct blit_table *const normal_blit[] = {
  2408     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2409 };
  2410 
  2411 /* Mask matches table, or table entry is zero */
  2412 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2413 
  2414 SDL_BlitFunc
  2415 SDL_CalculateBlitN(SDL_Surface * surface)
  2416 {
  2417     SDL_PixelFormat *srcfmt;
  2418     SDL_PixelFormat *dstfmt;
  2419     const struct blit_table *table;
  2420     int which;
  2421     SDL_BlitFunc blitfun;
  2422 
  2423     /* Set up data for choosing the blit */
  2424     srcfmt = surface->format;
  2425     dstfmt = surface->map->dst->format;
  2426 
  2427     /* We don't support destinations less than 8-bits */
  2428     if (dstfmt->BitsPerPixel < 8) {
  2429         return (NULL);
  2430     }
  2431 
  2432     switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
  2433     case 0:
  2434         blitfun = NULL;
  2435         if (dstfmt->BitsPerPixel == 8) {
  2436             /* We assume 8-bit destinations are palettized */
  2437             if ((srcfmt->BytesPerPixel == 4) &&
  2438                 (srcfmt->Rmask == 0x00FF0000) &&
  2439                 (srcfmt->Gmask == 0x0000FF00) &&
  2440                 (srcfmt->Bmask == 0x000000FF)) {
  2441                 if (surface->map->info.table) {
  2442                     blitfun = Blit_RGB888_index8_map;
  2443                 } else {
  2444                     blitfun = Blit_RGB888_index8;
  2445                 }
  2446             } else {
  2447                 blitfun = BlitNto1;
  2448             }
  2449         } else {
  2450             /* Now the meat, choose the blitter we want */
  2451             int a_need = NO_ALPHA;
  2452             if (dstfmt->Amask)
  2453                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2454             table = normal_blit[srcfmt->BytesPerPixel - 1];
  2455             for (which = 0; table[which].dstbpp; ++which) {
  2456                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2457                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2458                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2459                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2460                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2461                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2462                     dstfmt->BytesPerPixel == table[which].dstbpp &&
  2463                     (a_need & table[which].alpha) == a_need &&
  2464                     ((table[which].blit_features & GetBlitFeatures()) ==
  2465                      table[which].blit_features))
  2466                     break;
  2467             }
  2468             blitfun = table[which].blitfunc;
  2469 
  2470             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
  2471                 /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2472                 if (srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4
  2473                     && srcfmt->Rmask == dstfmt->Rmask
  2474                     && srcfmt->Gmask == dstfmt->Gmask
  2475                     && srcfmt->Bmask == dstfmt->Bmask) {
  2476                     blitfun = Blit4to4MaskAlpha;
  2477                 } else if (a_need == COPY_ALPHA) {
  2478                     blitfun = BlitNtoNCopyAlpha;
  2479                 }
  2480             }
  2481         }
  2482         return (blitfun);
  2483 
  2484     case SDL_COPY_COLORKEY:
  2485         /* colorkey blit: Here we don't have too many options, mostly
  2486            because RLE is the preferred fast way to deal with this.
  2487            If a particular case turns out to be useful we'll add it. */
  2488 
  2489         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
  2490             return Blit2to2Key;
  2491         else if (dstfmt->BytesPerPixel == 1)
  2492             return BlitNto1Key;
  2493         else {
  2494 #if SDL_ALTIVEC_BLITTERS
  2495             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
  2496                 && SDL_HasAltiVec()) {
  2497                 return Blit32to32KeyAltivec;
  2498             } else
  2499 #endif
  2500             if (srcfmt->Amask && dstfmt->Amask) {
  2501                 return BlitNtoNKeyCopyAlpha;
  2502             } else {
  2503                 return BlitNtoNKey;
  2504             }
  2505         }
  2506     }
  2507 
  2508     return NULL;
  2509 }
  2510 
  2511 /* vi: set ts=4 sw=4 expandtab: */