src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 29 May 2006 04:04:35 +0000
branch SDL-1.3
changeset 1668 4da1ee79c9af
parent 1662 782fd950bd46
permissions -rw-r--r--
more tweaking indent options
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_endian.h"
    26 #include "SDL_cpuinfo.h"
    27 #include "SDL_blit.h"
    28 
    29 /* Functions to blit from N-bit surfaces to other surfaces */
    30 
    31 #if SDL_ALTIVEC_BLITTERS
    32 #if __MWERKS__
    33 #pragma altivec_model on
    34 #endif
    35 #ifdef HAVE_ALTIVEC_H
    36 #include <altivec.h>
    37 #endif
    38 #define assert(X)
    39 #ifdef __MACOSX__
    40 #include <sys/sysctl.h>
    41 static size_t
    42 GetL3CacheSize(void)
    43 {
    44     const char key[] = "hw.l3cachesize";
    45     u_int64_t result = 0;
    46     size_t typeSize = sizeof(result);
    47 
    48 
    49     int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
    50     if (0 != err)
    51         return 0;
    52 
    53     return result;
    54 }
    55 #else
    56 static size_t
    57 GetL3CacheSize(void)
    58 {
    59     /* XXX: Just guess G4 */
    60     return 2097152;
    61 }
    62 #endif /* __MACOSX__ */
    63 
    64 #if (defined(__MACOSX__) && (__GNUC__ < 4))
    65 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    66         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    67 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    68         (vector unsigned short) ( a,b,c,d,e,f,g,h )
    69 #else
    70 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
    71         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    72 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
    73         (vector unsigned short) { a,b,c,d,e,f,g,h }
    74 #endif
    75 
    76 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
    77 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
    78                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
    79                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
    80                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
    81                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
    82 
    83 #define MAKE8888(dstfmt, r, g, b, a)  \
    84     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
    85       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
    86       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
    87       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
    88 
    89 /*
    90  * Data Stream Touch...Altivec cache prefetching.
    91  *
    92  *  Don't use this on a G5...however, the speed boost is very significant
    93  *   on a G4.
    94  */
    95 #define DST_CHAN_SRC 1
    96 #define DST_CHAN_DEST 2
    97 
    98 /* macro to set DST control word value... */
    99 #define DST_CTRL(size, count, stride) \
   100     (((size) << 24) | ((count) << 16) | (stride))
   101 
   102 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
   103     ? vec_lvsl(0, src) \
   104     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
   105 
   106 /* Calculate the permute vector used for 32->32 swizzling */
   107 static vector unsigned char
   108 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
   109 {
   110     /*
   111      * We have to assume that the bits that aren't used by other
   112      *  colors is alpha, and it's one complete byte, since some formats
   113      *  leave alpha with a zero mask, but we should still swizzle the bits.
   114      */
   115     /* ARGB */
   116     const static struct SDL_PixelFormat default_pixel_format = {
   117         NULL, 0, 0,
   118         0, 0, 0, 0,
   119         16, 8, 0, 24,
   120         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
   121         0, 0
   122     };
   123     if (!srcfmt) {
   124         srcfmt = &default_pixel_format;
   125     }
   126     if (!dstfmt) {
   127         dstfmt = &default_pixel_format;
   128     }
   129     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
   130                                                        0x04, 0x04, 0x04, 0x04,
   131                                                        0x08, 0x08, 0x08, 0x08,
   132                                                        0x0C, 0x0C, 0x0C,
   133                                                        0x0C);
   134     vector unsigned char vswiz;
   135     vector unsigned int srcvec;
   136 #define RESHIFT(X) (3 - ((X) >> 3))
   137     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
   138     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
   139     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
   140     Uint32 amask;
   141     /* Use zero for alpha if either surface doesn't have alpha */
   142     if (dstfmt->Amask) {
   143         amask =
   144             ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->
   145                                                                    Ashift);
   146     } else {
   147         amask =
   148             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
   149                           0xFFFFFFFF);
   150     }
   151 #undef RESHIFT
   152     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
   153     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
   154     return (vswiz);
   155 }
   156 
   157 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
   158 static void
   159 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
   160 {
   161     int height = info->d_height;
   162     Uint8 *src = (Uint8 *) info->s_pixels;
   163     int srcskip = info->s_skip;
   164     Uint8 *dst = (Uint8 *) info->d_pixels;
   165     int dstskip = info->d_skip;
   166     SDL_PixelFormat *srcfmt = info->src;
   167     vector unsigned char valpha = vec_splat_u8(0);
   168     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
   169     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
   170                                                     0x00, 0x0a, 0x00, 0x0e,
   171                                                     0x00, 0x12, 0x00, 0x16,
   172                                                     0x00, 0x1a, 0x00, 0x1e);
   173     vector unsigned short v1 = vec_splat_u16(1);
   174     vector unsigned short v3 = vec_splat_u16(3);
   175     vector unsigned short v3f =
   176         VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
   177                           0x003f, 0x003f, 0x003f, 0x003f);
   178     vector unsigned short vfc =
   179         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
   180                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
   181     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
   182     vf800 = vec_sl(vf800, vec_splat_u16(8));
   183 
   184     while (height--) {
   185         vector unsigned char valigner;
   186         vector unsigned char voverflow;
   187         vector unsigned char vsrc;
   188 
   189         int width = info->d_width;
   190         int extrawidth;
   191 
   192         /* do scalar until we can align... */
   193 #define ONE_PIXEL_BLEND(condition, widthvar) \
   194         while (condition) { \
   195             Uint32 Pixel; \
   196             unsigned sR, sG, sB, sA; \
   197             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
   198                           sR, sG, sB, sA); \
   199             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
   200                                 ((sG << 3) & 0x000007E0) | \
   201                                 ((sB >> 3) & 0x0000001F)); \
   202             dst += 2; \
   203             src += 4; \
   204             widthvar--; \
   205         }
   206 
   207         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   208 
   209         /* After all that work, here's the vector part! */
   210         extrawidth = (width % 8);       /* trailing unaligned stores */
   211         width -= extrawidth;
   212         vsrc = vec_ld(0, src);
   213         valigner = VEC_ALIGNER(src);
   214 
   215         while (width) {
   216             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
   217             vector unsigned int vsrc1, vsrc2;
   218             vector unsigned char vdst;
   219 
   220             voverflow = vec_ld(15, src);
   221             vsrc = vec_perm(vsrc, voverflow, valigner);
   222             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   223             src += 16;
   224             vsrc = voverflow;
   225             voverflow = vec_ld(15, src);
   226             vsrc = vec_perm(vsrc, voverflow, valigner);
   227             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
   228             /* 1555 */
   229             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
   230             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
   231             vgpixel = vec_and(vgpixel, vfc);
   232             vgpixel = vec_sl(vgpixel, v3);
   233             vrpixel = vec_sl(vpixel, v1);
   234             vrpixel = vec_and(vrpixel, vf800);
   235             vbpixel = vec_and(vpixel, v3f);
   236             vdst =
   237                 vec_or((vector unsigned char) vrpixel,
   238                        (vector unsigned char) vgpixel);
   239             /* 565 */
   240             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
   241             vec_st(vdst, 0, dst);
   242 
   243             width -= 8;
   244             src += 16;
   245             dst += 16;
   246             vsrc = voverflow;
   247         }
   248 
   249         assert(width == 0);
   250 
   251         /* do scalar until we can align... */
   252         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   253 #undef ONE_PIXEL_BLEND
   254 
   255         src += srcskip;         /* move to next row, accounting for pitch. */
   256         dst += dstskip;
   257     }
   258 
   259 
   260 }
   261 
   262 static void
   263 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
   264 {
   265     int height = info->d_height;
   266     Uint8 *src = (Uint8 *) info->s_pixels;
   267     int srcskip = info->s_skip;
   268     Uint8 *dst = (Uint8 *) info->d_pixels;
   269     int dstskip = info->d_skip;
   270     SDL_PixelFormat *srcfmt = info->src;
   271     SDL_PixelFormat *dstfmt = info->dst;
   272     unsigned alpha;
   273     vector unsigned char valpha;
   274     vector unsigned char vpermute;
   275     vector unsigned short vf800;
   276     vector unsigned int v8 = vec_splat_u32(8);
   277     vector unsigned int v16 = vec_add(v8, v8);
   278     vector unsigned short v2 = vec_splat_u16(2);
   279     vector unsigned short v3 = vec_splat_u16(3);
   280     /* 
   281        0x10 - 0x1f is the alpha
   282        0x00 - 0x0e evens are the red
   283        0x01 - 0x0f odds are zero
   284      */
   285     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   286                                                        0x10, 0x02, 0x01, 0x01,
   287                                                        0x10, 0x04, 0x01, 0x01,
   288                                                        0x10, 0x06, 0x01,
   289                                                        0x01);
   290     vector unsigned char vredalpha2 =
   291         (vector unsigned
   292          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   293         );
   294     /*
   295        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   296        0x11 - 0x0f odds are blue
   297      */
   298     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   299                                                    0x04, 0x05, 0x06, 0x13,
   300                                                    0x08, 0x09, 0x0a, 0x15,
   301                                                    0x0c, 0x0d, 0x0e, 0x17);
   302     vector unsigned char vblue2 =
   303         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   304         );
   305     /*
   306        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   307        0x10 - 0x0e evens are green
   308      */
   309     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   310                                                     0x04, 0x05, 0x12, 0x07,
   311                                                     0x08, 0x09, 0x14, 0x0b,
   312                                                     0x0c, 0x0d, 0x16, 0x0f);
   313     vector unsigned char vgreen2 =
   314         (vector unsigned
   315          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   316         );
   317 
   318 
   319     assert(srcfmt->BytesPerPixel == 2);
   320     assert(dstfmt->BytesPerPixel == 4);
   321 
   322     vf800 = (vector unsigned short) vec_splat_u8(-7);
   323     vf800 = vec_sl(vf800, vec_splat_u16(8));
   324 
   325     if (dstfmt->Amask && srcfmt->alpha) {
   326         ((unsigned char *) &valpha)[0] = alpha = srcfmt->alpha;
   327         valpha = vec_splat(valpha, 0);
   328     } else {
   329         alpha = 0;
   330         valpha = vec_splat_u8(0);
   331     }
   332 
   333     vpermute = calc_swizzle32(NULL, dstfmt);
   334     while (height--) {
   335         vector unsigned char valigner;
   336         vector unsigned char voverflow;
   337         vector unsigned char vsrc;
   338 
   339         int width = info->d_width;
   340         int extrawidth;
   341 
   342         /* do scalar until we can align... */
   343 #define ONE_PIXEL_BLEND(condition, widthvar) \
   344         while (condition) { \
   345             unsigned sR, sG, sB; \
   346             unsigned short Pixel = *((unsigned short *)src); \
   347             sR = (Pixel >> 8) & 0xf8; \
   348             sG = (Pixel >> 3) & 0xfc; \
   349             sB = (Pixel << 3) & 0xf8; \
   350             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   351             src += 2; \
   352             dst += 4; \
   353             widthvar--; \
   354         }
   355         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   356 
   357         /* After all that work, here's the vector part! */
   358         extrawidth = (width % 8);       /* trailing unaligned stores */
   359         width -= extrawidth;
   360         vsrc = vec_ld(0, src);
   361         valigner = VEC_ALIGNER(src);
   362 
   363         while (width) {
   364             vector unsigned short vR, vG, vB;
   365             vector unsigned char vdst1, vdst2;
   366 
   367             voverflow = vec_ld(15, src);
   368             vsrc = vec_perm(vsrc, voverflow, valigner);
   369 
   370             vR = vec_and((vector unsigned short) vsrc, vf800);
   371             vB = vec_sl((vector unsigned short) vsrc, v3);
   372             vG = vec_sl(vB, v2);
   373 
   374             vdst1 =
   375                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   376                                                 valpha, vredalpha1);
   377             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   378             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   379             vdst1 = vec_perm(vdst1, valpha, vpermute);
   380             vec_st(vdst1, 0, dst);
   381 
   382             vdst2 =
   383                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   384                                                 valpha, vredalpha2);
   385             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   386             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   387             vdst2 = vec_perm(vdst2, valpha, vpermute);
   388             vec_st(vdst2, 16, dst);
   389 
   390             width -= 8;
   391             dst += 32;
   392             src += 16;
   393             vsrc = voverflow;
   394         }
   395 
   396         assert(width == 0);
   397 
   398 
   399         /* do scalar until we can align... */
   400         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   401 #undef ONE_PIXEL_BLEND
   402 
   403         src += srcskip;         /* move to next row, accounting for pitch. */
   404         dst += dstskip;
   405     }
   406 
   407 }
   408 
   409 
   410 static void
   411 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
   412 {
   413     int height = info->d_height;
   414     Uint8 *src = (Uint8 *) info->s_pixels;
   415     int srcskip = info->s_skip;
   416     Uint8 *dst = (Uint8 *) info->d_pixels;
   417     int dstskip = info->d_skip;
   418     SDL_PixelFormat *srcfmt = info->src;
   419     SDL_PixelFormat *dstfmt = info->dst;
   420     unsigned alpha;
   421     vector unsigned char valpha;
   422     vector unsigned char vpermute;
   423     vector unsigned short vf800;
   424     vector unsigned int v8 = vec_splat_u32(8);
   425     vector unsigned int v16 = vec_add(v8, v8);
   426     vector unsigned short v1 = vec_splat_u16(1);
   427     vector unsigned short v3 = vec_splat_u16(3);
   428     /* 
   429        0x10 - 0x1f is the alpha
   430        0x00 - 0x0e evens are the red
   431        0x01 - 0x0f odds are zero
   432      */
   433     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
   434                                                        0x10, 0x02, 0x01, 0x01,
   435                                                        0x10, 0x04, 0x01, 0x01,
   436                                                        0x10, 0x06, 0x01,
   437                                                        0x01);
   438     vector unsigned char vredalpha2 =
   439         (vector unsigned
   440          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
   441         );
   442     /*
   443        0x00 - 0x0f is ARxx ARxx ARxx ARxx
   444        0x11 - 0x0f odds are blue
   445      */
   446     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
   447                                                    0x04, 0x05, 0x06, 0x13,
   448                                                    0x08, 0x09, 0x0a, 0x15,
   449                                                    0x0c, 0x0d, 0x0e, 0x17);
   450     vector unsigned char vblue2 =
   451         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
   452         );
   453     /*
   454        0x00 - 0x0f is ARxB ARxB ARxB ARxB
   455        0x10 - 0x0e evens are green
   456      */
   457     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
   458                                                     0x04, 0x05, 0x12, 0x07,
   459                                                     0x08, 0x09, 0x14, 0x0b,
   460                                                     0x0c, 0x0d, 0x16, 0x0f);
   461     vector unsigned char vgreen2 =
   462         (vector unsigned
   463          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
   464         );
   465 
   466 
   467     assert(srcfmt->BytesPerPixel == 2);
   468     assert(dstfmt->BytesPerPixel == 4);
   469 
   470     vf800 = (vector unsigned short) vec_splat_u8(-7);
   471     vf800 = vec_sl(vf800, vec_splat_u16(8));
   472 
   473     if (dstfmt->Amask && srcfmt->alpha) {
   474         ((unsigned char *) &valpha)[0] = alpha = srcfmt->alpha;
   475         valpha = vec_splat(valpha, 0);
   476     } else {
   477         alpha = 0;
   478         valpha = vec_splat_u8(0);
   479     }
   480 
   481     vpermute = calc_swizzle32(NULL, dstfmt);
   482     while (height--) {
   483         vector unsigned char valigner;
   484         vector unsigned char voverflow;
   485         vector unsigned char vsrc;
   486 
   487         int width = info->d_width;
   488         int extrawidth;
   489 
   490         /* do scalar until we can align... */
   491 #define ONE_PIXEL_BLEND(condition, widthvar) \
   492         while (condition) { \
   493             unsigned sR, sG, sB; \
   494             unsigned short Pixel = *((unsigned short *)src); \
   495             sR = (Pixel >> 7) & 0xf8; \
   496             sG = (Pixel >> 2) & 0xf8; \
   497             sB = (Pixel << 3) & 0xf8; \
   498             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
   499             src += 2; \
   500             dst += 4; \
   501             widthvar--; \
   502         }
   503         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   504 
   505         /* After all that work, here's the vector part! */
   506         extrawidth = (width % 8);       /* trailing unaligned stores */
   507         width -= extrawidth;
   508         vsrc = vec_ld(0, src);
   509         valigner = VEC_ALIGNER(src);
   510 
   511         while (width) {
   512             vector unsigned short vR, vG, vB;
   513             vector unsigned char vdst1, vdst2;
   514 
   515             voverflow = vec_ld(15, src);
   516             vsrc = vec_perm(vsrc, voverflow, valigner);
   517 
   518             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
   519             vB = vec_sl((vector unsigned short) vsrc, v3);
   520             vG = vec_sl(vB, v3);
   521 
   522             vdst1 =
   523                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   524                                                 valpha, vredalpha1);
   525             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
   526             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
   527             vdst1 = vec_perm(vdst1, valpha, vpermute);
   528             vec_st(vdst1, 0, dst);
   529 
   530             vdst2 =
   531                 (vector unsigned char) vec_perm((vector unsigned char) vR,
   532                                                 valpha, vredalpha2);
   533             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
   534             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
   535             vdst2 = vec_perm(vdst2, valpha, vpermute);
   536             vec_st(vdst2, 16, dst);
   537 
   538             width -= 8;
   539             dst += 32;
   540             src += 16;
   541             vsrc = voverflow;
   542         }
   543 
   544         assert(width == 0);
   545 
   546 
   547         /* do scalar until we can align... */
   548         ONE_PIXEL_BLEND((extrawidth), extrawidth);
   549 #undef ONE_PIXEL_BLEND
   550 
   551         src += srcskip;         /* move to next row, accounting for pitch. */
   552         dst += dstskip;
   553     }
   554 
   555 }
   556 
   557 static void BlitNtoNKey(SDL_BlitInfo * info);
   558 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
   559 static void
   560 Blit32to32KeyAltivec(SDL_BlitInfo * info)
   561 {
   562     int height = info->d_height;
   563     Uint32 *srcp = (Uint32 *) info->s_pixels;
   564     int srcskip = info->s_skip;
   565     Uint32 *dstp = (Uint32 *) info->d_pixels;
   566     int dstskip = info->d_skip;
   567     SDL_PixelFormat *srcfmt = info->src;
   568     int srcbpp = srcfmt->BytesPerPixel;
   569     SDL_PixelFormat *dstfmt = info->dst;
   570     int dstbpp = dstfmt->BytesPerPixel;
   571     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
   572     unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   573     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   574     Uint32 ckey = info->src->colorkey;
   575     vector unsigned int valpha;
   576     vector unsigned char vpermute;
   577     vector unsigned char vzero;
   578     vector unsigned int vckey;
   579     vector unsigned int vrgbmask;
   580     vpermute = calc_swizzle32(srcfmt, dstfmt);
   581     if (info->d_width < 16) {
   582         if (copy_alpha) {
   583             BlitNtoNKeyCopyAlpha(info);
   584         } else {
   585             BlitNtoNKey(info);
   586         }
   587         return;
   588     }
   589     vzero = vec_splat_u8(0);
   590     if (alpha) {
   591         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
   592         valpha =
   593             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
   594     } else {
   595         valpha = (vector unsigned int) vzero;
   596     }
   597     ckey &= rgbmask;
   598     ((unsigned int *) (char *) &vckey)[0] = ckey;
   599     vckey = vec_splat(vckey, 0);
   600     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
   601     vrgbmask = vec_splat(vrgbmask, 0);
   602 
   603     while (height--) {
   604 #define ONE_PIXEL_BLEND(condition, widthvar) \
   605         if (copy_alpha) { \
   606             while (condition) { \
   607                 Uint32 Pixel; \
   608                 unsigned sR, sG, sB, sA; \
   609                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
   610                           sR, sG, sB, sA); \
   611                 if ( (Pixel & rgbmask) != ckey ) { \
   612                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   613                             sR, sG, sB, sA); \
   614                 } \
   615                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
   616                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
   617                 widthvar--; \
   618             } \
   619         } else { \
   620             while (condition) { \
   621                 Uint32 Pixel; \
   622                 unsigned sR, sG, sB; \
   623                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
   624                 if ( Pixel != ckey ) { \
   625                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
   626                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
   627                               sR, sG, sB, alpha); \
   628                 } \
   629                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
   630                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
   631                 widthvar--; \
   632             } \
   633         }
   634         int width = info->d_width;
   635         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
   636         assert(width > 0);
   637         if (width > 0) {
   638             int extrawidth = (width % 4);
   639             vector unsigned char valigner = VEC_ALIGNER(srcp);
   640             vector unsigned int vs = vec_ld(0, srcp);
   641             width -= extrawidth;
   642             assert(width >= 4);
   643             while (width) {
   644                 vector unsigned char vsel;
   645                 vector unsigned int vd;
   646                 vector unsigned int voverflow = vec_ld(15, srcp);
   647                 /* load the source vec */
   648                 vs = vec_perm(vs, voverflow, valigner);
   649                 /* vsel is set for items that match the key */
   650                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
   651                 vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
   652                 /* permute the src vec to the dest format */
   653                 vs = vec_perm(vs, valpha, vpermute);
   654                 /* load the destination vec */
   655                 vd = vec_ld(0, dstp);
   656                 /* select the source and dest into vs */
   657                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
   658                                                    (vector unsigned char) vd,
   659                                                    vsel);
   660 
   661                 vec_st(vd, 0, dstp);
   662                 srcp += 4;
   663                 width -= 4;
   664                 dstp += 4;
   665                 vs = voverflow;
   666             }
   667             ONE_PIXEL_BLEND((extrawidth), extrawidth);
   668 #undef ONE_PIXEL_BLEND
   669             srcp += srcskip >> 2;
   670             dstp += dstskip >> 2;
   671         }
   672     }
   673 }
   674 
   675 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   676 /* Use this on a G5 */
   677 static void
   678 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
   679 {
   680     int height = info->d_height;
   681     Uint32 *src = (Uint32 *) info->s_pixels;
   682     int srcskip = info->s_skip;
   683     Uint32 *dst = (Uint32 *) info->d_pixels;
   684     int dstskip = info->d_skip;
   685     SDL_PixelFormat *srcfmt = info->src;
   686     SDL_PixelFormat *dstfmt = info->dst;
   687     vector unsigned int vzero = vec_splat_u32(0);
   688     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   689     if (dstfmt->Amask && !srcfmt->Amask) {
   690         if (srcfmt->alpha) {
   691             vector unsigned char valpha;
   692             ((unsigned char *) &valpha)[0] = srcfmt->alpha;
   693             vzero = (vector unsigned int) vec_splat(valpha, 0);
   694         }
   695     }
   696 
   697     assert(srcfmt->BytesPerPixel == 4);
   698     assert(dstfmt->BytesPerPixel == 4);
   699 
   700     while (height--) {
   701         vector unsigned char valigner;
   702         vector unsigned int vbits;
   703         vector unsigned int voverflow;
   704         Uint32 bits;
   705         Uint8 r, g, b, a;
   706 
   707         int width = info->d_width;
   708         int extrawidth;
   709 
   710         /* do scalar until we can align... */
   711         while ((UNALIGNED_PTR(dst)) && (width)) {
   712             bits = *(src++);
   713             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   714             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   715             width--;
   716         }
   717 
   718         /* After all that work, here's the vector part! */
   719         extrawidth = (width % 4);
   720         width -= extrawidth;
   721         valigner = VEC_ALIGNER(src);
   722         vbits = vec_ld(0, src);
   723 
   724         while (width) {
   725             voverflow = vec_ld(15, src);
   726             src += 4;
   727             width -= 4;
   728             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   729             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   730             vec_st(vbits, 0, dst);      /* store it back out. */
   731             dst += 4;
   732             vbits = voverflow;
   733         }
   734 
   735         assert(width == 0);
   736 
   737         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   738         while (extrawidth) {
   739             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   740             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   741             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   742             extrawidth--;
   743         }
   744 
   745         src += srcskip >> 2;    /* move to next row, accounting for pitch. */
   746         dst += dstskip >> 2;
   747     }
   748 
   749 }
   750 
   751 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
   752 /* Use this on a G4 */
   753 static void
   754 ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
   755 {
   756     const int scalar_dst_lead = sizeof(Uint32) * 4;
   757     const int vector_dst_lead = sizeof(Uint32) * 16;
   758 
   759     int height = info->d_height;
   760     Uint32 *src = (Uint32 *) info->s_pixels;
   761     int srcskip = info->s_skip;
   762     Uint32 *dst = (Uint32 *) info->d_pixels;
   763     int dstskip = info->d_skip;
   764     SDL_PixelFormat *srcfmt = info->src;
   765     SDL_PixelFormat *dstfmt = info->dst;
   766     vector unsigned int vzero = vec_splat_u32(0);
   767     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
   768     if (dstfmt->Amask && !srcfmt->Amask) {
   769         if (srcfmt->alpha) {
   770             vector unsigned char valpha;
   771             ((unsigned char *) &valpha)[0] = srcfmt->alpha;
   772             vzero = (vector unsigned int) vec_splat(valpha, 0);
   773         }
   774     }
   775 
   776     assert(srcfmt->BytesPerPixel == 4);
   777     assert(dstfmt->BytesPerPixel == 4);
   778 
   779     while (height--) {
   780         vector unsigned char valigner;
   781         vector unsigned int vbits;
   782         vector unsigned int voverflow;
   783         Uint32 bits;
   784         Uint8 r, g, b, a;
   785 
   786         int width = info->d_width;
   787         int extrawidth;
   788 
   789         /* do scalar until we can align... */
   790         while ((UNALIGNED_PTR(dst)) && (width)) {
   791             vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   792                      DST_CHAN_SRC);
   793             vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
   794                       DST_CHAN_DEST);
   795             bits = *(src++);
   796             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   797             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   798             width--;
   799         }
   800 
   801         /* After all that work, here's the vector part! */
   802         extrawidth = (width % 4);
   803         width -= extrawidth;
   804         valigner = VEC_ALIGNER(src);
   805         vbits = vec_ld(0, src);
   806 
   807         while (width) {
   808             vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
   809                      DST_CHAN_SRC);
   810             vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
   811                       DST_CHAN_DEST);
   812             voverflow = vec_ld(15, src);
   813             src += 4;
   814             width -= 4;
   815             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
   816             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
   817             vec_st(vbits, 0, dst);      /* store it back out. */
   818             dst += 4;
   819             vbits = voverflow;
   820         }
   821 
   822         assert(width == 0);
   823 
   824         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
   825         while (extrawidth) {
   826             bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
   827             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
   828             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
   829             extrawidth--;
   830         }
   831 
   832         src += srcskip >> 2;    /* move to next row, accounting for pitch. */
   833         dst += dstskip >> 2;
   834     }
   835 
   836     vec_dss(DST_CHAN_SRC);
   837     vec_dss(DST_CHAN_DEST);
   838 }
   839 
   840 static Uint32
   841 GetBlitFeatures(void)
   842 {
   843     static Uint32 features = 0xffffffff;
   844     if (features == 0xffffffff) {
   845         /* Provide an override for testing .. */
   846         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
   847         if (override) {
   848             features = 0;
   849             SDL_sscanf(override, "%u", &features);
   850         } else {
   851             features = (0
   852                         /* Feature 1 is has-MMX */
   853                         | ((SDL_HasMMX())? 1 : 0)
   854                         /* Feature 2 is has-AltiVec */
   855                         | ((SDL_HasAltiVec())? 2 : 0)
   856                         /* Feature 4 is dont-use-prefetch */
   857                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
   858                         | ((GetL3CacheSize() == 0) ? 4 : 0)
   859                 );
   860         }
   861     }
   862     return features;
   863 }
   864 
   865 #if __MWERKS__
   866 #pragma altivec_model off
   867 #endif
   868 #else
   869 /* Feature 1 is has-MMX */
   870 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
   871 #endif
   872 
   873 /* This is now endian dependent */
   874 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   875 #define HI	1
   876 #define LO	0
   877 #else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
   878 #define HI	0
   879 #define LO	1
   880 #endif
   881 
   882 #if SDL_HERMES_BLITTERS
   883 
   884 /* Heheheh, we coerce Hermes into using SDL blit information */
   885 #define X86_ASSEMBLER
   886 #define HermesConverterInterface	SDL_BlitInfo
   887 #define HermesClearInterface		void
   888 #define STACKCALL
   889 
   890 #include "../hermes/HeadMMX.h"
   891 #include "../hermes/HeadX86.h"
   892 
   893 #else
   894 
   895 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   896 #define RGB888_RGB332(dst, src) { \
   897 	dst = (Uint8)((((src)&0x00E00000)>>16)| \
   898 	              (((src)&0x0000E000)>>11)| \
   899 	              (((src)&0x000000C0)>>6)); \
   900 }
   901 static void
   902 Blit_RGB888_index8(SDL_BlitInfo * info)
   903 {
   904 #ifndef USE_DUFFS_LOOP
   905     int c;
   906 #endif
   907     int width, height;
   908     Uint32 *src;
   909     const Uint8 *map;
   910     Uint8 *dst;
   911     int srcskip, dstskip;
   912 
   913     /* Set up some basic variables */
   914     width = info->d_width;
   915     height = info->d_height;
   916     src = (Uint32 *) info->s_pixels;
   917     srcskip = info->s_skip / 4;
   918     dst = info->d_pixels;
   919     dstskip = info->d_skip;
   920     map = info->table;
   921 
   922     if (map == NULL) {
   923         while (height--) {
   924 #ifdef USE_DUFFS_LOOP
   925 			/* *INDENT-OFF* */
   926 			DUFFS_LOOP(
   927 				RGB888_RGB332(*dst++, *src);
   928 			, width);
   929 			/* *INDENT-ON* */
   930 #else
   931             for (c = width / 4; c; --c) {
   932                 /* Pack RGB into 8bit pixel */
   933                 ++src;
   934                 RGB888_RGB332(*dst++, *src);
   935                 ++src;
   936                 RGB888_RGB332(*dst++, *src);
   937                 ++src;
   938                 RGB888_RGB332(*dst++, *src);
   939                 ++src;
   940             }
   941             switch (width & 3) {
   942             case 3:
   943                 RGB888_RGB332(*dst++, *src);
   944                 ++src;
   945             case 2:
   946                 RGB888_RGB332(*dst++, *src);
   947                 ++src;
   948             case 1:
   949                 RGB888_RGB332(*dst++, *src);
   950                 ++src;
   951             }
   952 #endif /* USE_DUFFS_LOOP */
   953             src += srcskip;
   954             dst += dstskip;
   955         }
   956     } else {
   957         int Pixel;
   958 
   959         while (height--) {
   960 #ifdef USE_DUFFS_LOOP
   961 			/* *INDENT-OFF* */
   962 			DUFFS_LOOP(
   963 				RGB888_RGB332(Pixel, *src);
   964 				*dst++ = map[Pixel];
   965 				++src;
   966 			, width);
   967 			/* *INDENT-ON* */
   968 #else
   969             for (c = width / 4; c; --c) {
   970                 /* Pack RGB into 8bit pixel */
   971                 RGB888_RGB332(Pixel, *src);
   972                 *dst++ = map[Pixel];
   973                 ++src;
   974                 RGB888_RGB332(Pixel, *src);
   975                 *dst++ = map[Pixel];
   976                 ++src;
   977                 RGB888_RGB332(Pixel, *src);
   978                 *dst++ = map[Pixel];
   979                 ++src;
   980                 RGB888_RGB332(Pixel, *src);
   981                 *dst++ = map[Pixel];
   982                 ++src;
   983             }
   984             switch (width & 3) {
   985             case 3:
   986                 RGB888_RGB332(Pixel, *src);
   987                 *dst++ = map[Pixel];
   988                 ++src;
   989             case 2:
   990                 RGB888_RGB332(Pixel, *src);
   991                 *dst++ = map[Pixel];
   992                 ++src;
   993             case 1:
   994                 RGB888_RGB332(Pixel, *src);
   995                 *dst++ = map[Pixel];
   996                 ++src;
   997             }
   998 #endif /* USE_DUFFS_LOOP */
   999             src += srcskip;
  1000             dst += dstskip;
  1001         }
  1002     }
  1003 }
  1004 
  1005 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
  1006 #define RGB888_RGB555(dst, src) { \
  1007 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
  1008 	                            (((*src)&0x0000F800)>>6)| \
  1009 	                            (((*src)&0x000000F8)>>3)); \
  1010 }
  1011 #define RGB888_RGB555_TWO(dst, src) { \
  1012 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
  1013 	                     (((src[HI])&0x0000F800)>>6)| \
  1014 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1015 	                     (((src[LO])&0x00F80000)>>9)| \
  1016 	                     (((src[LO])&0x0000F800)>>6)| \
  1017 	                     (((src[LO])&0x000000F8)>>3); \
  1018 }
  1019 static void
  1020 Blit_RGB888_RGB555(SDL_BlitInfo * info)
  1021 {
  1022 #ifndef USE_DUFFS_LOOP
  1023     int c;
  1024 #endif
  1025     int width, height;
  1026     Uint32 *src;
  1027     Uint16 *dst;
  1028     int srcskip, dstskip;
  1029 
  1030     /* Set up some basic variables */
  1031     width = info->d_width;
  1032     height = info->d_height;
  1033     src = (Uint32 *) info->s_pixels;
  1034     srcskip = info->s_skip / 4;
  1035     dst = (Uint16 *) info->d_pixels;
  1036     dstskip = info->d_skip / 2;
  1037 
  1038 #ifdef USE_DUFFS_LOOP
  1039     while (height--) {
  1040 		/* *INDENT-OFF* */
  1041 		DUFFS_LOOP(
  1042 			RGB888_RGB555(dst, src);
  1043 			++src;
  1044 			++dst;
  1045 		, width);
  1046 		/* *INDENT-ON* */
  1047         src += srcskip;
  1048         dst += dstskip;
  1049     }
  1050 #else
  1051     /* Memory align at 4-byte boundary, if necessary */
  1052     if ((long) dst & 0x03) {
  1053         /* Don't do anything if width is 0 */
  1054         if (width == 0) {
  1055             return;
  1056         }
  1057         --width;
  1058 
  1059         while (height--) {
  1060             /* Perform copy alignment */
  1061             RGB888_RGB555(dst, src);
  1062             ++src;
  1063             ++dst;
  1064 
  1065             /* Copy in 4 pixel chunks */
  1066             for (c = width / 4; c; --c) {
  1067                 RGB888_RGB555_TWO(dst, src);
  1068                 src += 2;
  1069                 dst += 2;
  1070                 RGB888_RGB555_TWO(dst, src);
  1071                 src += 2;
  1072                 dst += 2;
  1073             }
  1074             /* Get any leftovers */
  1075             switch (width & 3) {
  1076             case 3:
  1077                 RGB888_RGB555(dst, src);
  1078                 ++src;
  1079                 ++dst;
  1080             case 2:
  1081                 RGB888_RGB555_TWO(dst, src);
  1082                 src += 2;
  1083                 dst += 2;
  1084                 break;
  1085             case 1:
  1086                 RGB888_RGB555(dst, src);
  1087                 ++src;
  1088                 ++dst;
  1089                 break;
  1090             }
  1091             src += srcskip;
  1092             dst += dstskip;
  1093         }
  1094     } else {
  1095         while (height--) {
  1096             /* Copy in 4 pixel chunks */
  1097             for (c = width / 4; c; --c) {
  1098                 RGB888_RGB555_TWO(dst, src);
  1099                 src += 2;
  1100                 dst += 2;
  1101                 RGB888_RGB555_TWO(dst, src);
  1102                 src += 2;
  1103                 dst += 2;
  1104             }
  1105             /* Get any leftovers */
  1106             switch (width & 3) {
  1107             case 3:
  1108                 RGB888_RGB555(dst, src);
  1109                 ++src;
  1110                 ++dst;
  1111             case 2:
  1112                 RGB888_RGB555_TWO(dst, src);
  1113                 src += 2;
  1114                 dst += 2;
  1115                 break;
  1116             case 1:
  1117                 RGB888_RGB555(dst, src);
  1118                 ++src;
  1119                 ++dst;
  1120                 break;
  1121             }
  1122             src += srcskip;
  1123             dst += dstskip;
  1124         }
  1125     }
  1126 #endif /* USE_DUFFS_LOOP */
  1127 }
  1128 
  1129 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
  1130 #define RGB888_RGB565(dst, src) { \
  1131 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
  1132 	                            (((*src)&0x0000FC00)>>5)| \
  1133 	                            (((*src)&0x000000F8)>>3)); \
  1134 }
  1135 #define RGB888_RGB565_TWO(dst, src) { \
  1136 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
  1137 	                     (((src[HI])&0x0000FC00)>>5)| \
  1138 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
  1139 	                     (((src[LO])&0x00F80000)>>8)| \
  1140 	                     (((src[LO])&0x0000FC00)>>5)| \
  1141 	                     (((src[LO])&0x000000F8)>>3); \
  1142 }
  1143 static void
  1144 Blit_RGB888_RGB565(SDL_BlitInfo * info)
  1145 {
  1146 #ifndef USE_DUFFS_LOOP
  1147     int c;
  1148 #endif
  1149     int width, height;
  1150     Uint32 *src;
  1151     Uint16 *dst;
  1152     int srcskip, dstskip;
  1153 
  1154     /* Set up some basic variables */
  1155     width = info->d_width;
  1156     height = info->d_height;
  1157     src = (Uint32 *) info->s_pixels;
  1158     srcskip = info->s_skip / 4;
  1159     dst = (Uint16 *) info->d_pixels;
  1160     dstskip = info->d_skip / 2;
  1161 
  1162 #ifdef USE_DUFFS_LOOP
  1163     while (height--) {
  1164 		/* *INDENT-OFF* */
  1165 		DUFFS_LOOP(
  1166 			RGB888_RGB565(dst, src);
  1167 			++src;
  1168 			++dst;
  1169 		, width);
  1170 		/* *INDENT-ON* */
  1171         src += srcskip;
  1172         dst += dstskip;
  1173     }
  1174 #else
  1175     /* Memory align at 4-byte boundary, if necessary */
  1176     if ((long) dst & 0x03) {
  1177         /* Don't do anything if width is 0 */
  1178         if (width == 0) {
  1179             return;
  1180         }
  1181         --width;
  1182 
  1183         while (height--) {
  1184             /* Perform copy alignment */
  1185             RGB888_RGB565(dst, src);
  1186             ++src;
  1187             ++dst;
  1188 
  1189             /* Copy in 4 pixel chunks */
  1190             for (c = width / 4; c; --c) {
  1191                 RGB888_RGB565_TWO(dst, src);
  1192                 src += 2;
  1193                 dst += 2;
  1194                 RGB888_RGB565_TWO(dst, src);
  1195                 src += 2;
  1196                 dst += 2;
  1197             }
  1198             /* Get any leftovers */
  1199             switch (width & 3) {
  1200             case 3:
  1201                 RGB888_RGB565(dst, src);
  1202                 ++src;
  1203                 ++dst;
  1204             case 2:
  1205                 RGB888_RGB565_TWO(dst, src);
  1206                 src += 2;
  1207                 dst += 2;
  1208                 break;
  1209             case 1:
  1210                 RGB888_RGB565(dst, src);
  1211                 ++src;
  1212                 ++dst;
  1213                 break;
  1214             }
  1215             src += srcskip;
  1216             dst += dstskip;
  1217         }
  1218     } else {
  1219         while (height--) {
  1220             /* Copy in 4 pixel chunks */
  1221             for (c = width / 4; c; --c) {
  1222                 RGB888_RGB565_TWO(dst, src);
  1223                 src += 2;
  1224                 dst += 2;
  1225                 RGB888_RGB565_TWO(dst, src);
  1226                 src += 2;
  1227                 dst += 2;
  1228             }
  1229             /* Get any leftovers */
  1230             switch (width & 3) {
  1231             case 3:
  1232                 RGB888_RGB565(dst, src);
  1233                 ++src;
  1234                 ++dst;
  1235             case 2:
  1236                 RGB888_RGB565_TWO(dst, src);
  1237                 src += 2;
  1238                 dst += 2;
  1239                 break;
  1240             case 1:
  1241                 RGB888_RGB565(dst, src);
  1242                 ++src;
  1243                 ++dst;
  1244                 break;
  1245             }
  1246             src += srcskip;
  1247             dst += dstskip;
  1248         }
  1249     }
  1250 #endif /* USE_DUFFS_LOOP */
  1251 }
  1252 
  1253 #endif /* SDL_HERMES_BLITTERS */
  1254 
  1255 
  1256 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
  1257 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
  1258 static void
  1259 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
  1260 {
  1261 #ifndef USE_DUFFS_LOOP
  1262     int c;
  1263 #endif
  1264     int width, height;
  1265     Uint8 *src;
  1266     Uint32 *dst;
  1267     int srcskip, dstskip;
  1268 
  1269     /* Set up some basic variables */
  1270     width = info->d_width;
  1271     height = info->d_height;
  1272     src = (Uint8 *) info->s_pixels;
  1273     srcskip = info->s_skip;
  1274     dst = (Uint32 *) info->d_pixels;
  1275     dstskip = info->d_skip / 4;
  1276 
  1277 #ifdef USE_DUFFS_LOOP
  1278     while (height--) {
  1279 		/* *INDENT-OFF* */
  1280 		DUFFS_LOOP(
  1281 		{
  1282 			*dst++ = RGB565_32(dst, src, map);
  1283 			src += 2;
  1284 		},
  1285 		width);
  1286 		/* *INDENT-ON* */
  1287         src += srcskip;
  1288         dst += dstskip;
  1289     }
  1290 #else
  1291     while (height--) {
  1292         /* Copy in 4 pixel chunks */
  1293         for (c = width / 4; c; --c) {
  1294             *dst++ = RGB565_32(dst, src, map);
  1295             src += 2;
  1296             *dst++ = RGB565_32(dst, src, map);
  1297             src += 2;
  1298             *dst++ = RGB565_32(dst, src, map);
  1299             src += 2;
  1300             *dst++ = RGB565_32(dst, src, map);
  1301             src += 2;
  1302         }
  1303         /* Get any leftovers */
  1304         switch (width & 3) {
  1305         case 3:
  1306             *dst++ = RGB565_32(dst, src, map);
  1307             src += 2;
  1308         case 2:
  1309             *dst++ = RGB565_32(dst, src, map);
  1310             src += 2;
  1311         case 1:
  1312             *dst++ = RGB565_32(dst, src, map);
  1313             src += 2;
  1314             break;
  1315         }
  1316         src += srcskip;
  1317         dst += dstskip;
  1318     }
  1319 #endif /* USE_DUFFS_LOOP */
  1320 }
  1321 
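/* Lookup table for the special optimized RGB 5-6-5 --> ARGB 8-8-8-8 blit.
 * Entry [2*n] is the contribution of a low source byte with value n and
 * entry [2*n+1] that of a high source byte with value n (the high-byte
 * entries also carry the 0xff alpha); RGB565_32 adds the two entries to
 * form the destination pixel.
 */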
  1323 static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1324     0x00000000, 0xff000000, 0x00000008, 0xff002000,
  1325     0x00000010, 0xff004000, 0x00000018, 0xff006100,
  1326     0x00000020, 0xff008100, 0x00000029, 0xff00a100,
  1327     0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
  1328     0x00000041, 0xff080000, 0x0000004a, 0xff082000,
  1329     0x00000052, 0xff084000, 0x0000005a, 0xff086100,
  1330     0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
  1331     0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
  1332     0x00000083, 0xff100000, 0x0000008b, 0xff102000,
  1333     0x00000094, 0xff104000, 0x0000009c, 0xff106100,
  1334     0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
  1335     0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
  1336     0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
  1337     0x000000d5, 0xff184000, 0x000000de, 0xff186100,
  1338     0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
  1339     0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
  1340     0x00000400, 0xff200000, 0x00000408, 0xff202000,
  1341     0x00000410, 0xff204000, 0x00000418, 0xff206100,
  1342     0x00000420, 0xff208100, 0x00000429, 0xff20a100,
  1343     0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
  1344     0x00000441, 0xff290000, 0x0000044a, 0xff292000,
  1345     0x00000452, 0xff294000, 0x0000045a, 0xff296100,
  1346     0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
  1347     0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
  1348     0x00000483, 0xff310000, 0x0000048b, 0xff312000,
  1349     0x00000494, 0xff314000, 0x0000049c, 0xff316100,
  1350     0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
  1351     0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
  1352     0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
  1353     0x000004d5, 0xff394000, 0x000004de, 0xff396100,
  1354     0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
  1355     0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
  1356     0x00000800, 0xff410000, 0x00000808, 0xff412000,
  1357     0x00000810, 0xff414000, 0x00000818, 0xff416100,
  1358     0x00000820, 0xff418100, 0x00000829, 0xff41a100,
  1359     0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
  1360     0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
  1361     0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
  1362     0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
  1363     0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
  1364     0x00000883, 0xff520000, 0x0000088b, 0xff522000,
  1365     0x00000894, 0xff524000, 0x0000089c, 0xff526100,
  1366     0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
  1367     0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
  1368     0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
  1369     0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
  1370     0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
  1371     0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
  1372     0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
  1373     0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
  1374     0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
  1375     0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
  1376     0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
  1377     0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
  1378     0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
  1379     0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
  1380     0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
  1381     0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
  1382     0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
  1383     0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
  1384     0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
  1385     0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
  1386     0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
  1387     0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
  1388     0x00001000, 0xff830000, 0x00001008, 0xff832000,
  1389     0x00001010, 0xff834000, 0x00001018, 0xff836100,
  1390     0x00001020, 0xff838100, 0x00001029, 0xff83a100,
  1391     0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
  1392     0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
  1393     0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
  1394     0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
  1395     0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
  1396     0x00001083, 0xff940000, 0x0000108b, 0xff942000,
  1397     0x00001094, 0xff944000, 0x0000109c, 0xff946100,
  1398     0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
  1399     0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
  1400     0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
  1401     0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
  1402     0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
  1403     0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
  1404     0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
  1405     0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
  1406     0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
  1407     0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
  1408     0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
  1409     0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
  1410     0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
  1411     0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
  1412     0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
  1413     0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
  1414     0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
  1415     0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
  1416     0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
  1417     0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
  1418     0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
  1419     0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
  1420     0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
  1421     0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
  1422     0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
  1423     0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
  1424     0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
  1425     0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
  1426     0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
  1427     0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
  1428     0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
  1429     0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
  1430     0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
  1431     0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
  1432     0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
  1433     0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
  1434     0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
  1435     0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
  1436     0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
  1437     0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
  1438     0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
  1439     0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
  1440     0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
  1441     0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
  1442     0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
  1443     0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
  1444     0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
  1445     0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
  1446     0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
  1447     0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
  1448     0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
  1449     0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
  1450     0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
  1451     0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
  1452 };
  1453 static void
  1454 Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
  1455 {
  1456     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
  1457 }
  1458 
  1459 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
  1460 static const Uint32 RGB565_ABGR8888_LUT[512] = {
  1461     0xff000000, 0x00000000, 0xff080000, 0x00002000,
  1462     0xff100000, 0x00004000, 0xff180000, 0x00006100,
  1463     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
  1464     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
  1465     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
  1466     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
  1467     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
  1468     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
  1469     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
  1470     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
  1471     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
  1472     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
  1473     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
  1474     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
  1475     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
  1476     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
  1477     0xff000400, 0x00000020, 0xff080400, 0x00002020,
  1478     0xff100400, 0x00004020, 0xff180400, 0x00006120,
  1479     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
  1480     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
  1481     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
  1482     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
  1483     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
  1484     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
  1485     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
  1486     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
  1487     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
  1488     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
  1489     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
  1490     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
  1491     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
  1492     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
  1493     0xff000800, 0x00000041, 0xff080800, 0x00002041,
  1494     0xff100800, 0x00004041, 0xff180800, 0x00006141,
  1495     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
  1496     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
  1497     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
  1498     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
  1499     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
  1500     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
  1501     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
  1502     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
  1503     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
  1504     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
  1505     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
  1506     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
  1507     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
  1508     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
  1509     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
  1510     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
  1511     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
  1512     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
  1513     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
  1514     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
  1515     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
  1516     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
  1517     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
  1518     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
  1519     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
  1520     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
  1521     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
  1522     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
  1523     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
  1524     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
  1525     0xff001000, 0x00000083, 0xff081000, 0x00002083,
  1526     0xff101000, 0x00004083, 0xff181000, 0x00006183,
  1527     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
  1528     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
  1529     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
  1530     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
  1531     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
  1532     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
  1533     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
  1534     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
  1535     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
  1536     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
  1537     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
  1538     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
  1539     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
  1540     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
  1541     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
  1542     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
  1543     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
  1544     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
  1545     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
  1546     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
  1547     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
  1548     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
  1549     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
  1550     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
  1551     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
  1552     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
  1553     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
  1554     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
  1555     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
  1556     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
  1557     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
  1558     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
  1559     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
  1560     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
  1561     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
  1562     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
  1563     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
  1564     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
  1565     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
  1566     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
  1567     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
  1568     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
  1569     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
  1570     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
  1571     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
  1572     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
  1573     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
  1574     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
  1575     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
  1576     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
  1577     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
  1578     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
  1579     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
  1580     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
  1581     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
  1582     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
  1583     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
  1584     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
  1585     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
  1586     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
  1587     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
  1588     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
  1589 };
  1590 static void
  1591 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
  1592 {
  1593     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
  1594 }
  1595 
  1596 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
        *
        * The 512 entries form 256 pairs.  In pair i the first entry holds
        * 0xff in bits 0-7, a value in bits 8-15 that steps through 32 levels
        * (0x00..0xff) with (i & 0x1f), and a small value (0x00..0x1c) in
        * bits 16-23 that steps with (i >> 5).  The second entry holds a value
        * in bits 24-31 that steps through 32 levels (0x00..0xff) with (i >> 3)
        * and a value (0x00..0xe2) in bits 16-23 that steps with (i & 7).
        * NOTE(review): presumably the first entry of a pair is selected by the
        * low byte of the 5-6-5 pixel and the second by the high byte, and the
        * two are summed -- confirm against Blit_RGB565_32().
        */
  1597 static const Uint32 RGB565_RGBA8888_LUT[512] = {
  1598     0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
  1599     0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
  1600     0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
  1601     0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
  1602     0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
  1603     0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
  1604     0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
  1605     0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
  1606     0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
  1607     0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
  1608     0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
  1609     0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
  1610     0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
  1611     0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
  1612     0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
  1613     0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
  1614     0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
  1615     0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
  1616     0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
  1617     0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
  1618     0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
  1619     0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
  1620     0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
  1621     0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
  1622     0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
  1623     0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
  1624     0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
  1625     0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
  1626     0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
  1627     0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
  1628     0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
  1629     0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
  1630     0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
  1631     0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
  1632     0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
  1633     0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
  1634     0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
  1635     0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
  1636     0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
  1637     0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
  1638     0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
  1639     0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
  1640     0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
  1641     0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
  1642     0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
  1643     0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
  1644     0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
  1645     0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
  1646     0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
  1647     0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
  1648     0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
  1649     0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
  1650     0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
  1651     0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
  1652     0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
  1653     0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
  1654     0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
  1655     0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
  1656     0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
  1657     0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
  1658     0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
  1659     0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
  1660     0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
  1661     0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
  1662     0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
  1663     0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
  1664     0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
  1665     0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
  1666     0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
  1667     0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
  1668     0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
  1669     0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
  1670     0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
  1671     0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
  1672     0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
  1673     0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
  1674     0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
  1675     0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
  1676     0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
  1677     0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
  1678     0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
  1679     0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
  1680     0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
  1681     0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
  1682     0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
  1683     0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
  1684     0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
  1685     0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
  1686     0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
  1687     0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
  1688     0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
  1689     0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
  1690     0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
  1691     0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
  1692     0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
  1693     0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
  1694     0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
  1695     0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
  1696     0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
  1697     0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
  1698     0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
  1699     0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
  1700     0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
  1701     0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
  1702     0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
  1703     0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
  1704     0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
  1705     0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
  1706     0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
  1707     0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
  1708     0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
  1709     0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
  1710     0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
  1711     0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
  1712     0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
  1713     0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
  1714     0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
  1715     0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
  1716     0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
  1717     0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
  1718     0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
  1719     0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
  1720     0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
  1721     0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
  1722     0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
  1723     0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
  1724     0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
  1725     0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
  1726 };
  1727 static void
  1728 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
  1729 {
  1730     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
  1731 }
  1732 
  1733 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
        *
        * The 512 entries form 256 pairs.  In pair i the first entry holds a
        * value in bits 24-31 that steps through 32 levels (0x00..0xff) with
        * (i & 0x1f) and a small value (0x00..0x1c) in bits 16-23 that steps
        * with (i >> 5).  The second entry holds 0xff in bits 0-7, a value in
        * bits 8-15 that steps through 32 levels (0x00..0xff) with (i >> 3),
        * and a value (0x00..0xe2) in bits 16-23 that steps with (i & 7).
        * NOTE(review): presumably the first entry of a pair is selected by the
        * low byte of the 5-6-5 pixel and the second by the high byte, and the
        * two are summed -- confirm against Blit_RGB565_32().
        */
  1734 static const Uint32 RGB565_BGRA8888_LUT[512] = {
  1735     0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
  1736     0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
  1737     0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
  1738     0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
  1739     0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
  1740     0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
  1741     0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
  1742     0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
  1743     0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
  1744     0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
  1745     0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
  1746     0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
  1747     0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
  1748     0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
  1749     0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
  1750     0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
  1751     0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
  1752     0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
  1753     0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
  1754     0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
  1755     0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
  1756     0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
  1757     0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
  1758     0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
  1759     0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
  1760     0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
  1761     0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
  1762     0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
  1763     0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
  1764     0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
  1765     0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
  1766     0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
  1767     0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
  1768     0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
  1769     0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
  1770     0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
  1771     0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
  1772     0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
  1773     0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
  1774     0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
  1775     0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
  1776     0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
  1777     0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
  1778     0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
  1779     0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
  1780     0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
  1781     0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
  1782     0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
  1783     0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
  1784     0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
  1785     0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
  1786     0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
  1787     0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
  1788     0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
  1789     0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
  1790     0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
  1791     0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
  1792     0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
  1793     0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
  1794     0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
  1795     0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
  1796     0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
  1797     0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
  1798     0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
  1799     0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
  1800     0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
  1801     0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
  1802     0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
  1803     0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
  1804     0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
  1805     0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
  1806     0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
  1807     0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
  1808     0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
  1809     0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
  1810     0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
  1811     0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
  1812     0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
  1813     0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
  1814     0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
  1815     0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
  1816     0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
  1817     0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
  1818     0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
  1819     0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
  1820     0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
  1821     0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
  1822     0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
  1823     0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
  1824     0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
  1825     0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
  1826     0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
  1827     0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
  1828     0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
  1829     0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
  1830     0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
  1831     0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
  1832     0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
  1833     0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
  1834     0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
  1835     0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
  1836     0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
  1837     0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
  1838     0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
  1839     0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
  1840     0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
  1841     0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
  1842     0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
  1843     0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
  1844     0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
  1845     0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
  1846     0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
  1847     0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
  1848     0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
  1849     0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
  1850     0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
  1851     0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
  1852     0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
  1853     0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
  1854     0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
  1855     0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
  1856     0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
  1857     0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
  1858     0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
  1859     0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
  1860     0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
  1861     0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
  1862     0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
  1863 };
  1864 static void
  1865 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
  1866 {
  1867     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
  1868 }
  1869 
  /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
  /*
   * RGB888_RGB332(dst, src): store into `dst` the 8-bit value built from
   *   bits 21-23 of src (mask 0x00E00000) -> result bits 5-7
   *   bits 13-15 of src (mask 0x0000E000) -> result bits 2-4
   *   bits  6-7  of src (mask 0x000000C0) -> result bits 0-1
   * `dst` may be any lvalue expression (it is parenthesized), and `src` is
   * evaluated three times, so it must be free of side effects.
   * The body is wrapped in do { } while (0) so the macro behaves as a single
   * statement, e.g. in an unbraced if/else.  Only defined when no earlier
   * definition exists.
   */
  #ifndef RGB888_RGB332
  #define RGB888_RGB332(dst, src) do { \
  	(dst) = (((src)&0x00E00000)>>16)| \
  	        (((src)&0x0000E000)>>11)| \
  	        (((src)&0x000000C0)>>6); \
  } while (0)
  #endif
  1878 static void
  1879 Blit_RGB888_index8_map(SDL_BlitInfo * info)
  1880 {
  1881 #ifndef USE_DUFFS_LOOP
  1882     int c;
  1883 #endif
  1884     int Pixel;
  1885     int width, height;
  1886     Uint32 *src;
  1887     const Uint8 *map;
  1888     Uint8 *dst;
  1889     int srcskip, dstskip;
  1890 
  1891     /* Set up some basic variables */
  1892     width = info->d_width;
  1893     height = info->d_height;
  1894     src = (Uint32 *) info->s_pixels;
  1895     srcskip = info->s_skip / 4;
  1896     dst = info->d_pixels;
  1897     dstskip = info->d_skip;
  1898     map = info->table;
  1899 
  1900 #ifdef USE_DUFFS_LOOP
  1901     while (height--) {
  1902 		/* *INDENT-OFF* */
  1903 		DUFFS_LOOP(
  1904 			RGB888_RGB332(Pixel, *src);
  1905 			*dst++ = map[Pixel];
  1906 			++src;
  1907 		, width);
  1908 		/* *INDENT-ON* */
  1909         src += srcskip;
  1910         dst += dstskip;
  1911     }
  1912 #else
  1913     while (height--) {
  1914         for (c = width / 4; c; --c) {
  1915             /* Pack RGB into 8bit pixel */
  1916             RGB888_RGB332(Pixel, *src);
  1917             *dst++ = map[Pixel];
  1918             ++src;
  1919             RGB888_RGB332(Pixel, *src);
  1920             *dst++ = map[Pixel];
  1921             ++src;
  1922             RGB888_RGB332(Pixel, *src);
  1923             *dst++ = map[Pixel];
  1924             ++src;
  1925             RGB888_RGB332(Pixel, *src);
  1926             *dst++ = map[Pixel];
  1927             ++src;
  1928         }
  1929         switch (width & 3) {
  1930         case 3:
  1931             RGB888_RGB332(Pixel, *src);
  1932             *dst++ = map[Pixel];
  1933             ++src;
  1934         case 2:
  1935             RGB888_RGB332(Pixel, *src);
  1936             *dst++ = map[Pixel];
  1937             ++src;
  1938         case 1:
  1939             RGB888_RGB332(Pixel, *src);
  1940             *dst++ = map[Pixel];
  1941             ++src;
  1942         }
  1943         src += srcskip;
  1944         dst += dstskip;
  1945     }
  1946 #endif /* USE_DUFFS_LOOP */
  1947 }
  1948 static void
  1949 BlitNto1(SDL_BlitInfo * info)
  1950 {
  1951 #ifndef USE_DUFFS_LOOP
  1952     int c;
  1953 #endif
  1954     int width, height;
  1955     Uint8 *src;
  1956     const Uint8 *map;
  1957     Uint8 *dst;
  1958     int srcskip, dstskip;
  1959     int srcbpp;
  1960     Uint32 Pixel;
  1961     int sR, sG, sB;
  1962     SDL_PixelFormat *srcfmt;
  1963 
  1964     /* Set up some basic variables */
  1965     width = info->d_width;
  1966     height = info->d_height;
  1967     src = info->s_pixels;
  1968     srcskip = info->s_skip;
  1969     dst = info->d_pixels;
  1970     dstskip = info->d_skip;
  1971     map = info->table;
  1972     srcfmt = info->src;
  1973     srcbpp = srcfmt->BytesPerPixel;
  1974 
  1975     if (map == NULL) {
  1976         while (height--) {
  1977 #ifdef USE_DUFFS_LOOP
  1978 			/* *INDENT-OFF* */
  1979 			DUFFS_LOOP(
  1980 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  1981 								sR, sG, sB);
  1982 				if ( 1 ) {
  1983 				  	/* Pack RGB into 8bit pixel */
  1984 				  	*dst = ((sR>>5)<<(3+2))|
  1985 					        ((sG>>5)<<(2)) |
  1986 					        ((sB>>6)<<(0)) ;
  1987 				}
  1988 				dst++;
  1989 				src += srcbpp;
  1990 			, width);
  1991 			/* *INDENT-ON* */
  1992 #else
  1993             for (c = width; c; --c) {
  1994                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  1995                 if (1) {
  1996                     /* Pack RGB into 8bit pixel */
  1997                     *dst = ((sR >> 5) << (3 + 2)) |
  1998                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
  1999                 }
  2000                 dst++;
  2001                 src += srcbpp;
  2002             }
  2003 #endif
  2004             src += srcskip;
  2005             dst += dstskip;
  2006         }
  2007     } else {
  2008         while (height--) {
  2009 #ifdef USE_DUFFS_LOOP
  2010 			/* *INDENT-OFF* */
  2011 			DUFFS_LOOP(
  2012 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2013 								sR, sG, sB);
  2014 				if ( 1 ) {
  2015 				  	/* Pack RGB into 8bit pixel */
  2016 				  	*dst = map[((sR>>5)<<(3+2))|
  2017 						   ((sG>>5)<<(2))  |
  2018 						   ((sB>>6)<<(0))  ];
  2019 				}
  2020 				dst++;
  2021 				src += srcbpp;
  2022 			, width);
  2023 			/* *INDENT-ON* */
  2024 #else
  2025             for (c = width; c; --c) {
  2026                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2027                 if (1) {
  2028                     /* Pack RGB into 8bit pixel */
  2029                     *dst = map[((sR >> 5) << (3 + 2)) |
  2030                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
  2031                 }
  2032                 dst++;
  2033                 src += srcbpp;
  2034             }
  2035 #endif /* USE_DUFFS_LOOP */
  2036             src += srcskip;
  2037             dst += dstskip;
  2038         }
  2039     }
  2040 }
  2041 
  2042 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
  2043 static void
  2044 Blit4to4MaskAlpha(SDL_BlitInfo * info)
  2045 {
  2046     int width = info->d_width;
  2047     int height = info->d_height;
  2048     Uint32 *src = (Uint32 *) info->s_pixels;
  2049     int srcskip = info->s_skip;
  2050     Uint32 *dst = (Uint32 *) info->d_pixels;
  2051     int dstskip = info->d_skip;
  2052     SDL_PixelFormat *srcfmt = info->src;
  2053     SDL_PixelFormat *dstfmt = info->dst;
  2054 
  2055     if (dstfmt->Amask) {
  2056         /* RGB->RGBA, SET_ALPHA */
  2057         Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
  2058 
  2059         while (height--) {
  2060 			/* *INDENT-OFF* */
  2061 			DUFFS_LOOP(
  2062 			{
  2063 				*dst = *src | mask;
  2064 				++dst;
  2065 				++src;
  2066 			},
  2067 			width);
  2068 			/* *INDENT-ON* */
  2069             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2070             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2071         }
  2072     } else {
  2073         /* RGBA->RGB, NO_ALPHA */
  2074         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  2075 
  2076         while (height--) {
  2077 			/* *INDENT-OFF* */
  2078 			DUFFS_LOOP(
  2079 			{
  2080 				*dst = *src & mask;
  2081 				++dst;
  2082 				++src;
  2083 			},
  2084 			width);
  2085 			/* *INDENT-ON* */
  2086             src = (Uint32 *) ((Uint8 *) src + srcskip);
  2087             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
  2088         }
  2089     }
  2090 }
  2091 
  2092 static void
  2093 BlitNtoN(SDL_BlitInfo * info)
  2094 {
  2095     int width = info->d_width;
  2096     int height = info->d_height;
  2097     Uint8 *src = info->s_pixels;
  2098     int srcskip = info->s_skip;
  2099     Uint8 *dst = info->d_pixels;
  2100     int dstskip = info->d_skip;
  2101     SDL_PixelFormat *srcfmt = info->src;
  2102     int srcbpp = srcfmt->BytesPerPixel;
  2103     SDL_PixelFormat *dstfmt = info->dst;
  2104     int dstbpp = dstfmt->BytesPerPixel;
  2105     unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2106 
  2107     while (height--) {
  2108 		/* *INDENT-OFF* */
  2109 		DUFFS_LOOP(
  2110 		{
  2111 		        Uint32 Pixel;
  2112 			unsigned sR;
  2113 			unsigned sG;
  2114 			unsigned sB;
  2115 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
  2116 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
  2117 			dst += dstbpp;
  2118 			src += srcbpp;
  2119 		},
  2120 		width);
  2121 		/* *INDENT-ON* */
  2122         src += srcskip;
  2123         dst += dstskip;
  2124     }
  2125 }
  2126 
  2127 static void
  2128 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
  2129 {
  2130     int width = info->d_width;
  2131     int height = info->d_height;
  2132     Uint8 *src = info->s_pixels;
  2133     int srcskip = info->s_skip;
  2134     Uint8 *dst = info->d_pixels;
  2135     int dstskip = info->d_skip;
  2136     SDL_PixelFormat *srcfmt = info->src;
  2137     int srcbpp = srcfmt->BytesPerPixel;
  2138     SDL_PixelFormat *dstfmt = info->dst;
  2139     int dstbpp = dstfmt->BytesPerPixel;
  2140     int c;
  2141 
  2142     /* FIXME: should map alpha to [0..255] correctly! */
  2143     while (height--) {
  2144         for (c = width; c; --c) {
  2145             Uint32 Pixel;
  2146             unsigned sR, sG, sB, sA;
  2147             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2148             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2149             dst += dstbpp;
  2150             src += srcbpp;
  2151         }
  2152         src += srcskip;
  2153         dst += dstskip;
  2154     }
  2155 }
  2156 
  2157 static void
  2158 BlitNto1Key(SDL_BlitInfo * info)
  2159 {
  2160     int width = info->d_width;
  2161     int height = info->d_height;
  2162     Uint8 *src = info->s_pixels;
  2163     int srcskip = info->s_skip;
  2164     Uint8 *dst = info->d_pixels;
  2165     int dstskip = info->d_skip;
  2166     SDL_PixelFormat *srcfmt = info->src;
  2167     const Uint8 *palmap = info->table;
  2168     Uint32 ckey = srcfmt->colorkey;
  2169     Uint32 rgbmask = ~srcfmt->Amask;
  2170     int srcbpp;
  2171     Uint32 Pixel;
  2172     unsigned sR, sG, sB;
  2173 
  2174     /* Set up some basic variables */
  2175     srcbpp = srcfmt->BytesPerPixel;
  2176     ckey &= rgbmask;
  2177 
  2178     if (palmap == NULL) {
  2179         while (height--) {
  2180 			/* *INDENT-OFF* */
  2181 			DUFFS_LOOP(
  2182 			{
  2183 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2184 								sR, sG, sB);
  2185 				if ( (Pixel & rgbmask) != ckey ) {
  2186 				  	/* Pack RGB into 8bit pixel */
  2187 				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
  2188 						           ((sG>>5)<<(2)) |
  2189 						           ((sB>>6)<<(0)));
  2190 				}
  2191 				dst++;
  2192 				src += srcbpp;
  2193 			},
  2194 			width);
  2195 			/* *INDENT-ON* */
  2196             src += srcskip;
  2197             dst += dstskip;
  2198         }
  2199     } else {
  2200         while (height--) {
  2201 			/* *INDENT-OFF* */
  2202 			DUFFS_LOOP(
  2203 			{
  2204 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
  2205 								sR, sG, sB);
  2206 				if ( (Pixel & rgbmask) != ckey ) {
  2207 				  	/* Pack RGB into 8bit pixel */
  2208 				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
  2209 							             ((sG>>5)<<(2))  |
  2210 							             ((sB>>6)<<(0))  ];
  2211 				}
  2212 				dst++;
  2213 				src += srcbpp;
  2214 			},
  2215 			width);
  2216 			/* *INDENT-ON* */
  2217             src += srcskip;
  2218             dst += dstskip;
  2219         }
  2220     }
  2221 }
  2222 
  2223 static void
  2224 Blit2to2Key(SDL_BlitInfo * info)
  2225 {
  2226     int width = info->d_width;
  2227     int height = info->d_height;
  2228     Uint16 *srcp = (Uint16 *) info->s_pixels;
  2229     int srcskip = info->s_skip;
  2230     Uint16 *dstp = (Uint16 *) info->d_pixels;
  2231     int dstskip = info->d_skip;
  2232     Uint32 ckey = info->src->colorkey;
  2233     Uint32 rgbmask = ~info->src->Amask;
  2234 
  2235     /* Set up some basic variables */
  2236     srcskip /= 2;
  2237     dstskip /= 2;
  2238     ckey &= rgbmask;
  2239 
  2240     while (height--) {
  2241 		/* *INDENT-OFF* */
  2242 		DUFFS_LOOP(
  2243 		{
  2244 			if ( (*srcp & rgbmask) != ckey ) {
  2245 				*dstp = *srcp;
  2246 			}
  2247 			dstp++;
  2248 			srcp++;
  2249 		},
  2250 		width);
  2251 		/* *INDENT-ON* */
  2252         srcp += srcskip;
  2253         dstp += dstskip;
  2254     }
  2255 }
  2256 
  2257 static void
  2258 BlitNtoNKey(SDL_BlitInfo * info)
  2259 {
  2260     int width = info->d_width;
  2261     int height = info->d_height;
  2262     Uint8 *src = info->s_pixels;
  2263     int srcskip = info->s_skip;
  2264     Uint8 *dst = info->d_pixels;
  2265     int dstskip = info->d_skip;
  2266     Uint32 ckey = info->src->colorkey;
  2267     SDL_PixelFormat *srcfmt = info->src;
  2268     SDL_PixelFormat *dstfmt = info->dst;
  2269     int srcbpp = srcfmt->BytesPerPixel;
  2270     int dstbpp = dstfmt->BytesPerPixel;
  2271     unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
  2272     Uint32 rgbmask = ~srcfmt->Amask;
  2273 
  2274     /* Set up some basic variables */
  2275     ckey &= rgbmask;
  2276 
  2277     while (height--) {
  2278 		/* *INDENT-OFF* */
  2279 		DUFFS_LOOP(
  2280 		{
  2281 		        Uint32 Pixel;
  2282 			unsigned sR;
  2283 			unsigned sG;
  2284 			unsigned sB;
  2285 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
  2286 			if ( (Pixel & rgbmask) != ckey ) {
  2287 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
  2288 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2289 					      sR, sG, sB, alpha);
  2290 			}
  2291 			dst += dstbpp;
  2292 			src += srcbpp;
  2293 		},
  2294 		width);
  2295 		/* *INDENT-ON* */
  2296         src += srcskip;
  2297         dst += dstskip;
  2298     }
  2299 }
  2300 
  2301 static void
  2302 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
  2303 {
  2304     int width = info->d_width;
  2305     int height = info->d_height;
  2306     Uint8 *src = info->s_pixels;
  2307     int srcskip = info->s_skip;
  2308     Uint8 *dst = info->d_pixels;
  2309     int dstskip = info->d_skip;
  2310     Uint32 ckey = info->src->colorkey;
  2311     SDL_PixelFormat *srcfmt = info->src;
  2312     SDL_PixelFormat *dstfmt = info->dst;
  2313     Uint32 rgbmask = ~srcfmt->Amask;
  2314 
  2315     Uint8 srcbpp;
  2316     Uint8 dstbpp;
  2317     Uint32 Pixel;
  2318     unsigned sR, sG, sB, sA;
  2319 
  2320     /* Set up some basic variables */
  2321     srcbpp = srcfmt->BytesPerPixel;
  2322     dstbpp = dstfmt->BytesPerPixel;
  2323     ckey &= rgbmask;
  2324 
  2325     /* FIXME: should map alpha to [0..255] correctly! */
  2326     while (height--) {
  2327 		/* *INDENT-OFF* */
  2328 		DUFFS_LOOP(
  2329 		{
  2330 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
  2331 				      sR, sG, sB, sA);
  2332 			if ( (Pixel & rgbmask) != ckey ) {
  2333 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
  2334 						sR, sG, sB, sA);
  2335 			}
  2336 			dst += dstbpp;
  2337 			src += srcbpp;
  2338 		},
  2339 		width);
  2340 		/* *INDENT-ON* */
  2341         src += srcskip;
  2342         dst += dstskip;
  2343     }
  2344 }
  2345 
  2346 /* Normal N to N optimized blitters */
  2347 struct blit_table
  2348 {
  2349     Uint32 srcR, srcG, srcB;
  2350     int dstbpp;
  2351     Uint32 dstR, dstG, dstB;
  2352     Uint32 blit_features;
  2353     void *aux_data;
  2354     SDL_loblit blitfunc;
  2355     enum
  2356     { NO_ALPHA = 1, SET_ALPHA = 2, COPY_ALPHA = 4 } alpha;
  2357 };
  2358 static const struct blit_table normal_blit_1[] = {
  2359     /* Default for 8-bit RGB source, an invalid combination */
  2360     {0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL},
  2361 };
  2362 static const struct blit_table normal_blit_2[] = {
  2363 #if SDL_HERMES_BLITTERS
  2364     {0x0000F800, 0x000007E0, 0x0000001F, 2, 0x0000001F, 0x000007E0,
  2365      0x0000F800,
  2366      0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA},
  2367     {0x0000F800, 0x000007E0, 0x0000001F, 2, 0x00007C00, 0x000003E0,
  2368      0x0000001F,
  2369      0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA},
  2370     {0x0000F800, 0x000007E0, 0x0000001F, 2, 0x0000001F, 0x000003E0,
  2371      0x00007C00,
  2372      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA},
  2373 #elif SDL_ALTIVEC_BLITTERS
  2374     /* has-altivec */
  2375     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
  2376      0x00000000,
  2377      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2378     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
  2379      0x00000000,
  2380      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2381 #endif
  2382     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
  2383      0x000000FF,
  2384      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA},
  2385     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00,
  2386      0x00FF0000,
  2387      0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA},
  2388     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000,
  2389      0x0000FF00,
  2390      0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA},
  2391     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000,
  2392      0xFF000000,
  2393      0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA},
  2394 
  2395     /* Default for 16-bit RGB source, used if no other blitter matches */
  2396     {0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0}
  2397 };
  2398 static const struct blit_table normal_blit_3[] = {
  2399     /* Default for 24-bit RGB source, never optimized */
  2400     {0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0}
  2401 };
  2402 static const struct blit_table normal_blit_4[] = {
  2403 #if SDL_HERMES_BLITTERS
  2404     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
  2405      0x0000001F,
  2406      1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA},
  2407     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
  2408      0x0000001F,
  2409      0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA},
  2410     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000007E0,
  2411      0x0000F800,
  2412      1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA},
  2413     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000007E0,
  2414      0x0000F800,
  2415      0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA},
  2416     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
  2417      0x0000001F,
  2418      1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA},
  2419     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
  2420      0x0000001F,
  2421      0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA},
  2422     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000003E0,
  2423      0x00007C00,
  2424      1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA},
  2425     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000003E0,
  2426      0x00007C00,
  2427      0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA},
  2428     {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00,
  2429      0x000000FF,
  2430      0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA},
  2431     {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00,
  2432      0x00FF0000,
  2433      0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA},
  2434     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00,
  2435      0x00FF0000,
  2436      0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA},
  2437     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0xFF000000, 0x00FF0000,
  2438      0x0000FF00,
  2439      0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA},
  2440     {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x0000FF00, 0x00FF0000,
  2441      0xFF000000,
  2442      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA},
  2443 #else
  2444 #if SDL_ALTIVEC_BLITTERS
  2445     /* has-altivec | dont-use-prefetch */
  2446     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
  2447      0x00000000,
  2448      6, NULL, ConvertAltivec32to32_noprefetch,
  2449      NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2450     /* has-altivec */
  2451     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
  2452      0x00000000,
  2453      2, NULL, ConvertAltivec32to32_prefetch,
  2454      NO_ALPHA | COPY_ALPHA | SET_ALPHA},
  2455     /* has-altivec */
  2456     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
  2457      0x0000001F,
  2458      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA},
  2459 #endif
  2460     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
  2461      0x0000001F,
  2462      0, NULL, Blit_RGB888_RGB565, NO_ALPHA},
  2463     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
  2464      0x0000001F,
  2465      0, NULL, Blit_RGB888_RGB555, NO_ALPHA},
  2466 #endif
  2467     /* Default for 32-bit RGB source, used if no other blitter matches */
  2468     {0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0}
  2469 };
  2470 static const struct blit_table *normal_blit[] = {
  2471     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
  2472 };
  2473 
  2474 /* Mask matches table, or table entry is zero */
  2475 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
  2476 
  2477 SDL_loblit
  2478 SDL_CalculateBlitN(SDL_Surface * surface, int blit_index)
  2479 {
  2480     struct private_swaccel *sdata;
  2481     SDL_PixelFormat *srcfmt;
  2482     SDL_PixelFormat *dstfmt;
  2483     const struct blit_table *table;
  2484     int which;
  2485     SDL_loblit blitfun;
  2486 
  2487     /* Set up data for choosing the blit */
  2488     sdata = surface->map->sw_data;
  2489     srcfmt = surface->format;
  2490     dstfmt = surface->map->dst->format;
  2491 
  2492     if (blit_index & 2) {
  2493         /* alpha or alpha+colorkey */
  2494         return SDL_CalculateAlphaBlit(surface, blit_index);
  2495     }
  2496 
  2497     /* We don't support destinations less than 8-bits */
  2498     if (dstfmt->BitsPerPixel < 8) {
  2499         return (NULL);
  2500     }
  2501 
  2502     if (blit_index == 1) {
  2503         /* colorkey blit: Here we don't have too many options, mostly
  2504            because RLE is the preferred fast way to deal with this.
  2505            If a particular case turns out to be useful we'll add it. */
  2506 
  2507         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
  2508             return Blit2to2Key;
  2509         else if (dstfmt->BytesPerPixel == 1)
  2510             return BlitNto1Key;
  2511         else {
  2512 #if SDL_ALTIVEC_BLITTERS
  2513             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
  2514                 && SDL_HasAltiVec()) {
  2515                 return Blit32to32KeyAltivec;
  2516             } else
  2517 #endif
  2518 
  2519             if (srcfmt->Amask && dstfmt->Amask)
  2520                 return BlitNtoNKeyCopyAlpha;
  2521             else
  2522                 return BlitNtoNKey;
  2523         }
  2524     }
  2525 
  2526     blitfun = NULL;
  2527     if (dstfmt->BitsPerPixel == 8) {
  2528         /* We assume 8-bit destinations are palettized */
  2529         if ((srcfmt->BytesPerPixel == 4) &&
  2530             (srcfmt->Rmask == 0x00FF0000) &&
  2531             (srcfmt->Gmask == 0x0000FF00) && (srcfmt->Bmask == 0x000000FF)) {
  2532             if (surface->map->table) {
  2533                 blitfun = Blit_RGB888_index8_map;
  2534             } else {
  2535 #if SDL_HERMES_BLITTERS
  2536                 sdata->aux_data = ConvertX86p32_8RGB332;
  2537                 blitfun = ConvertX86;
  2538 #else
  2539                 blitfun = Blit_RGB888_index8;
  2540 #endif
  2541             }
  2542         } else {
  2543             blitfun = BlitNto1;
  2544         }
  2545     } else {
  2546         /* Now the meat, choose the blitter we want */
  2547         int a_need = NO_ALPHA;
  2548         if (dstfmt->Amask)
  2549             a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
  2550         table = normal_blit[srcfmt->BytesPerPixel - 1];
  2551         for (which = 0; table[which].dstbpp; ++which) {
  2552             if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
  2553                 MASKOK(srcfmt->Gmask, table[which].srcG) &&
  2554                 MASKOK(srcfmt->Bmask, table[which].srcB) &&
  2555                 MASKOK(dstfmt->Rmask, table[which].dstR) &&
  2556                 MASKOK(dstfmt->Gmask, table[which].dstG) &&
  2557                 MASKOK(dstfmt->Bmask, table[which].dstB) &&
  2558                 dstfmt->BytesPerPixel == table[which].dstbpp &&
  2559                 (a_need & table[which].alpha) == a_need &&
  2560                 ((table[which].blit_features & GetBlitFeatures()) ==
  2561                  table[which].blit_features))
  2562                 break;
  2563         }
  2564         sdata->aux_data = table[which].aux_data;
  2565         blitfun = table[which].blitfunc;
  2566 
  2567         if (blitfun == BlitNtoN) {      /* default C fallback catch-all. Slow! */
  2568             /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
  2569             if (srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
  2570                 srcfmt->Rmask == dstfmt->Rmask &&
  2571                 srcfmt->Gmask == dstfmt->Gmask &&
  2572                 srcfmt->Bmask == dstfmt->Bmask) {
  2573                 blitfun = Blit4to4MaskAlpha;
  2574             } else if (a_need == COPY_ALPHA) {
  2575                 blitfun = BlitNtoNCopyAlpha;
  2576             }
  2577         }
  2578     }
  2579 
  2580 #ifdef DEBUG_ASM
  2581 #if SDL_HERMES_BLITTERS
  2582     if (blitfun == ConvertMMX)
  2583         fprintf(stderr, "Using mmx blit\n");
  2584     else if (blitfun == ConvertX86)
  2585         fprintf(stderr, "Using asm blit\n");
  2586     else
  2587 #endif
  2588     if ((blitfun == BlitNtoN) || (blitfun == BlitNto1))
  2589         fprintf(stderr, "Using C blit\n");
  2590     else
  2591         fprintf(stderr, "Using optimized C blit\n");
  2592 #endif /* DEBUG_ASM */
  2593 
  2594     return (blitfun);
  2595 }
  2596 
  2597 /* vi: set ts=4 sw=4 expandtab: */