src/video/SDL_RLEaccel.c
author Sylvain Becker <sylvain.becker@gmail.com>
Mon, 04 Feb 2019 09:11:07 +0100
changeset 12591 66cd8731c3b1
parent 12503 806492103856
permissions -rw-r--r--
Fixed bug 4484 - use SIMD aligned memory for SDL_Surface

Surfaces are allocated using SDL_SIMDAlloc()
They are marked with the SDL_SIMD_ALIGNED flag so that they are appropriately freed with SDL_SIMDFree()
(Flag is cleared when pixels is free'd in RLE, in case user would hijack the pixels ptr)

When providing their own memory pointer (SDL_CreateRGBSurfaceFrom()) and clearing
SDL_PREALLOC to delegate freeing the memory to SDL, it is the responsibility of the user
to add SDL_SIMD_ALIGNED or not, depending on whether the pointer has been allocated with
SDL_malloc() or SDL_SIMDAlloc().
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 /*
    24  * RLE encoding for software colorkey and alpha-channel acceleration
    25  *
    26  * Original version by Sam Lantinga
    27  *
    28  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
    29  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
    30  * format, encoder and blitter.
    31  *
    32  * Many thanks to Xark and johns for hints, benchmarks and useful comments
    33  * leading to this code.
    34  *
    35  * Welcome to Macro Mayhem.
    36  */
    37 
    38 /*
    39  * The encoding translates the image data to a stream of segments of the form
    40  *
    41  * <skip> <run> <data>
    42  *
    43  * where <skip> is the number of transparent pixels to skip,
    44  *       <run>  is the number of opaque pixels to blit,
    45  * and   <data> are the pixels themselves.
    46  *
    47  * This basic structure is used both for colorkeyed surfaces, used for simple
    48  * binary transparency and for per-surface alpha blending, and for surfaces
    49  * with per-pixel alpha. The details differ, however:
    50  *
    51  * Encoding of colorkeyed surfaces:
    52  *
    53  *   Encoded pixels always have the same format as the target surface.
    54  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
    55  *   where they are 16 bit. This makes the pixel data aligned at all times.
    56  *   Segments never wrap around from one scan line to the next.
    57  *
    58  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    59  *   beginning of a line.
    60  *
    61  * Encoding of surfaces with per-pixel alpha:
    62  *
    63  *   The sequence begins with a struct RLEDestFormat describing the target
    64  *   pixel format, to provide reliable un-encoding.
    65  *
    66  *   Each scan line is encoded twice: First all completely opaque pixels,
    67  *   encoded in the target format as described above, and then all
    68  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
    69  *   in the following 32-bit format:
    70  *
    71  *   For 32-bit targets, each pixel has the target RGB format but with
    72  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
    73  *   counts are 16 bit.
    74  *
    75  *   For 16-bit targets, each pixel has the target RGB format, but with
    76  *   the middle component (usually green) shifted 16 steps to the left,
    77  *   and the hole filled with the 5 most significant bits of the alpha value.
    78  *   i.e. if the target has the format         rrrrrggggggbbbbb,
    79  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
    80  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
    81  *   for the translucent lines. Two padding bytes may be inserted
    82  *   before each translucent line to keep them 32-bit aligned.
    83  *
    84  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    85  *   beginning of an opaque line.
    86  */
    87 
    88 #include "SDL_video.h"
    89 #include "SDL_sysvideo.h"
    90 #include "SDL_blit.h"
    91 #include "SDL_RLEaccel_c.h"
    92 #include "../cpuinfo/SDL_simd.h"
    93 
    94 #ifndef MIN
       /* Smaller of a and b. Defined only when MIN is not already defined.
        * The selected argument is evaluated twice, so avoid side effects. */
    95 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    96 #endif
    97 
    98 #define PIXEL_COPY(to, from, len, bpp)          \
           /* copy len pixels of bpp bytes each from `from` to `to`; */ \
           /* expands to one SDL_memcpy() of len * bpp bytes          */ \
    99     SDL_memcpy(to, from, (size_t)(len) * (bpp))
   100 
   101 /*
   102  * Various colorkey blit methods, for opaque and per-surface alpha
        *
        * The blit macros that CHOOSE_BLIT hands out as do_blit all take the
        * same (to, from, length, bpp, alpha) argument list.
        *
        * OPAQUE_BLIT is the one used for alpha == 255: a plain copy of
        * `length` pixels of `bpp` bytes. Its `alpha` argument is ignored.
   103  */
   104 
   105 #define OPAQUE_BLIT(to, from, length, bpp, alpha)   \
   106     PIXEL_COPY(to, from, length, bpp)
   107 
   108 /*
   109  * For 32bpp pixels of the form 0x00rrggbb:
   110  * If we treat the middle component separately, we can process the two
   111  * remaining in parallel. This is safe to do because of the gap to the left
   112  * of each component, so the bits from the multiplication don't collide.
   113  * This can be used for any RGB permutation of course.
        *
        * to, from: destination and source pixels, accessed through Uint32 pointers.
        * length:   number of pixels to blend (converted to int for the loop).
        * bpp:      not used by this macro.
        * alpha:    source weight. Each component becomes
        *           d + ((s - d) * alpha >> 8), so alpha is presumably 0..255
        *           (TODO confirm against callers).
        *
        * The pixel written back always has bits 24-31 cleared, because both
        * partial results are masked (0xff00ff and 0xff00) before being combined.
   114  */
   115 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)      \
   116     do {                                                    \
   117         int i;                                              \
   118         Uint32 *src = (Uint32 *)(from);                     \
   119         Uint32 *dst = (Uint32 *)(to);                       \
   120         for (i = 0; i < (int)(length); i++) {               \
   121             Uint32 s = *src++;                              \
   122             Uint32 d = *dst;                                \
   123             Uint32 s1 = s & 0xff00ff;                       \
   124             Uint32 d1 = d & 0xff00ff;                       \
   125             d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
   126             s &= 0xff00;                                    \
   127             d &= 0xff00;                                    \
   128             d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
   129             *dst++ = d1 | d;                                \
   130         }                                                   \
   131     } while (0)
   132 
   133 /*
   134  * For 16bpp pixels we can go a step further: put the middle component
   135  * in the high 16 bits of a 32 bit word, and process all three RGB
   136  * components at the same time. Since the smallest gap is here just
   137  * 5 bits, we have to scale alpha down to 5 bits as well.
        *
        * to, from: destination and source pixels, accessed through Uint16 pointers.
        * length:   number of pixels to blend (converted to int for the loop).
        * bpp:      not used by this macro.
        * alpha:    source weight; it is shifted right by 3 before use, which
        *           yields 5 bits if alpha is an 8-bit value (TODO confirm).
        *
        * The mask 0x07e0f81f keeps bits 0-4 and 11-15 of the pixel in the low
        * half of the word and bits 5-10 (the 6-bit middle field) in the high
        * half. After blending, the two halves are OR-ed back into 16 bits.
   138  */
   139 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)  \
   140     do {                                                \
   141         int i;                                          \
   142         Uint16 *src = (Uint16 *)(from);                 \
   143         Uint16 *dst = (Uint16 *)(to);                   \
   144         Uint32 ALPHA = alpha >> 3;                      \
   145         for(i = 0; i < (int)(length); i++) {            \
   146             Uint32 s = *src++;                          \
   147             Uint32 d = *dst;                            \
   148             s = (s | s << 16) & 0x07e0f81f;             \
   149             d = (d | d << 16) & 0x07e0f81f;             \
   150             d += (s - d) * ALPHA >> 5;                  \
   151             d &= 0x07e0f81f;                            \
   152             *dst++ = (Uint16)(d | d >> 16);             \
   153         }                                               \
   154     } while(0)
   155 
   156 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)  \
           /* Same scheme as ALPHA_BLIT16_565, for a 5-5-5 layout:      */ \
           /* 0x03e07c1f keeps bits 0-4 and 10-14 in the low half and   */ \
           /* bits 5-9 (the middle field) in the high half of the word. */ \
           /* alpha is reduced with >> 3 and the blend shifts by 5.     */ \
           /* bpp is not used.                                          */ \
   157     do {                                                \
   158         int i;                                          \
   159         Uint16 *src = (Uint16 *)(from);                 \
   160         Uint16 *dst = (Uint16 *)(to);                   \
   161         Uint32 ALPHA = alpha >> 3;                      \
   162         for(i = 0; i < (int)(length); i++) {            \
   163             Uint32 s = *src++;                          \
   164             Uint32 d = *dst;                            \
   165             s = (s | s << 16) & 0x03e07c1f;             \
   166             d = (d | d << 16) & 0x03e07c1f;             \
   167             d += (s - d) * ALPHA >> 5;                  \
   168             d &= 0x03e07c1f;                            \
   169             *dst++ = (Uint16)(d | d >> 16);             \
   170         }                                               \
   171     } while(0)
   172 
   173 /*
   174  * The general slow catch-all function, for remaining depths and formats
        *
        * to, from: destination and source pixels, walked as byte pointers.
        * length:   number of pixels to blend (converted to int for the loop).
        * bpp:      bytes per pixel; only 2, 3 and 4 are handled. There is no
        *           case for other values, so s and d would stay unset for them.
        *           CHOOSE_BLIT only passes 2, 3 or 4.
        * alpha:    source weight; each component becomes
        *           d + ((s - d) * alpha >> 8).
        *
        * The macro also reads a variable named `fmt` that is NOT a parameter:
        * it must be in scope where the macro is expanded (both RLEClipBlit and
        * SDL_RLEBlit declare a local SDL_PixelFormat *fmt).
        *
        * 3-byte pixels are assembled and stored byte by byte, in the order
        * selected by SDL_BYTEORDER. RGB_FROM_PIXEL and PIXEL_FROM_RGB are not
        * defined in this part of the file (presumably SDL_blit.h - confirm).
   175  */
   176 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)            \
   177     do {                                                        \
   178         int i;                                                  \
   179         Uint8 *src = from;                                      \
   180         Uint8 *dst = to;                                        \
   181         for (i = 0; i < (int)(length); i++) {                   \
   182             Uint32 s, d;                                        \
   183             unsigned rs, gs, bs, rd, gd, bd;                    \
   184             switch (bpp) {                                      \
   185             case 2:                                             \
   186                 s = *(Uint16 *)src;                             \
   187                 d = *(Uint16 *)dst;                             \
   188                 break;                                          \
   189             case 3:                                             \
   190                 if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
   191                     s = (src[0] << 16) | (src[1] << 8) | src[2]; \
   192                     d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \
   193                 } else {                                        \
   194                     s = (src[2] << 16) | (src[1] << 8) | src[0]; \
   195                     d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \
   196                 }                                               \
   197                 break;                                          \
   198             case 4:                                             \
   199                 s = *(Uint32 *)src;                             \
   200                 d = *(Uint32 *)dst;                             \
   201                 break;                                          \
   202             }                                                   \
   203             RGB_FROM_PIXEL(s, fmt, rs, gs, bs);                 \
   204             RGB_FROM_PIXEL(d, fmt, rd, gd, bd);                 \
   205             rd += (rs - rd) * alpha >> 8;                       \
   206             gd += (gs - gd) * alpha >> 8;                       \
   207             bd += (bs - bd) * alpha >> 8;                       \
   208             PIXEL_FROM_RGB(d, fmt, rd, gd, bd);                 \
   209             switch (bpp) {                                      \
   210             case 2:                                             \
   211                 *(Uint16 *)dst = (Uint16)d;                     \
   212                 break;                                          \
   213             case 3:                                             \
   214                 if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
   215                     dst[0] = (Uint8)(d >> 16);                  \
   216                     dst[1] = (Uint8)(d >> 8);                   \
   217                     dst[2] = (Uint8)(d);                        \
   218                 } else {                                        \
   219                     dst[0] = (Uint8)d;                          \
   220                     dst[1] = (Uint8)(d >> 8);                   \
   221                     dst[2] = (Uint8)(d >> 16);                  \
   222                 }                                               \
   223                 break;                                          \
   224             case 4:                                             \
   225                 *(Uint32 *)dst = d;                             \
   226                 break;                                          \
   227             }                                                   \
   228             src += bpp;                                         \
   229             dst += bpp;                                         \
   230         }                                                       \
   231     } while(0)
   232 
   233 /*
   234  * Special case: 50% alpha (alpha=128)
   235  * This is treated specially because it can be optimized very well, and
   236  * since it is good for many cases of semi-translucency.
   237  * The theory is to do all three components at the same time:
   238  * First zero the lowest bit of each component, which gives us room to
   239  * add them. Then shift right and add back the carry of the lowest bits
        * (s & d & 0x00010101, i.e. 1 only where both lowest bits are set).
        *
        * to, from: destination and source pixels, accessed through Uint32 pointers.
        * length:   number of pixels to blend (converted to int for the loop).
        * bpp, alpha: not used by this macro.
        *
        * Bits 24-31 of the written pixel are always zero, since both masks
        * (0x00fefefe and 0x00010101) exclude them.
   240  */
   241 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)       \
   242     do {                                                        \
   243         int i;                                                  \
   244         Uint32 *src = (Uint32 *)(from);                         \
   245         Uint32 *dst = (Uint32 *)(to);                           \
   246         for(i = 0; i < (int)(length); i++) {                    \
   247             Uint32 s = *src++;                                  \
   248             Uint32 d = *dst;                                    \
   249             *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
   250                  + (s & d & 0x00010101);                        \
   251         }                                                       \
   252     } while(0)
   253 
   254 /*
   255  * For 16bpp, we can actually blend two pixels in parallel, if we take
   256  * care to shift before we add, not after.
   257  */
   258 
   259 /* helper: blend a single 16 bit pixel at 50%
        *
        * dst, src: pointer variables to 16-bit pixels. Both are modified:
        *           each is advanced by one element (*src++ / *dst++).
        * mask:     bits that take part in the addition; the bits outside it
        *           (~mask & 0xffff) are added back only where they are set in
        *           both source and destination.
        */
   260 #define BLEND16_50(dst, src, mask)                              \
   261     do {                                                        \
   262         Uint32 s = *src++;                                      \
   263         Uint32 d = *dst;                                        \
   264         *dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +    \
   265                           (s & d & (~mask & 0xffff)));          \
   266     } while(0)
   267 
   268 /* basic 16bpp blender. mask is the bits to keep when adding.
        *
        * to, from: destination and source pixels, accessed through Uint16 pointers.
        * length:   number of pixels to blend, held in an unsigned counter.
        * bpp, alpha: not used by this macro.
        * mask:     per-pixel 16-bit mask passed on to BLEND16_50; the paired
        *           path uses it duplicated into both halves of a 32-bit word.
        *
        * If source and destination differ in their position within a 4-byte
        * word, pixels are blended one at a time. Otherwise one leading pixel
        * is blended when src is not 4-byte aligned, then pixels are blended
        * two at a time through Uint32 accesses, then a possible last pixel.
        *
        * NOTE(review): in the in-phase path the leading-pixel step runs
        * without checking n, so length == 0 with a misaligned src would write
        * one pixel and wrap the unsigned counter. The expansions visible in
        * this file only pass non-zero run lengths - confirm for other users.
        * NOTE(review): both visible uses pass an int constant as mask, so
        * `mask << 16` produces a value above INT_MAX for a 32-bit int;
        * an unsigned constant would avoid relying on that - confirm.
        */
   269 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)     \
   270     do {                                                        \
   271         unsigned n = (length);                                  \
   272         Uint16 *src = (Uint16 *)(from);                         \
   273         Uint16 *dst = (Uint16 *)(to);                           \
   274         if (((uintptr_t)src ^ (uintptr_t)dst) & 3) {            \
   275             /* source and destination not in phase, blit one by one */ \
   276             while (n--)                                         \
   277                 BLEND16_50(dst, src, mask);                     \
   278         } else {                                                \
   279             if ((uintptr_t)src & 3) {                           \
   280                 /* first odd pixel */                           \
   281                 BLEND16_50(dst, src, mask);                     \
   282                 n--;                                            \
   283             }                                                   \
   284             for (; n > 1; n -= 2) {                             \
   285                 Uint32 s = *(Uint32 *)src;                      \
   286                 Uint32 d = *(Uint32 *)dst;                      \
   287                 *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1) \
   288                     + ((d & (mask | mask << 16)) >> 1)          \
   289                     + (s & d & (~(mask | mask << 16)));         \
   290                 src += 2;                                       \
   291                 dst += 2;                                       \
   292             }                                                   \
   293             if (n)                                              \
   294                 BLEND16_50(dst, src, mask); /* last odd pixel */ \
   295         }                                                       \
   296     } while(0)
   297 
   298 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)       \
           /* 50% blend for 5-6-5 pixels: 0xf7de has bits 0, 5 and 11 */ \
           /* cleared, the lowest bit of each component field.        */ \
   299     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
   300 
   301 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)       \
           /* 50% blend for 5-5-5 pixels: 0xfbde has bits 0, 5 and 10 */ \
           /* cleared, the lowest bit of each component field.        */ \
   302     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
   303 
   304 #define CHOOSE_BLIT(blitter, alpha, fmt)                        \
           /* Expands blitter(bpp, Type, do_blit) with the pixel routine   */ \
           /* matching fmt and alpha. Type is Uint16 for 4 bytes per pixel */ \
           /* and Uint8 otherwise. alpha == 255 always selects OPAQUE_BLIT.*/ \
           /* Otherwise: 1 byte per pixel expands to nothing; 2 and 4 byte */ \
           /* formats whose masks match get the 565/555/888 routines (the  */ \
           /* *_50 variants when alpha == 128); everything else, including */ \
           /* all 3 byte formats, uses ALPHA_BLIT_ANY.                     */ \
           /* The general16 label means this macro can be expanded at most */ \
           /* once per function.                                           */ \
   305     do {                                                        \
   306         if (alpha == 255) {                                     \
   307             switch (fmt->BytesPerPixel) {                       \
   308             case 1: blitter(1, Uint8, OPAQUE_BLIT); break;      \
   309             case 2: blitter(2, Uint8, OPAQUE_BLIT); break;      \
   310             case 3: blitter(3, Uint8, OPAQUE_BLIT); break;      \
   311             case 4: blitter(4, Uint16, OPAQUE_BLIT); break;     \
   312             }                                                   \
   313         } else {                                                \
   314             switch (fmt->BytesPerPixel) {                       \
   315             case 1:                                             \
   316                 /* No 8bpp alpha blitting */                    \
   317                 break;                                          \
   318                                                                 \
   319             case 2:                                             \
   320                 switch (fmt->Rmask | fmt->Gmask | fmt->Bmask) { \
   321                 case 0xffff:                                    \
   322                     if (fmt->Gmask == 0x07e0                    \
   323                         || fmt->Rmask == 0x07e0                 \
   324                         || fmt->Bmask == 0x07e0) {              \
   325                         if (alpha == 128) {                     \
   326                             blitter(2, Uint8, ALPHA_BLIT16_565_50); \
   327                         } else {                                \
   328                             blitter(2, Uint8, ALPHA_BLIT16_565); \
   329                         }                                       \
   330                     } else                                      \
   331                         goto general16;                         \
   332                     break;                                      \
   333                                                                 \
   334                 case 0x7fff:                                    \
   335                     if (fmt->Gmask == 0x03e0                    \
   336                         || fmt->Rmask == 0x03e0                 \
   337                         || fmt->Bmask == 0x03e0) {              \
   338                         if (alpha == 128) {                     \
   339                             blitter(2, Uint8, ALPHA_BLIT16_555_50); \
   340                         } else {                                \
   341                             blitter(2, Uint8, ALPHA_BLIT16_555); \
   342                         }                                       \
   343                         break;                                  \
   344                     } else                                      \
   345                         goto general16;                         \
   346                     break;                                      \
   347                                                                 \
   348                 default:                                        \
   349     general16:                                                  \
   350                     blitter(2, Uint8, ALPHA_BLIT_ANY);          \
   351                 }                                               \
   352                 break;                                          \
   353                                                                 \
   354             case 3:                                             \
   355                 blitter(3, Uint8, ALPHA_BLIT_ANY);              \
   356                 break;                                          \
   357                                                                 \
   358             case 4:                                             \
   359                 if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
   360                     && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
   361                     || fmt->Bmask == 0xff00)) {                 \
   362                     if (alpha == 128) {                         \
   363                         blitter(4, Uint16, ALPHA_BLIT32_888_50); \
   364                     } else {                                    \
   365                         blitter(4, Uint16, ALPHA_BLIT32_888);   \
   366                     }                                           \
   367                 } else                                          \
   368                     blitter(4, Uint16, ALPHA_BLIT_ANY);         \
   369                 break;                                          \
   370             }                                                   \
   371         }                                                       \
   372     } while(0)
   373 
   374 /*
   375  * Set a pixel value using the given format, except that the alpha value is
   376  * placed in the top byte. This is the format used for RLE with alpha.
        *
        * Pixel:      lvalue that receives the assembled value.
        * fmt:        pointer to a structure providing Rloss/Gloss/Bloss and
        *             Rshift/Gshift/Bshift.
        * r, g, b, a: component values; each of r, g, b is shifted right by its
        *             loss and left by its shift, while a is shifted left by 24.
        *
        * The arguments are used without extra parentheses and the body is a
        * plain { } block rather than do { } while (0), so pass simple
        * expressions and take care when using it before an `else`.
   377  */
   378 #define RLEPIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                      \
   379 {                                                                       \
   380     Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|                             \
   381         ((g>>fmt->Gloss)<<fmt->Gshift)|                                 \
   382         ((b>>fmt->Bloss)<<fmt->Bshift)|                                 \
   383         (a<<24);                                                        \
   384 }
   385 
   386 /*
   387  * This takes care of the case when the surface is clipped on the left and/or
   388  * right. Top clipping has already been taken care of.
        *
        * w:        width of a full encoded scan line in pixels; a line is
        *           complete when the running column offset reaches w.
        * srcbuf:   RLE stream, positioned at the first scan line to draw.
        * surf_dst: destination surface; supplies the pixel format used to pick
        *           the blitter and the pitch used to step to the next line.
        * dstbuf:   destination address of the pixel at source column
        *           srcrect->x. It is rebased by -srcrect->x * bpp so that it
        *           can then be indexed by source column.
        * srcrect:  x and w give the visible column range [x, x + w); h gives
        *           the number of lines to draw.
        * alpha:    per-surface alpha handed to the selected blit macro.
        *
        * For each <skip> <run> segment the run is clipped to the visible
        * column range before blitting; the source pointer always advances by
        * the full run. The loop ends after srcrect->h lines or at a zero run
        * found at column 0 (end-of-stream marker).
   389  */
   390 static void
   391 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
   392             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
   393 {
   394     SDL_PixelFormat *fmt = surf_dst->format;
   395 
           /* bpp: bytes per pixel; Type: integer type of the <skip>/<run>
            * counts; do_blit: pixel routine chosen by CHOOSE_BLIT. The label
            * name is built from bpp and do_blit by token pasting. */
   396 #define RLECLIPBLIT(bpp, Type, do_blit)                         \
   397     do {                                                        \
   398         int linecount = srcrect->h;                             \
   399         int ofs = 0;                                            \
   400         int left = srcrect->x;                                  \
   401         int right = left + srcrect->w;                          \
   402         dstbuf -= left * bpp;                                   \
   403         for (;;) {                                              \
   404             int run;                                            \
   405             ofs += *(Type *)srcbuf;                             \
   406             run = ((Type *)srcbuf)[1];                          \
   407             srcbuf += 2 * sizeof(Type);                         \
   408             if (run) {                                          \
   409                 /* clip to left and right borders */            \
   410                 if (ofs < right) {                              \
   411                     int start = 0;                              \
   412                     int len = run;                              \
   413                     int startcol;                               \
   414                     if (left - ofs > 0) {                       \
   415                         start = left - ofs;                     \
   416                         len -= start;                           \
   417                         if (len <= 0)                           \
   418                             goto nocopy ## bpp ## do_blit;      \
   419                     }                                           \
   420                     startcol = ofs + start;                     \
   421                     if (len > right - startcol)                 \
   422                         len = right - startcol;                 \
   423                     do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
   424                         len, bpp, alpha);                       \
   425                 }                                               \
   426     nocopy ## bpp ## do_blit:                                   \
   427                 srcbuf += run * bpp;                            \
   428                 ofs += run;                                     \
   429             } else if (!ofs)                                    \
   430                 break;                                          \
   431                                                                 \
   432             if (ofs == w) {                                     \
   433                 ofs = 0;                                        \
   434                 dstbuf += surf_dst->pitch;                      \
   435                 if (!--linecount)                               \
   436                     break;                                      \
   437             }                                                   \
   438         }                                                       \
   439     } while(0)
   440 
   441     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
   442 
   443 #undef RLECLIPBLIT
   444 
   445 }
   446 
   447 
   448 /* blit a colorkeyed RLE surface
        *
        * surf_src: source surface. The RLE stream is read from
        *           surf_src->map->data and the per-surface alpha from
        *           surf_src->map->info.a.
        * srcrect:  area of the source to draw. srcrect->y lines are skipped at
        *           the top and srcrect->h lines are drawn. When srcrect->x is
        *           non-zero or srcrect->w differs from surf_src->w, the work
        *           is handed to RLEClipBlit.
        * surf_dst: destination surface; locked for the duration of the blit
        *           when SDL_MUSTLOCK() says so.
        * dstrect:  only x and y are read; they give the top-left destination
        *           pixel.
        *
        * Returns -1 if the destination cannot be locked, 0 otherwise (also
        * when the stream ends while the top lines are being skipped).
        *
        * NOTE(review): the destination byte offset is computed with
        * surf_src->format->BytesPerPixel. This relies on the encoded source
        * having the destination's pixel size, as the format description at
        * the top of the file states for colorkeyed surfaces - confirm.
        */
   449 int SDLCALL
   450 SDL_RLEBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
   451             SDL_Surface * surf_dst, SDL_Rect * dstrect)
   452 {
   453     Uint8 *dstbuf;
   454     Uint8 *srcbuf;
   455     int x, y;
   456     int w = surf_src->w;
   457     unsigned alpha;
   458 
   459     /* Lock the destination if necessary */
   460     if (SDL_MUSTLOCK(surf_dst)) {
   461         if (SDL_LockSurface(surf_dst) < 0) {
   462             return (-1);
   463         }
   464     }
   465 
   466     /* Set up the source and destination pointers */
   467     x = dstrect->x;
   468     y = dstrect->y;
   469     dstbuf = (Uint8 *) surf_dst->pixels
   470         + y * surf_dst->pitch + x * surf_src->format->BytesPerPixel;
   471     srcbuf = (Uint8 *) surf_src->map->data;
   472 
   473     {
   474         /* skip lines at the top if necessary */
   475         int vskip = srcrect->y;
   476         int ofs = 0;
   477         if (vskip) {
   478 
           /* Walks <skip> <run> segments without drawing until vskip full
            * lines (ofs == w) have been consumed. Uses ofs, vskip, w and
            * srcbuf from the enclosing scope and jumps to `done` if the
            * end-of-stream marker is reached first. */
   479 #define RLESKIP(bpp, Type)          \
   480         for(;;) {           \
   481             int run;            \
   482             ofs += *(Type *)srcbuf; \
   483             run = ((Type *)srcbuf)[1];  \
   484             srcbuf += sizeof(Type) * 2; \
   485             if(run) {           \
   486             srcbuf += run * bpp;    \
   487             ofs += run;     \
   488             } else if(!ofs)     \
   489             goto done;      \
   490             if(ofs == w) {      \
   491             ofs = 0;        \
   492             if(!--vskip)        \
   493                 break;      \
   494             }               \
   495         }
   496 
   497             switch (surf_src->format->BytesPerPixel) {
   498             case 1:
   499                 RLESKIP(1, Uint8);
   500                 break;
   501             case 2:
   502                 RLESKIP(2, Uint8);
   503                 break;
   504             case 3:
   505                 RLESKIP(3, Uint8);
   506                 break;
   507             case 4:
   508                 RLESKIP(4, Uint16);
   509                 break;
   510             }
   511 
   512 #undef RLESKIP
   513 
   514         }
   515     }
   516 
   517     alpha = surf_src->map->info.a;
   518     /* if left or right edge clipping needed, call clip blit */
   519     if (srcrect->x || srcrect->w != surf_src->w) {
   520         RLEClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect, alpha);
   521     } else {
   522         SDL_PixelFormat *fmt = surf_src->format;
   523 
           /* Unclipped case: every run is blitted in full at its column
            * offset; stops after srcrect->h lines or at the end-of-stream
            * marker (zero run at column 0). */
   524 #define RLEBLIT(bpp, Type, do_blit)                       \
   525         do {                                  \
   526         int linecount = srcrect->h;                   \
   527         int ofs = 0;                              \
   528         for(;;) {                             \
   529             unsigned run;                         \
   530             ofs += *(Type *)srcbuf;                   \
   531             run = ((Type *)srcbuf)[1];                    \
   532             srcbuf += 2 * sizeof(Type);                   \
   533             if(run) {                             \
   534             do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
   535             srcbuf += run * bpp;                      \
   536             ofs += run;                       \
   537             } else if(!ofs)                       \
   538             break;                            \
   539             if(ofs == w) {                        \
   540             ofs = 0;                          \
   541             dstbuf += surf_dst->pitch;                     \
   542             if(!--linecount)                      \
   543                 break;                        \
   544             }                                 \
   545         }                                 \
   546         } while(0)
   547 
   548         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
   549 
   550 #undef RLEBLIT
   551     }
   552 
   553   done:
   554     /* Unlock the destination if necessary */
   555     if (SDL_MUSTLOCK(surf_dst)) {
   556         SDL_UnlockSurface(surf_dst);
   557     }
   558     return (0);
   559 }
   560 
   561 #undef OPAQUE_BLIT /* in the code above it is referenced only through CHOOSE_BLIT, which both blit functions have already expanded */
   562 
   563 /*
   564  * Per-pixel blitting macros for translucent pixels:
   565  * These use the same techniques as the per-surface blitting macros
   566  */
   567 
   568 /*
   569  * For 32bpp pixels, we have made sure the alpha is stored in the top
   570  * 8 bits, so proceed as usual
        *
        * src: encoded source pixel with its alpha in bits 24-31; read once
        *      into a local.
        * dst: destination pixel lvalue; read, blended with the source and
        *      written back with bits 24-31 forced to 0xff.
        *
        * The blend is the same two-step scheme as ALPHA_BLIT32_888: the
        * 0xff00ff components together, the 0xff00 component on its own.
   571  */
   572 #define BLIT_TRANSL_888(src, dst)               \
   573     do {                            \
   574         Uint32 s = src;                     \
   575     Uint32 d = dst;                     \
   576     unsigned alpha = s >> 24;               \
   577     Uint32 s1 = s & 0xff00ff;               \
   578     Uint32 d1 = d & 0xff00ff;               \
   579     d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;    \
   580     s &= 0xff00;                        \
   581     d &= 0xff00;                        \
   582     d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
   583     dst = d1 | d | 0xff000000;              \
   584     } while(0)
   585 
   586 /*
   587  * For 16bpp pixels, we have stored the 5 most significant alpha bits in
   588  * bits 5-10. As before, we can process all 3 RGB components at the same time.
   589  */
   590 #define BLIT_TRANSL_565(src, dst)       \
   591     do {                    \
   592     Uint32 s = src;             \
   593     Uint32 d = dst;             \
   594     unsigned alpha = (s & 0x3e0) >> 5;  \
   595     s &= 0x07e0f81f;            \
   596     d = (d | d << 16) & 0x07e0f81f;     \
   597     d += (s - d) * alpha >> 5;      \
   598     d &= 0x07e0f81f;            \
   599     dst = (Uint16)(d | d >> 16);            \
   600     } while(0)
   601 
   602 #define BLIT_TRANSL_555(src, dst)       \
   603     do {                    \
   604     Uint32 s = src;             \
   605     Uint32 d = dst;             \
   606     unsigned alpha = (s & 0x3e0) >> 5;  \
   607     s &= 0x03e07c1f;            \
   608     d = (d | d << 16) & 0x03e07c1f;     \
   609     d += (s - d) * alpha >> 5;      \
   610     d &= 0x03e07c1f;            \
   611     dst = (Uint16)(d | d >> 16);            \
   612     } while(0)
   613 
   614 /* used to save the destination format in the encoding. Designed to be
   615    macro-compatible with SDL_PixelFormat but without the unneeded fields */
       /* RLEAlphaSurface() fills one of these from the destination surface's
          format and writes it at the very start of the encoded buffer; the
          alpha blitter steps over it by adding sizeof(RLEDestFormat). */
   616 typedef struct
   617 {
   618     Uint8 BytesPerPixel;        /* pixel size of the destination format */
   619     Uint8 padding[3];           /* not written by RLEAlphaSurface(); presumably alignment filler - TODO confirm */
   620     Uint32 Rmask;               /* channel masks, copied from the destination format */
   621     Uint32 Gmask;
   622     Uint32 Bmask;
   623     Uint32 Amask;
   624     Uint8 Rloss;                /* channel loss values, copied from the destination format */
   625     Uint8 Gloss;
   626     Uint8 Bloss;
   627     Uint8 Aloss;
   628     Uint8 Rshift;               /* channel shifts, copied from the destination format */
   629     Uint8 Gshift;
   630     Uint8 Bshift;
   631     Uint8 Ashift;
   632 } RLEDestFormat;
   633 
   634 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
       /*
        * w        - width in pixels of one encoded source line (SDL_RLEAlphaBlit
        *            passes surf_src->w)
        * srcbuf   - encoded data, positioned at the first line to draw
        * surf_dst - destination surface; its format picks the blend macro and
        *            its pitch advances dstbuf from line to line
        * dstbuf   - destination address of the first visible pixel; the macro
        *            moves it back by srcrect->x pixels so that run offsets,
        *            which count from the start of the source line, can be
        *            used to index it directly
        * srcrect  - x/w select the visible columns, h the number of lines
        *
        * Each encoded line is a list of opaque segments (skip count, run
        * count, then the run's pixels in destination format), followed for
        * 16bpp destinations by padding to a 4-byte boundary, then a list of
        * translucent segments (two Uint16 counts and 4 bytes per pixel).
        * A zero run count at offset 0 in the opaque list ends the data.
        *
        * NOTE(review): the line loop decrements its counter before testing
        * it, so srcrect->h is assumed to be positive - confirm with callers.
        */
   635 static void
   636 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
   637                  Uint8 * dstbuf, SDL_Rect * srcrect)
   638 {
   639     SDL_PixelFormat *df = surf_dst->format;
   640     /*
   641      * clipped blitter: Ptype is the destination pixel type,
   642      * Ctype the translucent count type, and do_blend the macro
   643      * to blend one pixel.
   644      */
   645 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)              \
   646     do {                                  \
   647     int linecount = srcrect->h;                   \
   648     int left = srcrect->x;                        \
   649     int right = left + srcrect->w;                    \
   650     dstbuf -= left * sizeof(Ptype);                   \
   651     do {                                  \
   652         int ofs = 0;                          \
   653         /* blit opaque pixels on one line */              \
   654         do {                              \
   655         unsigned run;                         \
   656         ofs += ((Ctype *)srcbuf)[0];                  \
   657         run = ((Ctype *)srcbuf)[1];               \
   658         srcbuf += 2 * sizeof(Ctype);                  \
   659         if(run) {                         \
   660             /* clip to left and right borders */          \
   661             int cofs = ofs;                   \
   662             int crun = run;                   \
   663             if(left - cofs > 0) {                 \
   664             crun -= left - cofs;                  \
   665             cofs = left;                      \
   666             }                             \
   667             if(crun > right - cofs)               \
   668             crun = right - cofs;                  \
   669             if(crun > 0)                      \
   670             PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),     \
   671                    srcbuf + (cofs - ofs) * sizeof(Ptype), \
   672                    (unsigned)crun, sizeof(Ptype));    \
   673             srcbuf += run * sizeof(Ptype);            \
   674             ofs += run;                       \
   675         } else if(!ofs)                       \
   676             return;                       \
   677         } while(ofs < w);                         \
   678         /* skip padding if necessary */               \
   679         if(sizeof(Ptype) == 2)                    \
   680         srcbuf += (uintptr_t)srcbuf & 2;              \
   681         /* blit translucent pixels on the same line */        \
   682         ofs = 0;                              \
   683         do {                              \
   684         unsigned run;                         \
   685         ofs += ((Uint16 *)srcbuf)[0];                 \
   686         run = ((Uint16 *)srcbuf)[1];                  \
   687         srcbuf += 4;                          \
   688         if(run) {                         \
   689             /* clip to left and right borders */          \
   690             int cofs = ofs;                   \
   691             int crun = run;                   \
   692             if(left - cofs > 0) {                 \
   693             crun -= left - cofs;                  \
   694             cofs = left;                      \
   695             }                             \
   696             if(crun > right - cofs)               \
   697             crun = right - cofs;                  \
   698             if(crun > 0) {                    \
   699             Ptype *dst = (Ptype *)dstbuf + cofs;          \
   700             Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);    \
   701             int i;                        \
   702             for(i = 0; i < crun; i++)             \
   703                 do_blend(src[i], dst[i]);             \
   704             }                             \
   705             srcbuf += run * 4;                    \
   706             ofs += run;                       \
   707         }                             \
   708         } while(ofs < w);                         \
   709         dstbuf += surf_dst->pitch;                     \
   710     } while(--linecount);                         \
   711     } while(0)
   712 
           /* 2-byte destinations use byte-sized opaque counts and the 565 blend
              when any colour mask equals 0x07e0, the 555 blend otherwise;
              4-byte destinations use Uint16 opaque counts and the 888 blend.
              Other pixel sizes fall through without drawing anything. */
   713     switch (df->BytesPerPixel) {
   714     case 2:
   715         if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
   716             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
   717         else
   718             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
   719         break;
   720     case 4:
   721         RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
   722         break;
   723     }
   724 }
   725 
   726 /* blit a pixel-alpha RLE surface */
       /*
        * surf_src - RLE-encoded source; the encoded data is read from
        *            surf_src->map->data, after the RLEDestFormat header
        * srcrect  - part of the source to draw: y lines are skipped at the top,
        *            h lines are drawn, and x/w select the clipped blitter when
        *            they do not cover the full source width
        * surf_dst - destination surface, locked for the duration of the blit
        *            when SDL_MUSTLOCK() says so
        * dstrect  - only x and y are read: the destination position
        *
        * Returns -1 if the destination could not be locked, 0 otherwise.
        *
        * NOTE(review): the line loops decrement their counters before testing
        * them, so srcrect->h is assumed to be positive and srcrect->y
        * non-negative - presumably guaranteed by the caller's clipping; confirm.
        */
   727 int SDLCALL
   728 SDL_RLEAlphaBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
   729                  SDL_Surface * surf_dst, SDL_Rect * dstrect)
   730 {
   731     int x, y;
   732     int w = surf_src->w;
   733     Uint8 *srcbuf, *dstbuf;
   734     SDL_PixelFormat *df = surf_dst->format;
   735 
   736     /* Lock the destination if necessary */
   737     if (SDL_MUSTLOCK(surf_dst)) {
   738         if (SDL_LockSurface(surf_dst) < 0) {
   739             return -1;
   740         }
   741     }
   742 
   743     x = dstrect->x;
   744     y = dstrect->y;
   745     dstbuf = (Uint8 *) surf_dst->pixels + y * surf_dst->pitch + x * df->BytesPerPixel;
   746     srcbuf = (Uint8 *) surf_src->map->data + sizeof(RLEDestFormat);
   747 
   748     {
   749         /* skip lines at the top if necessary */
   750         int vskip = srcrect->y;
   751         if (vskip) {
   752             int ofs;
   753             if (df->BytesPerPixel == 2) {
   754                 /* the 16/32 interleaved format */
   755                 do {
   756                     /* skip opaque line */
   757                     ofs = 0;
   758                     do {
   759                         int run;
   760                         ofs += srcbuf[0];
   761                         run = srcbuf[1];
   762                         srcbuf += 2;
   763                         if (run) {
   764                             srcbuf += 2 * run;
   765                             ofs += run;
   766                         } else if (!ofs)
   767                             goto done;  /* zero run at offset 0: end of encoded data */
   768                     } while (ofs < w);
   769 
   770                     /* skip padding */
   771                     srcbuf += (uintptr_t) srcbuf & 2;
   772 
   773                     /* skip translucent line */
   774                     ofs = 0;
   775                     do {
   776                         int run;
   777                         ofs += ((Uint16 *) srcbuf)[0];
   778                         run = ((Uint16 *) srcbuf)[1];
   779                         srcbuf += 4 * (run + 1);    /* 4 bytes of counts plus 4 bytes per pixel */
   780                         ofs += run;
   781                     } while (ofs < w);
   782                 } while (--vskip);
   783             } else {
   784                 /* the 32/32 interleaved format */
   785                 vskip <<= 1;    /* opaque and translucent have same format */
   786                 do {
   787                     ofs = 0;
   788                     do {
   789                         int run;
   790                         ofs += ((Uint16 *) srcbuf)[0];
   791                         run = ((Uint16 *) srcbuf)[1];
   792                         srcbuf += 4;
   793                         if (run) {
   794                             srcbuf += 4 * run;
   795                             ofs += run;
   796                         } else if (!ofs)
   797                             goto done;  /* zero run at offset 0: end of encoded data */
   798                     } while (ofs < w);
   799                 } while (--vskip);
   800             }
   801         }
   802     }
   803 
   804     /* if left or right edge clipping needed, call clip blit */
   805     if (srcrect->x || srcrect->w != surf_src->w) {
   806         RLEAlphaClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect);
   807     } else {
   808 
   809         /*
   810          * non-clipped blitter. Ptype is the destination pixel type,
   811          * Ctype the translucent count type, and do_blend the
   812          * macro to blend one pixel.
   813          */
   814 #define RLEALPHABLIT(Ptype, Ctype, do_blend)                 \
   815     do {                                 \
   816         int linecount = srcrect->h;                  \
   817         do {                             \
   818         int ofs = 0;                         \
   819         /* blit opaque pixels on one line */             \
   820         do {                             \
   821             unsigned run;                    \
   822             ofs += ((Ctype *)srcbuf)[0];             \
   823             run = ((Ctype *)srcbuf)[1];              \
   824             srcbuf += 2 * sizeof(Ctype);             \
   825             if(run) {                        \
   826             PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
   827                    run, sizeof(Ptype));          \
   828             srcbuf += run * sizeof(Ptype);           \
   829             ofs += run;                  \
   830             } else if(!ofs)                  \
   831             goto done;                   \
   832         } while(ofs < w);                    \
   833         /* skip padding if necessary */              \
   834         if(sizeof(Ptype) == 2)                   \
   835             srcbuf += (uintptr_t)srcbuf & 2;             \
   836         /* blit translucent pixels on the same line */       \
   837         ofs = 0;                         \
   838         do {                             \
   839             unsigned run;                    \
   840             ofs += ((Uint16 *)srcbuf)[0];            \
   841             run = ((Uint16 *)srcbuf)[1];             \
   842             srcbuf += 4;                     \
   843             if(run) {                        \
   844             Ptype *dst = (Ptype *)dstbuf + ofs;      \
   845             unsigned i;                  \
   846             for(i = 0; i < run; i++) {           \
   847                 Uint32 src = *(Uint32 *)srcbuf;      \
   848                 do_blend(src, *dst);             \
   849                 srcbuf += 4;                 \
   850                 dst++;                   \
   851             }                        \
   852             ofs += run;                  \
   853             }                            \
   854         } while(ofs < w);                    \
   855         dstbuf += surf_dst->pitch;                    \
   856         } while(--linecount);                    \
   857     } while(0)
   858 
               /* same format dispatch as the clipped blitter: 565 blend when any
                  colour mask equals 0x07e0, 555 blend for other 2-byte formats,
                  888 blend for 4-byte formats */
   859         switch (df->BytesPerPixel) {
   860         case 2:
   861             if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
   862                 || df->Bmask == 0x07e0)
   863                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
   864             else
   865                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
   866             break;
   867         case 4:
   868             RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
   869             break;
   870         }
   871     }
   872 
   873   done:
   874     /* Unlock the destination if necessary */
   875     if (SDL_MUSTLOCK(surf_dst)) {
   876         SDL_UnlockSurface(surf_dst);
   877     }
   878     return 0;
   879 }
   880 
   881 /*
   882  * Auxiliary functions:
   883  * The encoding functions take 32bpp rgb + a, and
   884  * return the number of bytes copied to the destination.
   885  * The decoding functions copy to 32bpp rgb + a, and
   886  * return the number of bytes copied from the source.
   887  * These are only used in the encoder and un-RLE code and are therefore not
   888  * highly optimised.
   889  */
   890 
   891 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
   892 static int
   893 copy_opaque_16(void *dst, Uint32 * src, int n,
   894                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   895 {
   896     int i;
   897     Uint16 *d = dst;
   898     for (i = 0; i < n; i++) {
   899         unsigned r, g, b;
   900         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
   901         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
   902         src++;
   903         d++;
   904     }
   905     return n * 2;
   906 }
   907 
   908 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
   909 static int
   910 uncopy_opaque_16(Uint32 * dst, void *src, int n,
   911                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   912 {
   913     int i;
   914     Uint16 *s = src;
   915     unsigned alpha = dfmt->Amask ? 255 : 0;
   916     for (i = 0; i < n; i++) {
   917         unsigned r, g, b;
   918         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
   919         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
   920         s++;
   921         dst++;
   922     }
   923     return n * 2;
   924 }
   925 
   926 
   927 
   928 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
   929 static int
   930 copy_transl_565(void *dst, Uint32 * src, int n,
   931                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   932 {
   933     int i;
   934     Uint32 *d = dst;
   935     for (i = 0; i < n; i++) {
   936         unsigned r, g, b, a;
   937         Uint16 pix;
   938         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   939         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   940         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
   941         src++;
   942         d++;
   943     }
   944     return n * 4;
   945 }
   946 
   947 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
   948 static int
   949 copy_transl_555(void *dst, Uint32 * src, int n,
   950                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   951 {
   952     int i;
   953     Uint32 *d = dst;
   954     for (i = 0; i < n; i++) {
   955         unsigned r, g, b, a;
   956         Uint16 pix;
   957         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   958         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   959         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
   960         src++;
   961         d++;
   962     }
   963     return n * 4;
   964 }
   965 
   966 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
   967 static int
   968 uncopy_transl_16(Uint32 * dst, void *src, int n,
   969                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   970 {
   971     int i;
   972     Uint32 *s = src;
   973     for (i = 0; i < n; i++) {
   974         unsigned r, g, b, a;
   975         Uint32 pix = *s++;
   976         a = (pix & 0x3e0) >> 2;
   977         pix = (pix & ~0x3e0) | pix >> 16;
   978         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
   979         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   980         dst++;
   981     }
   982     return n * 4;
   983 }
   984 
   985 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   986 static int
   987 copy_32(void *dst, Uint32 * src, int n,
   988         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   989 {
   990     int i;
   991     Uint32 *d = dst;
   992     for (i = 0; i < n; i++) {
   993         unsigned r, g, b, a;
   994         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   995         RLEPIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
   996         d++;
   997         src++;
   998     }
   999     return n * 4;
  1000 }
  1001 
  1002 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
  1003 static int
  1004 uncopy_32(Uint32 * dst, void *src, int n,
  1005           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
  1006 {
  1007     int i;
  1008     Uint32 *s = src;
  1009     for (i = 0; i < n; i++) {
  1010         unsigned r, g, b, a;
  1011         Uint32 pixel = *s++;
  1012         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
  1013         a = pixel >> 24;
  1014         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
  1015         dst++;
  1016     }
  1017     return n * 4;
  1018 }
  1019 
  1020 #define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255)
  1021 
  1022 #define ISTRANSL(pixel, fmt)    \
  1023     ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U)
  1024 
  1025 /* convert surface to be quickly alpha-blittable onto dest, if possible */
       /*
        * Encodes a 32bpp surface with per-pixel alpha for the destination format
        * found in surface->map->dst. Supported destinations are the 16bpp 565 and
        * 555 layouts and 32bpp layouts whose colour masks together are 0x00ffffff.
        *
        * Encoded buffer: an RLEDestFormat header, then for every scan line the
        * opaque segments, alignment padding, and the translucent segments; a
        * final (0, 0) opaque count pair is written after the last non-blank line.
        *
        * Returns -1 if the surface/destination combination is unsupported, the
        * value of SDL_OutOfMemory() if the buffer cannot be allocated, and 0 on
        * success. On success the buffer is stored in surface->map->data and
        * surface->pixels is freed unless SDL_PREALLOC is set.
        */
  1026 static int
  1027 RLEAlphaSurface(SDL_Surface * surface)
  1028 {
  1029     SDL_Surface *dest;
  1030     SDL_PixelFormat *df;
  1031     int maxsize = 0;
  1032     int max_opaque_run;
  1033     int max_transl_run = 65535;
  1034     unsigned masksum;
  1035     Uint8 *rlebuf, *dst;
  1036     int (*copy_opaque) (void *, Uint32 *, int,
  1037                         SDL_PixelFormat *, SDL_PixelFormat *);
  1038     int (*copy_transl) (void *, Uint32 *, int,
  1039                         SDL_PixelFormat *, SDL_PixelFormat *);
  1040 
  1041     dest = surface->map->dst;
  1042     if (!dest)
  1043         return -1;
  1044     df = dest->format;
  1045     if (surface->format->BitsPerPixel != 32)
  1046         return -1;              /* only 32bpp source supported */
  1047 
  1048     /* find out whether the destination is one we support,
  1049        and determine the max size of the encoded result */
  1050     masksum = df->Rmask | df->Gmask | df->Bmask;
  1051     switch (df->BytesPerPixel) {
  1052     case 2:
  1053         /* 16bpp: only support 565 and 555 formats */
  1054         switch (masksum) {
  1055         case 0xffff:
  1056             if (df->Gmask == 0x07e0
  1057                 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
  1058                 copy_opaque = copy_opaque_16;
  1059                 copy_transl = copy_transl_565;
  1060             } else
  1061                 return -1;
  1062             break;
  1063         case 0x7fff:
  1064             if (df->Gmask == 0x03e0
  1065                 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
  1066                 copy_opaque = copy_opaque_16;
  1067                 copy_transl = copy_transl_555;
  1068             } else
  1069                 return -1;
  1070             break;
  1071         default:
  1072             return -1;
  1073         }
  1074         max_opaque_run = 255;   /* runs stored as bytes */
  1075 
  1076         /* worst case is alternating opaque and translucent pixels,
  1077            with room for alignment padding between lines */
  1078         maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
  1079         break;
  1080     case 4:
  1081         if (masksum != 0x00ffffff)
  1082             return -1;          /* requires unused high byte */
  1083         copy_opaque = copy_32;
  1084         copy_transl = copy_32;
  1085         max_opaque_run = 255;   /* runs stored as short ints */
               /* NOTE(review): ADD_OPAQUE_COUNTS writes Uint16 counts for 4-byte
                  destinations, so the format could hold runs longer than 255;
                  confirm whether this limit is intentional. */
  1086 
  1087         /* worst case is alternating opaque and translucent pixels */
  1088         maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
  1089         break;
  1090     default:
  1091         return -1;              /* anything else unsupported right now */
  1092     }
  1093 
  1094     maxsize += sizeof(RLEDestFormat);
  1095     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1096     if (!rlebuf) {
  1097         return SDL_OutOfMemory();
  1098     }
  1099     {
  1100         /* save the destination format so we can undo the encoding later */
  1101         RLEDestFormat *r = (RLEDestFormat *) rlebuf;
  1102         r->BytesPerPixel = df->BytesPerPixel;
  1103         r->Rmask = df->Rmask;
  1104         r->Gmask = df->Gmask;
  1105         r->Bmask = df->Bmask;
  1106         r->Amask = df->Amask;
  1107         r->Rloss = df->Rloss;
  1108         r->Gloss = df->Gloss;
  1109         r->Bloss = df->Bloss;
  1110         r->Aloss = df->Aloss;
  1111         r->Rshift = df->Rshift;
  1112         r->Gshift = df->Gshift;
  1113         r->Bshift = df->Bshift;
  1114         r->Ashift = df->Ashift;
  1115     }
  1116     dst = rlebuf + sizeof(RLEDestFormat);
  1117 
  1118     /* Do the actual encoding */
  1119     {
  1120         int x, y;
  1121         int h = surface->h, w = surface->w;
  1122         SDL_PixelFormat *sf = surface->format;
  1123         Uint32 *src = (Uint32 *) surface->pixels;
  1124         Uint8 *lastline = dst;  /* end of last non-blank line */
  1125 
  1126         /* opaque counts are 8 or 16 bits, depending on target depth */
  1127 #define ADD_OPAQUE_COUNTS(n, m)         \
  1128     if(df->BytesPerPixel == 4) {        \
  1129         ((Uint16 *)dst)[0] = n;     \
  1130         ((Uint16 *)dst)[1] = m;     \
  1131         dst += 4;               \
  1132     } else {                \
  1133         dst[0] = n;             \
  1134         dst[1] = m;             \
  1135         dst += 2;               \
  1136     }
  1137 
  1138         /* translucent counts are always 16 bit */
  1139 #define ADD_TRANSL_COUNTS(n, m)     \
  1140     (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
  1141 
  1142         for (y = 0; y < h; y++) {
  1143             int runstart, skipstart;
  1144             int blankline = 0;
  1145             /* First encode all opaque pixels of a scan line */
  1146             x = 0;
  1147             do {
  1148                 int run, skip, len;
  1149                 skipstart = x;
  1150                 while (x < w && !ISOPAQUE(src[x], sf))
  1151                     x++;
  1152                 runstart = x;
  1153                 while (x < w && ISOPAQUE(src[x], sf))
  1154                     x++;
  1155                 skip = runstart - skipstart;
  1156                 if (skip == w)
  1157                     blankline = 1;      /* no opaque pixel on this line */
  1158                 run = x - runstart;
                       /* a skip longer than one count can hold is split into
                          maximal skips with empty runs */
  1159                 while (skip > max_opaque_run) {
  1160                     ADD_OPAQUE_COUNTS(max_opaque_run, 0);
  1161                     skip -= max_opaque_run;
  1162                 }
  1163                 len = MIN(run, max_opaque_run);
  1164                 ADD_OPAQUE_COUNTS(skip, len);
  1165                 dst += copy_opaque(dst, src + runstart, len, sf, df);
  1166                 runstart += len;
  1167                 run -= len;
                       /* the rest of an over-long run continues as zero-skip segments */
  1168                 while (run) {
  1169                     len = MIN(run, max_opaque_run);
  1170                     ADD_OPAQUE_COUNTS(0, len);
  1171                     dst += copy_opaque(dst, src + runstart, len, sf, df);
  1172                     runstart += len;
  1173                     run -= len;
  1174                 }
  1175             } while (x < w);
  1176 
  1177             /* Make sure the next output address is 32-bit aligned */
  1178             dst += (uintptr_t) dst & 2;
  1179 
  1180             /* Next, encode all translucent pixels of the same scan line */
  1181             x = 0;
  1182             do {
  1183                 int run, skip, len;
  1184                 skipstart = x;
  1185                 while (x < w && !ISTRANSL(src[x], sf))
  1186                     x++;
  1187                 runstart = x;
  1188                 while (x < w && ISTRANSL(src[x], sf))
  1189                     x++;
  1190                 skip = runstart - skipstart;
  1191                 blankline &= (skip == w);   /* blank only if no translucent pixel either */
  1192                 run = x - runstart;
  1193                 while (skip > max_transl_run) {
  1194                     ADD_TRANSL_COUNTS(max_transl_run, 0);
  1195                     skip -= max_transl_run;
  1196                 }
  1197                 len = MIN(run, max_transl_run);
  1198                 ADD_TRANSL_COUNTS(skip, len);
  1199                 dst += copy_transl(dst, src + runstart, len, sf, df);
  1200                 runstart += len;
  1201                 run -= len;
  1202                 while (run) {
  1203                     len = MIN(run, max_transl_run);
  1204                     ADD_TRANSL_COUNTS(0, len);
  1205                     dst += copy_transl(dst, src + runstart, len, sf, df);
  1206                     runstart += len;
  1207                     run -= len;
  1208                 }
  1209                 if (!blankline)
  1210                     lastline = dst;
  1211             } while (x < w);
  1212 
  1213             src += surface->pitch >> 2;     /* pitch is in bytes, src steps in 32-bit pixels */
  1214         }
  1215         dst = lastline;         /* back up past trailing blank lines */
  1216         ADD_OPAQUE_COUNTS(0, 0);
  1217     }
  1218 
  1219 #undef ADD_OPAQUE_COUNTS
  1220 #undef ADD_TRANSL_COUNTS
  1221 
  1222     /* Now that we have it encoded, release the original pixels */
  1223     if (!(surface->flags & SDL_PREALLOC)) {
  1224         SDL_SIMDFree(surface->pixels);
  1225         surface->pixels = NULL;
  1226         surface->flags &= ~SDL_SIMD_ALIGNED;
  1227     }
  1228 
  1229     /* realloc the buffer to release unused memory */
  1230     {
  1231         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1232         if (!p)
  1233             p = rlebuf;         /* shrinking failed: keep using the original block */
  1234         surface->map->data = p;
  1235     }
  1236 
  1237     return 0;
  1238 }
  1239 
  1240 static Uint32
  1241 getpix_8(Uint8 * srcbuf)
  1242 {
  1243     return *srcbuf;
  1244 }
  1245 
  1246 static Uint32
  1247 getpix_16(Uint8 * srcbuf)
  1248 {
  1249     return *(Uint16 *) srcbuf;
  1250 }
  1251 
  1252 static Uint32
  1253 getpix_24(Uint8 * srcbuf)
  1254 {
  1255 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  1256     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
  1257 #else
  1258     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
  1259 #endif
  1260 }
  1261 
  1262 static Uint32
  1263 getpix_32(Uint8 * srcbuf)
  1264 {
  1265     return *(Uint32 *) srcbuf;
  1266 }
  1267 
  1268 typedef Uint32(*getpix_func) (Uint8 *);
  1269 
  1270 static const getpix_func getpixes[4] = {
  1271     getpix_8, getpix_16, getpix_24, getpix_32
  1272 };
  1273 
  1274 static int
  1275 RLEColorkeySurface(SDL_Surface * surface)
  1276 {
           /*
            * Encodes a colorkeyed surface of 1 to 4 bytes per pixel. Each scan
            * line becomes a list of (skip, run) count pairs, each followed by the
            * run's pixels copied unchanged; counts are Uint16 for 4-byte pixels
            * and single bytes otherwise. A pixel is transparent when it equals
            * the colorkey once the format's alpha mask bits are cleared on both.
            * A final (0, 0) pair is written after the last non-blank line.
            *
            * Returns -1 for other pixel sizes, the value of SDL_OutOfMemory() if
            * the buffer cannot be allocated, and 0 on success. On success the
            * buffer is stored in surface->map->data and surface->pixels is freed
            * unless SDL_PREALLOC is set.
            */
  1277     Uint8 *rlebuf, *dst;
  1278     int maxn;
  1279     int y;
  1280     Uint8 *srcbuf, *lastline;
  1281     int maxsize = 0;
  1282     const int bpp = surface->format->BytesPerPixel;
  1283     getpix_func getpix;
  1284     Uint32 ckey, rgbmask;
  1285     int w, h;
  1286 
  1287     /* calculate the worst case size for the compressed surface */
  1288     switch (bpp) {
  1289     case 1:
  1290         /* worst case is alternating opaque and transparent pixels,
  1291            starting with an opaque pixel */
  1292         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
  1293         break;
  1294     case 2:
  1295     case 3:
  1296         /* worst case is solid runs, at most 255 pixels wide */
  1297         maxsize = surface->h * (2 * (surface->w / 255 + 1)
  1298                                 + surface->w * bpp) + 2;
  1299         break;
  1300     case 4:
  1301         /* worst case is solid runs, at most 65535 pixels wide */
  1302         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
  1303                                 + surface->w * 4) + 4;
  1304         break;
  1305 
  1306     default:
  1307         return -1;
  1308     }
  1309 
  1310     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1311     if (rlebuf == NULL) {
  1312         return SDL_OutOfMemory();
  1313     }
  1314 
  1315     /* Set up the conversion */
  1316     srcbuf = (Uint8 *) surface->pixels;
  1317     maxn = bpp == 4 ? 65535 : 255;      /* largest value one count can hold */
  1318     dst = rlebuf;
  1319     rgbmask = ~surface->format->Amask;
  1320     ckey = surface->map->info.colorkey & rgbmask;
  1321     lastline = dst;
  1322     getpix = getpixes[bpp - 1];
  1323     w = surface->w;
  1324     h = surface->h;
  1325 
           /* append one (skip, run) count pair in the width that matches bpp */
  1326 #define ADD_COUNTS(n, m)            \
  1327     if(bpp == 4) {              \
  1328         ((Uint16 *)dst)[0] = n;     \
  1329         ((Uint16 *)dst)[1] = m;     \
  1330         dst += 4;               \
  1331     } else {                \
  1332         dst[0] = n;             \
  1333         dst[1] = m;             \
  1334         dst += 2;               \
  1335     }
  1336 
  1337     for (y = 0; y < h; y++) {
  1338         int x = 0;
  1339         int blankline = 0;
  1340         do {
  1341             int run, skip, len;
  1342             int runstart;
  1343             int skipstart = x;
  1344 
  1345             /* find run of transparent, then opaque pixels */
  1346             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
  1347                 x++;
  1348             runstart = x;
  1349             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
  1350                 x++;
  1351             skip = runstart - skipstart;
  1352             if (skip == w)
  1353                 blankline = 1;          /* the whole line is transparent */
  1354             run = x - runstart;
  1355 
  1356             /* encode segment */
  1357             while (skip > maxn) {
  1358                 ADD_COUNTS(maxn, 0);
  1359                 skip -= maxn;
  1360             }
  1361             len = MIN(run, maxn);
  1362             ADD_COUNTS(skip, len);
  1363             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1364             dst += len * bpp;
  1365             run -= len;
  1366             runstart += len;
                   /* the rest of an over-long run continues as zero-skip segments */
  1367             while (run) {
  1368                 len = MIN(run, maxn);
  1369                 ADD_COUNTS(0, len);
  1370                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1371                 dst += len * bpp;
  1372                 runstart += len;
  1373                 run -= len;
  1374             }
  1375             if (!blankline)
  1376                 lastline = dst;
  1377         } while (x < w);
  1378 
  1379         srcbuf += surface->pitch;
  1380     }
  1381     dst = lastline;             /* back up past trailing blank lines */
  1382     ADD_COUNTS(0, 0);
  1383 
  1384 #undef ADD_COUNTS
  1385 
  1386     /* Now that we have it encoded, release the original pixels */
  1387     if (!(surface->flags & SDL_PREALLOC)) {
  1388         SDL_SIMDFree(surface->pixels);
  1389         surface->pixels = NULL;
  1390         surface->flags &= ~SDL_SIMD_ALIGNED;
  1391     }
  1392 
  1393     /* realloc the buffer to release unused memory */
  1394     {
  1395         /* If realloc returns NULL, the original block is left intact */
  1396         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1397         if (!p)
  1398             p = rlebuf;
  1399         surface->map->data = p;
  1400     }
  1401 
  1402     return 0;
  1403 }
  1404 
  1405 int
  1406 SDL_RLESurface(SDL_Surface * surface)
  1407 {
  1408     int flags;
  1409 
  1410     /* Clear any previous RLE conversion */
  1411     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
  1412         SDL_UnRLESurface(surface, 1);
  1413     }
  1414 
  1415     /* We don't support RLE encoding of bitmaps */
  1416     if (surface->format->BitsPerPixel < 8) {
  1417         return -1;
  1418     }
  1419 
  1420     /* Make sure the pixels are available */
  1421     if (!surface->pixels) {
  1422         return -1;
  1423     }
  1424 
  1425     /* If we don't have colorkey or blending, nothing to do... */
  1426     flags = surface->map->info.flags;
  1427     if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
  1428         return -1;
  1429     }
  1430 
  1431     /* Pass on combinations not supported */
  1432     if ((flags & SDL_COPY_MODULATE_COLOR) ||
  1433         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
  1434         (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
  1435         (flags & SDL_COPY_NEAREST)) {
  1436         return -1;
  1437     }
  1438 
  1439     /* Encode and set up the blit */
  1440     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
  1441         if (!surface->map->identity) {
  1442             return -1;
  1443         }
  1444         if (RLEColorkeySurface(surface) < 0) {
  1445             return -1;
  1446         }
  1447         surface->map->blit = SDL_RLEBlit;
  1448         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
  1449     } else {
  1450         if (RLEAlphaSurface(surface) < 0) {
  1451             return -1;
  1452         }
  1453         surface->map->blit = SDL_RLEAlphaBlit;
  1454         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
  1455     }
  1456 
  1457     /* The surface is now accelerated */
  1458     surface->flags |= SDL_RLEACCEL;
  1459 
  1460     return (0);
  1461 }
  1462 
  1463 /*
  1464  * Un-RLE a surface with pixel alpha
  1465  * This may not give back exactly the image before RLE-encoding; all
  1466  * completely transparent pixels will be lost, and color and alpha depth
  1467  * may have been reduced (when encoding for 16bpp targets).
  1468  */
  1469 static SDL_bool
  1470 UnRLEAlpha(SDL_Surface * surface)
  1471 {
  1472     Uint8 *srcbuf;
  1473     Uint32 *dst;
  1474     SDL_PixelFormat *sf = surface->format;
  1475     RLEDestFormat *df = surface->map->data;
  1476     int (*uncopy_opaque) (Uint32 *, void *, int,
  1477                           RLEDestFormat *, SDL_PixelFormat *);
  1478     int (*uncopy_transl) (Uint32 *, void *, int,
  1479                           RLEDestFormat *, SDL_PixelFormat *);
  1480     int w = surface->w;
  1481     int bpp = df->BytesPerPixel;
  1482 
  1483     if (bpp == 2) {
  1484         uncopy_opaque = uncopy_opaque_16;
  1485         uncopy_transl = uncopy_transl_16;
  1486     } else {
  1487         uncopy_opaque = uncopy_transl = uncopy_32;
  1488     }
  1489 
  1490     surface->pixels = SDL_SIMDAlloc(surface->h * surface->pitch);
  1491     if (!surface->pixels) {
  1492         return (SDL_FALSE);
  1493     }
  1494     surface->flags |= SDL_SIMD_ALIGNED;
  1495     /* fill background with transparent pixels */
  1496     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
  1497 
  1498     dst = surface->pixels;
  1499     srcbuf = (Uint8 *) (df + 1);
  1500     for (;;) {
  1501         /* copy opaque pixels */
  1502         int ofs = 0;
  1503         do {
  1504             unsigned run;
  1505             if (bpp == 2) {
  1506                 ofs += srcbuf[0];
  1507                 run = srcbuf[1];
  1508                 srcbuf += 2;
  1509             } else {
  1510                 ofs += ((Uint16 *) srcbuf)[0];
  1511                 run = ((Uint16 *) srcbuf)[1];
  1512                 srcbuf += 4;
  1513             }
  1514             if (run) {
  1515                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
  1516                 ofs += run;
  1517             } else if (!ofs) {
  1518                 goto end_function;
  1519             }
  1520         } while (ofs < w);
  1521 
  1522         /* skip padding if needed */
  1523         if (bpp == 2)
  1524             srcbuf += (uintptr_t) srcbuf & 2;
  1525 
  1526         /* copy translucent pixels */
  1527         ofs = 0;
  1528         do {
  1529             unsigned run;
  1530             ofs += ((Uint16 *) srcbuf)[0];
  1531             run = ((Uint16 *) srcbuf)[1];
  1532             srcbuf += 4;
  1533             if (run) {
  1534                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
  1535                 ofs += run;
  1536             }
  1537         } while (ofs < w);
  1538         dst += surface->pitch >> 2;
  1539     }
  1540 
  1541 end_function:
  1542     return (SDL_TRUE);
  1543 }
  1544 
  1545 void
  1546 SDL_UnRLESurface(SDL_Surface * surface, int recode)
  1547 {
  1548     if (surface->flags & SDL_RLEACCEL) {
  1549         surface->flags &= ~SDL_RLEACCEL;
  1550 
  1551         if (recode && !(surface->flags & SDL_PREALLOC)) {
  1552             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
  1553                 SDL_Rect full;
  1554 
  1555                 /* re-create the original surface */
  1556                 surface->pixels = SDL_SIMDAlloc(surface->h * surface->pitch);
  1557                 if (!surface->pixels) {
  1558                     /* Oh crap... */
  1559                     surface->flags |= SDL_RLEACCEL;
  1560                     return;
  1561                 }
  1562                 surface->flags |= SDL_SIMD_ALIGNED;
  1563 
  1564                 /* fill it with the background color */
  1565                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
  1566 
  1567                 /* now render the encoded surface */
  1568                 full.x = full.y = 0;
  1569                 full.w = surface->w;
  1570                 full.h = surface->h;
  1571                 SDL_RLEBlit(surface, &full, surface, &full);
  1572             } else {
  1573                 if (!UnRLEAlpha(surface)) {
  1574                     /* Oh crap... */
  1575                     surface->flags |= SDL_RLEACCEL;
  1576                     return;
  1577                 }
  1578             }
  1579         }
  1580         surface->map->info.flags &=
  1581             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
  1582 
  1583         SDL_free(surface->map->data);
  1584         surface->map->data = NULL;
  1585     }
  1586 }
  1587 
  1588 /* vi: set ts=4 sw=4 expandtab: */