src/video/SDL_RLEaccel.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 18 May 2018 13:09:30 -0700
changeset 11983 3a50eb90e4b2
parent 11811 5d94cb6b24d3
child 12462 b57435750c5b
permissions -rw-r--r--
Merged latest changes from Steam Link app
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 /*
    24  * RLE encoding for software colorkey and alpha-channel acceleration
    25  *
    26  * Original version by Sam Lantinga
    27  *
    28  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
    29  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
    30  * format, encoder and blitter.
    31  *
    32  * Many thanks to Xark and johns for hints, benchmarks and useful comments
    33  * leading to this code.
    34  *
    35  * Welcome to Macro Mayhem.
    36  */
    37 
    38 /*
    39  * The encoding translates the image data to a stream of segments of the form
    40  *
    41  * <skip> <run> <data>
    42  *
    43  * where <skip> is the number of transparent pixels to skip,
    44  *       <run>  is the number of opaque pixels to blit,
    45  * and   <data> are the pixels themselves.
    46  *
    47  * This basic structure is used both for colorkeyed surfaces, used for simple
    48  * binary transparency and for per-surface alpha blending, and for surfaces
    49  * with per-pixel alpha. The details differ, however:
    50  *
    51  * Encoding of colorkeyed surfaces:
    52  *
    53  *   Encoded pixels always have the same format as the target surface.
    54  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
    55  *   where they are 16 bit. This makes the pixel data aligned at all times.
    56  *   Segments never wrap around from one scan line to the next.
    57  *
    58  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    59  *   beginning of a line.
    60  *
    61  * Encoding of surfaces with per-pixel alpha:
    62  *
    63  *   The sequence begins with a struct RLEDestFormat describing the target
    64  *   pixel format, to provide reliable un-encoding.
    65  *
    66  *   Each scan line is encoded twice: First all completely opaque pixels,
    67  *   encoded in the target format as described above, and then all
    68  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
    69  *   in the following 32-bit format:
    70  *
    71  *   For 32-bit targets, each pixel has the target RGB format but with
    72  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
    73  *   counts are 16 bit.
    74  *
    75  *   For 16-bit targets, each pixel has the target RGB format, but with
    76  *   the middle component (usually green) shifted 16 steps to the left,
    77  *   and the hole filled with the 5 most significant bits of the alpha value.
    78  *   i.e. if the target has the format         rrrrrggggggbbbbb,
    79  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
    80  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
    81  *   for the translucent lines. Two padding bytes may be inserted
    82  *   before each translucent line to keep them 32-bit aligned.
    83  *
    84  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    85  *   beginning of an opaque line.
    86  */
    87 
    88 #include "SDL_video.h"
    89 #include "SDL_sysvideo.h"
    90 #include "SDL_blit.h"
    91 #include "SDL_RLEaccel_c.h"
    92 
    93 #ifndef MIN
    94 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    95 #endif
    96 
    97 #define PIXEL_COPY(to, from, len, bpp)          \
    98     SDL_memcpy(to, from, (size_t)(len) * (bpp))
    99 
   100 /*
   101  * Various colorkey blit methods, for opaque and per-surface alpha
   102  */
   103 
   104 #define OPAQUE_BLIT(to, from, length, bpp, alpha)   \
   105     PIXEL_COPY(to, from, length, bpp)
   106 
   107 /*
   108  * For 32bpp pixels on the form 0x00rrggbb:
   109  * If we treat the middle component separately, we can process the two
   110  * remaining in parallel. This is safe to do because of the gap to the left
   111  * of each component, so the bits from the multiplication don't collide.
   112  * This can be used for any RGB permutation of course.
   113  */
   114 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)      \
   115     do {                                                    \
   116         int i;                                              \
   117         Uint32 *src = (Uint32 *)(from);                     \
   118         Uint32 *dst = (Uint32 *)(to);                       \
   119         for (i = 0; i < (int)(length); i++) {               \
   120             Uint32 s = *src++;                              \
   121             Uint32 d = *dst;                                \
   122             Uint32 s1 = s & 0xff00ff;                       \
   123             Uint32 d1 = d & 0xff00ff;                       \
   124             d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
   125             s &= 0xff00;                                    \
   126             d &= 0xff00;                                    \
   127             d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
   128             *dst++ = d1 | d;                                \
   129         }                                                   \
   130     } while (0)
   131 
   132 /*
   133  * For 16bpp pixels we can go a step further: put the middle component
   134  * in the high 16 bits of a 32 bit word, and process all three RGB
   135  * components at the same time. Since the smallest gap is here just
   136  * 5 bits, we have to scale alpha down to 5 bits as well.
   137  */
   138 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)  \
   139     do {                                                \
   140         int i;                                          \
   141         Uint16 *src = (Uint16 *)(from);                 \
   142         Uint16 *dst = (Uint16 *)(to);                   \
   143         Uint32 ALPHA = alpha >> 3;                      \
   144         for(i = 0; i < (int)(length); i++) {            \
   145             Uint32 s = *src++;                          \
   146             Uint32 d = *dst;                            \
   147             s = (s | s << 16) & 0x07e0f81f;             \
   148             d = (d | d << 16) & 0x07e0f81f;             \
   149             d += (s - d) * ALPHA >> 5;                  \
   150             d &= 0x07e0f81f;                            \
   151             *dst++ = (Uint16)(d | d >> 16);             \
   152         }                                               \
   153     } while(0)
   154 
   155 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)  \
   156     do {                                                \
   157         int i;                                          \
   158         Uint16 *src = (Uint16 *)(from);                 \
   159         Uint16 *dst = (Uint16 *)(to);                   \
   160         Uint32 ALPHA = alpha >> 3;                      \
   161         for(i = 0; i < (int)(length); i++) {            \
   162             Uint32 s = *src++;                          \
   163             Uint32 d = *dst;                            \
   164             s = (s | s << 16) & 0x03e07c1f;             \
   165             d = (d | d << 16) & 0x03e07c1f;             \
   166             d += (s - d) * ALPHA >> 5;                  \
   167             d &= 0x03e07c1f;                            \
   168             *dst++ = (Uint16)(d | d >> 16);             \
   169         }                                               \
   170     } while(0)
   171 
   172 /*
   173  * The general slow catch-all function, for remaining depths and formats
   174  */
   175 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)            \
   176     do {                                                        \
   177         int i;                                                  \
   178         Uint8 *src = from;                                      \
   179         Uint8 *dst = to;                                        \
   180         for (i = 0; i < (int)(length); i++) {                   \
   181             Uint32 s, d;                                        \
   182             unsigned rs, gs, bs, rd, gd, bd;                    \
   183             switch (bpp) {                                      \
   184             case 2:                                             \
   185                 s = *(Uint16 *)src;                             \
   186                 d = *(Uint16 *)dst;                             \
   187                 break;                                          \
   188             case 3:                                             \
   189                 if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
   190                     s = (src[0] << 16) | (src[1] << 8) | src[2]; \
   191                     d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \
   192                 } else {                                        \
   193                     s = (src[2] << 16) | (src[1] << 8) | src[0]; \
   194                     d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \
   195                 }                                               \
   196                 break;                                          \
   197             case 4:                                             \
   198                 s = *(Uint32 *)src;                             \
   199                 d = *(Uint32 *)dst;                             \
   200                 break;                                          \
   201             }                                                   \
   202             RGB_FROM_PIXEL(s, fmt, rs, gs, bs);                 \
   203             RGB_FROM_PIXEL(d, fmt, rd, gd, bd);                 \
   204             rd += (rs - rd) * alpha >> 8;                       \
   205             gd += (gs - gd) * alpha >> 8;                       \
   206             bd += (bs - bd) * alpha >> 8;                       \
   207             PIXEL_FROM_RGB(d, fmt, rd, gd, bd);                 \
   208             switch (bpp) {                                      \
   209             case 2:                                             \
   210                 *(Uint16 *)dst = (Uint16)d;                     \
   211                 break;                                          \
   212             case 3:                                             \
   213                 if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
   214                     dst[0] = (Uint8)(d >> 16);                  \
   215                     dst[1] = (Uint8)(d >> 8);                   \
   216                     dst[2] = (Uint8)(d);                        \
   217                 } else {                                        \
   218                     dst[0] = (Uint8)d;                          \
   219                     dst[1] = (Uint8)(d >> 8);                   \
   220                     dst[2] = (Uint8)(d >> 16);                  \
   221                 }                                               \
   222                 break;                                          \
   223             case 4:                                             \
   224                 *(Uint32 *)dst = d;                             \
   225                 break;                                          \
   226             }                                                   \
   227             src += bpp;                                         \
   228             dst += bpp;                                         \
   229         }                                                       \
   230     } while(0)
   231 
   232 /*
   233  * Special case: 50% alpha (alpha=128)
   234  * This is treated specially because it can be optimized very well, and
   235  * since it is good for many cases of semi-translucency.
   236  * The theory is to do all three components at the same time:
   237  * First zero the lowest bit of each component, which gives us room to
   238  * add them. Then shift right and add the sum of the lowest bits.
   239  */
   240 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)       \
   241     do {                                                        \
   242         int i;                                                  \
   243         Uint32 *src = (Uint32 *)(from);                         \
   244         Uint32 *dst = (Uint32 *)(to);                           \
   245         for(i = 0; i < (int)(length); i++) {                    \
   246             Uint32 s = *src++;                                  \
   247             Uint32 d = *dst;                                    \
   248             *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
   249                  + (s & d & 0x00010101);                        \
   250         }                                                       \
   251     } while(0)
   252 
   253 /*
   254  * For 16bpp, we can actually blend two pixels in parallel, if we take
   255  * care to shift before we add, not after.
   256  */
   257 
   258 /* helper: blend a single 16 bit pixel at 50% */
   259 #define BLEND16_50(dst, src, mask)                              \
   260     do {                                                        \
   261         Uint32 s = *src++;                                      \
   262         Uint32 d = *dst;                                        \
   263         *dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +    \
   264                           (s & d & (~mask & 0xffff)));          \
   265     } while(0)
   266 
   267 /* basic 16bpp blender. mask is the pixels to keep when adding. */
   268 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)     \
   269     do {                                                        \
   270         unsigned n = (length);                                  \
   271         Uint16 *src = (Uint16 *)(from);                         \
   272         Uint16 *dst = (Uint16 *)(to);                           \
   273         if (((uintptr_t)src ^ (uintptr_t)dst) & 3) {            \
   274             /* source and destination not in phase, blit one by one */ \
   275             while (n--)                                         \
   276                 BLEND16_50(dst, src, mask);                     \
   277         } else {                                                \
   278             if ((uintptr_t)src & 3) {                           \
   279                 /* first odd pixel */                           \
   280                 BLEND16_50(dst, src, mask);                     \
   281                 n--;                                            \
   282             }                                                   \
   283             for (; n > 1; n -= 2) {                             \
   284                 Uint32 s = *(Uint32 *)src;                      \
   285                 Uint32 d = *(Uint32 *)dst;                      \
   286                 *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1) \
   287                     + ((d & (mask | mask << 16)) >> 1)          \
   288                     + (s & d & (~(mask | mask << 16)));         \
   289                 src += 2;                                       \
   290                 dst += 2;                                       \
   291             }                                                   \
   292             if (n)                                              \
   293                 BLEND16_50(dst, src, mask); /* last odd pixel */ \
   294         }                                                       \
   295     } while(0)
   296 
   297 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)       \
   298     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
   299 
   300 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)       \
   301     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
   302 
   303 #define CHOOSE_BLIT(blitter, alpha, fmt)                        \
   304     do {                                                        \
   305         if (alpha == 255) {                                     \
   306             switch (fmt->BytesPerPixel) {                       \
   307             case 1: blitter(1, Uint8, OPAQUE_BLIT); break;      \
   308             case 2: blitter(2, Uint8, OPAQUE_BLIT); break;      \
   309             case 3: blitter(3, Uint8, OPAQUE_BLIT); break;      \
   310             case 4: blitter(4, Uint16, OPAQUE_BLIT); break;     \
   311             }                                                   \
   312         } else {                                                \
   313             switch (fmt->BytesPerPixel) {                       \
   314             case 1:                                             \
   315                 /* No 8bpp alpha blitting */                    \
   316                 break;                                          \
   317                                                                 \
   318             case 2:                                             \
   319                 switch (fmt->Rmask | fmt->Gmask | fmt->Bmask) { \
   320                 case 0xffff:                                    \
   321                     if (fmt->Gmask == 0x07e0                    \
   322                         || fmt->Rmask == 0x07e0                 \
   323                         || fmt->Bmask == 0x07e0) {              \
   324                         if (alpha == 128) {                     \
   325                             blitter(2, Uint8, ALPHA_BLIT16_565_50); \
   326                         } else {                                \
   327                             blitter(2, Uint8, ALPHA_BLIT16_565); \
   328                         }                                       \
   329                     } else                                      \
   330                         goto general16;                         \
   331                     break;                                      \
   332                                                                 \
   333                 case 0x7fff:                                    \
   334                     if (fmt->Gmask == 0x03e0                    \
   335                         || fmt->Rmask == 0x03e0                 \
   336                         || fmt->Bmask == 0x03e0) {              \
   337                         if (alpha == 128) {                     \
   338                             blitter(2, Uint8, ALPHA_BLIT16_555_50); \
   339                         } else {                                \
   340                             blitter(2, Uint8, ALPHA_BLIT16_555); \
   341                         }                                       \
   342                         break;                                  \
   343                     } else                                      \
   344                         goto general16;                         \
   345                     break;                                      \
   346                                                                 \
   347                 default:                                        \
   348     general16:                                                  \
   349                     blitter(2, Uint8, ALPHA_BLIT_ANY);          \
   350                 }                                               \
   351                 break;                                          \
   352                                                                 \
   353             case 3:                                             \
   354                 blitter(3, Uint8, ALPHA_BLIT_ANY);              \
   355                 break;                                          \
   356                                                                 \
   357             case 4:                                             \
   358                 if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
   359                     && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
   360                     || fmt->Bmask == 0xff00)) {                 \
   361                     if (alpha == 128) {                         \
   362                         blitter(4, Uint16, ALPHA_BLIT32_888_50); \
   363                     } else {                                    \
   364                         blitter(4, Uint16, ALPHA_BLIT32_888);   \
   365                     }                                           \
   366                 } else                                          \
   367                     blitter(4, Uint16, ALPHA_BLIT_ANY);         \
   368                 break;                                          \
   369             }                                                   \
   370         }                                                       \
   371     } while(0)
   372 
   373 /*
   374  * Set a pixel value using the given format, except that the alpha value is
   375  * placed in the top byte. This is the format used for RLE with alpha.
   376  */
   377 #define RLEPIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                      \
   378 {                                                                       \
   379     Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|                             \
   380         ((g>>fmt->Gloss)<<fmt->Gshift)|                                 \
   381         ((b>>fmt->Bloss)<<fmt->Bshift)|                                 \
   382         (a<<24);                                                        \
   383 }
   384 
   385 /*
   386  * This takes care of the case when the surface is clipped on the left and/or
   387  * right. Top clipping has already been taken care of.
   388  */
   389 static void
   390 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
   391             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
   392 {
   393     SDL_PixelFormat *fmt = surf_dst->format;
   394 
   395 #define RLECLIPBLIT(bpp, Type, do_blit)                         \
   396     do {                                                        \
   397         int linecount = srcrect->h;                             \
   398         int ofs = 0;                                            \
   399         int left = srcrect->x;                                  \
   400         int right = left + srcrect->w;                          \
   401         dstbuf -= left * bpp;                                   \
   402         for (;;) {                                              \
   403             int run;                                            \
   404             ofs += *(Type *)srcbuf;                             \
   405             run = ((Type *)srcbuf)[1];                          \
   406             srcbuf += 2 * sizeof(Type);                         \
   407             if (run) {                                          \
   408                 /* clip to left and right borders */            \
   409                 if (ofs < right) {                              \
   410                     int start = 0;                              \
   411                     int len = run;                              \
   412                     int startcol;                               \
   413                     if (left - ofs > 0) {                       \
   414                         start = left - ofs;                     \
   415                         len -= start;                           \
   416                         if (len <= 0)                           \
   417                             goto nocopy ## bpp ## do_blit;      \
   418                     }                                           \
   419                     startcol = ofs + start;                     \
   420                     if (len > right - startcol)                 \
   421                         len = right - startcol;                 \
   422                     do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
   423                         len, bpp, alpha);                       \
   424                 }                                               \
   425     nocopy ## bpp ## do_blit:                                   \
   426                 srcbuf += run * bpp;                            \
   427                 ofs += run;                                     \
   428             } else if (!ofs)                                    \
   429                 break;                                          \
   430                                                                 \
   431             if (ofs == w) {                                     \
   432                 ofs = 0;                                        \
   433                 dstbuf += surf_dst->pitch;                      \
   434                 if (!--linecount)                               \
   435                     break;                                      \
   436             }                                                   \
   437         }                                                       \
   438     } while(0)
   439 
   440     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
   441 
   442 #undef RLECLIPBLIT
   443 
   444 }
   445 
   446 
   447 /* blit a colorkeyed RLE surface */
   448 int SDLCALL
   449 SDL_RLEBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
   450             SDL_Surface * surf_dst, SDL_Rect * dstrect)
   451 {
   452     Uint8 *dstbuf;
   453     Uint8 *srcbuf;
   454     int x, y;
   455     int w = surf_src->w;
   456     unsigned alpha;
   457 
   458     /* Lock the destination if necessary */
   459     if (SDL_MUSTLOCK(surf_dst)) {
   460         if (SDL_LockSurface(surf_dst) < 0) {
   461             return (-1);
   462         }
   463     }
   464 
   465     /* Set up the source and destination pointers */
   466     x = dstrect->x;
   467     y = dstrect->y;
   468     dstbuf = (Uint8 *) surf_dst->pixels
   469         + y * surf_dst->pitch + x * surf_src->format->BytesPerPixel;
   470     srcbuf = (Uint8 *) surf_src->map->data;
   471 
   472     {
   473         /* skip lines at the top if necessary */
   474         int vskip = srcrect->y;
   475         int ofs = 0;
   476         if (vskip) {
   477 
   478 #define RLESKIP(bpp, Type)          \
   479         for(;;) {           \
   480             int run;            \
   481             ofs += *(Type *)srcbuf; \
   482             run = ((Type *)srcbuf)[1];  \
   483             srcbuf += sizeof(Type) * 2; \
   484             if(run) {           \
   485             srcbuf += run * bpp;    \
   486             ofs += run;     \
   487             } else if(!ofs)     \
   488             goto done;      \
   489             if(ofs == w) {      \
   490             ofs = 0;        \
   491             if(!--vskip)        \
   492                 break;      \
   493             }               \
   494         }
   495 
   496             switch (surf_src->format->BytesPerPixel) {
   497             case 1:
   498                 RLESKIP(1, Uint8);
   499                 break;
   500             case 2:
   501                 RLESKIP(2, Uint8);
   502                 break;
   503             case 3:
   504                 RLESKIP(3, Uint8);
   505                 break;
   506             case 4:
   507                 RLESKIP(4, Uint16);
   508                 break;
   509             }
   510 
   511 #undef RLESKIP
   512 
   513         }
   514     }
   515 
   516     alpha = surf_src->map->info.a;
   517     /* if left or right edge clipping needed, call clip blit */
   518     if (srcrect->x || srcrect->w != surf_src->w) {
   519         RLEClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect, alpha);
   520     } else {
   521         SDL_PixelFormat *fmt = surf_src->format;
   522 
   523 #define RLEBLIT(bpp, Type, do_blit)                       \
   524         do {                                  \
   525         int linecount = srcrect->h;                   \
   526         int ofs = 0;                              \
   527         for(;;) {                             \
   528             unsigned run;                         \
   529             ofs += *(Type *)srcbuf;                   \
   530             run = ((Type *)srcbuf)[1];                    \
   531             srcbuf += 2 * sizeof(Type);                   \
   532             if(run) {                             \
   533             do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
   534             srcbuf += run * bpp;                      \
   535             ofs += run;                       \
   536             } else if(!ofs)                       \
   537             break;                            \
   538             if(ofs == w) {                        \
   539             ofs = 0;                          \
   540             dstbuf += surf_dst->pitch;                     \
   541             if(!--linecount)                      \
   542                 break;                        \
   543             }                                 \
   544         }                                 \
   545         } while(0)
   546 
   547         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
   548 
   549 #undef RLEBLIT
   550     }
   551 
   552   done:
   553     /* Unlock the destination if necessary */
   554     if (SDL_MUSTLOCK(surf_dst)) {
   555         SDL_UnlockSurface(surf_dst);
   556     }
   557     return (0);
   558 }
   559 
   560 #undef OPAQUE_BLIT
   561 
   562 /*
   563  * Per-pixel blitting macros for translucent pixels:
   564  * These use the same techniques as the per-surface blitting macros
   565  */
   566 
   567 /*
   568  * For 32bpp pixels, we have made sure the alpha is stored in the top
   569  * 8 bits, so proceed as usual
   570  */
   571 #define BLIT_TRANSL_888(src, dst)               \
   572     do {                            \
   573         Uint32 s = src;                     \
   574     Uint32 d = dst;                     \
   575     unsigned alpha = s >> 24;               \
   576     Uint32 s1 = s & 0xff00ff;               \
   577     Uint32 d1 = d & 0xff00ff;               \
   578     d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;    \
   579     s &= 0xff00;                        \
   580     d &= 0xff00;                        \
   581     d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
   582     dst = d1 | d | 0xff000000;              \
   583     } while(0)
   584 
   585 /*
   586  * For 16bpp pixels, we have stored the 5 most significant alpha bits in
   587  * bits 5-10. As before, we can process all 3 RGB components at the same time.
   588  */
   589 #define BLIT_TRANSL_565(src, dst)       \
   590     do {                    \
   591     Uint32 s = src;             \
   592     Uint32 d = dst;             \
   593     unsigned alpha = (s & 0x3e0) >> 5;  \
   594     s &= 0x07e0f81f;            \
   595     d = (d | d << 16) & 0x07e0f81f;     \
   596     d += (s - d) * alpha >> 5;      \
   597     d &= 0x07e0f81f;            \
   598     dst = (Uint16)(d | d >> 16);            \
   599     } while(0)
   600 
   601 #define BLIT_TRANSL_555(src, dst)       \
   602     do {                    \
   603     Uint32 s = src;             \
   604     Uint32 d = dst;             \
   605     unsigned alpha = (s & 0x3e0) >> 5;  \
   606     s &= 0x03e07c1f;            \
   607     d = (d | d << 16) & 0x03e07c1f;     \
   608     d += (s - d) * alpha >> 5;      \
   609     d &= 0x03e07c1f;            \
   610     dst = (Uint16)(d | d >> 16);            \
   611     } while(0)
   612 
   613 /* used to save the destination format in the encoding. Designed to be
   614    macro-compatible with SDL_PixelFormat but without the unneeded fields.
       RLEAlphaSurface() writes one of these at the very start of the encoded
       buffer, and SDL_RLEAlphaBlit() skips sizeof(RLEDestFormat) bytes to
       reach the first run. The uncopy_* decoders receive it as the source
       format argument of RGB_FROM_PIXEL. */
   615 typedef struct
   616 {
   617     Uint8 BytesPerPixel;
   618     Uint8 padding[3];       /* presumably keeps the 32-bit masks 4-byte aligned */
   619     Uint32 Rmask;
   620     Uint32 Gmask;
   621     Uint32 Bmask;
   622     Uint32 Amask;
   623     Uint8 Rloss;
   624     Uint8 Gloss;
   625     Uint8 Bloss;
   626     Uint8 Aloss;
   627     Uint8 Rshift;
   628     Uint8 Gshift;
   629     Uint8 Bshift;
   630     Uint8 Ashift;
   631 } RLEDestFormat;
   632 
   633 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
       /*
        * w        - pixels per encoded line; each line's segment loops run
        *            until the accumulated offset reaches w
        * srcbuf   - encoded stream, positioned at the first line to draw
        * surf_dst - destination surface; its format selects the blitter and
        *            its pitch advances dstbuf between lines
        * dstbuf   - destination address of the first visible pixel; the macro
        *            rebases it by srcrect->x pixels so that source offsets
        *            can index it directly
        * srcrect  - x and w give the visible column window, h the line count
        *
        * Returns early when an opaque segment with zero skip and zero run is
        * read before any progress on a line (the encoder's end marker).
        * Destination depths other than 2 or 4 bytes per pixel are ignored.
        */
   634 static void
   635 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
   636                  Uint8 * dstbuf, SDL_Rect * srcrect)
   637 {
   638     SDL_PixelFormat *df = surf_dst->format;
   639     /*
   640      * clipped blitter: Ptype is the destination pixel type,
   641      * Ctype the opaque count type (translucent counts are always
   642      * Uint16), and do_blend the macro to blend one pixel.
   643      */
   644 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)              \
   645     do {                                  \
   646     int linecount = srcrect->h;                   \
   647     int left = srcrect->x;                        \
   648     int right = left + srcrect->w;                    \
   649     dstbuf -= left * sizeof(Ptype);                   \
   650     do {                                  \
   651         int ofs = 0;                          \
   652         /* blit opaque pixels on one line */              \
   653         do {                              \
   654         unsigned run;                         \
   655         ofs += ((Ctype *)srcbuf)[0];                  \
   656         run = ((Ctype *)srcbuf)[1];               \
   657         srcbuf += 2 * sizeof(Ctype);                  \
   658         if(run) {                         \
   659             /* clip to left and right borders */          \
   660             int cofs = ofs;                   \
   661             int crun = run;                   \
   662             if(left - cofs > 0) {                 \
   663             crun -= left - cofs;                  \
   664             cofs = left;                      \
   665             }                             \
   666             if(crun > right - cofs)               \
   667             crun = right - cofs;                  \
   668             if(crun > 0)                      \
   669             PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),     \
   670                    srcbuf + (cofs - ofs) * sizeof(Ptype), \
   671                    (unsigned)crun, sizeof(Ptype));    \
   672             srcbuf += run * sizeof(Ptype);            \
   673             ofs += run;                       \
   674         } else if(!ofs)                       \
   675             return;                       \
   676         } while(ofs < w);                         \
   677         /* skip padding if necessary */               \
   678         if(sizeof(Ptype) == 2)                    \
   679         srcbuf += (uintptr_t)srcbuf & 2;              \
   680         /* blit translucent pixels on the same line */        \
   681         ofs = 0;                              \
   682         do {                              \
   683         unsigned run;                         \
   684         ofs += ((Uint16 *)srcbuf)[0];                 \
   685         run = ((Uint16 *)srcbuf)[1];                  \
   686         srcbuf += 4;                          \
   687         if(run) {                         \
   688             /* clip to left and right borders */          \
   689             int cofs = ofs;                   \
   690             int crun = run;                   \
   691             if(left - cofs > 0) {                 \
   692             crun -= left - cofs;                  \
   693             cofs = left;                      \
   694             }                             \
   695             if(crun > right - cofs)               \
   696             crun = right - cofs;                  \
   697             if(crun > 0) {                    \
   698             Ptype *dst = (Ptype *)dstbuf + cofs;          \
   699             Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);    \
   700             int i;                        \
   701             for(i = 0; i < crun; i++)             \
   702                 do_blend(src[i], dst[i]);             \
   703             }                             \
   704             srcbuf += run * 4;                    \
   705             ofs += run;                       \
   706         }                             \
   707         } while(ofs < w);                         \
   708         dstbuf += surf_dst->pitch;                     \
   709     } while(--linecount);                         \
   710     } while(0)
   711 
           /* 16bpp targets: a 0x07e0 channel mask identifies 565, anything
              else is blended as 555 */
   712     switch (df->BytesPerPixel) {
   713     case 2:
   714         if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
   715             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
   716         else
   717             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
   718         break;
   719     case 4:
   720         RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
   721         break;
   722     }
   723 }
   724 
   725 /* blit a pixel-alpha RLE surface */
       /*
        * surf_src - RLE-encoded source; the encoded stream is read from
        *            surf_src->map->data, after the RLEDestFormat header
        * srcrect  - region of the source to draw: y lines are skipped at the
        *            top, h lines are drawn, and x / w select the clipped
        *            blitter when they do not cover the full source width
        * surf_dst - destination surface (locked here if SDL_MUSTLOCK says so)
        * dstrect  - only x and y are read: the top-left destination pixel
        *
        * Returns -1 if the destination cannot be locked, 0 otherwise. Reading
        * an opaque segment with zero skip and zero run before any progress on
        * a line ends the blit early.
        */
   726 int SDLCALL
   727 SDL_RLEAlphaBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
   728                  SDL_Surface * surf_dst, SDL_Rect * dstrect)
   729 {
   730     int x, y;
   731     int w = surf_src->w;
   732     Uint8 *srcbuf, *dstbuf;
   733     SDL_PixelFormat *df = surf_dst->format;
   734 
   735     /* Lock the destination if necessary */
   736     if (SDL_MUSTLOCK(surf_dst)) {
   737         if (SDL_LockSurface(surf_dst) < 0) {
   738             return -1;
   739         }
   740     }
   741 
   742     x = dstrect->x;
   743     y = dstrect->y;
   744     dstbuf = (Uint8 *) surf_dst->pixels + y * surf_dst->pitch + x * df->BytesPerPixel;
   745     srcbuf = (Uint8 *) surf_src->map->data + sizeof(RLEDestFormat);
   746 
   747     {
   748         /* skip lines at the top if necessary */
   749         int vskip = srcrect->y;
   750         if (vskip) {
   751             int ofs;
   752             if (df->BytesPerPixel == 2) {
   753                 /* the 16/32 interleaved format */
   754                 do {
   755                     /* skip opaque line */
   756                     ofs = 0;
   757                     do {
   758                         int run;
   759                         ofs += srcbuf[0];
   760                         run = srcbuf[1];
   761                         srcbuf += 2;
   762                         if (run) {
   763                             srcbuf += 2 * run;
   764                             ofs += run;
   765                         } else if (!ofs)
   766                             goto done;
   767                     } while (ofs < w);
   768 
   769                     /* skip padding */
   770                     srcbuf += (uintptr_t) srcbuf & 2;
   771 
   772                     /* skip translucent line */
   773                     ofs = 0;
   774                     do {
   775                         int run;
   776                         ofs += ((Uint16 *) srcbuf)[0];
   777                         run = ((Uint16 *) srcbuf)[1];
   778                         srcbuf += 4 * (run + 1);
   779                         ofs += run;
   780                     } while (ofs < w);
   781                 } while (--vskip);
   782             } else {
   783                 /* the 32/32 interleaved format */
   784                 vskip <<= 1;    /* opaque and translucent have same format */
   785                 do {
   786                     ofs = 0;
   787                     do {
   788                         int run;
   789                         ofs += ((Uint16 *) srcbuf)[0];
   790                         run = ((Uint16 *) srcbuf)[1];
   791                         srcbuf += 4;
   792                         if (run) {
   793                             srcbuf += 4 * run;
   794                             ofs += run;
   795                         } else if (!ofs)
   796                             goto done;
   797                     } while (ofs < w);
   798                 } while (--vskip);
   799             }
   800         }
   801     }
   802 
   803     /* if left or right edge clipping needed, call clip blit */
   804     if (srcrect->x || srcrect->w != surf_src->w) {
   805         RLEAlphaClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect);
   806     } else {
   807 
   808         /*
   809          * non-clipped blitter. Ptype is the destination pixel type,
   810          * Ctype the opaque count type (translucent counts are always
   811          * Uint16), and do_blend the macro to blend one pixel.
   812          */
   813 #define RLEALPHABLIT(Ptype, Ctype, do_blend)                 \
   814     do {                                 \
   815         int linecount = srcrect->h;                  \
   816         do {                             \
   817         int ofs = 0;                         \
   818         /* blit opaque pixels on one line */             \
   819         do {                             \
   820             unsigned run;                    \
   821             ofs += ((Ctype *)srcbuf)[0];             \
   822             run = ((Ctype *)srcbuf)[1];              \
   823             srcbuf += 2 * sizeof(Ctype);             \
   824             if(run) {                        \
   825             PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
   826                    run, sizeof(Ptype));          \
   827             srcbuf += run * sizeof(Ptype);           \
   828             ofs += run;                  \
   829             } else if(!ofs)                  \
   830             goto done;                   \
   831         } while(ofs < w);                    \
   832         /* skip padding if necessary */              \
   833         if(sizeof(Ptype) == 2)                   \
   834             srcbuf += (uintptr_t)srcbuf & 2;             \
   835         /* blit translucent pixels on the same line */       \
   836         ofs = 0;                         \
   837         do {                             \
   838             unsigned run;                    \
   839             ofs += ((Uint16 *)srcbuf)[0];            \
   840             run = ((Uint16 *)srcbuf)[1];             \
   841             srcbuf += 4;                     \
   842             if(run) {                        \
   843             Ptype *dst = (Ptype *)dstbuf + ofs;      \
   844             unsigned i;                  \
   845             for(i = 0; i < run; i++) {           \
   846                 Uint32 src = *(Uint32 *)srcbuf;      \
   847                 do_blend(src, *dst);             \
   848                 srcbuf += 4;                 \
   849                 dst++;                   \
   850             }                        \
   851             ofs += run;                  \
   852             }                            \
   853         } while(ofs < w);                    \
   854         dstbuf += surf_dst->pitch;                    \
   855         } while(--linecount);                    \
   856     } while(0)
   857 
               /* 16bpp targets: a 0x07e0 channel mask identifies 565,
                  anything else is blended as 555 */
   858         switch (df->BytesPerPixel) {
   859         case 2:
   860             if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
   861                 || df->Bmask == 0x07e0)
   862                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
   863             else
   864                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
   865             break;
   866         case 4:
   867             RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
   868             break;
   869         }
   870     }
   871 
   872   done:
   873     /* Unlock the destination if necessary */
   874     if (SDL_MUSTLOCK(surf_dst)) {
   875         SDL_UnlockSurface(surf_dst);
   876     }
   877     return 0;
   878 }
   879 
   880 /*
   881  * Auxiliary functions:
   882  * The encoding functions take 32bpp rgb + a, and
   883  * return the number of bytes copied to the destination.
   884  * The decoding functions copy to 32bpp rgb + a, and
   885  * return the number of bytes copied from the source.
   886  * These are only used in the encoder and un-RLE code and are therefore not
   887  * highly optimised.
   888  */
   889 
   890 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
   891 static int
   892 copy_opaque_16(void *dst, Uint32 * src, int n,
   893                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   894 {
   895     int i;
   896     Uint16 *d = dst;
   897     for (i = 0; i < n; i++) {
   898         unsigned r, g, b;
   899         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
   900         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
   901         src++;
   902         d++;
   903     }
   904     return n * 2;
   905 }
   906 
   907 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
   908 static int
   909 uncopy_opaque_16(Uint32 * dst, void *src, int n,
   910                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   911 {
   912     int i;
   913     Uint16 *s = src;
   914     unsigned alpha = dfmt->Amask ? 255 : 0;
   915     for (i = 0; i < n; i++) {
   916         unsigned r, g, b;
   917         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
   918         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
   919         s++;
   920         dst++;
   921     }
   922     return n * 2;
   923 }
   924 
   925 
   926 
   927 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
   928 static int
   929 copy_transl_565(void *dst, Uint32 * src, int n,
   930                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   931 {
   932     int i;
   933     Uint32 *d = dst;
   934     for (i = 0; i < n; i++) {
   935         unsigned r, g, b, a;
   936         Uint16 pix;
   937         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   938         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   939         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
   940         src++;
   941         d++;
   942     }
   943     return n * 4;
   944 }
   945 
   946 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
   947 static int
   948 copy_transl_555(void *dst, Uint32 * src, int n,
   949                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   950 {
   951     int i;
   952     Uint32 *d = dst;
   953     for (i = 0; i < n; i++) {
   954         unsigned r, g, b, a;
   955         Uint16 pix;
   956         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   957         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   958         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
   959         src++;
   960         d++;
   961     }
   962     return n * 4;
   963 }
   964 
   965 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
   966 static int
   967 uncopy_transl_16(Uint32 * dst, void *src, int n,
   968                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   969 {
   970     int i;
   971     Uint32 *s = src;
   972     for (i = 0; i < n; i++) {
   973         unsigned r, g, b, a;
   974         Uint32 pix = *s++;
   975         a = (pix & 0x3e0) >> 2;
   976         pix = (pix & ~0x3e0) | pix >> 16;
   977         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
   978         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   979         dst++;
   980     }
   981     return n * 4;
   982 }
   983 
   984 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   985 static int
   986 copy_32(void *dst, Uint32 * src, int n,
   987         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   988 {
   989     int i;
   990     Uint32 *d = dst;
   991     for (i = 0; i < n; i++) {
   992         unsigned r, g, b, a;
   993         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   994         RLEPIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
   995         d++;
   996         src++;
   997     }
   998     return n * 4;
   999 }
  1000 
  1001 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
  1002 static int
  1003 uncopy_32(Uint32 * dst, void *src, int n,
  1004           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
  1005 {
  1006     int i;
  1007     Uint32 *s = src;
  1008     for (i = 0; i < n; i++) {
  1009         unsigned r, g, b, a;
  1010         Uint32 pixel = *s++;
  1011         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
  1012         a = pixel >> 24;
  1013         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
  1014         dst++;
  1015     }
  1016     return n * 4;
  1017 }
  1018 
  1019 #define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255)
  1020 
  1021 #define ISTRANSL(pixel, fmt)    \
  1022     ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U)
  1023 
  1024 /* convert surface to be quickly alpha-blittable onto dest, if possible */
       /*
        * Encodes the 32bpp pixels of `surface` into the per-pixel-alpha RLE
        * format for its current blit target (surface->map->dst) and stores the
        * resulting buffer in surface->map->data. The buffer starts with an
        * RLEDestFormat header describing the target format.
        *
        * Supported targets: 16bpp 565 or 555, and 32bpp whose R/G/B masks
        * together cover exactly the low 24 bits.
        *
        * Each scan line is written as opaque segments followed, after
        * alignment padding, by translucent segments. Trailing blank lines are
        * dropped and the stream ends with a zero/zero opaque count pair.
        *
        * Returns 0 on success, -1 when there is no target or the formats are
        * unsupported, or the value of SDL_OutOfMemory() if allocation fails.
        * On success the original pixel buffer is freed unless the surface has
        * SDL_PREALLOC set.
        */
  1025 static int
  1026 RLEAlphaSurface(SDL_Surface * surface)
  1027 {
  1028     SDL_Surface *dest;
  1029     SDL_PixelFormat *df;
  1030     int maxsize = 0;
  1031     int max_opaque_run;
  1032     int max_transl_run = 65535;
  1033     unsigned masksum;
  1034     Uint8 *rlebuf, *dst;
  1035     int (*copy_opaque) (void *, Uint32 *, int,
  1036                         SDL_PixelFormat *, SDL_PixelFormat *);
  1037     int (*copy_transl) (void *, Uint32 *, int,
  1038                         SDL_PixelFormat *, SDL_PixelFormat *);
  1039 
  1040     dest = surface->map->dst;
  1041     if (!dest)
  1042         return -1;
  1043     df = dest->format;
  1044     if (surface->format->BitsPerPixel != 32)
  1045         return -1;              /* only 32bpp source supported */
  1046 
  1047     /* find out whether the destination is one we support,
  1048        and determine the max size of the encoded result */
  1049     masksum = df->Rmask | df->Gmask | df->Bmask;
  1050     switch (df->BytesPerPixel) {
  1051     case 2:
  1052         /* 16bpp: only support 565 and 555 formats */
  1053         switch (masksum) {
  1054         case 0xffff:
  1055             if (df->Gmask == 0x07e0
  1056                 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
  1057                 copy_opaque = copy_opaque_16;
  1058                 copy_transl = copy_transl_565;
  1059             } else
  1060                 return -1;
  1061             break;
  1062         case 0x7fff:
  1063             if (df->Gmask == 0x03e0
  1064                 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
  1065                 copy_opaque = copy_opaque_16;
  1066                 copy_transl = copy_transl_555;
  1067             } else
  1068                 return -1;
  1069             break;
  1070         default:
  1071             return -1;
  1072         }
  1073         max_opaque_run = 255;   /* runs stored as bytes */
  1074 
  1075         /* worst case is alternating opaque and translucent pixels,
  1076            with room for alignment padding between lines */
  1077         maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
  1078         break;
  1079     case 4:
  1080         if (masksum != 0x00ffffff)
  1081             return -1;          /* requires unused high byte */
  1082         copy_opaque = copy_32;
  1083         copy_transl = copy_32;
  1084         max_opaque_run = 255;   /* counts are stored as Uint16 here, but runs are still capped at 255 */
  1085 
  1086         /* worst case is alternating opaque and translucent pixels */
  1087         maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
  1088         break;
  1089     default:
  1090         return -1;              /* anything else unsupported right now */
  1091     }
  1092 
  1093     maxsize += sizeof(RLEDestFormat);
  1094     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1095     if (!rlebuf) {
  1096         return SDL_OutOfMemory();
  1097     }
  1098     {
  1099         /* save the destination format so we can undo the encoding later */
  1100         RLEDestFormat *r = (RLEDestFormat *) rlebuf;
  1101         r->BytesPerPixel = df->BytesPerPixel;
  1102         r->Rmask = df->Rmask;
  1103         r->Gmask = df->Gmask;
  1104         r->Bmask = df->Bmask;
  1105         r->Amask = df->Amask;
  1106         r->Rloss = df->Rloss;
  1107         r->Gloss = df->Gloss;
  1108         r->Bloss = df->Bloss;
  1109         r->Aloss = df->Aloss;
  1110         r->Rshift = df->Rshift;
  1111         r->Gshift = df->Gshift;
  1112         r->Bshift = df->Bshift;
  1113         r->Ashift = df->Ashift;
  1114     }
  1115     dst = rlebuf + sizeof(RLEDestFormat);
  1116 
  1117     /* Do the actual encoding */
  1118     {
  1119         int x, y;
  1120         int h = surface->h, w = surface->w;
  1121         SDL_PixelFormat *sf = surface->format;
  1122         Uint32 *src = (Uint32 *) surface->pixels;
  1123         Uint8 *lastline = dst;  /* end of last non-blank line */
  1124 
  1125         /* opaque counts are 8 or 16 bits, depending on target depth */
  1126 #define ADD_OPAQUE_COUNTS(n, m)         \
  1127     if(df->BytesPerPixel == 4) {        \
  1128         ((Uint16 *)dst)[0] = n;     \
  1129         ((Uint16 *)dst)[1] = m;     \
  1130         dst += 4;               \
  1131     } else {                \
  1132         dst[0] = n;             \
  1133         dst[1] = m;             \
  1134         dst += 2;               \
  1135     }
  1136 
  1137         /* translucent counts are always 16 bit */
  1138 #define ADD_TRANSL_COUNTS(n, m)     \
  1139     (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
  1140 
  1141         for (y = 0; y < h; y++) {
  1142             int runstart, skipstart;
  1143             int blankline = 0;
  1144             /* First encode all opaque pixels of a scan line */
  1145             x = 0;
  1146             do {
  1147                 int run, skip, len;
  1148                 skipstart = x;
  1149                 while (x < w && !ISOPAQUE(src[x], sf))
  1150                     x++;
  1151                 runstart = x;
  1152                 while (x < w && ISOPAQUE(src[x], sf))
  1153                     x++;
  1154                 skip = runstart - skipstart;
  1155                 if (skip == w)
  1156                     blankline = 1;
  1157                 run = x - runstart;
                       /* a skip longer than the count field can hold is
                          emitted as several skip-only segments */
  1158                 while (skip > max_opaque_run) {
  1159                     ADD_OPAQUE_COUNTS(max_opaque_run, 0);
  1160                     skip -= max_opaque_run;
  1161                 }
  1162                 len = MIN(run, max_opaque_run);
  1163                 ADD_OPAQUE_COUNTS(skip, len);
  1164                 dst += copy_opaque(dst, src + runstart, len, sf, df);
  1165                 runstart += len;
  1166                 run -= len;
                       /* the rest of an over-long run follows as zero-skip
                          segments */
  1167                 while (run) {
  1168                     len = MIN(run, max_opaque_run);
  1169                     ADD_OPAQUE_COUNTS(0, len);
  1170                     dst += copy_opaque(dst, src + runstart, len, sf, df);
  1171                     runstart += len;
  1172                     run -= len;
  1173                 }
  1174             } while (x < w);
  1175 
  1176             /* Make sure the next output address is 32-bit aligned */
  1177             dst += (uintptr_t) dst & 2;
  1178 
  1179             /* Next, encode all translucent pixels of the same scan line */
  1180             x = 0;
  1181             do {
  1182                 int run, skip, len;
  1183                 skipstart = x;
  1184                 while (x < w && !ISTRANSL(src[x], sf))
  1185                     x++;
  1186                 runstart = x;
  1187                 while (x < w && ISTRANSL(src[x], sf))
  1188                     x++;
  1189                 skip = runstart - skipstart;
                       /* the line stays blank only if it has no translucent
                          pixels either */
  1190                 blankline &= (skip == w);
  1191                 run = x - runstart;
  1192                 while (skip > max_transl_run) {
  1193                     ADD_TRANSL_COUNTS(max_transl_run, 0);
  1194                     skip -= max_transl_run;
  1195                 }
  1196                 len = MIN(run, max_transl_run);
  1197                 ADD_TRANSL_COUNTS(skip, len);
  1198                 dst += copy_transl(dst, src + runstart, len, sf, df);
  1199                 runstart += len;
  1200                 run -= len;
  1201                 while (run) {
  1202                     len = MIN(run, max_transl_run);
  1203                     ADD_TRANSL_COUNTS(0, len);
  1204                     dst += copy_transl(dst, src + runstart, len, sf, df);
  1205                     runstart += len;
  1206                     run -= len;
  1207                 }
  1208                 if (!blankline)
  1209                     lastline = dst;
  1210             } while (x < w);
  1211 
  1212             src += surface->pitch >> 2;
  1213         }
  1214         dst = lastline;         /* back up past trailing blank lines */
  1215         ADD_OPAQUE_COUNTS(0, 0);
  1216     }
  1217 
  1218 #undef ADD_OPAQUE_COUNTS
  1219 #undef ADD_TRANSL_COUNTS
  1220 
  1221     /* Now that we have it encoded, release the original pixels */
  1222     if (!(surface->flags & SDL_PREALLOC)) {
  1223         SDL_free(surface->pixels);
  1224         surface->pixels = NULL;
  1225     }
  1226 
  1227     /* realloc the buffer to release unused memory */
  1228     {
               /* if the shrink fails, keep using the original allocation */
  1229         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1230         if (!p)
  1231             p = rlebuf;
  1232         surface->map->data = p;
  1233     }
  1234 
  1235     return 0;
  1236 }
  1237 
  1238 static Uint32
  1239 getpix_8(Uint8 * srcbuf)
  1240 {
  1241     return *srcbuf;
  1242 }
  1243 
  1244 static Uint32
  1245 getpix_16(Uint8 * srcbuf)
  1246 {
  1247     return *(Uint16 *) srcbuf;
  1248 }
  1249 
  1250 static Uint32
  1251 getpix_24(Uint8 * srcbuf)
  1252 {
  1253 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  1254     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
  1255 #else
  1256     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
  1257 #endif
  1258 }
  1259 
  1260 static Uint32
  1261 getpix_32(Uint8 * srcbuf)
  1262 {
  1263     return *(Uint32 *) srcbuf;
  1264 }
  1265 
  1266 typedef Uint32(*getpix_func) (Uint8 *);  /* reads one pixel at the given address */
  1267 
       /* Pixel readers for 1, 2, 3 and 4 bytes per pixel, in that order;
          RLEColorkeySurface() indexes this table with BytesPerPixel - 1. */
  1268 static const getpix_func getpixes[4] = {
  1269     getpix_8, getpix_16, getpix_24, getpix_32
  1270 };
  1271 
  1272 static int
  1273 RLEColorkeySurface(SDL_Surface * surface)
  1274 {
           /*
            * Encodes `surface` into the colorkey RLE format and stores the
            * buffer in surface->map->data. Every scan line becomes a series
            * of (skip, run) count pairs, each followed by `run` raw pixels
            * copied from the source. A pixel is skipped when its bits outside
            * the alpha mask equal the surface colour key.
            *
            * Counts are single bytes for 1-3 bytes per pixel and Uint16 for
            * 4 bytes per pixel. Trailing blank lines are dropped and the
            * stream ends with a zero/zero count pair.
            *
            * Returns 0 on success, -1 for an unsupported BytesPerPixel, or the
            * value of SDL_OutOfMemory() if allocation fails. On success the
            * original pixel buffer is freed unless the surface has
            * SDL_PREALLOC set.
            */
  1275     Uint8 *rlebuf, *dst;
  1276     int maxn;
  1277     int y;
  1278     Uint8 *srcbuf, *lastline;
  1279     int maxsize = 0;
  1280     const int bpp = surface->format->BytesPerPixel;
  1281     getpix_func getpix;
  1282     Uint32 ckey, rgbmask;
  1283     int w, h;
  1284 
  1285     /* calculate the worst case size for the compressed surface */
  1286     switch (bpp) {
  1287     case 1:
  1288         /* worst case is alternating opaque and transparent pixels,
  1289            starting with an opaque pixel */
  1290         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
  1291         break;
  1292     case 2:
  1293     case 3:
  1294         /* worst case is solid runs, at most 255 pixels wide */
  1295         maxsize = surface->h * (2 * (surface->w / 255 + 1)
  1296                                 + surface->w * bpp) + 2;
  1297         break;
  1298     case 4:
  1299         /* worst case is solid runs, at most 65535 pixels wide */
  1300         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
  1301                                 + surface->w * 4) + 4;
  1302         break;
  1303 
  1304     default:
  1305         return -1;
  1306     }
  1307 
  1308     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1309     if (rlebuf == NULL) {
  1310         return SDL_OutOfMemory();
  1311     }
  1312 
  1313     /* Set up the conversion */
  1314     srcbuf = (Uint8 *) surface->pixels;
  1315     maxn = bpp == 4 ? 65535 : 255;
  1316     dst = rlebuf;
  1317     rgbmask = ~surface->format->Amask;
  1318     ckey = surface->map->info.colorkey & rgbmask;
  1319     lastline = dst;
  1320     getpix = getpixes[bpp - 1];
  1321     w = surface->w;
  1322     h = surface->h;
  1323 
  1324 #define ADD_COUNTS(n, m)            \
  1325     if(bpp == 4) {              \
  1326         ((Uint16 *)dst)[0] = n;     \
  1327         ((Uint16 *)dst)[1] = m;     \
  1328         dst += 4;               \
  1329     } else {                \
  1330         dst[0] = n;             \
  1331         dst[1] = m;             \
  1332         dst += 2;               \
  1333     }
  1334 
  1335     for (y = 0; y < h; y++) {
  1336         int x = 0;
  1337         int blankline = 0;
  1338         do {
  1339             int run, skip, len;
  1340             int runstart;
  1341             int skipstart = x;
  1342 
  1343             /* find run of transparent, then opaque pixels */
  1344             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
  1345                 x++;
  1346             runstart = x;
  1347             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
  1348                 x++;
  1349             skip = runstart - skipstart;
  1350             if (skip == w)
  1351                 blankline = 1;
  1352             run = x - runstart;
  1353 
  1354             /* encode segment */
  1355             while (skip > maxn) {
  1356                 ADD_COUNTS(maxn, 0);
  1357                 skip -= maxn;
  1358             }
  1359             len = MIN(run, maxn);
  1360             ADD_COUNTS(skip, len);
  1361             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1362             dst += len * bpp;
  1363             run -= len;
  1364             runstart += len;
                   /* the rest of an over-long run follows as zero-skip
                      segments */
  1365             while (run) {
  1366                 len = MIN(run, maxn);
  1367                 ADD_COUNTS(0, len);
  1368                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1369                 dst += len * bpp;
  1370                 runstart += len;
  1371                 run -= len;
  1372             }
  1373             if (!blankline)
  1374                 lastline = dst;
  1375         } while (x < w);
  1376 
  1377         srcbuf += surface->pitch;
  1378     }
  1379     dst = lastline;             /* back up past trailing blank lines */
  1380     ADD_COUNTS(0, 0);
  1381 
  1382 #undef ADD_COUNTS
  1383 
  1384     /* Now that we have it encoded, release the original pixels */
  1385     if (!(surface->flags & SDL_PREALLOC)) {
  1386         SDL_free(surface->pixels);
  1387         surface->pixels = NULL;
  1388     }
  1389 
  1390     /* realloc the buffer to release unused memory */
  1391     {
  1392         /* If realloc returns NULL, the original block is left intact */
  1393         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1394         if (!p)
  1395             p = rlebuf;
  1396         surface->map->data = p;
  1397     }
  1398 
  1399     return 0;
  1400 }
  1401 
  1402 int
  1403 SDL_RLESurface(SDL_Surface * surface)
  1404 {
  1405     int flags;
  1406 
  1407     /* Clear any previous RLE conversion */
  1408     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
  1409         SDL_UnRLESurface(surface, 1);
  1410     }
  1411 
  1412     /* We don't support RLE encoding of bitmaps */
  1413     if (surface->format->BitsPerPixel < 8) {
  1414         return -1;
  1415     }
  1416 
  1417     /* Make sure the pixels are available */
  1418     if (!surface->pixels) {
  1419         return -1;
  1420     }
  1421 
  1422     /* If we don't have colorkey or blending, nothing to do... */
  1423     flags = surface->map->info.flags;
  1424     if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
  1425         return -1;
  1426     }
  1427 
  1428     /* Pass on combinations not supported */
  1429     if ((flags & SDL_COPY_MODULATE_COLOR) ||
  1430         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
  1431         (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
  1432         (flags & SDL_COPY_NEAREST)) {
  1433         return -1;
  1434     }
  1435 
  1436     /* Encode and set up the blit */
  1437     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
  1438         if (!surface->map->identity) {
  1439             return -1;
  1440         }
  1441         if (RLEColorkeySurface(surface) < 0) {
  1442             return -1;
  1443         }
  1444         surface->map->blit = SDL_RLEBlit;
  1445         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
  1446     } else {
  1447         if (RLEAlphaSurface(surface) < 0) {
  1448             return -1;
  1449         }
  1450         surface->map->blit = SDL_RLEAlphaBlit;
  1451         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
  1452     }
  1453 
  1454     /* The surface is now accelerated */
  1455     surface->flags |= SDL_RLEACCEL;
  1456 
  1457     return (0);
  1458 }
  1459 
  1460 /*
  1461  * Un-RLE a surface with pixel alpha
  1462  * This may not give back exactly the image before RLE-encoding; all
  1463  * completely transparent pixels will be lost, and color and alpha depth
  1464  * may have been reduced (when encoding for 16bpp targets).
  1465  */
  1466 static SDL_bool
  1467 UnRLEAlpha(SDL_Surface * surface)
  1468 {
  1469     Uint8 *srcbuf;
  1470     Uint32 *dst;
  1471     SDL_PixelFormat *sf = surface->format;
  1472     RLEDestFormat *df = surface->map->data;
  1473     int (*uncopy_opaque) (Uint32 *, void *, int,
  1474                           RLEDestFormat *, SDL_PixelFormat *);
  1475     int (*uncopy_transl) (Uint32 *, void *, int,
  1476                           RLEDestFormat *, SDL_PixelFormat *);
  1477     int w = surface->w;
  1478     int bpp = df->BytesPerPixel;
  1479 
  1480     if (bpp == 2) {
  1481         uncopy_opaque = uncopy_opaque_16;
  1482         uncopy_transl = uncopy_transl_16;
  1483     } else {
  1484         uncopy_opaque = uncopy_transl = uncopy_32;
  1485     }
  1486 
  1487     surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1488     if (!surface->pixels) {
  1489         return (SDL_FALSE);
  1490     }
  1491     /* fill background with transparent pixels */
  1492     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
  1493 
  1494     dst = surface->pixels;
  1495     srcbuf = (Uint8 *) (df + 1);
  1496     for (;;) {
  1497         /* copy opaque pixels */
  1498         int ofs = 0;
  1499         do {
  1500             unsigned run;
  1501             if (bpp == 2) {
  1502                 ofs += srcbuf[0];
  1503                 run = srcbuf[1];
  1504                 srcbuf += 2;
  1505             } else {
  1506                 ofs += ((Uint16 *) srcbuf)[0];
  1507                 run = ((Uint16 *) srcbuf)[1];
  1508                 srcbuf += 4;
  1509             }
  1510             if (run) {
  1511                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
  1512                 ofs += run;
  1513             } else if (!ofs)
  1514                 return (SDL_TRUE);
  1515         } while (ofs < w);
  1516 
  1517         /* skip padding if needed */
  1518         if (bpp == 2)
  1519             srcbuf += (uintptr_t) srcbuf & 2;
  1520 
  1521         /* copy translucent pixels */
  1522         ofs = 0;
  1523         do {
  1524             unsigned run;
  1525             ofs += ((Uint16 *) srcbuf)[0];
  1526             run = ((Uint16 *) srcbuf)[1];
  1527             srcbuf += 4;
  1528             if (run) {
  1529                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
  1530                 ofs += run;
  1531             }
  1532         } while (ofs < w);
  1533         dst += surface->pitch >> 2;
  1534     }
  1535     /* Make the compiler happy */
  1536     return (SDL_TRUE);
  1537 }
  1538 
  1539 void
  1540 SDL_UnRLESurface(SDL_Surface * surface, int recode)
  1541 {
  1542     if (surface->flags & SDL_RLEACCEL) {
  1543         surface->flags &= ~SDL_RLEACCEL;
  1544 
  1545         if (recode && !(surface->flags & SDL_PREALLOC)) {
  1546             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
  1547                 SDL_Rect full;
  1548 
  1549                 /* re-create the original surface */
  1550                 surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1551                 if (!surface->pixels) {
  1552                     /* Oh crap... */
  1553                     surface->flags |= SDL_RLEACCEL;
  1554                     return;
  1555                 }
  1556 
  1557                 /* fill it with the background color */
  1558                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
  1559 
  1560                 /* now render the encoded surface */
  1561                 full.x = full.y = 0;
  1562                 full.w = surface->w;
  1563                 full.h = surface->h;
  1564                 SDL_RLEBlit(surface, &full, surface, &full);
  1565             } else {
  1566                 if (!UnRLEAlpha(surface)) {
  1567                     /* Oh crap... */
  1568                     surface->flags |= SDL_RLEACCEL;
  1569                     return;
  1570                 }
  1571             }
  1572         }
  1573         surface->map->info.flags &=
  1574             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
  1575 
  1576         SDL_free(surface->map->data);
  1577         surface->map->data = NULL;
  1578     }
  1579 }
  1580 
  1581 /* vi: set ts=4 sw=4 expandtab: */