src/video/SDL_RLEaccel.c
author Manuel Alfayate Corchete <redwindwanderer@gmail.com>
Wed, 05 Aug 2020 02:06:59 +0200
changeset 13975 eb3673a9aca3
parent 13440 ac297b67f6d9
permissions -rw-r--r--
kmsdrm: double and triple buffered versions of SwapWindow() are now both reimplemented in atomic pageflipping versions, and can be selected via SDL_VIDEO_DOUBLE_BUFFER env variable.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 #if SDL_HAVE_RLE
    24 
    25 /*
    26  * RLE encoding for software colorkey and alpha-channel acceleration
    27  *
    28  * Original version by Sam Lantinga
    29  *
    30  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
    31  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
    32  * format, encoder and blitter.
    33  *
    34  * Many thanks to Xark and johns for hints, benchmarks and useful comments
    35  * leading to this code.
    36  *
    37  * Welcome to Macro Mayhem.
    38  */
    39 
    40 /*
    41  * The encoding translates the image data to a stream of segments of the form
    42  *
    43  * <skip> <run> <data>
    44  *
    45  * where <skip> is the number of transparent pixels to skip,
    46  *       <run>  is the number of opaque pixels to blit,
    47  * and   <data> are the pixels themselves.
    48  *
    49  * This basic structure is used both for colorkeyed surfaces, used for simple
    50  * binary transparency and for per-surface alpha blending, and for surfaces
    51  * with per-pixel alpha. The details differ, however:
    52  *
    53  * Encoding of colorkeyed surfaces:
    54  *
    55  *   Encoded pixels always have the same format as the target surface.
    56  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
    57  *   where they are 16 bit. This makes the pixel data aligned at all times.
    58  *   Segments never wrap around from one scan line to the next.
    59  *
     60  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    61  *   beginning of a line.
    62  *
    63  * Encoding of surfaces with per-pixel alpha:
    64  *
    65  *   The sequence begins with a struct RLEDestFormat describing the target
    66  *   pixel format, to provide reliable un-encoding.
    67  *
    68  *   Each scan line is encoded twice: First all completely opaque pixels,
    69  *   encoded in the target format as described above, and then all
    70  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
    71  *   in the following 32-bit format:
    72  *
    73  *   For 32-bit targets, each pixel has the target RGB format but with
    74  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
    75  *   counts are 16 bit.
    76  *
    77  *   For 16-bit targets, each pixel has the target RGB format, but with
    78  *   the middle component (usually green) shifted 16 steps to the left,
    79  *   and the hole filled with the 5 most significant bits of the alpha value.
    80  *   i.e. if the target has the format         rrrrrggggggbbbbb,
    81  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
    82  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
    83  *   for the translucent lines. Two padding bytes may be inserted
    84  *   before each translucent line to keep them 32-bit aligned.
    85  *
    86  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    87  *   beginning of an opaque line.
    88  */
    89 
    90 #include "SDL_video.h"
    91 #include "SDL_sysvideo.h"
    92 #include "SDL_blit.h"
    93 #include "SDL_RLEaccel_c.h"
    94 
    95 #ifndef MIN
    96 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    97 #endif
    98 
    99 #define PIXEL_COPY(to, from, len, bpp)          \
   100     SDL_memcpy(to, from, (size_t)(len) * (bpp))
   101 
   102 /*
   103  * Various colorkey blit methods, for opaque and per-surface alpha
   104  */
   105 
   106 #define OPAQUE_BLIT(to, from, length, bpp, alpha)   \
   107     PIXEL_COPY(to, from, length, bpp)
   108 
   109 /*
    110  * For 32bpp pixels of the form 0x00rrggbb:
   111  * If we treat the middle component separately, we can process the two
   112  * remaining in parallel. This is safe to do because of the gap to the left
   113  * of each component, so the bits from the multiplication don't collide.
   114  * This can be used for any RGB permutation of course.
   115  */
   116 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)      \
   117     do {                                                    \
   118         int i;                                              \
   119         Uint32 *src = (Uint32 *)(from);                     \
   120         Uint32 *dst = (Uint32 *)(to);                       \
   121         for (i = 0; i < (int)(length); i++) {               \
   122             Uint32 s = *src++;                              \
   123             Uint32 d = *dst;                                \
   124             Uint32 s1 = s & 0xff00ff;                       \
   125             Uint32 d1 = d & 0xff00ff;                       \
   126             d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
   127             s &= 0xff00;                                    \
   128             d &= 0xff00;                                    \
   129             d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
   130             *dst++ = d1 | d;                                \
   131         }                                                   \
   132     } while (0)
   133 
   134 /*
   135  * For 16bpp pixels we can go a step further: put the middle component
   136  * in the high 16 bits of a 32 bit word, and process all three RGB
   137  * components at the same time. Since the smallest gap is here just
   138  * 5 bits, we have to scale alpha down to 5 bits as well.
   139  */
   140 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)  \
   141     do {                                                \
   142         int i;                                          \
   143         Uint16 *src = (Uint16 *)(from);                 \
   144         Uint16 *dst = (Uint16 *)(to);                   \
   145         Uint32 ALPHA = alpha >> 3;                      \
   146         for(i = 0; i < (int)(length); i++) {            \
   147             Uint32 s = *src++;                          \
   148             Uint32 d = *dst;                            \
   149             s = (s | s << 16) & 0x07e0f81f;             \
   150             d = (d | d << 16) & 0x07e0f81f;             \
   151             d += (s - d) * ALPHA >> 5;                  \
   152             d &= 0x07e0f81f;                            \
   153             *dst++ = (Uint16)(d | d >> 16);             \
   154         }                                               \
   155     } while(0)
   156 
   157 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)  \
   158     do {                                                \
   159         int i;                                          \
   160         Uint16 *src = (Uint16 *)(from);                 \
   161         Uint16 *dst = (Uint16 *)(to);                   \
   162         Uint32 ALPHA = alpha >> 3;                      \
   163         for(i = 0; i < (int)(length); i++) {            \
   164             Uint32 s = *src++;                          \
   165             Uint32 d = *dst;                            \
   166             s = (s | s << 16) & 0x03e07c1f;             \
   167             d = (d | d << 16) & 0x03e07c1f;             \
   168             d += (s - d) * ALPHA >> 5;                  \
   169             d &= 0x03e07c1f;                            \
   170             *dst++ = (Uint16)(d | d >> 16);             \
   171         }                                               \
   172     } while(0)
   173 
   174 /*
   175  * The general slow catch-all function, for remaining depths and formats
   176  */
   177 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)            \
   178     do {                                                        \
   179         int i;                                                  \
   180         Uint8 *src = from;                                      \
   181         Uint8 *dst = to;                                        \
   182         for (i = 0; i < (int)(length); i++) {                   \
   183             Uint32 s, d;                                        \
   184             unsigned rs, gs, bs, rd, gd, bd;                    \
   185             switch (bpp) {                                      \
   186             case 2:                                             \
   187                 s = *(Uint16 *)src;                             \
   188                 d = *(Uint16 *)dst;                             \
   189                 break;                                          \
   190             case 3:                                             \
   191                 if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
   192                     s = (src[0] << 16) | (src[1] << 8) | src[2]; \
   193                     d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \
   194                 } else {                                        \
   195                     s = (src[2] << 16) | (src[1] << 8) | src[0]; \
   196                     d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \
   197                 }                                               \
   198                 break;                                          \
   199             case 4:                                             \
   200                 s = *(Uint32 *)src;                             \
   201                 d = *(Uint32 *)dst;                             \
   202                 break;                                          \
   203             }                                                   \
   204             RGB_FROM_PIXEL(s, fmt, rs, gs, bs);                 \
   205             RGB_FROM_PIXEL(d, fmt, rd, gd, bd);                 \
   206             rd += (rs - rd) * alpha >> 8;                       \
   207             gd += (gs - gd) * alpha >> 8;                       \
   208             bd += (bs - bd) * alpha >> 8;                       \
   209             PIXEL_FROM_RGB(d, fmt, rd, gd, bd);                 \
   210             switch (bpp) {                                      \
   211             case 2:                                             \
   212                 *(Uint16 *)dst = (Uint16)d;                     \
   213                 break;                                          \
   214             case 3:                                             \
   215                 if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
   216                     dst[0] = (Uint8)(d >> 16);                  \
   217                     dst[1] = (Uint8)(d >> 8);                   \
   218                     dst[2] = (Uint8)(d);                        \
   219                 } else {                                        \
   220                     dst[0] = (Uint8)d;                          \
   221                     dst[1] = (Uint8)(d >> 8);                   \
   222                     dst[2] = (Uint8)(d >> 16);                  \
   223                 }                                               \
   224                 break;                                          \
   225             case 4:                                             \
   226                 *(Uint32 *)dst = d;                             \
   227                 break;                                          \
   228             }                                                   \
   229             src += bpp;                                         \
   230             dst += bpp;                                         \
   231         }                                                       \
   232     } while(0)
   233 
   234 /*
   235  * Special case: 50% alpha (alpha=128)
   236  * This is treated specially because it can be optimized very well, and
   237  * since it is good for many cases of semi-translucency.
   238  * The theory is to do all three components at the same time:
   239  * First zero the lowest bit of each component, which gives us room to
   240  * add them. Then shift right and add the sum of the lowest bits.
   241  */
   242 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)       \
   243     do {                                                        \
   244         int i;                                                  \
   245         Uint32 *src = (Uint32 *)(from);                         \
   246         Uint32 *dst = (Uint32 *)(to);                           \
   247         for(i = 0; i < (int)(length); i++) {                    \
   248             Uint32 s = *src++;                                  \
   249             Uint32 d = *dst;                                    \
   250             *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
   251                  + (s & d & 0x00010101);                        \
   252         }                                                       \
   253     } while(0)
   254 
   255 /*
   256  * For 16bpp, we can actually blend two pixels in parallel, if we take
   257  * care to shift before we add, not after.
   258  */
   259 
   260 /* helper: blend a single 16 bit pixel at 50% */
   261 #define BLEND16_50(dst, src, mask)                              \
   262     do {                                                        \
   263         Uint32 s = *src++;                                      \
   264         Uint32 d = *dst;                                        \
   265         *dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +    \
   266                           (s & d & (~mask & 0xffff)));          \
   267     } while(0)
   268 
   269 /* basic 16bpp blender. mask is the pixels to keep when adding. */
   270 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)     \
   271     do {                                                        \
   272         unsigned n = (length);                                  \
   273         Uint16 *src = (Uint16 *)(from);                         \
   274         Uint16 *dst = (Uint16 *)(to);                           \
   275         if (((uintptr_t)src ^ (uintptr_t)dst) & 3) {            \
   276             /* source and destination not in phase, blit one by one */ \
   277             while (n--)                                         \
   278                 BLEND16_50(dst, src, mask);                     \
   279         } else {                                                \
   280             if ((uintptr_t)src & 3) {                           \
   281                 /* first odd pixel */                           \
   282                 BLEND16_50(dst, src, mask);                     \
   283                 n--;                                            \
   284             }                                                   \
   285             for (; n > 1; n -= 2) {                             \
   286                 Uint32 s = *(Uint32 *)src;                      \
   287                 Uint32 d = *(Uint32 *)dst;                      \
   288                 *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1) \
   289                     + ((d & (mask | mask << 16)) >> 1)          \
   290                     + (s & d & (~(mask | mask << 16)));         \
   291                 src += 2;                                       \
   292                 dst += 2;                                       \
   293             }                                                   \
   294             if (n)                                              \
   295                 BLEND16_50(dst, src, mask); /* last odd pixel */ \
   296         }                                                       \
   297     } while(0)
   298 
   299 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)       \
   300     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7deU)
   301 
   302 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)       \
   303     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbdeU)
   304 
   305 #define CHOOSE_BLIT(blitter, alpha, fmt)                        \
   306     do {                                                        \
   307         if (alpha == 255) {                                     \
   308             switch (fmt->BytesPerPixel) {                       \
   309             case 1: blitter(1, Uint8, OPAQUE_BLIT); break;      \
   310             case 2: blitter(2, Uint8, OPAQUE_BLIT); break;      \
   311             case 3: blitter(3, Uint8, OPAQUE_BLIT); break;      \
   312             case 4: blitter(4, Uint16, OPAQUE_BLIT); break;     \
   313             }                                                   \
   314         } else {                                                \
   315             switch (fmt->BytesPerPixel) {                       \
   316             case 1:                                             \
   317                 /* No 8bpp alpha blitting */                    \
   318                 break;                                          \
   319                                                                 \
   320             case 2:                                             \
   321                 switch (fmt->Rmask | fmt->Gmask | fmt->Bmask) { \
   322                 case 0xffff:                                    \
   323                     if (fmt->Gmask == 0x07e0                    \
   324                         || fmt->Rmask == 0x07e0                 \
   325                         || fmt->Bmask == 0x07e0) {              \
   326                         if (alpha == 128) {                     \
   327                             blitter(2, Uint8, ALPHA_BLIT16_565_50); \
   328                         } else {                                \
   329                             blitter(2, Uint8, ALPHA_BLIT16_565); \
   330                         }                                       \
   331                     } else                                      \
   332                         goto general16;                         \
   333                     break;                                      \
   334                                                                 \
   335                 case 0x7fff:                                    \
   336                     if (fmt->Gmask == 0x03e0                    \
   337                         || fmt->Rmask == 0x03e0                 \
   338                         || fmt->Bmask == 0x03e0) {              \
   339                         if (alpha == 128) {                     \
   340                             blitter(2, Uint8, ALPHA_BLIT16_555_50); \
   341                         } else {                                \
   342                             blitter(2, Uint8, ALPHA_BLIT16_555); \
   343                         }                                       \
   344                         break;                                  \
   345                     } else                                      \
   346                         goto general16;                         \
   347                     break;                                      \
   348                                                                 \
   349                 default:                                        \
   350     general16:                                                  \
   351                     blitter(2, Uint8, ALPHA_BLIT_ANY);          \
   352                 }                                               \
   353                 break;                                          \
   354                                                                 \
   355             case 3:                                             \
   356                 blitter(3, Uint8, ALPHA_BLIT_ANY);              \
   357                 break;                                          \
   358                                                                 \
   359             case 4:                                             \
   360                 if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
   361                     && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
   362                     || fmt->Bmask == 0xff00)) {                 \
   363                     if (alpha == 128) {                         \
   364                         blitter(4, Uint16, ALPHA_BLIT32_888_50); \
   365                     } else {                                    \
   366                         blitter(4, Uint16, ALPHA_BLIT32_888);   \
   367                     }                                           \
   368                 } else                                          \
   369                     blitter(4, Uint16, ALPHA_BLIT_ANY);         \
   370                 break;                                          \
   371             }                                                   \
   372         }                                                       \
   373     } while(0)
   374 
   375 /*
   376  * Set a pixel value using the given format, except that the alpha value is
   377  * placed in the top byte. This is the format used for RLE with alpha.
   378  */
   379 #define RLEPIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                      \
   380 {                                                                       \
   381     Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|                             \
   382         ((g>>fmt->Gloss)<<fmt->Gshift)|                                 \
   383         ((b>>fmt->Bloss)<<fmt->Bshift)|                                 \
   384         (a<<24);                                                        \
   385 }
   386 
   387 /*
   388  * This takes care of the case when the surface is clipped on the left and/or
   389  * right. Top clipping has already been taken care of.
   390  */
   391 static void
   392 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
   393             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
   394 {
   395     SDL_PixelFormat *fmt = surf_dst->format;
   396 
   397 #define RLECLIPBLIT(bpp, Type, do_blit)                         \
   398     do {                                                        \
   399         int linecount = srcrect->h;                             \
   400         int ofs = 0;                                            \
   401         int left = srcrect->x;                                  \
   402         int right = left + srcrect->w;                          \
   403         dstbuf -= left * bpp;                                   \
   404         for (;;) {                                              \
   405             int run;                                            \
   406             ofs += *(Type *)srcbuf;                             \
   407             run = ((Type *)srcbuf)[1];                          \
   408             srcbuf += 2 * sizeof(Type);                         \
   409             if (run) {                                          \
   410                 /* clip to left and right borders */            \
   411                 if (ofs < right) {                              \
   412                     int start = 0;                              \
   413                     int len = run;                              \
   414                     int startcol;                               \
   415                     if (left - ofs > 0) {                       \
   416                         start = left - ofs;                     \
   417                         len -= start;                           \
   418                         if (len <= 0)                           \
   419                             goto nocopy ## bpp ## do_blit;      \
   420                     }                                           \
   421                     startcol = ofs + start;                     \
   422                     if (len > right - startcol)                 \
   423                         len = right - startcol;                 \
   424                     do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
   425                         len, bpp, alpha);                       \
   426                 }                                               \
   427     nocopy ## bpp ## do_blit:                                   \
   428                 srcbuf += run * bpp;                            \
   429                 ofs += run;                                     \
   430             } else if (!ofs)                                    \
   431                 break;                                          \
   432                                                                 \
   433             if (ofs == w) {                                     \
   434                 ofs = 0;                                        \
   435                 dstbuf += surf_dst->pitch;                      \
   436                 if (!--linecount)                               \
   437                     break;                                      \
   438             }                                                   \
   439         }                                                       \
   440     } while(0)
   441 
   442     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
   443 
   444 #undef RLECLIPBLIT
   445 
   446 }
   447 
   448 
   449 /* blit a colorkeyed RLE surface */
   450 static int SDLCALL
   451 SDL_RLEBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
   452             SDL_Surface * surf_dst, SDL_Rect * dstrect)
   453 {
   454     Uint8 *dstbuf;
   455     Uint8 *srcbuf;
   456     int x, y;
   457     int w = surf_src->w;
   458     unsigned alpha;
   459 
   460     /* Lock the destination if necessary */
   461     if (SDL_MUSTLOCK(surf_dst)) {
   462         if (SDL_LockSurface(surf_dst) < 0) {
   463             return (-1);
   464         }
   465     }
   466 
   467     /* Set up the source and destination pointers */
   468     x = dstrect->x;
   469     y = dstrect->y;
   470     dstbuf = (Uint8 *) surf_dst->pixels
   471         + y * surf_dst->pitch + x * surf_src->format->BytesPerPixel;
   472     srcbuf = (Uint8 *) surf_src->map->data;
   473 
   474     {
   475         /* skip lines at the top if necessary */
   476         int vskip = srcrect->y;
   477         int ofs = 0;
   478         if (vskip) {
   479 
   480 #define RLESKIP(bpp, Type)          \
   481         for(;;) {           \
   482             int run;            \
   483             ofs += *(Type *)srcbuf; \
   484             run = ((Type *)srcbuf)[1];  \
   485             srcbuf += sizeof(Type) * 2; \
   486             if(run) {           \
   487             srcbuf += run * bpp;    \
   488             ofs += run;     \
   489             } else if(!ofs)     \
   490             goto done;      \
   491             if(ofs == w) {      \
   492             ofs = 0;        \
   493             if(!--vskip)        \
   494                 break;      \
   495             }               \
   496         }
   497 
   498             switch (surf_src->format->BytesPerPixel) {
   499             case 1:
   500                 RLESKIP(1, Uint8);
   501                 break;
   502             case 2:
   503                 RLESKIP(2, Uint8);
   504                 break;
   505             case 3:
   506                 RLESKIP(3, Uint8);
   507                 break;
   508             case 4:
   509                 RLESKIP(4, Uint16);
   510                 break;
   511             }
   512 
   513 #undef RLESKIP
   514 
   515         }
   516     }
   517 
   518     alpha = surf_src->map->info.a;
   519     /* if left or right edge clipping needed, call clip blit */
   520     if (srcrect->x || srcrect->w != surf_src->w) {
   521         RLEClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect, alpha);
   522     } else {
   523         SDL_PixelFormat *fmt = surf_src->format;
   524 
   525 #define RLEBLIT(bpp, Type, do_blit)                       \
   526         do {                                  \
   527         int linecount = srcrect->h;                   \
   528         int ofs = 0;                              \
   529         for(;;) {                             \
   530             unsigned run;                         \
   531             ofs += *(Type *)srcbuf;                   \
   532             run = ((Type *)srcbuf)[1];                    \
   533             srcbuf += 2 * sizeof(Type);                   \
   534             if(run) {                             \
   535             do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
   536             srcbuf += run * bpp;                      \
   537             ofs += run;                       \
   538             } else if(!ofs)                       \
   539             break;                            \
   540             if(ofs == w) {                        \
   541             ofs = 0;                          \
   542             dstbuf += surf_dst->pitch;                     \
   543             if(!--linecount)                      \
   544                 break;                        \
   545             }                                 \
   546         }                                 \
   547         } while(0)
   548 
   549         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
   550 
   551 #undef RLEBLIT
   552     }
   553 
   554   done:
   555     /* Unlock the destination if necessary */
   556     if (SDL_MUSTLOCK(surf_dst)) {
   557         SDL_UnlockSurface(surf_dst);
   558     }
   559     return (0);
   560 }
   561 
   562 #undef OPAQUE_BLIT
   563 
   564 /*
   565  * Per-pixel blitting macros for translucent pixels:
   566  * These use the same techniques as the per-surface blitting macros
   567  */
   568 
   /*
    * Blend one translucent RLE pixel onto a 32bpp destination pixel.
    * The source pixel carries its alpha value in the top 8 bits. Red and
    * blue are interpolated together with one multiply, green with a second
    * one, and the result is stored with its top byte forced to 0xff.
    */
   #define BLIT_TRANSL_888(src, dst)                                       \
       do {                                                                \
           const Uint32 rle_sp = (src);                                    \
           const Uint32 rle_dp = (dst);                                    \
           const unsigned rle_a = rle_sp >> 24;                            \
           Uint32 rle_rb = rle_dp & 0xff00ff;                              \
           Uint32 rle_g = rle_dp & 0xff00;                                 \
           rle_rb += (((rle_sp & 0xff00ff) - rle_rb) * rle_a) >> 8;        \
           rle_g += (((rle_sp & 0xff00) - rle_g) * rle_a) >> 8;            \
           (dst) = (rle_rb & 0xff00ff) | (rle_g & 0xff00) | 0xff000000;    \
       } while(0)
   586 
   /*
    * Blend one translucent RLE pixel onto a 16bpp 565 destination pixel.
    * The 32-bit source word holds green in its upper half, red and blue in
    * its lower half, and a 5-bit alpha value in bits 5-9. The destination
    * is spread into the same layout so that all three components are
    * interpolated with a single multiply.
    */
   #define BLIT_TRANSL_565(src, dst)                                       \
       do {                                                                \
           const Uint32 rle_mask = 0x07e0f81f;                             \
           const Uint32 rle_sp = (src);                                    \
           Uint32 rle_dp = (dst);                                          \
           const unsigned rle_a = (rle_sp >> 5) & 0x1f;                    \
           rle_dp = (rle_dp | (rle_dp << 16)) & rle_mask;                  \
           rle_dp += (((rle_sp & rle_mask) - rle_dp) * rle_a) >> 5;        \
           rle_dp &= rle_mask;                                             \
           (dst) = (Uint16)(rle_dp | (rle_dp >> 16));                      \
       } while(0)
   602 
   /*
    * Blend one translucent RLE pixel onto a 16bpp 555 destination pixel.
    * Same scheme as the 565 variant, with the component mask adjusted to
    * five bits of green; the 5-bit alpha value is read from bits 5-9.
    */
   #define BLIT_TRANSL_555(src, dst)                                       \
       do {                                                                \
           const Uint32 rle_mask = 0x03e07c1f;                             \
           const Uint32 rle_sp = (src);                                    \
           Uint32 rle_dp = (dst);                                          \
           const unsigned rle_a = (rle_sp >> 5) & 0x1f;                    \
           rle_dp = (rle_dp | (rle_dp << 16)) & rle_mask;                  \
           rle_dp += (((rle_sp & rle_mask) - rle_dp) * rle_a) >> 5;        \
           rle_dp &= rle_mask;                                             \
           (dst) = (Uint16)(rle_dp | (rle_dp >> 16));                      \
       } while(0)
   614 
   615 /* used to save the destination format in the encoding. Designed to be
   616    macro-compatible with SDL_PixelFormat but without the unneeded fields */
       /*
        * RLEAlphaSurface writes one of these at the very start of the encoded
        * buffer, copying every mask/loss/shift field from the destination
        * surface's SDL_PixelFormat. SDL_RLEAlphaBlit skips
        * sizeof(RLEDestFormat) bytes to reach the run data, and the uncopy_*
        * helpers receive a pointer to it as the source format for
        * RGB_FROM_PIXEL.
        */
   617 typedef struct
   618 {
   619     Uint8 BytesPerPixel;    /* bytes per destination pixel */
   620     Uint8 padding[3];       /* not written by RLEAlphaSurface */
   621     Uint32 Rmask;           /* per-channel masks of the destination format */
   622     Uint32 Gmask;
   623     Uint32 Bmask;
   624     Uint32 Amask;
   625     Uint8 Rloss;            /* per-channel loss values of the destination format */
   626     Uint8 Gloss;
   627     Uint8 Bloss;
   628     Uint8 Aloss;
   629     Uint8 Rshift;           /* per-channel shifts of the destination format */
   630     Uint8 Gshift;
   631     Uint8 Bshift;
   632     Uint8 Ashift;
   633 } RLEDestFormat;
   634 
   635 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
       /*
        * Parameters:
        *   w        - width in pixels of one encoded line; each per-line run
        *              loop ends once the accumulated offset reaches it
        *   srcbuf   - cursor into the run data, already advanced by the caller
        *              (SDL_RLEAlphaBlit) past any lines skipped at the top
        *   surf_dst - destination surface; its format selects the blender and
        *              its pitch is used to step to the next output line
        *   dstbuf   - address of the first output pixel; it is moved back by
        *              srcrect->x pixels so that source offsets index it directly
        *   srcrect  - x and w give the visible column range [x, x + w), h the
        *              number of lines to draw
        *
        * Every encoded line consists of the opaque runs followed by the
        * translucent runs. Each run is clipped to the visible column range
        * before it is copied (opaque) or blended (translucent). The function
        * returns early when it reads a zero offset with a zero run while the
        * line offset is still zero, which is the terminator RLEAlphaSurface
        * appends after the last non-blank line.
        */
   636 static void
   637 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
   638                  Uint8 * dstbuf, SDL_Rect * srcrect)
   639 {
   640     SDL_PixelFormat *df = surf_dst->format;
   641     /*
   642      * clipped blitter: Ptype is the destination pixel type,
   643      * Ctype the opaque count type (translucent counts are always
       *      * Uint16), and do_blend the macro
   644      * to blend one pixel.
   645      */
   646 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)              \
   647     do {                                  \
   648     int linecount = srcrect->h;                   \
   649     int left = srcrect->x;                        \
   650     int right = left + srcrect->w;                    \
   651     dstbuf -= left * sizeof(Ptype);                   \
   652     do {                                  \
   653         int ofs = 0;                          \
   654         /* blit opaque pixels on one line */              \
   655         do {                              \
   656         unsigned run;                         \
   657         ofs += ((Ctype *)srcbuf)[0];                  \
   658         run = ((Ctype *)srcbuf)[1];               \
   659         srcbuf += 2 * sizeof(Ctype);                  \
   660         if(run) {                         \
   661             /* clip to left and right borders */          \
   662             int cofs = ofs;                   \
   663             int crun = run;                   \
   664             if(left - cofs > 0) {                 \
   665             crun -= left - cofs;                  \
   666             cofs = left;                      \
   667             }                             \
   668             if(crun > right - cofs)               \
   669             crun = right - cofs;                  \
   670             if(crun > 0)                      \
   671             PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),     \
   672                    srcbuf + (cofs - ofs) * sizeof(Ptype), \
   673                    (unsigned)crun, sizeof(Ptype));    \
   674             srcbuf += run * sizeof(Ptype);            \
   675             ofs += run;                       \
   676         } else if(!ofs)                       \
   677             return;                       \
   678         } while(ofs < w);                         \
   679         /* skip padding if necessary */               \
   680         if(sizeof(Ptype) == 2)                    \
   681         srcbuf += (uintptr_t)srcbuf & 2;              \
   682         /* blit translucent pixels on the same line */        \
   683         ofs = 0;                              \
   684         do {                              \
   685         unsigned run;                         \
   686         ofs += ((Uint16 *)srcbuf)[0];                 \
   687         run = ((Uint16 *)srcbuf)[1];                  \
   688         srcbuf += 4;                          \
   689         if(run) {                         \
   690             /* clip to left and right borders */          \
   691             int cofs = ofs;                   \
   692             int crun = run;                   \
   693             if(left - cofs > 0) {                 \
   694             crun -= left - cofs;                  \
   695             cofs = left;                      \
   696             }                             \
   697             if(crun > right - cofs)               \
   698             crun = right - cofs;                  \
   699             if(crun > 0) {                    \
   700             Ptype *dst = (Ptype *)dstbuf + cofs;          \
   701             Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);    \
   702             int i;                        \
   703             for(i = 0; i < crun; i++)             \
   704                 do_blend(src[i], dst[i]);             \
   705             }                             \
   706             srcbuf += run * 4;                    \
   707             ofs += run;                       \
   708         }                             \
   709         } while(ofs < w);                         \
   710         dstbuf += surf_dst->pitch;                     \
   711     } while(--linecount);                         \
   712     } while(0)
   713 
           /*
            * Pick the blender from the destination format: 2-byte pixels use
            * the 565 blender when any channel mask equals 0x07e0 and the 555
            * blender otherwise; 4-byte pixels use the 888 blender. Other pixel
            * sizes draw nothing.
            */
   714     switch (df->BytesPerPixel) {
   715     case 2:
   716         if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
   717             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
   718         else
   719             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
   720         break;
   721     case 4:
   722         RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
   723         break;
   724     }
   725 }
   726 
   727 /* blit a pixel-alpha RLE surface */
       /*
        * Draws the srcrect portion of the RLE-encoded surf_src onto surf_dst at
        * dstrect->x, dstrect->y.
        *
        * The destination is locked first when SDL_MUSTLOCK says so; if that
        * lock fails the function returns -1 without drawing. Every other path
        * ends at the `done` label, which unlocks the destination again and
        * returns 0.
        *
        * Steps:
        *   1. skip srcrect->y encoded lines at the top of the run data,
        *   2. hand over to RLEAlphaClipBlit when the rectangle does not cover
        *      the full source width,
        *   3. otherwise copy the opaque runs and blend the translucent runs of
        *      srcrect->h lines without any per-run clipping.
        */
   728 static int SDLCALL
   729 SDL_RLEAlphaBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
   730                  SDL_Surface * surf_dst, SDL_Rect * dstrect)
   731 {
   732     int x, y;
   733     int w = surf_src->w;
   734     Uint8 *srcbuf, *dstbuf;
   735     SDL_PixelFormat *df = surf_dst->format;
   736 
   737     /* Lock the destination if necessary */
   738     if (SDL_MUSTLOCK(surf_dst)) {
   739         if (SDL_LockSurface(surf_dst) < 0) {
   740             return -1;
   741         }
   742     }
   743 
   744     x = dstrect->x;
   745     y = dstrect->y;
   746     dstbuf = (Uint8 *) surf_dst->pixels + y * surf_dst->pitch + x * df->BytesPerPixel;
           /* the run data starts right after the saved RLEDestFormat header */
   747     srcbuf = (Uint8 *) surf_src->map->data + sizeof(RLEDestFormat);
   748 
   749     {
   750         /* skip lines at the top if necessary */
   751         int vskip = srcrect->y;
   752         if (vskip) {
   753             int ofs;
   754             if (df->BytesPerPixel == 2) {
   755                 /* the 16/32 interleaved format */
   756                 do {
   757                     /* skip opaque line */
   758                     ofs = 0;
   759                     do {
   760                         int run;
   761                         ofs += srcbuf[0];
   762                         run = srcbuf[1];
   763                         srcbuf += 2;
   764                         if (run) {
   765                             srcbuf += 2 * run;
   766                             ofs += run;
   767                         } else if (!ofs)
   768                             goto done;
   769                     } while (ofs < w);
   770 
   771                     /* skip padding */
   772                     srcbuf += (uintptr_t) srcbuf & 2;
   773 
   774                     /* skip translucent line */
   775                     ofs = 0;
   776                     do {
   777                         int run;
   778                         ofs += ((Uint16 *) srcbuf)[0];
   779                         run = ((Uint16 *) srcbuf)[1];
   780                         srcbuf += 4 * (run + 1);
   781                         ofs += run;
   782                     } while (ofs < w);
   783                 } while (--vskip);
   784             } else {
   785                 /* the 32/32 interleaved format */
   786                 vskip <<= 1;    /* opaque and translucent have same format */
   787                 do {
   788                     ofs = 0;
   789                     do {
   790                         int run;
   791                         ofs += ((Uint16 *) srcbuf)[0];
   792                         run = ((Uint16 *) srcbuf)[1];
   793                         srcbuf += 4;
   794                         if (run) {
   795                             srcbuf += 4 * run;
   796                             ofs += run;
   797                         } else if (!ofs)
   798                             goto done;
   799                     } while (ofs < w);
   800                 } while (--vskip);
   801             }
   802         }
   803     }
   804 
   805     /* if left or right edge clipping needed, call clip blit */
   806     if (srcrect->x || srcrect->w != surf_src->w) {
   807         RLEAlphaClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect);
   808     } else {
   809 
   810         /*
   811          * non-clipped blitter. Ptype is the destination pixel type,
   812          * Ctype the opaque count type (translucent counts are always
       *          * Uint16), and do_blend the
   813          * macro to blend one pixel.
   814          */
   815 #define RLEALPHABLIT(Ptype, Ctype, do_blend)                 \
   816     do {                                 \
   817         int linecount = srcrect->h;                  \
   818         do {                             \
   819         int ofs = 0;                         \
   820         /* blit opaque pixels on one line */             \
   821         do {                             \
   822             unsigned run;                    \
   823             ofs += ((Ctype *)srcbuf)[0];             \
   824             run = ((Ctype *)srcbuf)[1];              \
   825             srcbuf += 2 * sizeof(Ctype);             \
   826             if(run) {                        \
   827             PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
   828                    run, sizeof(Ptype));          \
   829             srcbuf += run * sizeof(Ptype);           \
   830             ofs += run;                  \
   831             } else if(!ofs)                  \
   832             goto done;                   \
   833         } while(ofs < w);                    \
   834         /* skip padding if necessary */              \
   835         if(sizeof(Ptype) == 2)                   \
   836             srcbuf += (uintptr_t)srcbuf & 2;             \
   837         /* blit translucent pixels on the same line */       \
   838         ofs = 0;                         \
   839         do {                             \
   840             unsigned run;                    \
   841             ofs += ((Uint16 *)srcbuf)[0];            \
   842             run = ((Uint16 *)srcbuf)[1];             \
   843             srcbuf += 4;                     \
   844             if(run) {                        \
   845             Ptype *dst = (Ptype *)dstbuf + ofs;      \
   846             unsigned i;                  \
   847             for(i = 0; i < run; i++) {           \
   848                 Uint32 src = *(Uint32 *)srcbuf;      \
   849                 do_blend(src, *dst);             \
   850                 srcbuf += 4;                 \
   851                 dst++;                   \
   852             }                        \
   853             ofs += run;                  \
   854             }                            \
   855         } while(ofs < w);                    \
   856         dstbuf += surf_dst->pitch;                    \
   857         } while(--linecount);                    \
   858     } while(0)
   859 
               /*
                * 2-byte destinations use the 565 blender when any channel mask
                * equals 0x07e0 and the 555 blender otherwise; 4-byte
                * destinations use the 888 blender.
                */
   860         switch (df->BytesPerPixel) {
   861         case 2:
   862             if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
   863                 || df->Bmask == 0x07e0)
   864                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
   865             else
   866                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
   867             break;
   868         case 4:
   869             RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
   870             break;
   871         }
   872     }
   873 
   874   done:
   875     /* Unlock the destination if necessary */
   876     if (SDL_MUSTLOCK(surf_dst)) {
   877         SDL_UnlockSurface(surf_dst);
   878     }
   879     return 0;
   880 }
   881 
   882 /*
   883  * Auxiliary functions:
   884  * The encoding functions take 32bpp rgb + a, and
   885  * return the number of bytes copied to the destination.
   886  * The decoding functions copy to 32bpp rgb + a, and
   887  * return the number of bytes copied from the source.
   888  * These are only used in the encoder and un-RLE code and are therefore not
   889  * highly optimised.
   890  */
   891 
   892 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
   893 static int
   894 copy_opaque_16(void *dst, Uint32 * src, int n,
   895                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   896 {
   897     int i;
   898     Uint16 *d = dst;
   899     for (i = 0; i < n; i++) {
   900         unsigned r, g, b;
   901         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
   902         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
   903         src++;
   904         d++;
   905     }
   906     return n * 2;
   907 }
   908 
   909 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
   910 static int
   911 uncopy_opaque_16(Uint32 * dst, void *src, int n,
   912                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   913 {
   914     int i;
   915     Uint16 *s = src;
   916     unsigned alpha = dfmt->Amask ? 255 : 0;
   917     for (i = 0; i < n; i++) {
   918         unsigned r, g, b;
   919         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
   920         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
   921         s++;
   922         dst++;
   923     }
   924     return n * 2;
   925 }
   926 
   927 
   928 
   929 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
   930 static int
   931 copy_transl_565(void *dst, Uint32 * src, int n,
   932                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   933 {
   934     int i;
   935     Uint32 *d = dst;
   936     for (i = 0; i < n; i++) {
   937         unsigned r, g, b, a;
   938         Uint16 pix;
   939         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   940         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   941         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
   942         src++;
   943         d++;
   944     }
   945     return n * 4;
   946 }
   947 
   948 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
   949 static int
   950 copy_transl_555(void *dst, Uint32 * src, int n,
   951                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   952 {
   953     int i;
   954     Uint32 *d = dst;
   955     for (i = 0; i < n; i++) {
   956         unsigned r, g, b, a;
   957         Uint16 pix;
   958         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   959         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   960         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
   961         src++;
   962         d++;
   963     }
   964     return n * 4;
   965 }
   966 
   967 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
   968 static int
   969 uncopy_transl_16(Uint32 * dst, void *src, int n,
   970                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   971 {
   972     int i;
   973     Uint32 *s = src;
   974     for (i = 0; i < n; i++) {
   975         unsigned r, g, b, a;
   976         Uint32 pix = *s++;
   977         a = (pix & 0x3e0) >> 2;
   978         pix = (pix & ~0x3e0) | pix >> 16;
   979         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
   980         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   981         dst++;
   982     }
   983     return n * 4;
   984 }
   985 
   986 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   987 static int
   988 copy_32(void *dst, Uint32 * src, int n,
   989         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   990 {
   991     int i;
   992     Uint32 *d = dst;
   993     for (i = 0; i < n; i++) {
   994         unsigned r, g, b, a;
   995         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   996         RLEPIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
   997         d++;
   998         src++;
   999     }
  1000     return n * 4;
  1001 }
  1002 
  1003 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
  1004 static int
  1005 uncopy_32(Uint32 * dst, void *src, int n,
  1006           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
  1007 {
  1008     int i;
  1009     Uint32 *s = src;
  1010     for (i = 0; i < n; i++) {
  1011         unsigned r, g, b, a;
  1012         Uint32 pixel = *s++;
  1013         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
  1014         a = pixel >> 24;
  1015         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
  1016         dst++;
  1017     }
  1018     return n * 4;
  1019 }
  1020 
  /*
   * Alpha classification of a source pixel; fmt is a pointer to a pixel
   * format providing Amask and Ashift.
   *
   * ISOPAQUE is true when the alpha value is exactly 255.
   * ISTRANSL is true when the alpha value lies in 1..254: subtracting one
   * in unsigned arithmetic wraps an alpha of 0 around to a huge value, so a
   * single comparison rejects both 0 and 255.
   *
   * The fmt argument is parenthesized so that any pointer expression
   * (for example &format) can be passed.
   */
  #define ISOPAQUE(pixel, fmt) \
      ((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) == 255)

  #define ISTRANSL(pixel, fmt)    \
      ((unsigned)((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) - 1U) < 254U)
  1025 
  1026 /* convert surface to be quickly alpha-blittable onto dest, if possible */
  1027 static int
  1028 RLEAlphaSurface(SDL_Surface * surface)
  1029 {
  1030     SDL_Surface *dest;
  1031     SDL_PixelFormat *df;
  1032     int maxsize = 0;
  1033     int max_opaque_run;
  1034     int max_transl_run = 65535;
  1035     unsigned masksum;
  1036     Uint8 *rlebuf, *dst;
  1037     int (*copy_opaque) (void *, Uint32 *, int,
  1038                         SDL_PixelFormat *, SDL_PixelFormat *);
  1039     int (*copy_transl) (void *, Uint32 *, int,
  1040                         SDL_PixelFormat *, SDL_PixelFormat *);
  1041 
  1042     dest = surface->map->dst;
  1043     if (!dest)
  1044         return -1;
  1045     df = dest->format;
  1046     if (surface->format->BitsPerPixel != 32)
  1047         return -1;              /* only 32bpp source supported */
  1048 
  1049     /* find out whether the destination is one we support,
  1050        and determine the max size of the encoded result */
  1051     masksum = df->Rmask | df->Gmask | df->Bmask;
  1052     switch (df->BytesPerPixel) {
  1053     case 2:
  1054         /* 16bpp: only support 565 and 555 formats */
  1055         switch (masksum) {
  1056         case 0xffff:
  1057             if (df->Gmask == 0x07e0
  1058                 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
  1059                 copy_opaque = copy_opaque_16;
  1060                 copy_transl = copy_transl_565;
  1061             } else
  1062                 return -1;
  1063             break;
  1064         case 0x7fff:
  1065             if (df->Gmask == 0x03e0
  1066                 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
  1067                 copy_opaque = copy_opaque_16;
  1068                 copy_transl = copy_transl_555;
  1069             } else
  1070                 return -1;
  1071             break;
  1072         default:
  1073             return -1;
  1074         }
  1075         max_opaque_run = 255;   /* runs stored as bytes */
  1076 
  1077         /* worst case is alternating opaque and translucent pixels,
  1078            with room for alignment padding between lines */
  1079         maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
  1080         break;
  1081     case 4:
  1082         if (masksum != 0x00ffffff)
  1083             return -1;          /* requires unused high byte */
  1084         copy_opaque = copy_32;
  1085         copy_transl = copy_32;
  1086         max_opaque_run = 255;   /* runs stored as short ints */
  1087 
  1088         /* worst case is alternating opaque and translucent pixels */
  1089         maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
  1090         break;
  1091     default:
  1092         return -1;              /* anything else unsupported right now */
  1093     }
  1094 
  1095     maxsize += sizeof(RLEDestFormat);
  1096     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1097     if (!rlebuf) {
  1098         return SDL_OutOfMemory();
  1099     }
  1100     {
  1101         /* save the destination format so we can undo the encoding later */
  1102         RLEDestFormat *r = (RLEDestFormat *) rlebuf;
  1103         r->BytesPerPixel = df->BytesPerPixel;
  1104         r->Rmask = df->Rmask;
  1105         r->Gmask = df->Gmask;
  1106         r->Bmask = df->Bmask;
  1107         r->Amask = df->Amask;
  1108         r->Rloss = df->Rloss;
  1109         r->Gloss = df->Gloss;
  1110         r->Bloss = df->Bloss;
  1111         r->Aloss = df->Aloss;
  1112         r->Rshift = df->Rshift;
  1113         r->Gshift = df->Gshift;
  1114         r->Bshift = df->Bshift;
  1115         r->Ashift = df->Ashift;
  1116     }
  1117     dst = rlebuf + sizeof(RLEDestFormat);
  1118 
  1119     /* Do the actual encoding */
  1120     {
  1121         int x, y;
  1122         int h = surface->h, w = surface->w;
  1123         SDL_PixelFormat *sf = surface->format;
  1124         Uint32 *src = (Uint32 *) surface->pixels;
  1125         Uint8 *lastline = dst;  /* end of last non-blank line */
  1126 
  1127         /* opaque counts are 8 or 16 bits, depending on target depth */
  1128 #define ADD_OPAQUE_COUNTS(n, m)         \
  1129     if(df->BytesPerPixel == 4) {        \
  1130         ((Uint16 *)dst)[0] = n;     \
  1131         ((Uint16 *)dst)[1] = m;     \
  1132         dst += 4;               \
  1133     } else {                \
  1134         dst[0] = n;             \
  1135         dst[1] = m;             \
  1136         dst += 2;               \
  1137     }
  1138 
  1139         /* translucent counts are always 16 bit */
  1140 #define ADD_TRANSL_COUNTS(n, m)     \
  1141     (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
  1142 
  1143         for (y = 0; y < h; y++) {
  1144             int runstart, skipstart;
  1145             int blankline = 0;
  1146             /* First encode all opaque pixels of a scan line */
  1147             x = 0;
  1148             do {
  1149                 int run, skip, len;
  1150                 skipstart = x;
  1151                 while (x < w && !ISOPAQUE(src[x], sf))
  1152                     x++;
  1153                 runstart = x;
  1154                 while (x < w && ISOPAQUE(src[x], sf))
  1155                     x++;
  1156                 skip = runstart - skipstart;
  1157                 if (skip == w)
  1158                     blankline = 1;
  1159                 run = x - runstart;
  1160                 while (skip > max_opaque_run) {
  1161                     ADD_OPAQUE_COUNTS(max_opaque_run, 0);
  1162                     skip -= max_opaque_run;
  1163                 }
  1164                 len = MIN(run, max_opaque_run);
  1165                 ADD_OPAQUE_COUNTS(skip, len);
  1166                 dst += copy_opaque(dst, src + runstart, len, sf, df);
  1167                 runstart += len;
  1168                 run -= len;
  1169                 while (run) {
  1170                     len = MIN(run, max_opaque_run);
  1171                     ADD_OPAQUE_COUNTS(0, len);
  1172                     dst += copy_opaque(dst, src + runstart, len, sf, df);
  1173                     runstart += len;
  1174                     run -= len;
  1175                 }
  1176             } while (x < w);
  1177 
  1178             /* Make sure the next output address is 32-bit aligned */
  1179             dst += (uintptr_t) dst & 2;
  1180 
  1181             /* Next, encode all translucent pixels of the same scan line */
  1182             x = 0;
  1183             do {
  1184                 int run, skip, len;
  1185                 skipstart = x;
  1186                 while (x < w && !ISTRANSL(src[x], sf))
  1187                     x++;
  1188                 runstart = x;
  1189                 while (x < w && ISTRANSL(src[x], sf))
  1190                     x++;
  1191                 skip = runstart - skipstart;
  1192                 blankline &= (skip == w);
  1193                 run = x - runstart;
  1194                 while (skip > max_transl_run) {
  1195                     ADD_TRANSL_COUNTS(max_transl_run, 0);
  1196                     skip -= max_transl_run;
  1197                 }
  1198                 len = MIN(run, max_transl_run);
  1199                 ADD_TRANSL_COUNTS(skip, len);
  1200                 dst += copy_transl(dst, src + runstart, len, sf, df);
  1201                 runstart += len;
  1202                 run -= len;
  1203                 while (run) {
  1204                     len = MIN(run, max_transl_run);
  1205                     ADD_TRANSL_COUNTS(0, len);
  1206                     dst += copy_transl(dst, src + runstart, len, sf, df);
  1207                     runstart += len;
  1208                     run -= len;
  1209                 }
  1210                 if (!blankline)
  1211                     lastline = dst;
  1212             } while (x < w);
  1213 
  1214             src += surface->pitch >> 2;
  1215         }
  1216         dst = lastline;         /* back up past trailing blank lines */
  1217         ADD_OPAQUE_COUNTS(0, 0);
  1218     }
  1219 
  1220 #undef ADD_OPAQUE_COUNTS
  1221 #undef ADD_TRANSL_COUNTS
  1222 
  1223     /* Now that we have it encoded, release the original pixels */
  1224     if (!(surface->flags & SDL_PREALLOC)) {
  1225         SDL_SIMDFree(surface->pixels);
  1226         surface->pixels = NULL;
  1227         surface->flags &= ~SDL_SIMD_ALIGNED;
  1228     }
  1229 
  1230     /* realloc the buffer to release unused memory */
  1231     {
  1232         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1233         if (!p)
  1234             p = rlebuf;
  1235         surface->map->data = p;
  1236     }
  1237 
  1238     return 0;
  1239 }
  1240 
  1241 static Uint32
  1242 getpix_8(const Uint8 * srcbuf)
  1243 {
  1244     return *srcbuf;
  1245 }
  1246 
  1247 static Uint32
  1248 getpix_16(const Uint8 * srcbuf)
  1249 {
  1250     return *(const Uint16 *) srcbuf;
  1251 }
  1252 
  1253 static Uint32
  1254 getpix_24(const Uint8 * srcbuf)
  1255 {
  1256 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  1257     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
  1258 #else
  1259     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
  1260 #endif
  1261 }
  1262 
  1263 static Uint32
  1264 getpix_32(const Uint8 * srcbuf)
  1265 {
  1266     return *(const Uint32 *) srcbuf;
  1267 }
  1268 
  1269 typedef Uint32(*getpix_func) (const Uint8 *);    /* reads one pixel at the given address, widened to Uint32 */
  1270 
       /*
        * Pixel readers indexed by (bytes per pixel - 1); RLEColorkeySurface
        * selects its reader with getpixes[bpp - 1].
        */
  1271 static const getpix_func getpixes[4] = {
  1272     getpix_8, getpix_16, getpix_24, getpix_32
  1273 };
  1274 
  1275 static int
  1276 RLEColorkeySurface(SDL_Surface * surface)
  1277 {
  1278     Uint8 *rlebuf, *dst;
  1279     int maxn;
  1280     int y;
  1281     Uint8 *srcbuf, *lastline;
  1282     int maxsize = 0;
  1283     const int bpp = surface->format->BytesPerPixel;
  1284     getpix_func getpix;
  1285     Uint32 ckey, rgbmask;
  1286     int w, h;
  1287 
  1288     /* calculate the worst case size for the compressed surface */
  1289     switch (bpp) {
  1290     case 1:
  1291         /* worst case is alternating opaque and transparent pixels,
  1292            starting with an opaque pixel */
  1293         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
  1294         break;
  1295     case 2:
  1296     case 3:
  1297         /* worst case is solid runs, at most 255 pixels wide */
  1298         maxsize = surface->h * (2 * (surface->w / 255 + 1)
  1299                                 + surface->w * bpp) + 2;
  1300         break;
  1301     case 4:
  1302         /* worst case is solid runs, at most 65535 pixels wide */
  1303         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
  1304                                 + surface->w * 4) + 4;
  1305         break;
  1306 
  1307     default:
  1308         return -1;
  1309     }
  1310 
  1311     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1312     if (rlebuf == NULL) {
  1313         return SDL_OutOfMemory();
  1314     }
  1315 
  1316     /* Set up the conversion */
  1317     srcbuf = (Uint8 *) surface->pixels;
  1318     maxn = bpp == 4 ? 65535 : 255;
  1319     dst = rlebuf;
  1320     rgbmask = ~surface->format->Amask;
  1321     ckey = surface->map->info.colorkey & rgbmask;
  1322     lastline = dst;
  1323     getpix = getpixes[bpp - 1];
  1324     w = surface->w;
  1325     h = surface->h;
  1326 
  1327 #define ADD_COUNTS(n, m)            \
  1328     if(bpp == 4) {              \
  1329         ((Uint16 *)dst)[0] = n;     \
  1330         ((Uint16 *)dst)[1] = m;     \
  1331         dst += 4;               \
  1332     } else {                \
  1333         dst[0] = n;             \
  1334         dst[1] = m;             \
  1335         dst += 2;               \
  1336     }
  1337 
  1338     for (y = 0; y < h; y++) {
  1339         int x = 0;
  1340         int blankline = 0;
  1341         do {
  1342             int run, skip, len;
  1343             int runstart;
  1344             int skipstart = x;
  1345 
  1346             /* find run of transparent, then opaque pixels */
  1347             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
  1348                 x++;
  1349             runstart = x;
  1350             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
  1351                 x++;
  1352             skip = runstart - skipstart;
  1353             if (skip == w)
  1354                 blankline = 1;
  1355             run = x - runstart;
  1356 
  1357             /* encode segment */
  1358             while (skip > maxn) {
  1359                 ADD_COUNTS(maxn, 0);
  1360                 skip -= maxn;
  1361             }
  1362             len = MIN(run, maxn);
  1363             ADD_COUNTS(skip, len);
  1364             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1365             dst += len * bpp;
  1366             run -= len;
  1367             runstart += len;
  1368             while (run) {
  1369                 len = MIN(run, maxn);
  1370                 ADD_COUNTS(0, len);
  1371                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1372                 dst += len * bpp;
  1373                 runstart += len;
  1374                 run -= len;
  1375             }
  1376             if (!blankline)
  1377                 lastline = dst;
  1378         } while (x < w);
  1379 
  1380         srcbuf += surface->pitch;
  1381     }
  1382     dst = lastline;             /* back up bast trailing blank lines */
  1383     ADD_COUNTS(0, 0);
  1384 
  1385 #undef ADD_COUNTS
  1386 
  1387     /* Now that we have it encoded, release the original pixels */
  1388     if (!(surface->flags & SDL_PREALLOC)) {
  1389         SDL_SIMDFree(surface->pixels);
  1390         surface->pixels = NULL;
  1391         surface->flags &= ~SDL_SIMD_ALIGNED;
  1392     }
  1393 
  1394     /* realloc the buffer to release unused memory */
  1395     {
  1396         /* If realloc returns NULL, the original block is left intact */
  1397         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1398         if (!p)
  1399             p = rlebuf;
  1400         surface->map->data = p;
  1401     }
  1402 
  1403     return 0;
  1404 }
  1405 
  1406 /* Try to RLE-encode `surface` and install the matching RLE blitter.
  1407  * Returns 0 on success, -1 if the surface could not be (re-)encoded
  1408  * (unsupported format or flag combination, no pixels, or encoder failure). */
  1409 int
  1410 SDL_RLESurface(SDL_Surface * surface)
  1411 {
  1412     int flags;
  1413
  1414     /* Clear any previous RLE conversion */
  1415     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
  1416         SDL_UnRLESurface(surface, 1);
  1417     }
  1418
  1419     /* We don't support RLE encoding of bitmaps, and the pixels must be
  1420      * available to encode from */
  1421     if (surface->format->BitsPerPixel < 8 || !surface->pixels) {
  1422         return -1;
  1423     }
  1424
  1425     /* We need a colorkey, or blending on a format with an alpha channel.
  1426      * Blending with neither must be rejected here: it would otherwise take
  1427      * the colorkey encoder below although SDL_COPY_COLORKEY is not set. */
  1428     flags = surface->map->info.flags;
  1429     if (!(flags & SDL_COPY_COLORKEY) &&
  1430         !((flags & SDL_COPY_BLEND) && surface->format->Amask)) {
  1431         return -1;
  1432     }
  1433
  1434     /* Pass on combinations not supported */
  1435     if ((flags & SDL_COPY_MODULATE_COLOR) ||
  1436         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
  1437         (flags & (SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL)) ||
  1438         (flags & SDL_COPY_NEAREST)) {
  1439         return -1;
  1440     }
  1441
  1442     /* Encode and set up the blit */
  1443     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
  1444         /* the colorkey encoding is only used with an identity map */
  1445         if (!surface->map->identity || RLEColorkeySurface(surface) < 0) {
  1446             return -1;
  1447         }
  1448         surface->map->blit = SDL_RLEBlit;
  1449         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
  1450     } else {
  1451         if (RLEAlphaSurface(surface) < 0) {
  1452             return -1;
  1453         }
  1454         surface->map->blit = SDL_RLEAlphaBlit;
  1455         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
  1456     }
  1457
  1458     /* The surface is now accelerated */
  1459     surface->flags |= SDL_RLEACCEL;
  1460
  1461     return (0);
  1462 }
  1463 
  1464 /*
  1465  * Un-RLE a surface with pixel alpha
  1466  * This may not give back exactly the image before RLE-encoding; all
  1467  * completely transparent pixels will be lost, and color and alpha depth
  1468  * may have been reduced (when encoding for 16bpp targets).
  1469  * Decodes surface->map->data into a newly allocated pixel buffer; returns
  1470  * SDL_FALSE if that allocation fails, SDL_TRUE otherwise.
  1471  */
  1472 static SDL_bool
  1473 UnRLEAlpha(SDL_Surface * surface)
  1474 {
  1475     Uint8 *srcbuf;
  1476     Uint32 *dst;
  1477     SDL_PixelFormat *sf = surface->format;
  1478     RLEDestFormat *df = surface->map->data;
  1479     int (*uncopy_opaque) (Uint32 *, void *, int,
  1480                           RLEDestFormat *, SDL_PixelFormat *);
  1481     int (*uncopy_transl) (Uint32 *, void *, int,
  1482                           RLEDestFormat *, SDL_PixelFormat *);
  1483     int w = surface->w;
  1484     int bpp = df->BytesPerPixel;
  1485     /* multiply in size_t: h * pitch as int can overflow for huge surfaces */
  1486     const size_t size = (size_t) surface->h * surface->pitch;
  1487
  1488     uncopy_opaque = (bpp == 2) ? uncopy_opaque_16 : uncopy_32;
  1489     uncopy_transl = (bpp == 2) ? uncopy_transl_16 : uncopy_32;
  1490
  1491     surface->pixels = SDL_SIMDAlloc(size);
  1492     if (!surface->pixels) {
  1493         return (SDL_FALSE);
  1494     }
  1495     surface->flags |= SDL_SIMD_ALIGNED;
  1496     /* fill background with transparent pixels */
  1497     SDL_memset(surface->pixels, 0, size);
  1498
  1499     dst = surface->pixels;
  1500     srcbuf = (Uint8 *) (df + 1);    /* data follows the RLEDestFormat header */
  1501     for (;;) {
  1502         /* copy opaque pixels */
  1503         int ofs = 0;
  1504         do {
  1505             unsigned run;
  1506             if (bpp == 2) {     /* byte-sized (skip, run) pair */
  1507                 ofs += srcbuf[0];
  1508                 run = srcbuf[1];
  1509                 srcbuf += 2;
  1510             } else {            /* 16-bit (skip, run) pair */
  1511                 ofs += ((Uint16 *) srcbuf)[0];
  1512                 run = ((Uint16 *) srcbuf)[1];
  1513                 srcbuf += 4;
  1514             }
  1515             if (run) {
  1516                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
  1517                 ofs += run;
  1518             } else if (!ofs) {
  1519                 goto end_function;  /* empty pair at offset 0: end of data */
  1520             }
  1521         } while (ofs < w);
  1522
  1523         /* skip padding if needed (adds 2 when address bit 1 is set) */
  1524         if (bpp == 2)
  1525             srcbuf += (uintptr_t) srcbuf & 2;
  1526
  1527         /* copy translucent pixels */
  1528         ofs = 0;
  1529         do {
  1530             unsigned run;
  1531             ofs += ((Uint16 *) srcbuf)[0];
  1532             run = ((Uint16 *) srcbuf)[1];
  1533             srcbuf += 4;
  1534             if (run) {
  1535                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
  1536                 ofs += run;
  1537             }
  1538         } while (ofs < w);
  1539         dst += surface->pitch >> 2;     /* next row: pitch is bytes, dst Uint32 */
  1540     }
  1541
  1542 end_function:
  1543     return (SDL_TRUE);
  1544 }
  1545 
  1546 /* Undo RLE acceleration of `surface` and free the encoded data. If `recode`
  1547  * is nonzero and the surface is not SDL_PREALLOC, the pixel buffer is rebuilt
  1548  * first; if that allocation fails the surface stays RLE-accelerated. */
  1549 void
  1550 SDL_UnRLESurface(SDL_Surface * surface, int recode)
  1551 {
  1552     if (surface->flags & SDL_RLEACCEL) {
  1553         surface->flags &= ~SDL_RLEACCEL;
  1554
  1555         if (recode && !(surface->flags & SDL_PREALLOC)) {
  1556             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
  1557                 SDL_Rect full;
  1558
  1559                 /* re-create the original surface; multiply in size_t since
  1560                  * h * pitch as int can overflow for huge surfaces */
  1561                 surface->pixels = SDL_SIMDAlloc((size_t) surface->h * surface->pitch);
  1562                 if (!surface->pixels) {
  1563                     /* allocation failed: keep the surface RLE-encoded */
  1564                     surface->flags |= SDL_RLEACCEL;
  1565                     return;
  1566                 }
  1567                 surface->flags |= SDL_SIMD_ALIGNED;
  1568                 /* fill it with the background color */
  1569                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
  1570                 /* now render the encoded surface */
  1571                 full.x = full.y = 0;
  1572                 full.w = surface->w;
  1573                 full.h = surface->h;
  1574                 SDL_RLEBlit(surface, &full, surface, &full);
  1575             } else if (!UnRLEAlpha(surface)) {
  1576                 /* allocation failed: keep the surface RLE-encoded */
  1577                 surface->flags |= SDL_RLEACCEL;
  1578                 return;
  1579             }
  1580         }
  1581         surface->map->info.flags &=
  1582             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
  1583
  1584         SDL_free(surface->map->data);
  1585         surface->map->data = NULL;
  1586     }
  1587 }
  1588 
  1589 #endif /* SDL_HAVE_RLE */
  1590 
  1591 /* vi: set ts=4 sw=4 expandtab: */