src/video/SDL_RLEaccel.c
author Sam Lantinga <slouken@libsdl.org>
Sat, 18 May 2013 14:17:52 -0700
changeset 7191 75360622e65f
parent 7128 e10c5a9cc858
child 7351 668a3dc28361
permissions -rw-r--r--
File style cleanup for the SDL 2.0 release
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /*
    24  * RLE encoding for software colorkey and alpha-channel acceleration
    25  *
    26  * Original version by Sam Lantinga
    27  *
    28  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
    29  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
    30  * format, encoder and blitter.
    31  *
    32  * Many thanks to Xark and johns for hints, benchmarks and useful comments
    33  * leading to this code.
    34  *
    35  * Welcome to Macro Mayhem.
    36  */
    37 
    38 /*
    39  * The encoding translates the image data to a stream of segments of the form
    40  *
    41  * <skip> <run> <data>
    42  *
    43  * where <skip> is the number of transparent pixels to skip,
    44  *       <run>  is the number of opaque pixels to blit,
    45  * and   <data> are the pixels themselves.
    46  *
    47  * This basic structure is used both for colorkeyed surfaces, used for simple
    48  * binary transparency and for per-surface alpha blending, and for surfaces
    49  * with per-pixel alpha. The details differ, however:
    50  *
    51  * Encoding of colorkeyed surfaces:
    52  *
    53  *   Encoded pixels always have the same format as the target surface.
    54  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
    55  *   where they are 16 bit. This makes the pixel data aligned at all times.
    56  *   Segments never wrap around from one scan line to the next.
    57  *
    58  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    59  *   beginning of a line.
    60  *
    61  * Encoding of surfaces with per-pixel alpha:
    62  *
    63  *   The sequence begins with a struct RLEDestFormat describing the target
    64  *   pixel format, to provide reliable un-encoding.
    65  *
    66  *   Each scan line is encoded twice: First all completely opaque pixels,
    67  *   encoded in the target format as described above, and then all
    68  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
    69  *   in the following 32-bit format:
    70  *
    71  *   For 32-bit targets, each pixel has the target RGB format but with
    72  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
    73  *   counts are 16 bit.
    74  *
    75  *   For 16-bit targets, each pixel has the target RGB format, but with
    76  *   the middle component (usually green) shifted 16 steps to the left,
    77  *   and the hole filled with the 5 most significant bits of the alpha value.
    78  *   i.e. if the target has the format         rrrrrggggggbbbbb,
    79  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
    80  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
    81  *   for the translucent lines. Two padding bytes may be inserted
    82  *   before each translucent line to keep them 32-bit aligned.
    83  *
    84  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    85  *   beginning of an opaque line.
    86  */
    87 
    88 #include "SDL_video.h"
    89 #include "SDL_sysvideo.h"
    90 #include "SDL_blit.h"
    91 #include "SDL_RLEaccel_c.h"
    92 
    93 #ifndef MAX
       /* Larger of a and b; an argument may be evaluated twice, so avoid side effects. */
    94 #define MAX(a, b) ((a) > (b) ? (a) : (b))
    95 #endif
    96 #ifndef MIN
       /* Smaller of a and b; an argument may be evaluated twice, so avoid side effects. */
    97 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    98 #endif
    99 
       /*
        * Copy len pixels of bpp bytes each from `from` to `to`.
        *
        * When bpp is 4, len is handed to SDL_memcpy4 unscaled (presumably a count
        * of 32-bit words -- confirm against SDL_memcpy4's definition); for every
        * other bpp the byte count len * bpp is handed to SDL_memcpy.
        * bpp is expanded without parentheses in the comparison, so pass a simple
        * expression.
        */
   100 #define PIXEL_COPY(to, from, len, bpp)          \
   101 do {                            \
   102     if(bpp == 4) {                  \
   103     SDL_memcpy4(to, from, (size_t)(len));       \
   104     } else {                        \
   105     SDL_memcpy(to, from, (size_t)(len) * (bpp));    \
   106     }                           \
   107 } while(0)
   108 
   109 /*
   110  * Various colorkey blit methods, for opaque and per-surface alpha
        *
        * Every blit macro in this group takes the same argument list
        * (to, from, length, bpp, alpha) so that any of them can be handed to the
        * RLE loops below as their do_blit argument.
        *
        * OPAQUE_BLIT ignores alpha and simply copies `length` pixels of `bpp`
        * bytes from `from` to `to`.
   111  */
   112 
   113 #define OPAQUE_BLIT(to, from, length, bpp, alpha)   \
   114     PIXEL_COPY(to, from, length, bpp)
   115 
   116 /*
   117  * For 32bpp pixels of the form 0x00rrggbb:
   118  * If we treat the middle component separately, we can process the two
   119  * remaining in parallel. This is safe to do because of the gap to the left
   120  * of each component, so the bits from the multiplication don't collide.
   121  * This can be used for any RGB permutation of course.
        *
        * Blends `length` pixels read from `from` into `to` using one per-surface
        * alpha; the ">> 8" scaling treats alpha as a fraction of 256.
        * (s - d) is computed in unsigned arithmetic and may wrap; the final mask
        * discards the bits that wrapped.
        * Every pixel written has its top 8 bits cleared, because only the
        * 0xff00ff and 0xff00 lanes are kept. bpp is not used.
   122  */
   123 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)      \
   124     do {                            \
   125         int i;                          \
   126     Uint32 *src = (Uint32 *)(from);             \
   127     Uint32 *dst = (Uint32 *)(to);               \
   128     for(i = 0; i < (int)(length); i++) {            \
   129         Uint32 s = *src++;                  \
   130         Uint32 d = *dst;                    \
   131         Uint32 s1 = s & 0xff00ff;               \
   132         Uint32 d1 = d & 0xff00ff;               \
   133         d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;    \
   134         s &= 0xff00;                    \
   135         d &= 0xff00;                    \
   136         d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
   137         *dst++ = d1 | d;                    \
   138     }                           \
   139     } while(0)
   140 
   141 /*
   142  * For 16bpp pixels we can go a step further: put the middle component
   143  * in the high 16 bits of a 32 bit word, and process all three RGB
   144  * components at the same time. Since the smallest gap is here just
   145  * 5 bits, we have to scale alpha down to 5 bits as well.
        *
        * ALPHA_BLIT16_565 blends `length` 16-bit pixels from `from` into `to`.
        * Each pixel is spread over a 32-bit word with (p | p << 16) & 0x07e0f81f,
        * blended with alpha >> 3 (5 bits, hence the ">> 5" scaling), masked again
        * and folded back to 16 bits with (d | d >> 16). bpp is not used.
   146  */
   147 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)  \
   148     do {                        \
   149         int i;                      \
   150     Uint16 *src = (Uint16 *)(from);         \
   151     Uint16 *dst = (Uint16 *)(to);           \
   152     Uint32 ALPHA = alpha >> 3;          \
   153     for(i = 0; i < (int)(length); i++) {        \
   154         Uint32 s = *src++;              \
   155         Uint32 d = *dst;                \
   156         s = (s | s << 16) & 0x07e0f81f;     \
   157         d = (d | d << 16) & 0x07e0f81f;     \
   158         d += (s - d) * ALPHA >> 5;          \
   159         d &= 0x07e0f81f;                \
   160         *dst++ = (Uint16)(d | d >> 16);         \
   161     }                       \
   162     } while(0)
   163 
       /*
        * Same algorithm as ALPHA_BLIT16_565; the only difference is the lane mask
        * 0x03e07c1f, which matches three 5-bit components (bit 15 is dropped).
        */
   164 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)  \
   165     do {                        \
   166         int i;                      \
   167     Uint16 *src = (Uint16 *)(from);         \
   168     Uint16 *dst = (Uint16 *)(to);           \
   169     Uint32 ALPHA = alpha >> 3;          \
   170     for(i = 0; i < (int)(length); i++) {        \
   171         Uint32 s = *src++;              \
   172         Uint32 d = *dst;                \
   173         s = (s | s << 16) & 0x03e07c1f;     \
   174         d = (d | d << 16) & 0x03e07c1f;     \
   175         d += (s - d) * ALPHA >> 5;          \
   176         d &= 0x03e07c1f;                \
   177         *dst++ = (Uint16)(d | d >> 16);         \
   178     }                       \
   179     } while(0)
   180 
   181 /*
   182  * The general slow catch-all function, for remaining depths and formats
        *
        * Blends `length` pixels of `bpp` bytes from `from` into `to`, one pixel at
        * a time: load the source and destination pixel, split both into R/G/B with
        * RGB_FROM_PIXEL, interpolate each channel with alpha (">> 8" scaling),
        * repack with PIXEL_FROM_RGB and store.
        *
        * The macro uses a variable named `fmt` that is NOT one of its parameters;
        * it must be in scope wherever the macro is expanded.
        * Only bpp 2, 3 and 4 are loaded and stored by the switch statements; for
        * any other bpp, s and d are never assigned. 3-byte pixels are assembled
        * byte by byte according to SDL_BYTEORDER.
   183  */
   184 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)            \
   185     do {                                \
   186         int i;                              \
   187     Uint8 *src = from;                      \
   188     Uint8 *dst = to;                        \
   189     for(i = 0; i < (int)(length); i++) {                \
   190         Uint32 s, d;                        \
   191         unsigned rs, gs, bs, rd, gd, bd;                \
   192         switch(bpp) {                       \
   193         case 2:                         \
   194         s = *(Uint16 *)src;                 \
   195         d = *(Uint16 *)dst;                 \
   196         break;                          \
   197         case 3:                         \
   198         if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {           \
   199             s = (src[0] << 16) | (src[1] << 8) | src[2];    \
   200             d = (dst[0] << 16) | (dst[1] << 8) | dst[2];    \
   201         } else {                        \
   202             s = (src[2] << 16) | (src[1] << 8) | src[0];    \
   203             d = (dst[2] << 16) | (dst[1] << 8) | dst[0];    \
   204         }                           \
   205         break;                          \
   206         case 4:                         \
   207         s = *(Uint32 *)src;                 \
   208         d = *(Uint32 *)dst;                 \
   209         break;                          \
   210         }                               \
   211         RGB_FROM_PIXEL(s, fmt, rs, gs, bs);             \
   212         RGB_FROM_PIXEL(d, fmt, rd, gd, bd);             \
   213         rd += (rs - rd) * alpha >> 8;               \
   214         gd += (gs - gd) * alpha >> 8;               \
   215         bd += (bs - bd) * alpha >> 8;               \
   216         PIXEL_FROM_RGB(d, fmt, rd, gd, bd);             \
   217         switch(bpp) {                       \
   218         case 2:                         \
   219         *(Uint16 *)dst = (Uint16)d;                 \
   220         break;                          \
   221         case 3:                         \
   222         if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {           \
   223             dst[0] = (Uint8)(d >> 16);                  \
   224             dst[1] = (Uint8)(d >> 8);                   \
   225             dst[2] = (Uint8)(d);                        \
   226         } else {                        \
   227             dst[0] = (Uint8)d;                      \
   228             dst[1] = (Uint8)(d >> 8);                   \
   229             dst[2] = (Uint8)(d >> 16);                  \
   230         }                           \
   231         break;                          \
   232         case 4:                         \
   233         *(Uint32 *)dst = d;                 \
   234         break;                          \
   235         }                               \
   236         src += bpp;                         \
   237         dst += bpp;                         \
   238     }                               \
   239     } while(0)
   240 
   241 /*
   242  * Special case: 50% alpha (alpha=128)
   243  * This is treated specially because it can be optimized very well, and
   244  * since it is good for many cases of semi-translucency.
   245  * The theory is to do all three components at the same time:
   246  * First zero the lowest bit of each component, which gives us room to
   247  * add them. Then shift right and add back 1 in every component whose
        * lowest bit was set in BOTH pixels (s & d & 0x00010101), i.e. the carry
        * that the masking dropped.
        *
        * The masks only cover the low 24 bits, so the top byte of every pixel
        * written is zero. bpp and alpha are not used.
   248  */
   249 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)       \
   250     do {                                \
   251         int i;                              \
   252     Uint32 *src = (Uint32 *)(from);                 \
   253     Uint32 *dst = (Uint32 *)(to);                   \
   254     for(i = 0; i < (int)(length); i++) {                \
   255         Uint32 s = *src++;                      \
   256         Uint32 d = *dst;                        \
   257         *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)   \
   258              + (s & d & 0x00010101);                \
   259     }                               \
   260     } while(0)
   261 
   262 /*
   263  * For 16bpp, we can actually blend two pixels in parallel, if we take
   264  * care to shift before we add, not after.
   265  */
   266 
   267 /*
        * helper: blend a single 16 bit pixel at 50%
        *
        * dst and src must be modifiable pointer expressions: the macro reads
        * *src and *dst, stores the blended pixel through dst, and advances both
        * pointers by one element.
        * mask selects the bits that take part in the add-and-halve step; the
        * complementary bits (~mask & 0xffff) are added back where they are set in
        * both pixels.
        */
   268 #define BLEND16_50(dst, src, mask)          \
   269     do {                        \
   270     Uint32 s = *src++;              \
   271     Uint32 d = *dst;                \
   272     *dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +    \
   273                       (s & d & (~mask & 0xffff)));      \
   274     } while(0)
   275 
   276 /*
        * basic 16bpp blender. mask is the set of bits to keep when adding.
        *
        * Blends `length` 16-bit pixels from `from` into `to` at 50%:
        *  - if the two pointers differ in their position within a 4-byte word
        *    ((src ^ dst) & 3 is non-zero), every pixel goes through BLEND16_50;
        *  - otherwise one leading pixel is blended on its own when src is not
        *    4-byte aligned, then pixels are processed two at a time through
        *    32-bit loads and stores, and a remaining last pixel is blended singly.
        * In the paired path each operand is shifted before the addition, so the
        * sum of the two pixels cannot carry from the low half into the high half.
        * bpp and alpha are not used.
        */
   277 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)     \
   278     do {                                \
   279     unsigned n = (length);                      \
   280     Uint16 *src = (Uint16 *)(from);                 \
   281     Uint16 *dst = (Uint16 *)(to);                   \
   282     if(((uintptr_t)src ^ (uintptr_t)dst) & 3) {         \
   283         /* source and destination not in phase, blit one by one */  \
   284         while(n--)                          \
   285         BLEND16_50(dst, src, mask);             \
   286     } else {                            \
   287         if((uintptr_t)src & 3) {                    \
   288         /* first odd pixel */                   \
   289         BLEND16_50(dst, src, mask);             \
   290         n--;                            \
   291         }                               \
   292         for(; n > 1; n -= 2) {                  \
   293         Uint32 s = *(Uint32 *)src;              \
   294         Uint32 d = *(Uint32 *)dst;              \
   295         *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1)   \
   296                        + ((d & (mask | mask << 16)) >> 1)   \
   297                        + (s & d & (~(mask | mask << 16)));  \
   298         src += 2;                       \
   299         dst += 2;                       \
   300         }                               \
   301         if(n)                           \
   302         BLEND16_50(dst, src, mask); /* last odd pixel */    \
   303     }                               \
   304     } while(0)
   305 
       /* 50% blend for 5-6-5 pixels: 0xf7de clears bits 0, 5 and 11. */
   306 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)   \
   307     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
   308 
       /* 50% blend for 5-5-5 pixels: 0xfbde clears bits 0, 5 and 10. */
   309 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)   \
   310     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
   311 
       /*
        * Select a per-run blit macro and expand
        *     blitter(bytes_per_pixel, count_type, do_blit)
        * exactly once with it.
        *
        * blitter  a three-argument macro such as RLECLIPBLIT or RLEBLIT.
        * alpha    per-surface alpha; 255 selects OPAQUE_BLIT for every depth.
        * fmt      pointer to the pixel format that is inspected.
        *
        * The count type is Uint8 for 1, 2 and 3 bytes per pixel and Uint16 for 4.
        * For alpha != 255:
        *   1 byte : nothing is expanded (no 8bpp alpha blitting).
        *   2 bytes: RGB masks covering 0xffff with one mask equal to 0x07e0 use
        *            the 565 blitters; masks covering 0x7fff with one mask equal
        *            to 0x03e0 use the 555 blitters; anything else uses
        *            ALPHA_BLIT_ANY.
        *   3 bytes: ALPHA_BLIT_ANY.
        *   4 bytes: RGB masks covering 0x00ffffff with one mask equal to 0xff00
        *            use the 888 blitters; anything else uses ALPHA_BLIT_ANY.
        * Where a specialised blitter exists, alpha == 128 selects its _50 variant.
        *
        * The expansion defines the label general16, so this macro can appear at
        * most once in any function.
        */
   312 #define CHOOSE_BLIT(blitter, alpha, fmt)                \
   313     do {                                \
   314         if(alpha == 255) {                      \
   315         switch(fmt->BytesPerPixel) {                \
   316         case 1: blitter(1, Uint8, OPAQUE_BLIT); break;      \
   317         case 2: blitter(2, Uint8, OPAQUE_BLIT); break;      \
   318         case 3: blitter(3, Uint8, OPAQUE_BLIT); break;      \
   319         case 4: blitter(4, Uint16, OPAQUE_BLIT); break;     \
   320         }                               \
   321     } else {                            \
   322         switch(fmt->BytesPerPixel) {                \
   323         case 1:                         \
   324         /* No 8bpp alpha blitting */                \
   325         break;                          \
   326                                     \
   327         case 2:                         \
   328         switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) {      \
   329         case 0xffff:                        \
   330             if(fmt->Gmask == 0x07e0             \
   331                || fmt->Rmask == 0x07e0              \
   332                || fmt->Bmask == 0x07e0) {           \
   333             if(alpha == 128)                \
   334                 blitter(2, Uint8, ALPHA_BLIT16_565_50); \
   335             else {                      \
   336                 blitter(2, Uint8, ALPHA_BLIT16_565);    \
   337             }                       \
   338             } else                      \
   339             goto general16;                 \
   340             break;                      \
   341                                     \
   342         case 0x7fff:                        \
   343             if(fmt->Gmask == 0x03e0             \
   344                || fmt->Rmask == 0x03e0              \
   345                || fmt->Bmask == 0x03e0) {           \
   346             if(alpha == 128)                \
   347                 blitter(2, Uint8, ALPHA_BLIT16_555_50); \
   348             else {                      \
   349                 blitter(2, Uint8, ALPHA_BLIT16_555);    \
   350             }                       \
   351             break;                      \
   352             }                           \
   353             /* fallthrough */                   \
   354                                     \
   355         default:                        \
   356         general16:                      \
   357             blitter(2, Uint8, ALPHA_BLIT_ANY);          \
   358         }                           \
   359         break;                          \
   360                                     \
   361         case 3:                         \
   362         blitter(3, Uint8, ALPHA_BLIT_ANY);          \
   363         break;                          \
   364                                     \
   365         case 4:                         \
   366         if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
   367            && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
   368                || fmt->Bmask == 0xff00)) {          \
   369             if(alpha == 128)                    \
   370             blitter(4, Uint16, ALPHA_BLIT32_888_50);    \
   371             else                        \
   372             blitter(4, Uint16, ALPHA_BLIT32_888);       \
   373         } else                          \
   374             blitter(4, Uint16, ALPHA_BLIT_ANY);         \
   375         break;                          \
   376         }                               \
   377     }                               \
   378     } while(0)
   379 
   380 /*
   381  * This takes care of the case when the surface is clipped on the left and/or
   382  * right. Top clipping has already been taken care of.
        *
        * w        width in pixels of one encoded line; reaching ofs == w marks the
        *          end of a line in the stream.
        * srcbuf   RLE stream, already positioned on the first line to draw.
        * dst      destination surface; its format picks the blitter and its pitch
        *          advances dstbuf from line to line.
        * dstbuf   destination address for the pixel at column srcrect->x of the
        *          first line. It is moved back by srcrect->x pixels up front so
        *          that source column `ofs` maps to dstbuf + ofs * bpp.
        * srcrect  x and w give the visible column range [x, x + w); h is the
        *          number of lines to draw.
        * alpha    per-surface alpha forwarded to the selected blit macro.
        *
        * For each <skip>,<run> segment the run is trimmed to the visible range
        * before blitting, but srcbuf and ofs always advance by the full run.
        * The loop stops after srcrect->h lines or at a zero <skip>,<run> pair
        * found at the start of a line.
   383  */
   384 static void
   385 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
   386             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
   387 {
   388     SDL_PixelFormat *fmt = dst->format;
   389 
           /*
            * bpp: bytes per pixel, Type: integer type of the <skip>/<run> counts,
            * do_blit: blit macro applied to every visible run. The nocopy label is
            * built from bpp and do_blit by token pasting.
            */
   390 #define RLECLIPBLIT(bpp, Type, do_blit)                    \
   391     do {                                   \
   392     int linecount = srcrect->h;                    \
   393     int ofs = 0;                               \
   394     int left = srcrect->x;                         \
   395     int right = left + srcrect->w;                     \
   396     dstbuf -= left * bpp;                          \
   397     for(;;) {                              \
   398         int run;                               \
   399         ofs += *(Type *)srcbuf;                    \
   400         run = ((Type *)srcbuf)[1];                     \
   401         srcbuf += 2 * sizeof(Type);                    \
   402         if(run) {                              \
   403         /* clip to left and right borders */               \
   404         if(ofs < right) {                      \
   405             int start = 0;                     \
   406             int len = run;                     \
   407             int startcol;                      \
   408             if(left - ofs > 0) {                   \
   409             start = left - ofs;                \
   410             len -= start;                      \
   411             if(len <= 0)                       \
   412                 goto nocopy ## bpp ## do_blit;         \
   413             }                              \
   414             startcol = ofs + start;                \
   415             if(len > right - startcol)                 \
   416             len = right - startcol;                \
   417             do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
   418                 len, bpp, alpha);                  \
   419         }                              \
   420         nocopy ## bpp ## do_blit:                      \
   421         srcbuf += run * bpp;                       \
   422         ofs += run;                        \
   423         } else if(!ofs)                        \
   424         break;                             \
   425         if(ofs == w) {                         \
   426         ofs = 0;                           \
   427         dstbuf += dst->pitch;                      \
   428         if(!--linecount)                       \
   429             break;                         \
   430         }                                  \
   431     }                                  \
   432     } while(0)
   433 
   434     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
   435 
   436 #undef RLECLIPBLIT
   437 
   438 }
   439 
   440 
   441 /*
        * blit a colorkeyed RLE surface
        *
        * src      RLE-encoded surface; the stream is read from src->map->data and
        *          the per-surface alpha from src->map->info.a.
        * srcrect  part of src to draw: y is the number of encoded lines skipped
        *          first, h the number of lines drawn. If x is non-zero or w
        *          differs from src->w, the work is handed to RLEClipBlit.
        * dst      destination surface; locked for the duration of the blit when
        *          SDL_MUSTLOCK(dst) is true.
        * dstrect  only x and y are read, as the top-left destination position.
        *
        * Returns -1 if locking dst fails, 0 otherwise (this includes hitting the
        * end-of-stream marker while skipping lines, in which case nothing is
        * drawn).
        */
   442 int
   443 SDL_RLEBlit(SDL_Surface * src, SDL_Rect * srcrect,
   444             SDL_Surface * dst, SDL_Rect * dstrect)
   445 {
   446     Uint8 *dstbuf;
   447     Uint8 *srcbuf;
   448     int x, y;
   449     int w = src->w;
   450     unsigned alpha;
   451 
   452     /* Lock the destination if necessary */
   453     if (SDL_MUSTLOCK(dst)) {
   454         if (SDL_LockSurface(dst) < 0) {
   455             return (-1);
   456         }
   457     }
   458 
   459     /* Set up the source and destination pointers */
           /* The x offset is scaled by the SOURCE format's BytesPerPixel. */
   460     x = dstrect->x;
   461     y = dstrect->y;
   462     dstbuf = (Uint8 *) dst->pixels
   463         + y * dst->pitch + x * src->format->BytesPerPixel;
   464     srcbuf = (Uint8 *) src->map->data;
   465 
   466     {
   467         /* skip lines at the top if necessary */
   468         int vskip = srcrect->y;
   469         int ofs = 0;
   470         if (vskip) {
   471 
               /*
                * Walk the stream without drawing until vskip complete lines have
                * been consumed; jumps to `done` if the end marker comes first.
                */
   472 #define RLESKIP(bpp, Type)          \
   473         for(;;) {           \
   474             int run;            \
   475             ofs += *(Type *)srcbuf; \
   476             run = ((Type *)srcbuf)[1];  \
   477             srcbuf += sizeof(Type) * 2; \
   478             if(run) {           \
   479             srcbuf += run * bpp;    \
   480             ofs += run;     \
   481             } else if(!ofs)     \
   482             goto done;      \
   483             if(ofs == w) {      \
   484             ofs = 0;        \
   485             if(!--vskip)        \
   486                 break;      \
   487             }               \
   488         }
   489 
   490             switch (src->format->BytesPerPixel) {
   491             case 1:
   492                 RLESKIP(1, Uint8);
   493                 break;
   494             case 2:
   495                 RLESKIP(2, Uint8);
   496                 break;
   497             case 3:
   498                 RLESKIP(3, Uint8);
   499                 break;
   500             case 4:
   501                 RLESKIP(4, Uint16);
   502                 break;
   503             }
   504 
   505 #undef RLESKIP
   506 
   507         }
   508     }
   509 
   510     alpha = src->map->info.a;
   511     /* if left or right edge clipping needed, call clip blit */
   512     if (srcrect->x || srcrect->w != src->w) {
   513         RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha);
   514     } else {
   515         SDL_PixelFormat *fmt = src->format;
   516 
               /*
                * Unclipped path: every run is blitted whole at column ofs. Stops
                * after srcrect->h lines or at the end-of-stream marker.
                */
   517 #define RLEBLIT(bpp, Type, do_blit)                       \
   518         do {                                  \
   519         int linecount = srcrect->h;                   \
   520         int ofs = 0;                              \
   521         for(;;) {                             \
   522             unsigned run;                         \
   523             ofs += *(Type *)srcbuf;                   \
   524             run = ((Type *)srcbuf)[1];                    \
   525             srcbuf += 2 * sizeof(Type);                   \
   526             if(run) {                             \
   527             do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
   528             srcbuf += run * bpp;                      \
   529             ofs += run;                       \
   530             } else if(!ofs)                       \
   531             break;                            \
   532             if(ofs == w) {                        \
   533             ofs = 0;                          \
   534             dstbuf += dst->pitch;                     \
   535             if(!--linecount)                      \
   536                 break;                        \
   537             }                                 \
   538         }                                 \
   539         } while(0)
   540 
   541         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
   542 
   543 #undef RLEBLIT
   544     }
   545 
   546   done:
   547     /* Unlock the destination if necessary */
   548     if (SDL_MUSTLOCK(dst)) {
   549         SDL_UnlockSurface(dst);
   550     }
   551     return (0);
   552 }
   553 
       /* OPAQUE_BLIT is only needed by the colorkey blitters above. */
   554 #undef OPAQUE_BLIT
   555 
   556 /*
   557  * Per-pixel blitting macros for translucent pixels:
   558  * These use the same techniques as the per-surface blitting macros
        *
        * Each macro takes the encoded source pixel `src` (read once) and the
        * destination pixel `dst`, which must be an lvalue: it is read, blended
        * and assigned.
   559  */
   560 
   561 /*
   562  * For 32bpp pixels, we have made sure the alpha is stored in the top
   563  * 8 bits, so proceed as usual
        *
        * alpha is s >> 24. The red/blue lanes (0xff00ff) and the middle lane
        * (0xff00) are blended separately, and the stored pixel always has its
        * top 8 bits set to 0xff.
   564  */
   565 #define BLIT_TRANSL_888(src, dst)               \
   566     do {                            \
   567         Uint32 s = src;                     \
   568     Uint32 d = dst;                     \
   569     unsigned alpha = s >> 24;               \
   570     Uint32 s1 = s & 0xff00ff;               \
   571     Uint32 d1 = d & 0xff00ff;               \
   572     d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;    \
   573     s &= 0xff00;                        \
   574     d &= 0xff00;                        \
   575     d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
   576     dst = d1 | d | 0xff000000;              \
   577     } while(0)
   578 
   579 /*
   580  * For 16bpp pixels, we have stored the 5 most significant alpha bits in
   581  * bits 5-9 (mask 0x3e0). As before, we can process all 3 RGB components at
        * the same time.
        *
        * `src` is the 32-bit encoded pixel, whose components are already spread
        * out, so it is only masked; `dst` is a 16-bit pixel that is spread with
        * (d | d << 16) before blending and folded back with (d | d >> 16).
        * dst must be an lvalue.
   582  */
   583 #define BLIT_TRANSL_565(src, dst)       \
   584     do {                    \
   585     Uint32 s = src;             \
   586     Uint32 d = dst;             \
   587     unsigned alpha = (s & 0x3e0) >> 5;  \
   588     s &= 0x07e0f81f;            \
   589     d = (d | d << 16) & 0x07e0f81f;     \
   590     d += (s - d) * alpha >> 5;      \
   591     d &= 0x07e0f81f;            \
   592     dst = (Uint16)(d | d >> 16);            \
   593     } while(0)
   594 
       /* Same as BLIT_TRANSL_565, with the 5-5-5 lane mask 0x03e07c1f. */
   595 #define BLIT_TRANSL_555(src, dst)       \
   596     do {                    \
   597     Uint32 s = src;             \
   598     Uint32 d = dst;             \
   599     unsigned alpha = (s & 0x3e0) >> 5;  \
   600     s &= 0x03e07c1f;            \
   601     d = (d | d << 16) & 0x03e07c1f;     \
   602     d += (s - d) * alpha >> 5;      \
   603     d &= 0x03e07c1f;            \
   604     dst = (Uint16)(d | d >> 16);            \
   605     } while(0)
   606 
   607 /* used to save the destination format in the encoding. Designed to be
   608    macro-compatible with SDL_PixelFormat but without the unneeded fields.
          The member names match the SDL_PixelFormat members read elsewhere in
          this file (BytesPerPixel and the R/G/B masks); the loss and shift
          members are presumably the ones the pixel pack/unpack macros read --
          confirm against SDL_blit.h. */
   609 typedef struct
   610 {
   611     Uint8 BytesPerPixel;
   612     Uint8 padding[3];       /* NOTE(review): presumably keeps the masks 32-bit aligned -- confirm */
   613     Uint32 Rmask;
   614     Uint32 Gmask;
   615     Uint32 Bmask;
   616     Uint32 Amask;
   617     Uint8 Rloss;
   618     Uint8 Gloss;
   619     Uint8 Bloss;
   620     Uint8 Aloss;
   621     Uint8 Rshift;
   622     Uint8 Gshift;
   623     Uint8 Bshift;
   624     Uint8 Ashift;
   625 } RLEDestFormat;
   626 
   627 /*
        * blit a pixel-alpha RLE surface clipped at the right and/or left edges
        *
        * w        width in pixels of one encoded line; each of the two per-line
        *          passes runs until ofs reaches w.
        * srcbuf   RLE stream positioned on the opaque part of the first line to
        *          draw.
        * dst      destination surface; its format picks the blend macro and its
        *          pitch advances dstbuf from line to line.
        * dstbuf   destination address for column srcrect->x of the first line; it
        *          is moved back by srcrect->x pixels so that source column `ofs`
        *          maps to element `ofs` of the line.
        * srcrect  x and w give the visible column range [x, x + w); h is the
        *          number of lines to draw.
        *
        * Only 2 and 4 bytes per pixel are handled; any other depth does nothing.
        * The function returns early when a zero <skip>,<run> pair is found at the
        * start of an opaque line. The line loop is a do/while that decrements
        * linecount after each line, so srcrect->h is expected to be at least 1.
        */
   628 static void
   629 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
   630                  Uint8 * dstbuf, SDL_Rect * srcrect)
   631 {
   632     SDL_PixelFormat *df = dst->format;
   633     /*
   634      * clipped blitter: Ptype is the destination pixel type,
   635      * Ctype the translucent count type, and do_blend the macro
   636      * to blend one pixel.
            *
            * NOTE(review): in the body Ctype is used for the counts of the OPAQUE
            * pass, while the translucent pass always reads Uint16 counts.
            * Opaque pixels are sizeof(Ptype) bytes each; translucent pixels are
            * always 4 bytes each.
   637      */
   638 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)              \
   639     do {                                  \
   640     int linecount = srcrect->h;                   \
   641     int left = srcrect->x;                        \
   642     int right = left + srcrect->w;                    \
   643     dstbuf -= left * sizeof(Ptype);                   \
   644     do {                                  \
   645         int ofs = 0;                          \
   646         /* blit opaque pixels on one line */              \
   647         do {                              \
   648         unsigned run;                         \
   649         ofs += ((Ctype *)srcbuf)[0];                  \
   650         run = ((Ctype *)srcbuf)[1];               \
   651         srcbuf += 2 * sizeof(Ctype);                  \
   652         if(run) {                         \
   653             /* clip to left and right borders */          \
   654             int cofs = ofs;                   \
   655             int crun = run;                   \
   656             if(left - cofs > 0) {                 \
   657             crun -= left - cofs;                  \
   658             cofs = left;                      \
   659             }                             \
   660             if(crun > right - cofs)               \
   661             crun = right - cofs;                  \
   662             if(crun > 0)                      \
   663             PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),     \
   664                    srcbuf + (cofs - ofs) * sizeof(Ptype), \
   665                    (unsigned)crun, sizeof(Ptype));    \
   666             srcbuf += run * sizeof(Ptype);            \
   667             ofs += run;                       \
   668         } else if(!ofs)                       \
   669             return;                       \
   670         } while(ofs < w);                         \
   671         /* skip padding if necessary */               \
   672         if(sizeof(Ptype) == 2)                    \
   673         srcbuf += (uintptr_t)srcbuf & 2;              \
   674         /* blit translucent pixels on the same line */        \
   675         ofs = 0;                              \
   676         do {                              \
   677         unsigned run;                         \
   678         ofs += ((Uint16 *)srcbuf)[0];                 \
   679         run = ((Uint16 *)srcbuf)[1];                  \
   680         srcbuf += 4;                          \
   681         if(run) {                         \
   682             /* clip to left and right borders */          \
   683             int cofs = ofs;                   \
   684             int crun = run;                   \
   685             if(left - cofs > 0) {                 \
   686             crun -= left - cofs;                  \
   687             cofs = left;                      \
   688             }                             \
   689             if(crun > right - cofs)               \
   690             crun = right - cofs;                  \
   691             if(crun > 0) {                    \
   692             Ptype *dst = (Ptype *)dstbuf + cofs;          \
   693             Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);    \
   694             int i;                        \
   695             for(i = 0; i < crun; i++)             \
   696                 do_blend(src[i], dst[i]);             \
   697             }                             \
   698             srcbuf += run * 4;                    \
   699             ofs += run;                       \
   700         }                             \
   701         } while(ofs < w);                         \
   702         dstbuf += dst->pitch;                     \
   703     } while(--linecount);                         \
   704     } while(0)
   705 
           /* 16-bit targets: a mask equal to 0x07e0 selects 5-6-5, otherwise 5-5-5. */
   706     switch (df->BytesPerPixel) {
   707     case 2:
   708         if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
   709             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
   710         else
   711             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
   712         break;
   713     case 4:
   714         RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
   715         break;
   716     }
   717 }
   718 
   719 /* blit a pixel-alpha RLE surface */
   720 int
   721 SDL_RLEAlphaBlit(SDL_Surface * src, SDL_Rect * srcrect,
   722                  SDL_Surface * dst, SDL_Rect * dstrect)
   723 {
   724     int x, y;
   725     int w = src->w;
   726     Uint8 *srcbuf, *dstbuf;
   727     SDL_PixelFormat *df = dst->format;
   728 
   729     /* Lock the destination if necessary */
   730     if (SDL_MUSTLOCK(dst)) {
   731         if (SDL_LockSurface(dst) < 0) {
   732             return -1;
   733         }
   734     }
   735 
   736     x = dstrect->x;
   737     y = dstrect->y;
   738     dstbuf = (Uint8 *) dst->pixels + y * dst->pitch + x * df->BytesPerPixel;
   739     srcbuf = (Uint8 *) src->map->data + sizeof(RLEDestFormat);
   740 
   741     {
   742         /* skip lines at the top if necessary */
   743         int vskip = srcrect->y;
   744         if (vskip) {
   745             int ofs;
   746             if (df->BytesPerPixel == 2) {
   747                 /* the 16/32 interleaved format */
   748                 do {
   749                     /* skip opaque line */
   750                     ofs = 0;
   751                     do {
   752                         int run;
   753                         ofs += srcbuf[0];
   754                         run = srcbuf[1];
   755                         srcbuf += 2;
   756                         if (run) {
   757                             srcbuf += 2 * run;
   758                             ofs += run;
   759                         } else if (!ofs)
   760                             goto done;
   761                     } while (ofs < w);
   762 
   763                     /* skip padding */
   764                     srcbuf += (uintptr_t) srcbuf & 2;
   765 
   766                     /* skip translucent line */
   767                     ofs = 0;
   768                     do {
   769                         int run;
   770                         ofs += ((Uint16 *) srcbuf)[0];
   771                         run = ((Uint16 *) srcbuf)[1];
   772                         srcbuf += 4 * (run + 1);
   773                         ofs += run;
   774                     } while (ofs < w);
   775                 } while (--vskip);
   776             } else {
   777                 /* the 32/32 interleaved format */
   778                 vskip <<= 1;    /* opaque and translucent have same format */
   779                 do {
   780                     ofs = 0;
   781                     do {
   782                         int run;
   783                         ofs += ((Uint16 *) srcbuf)[0];
   784                         run = ((Uint16 *) srcbuf)[1];
   785                         srcbuf += 4;
   786                         if (run) {
   787                             srcbuf += 4 * run;
   788                             ofs += run;
   789                         } else if (!ofs)
   790                             goto done;
   791                     } while (ofs < w);
   792                 } while (--vskip);
   793             }
   794         }
   795     }
   796 
   797     /* if left or right edge clipping needed, call clip blit */
   798     if (srcrect->x || srcrect->w != src->w) {
   799         RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect);
   800     } else {
   801 
   802         /*
   803          * non-clipped blitter. Ptype is the destination pixel type,
   804          * Ctype the translucent count type, and do_blend the
   805          * macro to blend one pixel.
   806          */
   807 #define RLEALPHABLIT(Ptype, Ctype, do_blend)                 \
   808     do {                                 \
   809         int linecount = srcrect->h;                  \
   810         do {                             \
   811         int ofs = 0;                         \
   812         /* blit opaque pixels on one line */             \
   813         do {                             \
   814             unsigned run;                    \
   815             ofs += ((Ctype *)srcbuf)[0];             \
   816             run = ((Ctype *)srcbuf)[1];              \
   817             srcbuf += 2 * sizeof(Ctype);             \
   818             if(run) {                        \
   819             PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
   820                    run, sizeof(Ptype));          \
   821             srcbuf += run * sizeof(Ptype);           \
   822             ofs += run;                  \
   823             } else if(!ofs)                  \
   824             goto done;                   \
   825         } while(ofs < w);                    \
   826         /* skip padding if necessary */              \
   827         if(sizeof(Ptype) == 2)                   \
   828             srcbuf += (uintptr_t)srcbuf & 2;             \
   829         /* blit translucent pixels on the same line */       \
   830         ofs = 0;                         \
   831         do {                             \
   832             unsigned run;                    \
   833             ofs += ((Uint16 *)srcbuf)[0];            \
   834             run = ((Uint16 *)srcbuf)[1];             \
   835             srcbuf += 4;                     \
   836             if(run) {                        \
   837             Ptype *dst = (Ptype *)dstbuf + ofs;      \
   838             unsigned i;                  \
   839             for(i = 0; i < run; i++) {           \
   840                 Uint32 src = *(Uint32 *)srcbuf;      \
   841                 do_blend(src, *dst);             \
   842                 srcbuf += 4;                 \
   843                 dst++;                   \
   844             }                        \
   845             ofs += run;                  \
   846             }                            \
   847         } while(ofs < w);                    \
   848         dstbuf += dst->pitch;                    \
   849         } while(--linecount);                    \
   850     } while(0)
   851 
   852         switch (df->BytesPerPixel) {
   853         case 2:
   854             if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
   855                 || df->Bmask == 0x07e0)
   856                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
   857             else
   858                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
   859             break;
   860         case 4:
   861             RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
   862             break;
   863         }
   864     }
   865 
   866   done:
   867     /* Unlock the destination if necessary */
   868     if (SDL_MUSTLOCK(dst)) {
   869         SDL_UnlockSurface(dst);
   870     }
   871     return 0;
   872 }
   873 
   874 /*
   875  * Auxiliary functions:
   876  * The encoding functions take 32bpp rgb + a, and
   877  * return the number of bytes copied to the destination.
   878  * The decoding functions copy to 32bpp rgb + a, and
   879  * return the number of bytes copied from the source.
   880  * These are only used in the encoder and un-RLE code and are therefore not
   881  * highly optimised.
   882  */
   883 
   884 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
   885 static int
   886 copy_opaque_16(void *dst, Uint32 * src, int n,
   887                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   888 {
   889     int i;
   890     Uint16 *d = dst;
   891     for (i = 0; i < n; i++) {
   892         unsigned r, g, b;
   893         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
   894         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
   895         src++;
   896         d++;
   897     }
   898     return n * 2;
   899 }
   900 
   901 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
   902 static int
   903 uncopy_opaque_16(Uint32 * dst, void *src, int n,
   904                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   905 {
   906     int i;
   907     Uint16 *s = src;
   908     unsigned alpha = dfmt->Amask ? 255 : 0;
   909     for (i = 0; i < n; i++) {
   910         unsigned r, g, b;
   911         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
   912         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
   913         s++;
   914         dst++;
   915     }
   916     return n * 2;
   917 }
   918 
   919 
   920 
   921 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
   922 static int
   923 copy_transl_565(void *dst, Uint32 * src, int n,
   924                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   925 {
   926     int i;
   927     Uint32 *d = dst;
   928     for (i = 0; i < n; i++) {
   929         unsigned r, g, b, a;
   930         Uint16 pix;
   931         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   932         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   933         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
   934         src++;
   935         d++;
   936     }
   937     return n * 4;
   938 }
   939 
   940 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
   941 static int
   942 copy_transl_555(void *dst, Uint32 * src, int n,
   943                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   944 {
   945     int i;
   946     Uint32 *d = dst;
   947     for (i = 0; i < n; i++) {
   948         unsigned r, g, b, a;
   949         Uint16 pix;
   950         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   951         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   952         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
   953         src++;
   954         d++;
   955     }
   956     return n * 4;
   957 }
   958 
   959 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
   960 static int
   961 uncopy_transl_16(Uint32 * dst, void *src, int n,
   962                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   963 {
   964     int i;
   965     Uint32 *s = src;
   966     for (i = 0; i < n; i++) {
   967         unsigned r, g, b, a;
   968         Uint32 pix = *s++;
   969         a = (pix & 0x3e0) >> 2;
   970         pix = (pix & ~0x3e0) | pix >> 16;
   971         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
   972         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   973         dst++;
   974     }
   975     return n * 4;
   976 }
   977 
   978 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   979 static int
   980 copy_32(void *dst, Uint32 * src, int n,
   981         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   982 {
   983     int i;
   984     Uint32 *d = dst;
   985     for (i = 0; i < n; i++) {
   986         unsigned r, g, b, a;
   987         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   988         PIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
   989         d++;
   990         src++;
   991     }
   992     return n * 4;
   993 }
   994 
   995 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   996 static int
   997 uncopy_32(Uint32 * dst, void *src, int n,
   998           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   999 {
  1000     int i;
  1001     Uint32 *s = src;
  1002     for (i = 0; i < n; i++) {
  1003         unsigned r, g, b, a;
  1004         Uint32 pixel = *s++;
  1005         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
  1006         a = pixel >> 24;
  1007         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
  1008         dst++;
  1009     }
  1010     return n * 4;
  1011 }
  1012 
  1013 #define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255)
  1014 
  1015 #define ISTRANSL(pixel, fmt)    \
  1016     ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U)
  1017 
  1018 /* convert surface to be quickly alpha-blittable onto dest, if possible */
  1019 static int
  1020 RLEAlphaSurface(SDL_Surface * surface)
  1021 {
  1022     SDL_Surface *dest;
  1023     SDL_PixelFormat *df;
  1024     int maxsize = 0;
  1025     int max_opaque_run;
  1026     int max_transl_run = 65535;
  1027     unsigned masksum;
  1028     Uint8 *rlebuf, *dst;
  1029     int (*copy_opaque) (void *, Uint32 *, int,
  1030                         SDL_PixelFormat *, SDL_PixelFormat *);
  1031     int (*copy_transl) (void *, Uint32 *, int,
  1032                         SDL_PixelFormat *, SDL_PixelFormat *);
  1033 
  1034     dest = surface->map->dst;
  1035     if (!dest)
  1036         return -1;
  1037     df = dest->format;
  1038     if (surface->format->BitsPerPixel != 32)
  1039         return -1;              /* only 32bpp source supported */
  1040 
  1041     /* find out whether the destination is one we support,
  1042        and determine the max size of the encoded result */
  1043     masksum = df->Rmask | df->Gmask | df->Bmask;
  1044     switch (df->BytesPerPixel) {
  1045     case 2:
  1046         /* 16bpp: only support 565 and 555 formats */
  1047         switch (masksum) {
  1048         case 0xffff:
  1049             if (df->Gmask == 0x07e0
  1050                 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
  1051                 copy_opaque = copy_opaque_16;
  1052                 copy_transl = copy_transl_565;
  1053             } else
  1054                 return -1;
  1055             break;
  1056         case 0x7fff:
  1057             if (df->Gmask == 0x03e0
  1058                 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
  1059                 copy_opaque = copy_opaque_16;
  1060                 copy_transl = copy_transl_555;
  1061             } else
  1062                 return -1;
  1063             break;
  1064         default:
  1065             return -1;
  1066         }
  1067         max_opaque_run = 255;   /* runs stored as bytes */
  1068 
  1069         /* worst case is alternating opaque and translucent pixels,
  1070            with room for alignment padding between lines */
  1071         maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
  1072         break;
  1073     case 4:
  1074         if (masksum != 0x00ffffff)
  1075             return -1;          /* requires unused high byte */
  1076         copy_opaque = copy_32;
  1077         copy_transl = copy_32;
  1078         max_opaque_run = 255;   /* runs stored as short ints */
  1079 
  1080         /* worst case is alternating opaque and translucent pixels */
  1081         maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
  1082         break;
  1083     default:
  1084         return -1;              /* anything else unsupported right now */
  1085     }
  1086 
  1087     maxsize += sizeof(RLEDestFormat);
  1088     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1089     if (!rlebuf) {
  1090         return SDL_OutOfMemory();
  1091     }
  1092     {
  1093         /* save the destination format so we can undo the encoding later */
  1094         RLEDestFormat *r = (RLEDestFormat *) rlebuf;
  1095         r->BytesPerPixel = df->BytesPerPixel;
  1096         r->Rmask = df->Rmask;
  1097         r->Gmask = df->Gmask;
  1098         r->Bmask = df->Bmask;
  1099         r->Amask = df->Amask;
  1100         r->Rloss = df->Rloss;
  1101         r->Gloss = df->Gloss;
  1102         r->Bloss = df->Bloss;
  1103         r->Aloss = df->Aloss;
  1104         r->Rshift = df->Rshift;
  1105         r->Gshift = df->Gshift;
  1106         r->Bshift = df->Bshift;
  1107         r->Ashift = df->Ashift;
  1108     }
  1109     dst = rlebuf + sizeof(RLEDestFormat);
  1110 
  1111     /* Do the actual encoding */
  1112     {
  1113         int x, y;
  1114         int h = surface->h, w = surface->w;
  1115         SDL_PixelFormat *sf = surface->format;
  1116         Uint32 *src = (Uint32 *) surface->pixels;
  1117         Uint8 *lastline = dst;  /* end of last non-blank line */
  1118 
  1119         /* opaque counts are 8 or 16 bits, depending on target depth */
  1120 #define ADD_OPAQUE_COUNTS(n, m)         \
  1121     if(df->BytesPerPixel == 4) {        \
  1122         ((Uint16 *)dst)[0] = n;     \
  1123         ((Uint16 *)dst)[1] = m;     \
  1124         dst += 4;               \
  1125     } else {                \
  1126         dst[0] = n;             \
  1127         dst[1] = m;             \
  1128         dst += 2;               \
  1129     }
  1130 
  1131         /* translucent counts are always 16 bit */
  1132 #define ADD_TRANSL_COUNTS(n, m)     \
  1133     (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
  1134 
  1135         for (y = 0; y < h; y++) {
  1136             int runstart, skipstart;
  1137             int blankline = 0;
  1138             /* First encode all opaque pixels of a scan line */
  1139             x = 0;
  1140             do {
  1141                 int run, skip, len;
  1142                 skipstart = x;
  1143                 while (x < w && !ISOPAQUE(src[x], sf))
  1144                     x++;
  1145                 runstart = x;
  1146                 while (x < w && ISOPAQUE(src[x], sf))
  1147                     x++;
  1148                 skip = runstart - skipstart;
  1149                 if (skip == w)
  1150                     blankline = 1;
  1151                 run = x - runstart;
  1152                 while (skip > max_opaque_run) {
  1153                     ADD_OPAQUE_COUNTS(max_opaque_run, 0);
  1154                     skip -= max_opaque_run;
  1155                 }
  1156                 len = MIN(run, max_opaque_run);
  1157                 ADD_OPAQUE_COUNTS(skip, len);
  1158                 dst += copy_opaque(dst, src + runstart, len, sf, df);
  1159                 runstart += len;
  1160                 run -= len;
  1161                 while (run) {
  1162                     len = MIN(run, max_opaque_run);
  1163                     ADD_OPAQUE_COUNTS(0, len);
  1164                     dst += copy_opaque(dst, src + runstart, len, sf, df);
  1165                     runstart += len;
  1166                     run -= len;
  1167                 }
  1168             } while (x < w);
  1169 
  1170             /* Make sure the next output address is 32-bit aligned */
  1171             dst += (uintptr_t) dst & 2;
  1172 
  1173             /* Next, encode all translucent pixels of the same scan line */
  1174             x = 0;
  1175             do {
  1176                 int run, skip, len;
  1177                 skipstart = x;
  1178                 while (x < w && !ISTRANSL(src[x], sf))
  1179                     x++;
  1180                 runstart = x;
  1181                 while (x < w && ISTRANSL(src[x], sf))
  1182                     x++;
  1183                 skip = runstart - skipstart;
  1184                 blankline &= (skip == w);
  1185                 run = x - runstart;
  1186                 while (skip > max_transl_run) {
  1187                     ADD_TRANSL_COUNTS(max_transl_run, 0);
  1188                     skip -= max_transl_run;
  1189                 }
  1190                 len = MIN(run, max_transl_run);
  1191                 ADD_TRANSL_COUNTS(skip, len);
  1192                 dst += copy_transl(dst, src + runstart, len, sf, df);
  1193                 runstart += len;
  1194                 run -= len;
  1195                 while (run) {
  1196                     len = MIN(run, max_transl_run);
  1197                     ADD_TRANSL_COUNTS(0, len);
  1198                     dst += copy_transl(dst, src + runstart, len, sf, df);
  1199                     runstart += len;
  1200                     run -= len;
  1201                 }
  1202                 if (!blankline)
  1203                     lastline = dst;
  1204             } while (x < w);
  1205 
  1206             src += surface->pitch >> 2;
  1207         }
  1208         dst = lastline;         /* back up past trailing blank lines */
  1209         ADD_OPAQUE_COUNTS(0, 0);
  1210     }
  1211 
  1212 #undef ADD_OPAQUE_COUNTS
  1213 #undef ADD_TRANSL_COUNTS
  1214 
  1215     /* Now that we have it encoded, release the original pixels */
  1216     if (!(surface->flags & SDL_PREALLOC)) {
  1217         SDL_free(surface->pixels);
  1218         surface->pixels = NULL;
  1219     }
  1220 
  1221     /* realloc the buffer to release unused memory */
  1222     {
  1223         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1224         if (!p)
  1225             p = rlebuf;
  1226         surface->map->data = p;
  1227     }
  1228 
  1229     return 0;
  1230 }
  1231 
  1232 static Uint32
  1233 getpix_8(Uint8 * srcbuf)
  1234 {
  1235     return *srcbuf;
  1236 }
  1237 
  1238 static Uint32
  1239 getpix_16(Uint8 * srcbuf)
  1240 {
  1241     return *(Uint16 *) srcbuf;
  1242 }
  1243 
  1244 static Uint32
  1245 getpix_24(Uint8 * srcbuf)
  1246 {
  1247 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  1248     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
  1249 #else
  1250     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
  1251 #endif
  1252 }
  1253 
  1254 static Uint32
  1255 getpix_32(Uint8 * srcbuf)
  1256 {
  1257     return *(Uint32 *) srcbuf;
  1258 }
  1259 
  1260 typedef Uint32(*getpix_func) (Uint8 *);
  1261 
  1262 static const getpix_func getpixes[4] = {
  1263     getpix_8, getpix_16, getpix_24, getpix_32
  1264 };
  1265 
  1266 static int
  1267 RLEColorkeySurface(SDL_Surface * surface)
  1268 {
  1269     Uint8 *rlebuf, *dst;
  1270     int maxn;
  1271     int y;
  1272     Uint8 *srcbuf, *lastline;
  1273     int maxsize = 0;
  1274     int bpp = surface->format->BytesPerPixel;
  1275     getpix_func getpix;
  1276     Uint32 ckey, rgbmask;
  1277     int w, h;
  1278 
  1279     /* calculate the worst case size for the compressed surface */
  1280     switch (bpp) {
  1281     case 1:
  1282         /* worst case is alternating opaque and transparent pixels,
  1283            starting with an opaque pixel */
  1284         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
  1285         break;
  1286     case 2:
  1287     case 3:
  1288         /* worst case is solid runs, at most 255 pixels wide */
  1289         maxsize = surface->h * (2 * (surface->w / 255 + 1)
  1290                                 + surface->w * bpp) + 2;
  1291         break;
  1292     case 4:
  1293         /* worst case is solid runs, at most 65535 pixels wide */
  1294         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
  1295                                 + surface->w * 4) + 4;
  1296         break;
  1297     }
  1298 
  1299     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1300     if (rlebuf == NULL) {
  1301         return SDL_OutOfMemory();
  1302     }
  1303 
  1304     /* Set up the conversion */
  1305     srcbuf = (Uint8 *) surface->pixels;
  1306     maxn = bpp == 4 ? 65535 : 255;
  1307     dst = rlebuf;
  1308     rgbmask = ~surface->format->Amask;
  1309     ckey = surface->map->info.colorkey & rgbmask;
  1310     lastline = dst;
  1311     getpix = getpixes[bpp - 1];
  1312     w = surface->w;
  1313     h = surface->h;
  1314 
  1315 #define ADD_COUNTS(n, m)            \
  1316     if(bpp == 4) {              \
  1317         ((Uint16 *)dst)[0] = n;     \
  1318         ((Uint16 *)dst)[1] = m;     \
  1319         dst += 4;               \
  1320     } else {                \
  1321         dst[0] = n;             \
  1322         dst[1] = m;             \
  1323         dst += 2;               \
  1324     }
  1325 
  1326     for (y = 0; y < h; y++) {
  1327         int x = 0;
  1328         int blankline = 0;
  1329         do {
  1330             int run, skip, len;
  1331             int runstart;
  1332             int skipstart = x;
  1333 
  1334             /* find run of transparent, then opaque pixels */
  1335             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
  1336                 x++;
  1337             runstart = x;
  1338             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
  1339                 x++;
  1340             skip = runstart - skipstart;
  1341             if (skip == w)
  1342                 blankline = 1;
  1343             run = x - runstart;
  1344 
  1345             /* encode segment */
  1346             while (skip > maxn) {
  1347                 ADD_COUNTS(maxn, 0);
  1348                 skip -= maxn;
  1349             }
  1350             len = MIN(run, maxn);
  1351             ADD_COUNTS(skip, len);
  1352             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1353             dst += len * bpp;
  1354             run -= len;
  1355             runstart += len;
  1356             while (run) {
  1357                 len = MIN(run, maxn);
  1358                 ADD_COUNTS(0, len);
  1359                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1360                 dst += len * bpp;
  1361                 runstart += len;
  1362                 run -= len;
  1363             }
  1364             if (!blankline)
  1365                 lastline = dst;
  1366         } while (x < w);
  1367 
  1368         srcbuf += surface->pitch;
  1369     }
  1370     dst = lastline;             /* back up bast trailing blank lines */
  1371     ADD_COUNTS(0, 0);
  1372 
  1373 #undef ADD_COUNTS
  1374 
  1375     /* Now that we have it encoded, release the original pixels */
  1376     if (!(surface->flags & SDL_PREALLOC)) {
  1377         SDL_free(surface->pixels);
  1378         surface->pixels = NULL;
  1379     }
  1380 
  1381     /* realloc the buffer to release unused memory */
  1382     {
  1383         /* If realloc returns NULL, the original block is left intact */
  1384         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1385         if (!p)
  1386             p = rlebuf;
  1387         surface->map->data = p;
  1388     }
  1389 
  1390     return (0);
  1391 }
  1392 
  1393 int
  1394 SDL_RLESurface(SDL_Surface * surface)
  1395 {
  1396     int flags;
  1397 
  1398     /* Clear any previous RLE conversion */
  1399     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
  1400         SDL_UnRLESurface(surface, 1);
  1401     }
  1402 
  1403     /* We don't support RLE encoding of bitmaps */
  1404     if (surface->format->BitsPerPixel < 8) {
  1405         return -1;
  1406     }
  1407 
  1408     /* Make sure the pixels are available */
  1409     if (!surface->pixels) {
  1410         return -1;
  1411     }
  1412 
  1413     /* If we don't have colorkey or blending, nothing to do... */
  1414     flags = surface->map->info.flags;
  1415     if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
  1416         return -1;
  1417     }
  1418 
  1419     /* Pass on combinations not supported */
  1420     if ((flags & SDL_COPY_MODULATE_COLOR) ||
  1421         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
  1422         (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
  1423         (flags & SDL_COPY_NEAREST)) {
  1424         return -1;
  1425     }
  1426 
  1427     /* Encode and set up the blit */
  1428     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
  1429         if (!surface->map->identity) {
  1430             return -1;
  1431         }
  1432         if (RLEColorkeySurface(surface) < 0) {
  1433             return -1;
  1434         }
  1435         surface->map->blit = SDL_RLEBlit;
  1436         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
  1437     } else {
  1438         if (RLEAlphaSurface(surface) < 0) {
  1439             return -1;
  1440         }
  1441         surface->map->blit = SDL_RLEAlphaBlit;
  1442         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
  1443     }
  1444 
  1445     /* The surface is now accelerated */
  1446     surface->flags |= SDL_RLEACCEL;
  1447 
  1448     return (0);
  1449 }
  1450 
  1451 /*
  1452  * Un-RLE a surface with pixel alpha
  1453  * This may not give back exactly the image before RLE-encoding; all
  1454  * completely transparent pixels will be lost, and color and alpha depth
  1455  * may have been reduced (when encoding for 16bpp targets).
  1456  */
  1457 static SDL_bool
  1458 UnRLEAlpha(SDL_Surface * surface)
  1459 {
  1460     Uint8 *srcbuf;
  1461     Uint32 *dst;
  1462     SDL_PixelFormat *sf = surface->format;
  1463     RLEDestFormat *df = surface->map->data;
  1464     int (*uncopy_opaque) (Uint32 *, void *, int,
  1465                           RLEDestFormat *, SDL_PixelFormat *);
  1466     int (*uncopy_transl) (Uint32 *, void *, int,
  1467                           RLEDestFormat *, SDL_PixelFormat *);
  1468     int w = surface->w;
  1469     int bpp = df->BytesPerPixel;
  1470 
  1471     if (bpp == 2) {
  1472         uncopy_opaque = uncopy_opaque_16;
  1473         uncopy_transl = uncopy_transl_16;
  1474     } else {
  1475         uncopy_opaque = uncopy_transl = uncopy_32;
  1476     }
  1477 
  1478     surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1479     if (!surface->pixels) {
  1480         return (SDL_FALSE);
  1481     }
  1482     /* fill background with transparent pixels */
  1483     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
  1484 
  1485     dst = surface->pixels;
  1486     srcbuf = (Uint8 *) (df + 1);
  1487     for (;;) {
  1488         /* copy opaque pixels */
  1489         int ofs = 0;
  1490         do {
  1491             unsigned run;
  1492             if (bpp == 2) {
  1493                 ofs += srcbuf[0];
  1494                 run = srcbuf[1];
  1495                 srcbuf += 2;
  1496             } else {
  1497                 ofs += ((Uint16 *) srcbuf)[0];
  1498                 run = ((Uint16 *) srcbuf)[1];
  1499                 srcbuf += 4;
  1500             }
  1501             if (run) {
  1502                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
  1503                 ofs += run;
  1504             } else if (!ofs)
  1505                 return (SDL_TRUE);
  1506         } while (ofs < w);
  1507 
  1508         /* skip padding if needed */
  1509         if (bpp == 2)
  1510             srcbuf += (uintptr_t) srcbuf & 2;
  1511 
  1512         /* copy translucent pixels */
  1513         ofs = 0;
  1514         do {
  1515             unsigned run;
  1516             ofs += ((Uint16 *) srcbuf)[0];
  1517             run = ((Uint16 *) srcbuf)[1];
  1518             srcbuf += 4;
  1519             if (run) {
  1520                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
  1521                 ofs += run;
  1522             }
  1523         } while (ofs < w);
  1524         dst += surface->pitch >> 2;
  1525     }
  1526     /* Make the compiler happy */
  1527     return (SDL_TRUE);
  1528 }
  1529 
  1530 void
  1531 SDL_UnRLESurface(SDL_Surface * surface, int recode)
  1532 {
  1533     if (surface->flags & SDL_RLEACCEL) {
  1534         surface->flags &= ~SDL_RLEACCEL;
  1535 
  1536         if (recode && !(surface->flags & SDL_PREALLOC)) {
  1537             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
  1538                 SDL_Rect full;
  1539 
  1540                 /* re-create the original surface */
  1541                 surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1542                 if (!surface->pixels) {
  1543                     /* Oh crap... */
  1544                     surface->flags |= SDL_RLEACCEL;
  1545                     return;
  1546                 }
  1547 
  1548                 /* fill it with the background color */
  1549                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
  1550 
  1551                 /* now render the encoded surface */
  1552                 full.x = full.y = 0;
  1553                 full.w = surface->w;
  1554                 full.h = surface->h;
  1555                 SDL_RLEBlit(surface, &full, surface, &full);
  1556             } else {
  1557                 if (!UnRLEAlpha(surface)) {
  1558                     /* Oh crap... */
  1559                     surface->flags |= SDL_RLEACCEL;
  1560                     return;
  1561                 }
  1562             }
  1563         }
  1564         surface->map->info.flags &=
  1565             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
  1566 
  1567         if (surface->map->data) {
  1568             SDL_free(surface->map->data);
  1569             surface->map->data = NULL;
  1570         }
  1571     }
  1572 }
  1573 
  1574 /* vi: set ts=4 sw=4 expandtab: */