src/video/SDL_RLEaccel.c
author Ryan C. Gordon <icculus@icculus.org>
Thu, 09 Aug 2012 14:14:41 -0400
changeset 6389 43a190ad60a7
parent 6138 4c64952a58fb
child 6885 700f1b25f77f
permissions -rwxr-xr-x
Removed some unused variables that gcc 4.6.1 complains about.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2012 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /*
    24  * RLE encoding for software colorkey and alpha-channel acceleration
    25  *
    26  * Original version by Sam Lantinga
    27  *
    28  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
    29  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
    30  * format, encoder and blitter.
    31  *
    32  * Many thanks to Xark and johns for hints, benchmarks and useful comments
    33  * leading to this code.
    34  *
    35  * Welcome to Macro Mayhem.
    36  */
    37 
    38 /*
    39  * The encoding translates the image data to a stream of segments of the form
    40  *
    41  * <skip> <run> <data>
    42  *
    43  * where <skip> is the number of transparent pixels to skip,
    44  *       <run>  is the number of opaque pixels to blit,
    45  * and   <data> are the pixels themselves.
    46  *
    47  * This basic structure is used both for colorkeyed surfaces, used for simple
    48  * binary transparency and for per-surface alpha blending, and for surfaces
    49  * with per-pixel alpha. The details differ, however:
    50  *
    51  * Encoding of colorkeyed surfaces:
    52  *
    53  *   Encoded pixels always have the same format as the target surface.
    54  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
    55  *   where they are 16 bit. This makes the pixel data aligned at all times.
    56  *   Segments never wrap around from one scan line to the next.
    57  *
    58  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    59  *   beginning of a line.
    60  *
    61  * Encoding of surfaces with per-pixel alpha:
    62  *
    63  *   The sequence begins with a struct RLEDestFormat describing the target
    64  *   pixel format, to provide reliable un-encoding.
    65  *
    66  *   Each scan line is encoded twice: First all completely opaque pixels,
    67  *   encoded in the target format as described above, and then all
    68  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
    69  *   in the following 32-bit format:
    70  *
    71  *   For 32-bit targets, each pixel has the target RGB format but with
    72  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
    73  *   counts are 16 bit.
    74  * 
    75  *   For 16-bit targets, each pixel has the target RGB format, but with
    76  *   the middle component (usually green) shifted 16 steps to the left,
    77  *   and the hole filled with the 5 most significant bits of the alpha value.
    78  *   i.e. if the target has the format         rrrrrggggggbbbbb,
    79  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
    80  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
    81  *   for the translucent lines. Two padding bytes may be inserted
    82  *   before each translucent line to keep them 32-bit aligned.
    83  *
    84  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    85  *   beginning of an opaque line.
    86  */
    87 
    88 #include "SDL_video.h"
    89 #include "SDL_sysvideo.h"
    90 #include "SDL_blit.h"
    91 #include "SDL_RLEaccel_c.h"
    92 
    93 #ifndef MAX
    94 #define MAX(a, b) ((a) > (b) ? (a) : (b))
    95 #endif
    96 #ifndef MIN
    97 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    98 #endif
    99 
   100 #define PIXEL_COPY(to, from, len, bpp)			\
   101 do {							\
   102     if(bpp == 4) {					\
   103 	SDL_memcpy4(to, from, (size_t)(len));		\
   104     } else {						\
   105 	SDL_memcpy(to, from, (size_t)(len) * (bpp));	\
   106     }							\
   107 } while(0)
   108 
   109 /*
   110  * Various colorkey blit methods, for opaque and per-surface alpha
   111  */
   112 
   113 #define OPAQUE_BLIT(to, from, length, bpp, alpha)	\
   114     PIXEL_COPY(to, from, length, bpp)
   115 
   116 /*
   117  * For 32bpp pixels on the form 0x00rrggbb:
   118  * If we treat the middle component separately, we can process the two
   119  * remaining in parallel. This is safe to do because of the gap to the left
   120  * of each component, so the bits from the multiplication don't collide.
   121  * This can be used for any RGB permutation of course.
   122  */
   123 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)		\
   124     do {							\
   125         int i;							\
   126 	Uint32 *src = (Uint32 *)(from);				\
   127 	Uint32 *dst = (Uint32 *)(to);				\
   128 	for(i = 0; i < (int)(length); i++) {			\
   129 	    Uint32 s = *src++;					\
   130 	    Uint32 d = *dst;					\
   131 	    Uint32 s1 = s & 0xff00ff;				\
   132 	    Uint32 d1 = d & 0xff00ff;				\
   133 	    d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;	\
   134 	    s &= 0xff00;					\
   135 	    d &= 0xff00;					\
   136 	    d = (d + ((s - d) * alpha >> 8)) & 0xff00;		\
   137 	    *dst++ = d1 | d;					\
   138 	}							\
   139     } while(0)
   140 
   141 /*
   142  * For 16bpp pixels we can go a step further: put the middle component
   143  * in the high 16 bits of a 32 bit word, and process all three RGB
   144  * components at the same time. Since the smallest gap is here just
   145  * 5 bits, we have to scale alpha down to 5 bits as well.
   146  */
   147 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)	\
   148     do {						\
   149         int i;						\
   150 	Uint16 *src = (Uint16 *)(from);			\
   151 	Uint16 *dst = (Uint16 *)(to);			\
   152 	Uint32 ALPHA = alpha >> 3;			\
   153 	for(i = 0; i < (int)(length); i++) {		\
   154 	    Uint32 s = *src++;				\
   155 	    Uint32 d = *dst;				\
   156 	    s = (s | s << 16) & 0x07e0f81f;		\
   157 	    d = (d | d << 16) & 0x07e0f81f;		\
   158 	    d += (s - d) * ALPHA >> 5;			\
   159 	    d &= 0x07e0f81f;				\
   160 	    *dst++ = (Uint16)(d | d >> 16);			\
   161 	}						\
   162     } while(0)
   163 
   164 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)	\
   165     do {						\
   166         int i;						\
   167 	Uint16 *src = (Uint16 *)(from);			\
   168 	Uint16 *dst = (Uint16 *)(to);			\
   169 	Uint32 ALPHA = alpha >> 3;			\
   170 	for(i = 0; i < (int)(length); i++) {		\
   171 	    Uint32 s = *src++;				\
   172 	    Uint32 d = *dst;				\
   173 	    s = (s | s << 16) & 0x03e07c1f;		\
   174 	    d = (d | d << 16) & 0x03e07c1f;		\
   175 	    d += (s - d) * ALPHA >> 5;			\
   176 	    d &= 0x03e07c1f;				\
   177 	    *dst++ = (Uint16)(d | d >> 16);			\
   178 	}						\
   179     } while(0)
   180 
   181 /*
   182  * The general slow catch-all function, for remaining depths and formats
   183  */
   184 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)			\
   185     do {								\
   186         int i;								\
   187 	Uint8 *src = from;						\
   188 	Uint8 *dst = to;						\
   189 	for(i = 0; i < (int)(length); i++) {				\
   190 	    Uint32 s, d;						\
   191 	    unsigned rs, gs, bs, rd, gd, bd;				\
   192 	    switch(bpp) {						\
   193 	    case 2:							\
   194 		s = *(Uint16 *)src;					\
   195 		d = *(Uint16 *)dst;					\
   196 		break;							\
   197 	    case 3:							\
   198 		if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {			\
   199 		    s = (src[0] << 16) | (src[1] << 8) | src[2];	\
   200 		    d = (dst[0] << 16) | (dst[1] << 8) | dst[2];	\
   201 		} else {						\
   202 		    s = (src[2] << 16) | (src[1] << 8) | src[0];	\
   203 		    d = (dst[2] << 16) | (dst[1] << 8) | dst[0];	\
   204 		}							\
   205 		break;							\
   206 	    case 4:							\
   207 		s = *(Uint32 *)src;					\
   208 		d = *(Uint32 *)dst;					\
   209 		break;							\
   210 	    }								\
   211 	    RGB_FROM_PIXEL(s, fmt, rs, gs, bs);				\
   212 	    RGB_FROM_PIXEL(d, fmt, rd, gd, bd);				\
   213 	    rd += (rs - rd) * alpha >> 8;				\
   214 	    gd += (gs - gd) * alpha >> 8;				\
   215 	    bd += (bs - bd) * alpha >> 8;				\
   216 	    PIXEL_FROM_RGB(d, fmt, rd, gd, bd);				\
   217 	    switch(bpp) {						\
   218 	    case 2:							\
   219 		*(Uint16 *)dst = (Uint16)d;					\
   220 		break;							\
   221 	    case 3:							\
   222 		if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {			\
   223 		    dst[0] = (Uint8)(d >> 16);					\
   224 		    dst[1] = (Uint8)(d >> 8);					\
   225 		    dst[2] = (Uint8)(d);						\
   226 		} else {						\
   227 		    dst[0] = (Uint8)d;						\
   228 		    dst[1] = (Uint8)(d >> 8);					\
   229 		    dst[2] = (Uint8)(d >> 16);					\
   230 		}							\
   231 		break;							\
   232 	    case 4:							\
   233 		*(Uint32 *)dst = d;					\
   234 		break;							\
   235 	    }								\
   236 	    src += bpp;							\
   237 	    dst += bpp;							\
   238 	}								\
   239     } while(0)
   240 
   241 /*
   242  * Special case: 50% alpha (alpha=128)
   243  * This is treated specially because it can be optimized very well, and
   244  * since it is good for many cases of semi-translucency.
   245  * The theory is to do all three components at the same time:
   246  * First zero the lowest bit of each component, which gives us room to
   247  * add them. Then shift right and add the sum of the lowest bits.
   248  */
   249 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)		\
   250     do {								\
   251         int i;								\
   252 	Uint32 *src = (Uint32 *)(from);					\
   253 	Uint32 *dst = (Uint32 *)(to);					\
   254 	for(i = 0; i < (int)(length); i++) {				\
   255 	    Uint32 s = *src++;						\
   256 	    Uint32 d = *dst;						\
   257 	    *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)	\
   258 		     + (s & d & 0x00010101);				\
   259 	}								\
   260     } while(0)
   261 
   262 /*
   263  * For 16bpp, we can actually blend two pixels in parallel, if we take
   264  * care to shift before we add, not after.
   265  */
   266 
   267 /* helper: blend a single 16 bit pixel at 50% */
   268 #define BLEND16_50(dst, src, mask)			\
   269     do {						\
   270 	Uint32 s = *src++;				\
   271 	Uint32 d = *dst;				\
   272 	*dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +	\
   273 	                  (s & d & (~mask & 0xffff)));		\
   274     } while(0)
   275 
   276 /* basic 16bpp blender. mask is the pixels to keep when adding. */
   277 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)		\
   278     do {								\
   279 	unsigned n = (length);						\
   280 	Uint16 *src = (Uint16 *)(from);					\
   281 	Uint16 *dst = (Uint16 *)(to);					\
   282 	if(((uintptr_t)src ^ (uintptr_t)dst) & 3) {			\
   283 	    /* source and destination not in phase, blit one by one */	\
   284 	    while(n--)							\
   285 		BLEND16_50(dst, src, mask);				\
   286 	} else {							\
   287 	    if((uintptr_t)src & 3) {					\
   288 		/* first odd pixel */					\
   289 		BLEND16_50(dst, src, mask);				\
   290 		n--;							\
   291 	    }								\
   292 	    for(; n > 1; n -= 2) {					\
   293 		Uint32 s = *(Uint32 *)src;				\
   294 		Uint32 d = *(Uint32 *)dst;				\
   295 		*(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1)	\
   296 		               + ((d & (mask | mask << 16)) >> 1)	\
   297 		               + (s & d & (~(mask | mask << 16)));	\
   298 		src += 2;						\
   299 		dst += 2;						\
   300 	    }								\
   301 	    if(n)							\
   302 		BLEND16_50(dst, src, mask); /* last odd pixel */	\
   303 	}								\
   304     } while(0)
   305 
   306 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)	\
   307     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
   308 
   309 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)	\
   310     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
   311 
   312 #define CHOOSE_BLIT(blitter, alpha, fmt)				\
   313     do {								\
   314         if(alpha == 255) {						\
   315 	    switch(fmt->BytesPerPixel) {				\
   316 	    case 1: blitter(1, Uint8, OPAQUE_BLIT); break;		\
   317 	    case 2: blitter(2, Uint8, OPAQUE_BLIT); break;		\
   318 	    case 3: blitter(3, Uint8, OPAQUE_BLIT); break;		\
   319 	    case 4: blitter(4, Uint16, OPAQUE_BLIT); break;		\
   320 	    }								\
   321 	} else {							\
   322 	    switch(fmt->BytesPerPixel) {				\
   323 	    case 1:							\
   324 		/* No 8bpp alpha blitting */				\
   325 		break;							\
   326 									\
   327 	    case 2:							\
   328 		switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) {		\
   329 		case 0xffff:						\
   330 		    if(fmt->Gmask == 0x07e0				\
   331 		       || fmt->Rmask == 0x07e0				\
   332 		       || fmt->Bmask == 0x07e0) {			\
   333 			if(alpha == 128)				\
   334 			    blitter(2, Uint8, ALPHA_BLIT16_565_50);	\
   335 			else {						\
   336 			    blitter(2, Uint8, ALPHA_BLIT16_565);	\
   337 			}						\
   338 		    } else						\
   339 			goto general16;					\
   340 		    break;						\
   341 									\
   342 		case 0x7fff:						\
   343 		    if(fmt->Gmask == 0x03e0				\
   344 		       || fmt->Rmask == 0x03e0				\
   345 		       || fmt->Bmask == 0x03e0) {			\
   346 			if(alpha == 128)				\
   347 			    blitter(2, Uint8, ALPHA_BLIT16_555_50);	\
   348 			else {						\
   349 			    blitter(2, Uint8, ALPHA_BLIT16_555);	\
   350 			}						\
   351 			break;						\
   352 		    }							\
   353 		    /* fallthrough */					\
   354 									\
   355 		default:						\
   356 		general16:						\
   357 		    blitter(2, Uint8, ALPHA_BLIT_ANY);			\
   358 		}							\
   359 		break;							\
   360 									\
   361 	    case 3:							\
   362 		blitter(3, Uint8, ALPHA_BLIT_ANY);			\
   363 		break;							\
   364 									\
   365 	    case 4:							\
   366 		if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff	\
   367 		   && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00	\
   368 		       || fmt->Bmask == 0xff00)) {			\
   369 		    if(alpha == 128)					\
   370 			blitter(4, Uint16, ALPHA_BLIT32_888_50);	\
   371 		    else						\
   372 			blitter(4, Uint16, ALPHA_BLIT32_888);		\
   373 		} else							\
   374 		    blitter(4, Uint16, ALPHA_BLIT_ANY);			\
   375 		break;							\
   376 	    }								\
   377 	}								\
   378     } while(0)
   379 
   380 /*
   381  * This takes care of the case when the surface is clipped on the left and/or
   382  * right. Top clipping has already been taken care of.
   383  */
   384 static void
   385 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
   386             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
   387 {
   388     SDL_PixelFormat *fmt = dst->format;
   389 
   390 #define RLECLIPBLIT(bpp, Type, do_blit)					   \
   391     do {								   \
   392 	int linecount = srcrect->h;					   \
   393 	int ofs = 0;							   \
   394 	int left = srcrect->x;						   \
   395 	int right = left + srcrect->w;					   \
   396 	dstbuf -= left * bpp;						   \
   397 	for(;;) {							   \
   398 	    int run;							   \
   399 	    ofs += *(Type *)srcbuf;					   \
   400 	    run = ((Type *)srcbuf)[1];					   \
   401 	    srcbuf += 2 * sizeof(Type);					   \
   402 	    if(run) {							   \
   403 		/* clip to left and right borders */			   \
   404 		if(ofs < right) {					   \
   405 		    int start = 0;					   \
   406 		    int len = run;					   \
   407 		    int startcol;					   \
   408 		    if(left - ofs > 0) {				   \
   409 			start = left - ofs;				   \
   410 			len -= start;					   \
   411 			if(len <= 0)					   \
   412 			    goto nocopy ## bpp ## do_blit;		   \
   413 		    }							   \
   414 		    startcol = ofs + start;				   \
   415 		    if(len > right - startcol)				   \
   416 			len = right - startcol;				   \
   417 		    do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
   418 			    len, bpp, alpha);				   \
   419 		}							   \
   420 	    nocopy ## bpp ## do_blit:					   \
   421 		srcbuf += run * bpp;					   \
   422 		ofs += run;						   \
   423 	    } else if(!ofs)						   \
   424 		break;							   \
   425 	    if(ofs == w) {						   \
   426 		ofs = 0;						   \
   427 		dstbuf += dst->pitch;					   \
   428 		if(!--linecount)					   \
   429 		    break;						   \
   430 	    }								   \
   431 	}								   \
   432     } while(0)
   433 
   434     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
   435 
   436 #undef RLECLIPBLIT
   437 
   438 }
   439 
   440 
   441 /* blit a colorkeyed RLE surface */
   442 int
   443 SDL_RLEBlit(SDL_Surface * src, SDL_Rect * srcrect,
   444             SDL_Surface * dst, SDL_Rect * dstrect)
   445 {
   446     Uint8 *dstbuf;
   447     Uint8 *srcbuf;
   448     int x, y;
   449     int w = src->w;
   450     unsigned alpha;
   451 
   452     /* Lock the destination if necessary */
   453     if (SDL_MUSTLOCK(dst)) {
   454         if (SDL_LockSurface(dst) < 0) {
   455             return (-1);
   456         }
   457     }
   458 
   459     /* Set up the source and destination pointers */
   460     x = dstrect->x;
   461     y = dstrect->y;
   462     dstbuf = (Uint8 *) dst->pixels
   463         + y * dst->pitch + x * src->format->BytesPerPixel;
   464     srcbuf = (Uint8 *) src->map->data;
   465 
   466     {
   467         /* skip lines at the top if neccessary */
   468         int vskip = srcrect->y;
   469         int ofs = 0;
   470         if (vskip) {
   471 
   472 #define RLESKIP(bpp, Type)			\
   473 		for(;;) {			\
   474 		    int run;			\
   475 		    ofs += *(Type *)srcbuf;	\
   476 		    run = ((Type *)srcbuf)[1];	\
   477 		    srcbuf += sizeof(Type) * 2;	\
   478 		    if(run) {			\
   479 			srcbuf += run * bpp;	\
   480 			ofs += run;		\
   481 		    } else if(!ofs)		\
   482 			goto done;		\
   483 		    if(ofs == w) {		\
   484 			ofs = 0;		\
   485 			if(!--vskip)		\
   486 			    break;		\
   487 		    }				\
   488 		}
   489 
   490             switch (src->format->BytesPerPixel) {
   491             case 1:
   492                 RLESKIP(1, Uint8);
   493                 break;
   494             case 2:
   495                 RLESKIP(2, Uint8);
   496                 break;
   497             case 3:
   498                 RLESKIP(3, Uint8);
   499                 break;
   500             case 4:
   501                 RLESKIP(4, Uint16);
   502                 break;
   503             }
   504 
   505 #undef RLESKIP
   506 
   507         }
   508     }
   509 
   510     alpha = src->map->info.a;
   511     /* if left or right edge clipping needed, call clip blit */
   512     if (srcrect->x || srcrect->w != src->w) {
   513         RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha);
   514     } else {
   515         SDL_PixelFormat *fmt = src->format;
   516 
   517 #define RLEBLIT(bpp, Type, do_blit)					      \
   518 	    do {							      \
   519 		int linecount = srcrect->h;				      \
   520 		int ofs = 0;						      \
   521 		for(;;) {						      \
   522 		    unsigned run;					      \
   523 		    ofs += *(Type *)srcbuf;				      \
   524 		    run = ((Type *)srcbuf)[1];				      \
   525 		    srcbuf += 2 * sizeof(Type);				      \
   526 		    if(run) {						      \
   527 			do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
   528 			srcbuf += run * bpp;				      \
   529 			ofs += run;					      \
   530 		    } else if(!ofs)					      \
   531 			break;						      \
   532 		    if(ofs == w) {					      \
   533 			ofs = 0;					      \
   534 			dstbuf += dst->pitch;				      \
   535 			if(!--linecount)				      \
   536 			    break;					      \
   537 		    }							      \
   538 		}							      \
   539 	    } while(0)
   540 
   541         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
   542 
   543 #undef RLEBLIT
   544     }
   545 
   546   done:
   547     /* Unlock the destination if necessary */
   548     if (SDL_MUSTLOCK(dst)) {
   549         SDL_UnlockSurface(dst);
   550     }
   551     return (0);
   552 }
   553 
   554 #undef OPAQUE_BLIT
   555 
   556 /*
   557  * Per-pixel blitting macros for translucent pixels:
   558  * These use the same techniques as the per-surface blitting macros
   559  */
   560 
   561 /*
   562  * For 32bpp pixels, we have made sure the alpha is stored in the top
   563  * 8 bits, so proceed as usual
   564  */
   565 #define BLIT_TRANSL_888(src, dst)				\
   566     do {							\
   567         Uint32 s = src;						\
   568 	Uint32 d = dst;						\
   569 	unsigned alpha = s >> 24;				\
   570 	Uint32 s1 = s & 0xff00ff;				\
   571 	Uint32 d1 = d & 0xff00ff;				\
   572 	d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;	\
   573 	s &= 0xff00;						\
   574 	d &= 0xff00;						\
   575 	d = (d + ((s - d) * alpha >> 8)) & 0xff00;		\
   576 	dst = d1 | d | 0xff000000;				\
   577     } while(0)
   578 
   579 /*
   580  * For 16bpp pixels, we have stored the 5 most significant alpha bits in
   581  * bits 5-10. As before, we can process all 3 RGB components at the same time.
   582  */
   583 #define BLIT_TRANSL_565(src, dst)		\
   584     do {					\
   585 	Uint32 s = src;				\
   586 	Uint32 d = dst;				\
   587 	unsigned alpha = (s & 0x3e0) >> 5;	\
   588 	s &= 0x07e0f81f;			\
   589 	d = (d | d << 16) & 0x07e0f81f;		\
   590 	d += (s - d) * alpha >> 5;		\
   591 	d &= 0x07e0f81f;			\
   592 	dst = (Uint16)(d | d >> 16);			\
   593     } while(0)
   594 
   595 #define BLIT_TRANSL_555(src, dst)		\
   596     do {					\
   597 	Uint32 s = src;				\
   598 	Uint32 d = dst;				\
   599 	unsigned alpha = (s & 0x3e0) >> 5;	\
   600 	s &= 0x03e07c1f;			\
   601 	d = (d | d << 16) & 0x03e07c1f;		\
   602 	d += (s - d) * alpha >> 5;		\
   603 	d &= 0x03e07c1f;			\
   604 	dst = (Uint16)(d | d >> 16);			\
   605     } while(0)
   606 
   607 /* used to save the destination format in the encoding. Designed to be
   608    macro-compatible with SDL_PixelFormat but without the unneeded fields */
   609 typedef struct
   610 {
   611     Uint8 BytesPerPixel;
   612     Uint8 padding[3];
   613     Uint32 Rmask;
   614     Uint32 Gmask;
   615     Uint32 Bmask;
   616     Uint32 Amask;
   617     Uint8 Rloss;
   618     Uint8 Gloss;
   619     Uint8 Bloss;
   620     Uint8 Aloss;
   621     Uint8 Rshift;
   622     Uint8 Gshift;
   623     Uint8 Bshift;
   624     Uint8 Ashift;
   625 } RLEDestFormat;
   626 
   627 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
   628 static void
   629 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
   630                  Uint8 * dstbuf, SDL_Rect * srcrect)
   631 {
   632     SDL_PixelFormat *df = dst->format;
   633     /*
   634      * clipped blitter: Ptype is the destination pixel type,
   635      * Ctype the translucent count type, and do_blend the macro
   636      * to blend one pixel.
   637      */
   638 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)			  \
   639     do {								  \
   640 	int linecount = srcrect->h;					  \
   641 	int left = srcrect->x;						  \
   642 	int right = left + srcrect->w;					  \
   643 	dstbuf -= left * sizeof(Ptype);					  \
   644 	do {								  \
   645 	    int ofs = 0;						  \
   646 	    /* blit opaque pixels on one line */			  \
   647 	    do {							  \
   648 		unsigned run;						  \
   649 		ofs += ((Ctype *)srcbuf)[0];				  \
   650 		run = ((Ctype *)srcbuf)[1];				  \
   651 		srcbuf += 2 * sizeof(Ctype);				  \
   652 		if(run) {						  \
   653 		    /* clip to left and right borders */		  \
   654 		    int cofs = ofs;					  \
   655 		    int crun = run;					  \
   656 		    if(left - cofs > 0) {				  \
   657 			crun -= left - cofs;				  \
   658 			cofs = left;					  \
   659 		    }							  \
   660 		    if(crun > right - cofs)				  \
   661 			crun = right - cofs;				  \
   662 		    if(crun > 0)					  \
   663 			PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),	  \
   664 				   srcbuf + (cofs - ofs) * sizeof(Ptype), \
   665 				   (unsigned)crun, sizeof(Ptype));	  \
   666 		    srcbuf += run * sizeof(Ptype);			  \
   667 		    ofs += run;						  \
   668 		} else if(!ofs)						  \
   669 		    return;						  \
   670 	    } while(ofs < w);						  \
   671 	    /* skip padding if necessary */				  \
   672 	    if(sizeof(Ptype) == 2)					  \
   673 		srcbuf += (uintptr_t)srcbuf & 2;			  \
   674 	    /* blit translucent pixels on the same line */		  \
   675 	    ofs = 0;							  \
   676 	    do {							  \
   677 		unsigned run;						  \
   678 		ofs += ((Uint16 *)srcbuf)[0];				  \
   679 		run = ((Uint16 *)srcbuf)[1];				  \
   680 		srcbuf += 4;						  \
   681 		if(run) {						  \
   682 		    /* clip to left and right borders */		  \
   683 		    int cofs = ofs;					  \
   684 		    int crun = run;					  \
   685 		    if(left - cofs > 0) {				  \
   686 			crun -= left - cofs;				  \
   687 			cofs = left;					  \
   688 		    }							  \
   689 		    if(crun > right - cofs)				  \
   690 			crun = right - cofs;				  \
   691 		    if(crun > 0) {					  \
   692 			Ptype *dst = (Ptype *)dstbuf + cofs;		  \
   693 			Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);	  \
   694 			int i;						  \
   695 			for(i = 0; i < crun; i++)			  \
   696 			    do_blend(src[i], dst[i]);			  \
   697 		    }							  \
   698 		    srcbuf += run * 4;					  \
   699 		    ofs += run;						  \
   700 		}							  \
   701 	    } while(ofs < w);						  \
   702 	    dstbuf += dst->pitch;					  \
   703 	} while(--linecount);						  \
   704     } while(0)
   705 
   706     switch (df->BytesPerPixel) {
   707     case 2:
   708         if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
   709             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
   710         else
   711             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
   712         break;
   713     case 4:
   714         RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
   715         break;
   716     }
   717 }
   718 
   719 /* blit a pixel-alpha RLE surface */
   720 int
   721 SDL_RLEAlphaBlit(SDL_Surface * src, SDL_Rect * srcrect,
   722                  SDL_Surface * dst, SDL_Rect * dstrect)
   723 {
   724     int x, y;
   725     int w = src->w;
   726     Uint8 *srcbuf, *dstbuf;
   727     SDL_PixelFormat *df = dst->format;
   728 
   729     /* Lock the destination if necessary */
   730     if (SDL_MUSTLOCK(dst)) {
   731         if (SDL_LockSurface(dst) < 0) {
   732             return -1;
   733         }
   734     }
   735 
   736     x = dstrect->x;
   737     y = dstrect->y;
   738     dstbuf = (Uint8 *) dst->pixels + y * dst->pitch + x * df->BytesPerPixel;
   739     srcbuf = (Uint8 *) src->map->data + sizeof(RLEDestFormat);
   740 
   741     {
   742         /* skip lines at the top if necessary */
   743         int vskip = srcrect->y;
   744         if (vskip) {
   745             int ofs;
   746             if (df->BytesPerPixel == 2) {
   747                 /* the 16/32 interleaved format */
   748                 do {
   749                     /* skip opaque line */
   750                     ofs = 0;
   751                     do {
   752                         int run;
   753                         ofs += srcbuf[0];
   754                         run = srcbuf[1];
   755                         srcbuf += 2;
   756                         if (run) {
   757                             srcbuf += 2 * run;
   758                             ofs += run;
   759                         } else if (!ofs)
   760                             goto done;
   761                     } while (ofs < w);
   762 
   763                     /* skip padding */
   764                     srcbuf += (uintptr_t) srcbuf & 2;
   765 
   766                     /* skip translucent line */
   767                     ofs = 0;
   768                     do {
   769                         int run;
   770                         ofs += ((Uint16 *) srcbuf)[0];
   771                         run = ((Uint16 *) srcbuf)[1];
   772                         srcbuf += 4 * (run + 1);
   773                         ofs += run;
   774                     } while (ofs < w);
   775                 } while (--vskip);
   776             } else {
   777                 /* the 32/32 interleaved format */
   778                 vskip <<= 1;    /* opaque and translucent have same format */
   779                 do {
   780                     ofs = 0;
   781                     do {
   782                         int run;
   783                         ofs += ((Uint16 *) srcbuf)[0];
   784                         run = ((Uint16 *) srcbuf)[1];
   785                         srcbuf += 4;
   786                         if (run) {
   787                             srcbuf += 4 * run;
   788                             ofs += run;
   789                         } else if (!ofs)
   790                             goto done;
   791                     } while (ofs < w);
   792                 } while (--vskip);
   793             }
   794         }
   795     }
   796 
   797     /* if left or right edge clipping needed, call clip blit */
   798     if (srcrect->x || srcrect->w != src->w) {
   799         RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect);
   800     } else {
   801 
   802         /*
   803          * non-clipped blitter. Ptype is the destination pixel type,
   804          * Ctype the translucent count type, and do_blend the
   805          * macro to blend one pixel.
   806          */
   807 #define RLEALPHABLIT(Ptype, Ctype, do_blend)				 \
   808 	do {								 \
   809 	    int linecount = srcrect->h;					 \
   810 	    do {							 \
   811 		int ofs = 0;						 \
   812 		/* blit opaque pixels on one line */			 \
   813 		do {							 \
   814 		    unsigned run;					 \
   815 		    ofs += ((Ctype *)srcbuf)[0];			 \
   816 		    run = ((Ctype *)srcbuf)[1];				 \
   817 		    srcbuf += 2 * sizeof(Ctype);			 \
   818 		    if(run) {						 \
   819 			PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
   820 				   run, sizeof(Ptype));			 \
   821 			srcbuf += run * sizeof(Ptype);			 \
   822 			ofs += run;					 \
   823 		    } else if(!ofs)					 \
   824 			goto done;					 \
   825 		} while(ofs < w);					 \
   826 		/* skip padding if necessary */				 \
   827 		if(sizeof(Ptype) == 2)					 \
   828 		    srcbuf += (uintptr_t)srcbuf & 2;		 	 \
   829 		/* blit translucent pixels on the same line */		 \
   830 		ofs = 0;						 \
   831 		do {							 \
   832 		    unsigned run;					 \
   833 		    ofs += ((Uint16 *)srcbuf)[0];			 \
   834 		    run = ((Uint16 *)srcbuf)[1];			 \
   835 		    srcbuf += 4;					 \
   836 		    if(run) {						 \
   837 			Ptype *dst = (Ptype *)dstbuf + ofs;		 \
   838 			unsigned i;					 \
   839 			for(i = 0; i < run; i++) {			 \
   840 			    Uint32 src = *(Uint32 *)srcbuf;		 \
   841 			    do_blend(src, *dst);			 \
   842 			    srcbuf += 4;				 \
   843 			    dst++;					 \
   844 			}						 \
   845 			ofs += run;					 \
   846 		    }							 \
   847 		} while(ofs < w);					 \
   848 		dstbuf += dst->pitch;					 \
   849 	    } while(--linecount);					 \
   850 	} while(0)
   851 
   852         switch (df->BytesPerPixel) {
   853         case 2:
   854             if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
   855                 || df->Bmask == 0x07e0)
   856                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
   857             else
   858                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
   859             break;
   860         case 4:
   861             RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
   862             break;
   863         }
   864     }
   865 
   866   done:
   867     /* Unlock the destination if necessary */
   868     if (SDL_MUSTLOCK(dst)) {
   869         SDL_UnlockSurface(dst);
   870     }
   871     return 0;
   872 }
   873 
   874 /*
   875  * Auxiliary functions:
   876  * The encoding functions take 32bpp rgb + a, and
   877  * return the number of bytes copied to the destination.
   878  * The decoding functions copy to 32bpp rgb + a, and
   879  * return the number of bytes copied from the source.
   880  * These are only used in the encoder and un-RLE code and are therefore not
   881  * highly optimised.
   882  */
   883 
   884 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
   885 static int
   886 copy_opaque_16(void *dst, Uint32 * src, int n,
   887                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   888 {
   889     int i;
   890     Uint16 *d = dst;
   891     for (i = 0; i < n; i++) {
   892         unsigned r, g, b;
   893         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
   894         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
   895         src++;
   896         d++;
   897     }
   898     return n * 2;
   899 }
   900 
   901 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
   902 static int
   903 uncopy_opaque_16(Uint32 * dst, void *src, int n,
   904                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   905 {
   906     int i;
   907     Uint16 *s = src;
   908     unsigned alpha = dfmt->Amask ? 255 : 0;
   909     for (i = 0; i < n; i++) {
   910         unsigned r, g, b;
   911         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
   912         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
   913         s++;
   914         dst++;
   915     }
   916     return n * 2;
   917 }
   918 
   919 
   920 
   921 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
   922 static int
   923 copy_transl_565(void *dst, Uint32 * src, int n,
   924                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   925 {
   926     int i;
   927     Uint32 *d = dst;
   928     for (i = 0; i < n; i++) {
   929         unsigned r, g, b, a;
   930         Uint16 pix;
   931         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   932         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   933         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
   934         src++;
   935         d++;
   936     }
   937     return n * 4;
   938 }
   939 
   940 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
   941 static int
   942 copy_transl_555(void *dst, Uint32 * src, int n,
   943                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   944 {
   945     int i;
   946     Uint32 *d = dst;
   947     for (i = 0; i < n; i++) {
   948         unsigned r, g, b, a;
   949         Uint16 pix;
   950         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   951         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   952         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
   953         src++;
   954         d++;
   955     }
   956     return n * 4;
   957 }
   958 
   959 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
   960 static int
   961 uncopy_transl_16(Uint32 * dst, void *src, int n,
   962                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   963 {
   964     int i;
   965     Uint32 *s = src;
   966     for (i = 0; i < n; i++) {
   967         unsigned r, g, b, a;
   968         Uint32 pix = *s++;
   969         a = (pix & 0x3e0) >> 2;
   970         pix = (pix & ~0x3e0) | pix >> 16;
   971         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
   972         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   973         dst++;
   974     }
   975     return n * 4;
   976 }
   977 
   978 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   979 static int
   980 copy_32(void *dst, Uint32 * src, int n,
   981         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   982 {
   983     int i;
   984     Uint32 *d = dst;
   985     for (i = 0; i < n; i++) {
   986         unsigned r, g, b, a;
   987         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   988         PIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
   989         d++;
   990         src++;
   991     }
   992     return n * 4;
   993 }
   994 
   995 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   996 static int
   997 uncopy_32(Uint32 * dst, void *src, int n,
   998           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   999 {
  1000     int i;
  1001     Uint32 *s = src;
  1002     for (i = 0; i < n; i++) {
  1003         unsigned r, g, b, a;
  1004         Uint32 pixel = *s++;
  1005         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
  1006         a = pixel >> 24;
  1007         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
  1008         dst++;
  1009     }
  1010     return n * 4;
  1011 }
  1012 
  1013 #define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255)
  1014 
  1015 #define ISTRANSL(pixel, fmt)	\
  1016     ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U)
  1017 
  1018 /* convert surface to be quickly alpha-blittable onto dest, if possible */
  1019 static int
  1020 RLEAlphaSurface(SDL_Surface * surface)
  1021 {
  1022     SDL_Surface *dest;
  1023     SDL_PixelFormat *df;
  1024     int maxsize = 0;
  1025     int max_opaque_run;
  1026     int max_transl_run = 65535;
  1027     unsigned masksum;
  1028     Uint8 *rlebuf, *dst;
  1029     int (*copy_opaque) (void *, Uint32 *, int,
  1030                         SDL_PixelFormat *, SDL_PixelFormat *);
  1031     int (*copy_transl) (void *, Uint32 *, int,
  1032                         SDL_PixelFormat *, SDL_PixelFormat *);
  1033 
  1034     dest = surface->map->dst;
  1035     if (!dest)
  1036         return -1;
  1037     df = dest->format;
  1038     if (surface->format->BitsPerPixel != 32)
  1039         return -1;              /* only 32bpp source supported */
  1040 
  1041     /* find out whether the destination is one we support,
  1042        and determine the max size of the encoded result */
  1043     masksum = df->Rmask | df->Gmask | df->Bmask;
  1044     switch (df->BytesPerPixel) {
  1045     case 2:
  1046         /* 16bpp: only support 565 and 555 formats */
  1047         switch (masksum) {
  1048         case 0xffff:
  1049             if (df->Gmask == 0x07e0
  1050                 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
  1051                 copy_opaque = copy_opaque_16;
  1052                 copy_transl = copy_transl_565;
  1053             } else
  1054                 return -1;
  1055             break;
  1056         case 0x7fff:
  1057             if (df->Gmask == 0x03e0
  1058                 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
  1059                 copy_opaque = copy_opaque_16;
  1060                 copy_transl = copy_transl_555;
  1061             } else
  1062                 return -1;
  1063             break;
  1064         default:
  1065             return -1;
  1066         }
  1067         max_opaque_run = 255;   /* runs stored as bytes */
  1068 
  1069         /* worst case is alternating opaque and translucent pixels,
  1070            with room for alignment padding between lines */
  1071         maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
  1072         break;
  1073     case 4:
  1074         if (masksum != 0x00ffffff)
  1075             return -1;          /* requires unused high byte */
  1076         copy_opaque = copy_32;
  1077         copy_transl = copy_32;
  1078         max_opaque_run = 255;   /* runs stored as short ints */
  1079 
  1080         /* worst case is alternating opaque and translucent pixels */
  1081         maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
  1082         break;
  1083     default:
  1084         return -1;              /* anything else unsupported right now */
  1085     }
  1086 
  1087     maxsize += sizeof(RLEDestFormat);
  1088     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1089     if (!rlebuf) {
  1090         SDL_OutOfMemory();
  1091         return -1;
  1092     }
  1093     {
  1094         /* save the destination format so we can undo the encoding later */
  1095         RLEDestFormat *r = (RLEDestFormat *) rlebuf;
  1096         r->BytesPerPixel = df->BytesPerPixel;
  1097         r->Rmask = df->Rmask;
  1098         r->Gmask = df->Gmask;
  1099         r->Bmask = df->Bmask;
  1100         r->Amask = df->Amask;
  1101         r->Rloss = df->Rloss;
  1102         r->Gloss = df->Gloss;
  1103         r->Bloss = df->Bloss;
  1104         r->Aloss = df->Aloss;
  1105         r->Rshift = df->Rshift;
  1106         r->Gshift = df->Gshift;
  1107         r->Bshift = df->Bshift;
  1108         r->Ashift = df->Ashift;
  1109     }
  1110     dst = rlebuf + sizeof(RLEDestFormat);
  1111 
  1112     /* Do the actual encoding */
  1113     {
  1114         int x, y;
  1115         int h = surface->h, w = surface->w;
  1116         SDL_PixelFormat *sf = surface->format;
  1117         Uint32 *src = (Uint32 *) surface->pixels;
  1118         Uint8 *lastline = dst;  /* end of last non-blank line */
  1119 
  1120         /* opaque counts are 8 or 16 bits, depending on target depth */
  1121 #define ADD_OPAQUE_COUNTS(n, m)			\
  1122 	if(df->BytesPerPixel == 4) {		\
  1123 	    ((Uint16 *)dst)[0] = n;		\
  1124 	    ((Uint16 *)dst)[1] = m;		\
  1125 	    dst += 4;				\
  1126 	} else {				\
  1127 	    dst[0] = n;				\
  1128 	    dst[1] = m;				\
  1129 	    dst += 2;				\
  1130 	}
  1131 
  1132         /* translucent counts are always 16 bit */
  1133 #define ADD_TRANSL_COUNTS(n, m)		\
  1134 	(((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
  1135 
  1136         for (y = 0; y < h; y++) {
  1137             int runstart, skipstart;
  1138             int blankline = 0;
  1139             /* First encode all opaque pixels of a scan line */
  1140             x = 0;
  1141             do {
  1142                 int run, skip, len;
  1143                 skipstart = x;
  1144                 while (x < w && !ISOPAQUE(src[x], sf))
  1145                     x++;
  1146                 runstart = x;
  1147                 while (x < w && ISOPAQUE(src[x], sf))
  1148                     x++;
  1149                 skip = runstart - skipstart;
  1150                 if (skip == w)
  1151                     blankline = 1;
  1152                 run = x - runstart;
  1153                 while (skip > max_opaque_run) {
  1154                     ADD_OPAQUE_COUNTS(max_opaque_run, 0);
  1155                     skip -= max_opaque_run;
  1156                 }
  1157                 len = MIN(run, max_opaque_run);
  1158                 ADD_OPAQUE_COUNTS(skip, len);
  1159                 dst += copy_opaque(dst, src + runstart, len, sf, df);
  1160                 runstart += len;
  1161                 run -= len;
  1162                 while (run) {
  1163                     len = MIN(run, max_opaque_run);
  1164                     ADD_OPAQUE_COUNTS(0, len);
  1165                     dst += copy_opaque(dst, src + runstart, len, sf, df);
  1166                     runstart += len;
  1167                     run -= len;
  1168                 }
  1169             } while (x < w);
  1170 
  1171             /* Make sure the next output address is 32-bit aligned */
  1172             dst += (uintptr_t) dst & 2;
  1173 
  1174             /* Next, encode all translucent pixels of the same scan line */
  1175             x = 0;
  1176             do {
  1177                 int run, skip, len;
  1178                 skipstart = x;
  1179                 while (x < w && !ISTRANSL(src[x], sf))
  1180                     x++;
  1181                 runstart = x;
  1182                 while (x < w && ISTRANSL(src[x], sf))
  1183                     x++;
  1184                 skip = runstart - skipstart;
  1185                 blankline &= (skip == w);
  1186                 run = x - runstart;
  1187                 while (skip > max_transl_run) {
  1188                     ADD_TRANSL_COUNTS(max_transl_run, 0);
  1189                     skip -= max_transl_run;
  1190                 }
  1191                 len = MIN(run, max_transl_run);
  1192                 ADD_TRANSL_COUNTS(skip, len);
  1193                 dst += copy_transl(dst, src + runstart, len, sf, df);
  1194                 runstart += len;
  1195                 run -= len;
  1196                 while (run) {
  1197                     len = MIN(run, max_transl_run);
  1198                     ADD_TRANSL_COUNTS(0, len);
  1199                     dst += copy_transl(dst, src + runstart, len, sf, df);
  1200                     runstart += len;
  1201                     run -= len;
  1202                 }
  1203                 if (!blankline)
  1204                     lastline = dst;
  1205             } while (x < w);
  1206 
  1207             src += surface->pitch >> 2;
  1208         }
  1209         dst = lastline;         /* back up past trailing blank lines */
  1210         ADD_OPAQUE_COUNTS(0, 0);
  1211     }
  1212 
  1213 #undef ADD_OPAQUE_COUNTS
  1214 #undef ADD_TRANSL_COUNTS
  1215 
  1216     /* Now that we have it encoded, release the original pixels */
  1217     if (!(surface->flags & SDL_PREALLOC)) {
  1218         SDL_free(surface->pixels);
  1219         surface->pixels = NULL;
  1220     }
  1221 
  1222     /* realloc the buffer to release unused memory */
  1223     {
  1224         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1225         if (!p)
  1226             p = rlebuf;
  1227         surface->map->data = p;
  1228     }
  1229 
  1230     return 0;
  1231 }
  1232 
  1233 static Uint32
  1234 getpix_8(Uint8 * srcbuf)
  1235 {
  1236     return *srcbuf;
  1237 }
  1238 
  1239 static Uint32
  1240 getpix_16(Uint8 * srcbuf)
  1241 {
  1242     return *(Uint16 *) srcbuf;
  1243 }
  1244 
  1245 static Uint32
  1246 getpix_24(Uint8 * srcbuf)
  1247 {
  1248 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  1249     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
  1250 #else
  1251     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
  1252 #endif
  1253 }
  1254 
  1255 static Uint32
  1256 getpix_32(Uint8 * srcbuf)
  1257 {
  1258     return *(Uint32 *) srcbuf;
  1259 }
  1260 
  1261 typedef Uint32(*getpix_func) (Uint8 *);
  1262 
  1263 static const getpix_func getpixes[4] = {
  1264     getpix_8, getpix_16, getpix_24, getpix_32
  1265 };
  1266 
  1267 static int
  1268 RLEColorkeySurface(SDL_Surface * surface)
  1269 {
  1270     Uint8 *rlebuf, *dst;
  1271     int maxn;
  1272     int y;
  1273     Uint8 *srcbuf, *lastline;
  1274     int maxsize = 0;
  1275     int bpp = surface->format->BytesPerPixel;
  1276     getpix_func getpix;
  1277     Uint32 ckey, rgbmask;
  1278     int w, h;
  1279 
  1280     /* calculate the worst case size for the compressed surface */
  1281     switch (bpp) {
  1282     case 1:
  1283         /* worst case is alternating opaque and transparent pixels,
  1284            starting with an opaque pixel */
  1285         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
  1286         break;
  1287     case 2:
  1288     case 3:
  1289         /* worst case is solid runs, at most 255 pixels wide */
  1290         maxsize = surface->h * (2 * (surface->w / 255 + 1)
  1291                                 + surface->w * bpp) + 2;
  1292         break;
  1293     case 4:
  1294         /* worst case is solid runs, at most 65535 pixels wide */
  1295         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
  1296                                 + surface->w * 4) + 4;
  1297         break;
  1298     }
  1299 
  1300     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1301     if (rlebuf == NULL) {
  1302         SDL_OutOfMemory();
  1303         return (-1);
  1304     }
  1305 
  1306     /* Set up the conversion */
  1307     srcbuf = (Uint8 *) surface->pixels;
  1308     maxn = bpp == 4 ? 65535 : 255;
  1309     dst = rlebuf;
  1310     rgbmask = ~surface->format->Amask;
  1311     ckey = surface->map->info.colorkey & rgbmask;
  1312     lastline = dst;
  1313     getpix = getpixes[bpp - 1];
  1314     w = surface->w;
  1315     h = surface->h;
  1316 
  1317 #define ADD_COUNTS(n, m)			\
  1318 	if(bpp == 4) {				\
  1319 	    ((Uint16 *)dst)[0] = n;		\
  1320 	    ((Uint16 *)dst)[1] = m;		\
  1321 	    dst += 4;				\
  1322 	} else {				\
  1323 	    dst[0] = n;				\
  1324 	    dst[1] = m;				\
  1325 	    dst += 2;				\
  1326 	}
  1327 
  1328     for (y = 0; y < h; y++) {
  1329         int x = 0;
  1330         int blankline = 0;
  1331         do {
  1332             int run, skip, len;
  1333             int runstart;
  1334             int skipstart = x;
  1335 
  1336             /* find run of transparent, then opaque pixels */
  1337             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
  1338                 x++;
  1339             runstart = x;
  1340             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
  1341                 x++;
  1342             skip = runstart - skipstart;
  1343             if (skip == w)
  1344                 blankline = 1;
  1345             run = x - runstart;
  1346 
  1347             /* encode segment */
  1348             while (skip > maxn) {
  1349                 ADD_COUNTS(maxn, 0);
  1350                 skip -= maxn;
  1351             }
  1352             len = MIN(run, maxn);
  1353             ADD_COUNTS(skip, len);
  1354             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1355             dst += len * bpp;
  1356             run -= len;
  1357             runstart += len;
  1358             while (run) {
  1359                 len = MIN(run, maxn);
  1360                 ADD_COUNTS(0, len);
  1361                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1362                 dst += len * bpp;
  1363                 runstart += len;
  1364                 run -= len;
  1365             }
  1366             if (!blankline)
  1367                 lastline = dst;
  1368         } while (x < w);
  1369 
  1370         srcbuf += surface->pitch;
  1371     }
  1372     dst = lastline;             /* back up bast trailing blank lines */
  1373     ADD_COUNTS(0, 0);
  1374 
  1375 #undef ADD_COUNTS
  1376 
  1377     /* Now that we have it encoded, release the original pixels */
  1378     if (!(surface->flags & SDL_PREALLOC)) {
  1379         SDL_free(surface->pixels);
  1380         surface->pixels = NULL;
  1381     }
  1382 
  1383     /* realloc the buffer to release unused memory */
  1384     {
  1385         /* If realloc returns NULL, the original block is left intact */
  1386         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1387         if (!p)
  1388             p = rlebuf;
  1389         surface->map->data = p;
  1390     }
  1391 
  1392     return (0);
  1393 }
  1394 
  1395 int
  1396 SDL_RLESurface(SDL_Surface * surface)
  1397 {
  1398     int flags;
  1399 
  1400     /* Clear any previous RLE conversion */
  1401     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
  1402         SDL_UnRLESurface(surface, 1);
  1403     }
  1404 
  1405     /* We don't support RLE encoding of bitmaps */
  1406     if (surface->format->BitsPerPixel < 8) {
  1407         return -1;
  1408     }
  1409 
  1410     /* Make sure the pixels are available */
  1411     if (!surface->pixels) {
  1412         return -1;
  1413     }
  1414 
  1415     /* If we don't have colorkey or blending, nothing to do... */
  1416     flags = surface->map->info.flags;
  1417     if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
  1418         return -1;
  1419     }
  1420 
  1421     /* Pass on combinations not supported */
  1422     if ((flags & SDL_COPY_MODULATE_COLOR) ||
  1423         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
  1424         (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
  1425         (flags & SDL_COPY_NEAREST)) {
  1426         return -1;
  1427     }
  1428 
  1429     /* Encode and set up the blit */
  1430     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
  1431         if (!surface->map->identity) {
  1432             return -1;
  1433         }
  1434         if (RLEColorkeySurface(surface) < 0) {
  1435             return -1;
  1436         }
  1437         surface->map->blit = SDL_RLEBlit;
  1438         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
  1439     } else {
  1440         if (RLEAlphaSurface(surface) < 0) {
  1441             return -1;
  1442         }
  1443         surface->map->blit = SDL_RLEAlphaBlit;
  1444         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
  1445     }
  1446 
  1447     /* The surface is now accelerated */
  1448     surface->flags |= SDL_RLEACCEL;
  1449 
  1450     return (0);
  1451 }
  1452 
  1453 /*
  1454  * Un-RLE a surface with pixel alpha
  1455  * This may not give back exactly the image before RLE-encoding; all
  1456  * completely transparent pixels will be lost, and colour and alpha depth
  1457  * may have been reduced (when encoding for 16bpp targets).
  1458  */
  1459 static SDL_bool
  1460 UnRLEAlpha(SDL_Surface * surface)
  1461 {
  1462     Uint8 *srcbuf;
  1463     Uint32 *dst;
  1464     SDL_PixelFormat *sf = surface->format;
  1465     RLEDestFormat *df = surface->map->data;
  1466     int (*uncopy_opaque) (Uint32 *, void *, int,
  1467                           RLEDestFormat *, SDL_PixelFormat *);
  1468     int (*uncopy_transl) (Uint32 *, void *, int,
  1469                           RLEDestFormat *, SDL_PixelFormat *);
  1470     int w = surface->w;
  1471     int bpp = df->BytesPerPixel;
  1472 
  1473     if (bpp == 2) {
  1474         uncopy_opaque = uncopy_opaque_16;
  1475         uncopy_transl = uncopy_transl_16;
  1476     } else {
  1477         uncopy_opaque = uncopy_transl = uncopy_32;
  1478     }
  1479 
  1480     surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1481     if (!surface->pixels) {
  1482         return (SDL_FALSE);
  1483     }
  1484     /* fill background with transparent pixels */
  1485     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
  1486 
  1487     dst = surface->pixels;
  1488     srcbuf = (Uint8 *) (df + 1);
  1489     for (;;) {
  1490         /* copy opaque pixels */
  1491         int ofs = 0;
  1492         do {
  1493             unsigned run;
  1494             if (bpp == 2) {
  1495                 ofs += srcbuf[0];
  1496                 run = srcbuf[1];
  1497                 srcbuf += 2;
  1498             } else {
  1499                 ofs += ((Uint16 *) srcbuf)[0];
  1500                 run = ((Uint16 *) srcbuf)[1];
  1501                 srcbuf += 4;
  1502             }
  1503             if (run) {
  1504                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
  1505                 ofs += run;
  1506             } else if (!ofs)
  1507                 return (SDL_TRUE);
  1508         } while (ofs < w);
  1509 
  1510         /* skip padding if needed */
  1511         if (bpp == 2)
  1512             srcbuf += (uintptr_t) srcbuf & 2;
  1513 
  1514         /* copy translucent pixels */
  1515         ofs = 0;
  1516         do {
  1517             unsigned run;
  1518             ofs += ((Uint16 *) srcbuf)[0];
  1519             run = ((Uint16 *) srcbuf)[1];
  1520             srcbuf += 4;
  1521             if (run) {
  1522                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
  1523                 ofs += run;
  1524             }
  1525         } while (ofs < w);
  1526         dst += surface->pitch >> 2;
  1527     }
  1528     /* Make the compiler happy */
  1529     return (SDL_TRUE);
  1530 }
  1531 
  1532 void
  1533 SDL_UnRLESurface(SDL_Surface * surface, int recode)
  1534 {
  1535     if (surface->flags & SDL_RLEACCEL) {
  1536         surface->flags &= ~SDL_RLEACCEL;
  1537 
  1538         if (recode && !(surface->flags & SDL_PREALLOC)) {
  1539             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
  1540                 SDL_Rect full;
  1541 
  1542                 /* re-create the original surface */
  1543                 surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1544                 if (!surface->pixels) {
  1545                     /* Oh crap... */
  1546                     surface->flags |= SDL_RLEACCEL;
  1547                     return;
  1548                 }
  1549 
  1550                 /* fill it with the background colour */
  1551                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
  1552 
  1553                 /* now render the encoded surface */
  1554                 full.x = full.y = 0;
  1555                 full.w = surface->w;
  1556                 full.h = surface->h;
  1557                 SDL_RLEBlit(surface, &full, surface, &full);
  1558             } else {
  1559                 if (!UnRLEAlpha(surface)) {
  1560                     /* Oh crap... */
  1561                     surface->flags |= SDL_RLEACCEL;
  1562                     return;
  1563                 }
  1564             }
  1565         }
  1566         surface->map->info.flags &=
  1567             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
  1568 
  1569         if (surface->map->data) {
  1570             SDL_free(surface->map->data);
  1571             surface->map->data = NULL;
  1572         }
  1573     }
  1574 }
  1575 
  1576 /* vi: set ts=4 sw=4 expandtab: */