src/video/SDL_RLEaccel.c
author Sam Lantinga <slouken@libsdl.org>
Sat, 31 Dec 2011 09:28:07 -0500
changeset 6138 4c64952a58fb
parent 5631 3e1cbc6d7cad
child 6389 43a190ad60a7
permissions -rwxr-xr-x
Happy New Year!
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2012 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /*
    24  * RLE encoding for software colorkey and alpha-channel acceleration
    25  *
    26  * Original version by Sam Lantinga
    27  *
    28  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
    29  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
    30  * format, encoder and blitter.
    31  *
    32  * Many thanks to Xark and johns for hints, benchmarks and useful comments
    33  * leading to this code.
    34  *
    35  * Welcome to Macro Mayhem.
    36  */
    37 
    38 /*
    39  * The encoding translates the image data to a stream of segments of the form
    40  *
    41  * <skip> <run> <data>
    42  *
    43  * where <skip> is the number of transparent pixels to skip,
    44  *       <run>  is the number of opaque pixels to blit,
    45  * and   <data> are the pixels themselves.
    46  *
    47  * This basic structure is used both for colorkeyed surfaces, used for simple
    48  * binary transparency and for per-surface alpha blending, and for surfaces
    49  * with per-pixel alpha. The details differ, however:
    50  *
    51  * Encoding of colorkeyed surfaces:
    52  *
    53  *   Encoded pixels always have the same format as the target surface.
    54  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
    55  *   where they are 16 bit. This makes the pixel data aligned at all times.
    56  *   Segments never wrap around from one scan line to the next.
    57  *
     58  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    59  *   beginning of a line.
    60  *
    61  * Encoding of surfaces with per-pixel alpha:
    62  *
    63  *   The sequence begins with a struct RLEDestFormat describing the target
    64  *   pixel format, to provide reliable un-encoding.
    65  *
    66  *   Each scan line is encoded twice: First all completely opaque pixels,
    67  *   encoded in the target format as described above, and then all
    68  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
    69  *   in the following 32-bit format:
    70  *
    71  *   For 32-bit targets, each pixel has the target RGB format but with
    72  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
    73  *   counts are 16 bit.
    74  * 
    75  *   For 16-bit targets, each pixel has the target RGB format, but with
    76  *   the middle component (usually green) shifted 16 steps to the left,
    77  *   and the hole filled with the 5 most significant bits of the alpha value.
    78  *   i.e. if the target has the format         rrrrrggggggbbbbb,
    79  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
    80  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
    81  *   for the translucent lines. Two padding bytes may be inserted
    82  *   before each translucent line to keep them 32-bit aligned.
    83  *
    84  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    85  *   beginning of an opaque line.
    86  */
    87 
    88 #include "SDL_video.h"
    89 #include "SDL_sysvideo.h"
    90 #include "SDL_blit.h"
    91 #include "SDL_RLEaccel_c.h"
    92 
     /*
      * Fallback two-argument maximum/minimum macros, defined only when no
      * earlier header has already provided MAX or MIN. Each argument can be
      * evaluated twice, so do not pass expressions with side effects.
      */
     93 #ifndef MAX
     94 #define MAX(a, b) ((a) > (b) ? (a) : (b))
     95 #endif
     96 #ifndef MIN
     97 #define MIN(a, b) ((a) < (b) ? (a) : (b))
     98 #endif
    99 
    /*
     * Copy `len` pixels of `bpp` bytes each from `from` to `to`.
     * 4-byte pixels go through SDL_memcpy4 with `len` passed unscaled;
     * every other pixel size uses SDL_memcpy with a byte count of len * bpp.
     */
    100 #define PIXEL_COPY(to, from, len, bpp)			\
    101 do {							\
    102     if(bpp == 4) {					\
    103 	SDL_memcpy4(to, from, (size_t)(len));		\
    104     } else {						\
    105 	SDL_memcpy(to, from, (size_t)(len) * (bpp));	\
    106     }							\
    107 } while(0)
   108 
   109 /*
   110  * Various colorkey blit methods, for opaque and per-surface alpha
   111  */
   112 
    /*
     * Opaque blit: a plain PIXEL_COPY. The `alpha` argument is ignored; it is
     * accepted so that this macro takes the same argument list as the
     * ALPHA_BLIT* macros and can be handed to the same blitter templates.
     */
    113 #define OPAQUE_BLIT(to, from, length, bpp, alpha)	\
    114     PIXEL_COPY(to, from, length, bpp)
   115 
   116 /*
    117  * For 32bpp pixels of the form 0x00rrggbb:
   118  * If we treat the middle component separately, we can process the two
   119  * remaining in parallel. This is safe to do because of the gap to the left
   120  * of each component, so the bits from the multiplication don't collide.
   121  * This can be used for any RGB permutation of course.
   122  */
    /*
     * Blend `length` 32-bit pixels from `from` onto `to` using one alpha value
     * for the whole run. The two outer components (mask 0xff00ff) are blended
     * together and the middle component (mask 0xff00) separately, each as
     * d + ((s - d) * alpha >> 8). Bits 24-31 of every written pixel are zero
     * because both partial results are masked. `bpp` is not used.
     * NOTE(review): alpha is presumably in the range 0..255 - confirm at callers.
     */
    123 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)		\
    124     do {							\
    125         int i;							\
    126 	Uint32 *src = (Uint32 *)(from);				\
    127 	Uint32 *dst = (Uint32 *)(to);				\
    128 	for(i = 0; i < (int)(length); i++) {			\
    129 	    Uint32 s = *src++;					\
    130 	    Uint32 d = *dst;					\
    131 	    Uint32 s1 = s & 0xff00ff;				\
    132 	    Uint32 d1 = d & 0xff00ff;				\
    133 	    d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;	\
    134 	    s &= 0xff00;					\
    135 	    d &= 0xff00;					\
    136 	    d = (d + ((s - d) * alpha >> 8)) & 0xff00;		\
    137 	    *dst++ = d1 | d;					\
    138 	}							\
    139     } while(0)
   140 
   141 /*
   142  * For 16bpp pixels we can go a step further: put the middle component
   143  * in the high 16 bits of a 32 bit word, and process all three RGB
   144  * components at the same time. Since the smallest gap is here just
   145  * 5 bits, we have to scale alpha down to 5 bits as well.
   146  */
    /*
     * Blend `length` 16-bit 5-6-5 pixels from `from` onto `to` with one alpha
     * value. Alpha is reduced to 5 bits (alpha >> 3). Each pixel is spread
     * over a 32-bit word with (p | p << 16) & 0x07e0f81f so that a single
     * multiply blends all three components, then folded back to 16 bits with
     * (d | d >> 16). `bpp` is not used.
     */
    147 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)	\
    148     do {						\
    149         int i;						\
    150 	Uint16 *src = (Uint16 *)(from);			\
    151 	Uint16 *dst = (Uint16 *)(to);			\
    152 	Uint32 ALPHA = alpha >> 3;			\
    153 	for(i = 0; i < (int)(length); i++) {		\
    154 	    Uint32 s = *src++;				\
    155 	    Uint32 d = *dst;				\
    156 	    s = (s | s << 16) & 0x07e0f81f;		\
    157 	    d = (d | d << 16) & 0x07e0f81f;		\
    158 	    d += (s - d) * ALPHA >> 5;			\
    159 	    d &= 0x07e0f81f;				\
    160 	    *dst++ = (Uint16)(d | d >> 16);			\
    161 	}						\
    162     } while(0)
   163 
    /*
     * Blend `length` 16-bit 5-5-5 pixels from `from` onto `to` with one alpha
     * value. Same scheme as ALPHA_BLIT16_565 but with the spread mask
     * 0x03e07c1f: alpha is reduced to 5 bits (alpha >> 3), each pixel is
     * spread over a 32-bit word, blended with one multiply and folded back
     * to 16 bits. `bpp` is not used.
     */
    164 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)	\
    165     do {						\
    166         int i;						\
    167 	Uint16 *src = (Uint16 *)(from);			\
    168 	Uint16 *dst = (Uint16 *)(to);			\
    169 	Uint32 ALPHA = alpha >> 3;			\
    170 	for(i = 0; i < (int)(length); i++) {		\
    171 	    Uint32 s = *src++;				\
    172 	    Uint32 d = *dst;				\
    173 	    s = (s | s << 16) & 0x03e07c1f;		\
    174 	    d = (d | d << 16) & 0x03e07c1f;		\
    175 	    d += (s - d) * ALPHA >> 5;			\
    176 	    d &= 0x03e07c1f;				\
    177 	    *dst++ = (Uint16)(d | d >> 16);			\
    178 	}						\
    179     } while(0)
   180 
   181 /*
   182  * The general slow catch-all function, for remaining depths and formats
   183  */
    /*
     * Generic per-surface alpha blend of `length` pixels of `bpp` bytes each.
     * Handles bpp 2, 3 and 4; the switches have no default, so any other bpp
     * leaves s and d unset. 3-byte pixels are assembled and stored byte by
     * byte in the order selected by SDL_BYTEORDER. Each pixel is split with
     * RGB_FROM_PIXEL, every component is blended as
     * d += (s - d) * alpha >> 8, and the result is repacked with
     * PIXEL_FROM_RGB.
     * The macro uses a variable named `fmt` that is not one of its
     * parameters, so `fmt` must be in scope wherever it is expanded.
     */
    184 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)			\
    185     do {								\
    186         int i;								\
    187 	Uint8 *src = from;						\
    188 	Uint8 *dst = to;						\
    189 	for(i = 0; i < (int)(length); i++) {				\
    190 	    Uint32 s, d;						\
    191 	    unsigned rs, gs, bs, rd, gd, bd;				\
    192 	    switch(bpp) {						\
    193 	    case 2:							\
    194 		s = *(Uint16 *)src;					\
    195 		d = *(Uint16 *)dst;					\
    196 		break;							\
    197 	    case 3:							\
    198 		if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {			\
    199 		    s = (src[0] << 16) | (src[1] << 8) | src[2];	\
    200 		    d = (dst[0] << 16) | (dst[1] << 8) | dst[2];	\
    201 		} else {						\
    202 		    s = (src[2] << 16) | (src[1] << 8) | src[0];	\
    203 		    d = (dst[2] << 16) | (dst[1] << 8) | dst[0];	\
    204 		}							\
    205 		break;							\
    206 	    case 4:							\
    207 		s = *(Uint32 *)src;					\
    208 		d = *(Uint32 *)dst;					\
    209 		break;							\
    210 	    }								\
    211 	    RGB_FROM_PIXEL(s, fmt, rs, gs, bs);				\
    212 	    RGB_FROM_PIXEL(d, fmt, rd, gd, bd);				\
    213 	    rd += (rs - rd) * alpha >> 8;				\
    214 	    gd += (gs - gd) * alpha >> 8;				\
    215 	    bd += (bs - bd) * alpha >> 8;				\
    216 	    PIXEL_FROM_RGB(d, fmt, rd, gd, bd);				\
    217 	    switch(bpp) {						\
    218 	    case 2:							\
    219 		*(Uint16 *)dst = (Uint16)d;					\
    220 		break;							\
    221 	    case 3:							\
    222 		if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {			\
    223 		    dst[0] = (Uint8)(d >> 16);					\
    224 		    dst[1] = (Uint8)(d >> 8);					\
    225 		    dst[2] = (Uint8)(d);						\
    226 		} else {						\
    227 		    dst[0] = (Uint8)d;						\
    228 		    dst[1] = (Uint8)(d >> 8);					\
    229 		    dst[2] = (Uint8)(d >> 16);					\
    230 		}							\
    231 		break;							\
    232 	    case 4:							\
    233 		*(Uint32 *)dst = d;					\
    234 		break;							\
    235 	    }								\
    236 	    src += bpp;							\
    237 	    dst += bpp;							\
    238 	}								\
    239     } while(0)
   240 
   241 /*
   242  * Special case: 50% alpha (alpha=128)
   243  * This is treated specially because it can be optimized very well, and
   244  * since it is good for many cases of semi-translucency.
   245  * The theory is to do all three components at the same time:
   246  * First zero the lowest bit of each component, which gives us room to
    247  * add them. Then shift right and add the carry of the lowest bits (s & d).
   248  */
    /*
     * 50% blend of `length` 32-bit pixels from `from` onto `to`.
     * The lowest bit of each 8-bit component is masked off (0x00fefefe), the
     * two pixels are added and halved, and the bit lost by masking is restored
     * where both inputs had it set (s & d & 0x00010101). Only the low 24 bits
     * take part, so bits 24-31 of every written pixel are zero.
     * `bpp` and `alpha` are not used.
     */
    249 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)		\
    250     do {								\
    251         int i;								\
    252 	Uint32 *src = (Uint32 *)(from);					\
    253 	Uint32 *dst = (Uint32 *)(to);					\
    254 	for(i = 0; i < (int)(length); i++) {				\
    255 	    Uint32 s = *src++;						\
    256 	    Uint32 d = *dst;						\
    257 	    *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)	\
    258 		     + (s & d & 0x00010101);				\
    259 	}								\
    260     } while(0)
   261 
   262 /*
   263  * For 16bpp, we can actually blend two pixels in parallel, if we take
   264  * care to shift before we add, not after.
   265  */
   266 
   267 /* helper: blend a single 16 bit pixel at 50% */
    /*
     * Blend one 16-bit pixel at 50%: *dst = average of *src and *dst.
     * `mask` selects the bits that are added and halved; the remaining bits of
     * the 16-bit pixel (~mask & 0xffff) are added back only where they are set
     * in both inputs. Both `src` and `dst` are post-incremented, so they must
     * be modifiable pointer lvalues.
     */
    268 #define BLEND16_50(dst, src, mask)			\
    269     do {						\
    270 	Uint32 s = *src++;				\
    271 	Uint32 d = *dst;				\
    272 	*dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +	\
    273 	                  (s & d & (~mask & 0xffff)));		\
    274     } while(0)
   275 
    276 /* basic 16bpp blender. mask selects the bits to keep when adding. */
    /*
     * 50% blend of `length` 16-bit pixels from `from` onto `to`.
     * If the source and destination addresses differ in their low two bits,
     * every pixel is blended individually with BLEND16_50. Otherwise a leading
     * pixel is blended alone when the source is not 4-byte aligned, the bulk
     * is processed two pixels at a time through 32-bit loads and stores (each
     * pixel is halved before adding, using `mask` replicated into both
     * halves), and a trailing odd pixel is blended alone.
     * `bpp` and `alpha` are not used.
     * NOTE(review): the leading-pixel step decrements n without checking it,
     * so this assumes length > 0 - confirm that callers never pass an empty run.
     */
    277 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)		\
    278     do {								\
    279 	unsigned n = (length);						\
    280 	Uint16 *src = (Uint16 *)(from);					\
    281 	Uint16 *dst = (Uint16 *)(to);					\
    282 	if(((uintptr_t)src ^ (uintptr_t)dst) & 3) {			\
    283 	    /* source and destination not in phase, blit one by one */	\
    284 	    while(n--)							\
    285 		BLEND16_50(dst, src, mask);				\
    286 	} else {							\
    287 	    if((uintptr_t)src & 3) {					\
    288 		/* first odd pixel */					\
    289 		BLEND16_50(dst, src, mask);				\
    290 		n--;							\
    291 	    }								\
    292 	    for(; n > 1; n -= 2) {					\
    293 		Uint32 s = *(Uint32 *)src;				\
    294 		Uint32 d = *(Uint32 *)dst;				\
    295 		*(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1)	\
    296 		               + ((d & (mask | mask << 16)) >> 1)	\
    297 		               + (s & d & (~(mask | mask << 16)));	\
    298 		src += 2;						\
    299 		dst += 2;						\
    300 	    }								\
    301 	    if(n)							\
    302 		BLEND16_50(dst, src, mask); /* last odd pixel */	\
    303 	}								\
    304     } while(0)
   305 
    /*
     * 50% blenders for the two 16-bit layouts, both built on ALPHA_BLIT16_50.
     * 0xf7de clears bits 0, 5 and 11 (the lowest bit of each 5-6-5 field);
     * 0xfbde clears bits 0, 5 and 10 (the lowest bit of each 5-5-5 field).
     */
    306 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)	\
    307     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
    308 
    309 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)	\
    310     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
   311 
    /*
     * Select and expand the right blit routine for pixel format `fmt` and
     * per-surface `alpha`. `blitter` is a macro expanded as
     * blitter(bpp, CountType, do_blit); CountType is Uint8 for 1-3 bytes per
     * pixel and Uint16 for 4 bytes per pixel.
     *
     *   alpha == 255   OPAQUE_BLIT for every pixel size.
     *   1 byte/pixel   nothing is expanded when alpha != 255.
     *   2 bytes/pixel  combined RGB mask 0xffff with one component mask equal
     *                  to 0x07e0 selects the 565 routines; combined mask
     *                  0x7fff with one component mask equal to 0x03e0 selects
     *                  the 555 routines; everything else uses ALPHA_BLIT_ANY.
     *   3 bytes/pixel  ALPHA_BLIT_ANY.
     *   4 bytes/pixel  combined RGB mask 0x00ffffff with one component mask
     *                  equal to 0xff00 selects the 888 routines, otherwise
     *                  ALPHA_BLIT_ANY.
     *
     * Where a specialised routine exists, alpha == 128 selects its _50 variant.
     * The expansion contains the label `general16`, so the macro can be
     * expanded at most once per function.
     */
    312 #define CHOOSE_BLIT(blitter, alpha, fmt)				\
    313     do {								\
    314         if(alpha == 255) {						\
    315 	    switch(fmt->BytesPerPixel) {				\
    316 	    case 1: blitter(1, Uint8, OPAQUE_BLIT); break;		\
    317 	    case 2: blitter(2, Uint8, OPAQUE_BLIT); break;		\
    318 	    case 3: blitter(3, Uint8, OPAQUE_BLIT); break;		\
    319 	    case 4: blitter(4, Uint16, OPAQUE_BLIT); break;		\
    320 	    }								\
    321 	} else {							\
    322 	    switch(fmt->BytesPerPixel) {				\
    323 	    case 1:							\
    324 		/* No 8bpp alpha blitting */				\
    325 		break;							\
    326 									\
    327 	    case 2:							\
    328 		switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) {		\
    329 		case 0xffff:						\
    330 		    if(fmt->Gmask == 0x07e0				\
    331 		       || fmt->Rmask == 0x07e0				\
    332 		       || fmt->Bmask == 0x07e0) {			\
    333 			if(alpha == 128)				\
    334 			    blitter(2, Uint8, ALPHA_BLIT16_565_50);	\
    335 			else {						\
    336 			    blitter(2, Uint8, ALPHA_BLIT16_565);	\
    337 			}						\
    338 		    } else						\
    339 			goto general16;					\
    340 		    break;						\
    341 									\
    342 		case 0x7fff:						\
    343 		    if(fmt->Gmask == 0x03e0				\
    344 		       || fmt->Rmask == 0x03e0				\
    345 		       || fmt->Bmask == 0x03e0) {			\
    346 			if(alpha == 128)				\
    347 			    blitter(2, Uint8, ALPHA_BLIT16_555_50);	\
    348 			else {						\
    349 			    blitter(2, Uint8, ALPHA_BLIT16_555);	\
    350 			}						\
    351 			break;						\
    352 		    }							\
    353 		    /* fallthrough */					\
    354 									\
    355 		default:						\
    356 		general16:						\
    357 		    blitter(2, Uint8, ALPHA_BLIT_ANY);			\
    358 		}							\
    359 		break;							\
    360 									\
    361 	    case 3:							\
    362 		blitter(3, Uint8, ALPHA_BLIT_ANY);			\
    363 		break;							\
    364 									\
    365 	    case 4:							\
    366 		if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff	\
    367 		   && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00	\
    368 		       || fmt->Bmask == 0xff00)) {			\
    369 		    if(alpha == 128)					\
    370 			blitter(4, Uint16, ALPHA_BLIT32_888_50);	\
    371 		    else						\
    372 			blitter(4, Uint16, ALPHA_BLIT32_888);		\
    373 		} else							\
    374 		    blitter(4, Uint16, ALPHA_BLIT_ANY);			\
    375 		break;							\
    376 	    }								\
    377 	}								\
    378     } while(0)
   379 
   380 /*
   381  * This takes care of the case when the surface is clipped on the left and/or
   382  * right. Top clipping has already been taken care of.
   383  */
    /*
     * Blit a colorkey RLE stream with horizontal clipping.
     *
     *   w        width of one encoded line in pixels; a line ends when the
     *            running offset reaches this value.
     *   srcbuf   encoded stream, positioned at the first line to draw.
     *   dst      destination surface; supplies the pixel format and pitch.
     *   dstbuf   destination address of the first pixel to draw; it is moved
     *            left by srcrect->x pixels so encoded column offsets can be
     *            used directly as destination columns.
     *   srcrect  x and w give the visible column window [x, x + w); h is the
     *            number of lines to draw.
     *   alpha    per-surface alpha handed to CHOOSE_BLIT (255 selects a copy).
     */
    384 static void
    385 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
    386             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
    387 {
    388     SDL_PixelFormat *fmt = dst->format;
    389 
        /*
         * Per-format loop body. Each segment is a <skip>,<run> pair of type
         * `Type` followed by run pixels of bpp bytes. The run is trimmed to
         * the [left, right) window before do_blit is called; a run that is
         * entirely left of the window jumps to the nocopy label. A zero run
         * at offset 0 ends the stream. When the offset reaches w the
         * destination advances one pitch and linecount is decremented.
         */
    390 #define RLECLIPBLIT(bpp, Type, do_blit)					   \
    391     do {								   \
    392 	int linecount = srcrect->h;					   \
    393 	int ofs = 0;							   \
    394 	int left = srcrect->x;						   \
    395 	int right = left + srcrect->w;					   \
    396 	dstbuf -= left * bpp;						   \
    397 	for(;;) {							   \
    398 	    int run;							   \
    399 	    ofs += *(Type *)srcbuf;					   \
    400 	    run = ((Type *)srcbuf)[1];					   \
    401 	    srcbuf += 2 * sizeof(Type);					   \
    402 	    if(run) {							   \
    403 		/* clip to left and right borders */			   \
    404 		if(ofs < right) {					   \
    405 		    int start = 0;					   \
    406 		    int len = run;					   \
    407 		    int startcol;					   \
    408 		    if(left - ofs > 0) {				   \
    409 			start = left - ofs;				   \
    410 			len -= start;					   \
    411 			if(len <= 0)					   \
    412 			    goto nocopy ## bpp ## do_blit;		   \
    413 		    }							   \
    414 		    startcol = ofs + start;				   \
    415 		    if(len > right - startcol)				   \
    416 			len = right - startcol;				   \
    417 		    do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
    418 			    len, bpp, alpha);				   \
    419 		}							   \
    420 	    nocopy ## bpp ## do_blit:					   \
    421 		srcbuf += run * bpp;					   \
    422 		ofs += run;						   \
    423 	    } else if(!ofs)						   \
    424 		break;							   \
    425 	    if(ofs == w) {						   \
    426 		ofs = 0;						   \
    427 		dstbuf += dst->pitch;					   \
    428 		if(!--linecount)					   \
    429 		    break;						   \
    430 	    }								   \
    431 	}								   \
    432     } while(0)
    433 
    434     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
    435 
    436 #undef RLECLIPBLIT
    437 
    438 }
   439 
   440 
   441 /* blit a colorkeyed RLE surface */
    /*
     * Blit the colorkey RLE data stored in src->map->data onto dst.
     *
     *   src      RLE-encoded source surface; supplies the line width (src->w),
     *            the encoded stream and the per-surface alpha
     *            (src->map->info.a).
     *   srcrect  region of the source to draw: y lines are skipped at the top,
     *            h lines are drawn, and x / w select the columns.
     *   dst      destination surface; locked for the duration of the blit
     *            when SDL_MUSTLOCK(dst) is true.
     *   dstrect  x and y give the destination position.
     *
     * Returns -1 if the destination could not be locked, 0 otherwise.
     */
    442 int
    443 SDL_RLEBlit(SDL_Surface * src, SDL_Rect * srcrect,
    444             SDL_Surface * dst, SDL_Rect * dstrect)
    445 {
    446     Uint8 *dstbuf;
    447     Uint8 *srcbuf;
    448     int x, y;
    449     int w = src->w;
    450     unsigned alpha;
    451 
    452     /* Lock the destination if necessary */
    453     if (SDL_MUSTLOCK(dst)) {
    454         if (SDL_LockSurface(dst) < 0) {
    455             return (-1);
    456         }
    457     }
    458 
    459     /* Set up the source and destination pointers */
            /* NOTE(review): the x offset is scaled by the source format's
               BytesPerPixel, not the destination's; presumably the two match
               for an RLE-encoded surface - confirm. */
    460     x = dstrect->x;
    461     y = dstrect->y;
    462     dstbuf = (Uint8 *) dst->pixels
    463         + y * dst->pitch + x * src->format->BytesPerPixel;
    464     srcbuf = (Uint8 *) src->map->data;
    465 
    466     {
    467         /* skip lines at the top if necessary */
    468         int vskip = srcrect->y;
    469         int ofs = 0;
    470         if (vskip) {
    471 
                /* Walk the stream without drawing until vskip complete lines
                   have been consumed; reaching the end-of-stream marker
                   (zero run at offset 0) jumps straight to done. */
    472 #define RLESKIP(bpp, Type)			\
    473 		for(;;) {			\
    474 		    int run;			\
    475 		    ofs += *(Type *)srcbuf;	\
    476 		    run = ((Type *)srcbuf)[1];	\
    477 		    srcbuf += sizeof(Type) * 2;	\
    478 		    if(run) {			\
    479 			srcbuf += run * bpp;	\
    480 			ofs += run;		\
    481 		    } else if(!ofs)		\
    482 			goto done;		\
    483 		    if(ofs == w) {		\
    484 			ofs = 0;		\
    485 			if(!--vskip)		\
    486 			    break;		\
    487 		    }				\
    488 		}
    489 
    490             switch (src->format->BytesPerPixel) {
    491             case 1:
    492                 RLESKIP(1, Uint8);
    493                 break;
    494             case 2:
    495                 RLESKIP(2, Uint8);
    496                 break;
    497             case 3:
    498                 RLESKIP(3, Uint8);
    499                 break;
    500             case 4:
    501                 RLESKIP(4, Uint16);
    502                 break;
    503             }
    504 
    505 #undef RLESKIP
    506 
    507         }
    508     }
    509 
    510     alpha = src->map->info.a;
    511     /* if left or right edge clipping needed, call clip blit */
    512     if (srcrect->x || srcrect->w != src->w) {
    513         RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha);
    514     } else {
    515         SDL_PixelFormat *fmt = src->format;
    516 
            /* Unclipped loop body: every run is drawn in full at its encoded
               column offset; otherwise the same stream walk as RLESKIP. */
    517 #define RLEBLIT(bpp, Type, do_blit)					      \
    518 	    do {							      \
    519 		int linecount = srcrect->h;				      \
    520 		int ofs = 0;						      \
    521 		for(;;) {						      \
    522 		    unsigned run;					      \
    523 		    ofs += *(Type *)srcbuf;				      \
    524 		    run = ((Type *)srcbuf)[1];				      \
    525 		    srcbuf += 2 * sizeof(Type);				      \
    526 		    if(run) {						      \
    527 			do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
    528 			srcbuf += run * bpp;				      \
    529 			ofs += run;					      \
    530 		    } else if(!ofs)					      \
    531 			break;						      \
    532 		    if(ofs == w) {					      \
    533 			ofs = 0;					      \
    534 			dstbuf += dst->pitch;				      \
    535 			if(!--linecount)				      \
    536 			    break;					      \
    537 		    }							      \
    538 		}							      \
    539 	    } while(0)
    540 
    541         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
    542 
    543 #undef RLEBLIT
    544     }
    545 
    546   done:
    547     /* Unlock the destination if necessary */
    548     if (SDL_MUSTLOCK(dst)) {
    549         SDL_UnlockSurface(dst);
    550     }
    551     return (0);
    552 }
   553 
   554 #undef OPAQUE_BLIT
   555 
   556 /*
   557  * Per-pixel blitting macros for translucent pixels:
   558  * These use the same techniques as the per-surface blitting macros
   559  */
   560 
   561 /*
   562  * For 32bpp pixels, we have made sure the alpha is stored in the top
   563  * 8 bits, so proceed as usual
   564  */
    /*
     * Blend one encoded translucent 32-bit pixel `src` onto `dst`.
     * The alpha is taken from the top 8 bits of `src` (s >> 24). The 0xff00ff
     * and 0xff00 components are blended separately as
     * d + ((s - d) * alpha >> 8), and the result is stored to `dst` with the
     * top byte forced to 0xff. `dst` must be an lvalue.
     */
    565 #define BLIT_TRANSL_888(src, dst)				\
    566     do {							\
    567         Uint32 s = src;						\
    568 	Uint32 d = dst;						\
    569 	unsigned alpha = s >> 24;				\
    570 	Uint32 s1 = s & 0xff00ff;				\
    571 	Uint32 d1 = d & 0xff00ff;				\
    572 	d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;	\
    573 	s &= 0xff00;						\
    574 	d &= 0xff00;						\
    575 	d = (d + ((s - d) * alpha >> 8)) & 0xff00;		\
    576 	dst = d1 | d | 0xff000000;				\
    577     } while(0)
   578 
   579 /*
   580  * For 16bpp pixels, we have stored the 5 most significant alpha bits in
    581  * bits 5-9. As before, we can process all 3 RGB components at the same time.
   582  */
    /*
     * Blend one encoded translucent pixel `src` (32-bit, already in the spread
     * 5-6-5 layout) onto the 16-bit 5-6-5 pixel `dst`.
     * The 5-bit alpha is read from bits 5-9 of `src` ((s & 0x3e0) >> 5) before
     * the colour bits are masked out with 0x07e0f81f. `dst` is spread the same
     * way, blended with one multiply, and folded back to 16 bits.
     * `dst` must be an lvalue.
     */
    583 #define BLIT_TRANSL_565(src, dst)		\
    584     do {					\
    585 	Uint32 s = src;				\
    586 	Uint32 d = dst;				\
    587 	unsigned alpha = (s & 0x3e0) >> 5;	\
    588 	s &= 0x07e0f81f;			\
    589 	d = (d | d << 16) & 0x07e0f81f;		\
    590 	d += (s - d) * alpha >> 5;		\
    591 	d &= 0x07e0f81f;			\
    592 	dst = (Uint16)(d | d >> 16);			\
    593     } while(0)
   594 
    /*
     * Blend one encoded translucent pixel `src` (32-bit, already in the spread
     * 5-5-5 layout) onto the 16-bit 5-5-5 pixel `dst`.
     * Same scheme as BLIT_TRANSL_565 but with the spread mask 0x03e07c1f: the
     * 5-bit alpha is read from bits 5-9 of `src`, `dst` is spread, blended
     * with one multiply and folded back to 16 bits. `dst` must be an lvalue.
     */
    595 #define BLIT_TRANSL_555(src, dst)		\
    596     do {					\
    597 	Uint32 s = src;				\
    598 	Uint32 d = dst;				\
    599 	unsigned alpha = (s & 0x3e0) >> 5;	\
    600 	s &= 0x03e07c1f;			\
    601 	d = (d | d << 16) & 0x03e07c1f;		\
    602 	d += (s - d) * alpha >> 5;		\
    603 	d &= 0x03e07c1f;			\
    604 	dst = (Uint16)(d | d >> 16);			\
    605     } while(0)
   606 
   607 /* used to save the destination format in the encoding. Designed to be
   608    macro-compatible with SDL_PixelFormat but without the unneeded fields */
    /*
     * Destination pixel format header stored at the start of a per-pixel-alpha
     * RLE stream (SDL_RLEAlphaBlit skips sizeof(RLEDestFormat) bytes before
     * reading segments). The field names match those of SDL_PixelFormat so
     * that macros written against SDL_PixelFormat can take a pointer to this
     * structure.
     */
    609 typedef struct
    610 {
    611     Uint8 BytesPerPixel;        /* bytes per pixel of the target format */
    612     Uint8 padding[3];           /* explicit padding after the single byte */
    613     Uint32 Rmask;               /* per-component bit masks */
    614     Uint32 Gmask;
    615     Uint32 Bmask;
    616     Uint32 Amask;
    617     Uint8 Rloss;                /* per-component loss values */
    618     Uint8 Gloss;
    619     Uint8 Bloss;
    620     Uint8 Aloss;
    621     Uint8 Rshift;               /* per-component shift values */
    622     Uint8 Gshift;
    623     Uint8 Bshift;
    624     Uint8 Ashift;
    625 } RLEDestFormat;
   626 
   627 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
    /*
     * Blit a per-pixel-alpha RLE stream with horizontal clipping.
     *
     *   w        width of one encoded line in pixels.
     *   srcbuf   encoded stream, positioned at the first line to draw.
     *   dst      destination surface; supplies the pixel format and pitch.
     *   dstbuf   destination address of the first pixel to draw; it is moved
     *            left by srcrect->x pixels so encoded column offsets can be
     *            used directly as destination columns.
     *   srcrect  x and w give the visible column window [x, x + w); h is the
     *            number of lines to draw.
     *
     * Only destinations with 2 or 4 bytes per pixel are handled. For 2 bytes
     * the 5-6-5 blender is used when any component mask equals 0x07e0, and the
     * 5-5-5 blender otherwise. The function returns early when the
     * end-of-stream marker (zero run at offset 0 of an opaque line) is read.
     */
    628 static void
    629 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
    630                  Uint8 * dstbuf, SDL_Rect * srcrect)
    631 {
    632     SDL_PixelFormat *df = dst->format;
    633     /*
    634      * clipped blitter: Ptype is the destination pixel type,
    635      * Ctype the translucent count type, and do_blend the macro
    636      * to blend one pixel.
         * NOTE(review): in the macro body Ctype is used for the counts of
         * the opaque segments, while the translucent segments always read
         * Uint16 counts - the wording above looks inverted; confirm.
         * For each line the opaque segments are copied with PIXEL_COPY and
         * the translucent segments are blended pixel by pixel; both are
         * trimmed to the [left, right) window first. When Ptype is 2 bytes
         * wide, srcbuf is advanced by ((uintptr_t)srcbuf & 2) before the
         * translucent segments are read.
    637      */
    638 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)			  \
    639     do {								  \
    640 	int linecount = srcrect->h;					  \
    641 	int left = srcrect->x;						  \
    642 	int right = left + srcrect->w;					  \
    643 	dstbuf -= left * sizeof(Ptype);					  \
    644 	do {								  \
    645 	    int ofs = 0;						  \
    646 	    /* blit opaque pixels on one line */			  \
    647 	    do {							  \
    648 		unsigned run;						  \
    649 		ofs += ((Ctype *)srcbuf)[0];				  \
    650 		run = ((Ctype *)srcbuf)[1];				  \
    651 		srcbuf += 2 * sizeof(Ctype);				  \
    652 		if(run) {						  \
    653 		    /* clip to left and right borders */		  \
    654 		    int cofs = ofs;					  \
    655 		    int crun = run;					  \
    656 		    if(left - cofs > 0) {				  \
    657 			crun -= left - cofs;				  \
    658 			cofs = left;					  \
    659 		    }							  \
    660 		    if(crun > right - cofs)				  \
    661 			crun = right - cofs;				  \
    662 		    if(crun > 0)					  \
    663 			PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),	  \
    664 				   srcbuf + (cofs - ofs) * sizeof(Ptype), \
    665 				   (unsigned)crun, sizeof(Ptype));	  \
    666 		    srcbuf += run * sizeof(Ptype);			  \
    667 		    ofs += run;						  \
    668 		} else if(!ofs)						  \
    669 		    return;						  \
    670 	    } while(ofs < w);						  \
    671 	    /* skip padding if necessary */				  \
    672 	    if(sizeof(Ptype) == 2)					  \
    673 		srcbuf += (uintptr_t)srcbuf & 2;			  \
    674 	    /* blit translucent pixels on the same line */		  \
    675 	    ofs = 0;							  \
    676 	    do {							  \
    677 		unsigned run;						  \
    678 		ofs += ((Uint16 *)srcbuf)[0];				  \
    679 		run = ((Uint16 *)srcbuf)[1];				  \
    680 		srcbuf += 4;						  \
    681 		if(run) {						  \
    682 		    /* clip to left and right borders */		  \
    683 		    int cofs = ofs;					  \
    684 		    int crun = run;					  \
    685 		    if(left - cofs > 0) {				  \
    686 			crun -= left - cofs;				  \
    687 			cofs = left;					  \
    688 		    }							  \
    689 		    if(crun > right - cofs)				  \
    690 			crun = right - cofs;				  \
    691 		    if(crun > 0) {					  \
    692 			Ptype *dst = (Ptype *)dstbuf + cofs;		  \
    693 			Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);	  \
    694 			int i;						  \
    695 			for(i = 0; i < crun; i++)			  \
    696 			    do_blend(src[i], dst[i]);			  \
    697 		    }							  \
    698 		    srcbuf += run * 4;					  \
    699 		    ofs += run;						  \
    700 		}							  \
    701 	    } while(ofs < w);						  \
    702 	    dstbuf += dst->pitch;					  \
    703 	} while(--linecount);						  \
    704     } while(0)
    705 
    706     switch (df->BytesPerPixel) {
    707     case 2:
    708         if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
    709             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
    710         else
    711             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
    712         break;
    713     case 4:
    714         RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
    715         break;
    716     }
    717 }
   718 
   719 /* blit a pixel-alpha RLE surface */
    /*
     * Blit the per-pixel-alpha RLE data stored in src->map->data onto dst.
     *
     *   src      RLE-encoded source surface; supplies the line width (src->w)
     *            and the encoded stream, which begins with an RLEDestFormat
     *            header that is skipped here.
     *   srcrect  region of the source to draw: y lines are skipped at the top,
     *            h lines are drawn, and x / w select the columns.
     *   dst      destination surface; locked for the duration of the blit
     *            when SDL_MUSTLOCK(dst) is true. Only 2 and 4 bytes per pixel
     *            are handled by the blit switch.
     *   dstrect  x and y give the destination position.
     *
     * Returns -1 if the destination could not be locked, 0 otherwise.
     */
    720 int
    721 SDL_RLEAlphaBlit(SDL_Surface * src, SDL_Rect * srcrect,
    722                  SDL_Surface * dst, SDL_Rect * dstrect)
    723 {
    724     int x, y;
    725     int w = src->w;
    726     Uint8 *srcbuf, *dstbuf;
    727     SDL_PixelFormat *df = dst->format;
    728 
    729     /* Lock the destination if necessary */
    730     if (SDL_MUSTLOCK(dst)) {
    731         if (SDL_LockSurface(dst) < 0) {
    732             return -1;
    733         }
    734     }
    735 
    736     x = dstrect->x;
    737     y = dstrect->y;
    738     dstbuf = (Uint8 *) dst->pixels + y * dst->pitch + x * df->BytesPerPixel;
    739     srcbuf = (Uint8 *) src->map->data + sizeof(RLEDestFormat);
    740 
    741     {
    742         /* skip lines at the top if necessary */
    743         int vskip = srcrect->y;
    744         if (vskip) {
    745             int ofs;
    746             if (df->BytesPerPixel == 2) {
    747                 /* the 16/32 interleaved format */
    748                 do {
    749                     /* skip opaque line */
    750                     ofs = 0;
    751                     do {
    752                         int run;
    753                         ofs += srcbuf[0];
    754                         run = srcbuf[1];
    755                         srcbuf += 2;
    756                         if (run) {
    757                             srcbuf += 2 * run;
    758                             ofs += run;
    759                         } else if (!ofs)
    760                             goto done;
    761                     } while (ofs < w);
    762 
    763                     /* skip padding */
    764                     srcbuf += (uintptr_t) srcbuf & 2;
    765 
    766                     /* skip translucent line */
    767                     ofs = 0;
    768                     do {
    769                         int run;
    770                         ofs += ((Uint16 *) srcbuf)[0];
    771                         run = ((Uint16 *) srcbuf)[1];
                            /* 4 bytes of counts plus 4 bytes per pixel */
    772                         srcbuf += 4 * (run + 1);
    773                         ofs += run;
    774                     } while (ofs < w);
    775                 } while (--vskip);
    776             } else {
    777                 /* the 32/32 interleaved format */
    778                 vskip <<= 1;    /* opaque and translucent have same format */
    779                 do {
    780                     ofs = 0;
    781                     do {
    782                         int run;
    783                         ofs += ((Uint16 *) srcbuf)[0];
    784                         run = ((Uint16 *) srcbuf)[1];
    785                         srcbuf += 4;
    786                         if (run) {
    787                             srcbuf += 4 * run;
    788                             ofs += run;
    789                         } else if (!ofs)
    790                             goto done;
    791                     } while (ofs < w);
    792                 } while (--vskip);
    793             }
    794         }
    795     }
    796 
    797     /* if left or right edge clipping needed, call clip blit */
    798     if (srcrect->x || srcrect->w != src->w) {
    799         RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect);
    800     } else {
    801 
    802         /*
    803          * non-clipped blitter. Ptype is the destination pixel type,
    804          * Ctype the translucent count type, and do_blend the
    805          * macro to blend one pixel.
             * NOTE(review): in the macro body Ctype is used for the counts
             * of the opaque segments, while the translucent segments always
             * read Uint16 counts - the wording above looks inverted; confirm.
             * Reading the end-of-stream marker (zero run at offset 0 of an
             * opaque line) jumps to done.
    806          */
    807 #define RLEALPHABLIT(Ptype, Ctype, do_blend)				 \
    808 	do {								 \
    809 	    int linecount = srcrect->h;					 \
    810 	    do {							 \
    811 		int ofs = 0;						 \
    812 		/* blit opaque pixels on one line */			 \
    813 		do {							 \
    814 		    unsigned run;					 \
    815 		    ofs += ((Ctype *)srcbuf)[0];			 \
    816 		    run = ((Ctype *)srcbuf)[1];				 \
    817 		    srcbuf += 2 * sizeof(Ctype);			 \
    818 		    if(run) {						 \
    819 			PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
    820 				   run, sizeof(Ptype));			 \
    821 			srcbuf += run * sizeof(Ptype);			 \
    822 			ofs += run;					 \
    823 		    } else if(!ofs)					 \
    824 			goto done;					 \
    825 		} while(ofs < w);					 \
    826 		/* skip padding if necessary */				 \
    827 		if(sizeof(Ptype) == 2)					 \
    828 		    srcbuf += (uintptr_t)srcbuf & 2;		 	 \
    829 		/* blit translucent pixels on the same line */		 \
    830 		ofs = 0;						 \
    831 		do {							 \
    832 		    unsigned run;					 \
    833 		    ofs += ((Uint16 *)srcbuf)[0];			 \
    834 		    run = ((Uint16 *)srcbuf)[1];			 \
    835 		    srcbuf += 4;					 \
    836 		    if(run) {						 \
    837 			Ptype *dst = (Ptype *)dstbuf + ofs;		 \
    838 			unsigned i;					 \
    839 			for(i = 0; i < run; i++) {			 \
    840 			    Uint32 src = *(Uint32 *)srcbuf;		 \
    841 			    do_blend(src, *dst);			 \
    842 			    srcbuf += 4;				 \
    843 			    dst++;					 \
    844 			}						 \
    845 			ofs += run;					 \
    846 		    }							 \
    847 		} while(ofs < w);					 \
    848 		dstbuf += dst->pitch;					 \
    849 	    } while(--linecount);					 \
    850 	} while(0)
    851 
    852         switch (df->BytesPerPixel) {
    853         case 2:
    854             if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
    855                 || df->Bmask == 0x07e0)
    856                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
    857             else
    858                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
    859             break;
    860         case 4:
    861             RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
    862             break;
    863         }
    864     }
    865 
    866   done:
    867     /* Unlock the destination if necessary */
    868     if (SDL_MUSTLOCK(dst)) {
    869         SDL_UnlockSurface(dst);
    870     }
    871     return 0;
    872 }
   873 
   874 /*
   875  * Auxiliary functions:
   876  * The encoding functions take 32bpp rgb + a, and
   877  * return the number of bytes copied to the destination.
   878  * The decoding functions copy to 32bpp rgb + a, and
   879  * return the number of bytes copied from the source.
   880  * These are only used in the encoder and un-RLE code and are therefore not
   881  * highly optimised.
   882  */
   883 
   884 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
   885 static int
   886 copy_opaque_16(void *dst, Uint32 * src, int n,
   887                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   888 {
   889     int i;
   890     Uint16 *d = dst;
   891     for (i = 0; i < n; i++) {
   892         unsigned r, g, b;
   893         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
   894         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
   895         src++;
   896         d++;
   897     }
   898     return n * 2;
   899 }
   900 
   901 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
   902 static int
   903 uncopy_opaque_16(Uint32 * dst, void *src, int n,
   904                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   905 {
   906     int i;
   907     Uint16 *s = src;
   908     unsigned alpha = dfmt->Amask ? 255 : 0;
   909     for (i = 0; i < n; i++) {
   910         unsigned r, g, b;
   911         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
   912         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
   913         s++;
   914         dst++;
   915     }
   916     return n * 2;
   917 }
   918 
   919 
   920 
   921 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
   922 static int
   923 copy_transl_565(void *dst, Uint32 * src, int n,
   924                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   925 {
   926     int i;
   927     Uint32 *d = dst;
   928     for (i = 0; i < n; i++) {
   929         unsigned r, g, b, a;
   930         Uint16 pix;
   931         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   932         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   933         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
   934         src++;
   935         d++;
   936     }
   937     return n * 4;
   938 }
   939 
   940 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
   941 static int
   942 copy_transl_555(void *dst, Uint32 * src, int n,
   943                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   944 {
   945     int i;
   946     Uint32 *d = dst;
   947     for (i = 0; i < n; i++) {
   948         unsigned r, g, b, a;
   949         Uint16 pix;
   950         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   951         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   952         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
   953         src++;
   954         d++;
   955     }
   956     return n * 4;
   957 }
   958 
   959 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
   960 static int
   961 uncopy_transl_16(Uint32 * dst, void *src, int n,
   962                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   963 {
   964     int i;
   965     Uint32 *s = src;
   966     for (i = 0; i < n; i++) {
   967         unsigned r, g, b, a;
   968         Uint32 pix = *s++;
   969         a = (pix & 0x3e0) >> 2;
   970         pix = (pix & ~0x3e0) | pix >> 16;
   971         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
   972         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   973         dst++;
   974     }
   975     return n * 4;
   976 }
   977 
   978 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   979 static int
   980 copy_32(void *dst, Uint32 * src, int n,
   981         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   982 {
   983     int i;
   984     Uint32 *d = dst;
   985     for (i = 0; i < n; i++) {
   986         unsigned r, g, b, a;
   987         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   988         PIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
   989         d++;
   990         src++;
   991     }
   992     return n * 4;
   993 }
   994 
   995 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   996 static int
   997 uncopy_32(Uint32 * dst, void *src, int n,
   998           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   999 {
  1000     int i;
  1001     Uint32 *s = src;
  1002     for (i = 0; i < n; i++) {
  1003         unsigned r, g, b, a;
  1004         Uint32 pixel = *s++;
  1005         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
  1006         a = pixel >> 24;
  1007         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
  1008         dst++;
  1009     }
  1010     return n * 4;
  1011 }
  1012 
  1013 #define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255)
  1014 
  1015 #define ISTRANSL(pixel, fmt)	\
  1016     ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U)
  1017 
/*
 * Convert surface to be quickly alpha-blittable onto dest, if possible.
 *
 * The destination is taken from surface->map->dst. Only 32bpp sources are
 * handled, and only 16bpp (565/555) or 32bpp (RGB in the low 24 bits)
 * destinations.
 *
 * The encoded buffer starts with an RLEDestFormat header describing the
 * destination format, followed, for every scan line, by the opaque
 * segments of the line and then the translucent segments of the same line.
 * Each segment is a (skip, run) count pair followed by `run` encoded
 * pixels. Trailing blank lines are dropped and the stream is closed with a
 * (0, 0) opaque count pair.
 *
 * On success the buffer is stored in surface->map->data, the original
 * pixels are freed (unless the surface is SDL_PREALLOC) and 0 is returned.
 * Returns -1 if the formats are unsupported or memory could not be
 * allocated.
 */
static int
RLEAlphaSurface(SDL_Surface * surface)
{
    SDL_Surface *dest;
    SDL_PixelFormat *df;
    int maxsize = 0;
    int max_opaque_run;
    int max_transl_run = 65535;
    unsigned masksum;
    Uint8 *rlebuf, *dst;
    /* per-target encoders for opaque and translucent runs */
    int (*copy_opaque) (void *, Uint32 *, int,
                        SDL_PixelFormat *, SDL_PixelFormat *);
    int (*copy_transl) (void *, Uint32 *, int,
                        SDL_PixelFormat *, SDL_PixelFormat *);

    dest = surface->map->dst;
    if (!dest)
        return -1;
    df = dest->format;
    if (surface->format->BitsPerPixel != 32)
        return -1;              /* only 32bpp source supported */

    /* find out whether the destination is one we support,
       and determine the max size of the encoded result */
    masksum = df->Rmask | df->Gmask | df->Bmask;
    switch (df->BytesPerPixel) {
    case 2:
        /* 16bpp: only support 565 and 555 formats */
        switch (masksum) {
        case 0xffff:
            /* 565: one of the channels must be the 6-bit middle field */
            if (df->Gmask == 0x07e0
                || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
                copy_opaque = copy_opaque_16;
                copy_transl = copy_transl_565;
            } else
                return -1;
            break;
        case 0x7fff:
            /* 555: one of the channels must be the 5-bit middle field */
            if (df->Gmask == 0x03e0
                || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
                copy_opaque = copy_opaque_16;
                copy_transl = copy_transl_555;
            } else
                return -1;
            break;
        default:
            return -1;
        }
        max_opaque_run = 255;   /* runs stored as bytes */

        /* worst case is alternating opaque and translucent pixels,
           with room for alignment padding between lines */
        maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
        break;
    case 4:
        if (masksum != 0x00ffffff)
            return -1;          /* requires unused high byte */
        copy_opaque = copy_32;
        copy_transl = copy_32;
        /* NOTE(review): opaque counts are written as 16-bit values for this
           target (see ADD_OPAQUE_COUNTS below), yet runs are capped at 255;
           confirm whether the smaller limit is intentional. */
        max_opaque_run = 255;   /* runs stored as short ints */

        /* worst case is alternating opaque and translucent pixels */
        maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
        break;
    default:
        return -1;              /* anything else unsupported right now */
    }

    /* room for the format header that precedes the encoded stream */
    maxsize += sizeof(RLEDestFormat);
    rlebuf = (Uint8 *) SDL_malloc(maxsize);
    if (!rlebuf) {
        SDL_OutOfMemory();
        return -1;
    }
    {
        /* save the destination format so we can undo the encoding later */
        RLEDestFormat *r = (RLEDestFormat *) rlebuf;
        r->BytesPerPixel = df->BytesPerPixel;
        r->Rmask = df->Rmask;
        r->Gmask = df->Gmask;
        r->Bmask = df->Bmask;
        r->Amask = df->Amask;
        r->Rloss = df->Rloss;
        r->Gloss = df->Gloss;
        r->Bloss = df->Bloss;
        r->Aloss = df->Aloss;
        r->Rshift = df->Rshift;
        r->Gshift = df->Gshift;
        r->Bshift = df->Bshift;
        r->Ashift = df->Ashift;
    }
    dst = rlebuf + sizeof(RLEDestFormat);

    /* Do the actual encoding */
    {
        int x, y;
        int h = surface->h, w = surface->w;
        SDL_PixelFormat *sf = surface->format;
        Uint32 *src = (Uint32 *) surface->pixels;
        Uint8 *lastline = dst;  /* end of last non-blank line */

        /* opaque counts are 8 or 16 bits, depending on target depth */
#define ADD_OPAQUE_COUNTS(n, m)			\
	if(df->BytesPerPixel == 4) {		\
	    ((Uint16 *)dst)[0] = n;		\
	    ((Uint16 *)dst)[1] = m;		\
	    dst += 4;				\
	} else {				\
	    dst[0] = n;				\
	    dst[1] = m;				\
	    dst += 2;				\
	}

        /* translucent counts are always 16 bit */
#define ADD_TRANSL_COUNTS(n, m)		\
	(((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)

        for (y = 0; y < h; y++) {
            int runstart, skipstart;
            int blankline = 0;
            /* First encode all opaque pixels of a scan line */
            x = 0;
            do {
                int run, skip, len;
                skipstart = x;
                while (x < w && !ISOPAQUE(src[x], sf))
                    x++;
                runstart = x;
                while (x < w && ISOPAQUE(src[x], sf))
                    x++;
                skip = runstart - skipstart;
                /* a skip spanning the whole line means no opaque pixels */
                if (skip == w)
                    blankline = 1;
                run = x - runstart;
                /* a skip too large for one count is split into
                   (max, 0) pairs followed by the remainder */
                while (skip > max_opaque_run) {
                    ADD_OPAQUE_COUNTS(max_opaque_run, 0);
                    skip -= max_opaque_run;
                }
                len = MIN(run, max_opaque_run);
                ADD_OPAQUE_COUNTS(skip, len);
                dst += copy_opaque(dst, src + runstart, len, sf, df);
                runstart += len;
                run -= len;
                /* the rest of an over-long run follows as zero-skip segments */
                while (run) {
                    len = MIN(run, max_opaque_run);
                    ADD_OPAQUE_COUNTS(0, len);
                    dst += copy_opaque(dst, src + runstart, len, sf, df);
                    runstart += len;
                    run -= len;
                }
            } while (x < w);

            /* Make sure the next output address is 32-bit aligned */
            dst += (uintptr_t) dst & 2;

            /* Next, encode all translucent pixels of the same scan line */
            x = 0;
            do {
                int run, skip, len;
                skipstart = x;
                while (x < w && !ISTRANSL(src[x], sf))
                    x++;
                runstart = x;
                while (x < w && ISTRANSL(src[x], sf))
                    x++;
                skip = runstart - skipstart;
                /* the line stays blank only if it also has no
                   translucent pixels */
                blankline &= (skip == w);
                run = x - runstart;
                while (skip > max_transl_run) {
                    ADD_TRANSL_COUNTS(max_transl_run, 0);
                    skip -= max_transl_run;
                }
                len = MIN(run, max_transl_run);
                ADD_TRANSL_COUNTS(skip, len);
                dst += copy_transl(dst, src + runstart, len, sf, df);
                runstart += len;
                run -= len;
                while (run) {
                    len = MIN(run, max_transl_run);
                    ADD_TRANSL_COUNTS(0, len);
                    dst += copy_transl(dst, src + runstart, len, sf, df);
                    runstart += len;
                    run -= len;
                }
                /* remember where the last line with content ends */
                if (!blankline)
                    lastline = dst;
            } while (x < w);

            /* advance one source row; pitch is in bytes, src in 32-bit pixels */
            src += surface->pitch >> 2;
        }
        dst = lastline;         /* back up past trailing blank lines */
        /* zero skip and zero run at the start of a line ends the stream */
        ADD_OPAQUE_COUNTS(0, 0);
    }

#undef ADD_OPAQUE_COUNTS
#undef ADD_TRANSL_COUNTS

    /* Now that we have it encoded, release the original pixels */
    if (!(surface->flags & SDL_PREALLOC)) {
        SDL_free(surface->pixels);
        surface->pixels = NULL;
    }

    /* realloc the buffer to release unused memory */
    {
        /* if the shrink fails, keep using the original allocation */
        Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
        if (!p)
            p = rlebuf;
        surface->map->data = p;
    }

    return 0;
}
  1232 
  1233 static Uint32
  1234 getpix_8(Uint8 * srcbuf)
  1235 {
  1236     return *srcbuf;
  1237 }
  1238 
  1239 static Uint32
  1240 getpix_16(Uint8 * srcbuf)
  1241 {
  1242     return *(Uint16 *) srcbuf;
  1243 }
  1244 
  1245 static Uint32
  1246 getpix_24(Uint8 * srcbuf)
  1247 {
  1248 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  1249     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
  1250 #else
  1251     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
  1252 #endif
  1253 }
  1254 
  1255 static Uint32
  1256 getpix_32(Uint8 * srcbuf)
  1257 {
  1258     return *(Uint32 *) srcbuf;
  1259 }
  1260 
/* Common signature of the getpix_* readers: fetch the pixel stored at the
   given byte address and return it as a 32-bit value */
typedef Uint32(*getpix_func) (Uint8 *);

/* Pixel readers for 1, 2, 3 and 4 bytes per pixel; RLEColorkeySurface
   indexes this table with (bytes per pixel - 1) */
static const getpix_func getpixes[4] = {
    getpix_8, getpix_16, getpix_24, getpix_32
};
  1266 
  1267 static int
  1268 RLEColorkeySurface(SDL_Surface * surface)
  1269 {
  1270     Uint8 *rlebuf, *dst;
  1271     int maxn;
  1272     int y;
  1273     Uint8 *srcbuf, *curbuf, *lastline;
  1274     int maxsize = 0;
  1275     int skip, run;
  1276     int bpp = surface->format->BytesPerPixel;
  1277     getpix_func getpix;
  1278     Uint32 ckey, rgbmask;
  1279     int w, h;
  1280 
  1281     /* calculate the worst case size for the compressed surface */
  1282     switch (bpp) {
  1283     case 1:
  1284         /* worst case is alternating opaque and transparent pixels,
  1285            starting with an opaque pixel */
  1286         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
  1287         break;
  1288     case 2:
  1289     case 3:
  1290         /* worst case is solid runs, at most 255 pixels wide */
  1291         maxsize = surface->h * (2 * (surface->w / 255 + 1)
  1292                                 + surface->w * bpp) + 2;
  1293         break;
  1294     case 4:
  1295         /* worst case is solid runs, at most 65535 pixels wide */
  1296         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
  1297                                 + surface->w * 4) + 4;
  1298         break;
  1299     }
  1300 
  1301     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1302     if (rlebuf == NULL) {
  1303         SDL_OutOfMemory();
  1304         return (-1);
  1305     }
  1306 
  1307     /* Set up the conversion */
  1308     srcbuf = (Uint8 *) surface->pixels;
  1309     curbuf = srcbuf;
  1310     maxn = bpp == 4 ? 65535 : 255;
  1311     skip = run = 0;
  1312     dst = rlebuf;
  1313     rgbmask = ~surface->format->Amask;
  1314     ckey = surface->map->info.colorkey & rgbmask;
  1315     lastline = dst;
  1316     getpix = getpixes[bpp - 1];
  1317     w = surface->w;
  1318     h = surface->h;
  1319 
  1320 #define ADD_COUNTS(n, m)			\
  1321 	if(bpp == 4) {				\
  1322 	    ((Uint16 *)dst)[0] = n;		\
  1323 	    ((Uint16 *)dst)[1] = m;		\
  1324 	    dst += 4;				\
  1325 	} else {				\
  1326 	    dst[0] = n;				\
  1327 	    dst[1] = m;				\
  1328 	    dst += 2;				\
  1329 	}
  1330 
  1331     for (y = 0; y < h; y++) {
  1332         int x = 0;
  1333         int blankline = 0;
  1334         do {
  1335             int run, skip, len;
  1336             int runstart;
  1337             int skipstart = x;
  1338 
  1339             /* find run of transparent, then opaque pixels */
  1340             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
  1341                 x++;
  1342             runstart = x;
  1343             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
  1344                 x++;
  1345             skip = runstart - skipstart;
  1346             if (skip == w)
  1347                 blankline = 1;
  1348             run = x - runstart;
  1349 
  1350             /* encode segment */
  1351             while (skip > maxn) {
  1352                 ADD_COUNTS(maxn, 0);
  1353                 skip -= maxn;
  1354             }
  1355             len = MIN(run, maxn);
  1356             ADD_COUNTS(skip, len);
  1357             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1358             dst += len * bpp;
  1359             run -= len;
  1360             runstart += len;
  1361             while (run) {
  1362                 len = MIN(run, maxn);
  1363                 ADD_COUNTS(0, len);
  1364                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1365                 dst += len * bpp;
  1366                 runstart += len;
  1367                 run -= len;
  1368             }
  1369             if (!blankline)
  1370                 lastline = dst;
  1371         } while (x < w);
  1372 
  1373         srcbuf += surface->pitch;
  1374     }
  1375     dst = lastline;             /* back up bast trailing blank lines */
  1376     ADD_COUNTS(0, 0);
  1377 
  1378 #undef ADD_COUNTS
  1379 
  1380     /* Now that we have it encoded, release the original pixels */
  1381     if (!(surface->flags & SDL_PREALLOC)) {
  1382         SDL_free(surface->pixels);
  1383         surface->pixels = NULL;
  1384     }
  1385 
  1386     /* realloc the buffer to release unused memory */
  1387     {
  1388         /* If realloc returns NULL, the original block is left intact */
  1389         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1390         if (!p)
  1391             p = rlebuf;
  1392         surface->map->data = p;
  1393     }
  1394 
  1395     return (0);
  1396 }
  1397 
  1398 int
  1399 SDL_RLESurface(SDL_Surface * surface)
  1400 {
  1401     int flags;
  1402 
  1403     /* Clear any previous RLE conversion */
  1404     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
  1405         SDL_UnRLESurface(surface, 1);
  1406     }
  1407 
  1408     /* We don't support RLE encoding of bitmaps */
  1409     if (surface->format->BitsPerPixel < 8) {
  1410         return -1;
  1411     }
  1412 
  1413     /* Make sure the pixels are available */
  1414     if (!surface->pixels) {
  1415         return -1;
  1416     }
  1417 
  1418     /* If we don't have colorkey or blending, nothing to do... */
  1419     flags = surface->map->info.flags;
  1420     if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
  1421         return -1;
  1422     }
  1423 
  1424     /* Pass on combinations not supported */
  1425     if ((flags & SDL_COPY_MODULATE_COLOR) ||
  1426         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
  1427         (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
  1428         (flags & SDL_COPY_NEAREST)) {
  1429         return -1;
  1430     }
  1431 
  1432     /* Encode and set up the blit */
  1433     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
  1434         if (!surface->map->identity) {
  1435             return -1;
  1436         }
  1437         if (RLEColorkeySurface(surface) < 0) {
  1438             return -1;
  1439         }
  1440         surface->map->blit = SDL_RLEBlit;
  1441         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
  1442     } else {
  1443         if (RLEAlphaSurface(surface) < 0) {
  1444             return -1;
  1445         }
  1446         surface->map->blit = SDL_RLEAlphaBlit;
  1447         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
  1448     }
  1449 
  1450     /* The surface is now accelerated */
  1451     surface->flags |= SDL_RLEACCEL;
  1452 
  1453     return (0);
  1454 }
  1455 
  1456 /*
  1457  * Un-RLE a surface with pixel alpha
  1458  * This may not give back exactly the image before RLE-encoding; all
  1459  * completely transparent pixels will be lost, and colour and alpha depth
  1460  * may have been reduced (when encoding for 16bpp targets).
  1461  */
  1462 static SDL_bool
  1463 UnRLEAlpha(SDL_Surface * surface)
  1464 {
  1465     Uint8 *srcbuf;
  1466     Uint32 *dst;
  1467     SDL_PixelFormat *sf = surface->format;
  1468     RLEDestFormat *df = surface->map->data;
  1469     int (*uncopy_opaque) (Uint32 *, void *, int,
  1470                           RLEDestFormat *, SDL_PixelFormat *);
  1471     int (*uncopy_transl) (Uint32 *, void *, int,
  1472                           RLEDestFormat *, SDL_PixelFormat *);
  1473     int w = surface->w;
  1474     int bpp = df->BytesPerPixel;
  1475 
  1476     if (bpp == 2) {
  1477         uncopy_opaque = uncopy_opaque_16;
  1478         uncopy_transl = uncopy_transl_16;
  1479     } else {
  1480         uncopy_opaque = uncopy_transl = uncopy_32;
  1481     }
  1482 
  1483     surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1484     if (!surface->pixels) {
  1485         return (SDL_FALSE);
  1486     }
  1487     /* fill background with transparent pixels */
  1488     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
  1489 
  1490     dst = surface->pixels;
  1491     srcbuf = (Uint8 *) (df + 1);
  1492     for (;;) {
  1493         /* copy opaque pixels */
  1494         int ofs = 0;
  1495         do {
  1496             unsigned run;
  1497             if (bpp == 2) {
  1498                 ofs += srcbuf[0];
  1499                 run = srcbuf[1];
  1500                 srcbuf += 2;
  1501             } else {
  1502                 ofs += ((Uint16 *) srcbuf)[0];
  1503                 run = ((Uint16 *) srcbuf)[1];
  1504                 srcbuf += 4;
  1505             }
  1506             if (run) {
  1507                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
  1508                 ofs += run;
  1509             } else if (!ofs)
  1510                 return (SDL_TRUE);
  1511         } while (ofs < w);
  1512 
  1513         /* skip padding if needed */
  1514         if (bpp == 2)
  1515             srcbuf += (uintptr_t) srcbuf & 2;
  1516 
  1517         /* copy translucent pixels */
  1518         ofs = 0;
  1519         do {
  1520             unsigned run;
  1521             ofs += ((Uint16 *) srcbuf)[0];
  1522             run = ((Uint16 *) srcbuf)[1];
  1523             srcbuf += 4;
  1524             if (run) {
  1525                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
  1526                 ofs += run;
  1527             }
  1528         } while (ofs < w);
  1529         dst += surface->pitch >> 2;
  1530     }
  1531     /* Make the compiler happy */
  1532     return (SDL_TRUE);
  1533 }
  1534 
  1535 void
  1536 SDL_UnRLESurface(SDL_Surface * surface, int recode)
  1537 {
  1538     if (surface->flags & SDL_RLEACCEL) {
  1539         surface->flags &= ~SDL_RLEACCEL;
  1540 
  1541         if (recode && !(surface->flags & SDL_PREALLOC)) {
  1542             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
  1543                 SDL_Rect full;
  1544 
  1545                 /* re-create the original surface */
  1546                 surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1547                 if (!surface->pixels) {
  1548                     /* Oh crap... */
  1549                     surface->flags |= SDL_RLEACCEL;
  1550                     return;
  1551                 }
  1552 
  1553                 /* fill it with the background colour */
  1554                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
  1555 
  1556                 /* now render the encoded surface */
  1557                 full.x = full.y = 0;
  1558                 full.w = surface->w;
  1559                 full.h = surface->h;
  1560                 SDL_RLEBlit(surface, &full, surface, &full);
  1561             } else {
  1562                 if (!UnRLEAlpha(surface)) {
  1563                     /* Oh crap... */
  1564                     surface->flags |= SDL_RLEACCEL;
  1565                     return;
  1566                 }
  1567             }
  1568         }
  1569         surface->map->info.flags &=
  1570             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
  1571 
  1572         if (surface->map->data) {
  1573             SDL_free(surface->map->data);
  1574             surface->map->data = NULL;
  1575         }
  1576     }
  1577 }
  1578 
  1579 /* vi: set ts=4 sw=4 expandtab: */