src/video/SDL_RLEaccel.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 08 Apr 2011 13:03:26 -0700
changeset 5535 96594ac5fd1a
parent 5439 3a778c6c0269
child 5631 3e1cbc6d7cad
permissions -rw-r--r--
SDL 1.3 is now under the zlib license.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2011 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /*
    24  * RLE encoding for software colorkey and alpha-channel acceleration
    25  *
    26  * Original version by Sam Lantinga
    27  *
    28  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
    29  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
    30  * format, encoder and blitter.
    31  *
    32  * Many thanks to Xark and johns for hints, benchmarks and useful comments
    33  * leading to this code.
    34  *
    35  * Welcome to Macro Mayhem.
    36  */
    37 
    38 /*
    39  * The encoding translates the image data to a stream of segments of the form
    40  *
    41  * <skip> <run> <data>
    42  *
    43  * where <skip> is the number of transparent pixels to skip,
    44  *       <run>  is the number of opaque pixels to blit,
    45  * and   <data> are the pixels themselves.
    46  *
    47  * This basic structure is used both for colorkeyed surfaces, used for simple
    48  * binary transparency and for per-surface alpha blending, and for surfaces
    49  * with per-pixel alpha. The details differ, however:
    50  *
    51  * Encoding of colorkeyed surfaces:
    52  *
    53  *   Encoded pixels always have the same format as the target surface.
    54  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
    55  *   where they are 16 bit. This makes the pixel data aligned at all times.
    56  *   Segments never wrap around from one scan line to the next.
    57  *
    58  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    59  *   beginning of a line.
    60  *
    61  * Encoding of surfaces with per-pixel alpha:
    62  *
    63  *   The sequence begins with a struct RLEDestFormat describing the target
    64  *   pixel format, to provide reliable un-encoding.
    65  *
    66  *   Each scan line is encoded twice: First all completely opaque pixels,
    67  *   encoded in the target format as described above, and then all
    68  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
    69  *   in the following 32-bit format:
    70  *
    71  *   For 32-bit targets, each pixel has the target RGB format but with
    72  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
    73  *   counts are 16 bit.
    74  * 
    75  *   For 16-bit targets, each pixel has the target RGB format, but with
    76  *   the middle component (usually green) shifted 16 steps to the left,
    77  *   and the hole filled with the 5 most significant bits of the alpha value.
    78  *   i.e. if the target has the format         rrrrrggggggbbbbb,
    79  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
    80  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
    81  *   for the translucent lines. Two padding bytes may be inserted
    82  *   before each translucent line to keep them 32-bit aligned.
    83  *
    84  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    85  *   beginning of an opaque line.
    86  */
    87 
    88 #include "SDL_video.h"
    89 #include "SDL_sysvideo.h"
    90 #include "SDL_blit.h"
    91 #include "SDL_RLEaccel_c.h"
    92 
    93 #ifndef MAX
       /*
        * Fallback MAX/MIN, defined only when no earlier header provided them.
        * Both are plain ternaries, so an argument with side effects may be
        * evaluated twice.
        */
    94 #define MAX(a, b) ((a) > (b) ? (a) : (b))
    95 #endif
    96 #ifndef MIN
    97 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    98 #endif
    99 
       /*
        * PIXEL_COPY: copy len pixels of bpp bytes each from `from` to `to`.
        * For bpp == 4 the pixel count is handed to SDL_memcpy4 unscaled
        * (presumably SDL_memcpy4 counts 32-bit words -- confirm against its
        * definition); for every other bpp the byte count len * bpp is handed
        * to SDL_memcpy.
        */
   100 #define PIXEL_COPY(to, from, len, bpp)			\
   101 do {							\
   102     if(bpp == 4) {					\
   103 	SDL_memcpy4(to, from, (size_t)(len));		\
   104     } else {						\
   105 	SDL_memcpy(to, from, (size_t)(len) * (bpp));	\
   106     }							\
   107 } while(0)
   108 
   109 /*
   110  * Various colorkey blit methods, for opaque and per-surface alpha
       *
       * Every method takes the same (to, from, length, bpp, alpha) argument
       * list, so any of them can be handed to the blitter macros as their
       * do_blit argument.
   111  */
   112 
       /* Opaque case: a straight pixel copy; the alpha argument is ignored. */
   113 #define OPAQUE_BLIT(to, from, length, bpp, alpha)	\
   114     PIXEL_COPY(to, from, length, bpp)
   115 
   116 /*
   117  * For 32bpp pixels of the form 0x00rrggbb:
   118  * If we treat the middle component separately, we can process the two
   119  * remaining in parallel. This is safe to do because of the gap to the left
   120  * of each component, so the bits from the multiplication don't collide.
   121  * This can be used for any RGB permutation of course.
       *
       * Each destination pixel becomes d + ((s - d) * alpha >> 8) per
       * component, computed in unsigned 32-bit arithmetic and masked back to
       * the component bits. The written pixel has its top 8 bits cleared,
       * because only bits inside 0x00ffffff survive the masks. The bpp
       * argument is not used.
   122  */
   123 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)		\
   124     do {							\
   125         int i;							\
   126 	Uint32 *src = (Uint32 *)(from);				\
   127 	Uint32 *dst = (Uint32 *)(to);				\
   128 	for(i = 0; i < (int)(length); i++) {			\
   129 	    Uint32 s = *src++;					\
   130 	    Uint32 d = *dst;					\
   131 	    Uint32 s1 = s & 0xff00ff;				\
   132 	    Uint32 d1 = d & 0xff00ff;				\
   133 	    d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;	\
   134 	    s &= 0xff00;					\
   135 	    d &= 0xff00;					\
   136 	    d = (d + ((s - d) * alpha >> 8)) & 0xff00;		\
   137 	    *dst++ = d1 | d;					\
   138 	}							\
   139     } while(0)
   140 
   141 /*
   142  * For 16bpp pixels we can go a step further: put the middle component
   143  * in the high 16 bits of a 32 bit word, and process all three RGB
   144  * components at the same time. Since the smallest gap is here just
   145  * 5 bits, we have to scale alpha down to 5 bits as well.
       *
       * (x | x << 16) & mask spreads a 16-bit pixel into that 32-bit layout,
       * and (d | d >> 16) truncated to Uint16 folds it back. ALPHA is
       * alpha >> 3, i.e. alpha with its three low bits dropped. The bpp
       * argument is not used.
   146  */
   147 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)	\
   148     do {						\
   149         int i;						\
   150 	Uint16 *src = (Uint16 *)(from);			\
   151 	Uint16 *dst = (Uint16 *)(to);			\
   152 	Uint32 ALPHA = alpha >> 3;			\
   153 	for(i = 0; i < (int)(length); i++) {		\
   154 	    Uint32 s = *src++;				\
   155 	    Uint32 d = *dst;				\
   156 	    s = (s | s << 16) & 0x07e0f81f;		\
   157 	    d = (d | d << 16) & 0x07e0f81f;		\
   158 	    d += (s - d) * ALPHA >> 5;			\
   159 	    d &= 0x07e0f81f;				\
   160 	    *dst++ = (Uint16)(d | d >> 16);			\
   161 	}						\
   162     } while(0)
   163 
       /*
        * Same algorithm as ALPHA_BLIT16_565; only the spread mask differs
        * (0x03e07c1f, i.e. a 5-bit middle component instead of a 6-bit one).
        */
   164 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)	\
   165     do {						\
   166         int i;						\
   167 	Uint16 *src = (Uint16 *)(from);			\
   168 	Uint16 *dst = (Uint16 *)(to);			\
   169 	Uint32 ALPHA = alpha >> 3;			\
   170 	for(i = 0; i < (int)(length); i++) {		\
   171 	    Uint32 s = *src++;				\
   172 	    Uint32 d = *dst;				\
   173 	    s = (s | s << 16) & 0x03e07c1f;		\
   174 	    d = (d | d << 16) & 0x03e07c1f;		\
   175 	    d += (s - d) * ALPHA >> 5;			\
   176 	    d &= 0x03e07c1f;				\
   177 	    *dst++ = (Uint16)(d | d >> 16);			\
   178 	}						\
   179     } while(0)
   180 
   181 /*
   182  * The general slow catch-all function, for remaining depths and formats
       *
       * Each pixel is read according to bpp (2, 3 or 4 bytes; 3-byte pixels
       * are assembled byte by byte in the order given by SDL_BYTEORDER),
       * split into components with RGB_FROM_PIXEL, blended per component as
       * d + ((s - d) * alpha >> 8), repacked with PIXEL_FROM_RGB and written
       * back in the same layout.
       *
       * Note: the macro refers to an identifier `fmt` that is NOT one of its
       * parameters; a pixel format pointer of that name must be in scope
       * wherever the macro is expanded. The switches have no default case,
       * so for any bpp other than 2, 3 or 4 the pixel values s and d are
       * never assigned; CHOOSE_BLIT only selects this macro for those three
       * sizes.
   183  */
   184 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)			\
   185     do {								\
   186         int i;								\
   187 	Uint8 *src = from;						\
   188 	Uint8 *dst = to;						\
   189 	for(i = 0; i < (int)(length); i++) {				\
   190 	    Uint32 s, d;						\
   191 	    unsigned rs, gs, bs, rd, gd, bd;				\
   192 	    switch(bpp) {						\
   193 	    case 2:							\
   194 		s = *(Uint16 *)src;					\
   195 		d = *(Uint16 *)dst;					\
   196 		break;							\
   197 	    case 3:							\
   198 		if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {			\
   199 		    s = (src[0] << 16) | (src[1] << 8) | src[2];	\
   200 		    d = (dst[0] << 16) | (dst[1] << 8) | dst[2];	\
   201 		} else {						\
   202 		    s = (src[2] << 16) | (src[1] << 8) | src[0];	\
   203 		    d = (dst[2] << 16) | (dst[1] << 8) | dst[0];	\
   204 		}							\
   205 		break;							\
   206 	    case 4:							\
   207 		s = *(Uint32 *)src;					\
   208 		d = *(Uint32 *)dst;					\
   209 		break;							\
   210 	    }								\
   211 	    RGB_FROM_PIXEL(s, fmt, rs, gs, bs);				\
   212 	    RGB_FROM_PIXEL(d, fmt, rd, gd, bd);				\
   213 	    rd += (rs - rd) * alpha >> 8;				\
   214 	    gd += (gs - gd) * alpha >> 8;				\
   215 	    bd += (bs - bd) * alpha >> 8;				\
   216 	    PIXEL_FROM_RGB(d, fmt, rd, gd, bd);				\
   217 	    switch(bpp) {						\
   218 	    case 2:							\
   219 		*(Uint16 *)dst = (Uint16)d;					\
   220 		break;							\
   221 	    case 3:							\
   222 		if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {			\
   223 		    dst[0] = (Uint8)(d >> 16);					\
   224 		    dst[1] = (Uint8)(d >> 8);					\
   225 		    dst[2] = (Uint8)(d);						\
   226 		} else {						\
   227 		    dst[0] = (Uint8)d;						\
   228 		    dst[1] = (Uint8)(d >> 8);					\
   229 		    dst[2] = (Uint8)(d >> 16);					\
   230 		}							\
   231 		break;							\
   232 	    case 4:							\
   233 		*(Uint32 *)dst = d;					\
   234 		break;							\
   235 	    }								\
   236 	    src += bpp;							\
   237 	    dst += bpp;							\
   238 	}								\
   239     } while(0)
   240 
   241 /*
   242  * Special case: 50% alpha (alpha=128)
   243  * This is treated specially because it can be optimized very well, and
   244  * since it is good for many cases of semi-translucency.
   245  * The theory is to do all three components at the same time:
   246  * First zero the lowest bit of each component, which gives us room to
   247  * add them. Then shift right and add the sum of the lowest bits.
       *
       * In the code below 0x00fefefe is the "all but the lowest bit" mask and
       * (s & d & 0x00010101) adds one back to a component when both inputs
       * had its lowest bit set. The bpp and alpha arguments are not used.
   248  */
   249 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)		\
   250     do {								\
   251         int i;								\
   252 	Uint32 *src = (Uint32 *)(from);					\
   253 	Uint32 *dst = (Uint32 *)(to);					\
   254 	for(i = 0; i < (int)(length); i++) {				\
   255 	    Uint32 s = *src++;						\
   256 	    Uint32 d = *dst;						\
   257 	    *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)	\
   258 		     + (s & d & 0x00010101);				\
   259 	}								\
   260     } while(0)
   261 
   262 /*
   263  * For 16bpp, we can actually blend two pixels in parallel, if we take
   264  * care to shift before we add, not after.
   265  */
   266 
   267 /* helper: blend a single 16 bit pixel at 50% */
       /*
        * dst and src must be modifiable pointer lvalues: both are advanced by
        * one pixel. mask selects the bits that take part in the addition;
        * the bits outside mask (within the low 16) are carried over only
        * where both pixels have them set.
        */
   268 #define BLEND16_50(dst, src, mask)			\
   269     do {						\
   270 	Uint32 s = *src++;				\
   271 	Uint32 d = *dst;				\
   272 	*dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +	\
   273 	                  (s & d & (~mask & 0xffff)));		\
   274     } while(0)
   275 
   276 /* basic 16bpp blender. mask is the bits to keep when adding. */
       /*
        * If src and dst differ in their two lowest address bits they can never
        * be 32-bit aligned at the same time, so pixels are blended one by one.
        * Otherwise at most one leading pixel is blended alone to reach a
        * 4-byte boundary, the bulk is blended two pixels per 32-bit access
        * (each half shifted before the addition), and a possible trailing
        * pixel is blended alone. The bpp and alpha arguments are not used.
        */
   277 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)		\
   278     do {								\
   279 	unsigned n = (length);						\
   280 	Uint16 *src = (Uint16 *)(from);					\
   281 	Uint16 *dst = (Uint16 *)(to);					\
   282 	if(((uintptr_t)src ^ (uintptr_t)dst) & 3) {			\
   283 	    /* source and destination not in phase, blit one by one */	\
   284 	    while(n--)							\
   285 		BLEND16_50(dst, src, mask);				\
   286 	} else {							\
   287 	    if((uintptr_t)src & 3) {					\
   288 		/* first odd pixel */					\
   289 		BLEND16_50(dst, src, mask);				\
   290 		n--;							\
   291 	    }								\
   292 	    for(; n > 1; n -= 2) {					\
   293 		Uint32 s = *(Uint32 *)src;				\
   294 		Uint32 d = *(Uint32 *)dst;				\
   295 		*(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1)	\
   296 		               + ((d & (mask | mask << 16)) >> 1)	\
   297 		               + (s & d & (~(mask | mask << 16)));	\
   298 		src += 2;						\
   299 		dst += 2;						\
   300 	    }								\
   301 	    if(n)							\
   302 		BLEND16_50(dst, src, mask); /* last odd pixel */	\
   303 	}								\
   304     } while(0)
   305 
   306 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)	\
   307     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
   308 
   309 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)	\
   310     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
   311 
       /*
        * The two wrappers above fix the mask for ALPHA_BLIT16_50: 0xf7de has
        * bits 0, 5 and 11 cleared (the lowest bit of each 565 component) and
        * 0xfbde has bits 0, 5 and 10 cleared (the lowest bit of each 555
        * component).
        *
        * CHOOSE_BLIT expands blitter(bpp, CountType, do_blit) with the blit
        * macro that matches alpha and the pixel format fmt:
        *  - alpha == 255: OPAQUE_BLIT for every pixel size.
        *  - otherwise, 1 byte per pixel: nothing is expanded.
        *  - 2 bytes per pixel: the 565 or 555 blender when the combined RGB
        *    mask is 0xffff or 0x7fff and one component occupies the middle
        *    field (0x07e0 or 0x03e0), with the _50 variant for alpha == 128;
        *    ALPHA_BLIT_ANY for everything else.
        *  - 3 bytes per pixel: ALPHA_BLIT_ANY.
        *  - 4 bytes per pixel: the 888 blender when the combined RGB mask is
        *    0x00ffffff and one component is 0xff00 (again with a _50 variant),
        *    otherwise ALPHA_BLIT_ANY.
        * CountType is Uint8 for 1-3 byte pixels and Uint16 for 4-byte pixels.
        * The expansion contains the label general16, so it can appear at most
        * once per function.
        */
   312 #define CHOOSE_BLIT(blitter, alpha, fmt)				\
   313     do {								\
   314         if(alpha == 255) {						\
   315 	    switch(fmt->BytesPerPixel) {				\
   316 	    case 1: blitter(1, Uint8, OPAQUE_BLIT); break;		\
   317 	    case 2: blitter(2, Uint8, OPAQUE_BLIT); break;		\
   318 	    case 3: blitter(3, Uint8, OPAQUE_BLIT); break;		\
   319 	    case 4: blitter(4, Uint16, OPAQUE_BLIT); break;		\
   320 	    }								\
   321 	} else {							\
   322 	    switch(fmt->BytesPerPixel) {				\
   323 	    case 1:							\
   324 		/* No 8bpp alpha blitting */				\
   325 		break;							\
   326 									\
   327 	    case 2:							\
   328 		switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) {		\
   329 		case 0xffff:						\
   330 		    if(fmt->Gmask == 0x07e0				\
   331 		       || fmt->Rmask == 0x07e0				\
   332 		       || fmt->Bmask == 0x07e0) {			\
   333 			if(alpha == 128)				\
   334 			    blitter(2, Uint8, ALPHA_BLIT16_565_50);	\
   335 			else {						\
   336 			    blitter(2, Uint8, ALPHA_BLIT16_565);	\
   337 			}						\
   338 		    } else						\
   339 			goto general16;					\
   340 		    break;						\
   341 									\
   342 		case 0x7fff:						\
   343 		    if(fmt->Gmask == 0x03e0				\
   344 		       || fmt->Rmask == 0x03e0				\
   345 		       || fmt->Bmask == 0x03e0) {			\
   346 			if(alpha == 128)				\
   347 			    blitter(2, Uint8, ALPHA_BLIT16_555_50);	\
   348 			else {						\
   349 			    blitter(2, Uint8, ALPHA_BLIT16_555);	\
   350 			}						\
   351 			break;						\
   352 		    }							\
   353 		    /* fallthrough */					\
   354 									\
   355 		default:						\
   356 		general16:						\
   357 		    blitter(2, Uint8, ALPHA_BLIT_ANY);			\
   358 		}							\
   359 		break;							\
   360 									\
   361 	    case 3:							\
   362 		blitter(3, Uint8, ALPHA_BLIT_ANY);			\
   363 		break;							\
   364 									\
   365 	    case 4:							\
   366 		if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff	\
   367 		   && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00	\
   368 		       || fmt->Bmask == 0xff00)) {			\
   369 		    if(alpha == 128)					\
   370 			blitter(4, Uint16, ALPHA_BLIT32_888_50);	\
   371 		    else						\
   372 			blitter(4, Uint16, ALPHA_BLIT32_888);		\
   373 		} else							\
   374 		    blitter(4, Uint16, ALPHA_BLIT_ANY);			\
   375 		break;							\
   376 	    }								\
   377 	}								\
   378     } while(0)
   379 
   380 /*
   381  * This takes care of the case when the surface is clipped on the left and/or
   382  * right. Top clipping has already been taken care of.
       *
       *   w        width in pixels of one encoded scan line; a line is
       *            complete when the running offset reaches it
       *   srcbuf   encoded stream, positioned at the first line to blit
       *   dst      destination surface; supplies the pixel format and pitch
       *   dstbuf   destination address of the first blitted pixel
       *   srcrect  columns x .. x + w - 1 are blitted, for h lines
       *   alpha    per-surface alpha, used to pick the blit macro
       *
       * dstbuf is first moved back by srcrect->x pixels, so that destination
       * columns can be addressed with source column numbers. Every run is
       * then trimmed to [left, right) before being blitted. The loop ends
       * after srcrect->h lines, or at a zero <skip>,<run> pair found at the
       * start of a line. The nocopy label is built by token pasting from bpp
       * and do_blit, which keeps it unique across the expansions that
       * CHOOSE_BLIT generates inside this one function.
   383  */
   384 static void
   385 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
   386             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
   387 {
   388     SDL_PixelFormat *fmt = dst->format;
   389 
   390 #define RLECLIPBLIT(bpp, Type, do_blit)					   \
   391     do {								   \
   392 	int linecount = srcrect->h;					   \
   393 	int ofs = 0;							   \
   394 	int left = srcrect->x;						   \
   395 	int right = left + srcrect->w;					   \
   396 	dstbuf -= left * bpp;						   \
   397 	for(;;) {							   \
   398 	    int run;							   \
   399 	    ofs += *(Type *)srcbuf;					   \
   400 	    run = ((Type *)srcbuf)[1];					   \
   401 	    srcbuf += 2 * sizeof(Type);					   \
   402 	    if(run) {							   \
   403 		/* clip to left and right borders */			   \
   404 		if(ofs < right) {					   \
   405 		    int start = 0;					   \
   406 		    int len = run;					   \
   407 		    int startcol;					   \
   408 		    if(left - ofs > 0) {				   \
   409 			start = left - ofs;				   \
   410 			len -= start;					   \
   411 			if(len <= 0)					   \
   412 			    goto nocopy ## bpp ## do_blit;		   \
   413 		    }							   \
   414 		    startcol = ofs + start;				   \
   415 		    if(len > right - startcol)				   \
   416 			len = right - startcol;				   \
   417 		    do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
   418 			    len, bpp, alpha);				   \
   419 		}							   \
   420 	    nocopy ## bpp ## do_blit:					   \
   421 		srcbuf += run * bpp;					   \
   422 		ofs += run;						   \
   423 	    } else if(!ofs)						   \
   424 		break;							   \
   425 	    if(ofs == w) {						   \
   426 		ofs = 0;						   \
   427 		dstbuf += dst->pitch;					   \
   428 		if(!--linecount)					   \
   429 		    break;						   \
   430 	    }								   \
   431 	}								   \
   432     } while(0)
   433 
   434     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
   435 
   436 #undef RLECLIPBLIT
   437 
   438 }
   439 
   440 
   441 /* blit a colorkeyed RLE surface */
       /*
        * Blits the RLE-encoded pixels stored in src->map->data onto dst.
        *
        *   srcrect  part of the source to blit: srcrect->y encoded lines are
        *            skipped first, then srcrect->h lines are blitted; a
        *            non-zero x, or a w different from src->w, selects the
        *            clipping blitter RLEClipBlit
        *   dstrect  only x and y are read; they locate the first pixel in dst
        *
        * The per-surface alpha comes from src->map->info.a. dst is locked for
        * the duration of the blit when SDL_MUSTLOCK(dst) says so.
        *
        * Returns -1 if locking dst fails, 0 otherwise -- including when the
        * end-of-stream marker is met while lines are still being skipped.
        */
   442 int
   443 SDL_RLEBlit(SDL_Surface * src, SDL_Rect * srcrect,
   444             SDL_Surface * dst, SDL_Rect * dstrect)
   445 {
   446     Uint8 *dstbuf;
   447     Uint8 *srcbuf;
   448     int x, y;
   449     int w = src->w;
   450     unsigned alpha;
   451 
   452     /* Lock the destination if necessary */
   453     if (SDL_MUSTLOCK(dst)) {
   454         if (SDL_LockSurface(dst) < 0) {
   455             return (-1);
   456         }
   457     }
   458 
   459     /* Set up the source and destination pointers */
       /* NOTE(review): the destination offset is scaled by the SOURCE's
          BytesPerPixel; the format notes at the top of this file say encoded
          pixels use the target format, so the two are presumably equal --
          confirm. */
   460     x = dstrect->x;
   461     y = dstrect->y;
   462     dstbuf = (Uint8 *) dst->pixels
   463         + y * dst->pitch + x * src->format->BytesPerPixel;
   464     srcbuf = (Uint8 *) src->map->data;
   465 
   466     {
   467         /* skip lines at the top if necessary */
   468         int vskip = srcrect->y;
   469         int ofs = 0;
   470         if (vskip) {
   471 
       /* RLESKIP walks over vskip encoded lines without blitting; it jumps to
          `done` if the end-of-stream marker shows up first. */
   472 #define RLESKIP(bpp, Type)			\
   473 		for(;;) {			\
   474 		    int run;			\
   475 		    ofs += *(Type *)srcbuf;	\
   476 		    run = ((Type *)srcbuf)[1];	\
   477 		    srcbuf += sizeof(Type) * 2;	\
   478 		    if(run) {			\
   479 			srcbuf += run * bpp;	\
   480 			ofs += run;		\
   481 		    } else if(!ofs)		\
   482 			goto done;		\
   483 		    if(ofs == w) {		\
   484 			ofs = 0;		\
   485 			if(!--vskip)		\
   486 			    break;		\
   487 		    }				\
   488 		}
   489 
   490             switch (src->format->BytesPerPixel) {
   491             case 1:
   492                 RLESKIP(1, Uint8);
   493                 break;
   494             case 2:
   495                 RLESKIP(2, Uint8);
   496                 break;
   497             case 3:
   498                 RLESKIP(3, Uint8);
   499                 break;
   500             case 4:
   501                 RLESKIP(4, Uint16);
   502                 break;
   503             }
   504 
   505 #undef RLESKIP
   506 
   507         }
   508     }
   509 
   510     alpha = src->map->info.a;
   511     /* if left or right edge clipping needed, call clip blit */
   512     if (srcrect->x || srcrect->w != src->w) {
   513         RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha);
   514     } else {
   515         SDL_PixelFormat *fmt = src->format;
   516 
       /* Unclipped blitter: every run is blitted whole at its line offset. */
   517 #define RLEBLIT(bpp, Type, do_blit)					      \
   518 	    do {							      \
   519 		int linecount = srcrect->h;				      \
   520 		int ofs = 0;						      \
   521 		for(;;) {						      \
   522 		    unsigned run;					      \
   523 		    ofs += *(Type *)srcbuf;				      \
   524 		    run = ((Type *)srcbuf)[1];				      \
   525 		    srcbuf += 2 * sizeof(Type);				      \
   526 		    if(run) {						      \
   527 			do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
   528 			srcbuf += run * bpp;				      \
   529 			ofs += run;					      \
   530 		    } else if(!ofs)					      \
   531 			break;						      \
   532 		    if(ofs == w) {					      \
   533 			ofs = 0;					      \
   534 			dstbuf += dst->pitch;				      \
   535 			if(!--linecount)				      \
   536 			    break;					      \
   537 		    }							      \
   538 		}							      \
   539 	    } while(0)
   540 
   541         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
   542 
   543 #undef RLEBLIT
   544     }
   545 
   546   done:
   547     /* Unlock the destination if necessary */
   548     if (SDL_MUSTLOCK(dst)) {
   549         SDL_UnlockSurface(dst);
   550     }
   551     return (0);
   552 }
   553 
   554 #undef OPAQUE_BLIT
   555 
   556 /*
   557  * Per-pixel blitting macros for translucent pixels:
   558  * These use the same techniques as the per-surface blitting macros
       *
       * Each macro takes an encoded 32-bit source pixel `src` and a
       * destination pixel lvalue `dst`, and blends src into dst using the
       * alpha value carried inside src.
   559  */
   560 
   561 /*
   562  * For 32bpp pixels, we have made sure the alpha is stored in the top
   563  * 8 bits, so proceed as usual
       *
       * The top 8 bits of the written pixel are always set to 0xff.
   564  */
   565 #define BLIT_TRANSL_888(src, dst)				\
   566     do {							\
   567         Uint32 s = src;						\
   568 	Uint32 d = dst;						\
   569 	unsigned alpha = s >> 24;				\
   570 	Uint32 s1 = s & 0xff00ff;				\
   571 	Uint32 d1 = d & 0xff00ff;				\
   572 	d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;	\
   573 	s &= 0xff00;						\
   574 	d &= 0xff00;						\
   575 	d = (d + ((s - d) * alpha >> 8)) & 0xff00;		\
   576 	dst = d1 | d | 0xff000000;				\
   577     } while(0)
   578 
   579 /*
   580  * For 16bpp pixels, we have stored the 5 most significant alpha bits in
   581  * bits 5-9. As before, we can process all 3 RGB components at the same time.
       *
       * The source pixel is already in the spread 32-bit layout, so only the
       * destination is spread with (d | d << 16) before blending; the alpha
       * bits are removed from the source by the component mask.
   582  */
   583 #define BLIT_TRANSL_565(src, dst)		\
   584     do {					\
   585 	Uint32 s = src;				\
   586 	Uint32 d = dst;				\
   587 	unsigned alpha = (s & 0x3e0) >> 5;	\
   588 	s &= 0x07e0f81f;			\
   589 	d = (d | d << 16) & 0x07e0f81f;		\
   590 	d += (s - d) * alpha >> 5;		\
   591 	d &= 0x07e0f81f;			\
   592 	dst = (Uint16)(d | d >> 16);			\
   593     } while(0)
   594 
       /* Same as BLIT_TRANSL_565, with the 555 component mask 0x03e07c1f. */
   595 #define BLIT_TRANSL_555(src, dst)		\
   596     do {					\
   597 	Uint32 s = src;				\
   598 	Uint32 d = dst;				\
   599 	unsigned alpha = (s & 0x3e0) >> 5;	\
   600 	s &= 0x03e07c1f;			\
   601 	d = (d | d << 16) & 0x03e07c1f;		\
   602 	d += (s - d) * alpha >> 5;		\
   603 	d &= 0x03e07c1f;			\
   604 	dst = (Uint16)(d | d >> 16);			\
   605     } while(0)
   606 
   607 /* used to save the destination format in the encoding. Designed to be
   608    macro-compatible with SDL_PixelFormat but without the unneeded fields */
       /* One of these sits at the very start of a per-pixel-alpha encoding:
          SDL_RLEAlphaBlit skips sizeof(RLEDestFormat) bytes to reach the
          first encoded line. */
   609 typedef struct
   610 {
   611     Uint8 BytesPerPixel;
   612     Uint8 padding[3];
   613     Uint32 Rmask;
   614     Uint32 Gmask;
   615     Uint32 Bmask;
   616     Uint32 Amask;
   617     Uint8 Rloss;
   618     Uint8 Gloss;
   619     Uint8 Bloss;
   620     Uint8 Aloss;
   621     Uint8 Rshift;
   622     Uint8 Gshift;
   623     Uint8 Bshift;
   624     Uint8 Ashift;
   625 } RLEDestFormat;
   626 
   627 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
       /*
        *   w        width in pixels of one encoded scan line
        *   srcbuf   encoded stream, positioned at the first line to blit
        *   dst      destination surface; supplies the pixel format and pitch
        *   dstbuf   destination address of the first blitted pixel
        *   srcrect  columns x .. x + w - 1 are blitted, for h lines
        *
        * Every scan line is stored twice: first its opaque runs, which are
        * copied, then its translucent runs, which are blended pixel by pixel.
        * Both kinds of run are trimmed to [left, right) first. The function
        * returns early when a zero <skip>,<run> pair starts an opaque line.
        * Only 2 and 4 byte destination pixels are handled; for any other
        * size nothing is done.
        */
   628 static void
   629 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
   630                  Uint8 * dstbuf, SDL_Rect * srcrect)
   631 {
   632     SDL_PixelFormat *df = dst->format;
   633     /*
   634      * clipped blitter: Ptype is the destination pixel type,
   635      * Ctype the translucent count type, and do_blend the macro
   636      * to blend one pixel.
            *
            * In the code below Ctype is the type used to read the counts of
            * the OPAQUE runs; the counts of the translucent runs are always
            * read as Uint16.
   637      */
   638 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)			  \
   639     do {								  \
   640 	int linecount = srcrect->h;					  \
   641 	int left = srcrect->x;						  \
   642 	int right = left + srcrect->w;					  \
   643 	dstbuf -= left * sizeof(Ptype);					  \
   644 	do {								  \
   645 	    int ofs = 0;						  \
   646 	    /* blit opaque pixels on one line */			  \
   647 	    do {							  \
   648 		unsigned run;						  \
   649 		ofs += ((Ctype *)srcbuf)[0];				  \
   650 		run = ((Ctype *)srcbuf)[1];				  \
   651 		srcbuf += 2 * sizeof(Ctype);				  \
   652 		if(run) {						  \
   653 		    /* clip to left and right borders */		  \
   654 		    int cofs = ofs;					  \
   655 		    int crun = run;					  \
   656 		    if(left - cofs > 0) {				  \
   657 			crun -= left - cofs;				  \
   658 			cofs = left;					  \
   659 		    }							  \
   660 		    if(crun > right - cofs)				  \
   661 			crun = right - cofs;				  \
   662 		    if(crun > 0)					  \
   663 			PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),	  \
   664 				   srcbuf + (cofs - ofs) * sizeof(Ptype), \
   665 				   (unsigned)crun, sizeof(Ptype));	  \
   666 		    srcbuf += run * sizeof(Ptype);			  \
   667 		    ofs += run;						  \
   668 		} else if(!ofs)						  \
   669 		    return;						  \
   670 	    } while(ofs < w);						  \
   671 	    /* skip padding if necessary */				  \
   672 	    if(sizeof(Ptype) == 2)					  \
   673 		srcbuf += (uintptr_t)srcbuf & 2;			  \
   674 	    /* blit translucent pixels on the same line */		  \
   675 	    ofs = 0;							  \
   676 	    do {							  \
   677 		unsigned run;						  \
   678 		ofs += ((Uint16 *)srcbuf)[0];				  \
   679 		run = ((Uint16 *)srcbuf)[1];				  \
   680 		srcbuf += 4;						  \
   681 		if(run) {						  \
   682 		    /* clip to left and right borders */		  \
   683 		    int cofs = ofs;					  \
   684 		    int crun = run;					  \
   685 		    if(left - cofs > 0) {				  \
   686 			crun -= left - cofs;				  \
   687 			cofs = left;					  \
   688 		    }							  \
   689 		    if(crun > right - cofs)				  \
   690 			crun = right - cofs;				  \
   691 		    if(crun > 0) {					  \
   692 			Ptype *dst = (Ptype *)dstbuf + cofs;		  \
   693 			Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);	  \
   694 			int i;						  \
   695 			for(i = 0; i < crun; i++)			  \
   696 			    do_blend(src[i], dst[i]);			  \
   697 		    }							  \
   698 		    srcbuf += run * 4;					  \
   699 		    ofs += run;						  \
   700 		}							  \
   701 	    } while(ofs < w);						  \
   702 	    dstbuf += dst->pitch;					  \
   703 	} while(--linecount);						  \
   704     } while(0)
   705 
   706     switch (df->BytesPerPixel) {
   707     case 2:
   708         if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
   709             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
   710         else
   711             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
   712         break;
   713     case 4:
   714         RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
   715         break;
   716     }
   717 }
   718 
   719 /* blit a pixel-alpha RLE surface */
       /*
        * Blits the per-pixel-alpha encoding stored in src->map->data onto dst.
        * The encoding starts with an RLEDestFormat header, which is skipped.
        *
        *   srcrect  part of the source to blit: srcrect->y encoded lines are
        *            skipped first, then srcrect->h lines are blitted; a
        *            non-zero x, or a w different from src->w, selects the
        *            clipping blitter RLEAlphaClipBlit
        *   dstrect  only x and y are read; they locate the first pixel in dst
        *
        * Only destinations with 2 or 4 bytes per pixel are blitted; for other
        * sizes no pixel is written. dst is locked for the duration of the
        * blit when SDL_MUSTLOCK(dst) says so.
        *
        * Returns -1 if locking dst fails, 0 otherwise -- including when the
        * end-of-stream marker is reached before all requested lines are done.
        */
   720 int
   721 SDL_RLEAlphaBlit(SDL_Surface * src, SDL_Rect * srcrect,
   722                  SDL_Surface * dst, SDL_Rect * dstrect)
   723 {
   724     int x, y;
   725     int w = src->w;
   726     Uint8 *srcbuf, *dstbuf;
   727     SDL_PixelFormat *df = dst->format;
   728 
   729     /* Lock the destination if necessary */
   730     if (SDL_MUSTLOCK(dst)) {
   731         if (SDL_LockSurface(dst) < 0) {
   732             return -1;
   733         }
   734     }
   735 
   736     x = dstrect->x;
   737     y = dstrect->y;
   738     dstbuf = (Uint8 *) dst->pixels + y * dst->pitch + x * df->BytesPerPixel;
   739     srcbuf = (Uint8 *) src->map->data + sizeof(RLEDestFormat);
   740 
   741     {
   742         /* skip lines at the top if necessary */
   743         int vskip = srcrect->y;
   744         if (vskip) {
   745             int ofs;
   746             if (df->BytesPerPixel == 2) {
   747                 /* the 16/32 interleaved format */
                       /* opaque runs: 8-bit counts followed by 2-byte pixels;
                          translucent runs: 16-bit counts followed by 4-byte
                          pixels */
   748                 do {
   749                     /* skip opaque line */
   750                     ofs = 0;
   751                     do {
   752                         int run;
   753                         ofs += srcbuf[0];
   754                         run = srcbuf[1];
   755                         srcbuf += 2;
   756                         if (run) {
   757                             srcbuf += 2 * run;
   758                             ofs += run;
   759                         } else if (!ofs)
   760                             goto done;
   761                     } while (ofs < w);
   762 
   763                     /* skip padding */
   764                     srcbuf += (uintptr_t) srcbuf & 2;
   765 
   766                     /* skip translucent line */
   767                     ofs = 0;
   768                     do {
   769                         int run;
   770                         ofs += ((Uint16 *) srcbuf)[0];
   771                         run = ((Uint16 *) srcbuf)[1];
   772                         srcbuf += 4 * (run + 1);
   773                         ofs += run;
   774                     } while (ofs < w);
   775                 } while (--vskip);
   776             } else {
   777                 /* the 32/32 interleaved format */
   778                 vskip <<= 1;    /* opaque and translucent have same format */
   779                 do {
   780                     ofs = 0;
   781                     do {
   782                         int run;
   783                         ofs += ((Uint16 *) srcbuf)[0];
   784                         run = ((Uint16 *) srcbuf)[1];
   785                         srcbuf += 4;
   786                         if (run) {
   787                             srcbuf += 4 * run;
   788                             ofs += run;
   789                         } else if (!ofs)
   790                             goto done;
   791                     } while (ofs < w);
   792                 } while (--vskip);
   793             }
   794         }
   795     }
   796 
   797     /* if left or right edge clipping needed, call clip blit */
   798     if (srcrect->x || srcrect->w != src->w) {
   799         RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect);
   800     } else {
   801 
   802         /*
   803          * non-clipped blitter. Ptype is the destination pixel type,
   804          * Ctype the translucent count type, and do_blend the
   805          * macro to blend one pixel.
                *
                * In the code below Ctype is the type used to read the counts
                * of the OPAQUE runs; the counts of the translucent runs are
                * always read as Uint16.
   806          */
   807 #define RLEALPHABLIT(Ptype, Ctype, do_blend)				 \
   808 	do {								 \
   809 	    int linecount = srcrect->h;					 \
   810 	    do {							 \
   811 		int ofs = 0;						 \
   812 		/* blit opaque pixels on one line */			 \
   813 		do {							 \
   814 		    unsigned run;					 \
   815 		    ofs += ((Ctype *)srcbuf)[0];			 \
   816 		    run = ((Ctype *)srcbuf)[1];				 \
   817 		    srcbuf += 2 * sizeof(Ctype);			 \
   818 		    if(run) {						 \
   819 			PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
   820 				   run, sizeof(Ptype));			 \
   821 			srcbuf += run * sizeof(Ptype);			 \
   822 			ofs += run;					 \
   823 		    } else if(!ofs)					 \
   824 			goto done;					 \
   825 		} while(ofs < w);					 \
   826 		/* skip padding if necessary */				 \
   827 		if(sizeof(Ptype) == 2)					 \
   828 		    srcbuf += (uintptr_t)srcbuf & 2;		 	 \
   829 		/* blit translucent pixels on the same line */		 \
   830 		ofs = 0;						 \
   831 		do {							 \
   832 		    unsigned run;					 \
   833 		    ofs += ((Uint16 *)srcbuf)[0];			 \
   834 		    run = ((Uint16 *)srcbuf)[1];			 \
   835 		    srcbuf += 4;					 \
   836 		    if(run) {						 \
   837 			Ptype *dst = (Ptype *)dstbuf + ofs;		 \
   838 			unsigned i;					 \
   839 			for(i = 0; i < run; i++) {			 \
   840 			    Uint32 src = *(Uint32 *)srcbuf;		 \
   841 			    do_blend(src, *dst);			 \
   842 			    srcbuf += 4;				 \
   843 			    dst++;					 \
   844 			}						 \
   845 			ofs += run;					 \
   846 		    }							 \
   847 		} while(ofs < w);					 \
   848 		dstbuf += dst->pitch;					 \
   849 	    } while(--linecount);					 \
   850 	} while(0)
   851 
   852         switch (df->BytesPerPixel) {
   853         case 2:
   854             if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
   855                 || df->Bmask == 0x07e0)
   856                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
   857             else
   858                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
   859             break;
   860         case 4:
   861             RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
   862             break;
   863         }
   864     }
   865 
   866   done:
   867     /* Unlock the destination if necessary */
   868     if (SDL_MUSTLOCK(dst)) {
   869         SDL_UnlockSurface(dst);
   870     }
   871     return 0;
   872 }
   873 
   874 /*
   875  * Auxiliary functions:
   876  * The encoding functions take 32bpp rgb + a, and
   877  * return the number of bytes copied to the destination.
   878  * The decoding functions copy to 32bpp rgb + a, and
   879  * return the number of bytes copied from the source.
   880  * These are only used in the encoder and un-RLE code and are therefore not
   881  * highly optimised.
   882  */
   883 
   884 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
   885 static int
   886 copy_opaque_16(void *dst, Uint32 * src, int n,
   887                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   888 {
   889     int i;
   890     Uint16 *d = dst;
   891     for (i = 0; i < n; i++) {
   892         unsigned r, g, b;
   893         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
   894         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
   895         src++;
   896         d++;
   897     }
   898     return n * 2;
   899 }
   900 
   901 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
   902 static int
   903 uncopy_opaque_16(Uint32 * dst, void *src, int n,
   904                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   905 {
   906     int i;
   907     Uint16 *s = src;
   908     unsigned alpha = dfmt->Amask ? 255 : 0;
   909     for (i = 0; i < n; i++) {
   910         unsigned r, g, b;
   911         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
   912         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
   913         s++;
   914         dst++;
   915     }
   916     return n * 2;
   917 }
   918 
   919 
   920 
   921 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
   922 static int
   923 copy_transl_565(void *dst, Uint32 * src, int n,
   924                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   925 {
   926     int i;
   927     Uint32 *d = dst;
   928     for (i = 0; i < n; i++) {
   929         unsigned r, g, b, a;
   930         Uint16 pix;
   931         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   932         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   933         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
   934         src++;
   935         d++;
   936     }
   937     return n * 4;
   938 }
   939 
   940 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
   941 static int
   942 copy_transl_555(void *dst, Uint32 * src, int n,
   943                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   944 {
   945     int i;
   946     Uint32 *d = dst;
   947     for (i = 0; i < n; i++) {
   948         unsigned r, g, b, a;
   949         Uint16 pix;
   950         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   951         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   952         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
   953         src++;
   954         d++;
   955     }
   956     return n * 4;
   957 }
   958 
   959 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
   960 static int
   961 uncopy_transl_16(Uint32 * dst, void *src, int n,
   962                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   963 {
   964     int i;
   965     Uint32 *s = src;
   966     for (i = 0; i < n; i++) {
   967         unsigned r, g, b, a;
   968         Uint32 pix = *s++;
   969         a = (pix & 0x3e0) >> 2;
   970         pix = (pix & ~0x3e0) | pix >> 16;
   971         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
   972         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   973         dst++;
   974     }
   975     return n * 4;
   976 }
   977 
   978 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   979 static int
   980 copy_32(void *dst, Uint32 * src, int n,
   981         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   982 {
   983     int i;
   984     Uint32 *d = dst;
   985     for (i = 0; i < n; i++) {
   986         unsigned r, g, b, a;
   987         Uint32 pixel;
   988         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   989         PIXEL_FROM_RGB(pixel, dfmt, r, g, b);
   990         *d++ = pixel | a << 24;
   991         src++;
   992     }
   993     return n * 4;
   994 }
   995 
   996 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   997 static int
   998 uncopy_32(Uint32 * dst, void *src, int n,
   999           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
  1000 {
  1001     int i;
  1002     Uint32 *s = src;
  1003     for (i = 0; i < n; i++) {
  1004         unsigned r, g, b, a;
  1005         Uint32 pixel = *s++;
  1006         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
  1007         a = pixel >> 24;
  1008         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
  1009         dst++;
  1010     }
  1011     return n * 4;
  1012 }
  1013 
  /*
   * Alpha classification of a pixel in format `fmt` (a pointer to a
   * structure with Amask and Ashift members).
   *
   * ISOPAQUE is true when the alpha field equals 255.
   * ISTRANSL is true when the alpha field is in 1..254: subtracting 1 as
   * unsigned wraps an alpha of 0 to a huge value, so a single comparison
   * rejects both fully transparent (0) and fully opaque (255) pixels.
   *
   * Both arguments are parenthesized so that any pointer expression
   * (e.g. `&fmt_struct`) can be passed safely.
   */
  #define ISOPAQUE(pixel, fmt) \
      ((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) == 255)

  #define ISTRANSL(pixel, fmt) \
      ((unsigned)((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) - 1U) < 254U)
  1018 
  1019 /* convert surface to be quickly alpha-blittable onto dest, if possible */
  1020 static int
  1021 RLEAlphaSurface(SDL_Surface * surface)
  1022 {
  1023     SDL_Surface *dest;
  1024     SDL_PixelFormat *df;
  1025     int maxsize = 0;
  1026     int max_opaque_run;
  1027     int max_transl_run = 65535;
  1028     unsigned masksum;
  1029     Uint8 *rlebuf, *dst;
  1030     int (*copy_opaque) (void *, Uint32 *, int,
  1031                         SDL_PixelFormat *, SDL_PixelFormat *);
  1032     int (*copy_transl) (void *, Uint32 *, int,
  1033                         SDL_PixelFormat *, SDL_PixelFormat *);
  1034 
  1035     dest = surface->map->dst;
  1036     if (!dest)
  1037         return -1;
  1038     df = dest->format;
  1039     if (surface->format->BitsPerPixel != 32)
  1040         return -1;              /* only 32bpp source supported */
  1041 
  1042     /* find out whether the destination is one we support,
  1043        and determine the max size of the encoded result */
  1044     masksum = df->Rmask | df->Gmask | df->Bmask;
  1045     switch (df->BytesPerPixel) {
  1046     case 2:
  1047         /* 16bpp: only support 565 and 555 formats */
  1048         switch (masksum) {
  1049         case 0xffff:
  1050             if (df->Gmask == 0x07e0
  1051                 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
  1052                 copy_opaque = copy_opaque_16;
  1053                 copy_transl = copy_transl_565;
  1054             } else
  1055                 return -1;
  1056             break;
  1057         case 0x7fff:
  1058             if (df->Gmask == 0x03e0
  1059                 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
  1060                 copy_opaque = copy_opaque_16;
  1061                 copy_transl = copy_transl_555;
  1062             } else
  1063                 return -1;
  1064             break;
  1065         default:
  1066             return -1;
  1067         }
  1068         max_opaque_run = 255;   /* runs stored as bytes */
  1069 
  1070         /* worst case is alternating opaque and translucent pixels,
  1071            with room for alignment padding between lines */
  1072         maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
  1073         break;
  1074     case 4:
  1075         if (masksum != 0x00ffffff)
  1076             return -1;          /* requires unused high byte */
  1077         copy_opaque = copy_32;
  1078         copy_transl = copy_32;
  1079         max_opaque_run = 255;   /* runs stored as short ints */
  1080 
  1081         /* worst case is alternating opaque and translucent pixels */
  1082         maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
  1083         break;
  1084     default:
  1085         return -1;              /* anything else unsupported right now */
  1086     }
  1087 
  1088     maxsize += sizeof(RLEDestFormat);
  1089     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1090     if (!rlebuf) {
  1091         SDL_OutOfMemory();
  1092         return -1;
  1093     }
  1094     {
  1095         /* save the destination format so we can undo the encoding later */
  1096         RLEDestFormat *r = (RLEDestFormat *) rlebuf;
  1097         r->BytesPerPixel = df->BytesPerPixel;
  1098         r->Rmask = df->Rmask;
  1099         r->Gmask = df->Gmask;
  1100         r->Bmask = df->Bmask;
  1101         r->Amask = df->Amask;
  1102         r->Rloss = df->Rloss;
  1103         r->Gloss = df->Gloss;
  1104         r->Bloss = df->Bloss;
  1105         r->Aloss = df->Aloss;
  1106         r->Rshift = df->Rshift;
  1107         r->Gshift = df->Gshift;
  1108         r->Bshift = df->Bshift;
  1109         r->Ashift = df->Ashift;
  1110     }
  1111     dst = rlebuf + sizeof(RLEDestFormat);
  1112 
  1113     /* Do the actual encoding */
  1114     {
  1115         int x, y;
  1116         int h = surface->h, w = surface->w;
  1117         SDL_PixelFormat *sf = surface->format;
  1118         Uint32 *src = (Uint32 *) surface->pixels;
  1119         Uint8 *lastline = dst;  /* end of last non-blank line */
  1120 
  1121         /* opaque counts are 8 or 16 bits, depending on target depth */
  1122 #define ADD_OPAQUE_COUNTS(n, m)			\
  1123 	if(df->BytesPerPixel == 4) {		\
  1124 	    ((Uint16 *)dst)[0] = n;		\
  1125 	    ((Uint16 *)dst)[1] = m;		\
  1126 	    dst += 4;				\
  1127 	} else {				\
  1128 	    dst[0] = n;				\
  1129 	    dst[1] = m;				\
  1130 	    dst += 2;				\
  1131 	}
  1132 
  1133         /* translucent counts are always 16 bit */
  1134 #define ADD_TRANSL_COUNTS(n, m)		\
  1135 	(((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
  1136 
  1137         for (y = 0; y < h; y++) {
  1138             int runstart, skipstart;
  1139             int blankline = 0;
  1140             /* First encode all opaque pixels of a scan line */
  1141             x = 0;
  1142             do {
  1143                 int run, skip, len;
  1144                 skipstart = x;
  1145                 while (x < w && !ISOPAQUE(src[x], sf))
  1146                     x++;
  1147                 runstart = x;
  1148                 while (x < w && ISOPAQUE(src[x], sf))
  1149                     x++;
  1150                 skip = runstart - skipstart;
  1151                 if (skip == w)
  1152                     blankline = 1;
  1153                 run = x - runstart;
  1154                 while (skip > max_opaque_run) {
  1155                     ADD_OPAQUE_COUNTS(max_opaque_run, 0);
  1156                     skip -= max_opaque_run;
  1157                 }
  1158                 len = MIN(run, max_opaque_run);
  1159                 ADD_OPAQUE_COUNTS(skip, len);
  1160                 dst += copy_opaque(dst, src + runstart, len, sf, df);
  1161                 runstart += len;
  1162                 run -= len;
  1163                 while (run) {
  1164                     len = MIN(run, max_opaque_run);
  1165                     ADD_OPAQUE_COUNTS(0, len);
  1166                     dst += copy_opaque(dst, src + runstart, len, sf, df);
  1167                     runstart += len;
  1168                     run -= len;
  1169                 }
  1170             } while (x < w);
  1171 
  1172             /* Make sure the next output address is 32-bit aligned */
  1173             dst += (uintptr_t) dst & 2;
  1174 
  1175             /* Next, encode all translucent pixels of the same scan line */
  1176             x = 0;
  1177             do {
  1178                 int run, skip, len;
  1179                 skipstart = x;
  1180                 while (x < w && !ISTRANSL(src[x], sf))
  1181                     x++;
  1182                 runstart = x;
  1183                 while (x < w && ISTRANSL(src[x], sf))
  1184                     x++;
  1185                 skip = runstart - skipstart;
  1186                 blankline &= (skip == w);
  1187                 run = x - runstart;
  1188                 while (skip > max_transl_run) {
  1189                     ADD_TRANSL_COUNTS(max_transl_run, 0);
  1190                     skip -= max_transl_run;
  1191                 }
  1192                 len = MIN(run, max_transl_run);
  1193                 ADD_TRANSL_COUNTS(skip, len);
  1194                 dst += copy_transl(dst, src + runstart, len, sf, df);
  1195                 runstart += len;
  1196                 run -= len;
  1197                 while (run) {
  1198                     len = MIN(run, max_transl_run);
  1199                     ADD_TRANSL_COUNTS(0, len);
  1200                     dst += copy_transl(dst, src + runstart, len, sf, df);
  1201                     runstart += len;
  1202                     run -= len;
  1203                 }
  1204                 if (!blankline)
  1205                     lastline = dst;
  1206             } while (x < w);
  1207 
  1208             src += surface->pitch >> 2;
  1209         }
  1210         dst = lastline;         /* back up past trailing blank lines */
  1211         ADD_OPAQUE_COUNTS(0, 0);
  1212     }
  1213 
  1214 #undef ADD_OPAQUE_COUNTS
  1215 #undef ADD_TRANSL_COUNTS
  1216 
  1217     /* Now that we have it encoded, release the original pixels */
  1218     if (!(surface->flags & SDL_PREALLOC)) {
  1219         SDL_free(surface->pixels);
  1220         surface->pixels = NULL;
  1221     }
  1222 
  1223     /* realloc the buffer to release unused memory */
  1224     {
  1225         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1226         if (!p)
  1227             p = rlebuf;
  1228         surface->map->data = p;
  1229     }
  1230 
  1231     return 0;
  1232 }
  1233 
  1234 static Uint32
  1235 getpix_8(Uint8 * srcbuf)
  1236 {
  1237     return *srcbuf;
  1238 }
  1239 
  1240 static Uint32
  1241 getpix_16(Uint8 * srcbuf)
  1242 {
  1243     return *(Uint16 *) srcbuf;
  1244 }
  1245 
  1246 static Uint32
  1247 getpix_24(Uint8 * srcbuf)
  1248 {
  1249 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  1250     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
  1251 #else
  1252     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
  1253 #endif
  1254 }
  1255 
  1256 static Uint32
  1257 getpix_32(Uint8 * srcbuf)
  1258 {
  1259     return *(Uint32 *) srcbuf;
  1260 }
  1261 
  1262 typedef Uint32(*getpix_func) (Uint8 *);
  1263 
  1264 static const getpix_func getpixes[4] = {
  1265     getpix_8, getpix_16, getpix_24, getpix_32
  1266 };
  1267 
  1268 static int
  1269 RLEColorkeySurface(SDL_Surface * surface)
  1270 {
  1271     Uint8 *rlebuf, *dst;
  1272     int maxn;
  1273     int y;
  1274     Uint8 *srcbuf, *curbuf, *lastline;
  1275     int maxsize = 0;
  1276     int skip, run;
  1277     int bpp = surface->format->BytesPerPixel;
  1278     getpix_func getpix;
  1279     Uint32 ckey, rgbmask;
  1280     int w, h;
  1281 
  1282     /* calculate the worst case size for the compressed surface */
  1283     switch (bpp) {
  1284     case 1:
  1285         /* worst case is alternating opaque and transparent pixels,
  1286            starting with an opaque pixel */
  1287         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
  1288         break;
  1289     case 2:
  1290     case 3:
  1291         /* worst case is solid runs, at most 255 pixels wide */
  1292         maxsize = surface->h * (2 * (surface->w / 255 + 1)
  1293                                 + surface->w * bpp) + 2;
  1294         break;
  1295     case 4:
  1296         /* worst case is solid runs, at most 65535 pixels wide */
  1297         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
  1298                                 + surface->w * 4) + 4;
  1299         break;
  1300     }
  1301 
  1302     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1303     if (rlebuf == NULL) {
  1304         SDL_OutOfMemory();
  1305         return (-1);
  1306     }
  1307 
  1308     /* Set up the conversion */
  1309     srcbuf = (Uint8 *) surface->pixels;
  1310     curbuf = srcbuf;
  1311     maxn = bpp == 4 ? 65535 : 255;
  1312     skip = run = 0;
  1313     dst = rlebuf;
  1314     rgbmask = ~surface->format->Amask;
  1315     ckey = surface->map->info.colorkey & rgbmask;
  1316     lastline = dst;
  1317     getpix = getpixes[bpp - 1];
  1318     w = surface->w;
  1319     h = surface->h;
  1320 
  1321 #define ADD_COUNTS(n, m)			\
  1322 	if(bpp == 4) {				\
  1323 	    ((Uint16 *)dst)[0] = n;		\
  1324 	    ((Uint16 *)dst)[1] = m;		\
  1325 	    dst += 4;				\
  1326 	} else {				\
  1327 	    dst[0] = n;				\
  1328 	    dst[1] = m;				\
  1329 	    dst += 2;				\
  1330 	}
  1331 
  1332     for (y = 0; y < h; y++) {
  1333         int x = 0;
  1334         int blankline = 0;
  1335         do {
  1336             int run, skip, len;
  1337             int runstart;
  1338             int skipstart = x;
  1339 
  1340             /* find run of transparent, then opaque pixels */
  1341             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
  1342                 x++;
  1343             runstart = x;
  1344             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
  1345                 x++;
  1346             skip = runstart - skipstart;
  1347             if (skip == w)
  1348                 blankline = 1;
  1349             run = x - runstart;
  1350 
  1351             /* encode segment */
  1352             while (skip > maxn) {
  1353                 ADD_COUNTS(maxn, 0);
  1354                 skip -= maxn;
  1355             }
  1356             len = MIN(run, maxn);
  1357             ADD_COUNTS(skip, len);
  1358             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1359             dst += len * bpp;
  1360             run -= len;
  1361             runstart += len;
  1362             while (run) {
  1363                 len = MIN(run, maxn);
  1364                 ADD_COUNTS(0, len);
  1365                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1366                 dst += len * bpp;
  1367                 runstart += len;
  1368                 run -= len;
  1369             }
  1370             if (!blankline)
  1371                 lastline = dst;
  1372         } while (x < w);
  1373 
  1374         srcbuf += surface->pitch;
  1375     }
  1376     dst = lastline;             /* back up bast trailing blank lines */
  1377     ADD_COUNTS(0, 0);
  1378 
  1379 #undef ADD_COUNTS
  1380 
  1381     /* Now that we have it encoded, release the original pixels */
  1382     if (!(surface->flags & SDL_PREALLOC)) {
  1383         SDL_free(surface->pixels);
  1384         surface->pixels = NULL;
  1385     }
  1386 
  1387     /* realloc the buffer to release unused memory */
  1388     {
  1389         /* If realloc returns NULL, the original block is left intact */
  1390         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1391         if (!p)
  1392             p = rlebuf;
  1393         surface->map->data = p;
  1394     }
  1395 
  1396     return (0);
  1397 }
  1398 
  1399 int
  1400 SDL_RLESurface(SDL_Surface * surface)
  1401 {
  1402     int flags;
  1403 
  1404     /* Clear any previous RLE conversion */
  1405     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
  1406         SDL_UnRLESurface(surface, 1);
  1407     }
  1408 
  1409     /* We don't support RLE encoding of bitmaps */
  1410     if (surface->format->BitsPerPixel < 8) {
  1411         return -1;
  1412     }
  1413 
  1414     /* Make sure the pixels are available */
  1415     if (!surface->pixels) {
  1416         return -1;
  1417     }
  1418 
  1419     /* If we don't have colorkey or blending, nothing to do... */
  1420     flags = surface->map->info.flags;
  1421     if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
  1422         return -1;
  1423     }
  1424 
  1425     /* Pass on combinations not supported */
  1426     if ((flags & SDL_COPY_MODULATE_COLOR) ||
  1427         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
  1428         (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
  1429         (flags & SDL_COPY_NEAREST)) {
  1430         return -1;
  1431     }
  1432 
  1433     /* Encode and set up the blit */
  1434     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
  1435         if (!surface->map->identity) {
  1436             return -1;
  1437         }
  1438         if (RLEColorkeySurface(surface) < 0) {
  1439             return -1;
  1440         }
  1441         surface->map->blit = SDL_RLEBlit;
  1442         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
  1443     } else {
  1444         if (RLEAlphaSurface(surface) < 0) {
  1445             return -1;
  1446         }
  1447         surface->map->blit = SDL_RLEAlphaBlit;
  1448         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
  1449     }
  1450 
  1451     /* The surface is now accelerated */
  1452     surface->flags |= SDL_RLEACCEL;
  1453 
  1454     return (0);
  1455 }
  1456 
  1457 /*
  1458  * Un-RLE a surface with pixel alpha
  1459  * This may not give back exactly the image before RLE-encoding; all
  1460  * completely transparent pixels will be lost, and colour and alpha depth
  1461  * may have been reduced (when encoding for 16bpp targets).
  1462  */
  1463 static SDL_bool
  1464 UnRLEAlpha(SDL_Surface * surface)
  1465 {
  1466     Uint8 *srcbuf;
  1467     Uint32 *dst;
  1468     SDL_PixelFormat *sf = surface->format;
  1469     RLEDestFormat *df = surface->map->data;
  1470     int (*uncopy_opaque) (Uint32 *, void *, int,
  1471                           RLEDestFormat *, SDL_PixelFormat *);
  1472     int (*uncopy_transl) (Uint32 *, void *, int,
  1473                           RLEDestFormat *, SDL_PixelFormat *);
  1474     int w = surface->w;
  1475     int bpp = df->BytesPerPixel;
  1476 
  1477     if (bpp == 2) {
  1478         uncopy_opaque = uncopy_opaque_16;
  1479         uncopy_transl = uncopy_transl_16;
  1480     } else {
  1481         uncopy_opaque = uncopy_transl = uncopy_32;
  1482     }
  1483 
  1484     surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1485     if (!surface->pixels) {
  1486         return (SDL_FALSE);
  1487     }
  1488     /* fill background with transparent pixels */
  1489     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
  1490 
  1491     dst = surface->pixels;
  1492     srcbuf = (Uint8 *) (df + 1);
  1493     for (;;) {
  1494         /* copy opaque pixels */
  1495         int ofs = 0;
  1496         do {
  1497             unsigned run;
  1498             if (bpp == 2) {
  1499                 ofs += srcbuf[0];
  1500                 run = srcbuf[1];
  1501                 srcbuf += 2;
  1502             } else {
  1503                 ofs += ((Uint16 *) srcbuf)[0];
  1504                 run = ((Uint16 *) srcbuf)[1];
  1505                 srcbuf += 4;
  1506             }
  1507             if (run) {
  1508                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
  1509                 ofs += run;
  1510             } else if (!ofs)
  1511                 return (SDL_TRUE);
  1512         } while (ofs < w);
  1513 
  1514         /* skip padding if needed */
  1515         if (bpp == 2)
  1516             srcbuf += (uintptr_t) srcbuf & 2;
  1517 
  1518         /* copy translucent pixels */
  1519         ofs = 0;
  1520         do {
  1521             unsigned run;
  1522             ofs += ((Uint16 *) srcbuf)[0];
  1523             run = ((Uint16 *) srcbuf)[1];
  1524             srcbuf += 4;
  1525             if (run) {
  1526                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
  1527                 ofs += run;
  1528             }
  1529         } while (ofs < w);
  1530         dst += surface->pitch >> 2;
  1531     }
  1532     /* Make the compiler happy */
  1533     return (SDL_TRUE);
  1534 }
  1535 
  1536 void
  1537 SDL_UnRLESurface(SDL_Surface * surface, int recode)
  1538 {
  1539     if (surface->flags & SDL_RLEACCEL) {
  1540         surface->flags &= ~SDL_RLEACCEL;
  1541 
  1542         if (recode && !(surface->flags & SDL_PREALLOC)) {
  1543             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
  1544                 SDL_Rect full;
  1545 
  1546                 /* re-create the original surface */
  1547                 surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1548                 if (!surface->pixels) {
  1549                     /* Oh crap... */
  1550                     surface->flags |= SDL_RLEACCEL;
  1551                     return;
  1552                 }
  1553 
  1554                 /* fill it with the background colour */
  1555                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
  1556 
  1557                 /* now render the encoded surface */
  1558                 full.x = full.y = 0;
  1559                 full.w = surface->w;
  1560                 full.h = surface->h;
  1561                 SDL_RLEBlit(surface, &full, surface, &full);
  1562             } else {
  1563                 if (!UnRLEAlpha(surface)) {
  1564                     /* Oh crap... */
  1565                     surface->flags |= SDL_RLEACCEL;
  1566                     return;
  1567                 }
  1568             }
  1569         }
  1570         surface->map->info.flags &=
  1571             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
  1572 
  1573         if (surface->map->data) {
  1574             SDL_free(surface->map->data);
  1575             surface->map->data = NULL;
  1576         }
  1577     }
  1578 }
  1579 
  1580 /* vi: set ts=4 sw=4 expandtab: */