src/video/SDL_RLEaccel.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 06 Mar 2011 21:12:19 -0800
changeset 5423 b69fa50e80d7
parent 5262 b530ef003506
child 5435 3de226f0cc06
permissions -rw-r--r--
a Nintendo ds update

Frank Zago to SDL

For those interested, here's a snapshot of the current port. I did away with
most of the previous attempt, which was based on the sprite engine, because the
support is limited to 128 64x64 sprites. Instead I'm using the gl engine.
The drawback is that either the frame buffer or the gl engine can be used
because there's not that much video memory on a DS.

With minimal changes to their code, it can now run the following tests:
testspriteminimal, testscale and testsprite2. The last 2 only run under the
emulator for some reason. The tests are not included in this patch for size
reasons.

In 16 bits mode, the 16th bit indicates transparency/opacity. If 0, the color
is not displayed. So I had to patch a few core files to set that bit to 1. See
the patch for src/video/SDL_RLEaccel.c and src/video/SDL_blit.h. Is that ok, or
is there a better way?

The nds also doesn't support windowed mode, so I force fullscreen in
src/video/SDL_video.c. Is that ok, or is there a better way?

To get a smaller library, I also tried to not compile the software renderer
when the hardware renderer is compiled in, and to define SDL_NO_COMPAT; however
the compilation eventually fails in SDL_surface.c because SDL_SRCCOLORKEY is
defined in SDL_compat.h. Is SDL_NO_COMPAT only for applications and not SDL
itself?
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2011 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /*
    25  * RLE encoding for software colorkey and alpha-channel acceleration
    26  *
    27  * Original version by Sam Lantinga
    28  *
    29  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
    30  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
    31  * format, encoder and blitter.
    32  *
    33  * Many thanks to Xark and johns for hints, benchmarks and useful comments
    34  * leading to this code.
    35  *
    36  * Welcome to Macro Mayhem.
    37  */
    38 
    39 /*
    40  * The encoding translates the image data to a stream of segments of the form
    41  *
    42  * <skip> <run> <data>
    43  *
    44  * where <skip> is the number of transparent pixels to skip,
    45  *       <run>  is the number of opaque pixels to blit,
    46  * and   <data> are the pixels themselves.
    47  *
    48  * This basic structure is used both for colorkeyed surfaces, used for simple
    49  * binary transparency and for per-surface alpha blending, and for surfaces
    50  * with per-pixel alpha. The details differ, however:
    51  *
    52  * Encoding of colorkeyed surfaces:
    53  *
    54  *   Encoded pixels always have the same format as the target surface.
    55  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
    56  *   where they are 16 bit. This makes the pixel data aligned at all times.
    57  *   Segments never wrap around from one scan line to the next.
    58  *
    59  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    60  *   beginning of a line.
    61  *
    62  * Encoding of surfaces with per-pixel alpha:
    63  *
    64  *   The sequence begins with a struct RLEDestFormat describing the target
    65  *   pixel format, to provide reliable un-encoding.
    66  *
    67  *   Each scan line is encoded twice: First all completely opaque pixels,
    68  *   encoded in the target format as described above, and then all
    69  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
    70  *   in the following 32-bit format:
    71  *
    72  *   For 32-bit targets, each pixel has the target RGB format but with
    73  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
    74  *   counts are 16 bit.
    75  * 
    76  *   For 16-bit targets, each pixel has the target RGB format, but with
    77  *   the middle component (usually green) shifted 16 steps to the left,
    78  *   and the hole filled with the 5 most significant bits of the alpha value.
    79  *   i.e. if the target has the format         rrrrrggggggbbbbb,
    80  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
    81  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
    82  *   for the translucent lines. Two padding bytes may be inserted
    83  *   before each translucent line to keep them 32-bit aligned.
    84  *
    85  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
    86  *   beginning of an opaque line.
    87  */
    88 
    89 #include "SDL_video.h"
    90 #include "SDL_sysvideo.h"
    91 #include "SDL_blit.h"
    92 #include "SDL_RLEaccel_c.h"
    93 
    /*
     * Fallback MAX/MIN helpers, defined only when no earlier header supplied
     * them. Either argument may be evaluated twice, so do not pass expressions
     * with side effects.
     */
    94 #ifndef MAX
    95 #define MAX(a, b) ((a) > (b) ? (a) : (b))
    96 #endif
    97 #ifndef MIN
    98 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    99 #endif
   100 
   /*
    * Copy `len` pixels of `bpp` bytes each from `from` to `to`.
    * 4-byte pixels are handed to SDL_memcpy4 with the pixel count; every other
    * depth goes through SDL_memcpy with a byte count of len * bpp.
    * NOTE(review): SDL_memcpy4 presumably takes a count of 32-bit words --
    * confirm against its definition, which is not in this file.
    * `bpp` is not parenthesized in the comparison, so pass a simple expression.
    */
   101 #define PIXEL_COPY(to, from, len, bpp)			\
   102 do {							\
   103     if(bpp == 4) {					\
   104 	SDL_memcpy4(to, from, (size_t)(len));		\
   105     } else {						\
   106 	SDL_memcpy(to, from, (size_t)(len) * (bpp));	\
   107     }							\
   108 } while(0)
   109 
   110 /*
   111  * Various colorkey blit methods, for opaque and per-surface alpha
        *
        * Every blit method takes the same (to, from, length, bpp, alpha)
        * argument list so that CHOOSE_BLIT can substitute one for another.
   112  */
   113 
       /* Opaque case: a plain pixel copy; the alpha argument is ignored. */
   114 #define OPAQUE_BLIT(to, from, length, bpp, alpha)	\
   115     PIXEL_COPY(to, from, length, bpp)
   116 
   117 /*
   118  * For 32bpp pixels of the form 0x00rrggbb:
   119  * If we treat the middle component separately, we can process the two
   120  * remaining in parallel. This is safe to do because of the gap to the left
   121  * of each component, so the bits from the multiplication don't collide.
   122  * This can be used for any RGB permutation of course.
        *
        * Blends `length` pixels from `from` over `to`; each component becomes
        * d + (s - d) * alpha >> 8, computed in wrapping unsigned arithmetic and
        * then masked back into its byte. The two masks (0xff00ff and 0xff00)
        * exclude bits 24-31, so the top byte of every written pixel is zero.
        * `bpp` is unused here, and `alpha` is expanded unparenthesized.
   123  */
   124 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)		\
   125     do {							\
   126         int i;							\
   127 	Uint32 *src = (Uint32 *)(from);				\
   128 	Uint32 *dst = (Uint32 *)(to);				\
   129 	for(i = 0; i < (int)(length); i++) {			\
   130 	    Uint32 s = *src++;					\
   131 	    Uint32 d = *dst;					\
   132 	    Uint32 s1 = s & 0xff00ff;				\
   133 	    Uint32 d1 = d & 0xff00ff;				\
   134 	    d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;	\
   135 	    s &= 0xff00;					\
   136 	    d &= 0xff00;					\
   137 	    d = (d + ((s - d) * alpha >> 8)) & 0xff00;		\
   138 	    *dst++ = d1 | d;					\
   139 	}							\
   140     } while(0)
   141 
   142 /*
   143  * For 16bpp pixels we can go a step further: put the middle component
   144  * in the high 16 bits of a 32 bit word, and process all three RGB
   145  * components at the same time. Since the smallest gap is here just
   146  * 5 bits, we have to scale alpha down to 5 bits as well.
        *
        * (x | x << 16) & 0x07e0f81f spreads a 565 pixel so that the outer two
        * components stay in the low half and the 6-bit middle component lands
        * in bits 21-26. After blending, d | d >> 16 folds the word back into
        * 16 bits. `alpha >> 3` reduces the 8-bit alpha to the 5 bits used as
        * the blend weight. `bpp` is unused.
   147  */
   148 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)	\
   149     do {						\
   150         int i;						\
   151 	Uint16 *src = (Uint16 *)(from);			\
   152 	Uint16 *dst = (Uint16 *)(to);			\
   153 	Uint32 ALPHA = alpha >> 3;			\
   154 	for(i = 0; i < (int)(length); i++) {		\
   155 	    Uint32 s = *src++;				\
   156 	    Uint32 d = *dst;				\
   157 	    s = (s | s << 16) & 0x07e0f81f;		\
   158 	    d = (d | d << 16) & 0x07e0f81f;		\
   159 	    d += (s - d) * ALPHA >> 5;			\
   160 	    d &= 0x07e0f81f;				\
   161 	    *dst++ = (Uint16)(d | d >> 16);			\
   162 	}						\
   163     } while(0)
   164 
       /*
        * Same technique for 555 layouts, using the mask 0x03e07c1f. The folded
        * result only carries bits 0-14, so bit 15 of each written pixel is zero.
        */
   165 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)	\
   166     do {						\
   167         int i;						\
   168 	Uint16 *src = (Uint16 *)(from);			\
   169 	Uint16 *dst = (Uint16 *)(to);			\
   170 	Uint32 ALPHA = alpha >> 3;			\
   171 	for(i = 0; i < (int)(length); i++) {		\
   172 	    Uint32 s = *src++;				\
   173 	    Uint32 d = *dst;				\
   174 	    s = (s | s << 16) & 0x03e07c1f;		\
   175 	    d = (d | d << 16) & 0x03e07c1f;		\
   176 	    d += (s - d) * ALPHA >> 5;			\
   177 	    d &= 0x03e07c1f;				\
   178 	    *dst++ = (Uint16)(d | d >> 16);			\
   179 	}						\
   180     } while(0)
   181 
   182 /*
   183  * The general slow catch-all function, for remaining depths and formats
        *
        * Reads one source and one destination pixel per iteration (2, 3 or 4
        * bytes wide; 3-byte pixels are assembled byte by byte according to
        * SDL_BYTEORDER), splits both into R, G and B with RGB_FROM_PIXEL,
        * blends each component as d + (s - d) * alpha >> 8, repacks with
        * PIXEL_FROM_RGB and stores the result in the destination.
        *
        * This macro refers to an identifier `fmt` that is not one of its
        * parameters; it must be in scope wherever the macro is expanded.
        * For a `bpp` other than 2, 3 or 4 neither switch has a matching case,
        * so `s` and `d` would be used uninitialized.
        * NOTE(review): RGB_FROM_PIXEL and PIXEL_FROM_RGB are defined outside
        * this file (presumably SDL_blit.h) -- confirm their rounding there.
   184  */
   185 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)			\
   186     do {								\
   187         int i;								\
   188 	Uint8 *src = from;						\
   189 	Uint8 *dst = to;						\
   190 	for(i = 0; i < (int)(length); i++) {				\
   191 	    Uint32 s, d;						\
   192 	    unsigned rs, gs, bs, rd, gd, bd;				\
   193 	    switch(bpp) {						\
   194 	    case 2:							\
   195 		s = *(Uint16 *)src;					\
   196 		d = *(Uint16 *)dst;					\
   197 		break;							\
   198 	    case 3:							\
   199 		if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {			\
   200 		    s = (src[0] << 16) | (src[1] << 8) | src[2];	\
   201 		    d = (dst[0] << 16) | (dst[1] << 8) | dst[2];	\
   202 		} else {						\
   203 		    s = (src[2] << 16) | (src[1] << 8) | src[0];	\
   204 		    d = (dst[2] << 16) | (dst[1] << 8) | dst[0];	\
   205 		}							\
   206 		break;							\
   207 	    case 4:							\
   208 		s = *(Uint32 *)src;					\
   209 		d = *(Uint32 *)dst;					\
   210 		break;							\
   211 	    }								\
   212 	    RGB_FROM_PIXEL(s, fmt, rs, gs, bs);				\
   213 	    RGB_FROM_PIXEL(d, fmt, rd, gd, bd);				\
   214 	    rd += (rs - rd) * alpha >> 8;				\
   215 	    gd += (gs - gd) * alpha >> 8;				\
   216 	    bd += (bs - bd) * alpha >> 8;				\
   217 	    PIXEL_FROM_RGB(d, fmt, rd, gd, bd);				\
   218 	    switch(bpp) {						\
   219 	    case 2:							\
   220 		*(Uint16 *)dst = (Uint16)d;					\
   221 		break;							\
   222 	    case 3:							\
   223 		if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {			\
   224 		    dst[0] = (Uint8)(d >> 16);					\
   225 		    dst[1] = (Uint8)(d >> 8);					\
   226 		    dst[2] = (Uint8)(d);						\
   227 		} else {						\
   228 		    dst[0] = (Uint8)d;						\
   229 		    dst[1] = (Uint8)(d >> 8);					\
   230 		    dst[2] = (Uint8)(d >> 16);					\
   231 		}							\
   232 		break;							\
   233 	    case 4:							\
   234 		*(Uint32 *)dst = d;					\
   235 		break;							\
   236 	    }								\
   237 	    src += bpp;							\
   238 	    dst += bpp;							\
   239 	}								\
   240     } while(0)
   241 
   242 /*
   243  * Special case: 50% alpha (alpha=128)
   244  * This is treated specially because it can be optimized very well, and
   245  * since it is good for many cases of semi-translucency.
   246  * The theory is to do all three components at the same time:
   247  * First zero the lowest bit of each component, which gives us room to
   248  * add them. Then shift right and add the sum of the lowest bits.
        *
        * 0x00fefefe keeps every component bit except the lowest of each byte;
        * s & d & 0x00010101 adds 1 back to a component only when both inputs
        * had its lowest bit set. Bits 24-31 of the written pixel are zero.
        * `bpp` and `alpha` are accepted for signature compatibility and unused.
   249  */
   250 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)		\
   251     do {								\
   252         int i;								\
   253 	Uint32 *src = (Uint32 *)(from);					\
   254 	Uint32 *dst = (Uint32 *)(to);					\
   255 	for(i = 0; i < (int)(length); i++) {				\
   256 	    Uint32 s = *src++;						\
   257 	    Uint32 d = *dst;						\
   258 	    *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)	\
   259 		     + (s & d & 0x00010101);				\
   260 	}								\
   261     } while(0)
   262 
   263 /*
   264  * For 16bpp, we can actually blend two pixels in parallel, if we take
   265  * care to shift before we add, not after.
   266  */
   267 
   /*
    * helper: blend a single 16 bit pixel at 50% and advance both pointers.
    * `dst` and `src` must be modifiable Uint16 pointers. `mask` holds the
    * bits to keep when adding: every bit except the lowest bit of each
    * component. The lowest bits are added back only where both pixels have
    * them set.
    */
   #define BLEND16_50(dst, src, mask)                                     \
       do {                                                               \
           Uint32 s = *(src)++;                                           \
           Uint32 d = *(dst);                                             \
           *(dst)++ = (Uint16)((((s & (mask)) + (d & (mask))) >> 1) +     \
                               (s & d & (~(Uint32)(mask) & 0xffff)));     \
       } while(0)

   /*
    * basic 16bpp blender. mask is the bits to keep when adding.
    *
    * Blends `length` pixels from `from` over `to` at 50%. When source and
    * destination have the same alignment within a 32-bit word, pixels are
    * processed two at a time through 32-bit loads and stores, with single
    * pixel steps for an unaligned first pixel and a trailing odd pixel.
    * Otherwise every pixel is blended individually. `bpp` and `alpha` are
    * accepted for signature compatibility and unused.
    *
    * The doubled mask is built in Uint32: shifting the plain int constant
    * (e.g. 0xf7de) left by 16 is not representable in int and is undefined.
    * The leading odd-pixel step is skipped for a zero length, which would
    * otherwise wrap the unsigned counter.
    */
   #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)            \
       do {                                                               \
           unsigned n = (length);                                         \
           Uint16 *src = (Uint16 *)(from);                                \
           Uint16 *dst = (Uint16 *)(to);                                  \
           const Uint32 mask2 = (Uint32)(mask) | ((Uint32)(mask) << 16);  \
           if(((uintptr_t)src ^ (uintptr_t)dst) & 3) {                    \
               /* source and destination not in phase, blit one by one */ \
               while(n--)                                                 \
                   BLEND16_50(dst, src, mask);                            \
           } else {                                                       \
               if(n && ((uintptr_t)src & 3)) {                            \
                   /* first odd pixel */                                  \
                   BLEND16_50(dst, src, mask);                            \
                   n--;                                                   \
               }                                                          \
               for(; n > 1; n -= 2) {                                     \
                   Uint32 s = *(Uint32 *)src;                             \
                   Uint32 d = *(Uint32 *)dst;                             \
                   *(Uint32 *)dst = ((s & mask2) >> 1)                    \
                                  + ((d & mask2) >> 1)                    \
                                  + (s & d & ~mask2);                     \
                   src += 2;                                              \
                   dst += 2;                                              \
               }                                                          \
               if(n)                                                      \
                   BLEND16_50(dst, src, mask); /* last odd pixel */       \
           }                                                              \
       } while(0)

   /* 50% blend for 565 pixels: 0xf7de clears the low bit of each component */
   #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)              \
       ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)

   /* 50% blend for 555 pixels: 0xfbde clears the low bit of each component */
   #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)              \
       ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
   312 
   /*
    * Pick a blit method from `alpha` and the pixel format `fmt`, then expand
    * `blitter(bpp, CountType, blit_macro)` with it. CountType is the integer
    * type of the <skip>/<run> counts: Uint8 for 1-3 bytes per pixel, Uint16
    * for 4 bytes per pixel.
    *
    *  - alpha == 255: OPAQUE_BLIT for every depth.
    *  - 1 byte per pixel with any other alpha: nothing is expanded.
    *  - 2 bytes per pixel: the 565 macros when the RGB masks cover 0xffff and
    *    one of them is 0x07e0; the 555 macros when they cover 0x7fff and one
    *    of them is 0x03e0; ALPHA_BLIT_ANY otherwise. alpha == 128 selects the
    *    _50 variant.
    *  - 3 bytes per pixel: ALPHA_BLIT_ANY.
    *  - 4 bytes per pixel: the 888 macros when the RGB masks cover 0x00ffffff
    *    and one of them is 0xff00; ALPHA_BLIT_ANY otherwise.
    *
    * The expansion contains the label `general16`, so it can appear at most
    * once per function.
    */
   313 #define CHOOSE_BLIT(blitter, alpha, fmt)				\
   314     do {								\
   315         if(alpha == 255) {						\
   316 	    switch(fmt->BytesPerPixel) {				\
   317 	    case 1: blitter(1, Uint8, OPAQUE_BLIT); break;		\
   318 	    case 2: blitter(2, Uint8, OPAQUE_BLIT); break;		\
   319 	    case 3: blitter(3, Uint8, OPAQUE_BLIT); break;		\
   320 	    case 4: blitter(4, Uint16, OPAQUE_BLIT); break;		\
   321 	    }								\
   322 	} else {							\
   323 	    switch(fmt->BytesPerPixel) {				\
   324 	    case 1:							\
   325 		/* No 8bpp alpha blitting */				\
   326 		break;							\
   327 									\
   328 	    case 2:							\
   329 		switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) {		\
   330 		case 0xffff:						\
   331 		    if(fmt->Gmask == 0x07e0				\
   332 		       || fmt->Rmask == 0x07e0				\
   333 		       || fmt->Bmask == 0x07e0) {			\
   334 			if(alpha == 128)				\
   335 			    blitter(2, Uint8, ALPHA_BLIT16_565_50);	\
   336 			else {						\
   337 			    blitter(2, Uint8, ALPHA_BLIT16_565);	\
   338 			}						\
   339 		    } else						\
   340 			goto general16;					\
   341 		    break;						\
   342 									\
   343 		case 0x7fff:						\
   344 		    if(fmt->Gmask == 0x03e0				\
   345 		       || fmt->Rmask == 0x03e0				\
   346 		       || fmt->Bmask == 0x03e0) {			\
   347 			if(alpha == 128)				\
   348 			    blitter(2, Uint8, ALPHA_BLIT16_555_50);	\
   349 			else {						\
   350 			    blitter(2, Uint8, ALPHA_BLIT16_555);	\
   351 			}						\
   352 			break;						\
   353 		    }							\
   354 		    /* fallthrough */					\
   355 									\
   356 		default:						\
   357 		general16:						\
   358 		    blitter(2, Uint8, ALPHA_BLIT_ANY);			\
   359 		}							\
   360 		break;							\
   361 									\
   362 	    case 3:							\
   363 		blitter(3, Uint8, ALPHA_BLIT_ANY);			\
   364 		break;							\
   365 									\
   366 	    case 4:							\
   367 		if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff	\
   368 		   && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00	\
   369 		       || fmt->Bmask == 0xff00)) {			\
   370 		    if(alpha == 128)					\
   371 			blitter(4, Uint16, ALPHA_BLIT32_888_50);	\
   372 		    else						\
   373 			blitter(4, Uint16, ALPHA_BLIT32_888);		\
   374 		} else							\
   375 		    blitter(4, Uint16, ALPHA_BLIT_ANY);			\
   376 		break;							\
   377 	    }								\
   378 	}								\
   379     } while(0)
   380 
   381 /*
   382  * This takes care of the case when the surface is clipped on the left and/or
   383  * right. Top clipping has already been taken care of.
        *
        * w       width at which an encoded row is considered complete
        * srcbuf  first encoded segment of the first row to draw
        * dst     destination surface; its format picks the blit method and its
        *         pitch advances dstbuf after each row
        * dstbuf  destination address for source column srcrect->x
        * srcrect clip rectangle; only x, w and h are read here
        * alpha   per-surface alpha, passed on to the chosen blit method
        *
        * dstbuf is first moved back by `left` pixels so that a source column
        * can be used directly as a destination offset. Every run is then cut
        * down to the columns [left, right) before it is blitted. The loop stops
        * after srcrect->h rows, or at a zero <skip>,<run> pair at the start of
        * a row.
   384  */
   385 static void
   386 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
   387             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
   388 {
   389     SDL_PixelFormat *fmt = dst->format;
   390 
   391 #define RLECLIPBLIT(bpp, Type, do_blit)					   \
   392     do {								   \
   393 	int linecount = srcrect->h;					   \
   394 	int ofs = 0;							   \
   395 	int left = srcrect->x;						   \
   396 	int right = left + srcrect->w;					   \
   397 	dstbuf -= left * bpp;						   \
   398 	for(;;) {							   \
   399 	    int run;							   \
   400 	    ofs += *(Type *)srcbuf;					   \
   401 	    run = ((Type *)srcbuf)[1];					   \
   402 	    srcbuf += 2 * sizeof(Type);					   \
   403 	    if(run) {							   \
   404 		/* clip to left and right borders */			   \
   405 		if(ofs < right) {					   \
   406 		    int start = 0;					   \
   407 		    int len = run;					   \
   408 		    int startcol;					   \
   409 		    if(left - ofs > 0) {				   \
   410 			start = left - ofs;				   \
   411 			len -= start;					   \
   412 			if(len <= 0)					   \
   413 			    goto nocopy ## bpp ## do_blit;		   \
   414 		    }							   \
   415 		    startcol = ofs + start;				   \
   416 		    if(len > right - startcol)				   \
   417 			len = right - startcol;				   \
   418 		    do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
   419 			    len, bpp, alpha);				   \
   420 		}							   \
   421 	    nocopy ## bpp ## do_blit:					   \
   422 		srcbuf += run * bpp;					   \
   423 		ofs += run;						   \
   424 	    } else if(!ofs)						   \
   425 		break;							   \
   426 	    if(ofs == w) {						   \
   427 		ofs = 0;						   \
   428 		dstbuf += dst->pitch;					   \
   429 		if(!--linecount)					   \
   430 		    break;						   \
   431 	    }								   \
   432 	}								   \
   433     } while(0)
   434 
           /* the label is pasted from bpp and do_blit so each expansion gets its own */
   435     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
   436 
   437 #undef RLECLIPBLIT
   438 
   439 }
   440 
   441 
   442 /* blit a colorkeyed RLE surface */
       /*
        * Draws the encoded data in src->map->data onto dst at dstrect->x/y.
        * srcrect->y encoded rows are skipped first; srcrect->h rows are drawn.
        * When srcrect->x is non-zero or srcrect->w differs from src->w the work
        * is handed to RLEClipBlit, otherwise whole runs are blitted directly.
        * The per-surface alpha comes from src->map->info.a.
        *
        * dst is locked for the duration when SDL_MUSTLOCK says so.
        * Returns -1 if that lock fails and 0 otherwise, including when the
        * end-of-data marker is met early.
        *
        * NOTE(review): the destination offset is computed with
        * src->format->BytesPerPixel, which assumes the encoded surface and dst
        * have the same depth -- confirm with the encoder.
        */
   443 int
   444 SDL_RLEBlit(SDL_Surface * src, SDL_Rect * srcrect,
   445             SDL_Surface * dst, SDL_Rect * dstrect)
   446 {
   447     Uint8 *dstbuf;
   448     Uint8 *srcbuf;
   449     int x, y;
   450     int w = src->w;
   451     unsigned alpha;
   452 
   453     /* Lock the destination if necessary */
   454     if (SDL_MUSTLOCK(dst)) {
   455         if (SDL_LockSurface(dst) < 0) {
   456             return (-1);
   457         }
   458     }
   459 
   460     /* Set up the source and destination pointers */
   461     x = dstrect->x;
   462     y = dstrect->y;
   463     dstbuf = (Uint8 *) dst->pixels
   464         + y * dst->pitch + x * src->format->BytesPerPixel;
   465     srcbuf = (Uint8 *) src->map->data;
   466 
   467     {
   468         /* skip lines at the top if necessary */
   469         int vskip = srcrect->y;
   470         int ofs = 0;
   471         if (vskip) {
   472 
               /* walk the segments without drawing until vskip rows have been passed */
   473 #define RLESKIP(bpp, Type)			\
   474 		for(;;) {			\
   475 		    int run;			\
   476 		    ofs += *(Type *)srcbuf;	\
   477 		    run = ((Type *)srcbuf)[1];	\
   478 		    srcbuf += sizeof(Type) * 2;	\
   479 		    if(run) {			\
   480 			srcbuf += run * bpp;	\
   481 			ofs += run;		\
   482 		    } else if(!ofs)		\
   483 			goto done;		\
   484 		    if(ofs == w) {		\
   485 			ofs = 0;		\
   486 			if(!--vskip)		\
   487 			    break;		\
   488 		    }				\
   489 		}
   490 
   491             switch (src->format->BytesPerPixel) {
   492             case 1:
   493                 RLESKIP(1, Uint8);
   494                 break;
   495             case 2:
   496                 RLESKIP(2, Uint8);
   497                 break;
   498             case 3:
   499                 RLESKIP(3, Uint8);
   500                 break;
   501             case 4:
   502                 RLESKIP(4, Uint16);
   503                 break;
   504             }
   505 
   506 #undef RLESKIP
   507 
   508         }
   509     }
   510 
   511     alpha = src->map->info.a;
   512     /* if left or right edge clipping needed, call clip blit */
   513     if (srcrect->x || srcrect->w != src->w) {
   514         RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha);
   515     } else {
   516         SDL_PixelFormat *fmt = src->format;
   517 
               /* unclipped: every run is blitted whole at column ofs */
   518 #define RLEBLIT(bpp, Type, do_blit)					      \
   519 	    do {							      \
   520 		int linecount = srcrect->h;				      \
   521 		int ofs = 0;						      \
   522 		for(;;) {						      \
   523 		    unsigned run;					      \
   524 		    ofs += *(Type *)srcbuf;				      \
   525 		    run = ((Type *)srcbuf)[1];				      \
   526 		    srcbuf += 2 * sizeof(Type);				      \
   527 		    if(run) {						      \
   528 			do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
   529 			srcbuf += run * bpp;				      \
   530 			ofs += run;					      \
   531 		    } else if(!ofs)					      \
   532 			break;						      \
   533 		    if(ofs == w) {					      \
   534 			ofs = 0;					      \
   535 			dstbuf += dst->pitch;				      \
   536 			if(!--linecount)				      \
   537 			    break;					      \
   538 		    }							      \
   539 		}							      \
   540 	    } while(0)
   541 
   542         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
   543 
   544 #undef RLEBLIT
   545     }
   546 
   547   done:
   548     /* Unlock the destination if necessary */
   549     if (SDL_MUSTLOCK(dst)) {
   550         SDL_UnlockSurface(dst);
   551     }
   552     return (0);
   553 }
   554 
   555 #undef OPAQUE_BLIT
   556 
   557 /*
   558  * Per-pixel blitting macros for translucent pixels:
   559  * These use the same techniques as the per-surface blitting macros
   560  */
   561 
   562 /*
   563  * For 32bpp pixels, we have made sure the alpha is stored in the top
   564  * 8 bits, so proceed as usual
        *
        * `src` is the encoded pixel (read once), `dst` the destination lvalue.
        * The written pixel always has its top byte set to 0xff.
   565  */
   566 #define BLIT_TRANSL_888(src, dst)				\
   567     do {							\
   568         Uint32 s = src;						\
   569 	Uint32 d = dst;						\
   570 	unsigned alpha = s >> 24;				\
   571 	Uint32 s1 = s & 0xff00ff;				\
   572 	Uint32 d1 = d & 0xff00ff;				\
   573 	d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;	\
   574 	s &= 0xff00;						\
   575 	d &= 0xff00;						\
   576 	d = (d + ((s - d) * alpha >> 8)) & 0xff00;		\
   577 	dst = d1 | d | 0xff000000;				\
   578     } while(0)
   579 
   580 /*
   581  * For 16bpp pixels, we have stored the 5 most significant alpha bits in
   582  * bits 5-9. As before, we can process all 3 RGB components at the same time.
        *
        * The encoded source pixel already has its middle component in the high
        * 16 bits, so only the alpha bits need masking off; the destination is
        * spread with (d | d << 16) before blending and folded back afterwards.
   583  */
   584 #define BLIT_TRANSL_565(src, dst)		\
   585     do {					\
   586 	Uint32 s = src;				\
   587 	Uint32 d = dst;				\
   588 	unsigned alpha = (s & 0x3e0) >> 5;	\
   589 	s &= 0x07e0f81f;			\
   590 	d = (d | d << 16) & 0x07e0f81f;		\
   591 	d += (s - d) * alpha >> 5;		\
   592 	d &= 0x07e0f81f;			\
   593 	dst = (Uint16)(d | d >> 16);			\
   594     } while(0)
   595 
       /* 555 variant of the above; only the component mask differs */
   596 #define BLIT_TRANSL_555(src, dst)		\
   597     do {					\
   598 	Uint32 s = src;				\
   599 	Uint32 d = dst;				\
   600 	unsigned alpha = (s & 0x3e0) >> 5;	\
   601 	s &= 0x03e07c1f;			\
   602 	d = (d | d << 16) & 0x03e07c1f;		\
   603 	d += (s - d) * alpha >> 5;		\
   604 	d &= 0x03e07c1f;			\
   605 	dst = (Uint16)(d | d >> 16);			\
   606     } while(0)
   607 
   608 /* used to save the destination format in the encoding. Designed to be
   609    macro-compatible with SDL_PixelFormat but without the unneeded fields.
          The per-pixel alpha blitters in this file skip sizeof(RLEDestFormat)
          bytes at the start of the encoded data before reading segments.
          NOTE(review): the field names mirror SDL_PixelFormat; their exact
          meaning is presumably the same -- confirm against SDL_PixelFormat. */
   610 typedef struct
   611 {
   612     Uint8 BytesPerPixel;
   613     Uint8 Rloss;
   614     Uint8 Gloss;
   615     Uint8 Bloss;
   616     Uint8 Rshift;
   617     Uint8 Gshift;
   618     Uint8 Bshift;
   619     Uint8 Ashift;
   620     Uint32 Rmask;
   621     Uint32 Gmask;
   622     Uint32 Bmask;
   623     Uint32 Amask;
   624 } RLEDestFormat;
   625 
   626 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
       /*
        * w       width at which an encoded line is considered complete
        * srcbuf  first opaque segment of the first row to draw
        * dst     destination surface (format selects the variant, pitch steps rows)
        * dstbuf  destination address for source column srcrect->x
        * srcrect clip rectangle; only x, w and h are read here
        *
        * Each row is stored as an opaque line followed by a translucent line.
        * Opaque runs are copied with PIXEL_COPY, translucent runs are blended
        * pixel by pixel with do_blend; both are first cut down to the columns
        * [left, right). The function returns as soon as it meets a zero
        * <skip>,<run> pair at the start of an opaque line. Only 2 and 4 byte
        * destination formats are handled; any other depth draws nothing.
        */
   627 static void
   628 RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * dst,
   629                  Uint8 * dstbuf, SDL_Rect * srcrect)
   630 {
   631     SDL_PixelFormat *df = dst->format;
   632     /*
   633      * clipped blitter: Ptype is the destination pixel type,
   634      * Ctype the translucent count type, and do_blend the macro
   635      * to blend one pixel.
   636      */
   637 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)			  \
   638     do {								  \
   639 	int linecount = srcrect->h;					  \
   640 	int left = srcrect->x;						  \
   641 	int right = left + srcrect->w;					  \
   642 	dstbuf -= left * sizeof(Ptype);					  \
   643 	do {								  \
   644 	    int ofs = 0;						  \
   645 	    /* blit opaque pixels on one line */			  \
   646 	    do {							  \
   647 		unsigned run;						  \
   648 		ofs += ((Ctype *)srcbuf)[0];				  \
   649 		run = ((Ctype *)srcbuf)[1];				  \
   650 		srcbuf += 2 * sizeof(Ctype);				  \
   651 		if(run) {						  \
   652 		    /* clip to left and right borders */		  \
   653 		    int cofs = ofs;					  \
   654 		    int crun = run;					  \
   655 		    if(left - cofs > 0) {				  \
   656 			crun -= left - cofs;				  \
   657 			cofs = left;					  \
   658 		    }							  \
   659 		    if(crun > right - cofs)				  \
   660 			crun = right - cofs;				  \
   661 		    if(crun > 0)					  \
   662 			PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),	  \
   663 				   srcbuf + (cofs - ofs) * sizeof(Ptype), \
   664 				   (unsigned)crun, sizeof(Ptype));	  \
   665 		    srcbuf += run * sizeof(Ptype);			  \
   666 		    ofs += run;						  \
   667 		} else if(!ofs)						  \
   668 		    return;						  \
   669 	    } while(ofs < w);						  \
   670 	    /* skip padding if necessary */				  \
   671 	    if(sizeof(Ptype) == 2)					  \
   672 		srcbuf += (uintptr_t)srcbuf & 2;			  \
   673 	    /* blit translucent pixels on the same line */		  \
   674 	    ofs = 0;							  \
   675 	    do {							  \
   676 		unsigned run;						  \
   677 		ofs += ((Uint16 *)srcbuf)[0];				  \
   678 		run = ((Uint16 *)srcbuf)[1];				  \
   679 		srcbuf += 4;						  \
   680 		if(run) {						  \
   681 		    /* clip to left and right borders */		  \
   682 		    int cofs = ofs;					  \
   683 		    int crun = run;					  \
   684 		    if(left - cofs > 0) {				  \
   685 			crun -= left - cofs;				  \
   686 			cofs = left;					  \
   687 		    }							  \
   688 		    if(crun > right - cofs)				  \
   689 			crun = right - cofs;				  \
   690 		    if(crun > 0) {					  \
   691 			Ptype *dst = (Ptype *)dstbuf + cofs;		  \
   692 			Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);	  \
   693 			int i;						  \
   694 			for(i = 0; i < crun; i++)			  \
   695 			    do_blend(src[i], dst[i]);			  \
   696 		    }							  \
   697 		    srcbuf += run * 4;					  \
   698 		    ofs += run;						  \
   699 		}							  \
   700 	    } while(ofs < w);						  \
   701 	    dstbuf += dst->pitch;					  \
   702 	} while(--linecount);						  \
   703     } while(0)
   704 
           /* 16-bit formats: any channel mask equal to 0x07e0 selects the 565 blend */
   705     switch (df->BytesPerPixel) {
   706     case 2:
   707         if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
   708             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
   709         else
   710             RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
   711         break;
   712     case 4:
   713         RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
   714         break;
   715     }
   716 }
   717 
   718 /* blit a pixel-alpha RLE surface */
   719 int
   720 SDL_RLEAlphaBlit(SDL_Surface * src, SDL_Rect * srcrect,
   721                  SDL_Surface * dst, SDL_Rect * dstrect)
   722 {
   723     int x, y;
   724     int w = src->w;
   725     Uint8 *srcbuf, *dstbuf;
   726     SDL_PixelFormat *df = dst->format;
   727 
   728     /* Lock the destination if necessary */
   729     if (SDL_MUSTLOCK(dst)) {
   730         if (SDL_LockSurface(dst) < 0) {
   731             return -1;
   732         }
   733     }
   734 
   735     x = dstrect->x;
   736     y = dstrect->y;
   737     dstbuf = (Uint8 *) dst->pixels + y * dst->pitch + x * df->BytesPerPixel;
   738     srcbuf = (Uint8 *) src->map->data + sizeof(RLEDestFormat);
   739 
   740     {
   741         /* skip lines at the top if necessary */
   742         int vskip = srcrect->y;
   743         if (vskip) {
   744             int ofs;
   745             if (df->BytesPerPixel == 2) {
   746                 /* the 16/32 interleaved format */
   747                 do {
   748                     /* skip opaque line */
   749                     ofs = 0;
   750                     do {
   751                         int run;
   752                         ofs += srcbuf[0];
   753                         run = srcbuf[1];
   754                         srcbuf += 2;
   755                         if (run) {
   756                             srcbuf += 2 * run;
   757                             ofs += run;
   758                         } else if (!ofs)
   759                             goto done;
   760                     } while (ofs < w);
   761 
   762                     /* skip padding */
   763                     srcbuf += (uintptr_t) srcbuf & 2;
   764 
   765                     /* skip translucent line */
   766                     ofs = 0;
   767                     do {
   768                         int run;
   769                         ofs += ((Uint16 *) srcbuf)[0];
   770                         run = ((Uint16 *) srcbuf)[1];
   771                         srcbuf += 4 * (run + 1);
   772                         ofs += run;
   773                     } while (ofs < w);
   774                 } while (--vskip);
   775             } else {
   776                 /* the 32/32 interleaved format */
   777                 vskip <<= 1;    /* opaque and translucent have same format */
   778                 do {
   779                     ofs = 0;
   780                     do {
   781                         int run;
   782                         ofs += ((Uint16 *) srcbuf)[0];
   783                         run = ((Uint16 *) srcbuf)[1];
   784                         srcbuf += 4;
   785                         if (run) {
   786                             srcbuf += 4 * run;
   787                             ofs += run;
   788                         } else if (!ofs)
   789                             goto done;
   790                     } while (ofs < w);
   791                 } while (--vskip);
   792             }
   793         }
   794     }
   795 
   796     /* if left or right edge clipping needed, call clip blit */
   797     if (srcrect->x || srcrect->w != src->w) {
   798         RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect);
   799     } else {
   800 
   801         /*
   802          * non-clipped blitter. Ptype is the destination pixel type,
   803          * Ctype the translucent count type, and do_blend the
   804          * macro to blend one pixel.
   805          */
   806 #define RLEALPHABLIT(Ptype, Ctype, do_blend)				 \
   807 	do {								 \
   808 	    int linecount = srcrect->h;					 \
   809 	    do {							 \
   810 		int ofs = 0;						 \
   811 		/* blit opaque pixels on one line */			 \
   812 		do {							 \
   813 		    unsigned run;					 \
   814 		    ofs += ((Ctype *)srcbuf)[0];			 \
   815 		    run = ((Ctype *)srcbuf)[1];				 \
   816 		    srcbuf += 2 * sizeof(Ctype);			 \
   817 		    if(run) {						 \
   818 			PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
   819 				   run, sizeof(Ptype));			 \
   820 			srcbuf += run * sizeof(Ptype);			 \
   821 			ofs += run;					 \
   822 		    } else if(!ofs)					 \
   823 			goto done;					 \
   824 		} while(ofs < w);					 \
   825 		/* skip padding if necessary */				 \
   826 		if(sizeof(Ptype) == 2)					 \
   827 		    srcbuf += (uintptr_t)srcbuf & 2;		 	 \
   828 		/* blit translucent pixels on the same line */		 \
   829 		ofs = 0;						 \
   830 		do {							 \
   831 		    unsigned run;					 \
   832 		    ofs += ((Uint16 *)srcbuf)[0];			 \
   833 		    run = ((Uint16 *)srcbuf)[1];			 \
   834 		    srcbuf += 4;					 \
   835 		    if(run) {						 \
   836 			Ptype *dst = (Ptype *)dstbuf + ofs;		 \
   837 			unsigned i;					 \
   838 			for(i = 0; i < run; i++) {			 \
   839 			    Uint32 src = *(Uint32 *)srcbuf;		 \
   840 			    do_blend(src, *dst);			 \
   841 			    srcbuf += 4;				 \
   842 			    dst++;					 \
   843 			}						 \
   844 			ofs += run;					 \
   845 		    }							 \
   846 		} while(ofs < w);					 \
   847 		dstbuf += dst->pitch;					 \
   848 	    } while(--linecount);					 \
   849 	} while(0)
   850 
   851         switch (df->BytesPerPixel) {
   852         case 2:
   853             if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
   854                 || df->Bmask == 0x07e0)
   855                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
   856             else
   857                 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
   858             break;
   859         case 4:
   860             RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
   861             break;
   862         }
   863     }
   864 
   865   done:
   866     /* Unlock the destination if necessary */
   867     if (SDL_MUSTLOCK(dst)) {
   868         SDL_UnlockSurface(dst);
   869     }
   870     return 0;
   871 }
   872 
   873 /*
   874  * Auxiliary functions:
   875  * The encoding functions take 32bpp rgb + a, and
   876  * return the number of bytes copied to the destination.
   877  * The decoding functions copy to 32bpp rgb + a, and
   878  * return the number of bytes copied from the source.
   879  * These are only used in the encoder and un-RLE code and are therefore not
   880  * highly optimised.
   881  */
   882 
   883 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
   884 static int
   885 copy_opaque_16(void *dst, Uint32 * src, int n,
   886                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   887 {
   888     int i;
   889     Uint16 *d = dst;
   890     for (i = 0; i < n; i++) {
   891         unsigned r, g, b;
   892         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
   893         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
   894 #ifdef __NDS__
   895 		*d |= NDS_BIT15;
   896 #endif
   897         src++;
   898         d++;
   899     }
   900     return n * 2;
   901 }
   902 
   903 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
   904 static int
   905 uncopy_opaque_16(Uint32 * dst, void *src, int n,
   906                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   907 {
   908     int i;
   909     Uint16 *s = src;
   910     unsigned alpha = dfmt->Amask ? 255 : 0;
   911     for (i = 0; i < n; i++) {
   912         unsigned r, g, b;
   913         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
   914         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
   915         s++;
   916         dst++;
   917     }
   918     return n * 2;
   919 }
   920 
   921 
   922 
   923 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
   924 static int
   925 copy_transl_565(void *dst, Uint32 * src, int n,
   926                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   927 {
   928     int i;
   929     Uint32 *d = dst;
   930     for (i = 0; i < n; i++) {
   931         unsigned r, g, b, a;
   932         Uint16 pix;
   933         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   934         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   935         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
   936         src++;
   937         d++;
   938     }
   939     return n * 4;
   940 }
   941 
   942 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
   943 static int
   944 copy_transl_555(void *dst, Uint32 * src, int n,
   945                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   946 {
   947     int i;
   948     Uint32 *d = dst;
   949     for (i = 0; i < n; i++) {
   950         unsigned r, g, b, a;
   951         Uint16 pix;
   952         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   953         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
   954         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0) | NDS_BIT15;
   955         src++;
   956         d++;
   957     }
   958     return n * 4;
   959 }
   960 
   961 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
   962 static int
   963 uncopy_transl_16(Uint32 * dst, void *src, int n,
   964                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
   965 {
   966     int i;
   967     Uint32 *s = src;
   968     for (i = 0; i < n; i++) {
   969         unsigned r, g, b, a;
   970         Uint32 pix = *s++;
   971         a = (pix & 0x3e0) >> 2;
   972         pix = (pix & ~0x3e0) | pix >> 16;
   973         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
   974         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   975         dst++;
   976     }
   977     return n * 4;
   978 }
   979 
   980 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   981 static int
   982 copy_32(void *dst, Uint32 * src, int n,
   983         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
   984 {
   985     int i;
   986     Uint32 *d = dst;
   987     for (i = 0; i < n; i++) {
   988         unsigned r, g, b, a;
   989         Uint32 pixel;
   990         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
   991         PIXEL_FROM_RGB(pixel, dfmt, r, g, b);
   992         *d++ = pixel | a << 24;
   993         src++;
   994     }
   995     return n * 4;
   996 }
   997 
   998 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
   999 static int
  1000 uncopy_32(Uint32 * dst, void *src, int n,
  1001           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
  1002 {
  1003     int i;
  1004     Uint32 *s = src;
  1005     for (i = 0; i < n; i++) {
  1006         unsigned r, g, b, a;
  1007         Uint32 pixel = *s++;
  1008         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
  1009         a = pixel >> 24;
  1010         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
  1011         dst++;
  1012     }
  1013     return n * 4;
  1014 }
  1015 
  1016 #define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255)
  1017 
  1018 #define ISTRANSL(pixel, fmt)	\
  1019     ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U)
  1020 
  1021 /* convert surface to be quickly alpha-blittable onto dest, if possible */
  1022 static int
  1023 RLEAlphaSurface(SDL_Surface * surface)
  1024 {
  1025     SDL_Surface *dest;
  1026     SDL_PixelFormat *df;
  1027     int maxsize = 0;
  1028     int max_opaque_run;
  1029     int max_transl_run = 65535;
  1030     unsigned masksum;
  1031     Uint8 *rlebuf, *dst;
  1032     int (*copy_opaque) (void *, Uint32 *, int,
  1033                         SDL_PixelFormat *, SDL_PixelFormat *);
  1034     int (*copy_transl) (void *, Uint32 *, int,
  1035                         SDL_PixelFormat *, SDL_PixelFormat *);
  1036 
  1037     dest = surface->map->dst;
  1038     if (!dest)
  1039         return -1;
  1040     df = dest->format;
  1041     if (surface->format->BitsPerPixel != 32)
  1042         return -1;              /* only 32bpp source supported */
  1043 
  1044     /* find out whether the destination is one we support,
  1045        and determine the max size of the encoded result */
  1046     masksum = df->Rmask | df->Gmask | df->Bmask;
  1047     switch (df->BytesPerPixel) {
  1048     case 2:
  1049         /* 16bpp: only support 565 and 555 formats */
  1050         switch (masksum) {
  1051         case 0xffff:
  1052             if (df->Gmask == 0x07e0
  1053                 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
  1054                 copy_opaque = copy_opaque_16;
  1055                 copy_transl = copy_transl_565;
  1056             } else
  1057                 return -1;
  1058             break;
  1059         case 0x7fff:
  1060             if (df->Gmask == 0x03e0
  1061                 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
  1062                 copy_opaque = copy_opaque_16;
  1063                 copy_transl = copy_transl_555;
  1064             } else
  1065                 return -1;
  1066             break;
  1067         default:
  1068             return -1;
  1069         }
  1070         max_opaque_run = 255;   /* runs stored as bytes */
  1071 
  1072         /* worst case is alternating opaque and translucent pixels,
  1073            with room for alignment padding between lines */
  1074         maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
  1075         break;
  1076     case 4:
  1077         if (masksum != 0x00ffffff)
  1078             return -1;          /* requires unused high byte */
  1079         copy_opaque = copy_32;
  1080         copy_transl = copy_32;
  1081         max_opaque_run = 255;   /* runs stored as short ints */
  1082 
  1083         /* worst case is alternating opaque and translucent pixels */
  1084         maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
  1085         break;
  1086     default:
  1087         return -1;              /* anything else unsupported right now */
  1088     }
  1089 
  1090     maxsize += sizeof(RLEDestFormat);
  1091     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1092     if (!rlebuf) {
  1093         SDL_OutOfMemory();
  1094         return -1;
  1095     }
  1096     {
  1097         /* save the destination format so we can undo the encoding later */
  1098         RLEDestFormat *r = (RLEDestFormat *) rlebuf;
  1099         r->BytesPerPixel = df->BytesPerPixel;
  1100         r->Rloss = df->Rloss;
  1101         r->Gloss = df->Gloss;
  1102         r->Bloss = df->Bloss;
  1103         r->Rshift = df->Rshift;
  1104         r->Gshift = df->Gshift;
  1105         r->Bshift = df->Bshift;
  1106         r->Ashift = df->Ashift;
  1107         r->Rmask = df->Rmask;
  1108         r->Gmask = df->Gmask;
  1109         r->Bmask = df->Bmask;
  1110         r->Amask = df->Amask;
  1111     }
  1112     dst = rlebuf + sizeof(RLEDestFormat);
  1113 
  1114     /* Do the actual encoding */
  1115     {
  1116         int x, y;
  1117         int h = surface->h, w = surface->w;
  1118         SDL_PixelFormat *sf = surface->format;
  1119         Uint32 *src = (Uint32 *) surface->pixels;
  1120         Uint8 *lastline = dst;  /* end of last non-blank line */
  1121 
  1122         /* opaque counts are 8 or 16 bits, depending on target depth */
  1123 #define ADD_OPAQUE_COUNTS(n, m)			\
  1124 	if(df->BytesPerPixel == 4) {		\
  1125 	    ((Uint16 *)dst)[0] = n;		\
  1126 	    ((Uint16 *)dst)[1] = m;		\
  1127 	    dst += 4;				\
  1128 	} else {				\
  1129 	    dst[0] = n;				\
  1130 	    dst[1] = m;				\
  1131 	    dst += 2;				\
  1132 	}
  1133 
  1134         /* translucent counts are always 16 bit */
  1135 #define ADD_TRANSL_COUNTS(n, m)		\
  1136 	(((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
  1137 
  1138         for (y = 0; y < h; y++) {
  1139             int runstart, skipstart;
  1140             int blankline = 0;
  1141             /* First encode all opaque pixels of a scan line */
  1142             x = 0;
  1143             do {
  1144                 int run, skip, len;
  1145                 skipstart = x;
  1146                 while (x < w && !ISOPAQUE(src[x], sf))
  1147                     x++;
  1148                 runstart = x;
  1149                 while (x < w && ISOPAQUE(src[x], sf))
  1150                     x++;
  1151                 skip = runstart - skipstart;
  1152                 if (skip == w)
  1153                     blankline = 1;
  1154                 run = x - runstart;
  1155                 while (skip > max_opaque_run) {
  1156                     ADD_OPAQUE_COUNTS(max_opaque_run, 0);
  1157                     skip -= max_opaque_run;
  1158                 }
  1159                 len = MIN(run, max_opaque_run);
  1160                 ADD_OPAQUE_COUNTS(skip, len);
  1161                 dst += copy_opaque(dst, src + runstart, len, sf, df);
  1162                 runstart += len;
  1163                 run -= len;
  1164                 while (run) {
  1165                     len = MIN(run, max_opaque_run);
  1166                     ADD_OPAQUE_COUNTS(0, len);
  1167                     dst += copy_opaque(dst, src + runstart, len, sf, df);
  1168                     runstart += len;
  1169                     run -= len;
  1170                 }
  1171             } while (x < w);
  1172 
  1173             /* Make sure the next output address is 32-bit aligned */
  1174             dst += (uintptr_t) dst & 2;
  1175 
  1176             /* Next, encode all translucent pixels of the same scan line */
  1177             x = 0;
  1178             do {
  1179                 int run, skip, len;
  1180                 skipstart = x;
  1181                 while (x < w && !ISTRANSL(src[x], sf))
  1182                     x++;
  1183                 runstart = x;
  1184                 while (x < w && ISTRANSL(src[x], sf))
  1185                     x++;
  1186                 skip = runstart - skipstart;
  1187                 blankline &= (skip == w);
  1188                 run = x - runstart;
  1189                 while (skip > max_transl_run) {
  1190                     ADD_TRANSL_COUNTS(max_transl_run, 0);
  1191                     skip -= max_transl_run;
  1192                 }
  1193                 len = MIN(run, max_transl_run);
  1194                 ADD_TRANSL_COUNTS(skip, len);
  1195                 dst += copy_transl(dst, src + runstart, len, sf, df);
  1196                 runstart += len;
  1197                 run -= len;
  1198                 while (run) {
  1199                     len = MIN(run, max_transl_run);
  1200                     ADD_TRANSL_COUNTS(0, len);
  1201                     dst += copy_transl(dst, src + runstart, len, sf, df);
  1202                     runstart += len;
  1203                     run -= len;
  1204                 }
  1205                 if (!blankline)
  1206                     lastline = dst;
  1207             } while (x < w);
  1208 
  1209             src += surface->pitch >> 2;
  1210         }
  1211         dst = lastline;         /* back up past trailing blank lines */
  1212         ADD_OPAQUE_COUNTS(0, 0);
  1213     }
  1214 
  1215 #undef ADD_OPAQUE_COUNTS
  1216 #undef ADD_TRANSL_COUNTS
  1217 
  1218     /* Now that we have it encoded, release the original pixels */
  1219     if (!(surface->flags & SDL_PREALLOC)) {
  1220         SDL_free(surface->pixels);
  1221         surface->pixels = NULL;
  1222     }
  1223 
  1224     /* realloc the buffer to release unused memory */
  1225     {
  1226         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1227         if (!p)
  1228             p = rlebuf;
  1229         surface->map->data = p;
  1230     }
  1231 
  1232     return 0;
  1233 }
  1234 
  1235 static Uint32
  1236 getpix_8(Uint8 * srcbuf)
  1237 {
  1238     return *srcbuf;
  1239 }
  1240 
  1241 static Uint32
  1242 getpix_16(Uint8 * srcbuf)
  1243 {
  1244     return *(Uint16 *) srcbuf;
  1245 }
  1246 
  1247 static Uint32
  1248 getpix_24(Uint8 * srcbuf)
  1249 {
  1250 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  1251     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
  1252 #else
  1253     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
  1254 #endif
  1255 }
  1256 
  1257 static Uint32
  1258 getpix_32(Uint8 * srcbuf)
  1259 {
  1260     return *(Uint32 *) srcbuf;
  1261 }
  1262 
  1263 typedef Uint32(*getpix_func) (Uint8 *);
  1264 
  1265 static const getpix_func getpixes[4] = {
  1266     getpix_8, getpix_16, getpix_24, getpix_32
  1267 };
  1268 
  1269 static int
  1270 RLEColorkeySurface(SDL_Surface * surface)
  1271 {
  1272     Uint8 *rlebuf, *dst;
  1273     int maxn;
  1274     int y;
  1275     Uint8 *srcbuf, *curbuf, *lastline;
  1276     int maxsize = 0;
  1277     int skip, run;
  1278     int bpp = surface->format->BytesPerPixel;
  1279     getpix_func getpix;
  1280     Uint32 ckey, rgbmask;
  1281     int w, h;
  1282 
  1283     /* calculate the worst case size for the compressed surface */
  1284     switch (bpp) {
  1285     case 1:
  1286         /* worst case is alternating opaque and transparent pixels,
  1287            starting with an opaque pixel */
  1288         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
  1289         break;
  1290     case 2:
  1291     case 3:
  1292         /* worst case is solid runs, at most 255 pixels wide */
  1293         maxsize = surface->h * (2 * (surface->w / 255 + 1)
  1294                                 + surface->w * bpp) + 2;
  1295         break;
  1296     case 4:
  1297         /* worst case is solid runs, at most 65535 pixels wide */
  1298         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
  1299                                 + surface->w * 4) + 4;
  1300         break;
  1301     }
  1302 
  1303     rlebuf = (Uint8 *) SDL_malloc(maxsize);
  1304     if (rlebuf == NULL) {
  1305         SDL_OutOfMemory();
  1306         return (-1);
  1307     }
  1308 
  1309     /* Set up the conversion */
  1310     srcbuf = (Uint8 *) surface->pixels;
  1311     curbuf = srcbuf;
  1312     maxn = bpp == 4 ? 65535 : 255;
  1313     skip = run = 0;
  1314     dst = rlebuf;
  1315     rgbmask = ~surface->format->Amask;
  1316     ckey = surface->map->info.colorkey & rgbmask;
  1317     lastline = dst;
  1318     getpix = getpixes[bpp - 1];
  1319     w = surface->w;
  1320     h = surface->h;
  1321 
  1322 #define ADD_COUNTS(n, m)			\
  1323 	if(bpp == 4) {				\
  1324 	    ((Uint16 *)dst)[0] = n;		\
  1325 	    ((Uint16 *)dst)[1] = m;		\
  1326 	    dst += 4;				\
  1327 	} else {				\
  1328 	    dst[0] = n;				\
  1329 	    dst[1] = m;				\
  1330 	    dst += 2;				\
  1331 	}
  1332 
  1333     for (y = 0; y < h; y++) {
  1334         int x = 0;
  1335         int blankline = 0;
  1336         do {
  1337             int run, skip, len;
  1338             int runstart;
  1339             int skipstart = x;
  1340 
  1341             /* find run of transparent, then opaque pixels */
  1342             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
  1343                 x++;
  1344             runstart = x;
  1345             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
  1346                 x++;
  1347             skip = runstart - skipstart;
  1348             if (skip == w)
  1349                 blankline = 1;
  1350             run = x - runstart;
  1351 
  1352             /* encode segment */
  1353             while (skip > maxn) {
  1354                 ADD_COUNTS(maxn, 0);
  1355                 skip -= maxn;
  1356             }
  1357             len = MIN(run, maxn);
  1358             ADD_COUNTS(skip, len);
  1359             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1360             dst += len * bpp;
  1361             run -= len;
  1362             runstart += len;
  1363             while (run) {
  1364                 len = MIN(run, maxn);
  1365                 ADD_COUNTS(0, len);
  1366                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
  1367                 dst += len * bpp;
  1368                 runstart += len;
  1369                 run -= len;
  1370             }
  1371             if (!blankline)
  1372                 lastline = dst;
  1373         } while (x < w);
  1374 
  1375         srcbuf += surface->pitch;
  1376     }
  1377     dst = lastline;             /* back up bast trailing blank lines */
  1378     ADD_COUNTS(0, 0);
  1379 
  1380 #undef ADD_COUNTS
  1381 
  1382     /* Now that we have it encoded, release the original pixels */
  1383     if (!(surface->flags & SDL_PREALLOC)) {
  1384         SDL_free(surface->pixels);
  1385         surface->pixels = NULL;
  1386     }
  1387 
  1388     /* realloc the buffer to release unused memory */
  1389     {
  1390         /* If realloc returns NULL, the original block is left intact */
  1391         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
  1392         if (!p)
  1393             p = rlebuf;
  1394         surface->map->data = p;
  1395     }
  1396 
  1397     return (0);
  1398 }
  1399 
  1400 int
  1401 SDL_RLESurface(SDL_Surface * surface)
  1402 {
  1403     int flags;
  1404 
  1405     /* Clear any previous RLE conversion */
  1406     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
  1407         SDL_UnRLESurface(surface, 1);
  1408     }
  1409 
  1410     /* We don't support RLE encoding of bitmaps */
  1411     if (surface->format->BitsPerPixel < 8) {
  1412         return -1;
  1413     }
  1414 
  1415     /* Make sure the pixels are available */
  1416     if (!surface->pixels) {
  1417         return -1;
  1418     }
  1419 
  1420     /* If we don't have colorkey or blending, nothing to do... */
  1421     flags = surface->map->info.flags;
  1422     if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
  1423         return -1;
  1424     }
  1425 
  1426     /* Pass on combinations not supported */
  1427     if ((flags & SDL_COPY_MODULATE_COLOR) ||
  1428         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
  1429         (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
  1430         (flags & SDL_COPY_NEAREST)) {
  1431         return -1;
  1432     }
  1433 
  1434     /* Encode and set up the blit */
  1435     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
  1436         if (!surface->map->identity) {
  1437             return -1;
  1438         }
  1439         if (RLEColorkeySurface(surface) < 0) {
  1440             return -1;
  1441         }
  1442         surface->map->blit = SDL_RLEBlit;
  1443         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
  1444     } else {
  1445         if (RLEAlphaSurface(surface) < 0) {
  1446             return -1;
  1447         }
  1448         surface->map->blit = SDL_RLEAlphaBlit;
  1449         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
  1450     }
  1451 
  1452     /* The surface is now accelerated */
  1453     surface->flags |= SDL_RLEACCEL;
  1454 
  1455     return (0);
  1456 }
  1457 
  1458 /*
  1459  * Un-RLE a surface with pixel alpha
  1460  * This may not give back exactly the image before RLE-encoding; all
  1461  * completely transparent pixels will be lost, and colour and alpha depth
  1462  * may have been reduced (when encoding for 16bpp targets).
  1463  */
  1464 static SDL_bool
  1465 UnRLEAlpha(SDL_Surface * surface)
  1466 {
  1467     Uint8 *srcbuf;
  1468     Uint32 *dst;
  1469     SDL_PixelFormat *sf = surface->format;
  1470     RLEDestFormat *df = surface->map->data;
  1471     int (*uncopy_opaque) (Uint32 *, void *, int,
  1472                           RLEDestFormat *, SDL_PixelFormat *);
  1473     int (*uncopy_transl) (Uint32 *, void *, int,
  1474                           RLEDestFormat *, SDL_PixelFormat *);
  1475     int w = surface->w;
  1476     int bpp = df->BytesPerPixel;
  1477 
  1478     if (bpp == 2) {
  1479         uncopy_opaque = uncopy_opaque_16;
  1480         uncopy_transl = uncopy_transl_16;
  1481     } else {
  1482         uncopy_opaque = uncopy_transl = uncopy_32;
  1483     }
  1484 
  1485     surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1486     if (!surface->pixels) {
  1487         return (SDL_FALSE);
  1488     }
  1489     /* fill background with transparent pixels */
  1490     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
  1491 
  1492     dst = surface->pixels;
  1493     srcbuf = (Uint8 *) (df + 1);
  1494     for (;;) {
  1495         /* copy opaque pixels */
  1496         int ofs = 0;
  1497         do {
  1498             unsigned run;
  1499             if (bpp == 2) {
  1500                 ofs += srcbuf[0];
  1501                 run = srcbuf[1];
  1502                 srcbuf += 2;
  1503             } else {
  1504                 ofs += ((Uint16 *) srcbuf)[0];
  1505                 run = ((Uint16 *) srcbuf)[1];
  1506                 srcbuf += 4;
  1507             }
  1508             if (run) {
  1509                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
  1510                 ofs += run;
  1511             } else if (!ofs)
  1512                 return (SDL_TRUE);
  1513         } while (ofs < w);
  1514 
  1515         /* skip padding if needed */
  1516         if (bpp == 2)
  1517             srcbuf += (uintptr_t) srcbuf & 2;
  1518 
  1519         /* copy translucent pixels */
  1520         ofs = 0;
  1521         do {
  1522             unsigned run;
  1523             ofs += ((Uint16 *) srcbuf)[0];
  1524             run = ((Uint16 *) srcbuf)[1];
  1525             srcbuf += 4;
  1526             if (run) {
  1527                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
  1528                 ofs += run;
  1529             }
  1530         } while (ofs < w);
  1531         dst += surface->pitch >> 2;
  1532     }
  1533     /* Make the compiler happy */
  1534     return (SDL_TRUE);
  1535 }
  1536 
  1537 void
  1538 SDL_UnRLESurface(SDL_Surface * surface, int recode)
  1539 {
  1540     if (surface->flags & SDL_RLEACCEL) {
  1541         surface->flags &= ~SDL_RLEACCEL;
  1542 
  1543         if (recode && !(surface->flags & SDL_PREALLOC)) {
  1544             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
  1545                 SDL_Rect full;
  1546 
  1547                 /* re-create the original surface */
  1548                 surface->pixels = SDL_malloc(surface->h * surface->pitch);
  1549                 if (!surface->pixels) {
  1550                     /* Oh crap... */
  1551                     surface->flags |= SDL_RLEACCEL;
  1552                     return;
  1553                 }
  1554 
  1555                 /* fill it with the background colour */
  1556                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
  1557 
  1558                 /* now render the encoded surface */
  1559                 full.x = full.y = 0;
  1560                 full.w = surface->w;
  1561                 full.h = surface->h;
  1562                 SDL_RLEBlit(surface, &full, surface, &full);
  1563             } else {
  1564                 if (!UnRLEAlpha(surface)) {
  1565                     /* Oh crap... */
  1566                     surface->flags |= SDL_RLEACCEL;
  1567                     return;
  1568                 }
  1569             }
  1570         }
  1571         surface->map->info.flags &=
  1572             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
  1573 
  1574         if (surface->map->data) {
  1575             SDL_free(surface->map->data);
  1576             surface->map->data = NULL;
  1577         }
  1578     }
  1579 }
  1580 
  1581 /* vi: set ts=4 sw=4 expandtab: */