/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2006 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    Sam Lantinga
    slouken@libsdl.org
*/

/*
 * RLE encoding for software colorkey and alpha-channel acceleration
 *
 * Original version by Sam Lantinga
 *
 * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
 * decoder. Added per-surface alpha blitter. Added per-pixel alpha
 * format, encoder and blitter.
 *
 * Many thanks to Xark and johns for hints, benchmarks and useful comments
 * leading to this code.
 *
 * Welcome to Macro Mayhem.
 */

/*
 * The encoding translates the image data to a stream of segments of the form
 *
 * <skip> <run> <data>
 *
 * where <skip> is the number of transparent pixels to skip,
 *       <run>  is the number of opaque pixels to blit,
 * and   <data> are the pixels themselves.
 *
 * This basic structure is used both for colorkeyed surfaces, used for simple
 * binary transparency and for per-surface alpha blending, and for surfaces
 * with per-pixel alpha. The details differ, however:
 *
 * Encoding of colorkeyed surfaces:
 *
 *   Encoded pixels always have the same format as the target surface.
 *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
 *   where they are 16 bit. This makes the pixel data aligned at all times.
 *   Segments never wrap around from one scan line to the next.
*
 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
 *   beginning of a line.
 *
 * Encoding of surfaces with per-pixel alpha:
 *
 *   The sequence begins with a struct RLEDestFormat describing the target
 *   pixel format, to provide reliable un-encoding.
 *
 *   Each scan line is encoded twice: First all completely opaque pixels,
 *   encoded in the target format as described above, and then all
 *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
 *   in the following 32-bit format:
 *
 *   For 32-bit targets, each pixel has the target RGB format but with
 *   the alpha value occupying the highest 8 bits. The <skip> and <run>
 *   counts are 16 bit.
 *
 *   For 16-bit targets, each pixel has the target RGB format, but with
 *   the middle component (usually green) shifted 16 steps to the left,
 *   and the hole filled with the 5 most significant bits of the alpha value.
 *   i.e. if the target has the format         rrrrrggggggbbbbb,
 *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
 *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
 *   for the translucent lines. Two padding bytes may be inserted
 *   before each translucent line to keep them 32-bit aligned.
 *
 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
 *   beginning of an opaque line.
 */

#include "SDL_video.h"
#include "SDL_sysvideo.h"
#include "SDL_blit.h"
#include "SDL_RLEaccel_c.h"

/* The inline-MMX blitters are only built for GCC on x86/x86-64 when
   USE_ASMBLIT is defined. */
#if (defined(i386) || defined(__x86_64__)) && defined(__GNUC__) && defined(USE_ASMBLIT)
#define MMX_ASMBLIT
#endif

#ifdef MMX_ASMBLIT
#include "mmx.h"
#include "SDL_cpuinfo.h"
#endif

/* Note: both macros evaluate their arguments more than once. */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif

/*
 * Copy `len` pixels of `bpp` bytes each from `from` to `to`.
 * For bpp == 4 the count handed to SDL_memcpy4 is `len` itself
 * (presumably a count of 32-bit words -- confirm against SDL_memcpy4);
 * otherwise SDL_memcpy is given len * bpp bytes.
 */
#define PIXEL_COPY(to, from, len, bpp) \
do { \
    if(bpp == 4) { \
        SDL_memcpy4(to, from, (unsigned)(len)); \
    } else { \
        SDL_memcpy(to, from, (unsigned)(len) * (bpp)); \
    } \
} while(0)

/*
 * Various colorkey blit methods, for opaque and per-surface alpha
 *
 * All of them share the parameter list (to, from, length, bpp, alpha) so
 * that they can be passed interchangeably as the `do_blit` argument of the
 * blitter macros chosen by CHOOSE_BLIT. Not every method uses every
 * parameter.
 */

/* Opaque case: a plain pixel copy; `alpha` is ignored. */
#define OPAQUE_BLIT(to, from, length, bpp, alpha) \
    PIXEL_COPY(to, from, length, bpp)

#ifdef MMX_ASMBLIT

/*
 * MMX per-surface alpha blend of `length` 32-bit pixels.
 * If `length` is odd a single pixel is blended first; the remaining pixels
 * are blended two per loop iteration (note that `i` is decremented twice
 * per iteration: once in the loop body and once in the for header).
 * mm3 holds the 0x00FF00FF mask, mm4 the alpha multiplier and mm5 the
 * inverse of 0xFF000000, which is used to clear the top byte of each
 * written pixel.
 */
#define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha) \
do { \
    Uint32 *srcp = (Uint32 *)(from); \
    Uint32 *dstp = (Uint32 *)(to); \
    int i = 0x00FF00FF; \
    movd_m2r(*(&i), mm3); \
    punpckldq_r2r(mm3, mm3); \
    i = 0xFF000000; \
    movd_m2r(*(&i), mm7); \
    punpckldq_r2r(mm7, mm7); \
    i = alpha | alpha << 16; \
    movd_m2r(*(&i), mm4); \
    punpckldq_r2r(mm4, mm4); \
    pcmpeqd_r2r(mm5,mm5); /* set mm5 to "1" */ \
    pxor_r2r(mm7, mm5); /* make clear alpha mask */ \
    i = length; \
    if(i & 1) { \
        movd_m2r((*srcp), mm1); /* src -> mm1 */ \
        punpcklbw_r2r(mm1, mm1); \
        pand_r2r(mm3, mm1); \
        movd_m2r((*dstp), mm2); /* dst -> mm2 */ \
        punpcklbw_r2r(mm2, mm2); \
        pand_r2r(mm3, mm2); \
        psubw_r2r(mm2, mm1); \
        pmullw_r2r(mm4, mm1); \
        psrlw_i2r(8, mm1); \
        paddw_r2r(mm1, mm2); \
        pand_r2r(mm3, mm2); \
        packuswb_r2r(mm2, mm2); \
        pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \
        movd_r2m(mm2, *dstp); \
        ++srcp; \
        ++dstp; \
        i--; \
    } \
    for(; i > 0; --i) { \
        movq_m2r((*srcp), mm0); \
        movq_r2r(mm0, mm1); \
        punpcklbw_r2r(mm0, mm0); \
        movq_m2r((*dstp), mm2); \
        punpckhbw_r2r(mm1, mm1); \
        movq_r2r(mm2, mm6); \
        pand_r2r(mm3, mm0); \
        punpcklbw_r2r(mm2, mm2); \
        pand_r2r(mm3, mm1); \
        punpckhbw_r2r(mm6, mm6); \
        pand_r2r(mm3, mm2); \
        psubw_r2r(mm2, mm0); \
        pmullw_r2r(mm4, mm0); \
        pand_r2r(mm3, mm6); \
        psubw_r2r(mm6, mm1); \
        pmullw_r2r(mm4, mm1); \
        psrlw_i2r(8, mm0); \
        paddw_r2r(mm0, mm2); \
        psrlw_i2r(8, mm1); \
        paddw_r2r(mm1, mm6); \
        pand_r2r(mm3, mm2); \
        pand_r2r(mm3, mm6); \
        packuswb_r2r(mm2, mm2); \
        packuswb_r2r(mm6, mm6); \
        psrlq_i2r(32, mm2); \
        psllq_i2r(32, mm6); \
        por_r2r(mm6, mm2); \
        pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \
        movq_r2m(mm2, *dstp); \
        srcp += 2; \
        dstp += 2; \
        i--; \
    } \
    emms(); \
} while(0)

/*
 * MMX per-surface alpha blend of `length` 16-bit pixels using the field
 * masks 0xF800 (mm1), 0x07E0 (mm4) and 0x001F (mm7).
 * NOTE: `alpha` is modified in place (its low three bits are cleared), so
 * the argument must be a modifiable lvalue.
 * The first (length & 3) pixels are blended with scalar code; the rest are
 * blended four per loop iteration (`i` drops by four per iteration: the
 * `i -= 3` in the body plus the `--i` in the for header).
 */
#define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha) \
do { \
    int i, n = 0; \
    Uint16 *srcp = (Uint16 *)(from); \
    Uint16 *dstp = (Uint16 *)(to); \
    Uint32 ALPHA = 0xF800; \
    movd_m2r(*(&ALPHA), mm1); \
    punpcklwd_r2r(mm1, mm1); \
    punpcklwd_r2r(mm1, mm1); \
    ALPHA = 0x07E0; \
    movd_m2r(*(&ALPHA), mm4); \
    punpcklwd_r2r(mm4, mm4); \
    punpcklwd_r2r(mm4, mm4); \
    ALPHA = 0x001F; \
    movd_m2r(*(&ALPHA), mm7); \
    punpcklwd_r2r(mm7, mm7); \
    punpcklwd_r2r(mm7, mm7); \
    alpha &= ~(1+2+4); \
    i = (Uint32)alpha | (Uint32)alpha << 16; \
    movd_m2r(*(&i), mm0); \
    punpckldq_r2r(mm0, mm0); \
    ALPHA = alpha >> 3; \
    i = ((int)(length) & 3); \
    for(; i > 0; --i) { \
        Uint32 s = *srcp++; \
        Uint32 d = *dstp; \
        s = (s | s << 16) & 0x07e0f81f; \
        d = (d | d << 16) & 0x07e0f81f; \
        d += (s - d) * ALPHA >> 5; \
        d &= 0x07e0f81f; \
        *dstp++ = d | d >> 16; \
        n++; \
    } \
    i = (int)(length) - n; \
    for(; i > 0; --i) { \
        movq_m2r((*dstp), mm3); \
        movq_m2r((*srcp), mm2); \
        movq_r2r(mm2, mm5); /* field 0xF800: blend, then merge into mm3 */ \
        pand_r2r(mm1 , mm5); \
        psrlq_i2r(11, mm5); \
        movq_r2r(mm3, mm6); \
        pand_r2r(mm1 , mm6); \
        psrlq_i2r(11, mm6); \
        psubw_r2r(mm6, mm5); \
        pmullw_r2r(mm0, mm5); \
        psrlw_i2r(8, mm5); \
        paddw_r2r(mm5, mm6); \
        psllq_i2r(11, mm6); \
        pand_r2r(mm1, mm6); \
        movq_r2r(mm4, mm5); \
        por_r2r(mm7, mm5); \
        pand_r2r(mm5, mm3); \
        por_r2r(mm6, mm3); \
        movq_r2r(mm2, mm5); /* field 0x07E0: blend, then merge into mm3 */ \
        pand_r2r(mm4 , mm5); \
        psrlq_i2r(5, mm5); \
        movq_r2r(mm3, mm6); \
        pand_r2r(mm4 , mm6); \
        psrlq_i2r(5, mm6); \
        psubw_r2r(mm6, mm5); \
        pmullw_r2r(mm0, mm5); \
        psrlw_i2r(8, mm5); \
        paddw_r2r(mm5, mm6); \
        psllq_i2r(5, mm6); \
        pand_r2r(mm4, mm6); \
        movq_r2r(mm1, mm5); \
        por_r2r(mm7, mm5); \
        pand_r2r(mm5, mm3); \
        por_r2r(mm6, mm3); \
        movq_r2r(mm2, mm5); /* field 0x001F: blend, then merge into mm3 */ \
        pand_r2r(mm7 , mm5); \
        movq_r2r(mm3, mm6); \
        pand_r2r(mm7 , mm6); \
        psubw_r2r(mm6, mm5); \
        pmullw_r2r(mm0, mm5); \
        psrlw_i2r(8, mm5); \
        paddw_r2r(mm5, mm6); \
        pand_r2r(mm7, mm6); \
        movq_r2r(mm1, mm5); \
        por_r2r(mm4, mm5); \
        pand_r2r(mm5, mm3); \
        por_r2r(mm6, mm3); \
        movq_r2m(mm3, *dstp); \
        srcp += 4; \
        dstp += 4; \
        i -= 3; \
    } \
    emms(); \
} while(0)

/*
 * Same as ALPHA_BLIT16_565MMX but with the field masks 0x7C00 (mm1),
 * 0x03E0 (mm4) and 0x001F (mm7), and a shift of 10 for the top field.
 * `alpha` is likewise modified in place.
 */
#define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha) \
do { \
    int i, n = 0; \
    Uint16 *srcp = (Uint16 *)(from); \
    Uint16 *dstp = (Uint16 *)(to); \
    Uint32 ALPHA = 0x7C00; \
    movd_m2r(*(&ALPHA), mm1); \
    punpcklwd_r2r(mm1, mm1); \
    punpcklwd_r2r(mm1, mm1); \
    ALPHA = 0x03E0; \
    movd_m2r(*(&ALPHA), mm4); \
    punpcklwd_r2r(mm4, mm4); \
    punpcklwd_r2r(mm4, mm4); \
    ALPHA = 0x001F; \
    movd_m2r(*(&ALPHA), mm7); \
    punpcklwd_r2r(mm7, mm7); \
    punpcklwd_r2r(mm7, mm7); \
    alpha &= ~(1+2+4); \
    i = (Uint32)alpha | (Uint32)alpha << 16; \
    movd_m2r(*(&i), mm0); \
    punpckldq_r2r(mm0, mm0); \
    i = ((int)(length) & 3); \
    ALPHA = alpha >> 3; \
    for(; i > 0; --i) { \
        Uint32 s = *srcp++; \
        Uint32 d = *dstp; \
        s = (s | s << 16) & 0x03e07c1f; \
        d = (d | d << 16) & 0x03e07c1f; \
        d += (s - d) * ALPHA >> 5; \
        d &= 0x03e07c1f; \
        *dstp++ = d | d >> 16; \
        n++; \
    } \
    i = (int)(length) - n; \
    for(; i > 0; --i) { \
        movq_m2r((*dstp), mm3); \
        movq_m2r((*srcp), mm2); \
        movq_r2r(mm2, mm5); /* field 0x7C00: blend, then merge into mm3 */ \
        pand_r2r(mm1 , mm5); \
        psrlq_i2r(10, mm5); \
        movq_r2r(mm3, mm6); \
        pand_r2r(mm1 , mm6); \
        psrlq_i2r(10, mm6); \
        psubw_r2r(mm6, mm5); \
        pmullw_r2r(mm0, mm5); \
        psrlw_i2r(8, mm5); \
        paddw_r2r(mm5, mm6); \
        psllq_i2r(10, mm6); \
        pand_r2r(mm1, mm6); \
        movq_r2r(mm4, mm5); \
        por_r2r(mm7, mm5); \
        pand_r2r(mm5, mm3); \
        por_r2r(mm6, mm3); \
        movq_r2r(mm2, mm5); /* field 0x03E0: blend, then merge into mm3 */ \
        pand_r2r(mm4 , mm5); \
        psrlq_i2r(5, mm5); \
        movq_r2r(mm3, mm6); \
        pand_r2r(mm4 , mm6); \
        psrlq_i2r(5, mm6); \
        psubw_r2r(mm6, mm5); \
        pmullw_r2r(mm0, mm5); \
        psrlw_i2r(8, mm5); \
        paddw_r2r(mm5, mm6); \
        psllq_i2r(5, mm6); \
        pand_r2r(mm4, mm6); \
        movq_r2r(mm1, mm5); \
        por_r2r(mm7, mm5); \
        pand_r2r(mm5, mm3); \
        por_r2r(mm6, mm3); \
        movq_r2r(mm2, mm5); /* field 0x001F: blend, then merge into mm3 */ \
        pand_r2r(mm7 , mm5); \
        movq_r2r(mm3, mm6); \
        pand_r2r(mm7 , mm6); \
        psubw_r2r(mm6, mm5); \
        pmullw_r2r(mm0, mm5); \
        psrlw_i2r(8, mm5); \
        paddw_r2r(mm5, mm6); \
        pand_r2r(mm7, mm6); \
        movq_r2r(mm1, mm5); \
        por_r2r(mm4, mm5); \
        pand_r2r(mm5, mm3); \
        por_r2r(mm6, mm3); \
        movq_r2m(mm3, *dstp); \
        srcp += 4; \
        dstp += 4; \
        i -= 3; \
    } \
    emms(); \
} while(0)

#endif

/*
 * For 32bpp pixels on the form 0x00rrggbb:
 * If we treat the middle component separately, we can process the two
 * remaining in parallel. This is safe to do because of the gap to the left
 * of each component, so the bits from the multiplication don't collide.
 * This can be used for any RGB permutation of course.
 *
 * The written pixel is d1 | d, i.e. only bits covered by 0xff00ff and
 * 0xff00 survive; the top byte of the destination is written as zero.
 */
#define ALPHA_BLIT32_888(to, from, length, bpp, alpha) \
do { \
    int i; \
    Uint32 *src = (Uint32 *)(from); \
    Uint32 *dst = (Uint32 *)(to); \
    for(i = 0; i < (int)(length); i++) { \
        Uint32 s = *src++; \
        Uint32 d = *dst; \
        Uint32 s1 = s & 0xff00ff; \
        Uint32 d1 = d & 0xff00ff; \
        d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
        s &= 0xff00; \
        d &= 0xff00; \
        d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
        *dst++ = d1 | d; \
    } \
} while(0)

/*
 * For 16bpp pixels we can go a step further: put the middle component
 * in the high 16 bits of a 32 bit word, and process all three RGB
 * components at the same time. Since the smallest gap is here just
 * 5 bits, we have to scale alpha down to 5 bits as well.
*/

/* Per-surface alpha blend of `length` 16-bit pixels, field layout
   0x07e0 / 0xf81f. `alpha` is reduced to 5 bits (alpha >> 3). */
#define ALPHA_BLIT16_565(to, from, length, bpp, alpha) \
do { \
    int i; \
    Uint16 *src = (Uint16 *)(from); \
    Uint16 *dst = (Uint16 *)(to); \
    Uint32 ALPHA = alpha >> 3; \
    for(i = 0; i < (int)(length); i++) { \
        Uint32 s = *src++; \
        Uint32 d = *dst; \
        s = (s | s << 16) & 0x07e0f81f; \
        d = (d | d << 16) & 0x07e0f81f; \
        d += (s - d) * ALPHA >> 5; \
        d &= 0x07e0f81f; \
        *dst++ = d | d >> 16; \
    } \
} while(0)

/* Same as ALPHA_BLIT16_565, with the field layout 0x03e0 / 0x7c1f. */
#define ALPHA_BLIT16_555(to, from, length, bpp, alpha) \
do { \
    int i; \
    Uint16 *src = (Uint16 *)(from); \
    Uint16 *dst = (Uint16 *)(to); \
    Uint32 ALPHA = alpha >> 3; \
    for(i = 0; i < (int)(length); i++) { \
        Uint32 s = *src++; \
        Uint32 d = *dst; \
        s = (s | s << 16) & 0x03e07c1f; \
        d = (d | d << 16) & 0x03e07c1f; \
        d += (s - d) * ALPHA >> 5; \
        d &= 0x03e07c1f; \
        *dst++ = d | d >> 16; \
    } \
} while(0)

/*
 * The general slow catch-all function, for remaining depths and formats
 *
 * NOTE: this macro uses a pixel format pointer named `fmt` that is NOT one
 * of its parameters; it must be in scope at the expansion site.
 * `bpp` selects how a pixel is loaded and stored (2, 3 or 4 bytes); for any
 * other value the two switch statements do nothing, leaving `s` and `d`
 * unset.
 */
#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha) \
do { \
    int i; \
    Uint8 *src = from; \
    Uint8 *dst = to; \
    for(i = 0; i < (int)(length); i++) { \
        Uint32 s, d; \
        unsigned rs, gs, bs, rd, gd, bd; \
        switch(bpp) { \
        case 2: \
            s = *(Uint16 *)src; \
            d = *(Uint16 *)dst; \
            break; \
        case 3: \
            if(SDL_BYTEORDER == SDL_BIG_ENDIAN) { \
                s = (src[0] << 16) | (src[1] << 8) | src[2]; \
                d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \
            } else { \
                s = (src[2] << 16) | (src[1] << 8) | src[0]; \
                d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \
            } \
            break; \
        case 4: \
            s = *(Uint32 *)src; \
            d = *(Uint32 *)dst; \
            break; \
        } \
        RGB_FROM_PIXEL(s, fmt, rs, gs, bs); \
        RGB_FROM_PIXEL(d, fmt, rd, gd, bd); \
        rd += (rs - rd) * alpha >> 8; \
        gd += (gs - gd) * alpha >> 8; \
        bd += (bs - bd) * alpha >> 8; \
        PIXEL_FROM_RGB(d, fmt, rd, gd, bd); \
        switch(bpp) { \
        case 2: \
            *(Uint16 *)dst = d; \
            break; \
        case 3: \
            if(SDL_BYTEORDER == SDL_BIG_ENDIAN) { \
                dst[0] = d >> 16; \
                dst[1] = d >> 8; \
                dst[2] = d; \
            } else { \
                dst[0] = d; \
                dst[1] = d >> 8; \
                dst[2] = d >> 16; \
            } \
            break; \
        case 4: \
            *(Uint32 *)dst = d; \
            break; \
        } \
        src += bpp; \
        dst += bpp; \
    } \
} while(0)

#ifdef MMX_ASMBLIT

/*
 * MMX version of the 50% blend for 32-bit pixels (see the description of
 * the technique below). An odd leading pixel is blended with scalar code;
 * the rest are blended two per loop iteration (`i` is decremented twice
 * per iteration). `alpha` is not used.
 */
#define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha) \
do { \
    Uint32 *srcp = (Uint32 *)(from); \
    Uint32 *dstp = (Uint32 *)(to); \
    int i = 0x00fefefe; \
    movd_m2r(*(&i), mm4); \
    punpckldq_r2r(mm4, mm4); \
    i = 0x00010101; \
    movd_m2r(*(&i), mm3); \
    punpckldq_r2r(mm3, mm3); \
    i = (int)(length); \
    if( i & 1 ) { \
        Uint32 s = *srcp++; \
        Uint32 d = *dstp; \
        *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
            + (s & d & 0x00010101); \
        i--; \
    } \
    for(; i > 0; --i) { \
        movq_m2r((*dstp), mm2); /* dst -> mm2 */ \
        movq_r2r(mm2, mm6); /* dst -> mm6 */ \
        movq_m2r((*srcp), mm1); /* src -> mm1 */ \
        movq_r2r(mm1, mm5); /* src -> mm5 */ \
        pand_r2r(mm4, mm6); /* dst & 0x00fefefe -> mm6 */ \
        pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */ \
        paddd_r2r(mm6, mm5); /* (src & 0x00fefefe) + (dst & 0x00fefefe) -> mm5 */ \
        psrld_i2r(1, mm5); \
        pand_r2r(mm1, mm2); /* s & d -> mm2 */ \
        pand_r2r(mm3, mm2); /* s & d & 0x00010101 -> mm2 */ \
        paddd_r2r(mm5, mm2); \
        movq_r2m(mm2, (*dstp)); \
        dstp += 2; \
        srcp += 2; \
        i--; \
    } \
    emms(); \
} while(0)

#endif

/*
 * Special case: 50% alpha (alpha=128)
 * This is treated specially because it can be optimized very well, and
 * since it is good for many cases of semi-translucency.
 * The theory is to do all three components at the same time:
 * First zero the lowest bit of each component, which gives us room to
 * add them. Then shift right and add the sum of the lowest bits.
 *
 * `alpha` is accepted only for signature compatibility and is not used.
 */
#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha) \
do { \
    int i; \
    Uint32 *src = (Uint32 *)(from); \
    Uint32 *dst = (Uint32 *)(to); \
    for(i = 0; i < (int)(length); i++) { \
        Uint32 s = *src++; \
        Uint32 d = *dst; \
        *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
            + (s & d & 0x00010101); \
    } \
} while(0)

/*
 * For 16bpp, we can actually blend two pixels in parallel, if we take
 * care to shift before we add, not after.
*/

/* helper: blend a single 16 bit pixel at 50%.
   `dst` and `src` must be pointer variables: both are post-incremented. */
#define BLEND16_50(dst, src, mask) \
do { \
    Uint32 s = *src++; \
    Uint32 d = *dst; \
    *dst++ = (((s & mask) + (d & mask)) >> 1) \
        + (s & d & (~mask & 0xffff)); \
} while(0)

/* basic 16bpp blender. mask is the pixels to keep when adding.
   When source and destination addresses agree modulo 4, pixels are blended
   two at a time through 32-bit accesses, with single-pixel blends for an
   unaligned first pixel and a leftover last pixel; otherwise every pixel is
   blended individually. `alpha` is not used. */
#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask) \
do { \
    unsigned n = (length); \
    Uint16 *src = (Uint16 *)(from); \
    Uint16 *dst = (Uint16 *)(to); \
    if(((unsigned long)src ^ (unsigned long)dst) & 3) { \
        /* source and destination not in phase, blit one by one */ \
        while(n--) \
            BLEND16_50(dst, src, mask); \
    } else { \
        if((unsigned long)src & 3) { \
            /* first odd pixel */ \
            BLEND16_50(dst, src, mask); \
            n--; \
        } \
        for(; n > 1; n -= 2) { \
            Uint32 s = *(Uint32 *)src; \
            Uint32 d = *(Uint32 *)dst; \
            *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1) \
                + ((d & (mask | mask << 16)) >> 1) \
                + (s & d & (~(mask | mask << 16))); \
            src += 2; \
            dst += 2; \
        } \
        if(n) \
            BLEND16_50(dst, src, mask); /* last odd pixel */ \
    } \
} while(0)

#define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha) \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)

#define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)

/*
 * CHOOSE_BLIT(blitter, alpha, fmt)
 *
 * Expands `blitter(bpp, CountType, do_blit)` exactly once, picking the
 * do_blit macro from `alpha` and the masks of `fmt`:
 *   - alpha == 255: OPAQUE_BLIT for 1, 2, 3 and 4 bytes per pixel;
 *   - otherwise: nothing at all for 1 byte per pixel, the 565/555 blenders
 *     (or their _50 variants when alpha == 128) for matching 16-bit
 *     formats, the 888 blenders for 32-bit formats whose RGB masks add up
 *     to 0x00ffffff, and ALPHA_BLIT_ANY for everything else.
 * The count type passed to `blitter` is Uint16 for 4 bytes per pixel and
 * Uint8 otherwise.
 * The MMX variant additionally checks SDL_HasMMX() at run time. Because the
 * macro defines the label `general16`, it can be expanded at most once per
 * function.
 */
#ifdef MMX_ASMBLIT

#define CHOOSE_BLIT(blitter, alpha, fmt) \
do { \
    if(alpha == 255) { \
        switch(fmt->BytesPerPixel) { \
        case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \
        case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \
        case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \
        case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \
        } \
    } else { \
        switch(fmt->BytesPerPixel) { \
        case 1: \
            /* No 8bpp alpha blitting */ \
            break; \
            \
        case 2: \
            switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \
            case 0xffff: \
                if(fmt->Gmask == 0x07e0 \
                   || fmt->Rmask == 0x07e0 \
                   || fmt->Bmask == 0x07e0) { \
                    if(alpha == 128) \
                        blitter(2, Uint8, ALPHA_BLIT16_565_50); \
                    else { \
                        if(SDL_HasMMX()) \
                            blitter(2, Uint8, ALPHA_BLIT16_565MMX); \
                        else \
                            blitter(2, Uint8, ALPHA_BLIT16_565); \
                    } \
                } else \
                    goto general16; \
                break; \
                \
            case 0x7fff: \
                if(fmt->Gmask == 0x03e0 \
                   || fmt->Rmask == 0x03e0 \
                   || fmt->Bmask == 0x03e0) { \
                    if(alpha == 128) \
                        blitter(2, Uint8, ALPHA_BLIT16_555_50); \
                    else { \
                        if(SDL_HasMMX()) \
                            blitter(2, Uint8, ALPHA_BLIT16_555MMX); \
                        else \
                            blitter(2, Uint8, ALPHA_BLIT16_555); \
                    } \
                    break; \
                } \
                /* fallthrough */ \
                \
            default: \
            general16: \
                blitter(2, Uint8, ALPHA_BLIT_ANY); \
            } \
            break; \
            \
        case 3: \
            blitter(3, Uint8, ALPHA_BLIT_ANY); \
            break; \
            \
        case 4: \
            if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
               && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
                   || fmt->Bmask == 0xff00)) { \
                if(alpha == 128) \
                { \
                    if(SDL_HasMMX()) \
                        blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);\
                    else \
                        blitter(4, Uint16, ALPHA_BLIT32_888_50);\
                } \
                else \
                { \
                    if(SDL_HasMMX()) \
                        blitter(4, Uint16, ALPHA_BLIT32_888MMX);\
                    else \
                        blitter(4, Uint16, ALPHA_BLIT32_888); \
                } \
            } else \
                blitter(4, Uint16, ALPHA_BLIT_ANY); \
            break; \
        } \
    } \
} while(0)

#else

#define CHOOSE_BLIT(blitter, alpha, fmt) \
do { \
    if(alpha == 255) { \
        switch(fmt->BytesPerPixel) { \
        case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \
        case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \
        case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \
        case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \
        } \
    } else { \
        switch(fmt->BytesPerPixel) { \
        case 1: \
            /* No 8bpp alpha blitting */ \
            break; \
            \
        case 2: \
            switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \
            case 0xffff: \
                if(fmt->Gmask == 0x07e0 \
                   || fmt->Rmask == 0x07e0 \
                   || fmt->Bmask == 0x07e0) { \
                    if(alpha == 128) \
                        blitter(2, Uint8, ALPHA_BLIT16_565_50); \
                    else { \
                        blitter(2, Uint8, ALPHA_BLIT16_565); \
                    } \
                } else \
                    goto general16; \
                break; \
                \
            case 0x7fff: \
                if(fmt->Gmask == 0x03e0 \
                   || fmt->Rmask == 0x03e0 \
                   || fmt->Bmask == 0x03e0) { \
                    if(alpha == 128) \
                        blitter(2, Uint8, ALPHA_BLIT16_555_50); \
                    else { \
                        blitter(2, Uint8, ALPHA_BLIT16_555); \
                    } \
                    break; \
                } \
                /* fallthrough */ \
                \
            default: \
            general16: \
                blitter(2, Uint8, ALPHA_BLIT_ANY); \
            } \
            break; \
            \
        case 3: \
            blitter(3, Uint8, ALPHA_BLIT_ANY); \
            break; \
            \
        case 4: \
            if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
               && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
                   || fmt->Bmask == 0xff00)) { \
                if(alpha == 128) \
                    blitter(4, Uint16, ALPHA_BLIT32_888_50); \
                else \
                    blitter(4, Uint16, ALPHA_BLIT32_888); \
            } else \
                blitter(4, Uint16, ALPHA_BLIT_ANY); \
            break; \
        } \
    } \
} while(0)

#endif

/*
 * This takes care of the case when the surface is clipped on the left and/or
 * right. Top clipping has already been taken care of.
 *
 * w        width in pixels of one encoded scan line
 * srcbuf   RLE stream, positioned at the first line to draw
 * dst      destination surface (its format and pitch are used)
 * dstbuf   destination address for the pixel at column srcrect->x of the
 *          first drawn line
 * srcrect  x/w give the visible column range, h the number of lines
 * alpha    per-surface alpha; 255 selects the plain copy
 */
static void RLEClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst,
                        Uint8 *dstbuf, SDL_Rect *srcrect, unsigned alpha)
{
    SDL_PixelFormat *fmt = dst->format;

/* Walks the <skip>,<run> segments line by line and hands the part of each
   run that lies inside [left, right) to do_blit. dstbuf is moved back by
   `left` pixels first so that it can be indexed by source column. A zero
   run at the start of a line ends the stream. */
#define RLECLIPBLIT(bpp, Type, do_blit) \
    do { \
        int linecount = srcrect->h; \
        int ofs = 0; \
        int left = srcrect->x; \
        int right = left + srcrect->w; \
        dstbuf -= left * bpp; \
        for(;;) { \
            int run; \
            ofs += *(Type *)srcbuf; \
            run = ((Type *)srcbuf)[1]; \
            srcbuf += 2 * sizeof(Type); \
            if(run) { \
                /* clip to left and right borders */ \
                if(ofs < right) { \
                    int start = 0; \
                    int len = run; \
                    int startcol; \
                    if(left - ofs > 0) { \
                        start = left - ofs; \
                        len -= start; \
                        if(len <= 0) \
                            goto nocopy ## bpp ## do_blit; \
                    } \
                    startcol = ofs + start; \
                    if(len > right - startcol) \
                        len = right - startcol; \
                    do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
                            len, bpp, alpha); \
                } \
            nocopy ## bpp ## do_blit: \
                srcbuf += run * bpp; \
                ofs += run; \
            } else if(!ofs) \
                break; \
            if(ofs == w) { \
                ofs = 0; \
                dstbuf += dst->pitch; \
                if(!--linecount) \
                    break; \
            } \
        } \
    } while(0)

    CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);

#undef RLECLIPBLIT

}


/*
 * blit a colorkeyed RLE surface
 *
 * Reads the RLE stream from src->map->sw_data->aux_data and draws
 * srcrect->h lines of it at dstrect->x, dstrect->y in dst, skipping the
 * first srcrect->y encoded lines. The per-surface alpha of the source
 * format is applied when SDL_SRCALPHA is set on src; otherwise alpha is 255.
 * Returns -1 if the destination could not be locked, 0 otherwise.
 */
int SDL_RLEBlit(SDL_Surface *src, SDL_Rect *srcrect,
                SDL_Surface *dst, SDL_Rect *dstrect)
{
    Uint8 *dstbuf;
    Uint8 *srcbuf;
    int x, y;
    int w = src->w;
    unsigned alpha;

    /* Lock the destination if necessary */
    if ( SDL_MUSTLOCK(dst) ) {
        if ( SDL_LockSurface(dst) < 0 ) {
            return(-1);
        }
    }

    /* Set up the source and destination pointers */
    x = dstrect->x;
    y = dstrect->y;
    dstbuf = (Uint8 *)dst->pixels
             + y * dst->pitch + x * src->format->BytesPerPixel;
    srcbuf = (Uint8 *)src->map->sw_data->aux_data;

    {
        /* skip lines at the top if necessary */
        int vskip = srcrect->y;
        int ofs = 0;
        if(vskip) {

/* Advance srcbuf over `vskip` complete scan lines; jumps to `done` if the
   end-of-stream marker is reached first. */
#define RLESKIP(bpp, Type) \
            for(;;) { \
                int run; \
                ofs += *(Type *)srcbuf; \
                run = ((Type *)srcbuf)[1]; \
                srcbuf += sizeof(Type) * 2; \
                if(run) { \
                    srcbuf += run * bpp; \
                    ofs += run; \
                } else if(!ofs) \
                    goto done; \
                if(ofs == w) { \
                    ofs = 0; \
                    if(!--vskip) \
                        break; \
                } \
            }

            switch(src->format->BytesPerPixel) {
            case 1: RLESKIP(1, Uint8); break;
            case 2: RLESKIP(2, Uint8); break;
            case 3: RLESKIP(3, Uint8); break;
            case 4: RLESKIP(4, Uint16); break;
            }

#undef RLESKIP

        }
    }

    alpha = (src->flags & SDL_SRCALPHA) == SDL_SRCALPHA
            ? src->format->alpha : 255;
    /* if left or right edge clipping needed, call clip blit */
    if ( srcrect->x || srcrect->w != src->w ) {
        RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha);
    } else {
        SDL_PixelFormat *fmt = src->format;

/* Unclipped variant: every run is handed to do_blit in full. */
#define RLEBLIT(bpp, Type, do_blit) \
        do { \
            int linecount = srcrect->h; \
            int ofs = 0; \
            for(;;) { \
                unsigned run; \
                ofs += *(Type *)srcbuf; \
                run = ((Type *)srcbuf)[1]; \
                srcbuf += 2 * sizeof(Type); \
                if(run) { \
                    do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
                    srcbuf += run * bpp; \
                    ofs += run; \
                } else if(!ofs) \
                    break; \
                if(ofs == w) { \
                    ofs = 0; \
                    dstbuf += dst->pitch; \
                    if(!--linecount) \
                        break; \
                } \
            } \
        } while(0)

        CHOOSE_BLIT(RLEBLIT, alpha, fmt);

#undef RLEBLIT
    }

done:
    /* Unlock the destination if necessary */
    if ( SDL_MUSTLOCK(dst) ) {
        SDL_UnlockSurface(dst);
    }
    return(0);
}

#undef OPAQUE_BLIT

/*
 * Per-pixel blitting macros for translucent pixels:
 * These use the same techniques as the per-surface blitting macros
 *
 * `src` is an encoded 32-bit translucent pixel, `dst` an lvalue holding the
 * destination pixel, which is overwritten with the blended result.
 */

/*
 * For 32bpp pixels, we have made sure the alpha is stored in the top
 * 8 bits, so proceed as usual
 */
#define BLIT_TRANSL_888(src, dst) \
do { \
    Uint32 s = src; \
    Uint32 d = dst; \
    unsigned alpha = s >> 24; \
    Uint32 s1 = s & 0xff00ff; \
    Uint32 d1 = d & 0xff00ff; \
    d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
    s &= 0xff00; \
    d &= 0xff00; \
    d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
    dst = d1 | d; \
} while(0)

/*
 * For 16bpp pixels, we have stored the 5 most significant alpha bits in
 * bits 5-9 (mask 0x3e0). As before, we can process all 3 RGB components
 * at the same time.
 */
#define BLIT_TRANSL_565(src, dst) \
do { \
    Uint32 s = src; \
    Uint32 d = dst; \
    unsigned alpha = (s & 0x3e0) >> 5; \
    s &= 0x07e0f81f; \
    d = (d | d << 16) & 0x07e0f81f; \
    d += (s - d) * alpha >> 5; \
    d &= 0x07e0f81f; \
    dst = d | d >> 16; \
} while(0)

#define BLIT_TRANSL_555(src, dst) \
do { \
    Uint32 s = src; \
    Uint32 d = dst; \
    unsigned alpha = (s & 0x3e0) >> 5; \
    s &= 0x03e07c1f; \
    d = (d | d << 16) & 0x03e07c1f; \
    d += (s - d) * alpha >> 5; \
    d &= 0x03e07c1f; \
    dst = d | d >> 16; \
} while(0)

/* used to save the destination format in the encoding. Designed to be
   macro-compatible with SDL_PixelFormat but without the unneeded fields */
typedef struct {
    Uint8  BytesPerPixel;
    Uint8  Rloss;
    Uint8  Gloss;
    Uint8  Bloss;
    Uint8  Rshift;
    Uint8  Gshift;
    Uint8  Bshift;
    Uint8  Ashift;
    Uint32 Rmask;
    Uint32 Gmask;
    Uint32 Bmask;
    Uint32 Amask;
} RLEDestFormat;

/* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
static void RLEAlphaClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst,
                             Uint8 *dstbuf, SDL_Rect *srcrect)
{
    SDL_PixelFormat *df = dst->format;
    /*
     * clipped blitter: Ptype is the destination pixel type,
     * Ctype the count type of the opaque segments (the counts of the
     * translucent segments are always read as Uint16), and do_blend
     * the macro to blend one pixel.
*/
/* For every line: copy the visible part of each opaque run, skip the
   alignment padding (16-bit pixel types only), then blend the visible part
   of each translucent run. A zero run at the start of an opaque line ends
   the stream and returns from the enclosing function. */
#define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend) \
    do { \
        int linecount = srcrect->h; \
        int left = srcrect->x; \
        int right = left + srcrect->w; \
        dstbuf -= left * sizeof(Ptype); \
        do { \
            int ofs = 0; \
            /* blit opaque pixels on one line */ \
            do { \
                unsigned run; \
                ofs += ((Ctype *)srcbuf)[0]; \
                run = ((Ctype *)srcbuf)[1]; \
                srcbuf += 2 * sizeof(Ctype); \
                if(run) { \
                    /* clip to left and right borders */ \
                    int cofs = ofs; \
                    int crun = run; \
                    if(left - cofs > 0) { \
                        crun -= left - cofs; \
                        cofs = left; \
                    } \
                    if(crun > right - cofs) \
                        crun = right - cofs; \
                    if(crun > 0) \
                        PIXEL_COPY(dstbuf + cofs * sizeof(Ptype), \
                                   srcbuf + (cofs - ofs) * sizeof(Ptype), \
                                   (unsigned)crun, sizeof(Ptype)); \
                    srcbuf += run * sizeof(Ptype); \
                    ofs += run; \
                } else if(!ofs) \
                    return; \
            } while(ofs < w); \
            /* skip padding if necessary */ \
            if(sizeof(Ptype) == 2) \
                srcbuf += (unsigned long)srcbuf & 2; \
            /* blit translucent pixels on the same line */ \
            ofs = 0; \
            do { \
                unsigned run; \
                ofs += ((Uint16 *)srcbuf)[0]; \
                run = ((Uint16 *)srcbuf)[1]; \
                srcbuf += 4; \
                if(run) { \
                    /* clip to left and right borders */ \
                    int cofs = ofs; \
                    int crun = run; \
                    if(left - cofs > 0) { \
                        crun -= left - cofs; \
                        cofs = left; \
                    } \
                    if(crun > right - cofs) \
                        crun = right - cofs; \
                    if(crun > 0) { \
                        Ptype *dst = (Ptype *)dstbuf + cofs; \
                        Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs); \
                        int i; \
                        for(i = 0; i < crun; i++) \
                            do_blend(src[i], dst[i]); \
                    } \
                    srcbuf += run * 4; \
                    ofs += run; \
                } \
            } while(ofs < w); \
            dstbuf += dst->pitch; \
        } while(--linecount); \
    } while(0)

    switch(df->BytesPerPixel) {
    case 2:
        if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0
           || df->Bmask == 0x07e0)
            RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
        else
            RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
        break;
    case 4:
        RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
        break;
    }
}

/*
 * blit a pixel-alpha RLE surface
 *
 * Reads the RLE stream that follows the RLEDestFormat header in
 * src->map->sw_data->aux_data and draws srcrect->h lines of it at
 * dstrect->x, dstrect->y in dst, skipping the first srcrect->y encoded
 * lines. Only destinations with 2 or 4 bytes per pixel are drawn.
 * Returns -1 if the destination could not be locked, 0 otherwise.
 */
int SDL_RLEAlphaBlit(SDL_Surface *src, SDL_Rect *srcrect,
                     SDL_Surface *dst, SDL_Rect *dstrect)
{
    int x, y;
    int w = src->w;
    Uint8 *srcbuf, *dstbuf;
    SDL_PixelFormat *df = dst->format;

    /* Lock the destination if necessary */
    if ( SDL_MUSTLOCK(dst) ) {
        if ( SDL_LockSurface(dst) < 0 ) {
            return -1;
        }
    }

    x = dstrect->x;
    y = dstrect->y;
    dstbuf = (Uint8 *)dst->pixels
             + y * dst->pitch + x * df->BytesPerPixel;
    srcbuf = (Uint8 *)src->map->sw_data->aux_data + sizeof(RLEDestFormat);

    {
        /* skip lines at the top if necessary */
        int vskip = srcrect->y;
        if(vskip) {
            int ofs;
            if(df->BytesPerPixel == 2) {
                /* the 16/32 interleaved format */
                do {
                    /* skip opaque line */
                    ofs = 0;
                    do {
                        int run;
                        ofs += srcbuf[0];
                        run = srcbuf[1];
                        srcbuf += 2;
                        if(run) {
                            srcbuf += 2 * run;
                            ofs += run;
                        } else if(!ofs)
                            goto done;
                    } while(ofs < w);

                    /* skip padding */
                    srcbuf += (unsigned long)srcbuf & 2;

                    /* skip translucent line */
                    ofs = 0;
                    do {
                        int run;
                        ofs += ((Uint16 *)srcbuf)[0];
                        run = ((Uint16 *)srcbuf)[1];
                        srcbuf += 4 * (run + 1);
                        ofs += run;
                    } while(ofs < w);
                } while(--vskip);
            } else {
                /* the 32/32 interleaved format */
                vskip <<= 1;    /* opaque and translucent have same format */
                do {
                    ofs = 0;
                    do {
                        int run;
                        ofs += ((Uint16 *)srcbuf)[0];
                        run = ((Uint16 *)srcbuf)[1];
                        srcbuf += 4;
                        if(run) {
                            srcbuf += 4 * run;
                            ofs += run;
                        } else if(!ofs)
                            goto done;
                    } while(ofs < w);
                } while(--vskip);
            }
        }
    }

    /* if left or right edge clipping needed, call clip blit */
    if(srcrect->x || srcrect->w != src->w) {
        RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect);
    } else {

        /*
         * non-clipped blitter. Ptype is the destination pixel type,
         * Ctype the count type of the opaque segments (the counts of the
         * translucent segments are always read as Uint16), and do_blend
         * the macro to blend one pixel.
         */
#define RLEALPHABLIT(Ptype, Ctype, do_blend) \
        do { \
            int linecount = srcrect->h; \
            do { \
                int ofs = 0; \
                /* blit opaque pixels on one line */ \
                do { \
                    unsigned run; \
                    ofs += ((Ctype *)srcbuf)[0]; \
                    run = ((Ctype *)srcbuf)[1]; \
                    srcbuf += 2 * sizeof(Ctype); \
                    if(run) { \
                        PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
                                   run, sizeof(Ptype)); \
                        srcbuf += run * sizeof(Ptype); \
                        ofs += run; \
                    } else if(!ofs) \
                        goto done; \
                } while(ofs < w); \
                /* skip padding if necessary */ \
                if(sizeof(Ptype) == 2) \
                    srcbuf += (unsigned long)srcbuf & 2; \
                /* blit translucent pixels on the same line */ \
                ofs = 0; \
                do { \
                    unsigned run; \
                    ofs += ((Uint16 *)srcbuf)[0]; \
                    run = ((Uint16 *)srcbuf)[1]; \
                    srcbuf += 4; \
                    if(run) { \
                        Ptype *dst = (Ptype *)dstbuf + ofs; \
                        unsigned i; \
                        for(i = 0; i < run; i++) { \
                            Uint32 src = *(Uint32 *)srcbuf; \
                            do_blend(src, *dst); \
                            srcbuf += 4; \
                            dst++; \
                        } \
                        ofs += run; \
                    } \
                } while(ofs < w); \
                dstbuf += dst->pitch; \
            } while(--linecount); \
        } while(0)

        switch(df->BytesPerPixel) {
        case 2:
            if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0
               || df->Bmask == 0x07e0)
                RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
            else
                RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
            break;
        case 4:
            RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
            break;
        }
    }

 done:
    /* Unlock the destination if necessary */
    if ( SDL_MUSTLOCK(dst) ) {
        SDL_UnlockSurface(dst);
    }
    return 0;
}

/*
 * Auxiliary functions:
 * The encoding functions take 32bpp rgb + a, and
 * return the number of bytes copied to the destination.
 * The decoding functions copy to 32bpp rgb + a, and
 * return the number of bytes copied from the source.
 * These are only used in the encoder and un-RLE code and are therefore not
 * highly optimised.
*/ /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */ static int copy_opaque_16(void *dst, Uint32 *src, int n, SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) { int i; Uint16 *d = dst; for(i = 0; i < n; i++) { unsigned r, g, b; RGB_FROM_PIXEL(*src, sfmt, r, g, b); PIXEL_FROM_RGB(*d, dfmt, r, g, b); src++; d++; } return n * 2; } /* decode opaque pixels from 16bpp to 32bpp rgb + a */ static int uncopy_opaque_16(Uint32 *dst, void *src, int n, RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) { int i; Uint16 *s = src; unsigned alpha = dfmt->Amask ? 255 : 0; for(i = 0; i < n; i++) { unsigned r, g, b; RGB_FROM_PIXEL(*s, sfmt, r, g, b); PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha); s++; dst++; } return n * 2; } /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */ static int copy_transl_565(void *dst, Uint32 *src, int n, SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) { int i; Uint32 *d = dst; for(i = 0; i < n; i++) { unsigned r, g, b, a; Uint16 pix; RGBA_FROM_8888(*src, sfmt, r, g, b, a); PIXEL_FROM_RGB(pix, dfmt, r, g, b); *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0); src++; d++; } return n * 4; } /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */ static int copy_transl_555(void *dst, Uint32 *src, int n, SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) { int i; Uint32 *d = dst; for(i = 0; i < n; i++) { unsigned r, g, b, a; Uint16 pix; RGBA_FROM_8888(*src, sfmt, r, g, b, a); PIXEL_FROM_RGB(pix, dfmt, r, g, b); *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0); src++; d++; } return n * 4; } /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */ static int uncopy_transl_16(Uint32 *dst, void *src, int n, RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) { int i; Uint32 *s = src; for(i = 0; i < n; i++) { unsigned r, g, b, a; Uint32 pix = *s++; a = (pix & 0x3e0) >> 2; pix = (pix & ~0x3e0) | pix >> 16; RGB_FROM_PIXEL(pix, sfmt, r, g, b); PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a); dst++; } return n * 4; } /* 
encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose)
 *
 * Each of the n source pixels is split with the sfmt layout, its RGB part
 * is rebuilt with the dfmt layout, and its alpha value is OR-ed into bits
 * 24-31 of the stored word.
 * Returns the number of bytes stored at dst (four per pixel).
 */
static int copy_32(void *dst, Uint32 *src, int n,
                   SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
{
    int i;
    Uint32 *d = dst;
    for(i = 0; i < n; i++) {
        unsigned r, g, b, a;
        Uint32 pixel;
        RGBA_FROM_8888(*src, sfmt, r, g, b, a);
        PIXEL_FROM_RGB(pixel, dfmt, r, g, b);
        *d++ = pixel | a << 24;
        src++;
    }
    return n * 4;
}

/*
 * decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose)
 *
 * RGB is extracted with the saved sfmt layout, alpha is taken from bits
 * 24-31 of the encoded word, and the pixel is rebuilt with the dfmt layout.
 * Returns the number of bytes consumed from src (four per pixel).
 */
static int uncopy_32(Uint32 *dst, void *src, int n,
                     RLEDestFormat *sfmt, SDL_PixelFormat *dfmt)
{
    int i;
    Uint32 *s = src;
    for(i = 0; i < n; i++) {
        unsigned r, g, b, a;
        Uint32 pixel = *s++;
        RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
        a = pixel >> 24;
        PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
        dst++;
    }
    return n * 4;
}

/* True when the alpha field of `pixel` (per fmt->Amask / fmt->Ashift)
   equals 255. */
#define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255)

/* True when the alpha field of `pixel` is in the range 1..254: subtracting
   1 as unsigned wraps an alpha of 0 to a large value, so a single
   comparison against 254 covers both ends. */
#define ISTRANSL(pixel, fmt) \
    ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U)

/* convert surface to be quickly alpha-blittable onto dest, if possible */
static int RLEAlphaSurface(SDL_Surface *surface)
{
    SDL_Surface *dest;
    SDL_PixelFormat *df;
    int maxsize = 0;
    int max_opaque_run;
    int max_transl_run = 65535;
    unsigned masksum;
    Uint8 *rlebuf, *dst;
    int (*copy_opaque)(void *, Uint32 *, int,
                       SDL_PixelFormat *, SDL_PixelFormat *);
    int (*copy_transl)(void *, Uint32 *, int,
                       SDL_PixelFormat *, SDL_PixelFormat *);

    dest = surface->map->dst;
    if(!dest)
        return -1;
    df = dest->format;
    if(surface->format->BitsPerPixel != 32)
        return -1;              /* only 32bpp source supported */

    /* find out whether the destination is one we support,
       and determine the max size of the encoded result */
    masksum = df->Rmask | df->Gmask | df->Bmask;
    switch(df->BytesPerPixel) {
    case 2:
        /* 16bpp: only support 565 and 555 formats */
        switch(masksum) {
        case 0xffff:
            if(df->Gmask == 0x07e0
               || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
                copy_opaque = copy_opaque_16;
                copy_transl = copy_transl_565;
            } else
                return -1;
            break;
        case 0x7fff:
            if(df->Gmask == 0x03e0 || df->Rmask == 0x03e0 ||
df->Bmask == 0x03e0) { copy_opaque = copy_opaque_16; copy_transl = copy_transl_555; } else return -1; break; default: return -1; } max_opaque_run = 255; /* runs stored as bytes */ /* worst case is alternating opaque and translucent pixels, with room for alignment padding between lines */ maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2; break; case 4: if(masksum != 0x00ffffff) return -1; /* requires unused high byte */ copy_opaque = copy_32; copy_transl = copy_32; max_opaque_run = 255; /* runs stored as short ints */ /* worst case is alternating opaque and translucent pixels */ maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4; break; default: return -1; /* anything else unsupported right now */ } maxsize += sizeof(RLEDestFormat); rlebuf = (Uint8 *)SDL_malloc(maxsize); if(!rlebuf) { SDL_OutOfMemory(); return -1; } { /* save the destination format so we can undo the encoding later */ RLEDestFormat *r = (RLEDestFormat *)rlebuf; r->BytesPerPixel = df->BytesPerPixel; r->Rloss = df->Rloss; r->Gloss = df->Gloss; r->Bloss = df->Bloss; r->Rshift = df->Rshift; r->Gshift = df->Gshift; r->Bshift = df->Bshift; r->Ashift = df->Ashift; r->Rmask = df->Rmask; r->Gmask = df->Gmask; r->Bmask = df->Bmask; r->Amask = df->Amask; } dst = rlebuf + sizeof(RLEDestFormat); /* Do the actual encoding */ { int x, y; int h = surface->h, w = surface->w; SDL_PixelFormat *sf = surface->format; Uint32 *src = (Uint32 *)surface->pixels; Uint8 *lastline = dst; /* end of last non-blank line */ /* opaque counts are 8 or 16 bits, depending on target depth */ #define ADD_OPAQUE_COUNTS(n, m) \ if(df->BytesPerPixel == 4) { \ ((Uint16 *)dst)[0] = n; \ ((Uint16 *)dst)[1] = m; \ dst += 4; \ } else { \ dst[0] = n; \ dst[1] = m; \ dst += 2; \ } /* translucent counts are always 16 bit */ #define ADD_TRANSL_COUNTS(n, m) \ (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4) for(y = 0; y < h; y++) { int runstart, skipstart; int blankline = 0; /* First encode all opaque pixels of a scan line */ x 
= 0; do { int run, skip, len; skipstart = x; while(x < w && !ISOPAQUE(src[x], sf)) x++; runstart = x; while(x < w && ISOPAQUE(src[x], sf)) x++; skip = runstart - skipstart; if(skip == w) blankline = 1; run = x - runstart; while(skip > max_opaque_run) { ADD_OPAQUE_COUNTS(max_opaque_run, 0); skip -= max_opaque_run; } len = MIN(run, max_opaque_run); ADD_OPAQUE_COUNTS(skip, len); dst += copy_opaque(dst, src + runstart, len, sf, df); runstart += len; run -= len; while(run) { len = MIN(run, max_opaque_run); ADD_OPAQUE_COUNTS(0, len); dst += copy_opaque(dst, src + runstart, len, sf, df); runstart += len; run -= len; } } while(x < w); /* Make sure the next output address is 32-bit aligned */ dst += (unsigned long)dst & 2; /* Next, encode all translucent pixels of the same scan line */ x = 0; do { int run, skip, len; skipstart = x; while(x < w && !ISTRANSL(src[x], sf)) x++; runstart = x; while(x < w && ISTRANSL(src[x], sf)) x++; skip = runstart - skipstart; blankline &= (skip == w); run = x - runstart; while(skip > max_transl_run) { ADD_TRANSL_COUNTS(max_transl_run, 0); skip -= max_transl_run; } len = MIN(run, max_transl_run); ADD_TRANSL_COUNTS(skip, len); dst += copy_transl(dst, src + runstart, len, sf, df); runstart += len; run -= len; while(run) { len = MIN(run, max_transl_run); ADD_TRANSL_COUNTS(0, len); dst += copy_transl(dst, src + runstart, len, sf, df); runstart += len; run -= len; } if(!blankline) lastline = dst; } while(x < w); src += surface->pitch >> 2; } dst = lastline; /* back up past trailing blank lines */ ADD_OPAQUE_COUNTS(0, 0); } #undef ADD_OPAQUE_COUNTS #undef ADD_TRANSL_COUNTS /* Now that we have it encoded, release the original pixels */ if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { SDL_free( surface->pixels ); surface->pixels = NULL; } /* realloc the buffer to release unused memory */ { Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf); if(!p) p = rlebuf; surface->map->sw_data->aux_data = p; } 
return 0; } static Uint32 getpix_8(Uint8 *srcbuf) { return *srcbuf; } static Uint32 getpix_16(Uint8 *srcbuf) { return *(Uint16 *)srcbuf; } static Uint32 getpix_24(Uint8 *srcbuf) { #if SDL_BYTEORDER == SDL_LIL_ENDIAN return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16); #else return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2]; #endif } static Uint32 getpix_32(Uint8 *srcbuf) { return *(Uint32 *)srcbuf; } typedef Uint32 (*getpix_func)(Uint8 *); static getpix_func getpixes[4] = { getpix_8, getpix_16, getpix_24, getpix_32 }; static int RLEColorkeySurface(SDL_Surface *surface) { Uint8 *rlebuf, *dst; int maxn; int y; Uint8 *srcbuf, *curbuf, *lastline; int maxsize = 0; int skip, run; int bpp = surface->format->BytesPerPixel; getpix_func getpix; Uint32 ckey, rgbmask; int w, h; /* calculate the worst case size for the compressed surface */ switch(bpp) { case 1: /* worst case is alternating opaque and transparent pixels, starting with an opaque pixel */ maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2; break; case 2: case 3: /* worst case is solid runs, at most 255 pixels wide */ maxsize = surface->h * (2 * (surface->w / 255 + 1) + surface->w * bpp) + 2; break; case 4: /* worst case is solid runs, at most 65535 pixels wide */ maxsize = surface->h * (4 * (surface->w / 65535 + 1) + surface->w * 4) + 4; break; } rlebuf = (Uint8 *)SDL_malloc(maxsize); if ( rlebuf == NULL ) { SDL_OutOfMemory(); return(-1); } /* Set up the conversion */ srcbuf = (Uint8 *)surface->pixels; curbuf = srcbuf; maxn = bpp == 4 ? 
65535 : 255; skip = run = 0; dst = rlebuf; rgbmask = ~surface->format->Amask; ckey = surface->format->colorkey & rgbmask; lastline = dst; getpix = getpixes[bpp - 1]; w = surface->w; h = surface->h; #define ADD_COUNTS(n, m) \ if(bpp == 4) { \ ((Uint16 *)dst)[0] = n; \ ((Uint16 *)dst)[1] = m; \ dst += 4; \ } else { \ dst[0] = n; \ dst[1] = m; \ dst += 2; \ } for(y = 0; y < h; y++) { int x = 0; int blankline = 0; do { int run, skip, len; int runstart; int skipstart = x; /* find run of transparent, then opaque pixels */ while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey) x++; runstart = x; while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey) x++; skip = runstart - skipstart; if(skip == w) blankline = 1; run = x - runstart; /* encode segment */ while(skip > maxn) { ADD_COUNTS(maxn, 0); skip -= maxn; } len = MIN(run, maxn); ADD_COUNTS(skip, len); SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp); dst += len * bpp; run -= len; runstart += len; while(run) { len = MIN(run, maxn); ADD_COUNTS(0, len); SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp); dst += len * bpp; runstart += len; run -= len; } if(!blankline) lastline = dst; } while(x < w); srcbuf += surface->pitch; } dst = lastline; /* back up bast trailing blank lines */ ADD_COUNTS(0, 0); #undef ADD_COUNTS /* Now that we have it encoded, release the original pixels */ if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { SDL_free( surface->pixels ); surface->pixels = NULL; } /* realloc the buffer to release unused memory */ { /* If realloc returns NULL, the original block is left intact */ Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf); if(!p) p = rlebuf; surface->map->sw_data->aux_data = p; } return(0); } int SDL_RLESurface(SDL_Surface *surface) { int retcode; /* Clear any previous RLE conversion */ if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) { SDL_UnRLESurface(surface, 1); } /* We don't support RLE encoding of bitmaps */ if ( 
surface->format->BitsPerPixel < 8 ) { return(-1); } /* Lock the surface if it's in hardware */ if ( SDL_MUSTLOCK(surface) ) { if ( SDL_LockSurface(surface) < 0 ) { return(-1); } } /* Encode */ if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { retcode = RLEColorkeySurface(surface); } else { if((surface->flags & SDL_SRCALPHA) == SDL_SRCALPHA && surface->format->Amask != 0) retcode = RLEAlphaSurface(surface); else retcode = -1; /* no RLE for per-surface alpha sans ckey */ } /* Unlock the surface if it's in hardware */ if ( SDL_MUSTLOCK(surface) ) { SDL_UnlockSurface(surface); } if(retcode < 0) return -1; /* The surface is now accelerated */ surface->flags |= SDL_RLEACCEL; return(0); } /* * Un-RLE a surface with pixel alpha * This may not give back exactly the image before RLE-encoding; all * completely transparent pixels will be lost, and colour and alpha depth * may have been reduced (when encoding for 16bpp targets). */ static SDL_bool UnRLEAlpha(SDL_Surface *surface) { Uint8 *srcbuf; Uint32 *dst; SDL_PixelFormat *sf = surface->format; RLEDestFormat *df = surface->map->sw_data->aux_data; int (*uncopy_opaque)(Uint32 *, void *, int, RLEDestFormat *, SDL_PixelFormat *); int (*uncopy_transl)(Uint32 *, void *, int, RLEDestFormat *, SDL_PixelFormat *); int w = surface->w; int bpp = df->BytesPerPixel; if(bpp == 2) { uncopy_opaque = uncopy_opaque_16; uncopy_transl = uncopy_transl_16; } else { uncopy_opaque = uncopy_transl = uncopy_32; } surface->pixels = SDL_malloc(surface->h * surface->pitch); if ( !surface->pixels ) { return(SDL_FALSE); } /* fill background with transparent pixels */ SDL_memset(surface->pixels, 0, surface->h * surface->pitch); dst = surface->pixels; srcbuf = (Uint8 *)(df + 1); for(;;) { /* copy opaque pixels */ int ofs = 0; do { unsigned run; if(bpp == 2) { ofs += srcbuf[0]; run = srcbuf[1]; srcbuf += 2; } else { ofs += ((Uint16 *)srcbuf)[0]; run = ((Uint16 *)srcbuf)[1]; srcbuf += 4; } if(run) { srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, 
df, sf); ofs += run; } else if(!ofs) return(SDL_TRUE); } while(ofs < w); /* skip padding if needed */ if(bpp == 2) srcbuf += (unsigned long)srcbuf & 2; /* copy translucent pixels */ ofs = 0; do { unsigned run; ofs += ((Uint16 *)srcbuf)[0]; run = ((Uint16 *)srcbuf)[1]; srcbuf += 4; if(run) { srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf); ofs += run; } } while(ofs < w); dst += surface->pitch >> 2; } /* Make the compiler happy */ return(SDL_TRUE); } void SDL_UnRLESurface(SDL_Surface *surface, int recode) { if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) { surface->flags &= ~SDL_RLEACCEL; if(recode && (surface->flags & SDL_PREALLOC) != SDL_PREALLOC && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { SDL_Rect full; unsigned alpha_flag; /* re-create the original surface */ surface->pixels = SDL_malloc(surface->h * surface->pitch); if ( !surface->pixels ) { /* Oh crap... */ surface->flags |= SDL_RLEACCEL; return; } /* fill it with the background colour */ SDL_FillRect(surface, NULL, surface->format->colorkey); /* now render the encoded surface */ full.x = full.y = 0; full.w = surface->w; full.h = surface->h; alpha_flag = surface->flags & SDL_SRCALPHA; surface->flags &= ~SDL_SRCALPHA; /* opaque blit */ SDL_RLEBlit(surface, &full, surface, &full); surface->flags |= alpha_flag; } else { if ( !UnRLEAlpha(surface) ) { /* Oh crap... */ surface->flags |= SDL_RLEACCEL; return; } } } if ( surface->map && surface->map->sw_data->aux_data ) { SDL_free(surface->map->sw_data->aux_data); surface->map->sw_data->aux_data = NULL; } } }