src/video/SDL_blit.h
author Sam Lantinga
Sun, 13 Feb 2011 13:46:10 -0800
changeset 5288 d4381f3b0d1e
parent 5262 b530ef003506
child 5389 24903690f48a
permissions -rw-r--r--
A few fixes:
Fixed creating render texture framebuffer.
Removed the need for palette watch, added surface format caching.
Added an SDL_DONTFREE flag so you can't free the window and 1.2 shadow surfaces.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2011 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #ifndef _SDL_blit_h
    25 #define _SDL_blit_h
    26 
    27 #ifdef __MINGW32__
    28 #include <_mingw.h>
    29 #endif
    30 
    31 #if defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR)
    32 #include <intrin.h>
    33 #else
    34 #ifdef __MMX__
    35 #include <mmintrin.h>
    36 #endif
    37 #ifdef __SSE__
    38 #include <xmmintrin.h>
    39 #endif
    40 #ifdef __SSE2__
    41 #include <emmintrin.h>
    42 #endif
    43 #endif
    44 
    45 #include "SDL_cpuinfo.h"
    46 #include "SDL_endian.h"
    47 #include "SDL_surface.h"
    48 
    /*
     * SDL blit copy flags.
     * Every flag occupies its own bit, so flags can be OR-ed together
     * (SDL_COPY_RLE_MASK below is built that way).
     */
    #define SDL_COPY_MODULATE_COLOR     0x0001
    #define SDL_COPY_MODULATE_ALPHA     0x0002
    #define SDL_COPY_BLEND              0x0010
    #define SDL_COPY_ADD                0x0020
    #define SDL_COPY_MOD                0x0040
    #define SDL_COPY_COLORKEY           0x0100
    #define SDL_COPY_NEAREST            0x0200
    #define SDL_COPY_RLE_DESIRED        0x1000
    #define SDL_COPY_RLE_COLORKEY       0x2000
    #define SDL_COPY_RLE_ALPHAKEY       0x4000
    /* All three RLE-related bits combined */
    #define SDL_COPY_RLE_MASK \
        (SDL_COPY_RLE_DESIRED | SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY)
    61 
    /* SDL blit CPU flags; SDL_CPU_ANY is zero, i.e. no bit set */
    #define SDL_CPU_ANY                 0x0000
    #define SDL_CPU_MMX                 0x0001
    #define SDL_CPU_SSE                 0x0004
    #define SDL_CPU_SSE2                0x0008
    67 
    68 typedef struct
    69 {
    70     Uint8 *src;
    71     int src_w, src_h;
    72     int src_pitch;
    73     int src_skip;
    74     Uint8 *dst;
    75     int dst_w, dst_h;
    76     int dst_pitch;
    77     int dst_skip;
    78     SDL_PixelFormat *src_fmt;
    79     SDL_PixelFormat *dst_fmt;
    80     Uint8 *table;
    81     int flags;
    82     Uint32 colorkey;
    83     Uint8 r, g, b, a;
    84 } SDL_BlitInfo;
    85 
    86 typedef void (SDLCALL * SDL_BlitFunc) (SDL_BlitInfo * info);
    87 
    88 typedef struct
    89 {
    90     Uint32 src_format;
    91     Uint32 dst_format;
    92     int flags;
    93     int cpu;
    94     SDL_BlitFunc func;
    95 } SDL_BlitFuncEntry;
    96 
    97 /* Blit mapping definition */
    98 typedef struct SDL_BlitMap
    99 {
   100     SDL_Surface *dst;
   101     int identity;
   102     SDL_blit blit;
   103     void *data;
   104     SDL_BlitInfo info;
   105 
   106     /* the version count matches the destination; mismatch indicates
   107        an invalid mapping */
   108     Uint32 palette_version;
   109 } SDL_BlitMap;
   110 
   111 /* Functions found in SDL_blit.c */
   112 extern int SDL_CalculateBlit(SDL_Surface * surface);
   113 
   114 /* Functions found in SDL_blit_*.c */
   115 extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface * surface);
   116 extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface * surface);
   117 extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface * surface);
   118 extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface * surface);
   119 
   120 /*
   121  * Useful macros for blitting routines
   122  */
   123 
   /*
    * DECLARE_ALIGNED(t, v, a): declare variable `v` of type `t` with an
    * alignment request of `a`, spelled for the compiler in use.
    * When the compiler defines neither __GNUC__ nor _MSC_VER the request is
    * dropped and a plain `t v` declaration is produced.
    */
   #ifdef __GNUC__
   #define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
   #else
   #ifdef _MSC_VER
   #define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
   #else
   #define DECLARE_ALIGNED(t,v,a)  t v
   #endif
   #endif
   131 
   /*
    * Split a pixel value into its R, G and B components.
    *
    * RGB_FROM_PIXEL reads the masks, shifts and losses from fmt; the
    * RGB565 / RGB555 / RGB888 variants hard-code the channel layout.
    *
    * Pixel and fmt may be arbitrary expressions: every use is parenthesized,
    * so e.g. RGB_FROM_RGB565(hi | lo, r, g, b) masks the whole value.
    * r, g and b must be assignable lvalues.  Pixel is evaluated once per
    * channel.  Each macro still expands to a braced block, as before.
    */
   /* FIXME: rescale values to 0..255 here? */
   #define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
   { \
       (r) = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss); \
       (g) = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss); \
       (b) = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss); \
   }
   #define RGB_FROM_RGB565(Pixel, r, g, b) \
   { \
       (r) = ((((Pixel) & 0xF800) >> 11) << 3); \
       (g) = ((((Pixel) & 0x07E0) >> 5) << 2); \
       (b) = (((Pixel) & 0x001F) << 3); \
   }
   #define RGB_FROM_RGB555(Pixel, r, g, b) \
   { \
       (r) = ((((Pixel) & 0x7C00) >> 10) << 3); \
       (g) = ((((Pixel) & 0x03E0) >> 5) << 3); \
       (b) = (((Pixel) & 0x001F) << 3); \
   }
   #define RGB_FROM_RGB888(Pixel, r, g, b) \
   { \
       (r) = (((Pixel) & 0xFF0000) >> 16); \
       (g) = (((Pixel) & 0xFF00) >> 8); \
       (b) = ((Pixel) & 0xFF); \
   }
   /*
    * RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)
    * Load one raw pixel that is bpp bytes wide from buf into Pixel.
    *
    *   bpp 2 / 4: buf is read through a Uint16 / Uint32 pointer.
    *   bpp 3:     the three bytes are combined according to SDL_BYTEORDER.
    *   other:     Pixel is set to 0, so the output is always defined (the
    *              old no-op expression left it untouched).
    *
    * Pixel must be an assignable lvalue.  The temporary has a trailing
    * underscore so it cannot collide with a caller variable named B.
    */
   #define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
   do { \
       switch (bpp) { \
           case 2: \
               Pixel = *((Uint16 *)(buf)); \
               break; \
    \
           case 3: { \
               Uint8 *rgb_bytes_ = (Uint8 *)(buf); \
               if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                   Pixel = rgb_bytes_[0] + (rgb_bytes_[1] << 8) + (rgb_bytes_[2] << 16); \
               } else { \
                   Pixel = (rgb_bytes_[0] << 16) + (rgb_bytes_[1] << 8) + rgb_bytes_[2]; \
               } \
           } \
           break; \
    \
           case 4: \
               Pixel = *((Uint32 *)(buf)); \
               break; \
    \
           default: \
               Pixel = 0; /* unsupported width: give a defined result */ \
               break; \
       } \
   } while (0)
   184 
   /*
    * DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)
    * Read one pixel of bpp bytes from buf and split it into r, g and b.
    *
    *   bpp 2 / 4: the raw value is stored in Pixel and decoded with
    *              RGB_FROM_PIXEL.
    *   bpp 3:     each channel byte is fetched directly at the offset given
    *              by its shift in fmt; Pixel is left untouched.  buf is used
    *              in pointer arithmetic here, so it must be a byte pointer.
    *   other:     Pixel, r, g and b are all set to 0 so the outputs are
    *              defined (the old no-op expression left them untouched).
    *
    * fmt may be any pointer expression; Pixel, r, g, b must be lvalues.
    */
   #define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
   do { \
       switch (bpp) { \
           case 2: \
               Pixel = *((Uint16 *)(buf)); \
               RGB_FROM_PIXEL(Pixel, (fmt), r, g, b); \
               break; \
    \
           case 3: { \
               if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                   r = *((buf) + (fmt)->Rshift / 8); \
                   g = *((buf) + (fmt)->Gshift / 8); \
                   b = *((buf) + (fmt)->Bshift / 8); \
               } else { \
                   r = *((buf) + 2 - (fmt)->Rshift / 8); \
                   g = *((buf) + 2 - (fmt)->Gshift / 8); \
                   b = *((buf) + 2 - (fmt)->Bshift / 8); \
               } \
           } \
           break; \
    \
           case 4: \
               Pixel = *((Uint32 *)(buf)); \
               RGB_FROM_PIXEL(Pixel, (fmt), r, g, b); \
               break; \
    \
           default: \
               Pixel = 0; /* unsupported width: give defined results */ \
               r = 0; \
               g = 0; \
               b = 0; \
               break; \
       } \
   } while (0)
   216 
   /*
    * Assemble R-G-B (and, for the *_FROM_RGBA forms, alpha) values into a
    * pixel value of the given layout and assign it to Pixel.
    *
    * PIXEL_FROM_RGB takes the per-channel loss and shift from fmt; the other
    * macros hard-code the layout named in the macro.
    *
    * All value arguments may be arbitrary expressions (each use is
    * parenthesized).  Shifts that can reach bit 31 are done on a Uint32 so a
    * promoted int such as (Uint8)0xFF << 24 no longer overflows a signed int.
    * Pixel must be an assignable lvalue.  Each macro still expands to a
    * braced block, as before.
    */
   #define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
   { \
       Pixel = ((Uint32)((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
               ((Uint32)((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
               ((Uint32)((b) >> (fmt)->Bloss) << (fmt)->Bshift); \
   }
   #define RGB565_FROM_RGB(Pixel, r, g, b) \
   { \
       Pixel = (((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3); \
   }
   #define RGB555_FROM_RGB(Pixel, r, g, b) \
   { \
       Pixel = (((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3); \
   }
   #define RGB888_FROM_RGB(Pixel, r, g, b) \
   { \
       Pixel = ((r) << 16) | ((g) << 8) | (b); \
   }
   #define ARGB8888_FROM_RGBA(Pixel, r, g, b, a) \
   { \
       Pixel = ((Uint32)(a) << 24) | ((r) << 16) | ((g) << 8) | (b); \
   }
   #define RGBA8888_FROM_RGBA(Pixel, r, g, b, a) \
   { \
       Pixel = ((Uint32)(r) << 24) | ((g) << 16) | ((b) << 8) | (a); \
   }
   #define ABGR8888_FROM_RGBA(Pixel, r, g, b, a) \
   { \
       Pixel = ((Uint32)(a) << 24) | ((b) << 16) | ((g) << 8) | (r); \
   }
   #define BGRA8888_FROM_RGBA(Pixel, r, g, b, a) \
   { \
       Pixel = ((Uint32)(b) << 24) | ((g) << 16) | ((r) << 8) | (a); \
   }
   /*
    * ASSEMBLE_RGB(buf, bpp, fmt, r, g, b)
    * Pack r, g and b according to fmt and store the pixel at buf.
    *
    *   bpp 2 / 4: the value is built with PIXEL_FROM_RGB and written through
    *              a Uint16 / Uint32 pointer.
    *   bpp 3:     each channel byte is written at the offset given by its
    *              shift in fmt (mirrored when SDL_BYTEORDER is not
    *              little-endian); buf must be a byte pointer here.
    *   other:     nothing is written.
    *
    * The temporaries carry a trailing underscore: they used to be called
    * Pixel, which silently captured a caller variable of that name when it
    * appeared in a channel argument such as (Pixel >> 16) & 0xFF.
    * Arguments are forwarded parenthesized, so fmt, r, g, b may be
    * arbitrary expressions.
    */
   #define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
   { \
       switch (bpp) { \
           case 2: { \
               Uint16 rgb16_; \
    \
               PIXEL_FROM_RGB(rgb16_, (fmt), (r), (g), (b)); \
               *((Uint16 *)(buf)) = rgb16_; \
           } \
           break; \
    \
           case 3: { \
               if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                   *((buf) + (fmt)->Rshift / 8) = (r); \
                   *((buf) + (fmt)->Gshift / 8) = (g); \
                   *((buf) + (fmt)->Bshift / 8) = (b); \
               } else { \
                   *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
                   *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
                   *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
               } \
           } \
           break; \
    \
           case 4: { \
               Uint32 rgb32_; \
    \
               PIXEL_FROM_RGB(rgb32_, (fmt), (r), (g), (b)); \
               *((Uint32 *)(buf)) = rgb32_; \
           } \
           break; \
    \
           default: \
               break; /* unsupported width: leave buf alone */ \
       } \
   }
   /*
    * ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)
    * Like ASSEMBLE_RGB, but for 2- and 4-byte pixels the bits selected by
    * Amask are kept from the pixel already stored at buf and OR-ed into the
    * newly packed value.  For 3-byte pixels only the three channel bytes are
    * written and Amask is not used.  Any other bpp writes nothing.
    *
    * buf and Amask are parenthesized at every use (previously an Amask such
    * as `x | y` was parsed as `(*bufp & x) | y`).  The temporaries carry a
    * trailing underscore so they cannot capture caller variables named
    * Pixel or bufp.
    */
   #define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask) \
   { \
       switch (bpp) { \
           case 2: { \
               Uint16 *dst16_ = (Uint16 *)(buf); \
               Uint16 rgb16_; \
    \
               PIXEL_FROM_RGB(rgb16_, (fmt), (r), (g), (b)); \
               *dst16_ = rgb16_ | (*dst16_ & (Amask)); \
           } \
           break; \
    \
           case 3: { \
               if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                   *((buf) + (fmt)->Rshift / 8) = (r); \
                   *((buf) + (fmt)->Gshift / 8) = (g); \
                   *((buf) + (fmt)->Bshift / 8) = (b); \
               } else { \
                   *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
                   *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
                   *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
               } \
           } \
           break; \
    \
           case 4: { \
               Uint32 *dst32_ = (Uint32 *)(buf); \
               Uint32 rgb32_; \
    \
               PIXEL_FROM_RGB(rgb32_, (fmt), (r), (g), (b)); \
               *dst32_ = rgb32_ | (*dst32_ & (Amask)); \
           } \
           break; \
    \
           default: \
               break; /* unsupported width: leave buf alone */ \
       } \
   }
   322 
   /*
    * Split a pixel value into its R, G, B and A components.
    *
    * RGBA_FROM_PIXEL applies mask, shift and loss from fmt to each channel;
    * RGBA_FROM_8888 applies only mask and shift; the remaining macros
    * hard-code the byte order named in the macro.
    *
    * Pixel and fmt may be arbitrary expressions (each use is parenthesized);
    * r, g, b and a must be assignable lvalues.  Pixel is evaluated once per
    * channel.  Each macro still expands to a braced block, as before.
    */
   /* FIXME: Should we rescale alpha into 0..255 here? */
   #define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
   { \
       (r) = (((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss; \
       (g) = (((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss; \
       (b) = (((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss; \
       (a) = (((Pixel) & (fmt)->Amask) >> (fmt)->Ashift) << (fmt)->Aloss; \
   }
   #define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
   { \
       (r) = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift; \
       (g) = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift; \
       (b) = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift; \
       (a) = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift; \
   }
   #define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
   { \
       (r) = ((Pixel) >> 24); \
       (g) = (((Pixel) >> 16) & 0xFF); \
       (b) = (((Pixel) >> 8) & 0xFF); \
       (a) = ((Pixel) & 0xFF); \
   }
   #define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
   { \
       (r) = (((Pixel) >> 16) & 0xFF); \
       (g) = (((Pixel) >> 8) & 0xFF); \
       (b) = ((Pixel) & 0xFF); \
       (a) = ((Pixel) >> 24); \
   }
   #define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
   { \
       (r) = ((Pixel) & 0xFF); \
       (g) = (((Pixel) >> 8) & 0xFF); \
       (b) = (((Pixel) >> 16) & 0xFF); \
       (a) = ((Pixel) >> 24); \
   }
   #define RGBA_FROM_BGRA8888(Pixel, r, g, b, a) \
   { \
       (r) = (((Pixel) >> 8) & 0xFF); \
       (g) = (((Pixel) >> 16) & 0xFF); \
       (b) = ((Pixel) >> 24); \
       (a) = ((Pixel) & 0xFF); \
   }
   /*
    * DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)
    * Read one pixel of bpp bytes from buf and split it into r, g, b and a.
    *
    *   bpp 2 / 4: the raw value is stored in Pixel and decoded with
    *              RGBA_FROM_PIXEL.
    *   bpp 3:     each colour byte is fetched directly at the offset given
    *              by its shift in fmt and a is set to 0xFF; Pixel is left
    *              untouched.  buf must be a byte pointer here.
    *   other:     Pixel, r, g, b and a are all set to 0 so the outputs are
    *              defined (the old no-op expression left them untouched).
    *
    * fmt may be any pointer expression; Pixel, r, g, b, a must be lvalues.
    */
   #define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
   do { \
       switch (bpp) { \
           case 2: \
               Pixel = *((Uint16 *)(buf)); \
               RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a); \
               break; \
    \
           case 3: { \
               if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                   r = *((buf) + (fmt)->Rshift / 8); \
                   g = *((buf) + (fmt)->Gshift / 8); \
                   b = *((buf) + (fmt)->Bshift / 8); \
               } else { \
                   r = *((buf) + 2 - (fmt)->Rshift / 8); \
                   g = *((buf) + 2 - (fmt)->Gshift / 8); \
                   b = *((buf) + 2 - (fmt)->Bshift / 8); \
               } \
               a = 0xFF; \
           } \
           break; \
    \
           case 4: \
               Pixel = *((Uint32 *)(buf)); \
               RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a); \
               break; \
    \
           default: \
               Pixel = 0; /* unsupported width: give defined results */ \
               r = 0; \
               g = 0; \
               b = 0; \
               a = 0; \
               break; \
       } \
   } while (0)
   398 
   /*
    * PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)
    * Pack r, g, b and a into Pixel: each channel is shifted right by its
    * loss and left by its shift, both taken from fmt, then OR-ed together.
    *
    * fmt and the channel arguments may be arbitrary expressions (each use is
    * parenthesized).  The left shifts are done on a Uint32, so a channel
    * placed in the top byte no longer overflows a signed int.
    * Pixel must be an assignable lvalue.
    */
   /* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
   #define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
   { \
       Pixel = ((Uint32)((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
               ((Uint32)((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
               ((Uint32)((b) >> (fmt)->Bloss) << (fmt)->Bshift) | \
               ((Uint32)((a) >> (fmt)->Aloss) << (fmt)->Ashift); \
   }
   /*
    * ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)
    * Pack r, g, b and a according to fmt and store the pixel at buf.
    *
    *   bpp 2 / 4: the value is built with PIXEL_FROM_RGBA and written
    *              through a Uint16 / Uint32 pointer.
    *   bpp 3:     only the three colour bytes are written, at the offsets
    *              given by their shifts in fmt; a is not stored.  buf must
    *              be a byte pointer here.
    *   other:     nothing is written.
    *
    * The temporaries carry a trailing underscore: they used to be called
    * Pixel, which silently captured a caller variable of that name when it
    * appeared in a channel argument.  Arguments are forwarded parenthesized,
    * so fmt, r, g, b, a may be arbitrary expressions.
    */
   #define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
   { \
       switch (bpp) { \
           case 2: { \
               Uint16 rgba16_; \
    \
               PIXEL_FROM_RGBA(rgba16_, (fmt), (r), (g), (b), (a)); \
               *((Uint16 *)(buf)) = rgba16_; \
           } \
           break; \
    \
           case 3: { \
               if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                   *((buf) + (fmt)->Rshift / 8) = (r); \
                   *((buf) + (fmt)->Gshift / 8) = (g); \
                   *((buf) + (fmt)->Bshift / 8) = (b); \
               } else { \
                   *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
                   *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
                   *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
               } \
           } \
           break; \
    \
           case 4: { \
               Uint32 rgba32_; \
    \
               PIXEL_FROM_RGBA(rgba32_, (fmt), (r), (g), (b), (a)); \
               *((Uint32 *)(buf)) = rgba32_; \
           } \
           break; \
    \
           default: \
               break; /* unsupported width: leave buf alone */ \
       } \
   }
   440 
   /*
    * Blend the RGB values of two pixels based on a source alpha value.
    *
    * For each channel: d = d + ((s - d) * A) / 255, computed in int.
    * dR, dG and dB are read and then overwritten, so they must be lvalues.
    * sR, sG, sB and A may be arbitrary expressions: every use is
    * parenthesized (previously an A such as `x + y` had only `x` cast and
    * multiplied).
    */
   #define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB) \
   do { \
       dR = ((((int)((sR) - (dR)) * (int)(A)) / 255) + (dR)); \
       dG = ((((int)((sG) - (dG)) * (int)(A)) / 255) + (dG)); \
       dB = ((((int)((sB) - (dB)) * (int)(A)) / 255) + (dB)); \
   } while(0)
   448 
   449 
   /*
    * This is a very useful loop for optimizing blitters.
    * It is switched on everywhere except Visual C++ 7 (_MSC_VER == 1300),
    * whose optimizer has a bug when compiling this code.
    */
   #if !defined(_MSC_VER) || (_MSC_VER != 1300)
   #define USE_DUFFS_LOOP
   #endif
   456 #ifdef USE_DUFFS_LOOP
   457 
   /*
    * Duff's-device loops: execute the statement pixel_copy_increment exactly
    * `width` times, with the loop body unrolled 8 or 4 times.
    *
    * width is evaluated once.  A width of zero or less executes the
    * statement zero times, which is what the plain-loop DUFFS_LOOP used
    * without USE_DUFFS_LOOP does (the previous expansion entered `case 0`
    * and ran one full unrolled pass when width was 0).
    */

   /* 8-times unrolled loop */
   #define DUFFS_LOOP8(pixel_copy_increment, width) \
   { const int duffs_width_ = (width); \
     int n = (duffs_width_ + 7) / 8; \
       if (duffs_width_ > 0) { \
           switch (duffs_width_ & 7) { \
           case 0: do { pixel_copy_increment; \
           case 7:      pixel_copy_increment; \
           case 6:      pixel_copy_increment; \
           case 5:      pixel_copy_increment; \
           case 4:      pixel_copy_increment; \
           case 3:      pixel_copy_increment; \
           case 2:      pixel_copy_increment; \
           case 1:      pixel_copy_increment; \
                   } while (--n > 0); \
           } \
       } \
   }

   /* 4-times unrolled loop */
   #define DUFFS_LOOP4(pixel_copy_increment, width) \
   { const int duffs_width_ = (width); \
     int n = (duffs_width_ + 3) / 4; \
       if (duffs_width_ > 0) { \
           switch (duffs_width_ & 3) { \
           case 0: do { pixel_copy_increment; \
           case 3:      pixel_copy_increment; \
           case 2:      pixel_copy_increment; \
           case 1:      pixel_copy_increment; \
                   } while (--n > 0); \
           } \
       } \
   }

   /* Use the 8-times version of the loop by default */
   #define DUFFS_LOOP(pixel_copy_increment, width) \
       DUFFS_LOOP8(pixel_copy_increment, width)
   489 
   /*
    * Special version of Duff's device for even more optimization.
    *
    * Covers `width` pixels using three caller-supplied statements that
    * handle 1, 2 and 4 pixels respectively: first at most one 1-pixel step,
    * then at most one 2-pixel step, then at most one 4-pixel step, and
    * finally pairs of 4-pixel steps for what remains (a multiple of 8).
    *
    * The previous expansion divided the remaining count by 8 *before*
    * testing bit 4, so the 4-pixel statement ran the wrong number of times
    * (e.g. twice for width 4, covering 8 pixels).  A width of zero or less
    * now executes nothing.  width is evaluated once.
    */
   #define DUFFS_LOOP_124(pixel_copy_increment1, \
                          pixel_copy_increment2, \
                          pixel_copy_increment4, width) \
   { int n = (width); \
       if (n > 0) { \
           if (n & 1) { \
               pixel_copy_increment1; n -= 1; \
           } \
           if (n & 2) { \
               pixel_copy_increment2; n -= 2; \
           } \
           if (n & 4) { \
               pixel_copy_increment4; n -= 4; \
           } \
           if (n) { \
               n /= 8; /* what is left is a multiple of 8 */ \
               do { \
                   pixel_copy_increment4; \
                   pixel_copy_increment4; \
               } while (--n > 0); \
           } \
       } \
   }
   510 
   511 #else
   512 
   /*
    * Plain-loop versions, used when Duff's device is disabled.
    * DUFFS_LOOP runs pixel_copy_increment once per pixel; the 8, 4 and 124
    * variants all forward to it (the 124 form uses only its 1-pixel
    * statement and ignores the 2- and 4-pixel ones).
    */
   #define DUFFS_LOOP(pixel_copy_increment, width) \
   { int n = width; \
       for ( ; n > 0; --n ) { \
           pixel_copy_increment; \
       } \
   }
   #define DUFFS_LOOP8(pixel_copy_increment, width) \
       DUFFS_LOOP(pixel_copy_increment, width)
   #define DUFFS_LOOP4(pixel_copy_increment, width) \
       DUFFS_LOOP(pixel_copy_increment, width)
   #define DUFFS_LOOP_124(pixel_copy_increment1, \
                          pixel_copy_increment2, \
                          pixel_copy_increment4, width) \
       DUFFS_LOOP(pixel_copy_increment1, width)
   528 
   529 #endif /* USE_DUFFS_LOOP */
   530 
   /* Silence warning 4550 on Visual C++ 6.0 and later (_MSC_VER >= 600) */
   #ifdef _MSC_VER
   #if _MSC_VER >= 600
   #pragma warning(disable: 4550)
   #endif
   #endif
   535 
   536 #endif /* _SDL_blit_h */
   537 
   538 /* vi: set ts=4 sw=4 expandtab: */