src/video/SDL_blit.h
author Sam Lantinga
Tue, 22 Feb 2011 21:44:36 -0800
changeset 5389 24903690f48a
parent 5288 d4381f3b0d1e
child 5423 b69fa50e80d7
permissions -rw-r--r--
Re-added the 3DNow! and AltiVec instruction support.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2011 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #ifndef _SDL_blit_h
    25 #define _SDL_blit_h
    26 
    27 #include "SDL_cpuinfo.h"
    28 #include "SDL_endian.h"
    29 #include "SDL_surface.h"
    30 
    /*
     * SDL blit copy flags.
     *
     * Every flag occupies its own bit, so values can be OR-ed together.
     * SDL_COPY_RLE_MASK is the union of the three SDL_COPY_RLE_* bits.
     */
    #define SDL_COPY_MODULATE_COLOR     (1 << 0)    /* 0x00000001 */
    #define SDL_COPY_MODULATE_ALPHA     (1 << 1)    /* 0x00000002 */
    #define SDL_COPY_BLEND              (1 << 4)    /* 0x00000010 */
    #define SDL_COPY_ADD                (1 << 5)    /* 0x00000020 */
    #define SDL_COPY_MOD                (1 << 6)    /* 0x00000040 */
    #define SDL_COPY_COLORKEY           (1 << 8)    /* 0x00000100 */
    #define SDL_COPY_NEAREST            (1 << 9)    /* 0x00000200 */
    #define SDL_COPY_RLE_DESIRED        (1 << 12)   /* 0x00001000 */
    #define SDL_COPY_RLE_COLORKEY       (1 << 13)   /* 0x00002000 */
    #define SDL_COPY_RLE_ALPHAKEY       (1 << 14)   /* 0x00004000 */
    #define SDL_COPY_RLE_MASK \
        (SDL_COPY_RLE_DESIRED | SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY)
    43 
    /*
     * SDL blit CPU flags.
     *
     * SDL_CPU_ANY is zero (no bit set); every other flag is one distinct bit.
     */
    #define SDL_CPU_ANY                 0
    #define SDL_CPU_MMX                 (1 << 0)    /* 0x00000001 */
    #define SDL_CPU_3DNOW               (1 << 1)    /* 0x00000002 */
    #define SDL_CPU_SSE                 (1 << 2)    /* 0x00000004 */
    #define SDL_CPU_SSE2                (1 << 3)    /* 0x00000008 */
    #define SDL_CPU_ALTIVEC_PREFETCH    (1 << 4)    /* 0x00000010 */
    #define SDL_CPU_ALTIVEC_NOPREFETCH  (1 << 5)    /* 0x00000020 */
    52 
    53 typedef struct
    54 {
    55     Uint8 *src;
    56     int src_w, src_h;
    57     int src_pitch;
    58     int src_skip;
    59     Uint8 *dst;
    60     int dst_w, dst_h;
    61     int dst_pitch;
    62     int dst_skip;
    63     SDL_PixelFormat *src_fmt;
    64     SDL_PixelFormat *dst_fmt;
    65     Uint8 *table;
    66     int flags;
    67     Uint32 colorkey;
    68     Uint8 r, g, b, a;
    69 } SDL_BlitInfo;
    70 
    71 typedef void (SDLCALL * SDL_BlitFunc) (SDL_BlitInfo * info);
    72 
    73 typedef struct
    74 {
    75     Uint32 src_format;
    76     Uint32 dst_format;
    77     int flags;
    78     int cpu;
    79     SDL_BlitFunc func;
    80 } SDL_BlitFuncEntry;
    81 
    82 /* Blit mapping definition */
    83 typedef struct SDL_BlitMap
    84 {
    85     SDL_Surface *dst;
    86     int identity;
    87     SDL_blit blit;
    88     void *data;
    89     SDL_BlitInfo info;
    90 
    91     /* the version count matches the destination; mismatch indicates
    92        an invalid mapping */
    93     Uint32 palette_version;
    94 } SDL_BlitMap;
    95 
    96 /* Functions found in SDL_blit.c */
    97 extern int SDL_CalculateBlit(SDL_Surface * surface);
    98 
    99 /* Functions found in SDL_blit_*.c */
   100 extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface * surface);
   101 extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface * surface);
   102 extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface * surface);
   103 extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface * surface);
   104 
   105 /*
   106  * Useful macros for blitting routines
   107  */
   108 
   /*
    * DECLARE_ALIGNED(T, V, N): declare a variable V of type T, requesting an
    * alignment of N through the GCC attribute or the MSVC declspec.
    * On any other compiler the declaration is emitted with no alignment
    * request at all.
    */
   #ifdef __GNUC__
   #  define DECLARE_ALIGNED(T, V, N)  T __attribute__((aligned(N))) V
   #else
   #  ifdef _MSC_VER
   #    define DECLARE_ALIGNED(T, V, N)  __declspec(align(N)) T V
   #  else
   #    define DECLARE_ALIGNED(T, V, N)  T V
   #  endif
   #endif
   116 
   /*
    * Split a pixel value into its R, G and B components using the masks,
    * shifts and loss counts stored in the format pointed to by fmt.
    *
    *   Pixel    pixel value (any integer expression)
    *   fmt      pointer expression to the pixel format
    *   r, g, b  lvalues that receive the components
    *
    * Each component is masked out, shifted down to bit 0 and then shifted
    * left by the channel's loss count.
    * FIXME: rescale values to 0..255 here?
    *
    * Every parameter is parenthesised so that expression arguments such as
    * `hi | lo` or `&format` expand with the intended precedence.
    */
   #define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
   { \
       (r) = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss); \
       (g) = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss); \
       (b) = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss); \
   }
   /*
    * Fixed-format variants of RGB_FROM_PIXEL: split Pixel into r, g, b with
    * hard-coded masks and shifts.
    *
    *   RGB_FROM_RGB565  5-6-5 layout; r and b end up in the top 5 bits of a
    *                    byte, g in the top 6 bits
    *   RGB_FROM_RGB555  5-5-5 layout; each value ends up in the top 5 bits
    *   RGB_FROM_RGB888  8-8-8 layout; each value is a full byte
    *
    * Pixel and the outputs are parenthesised so expression arguments keep
    * their intended precedence.
    */
   #define RGB_FROM_RGB565(Pixel, r, g, b) \
   { \
       (r) = ((((Pixel) & 0xF800) >> 11) << 3); \
       (g) = ((((Pixel) & 0x07E0) >> 5) << 2); \
       (b) = (((Pixel) & 0x001F) << 3); \
   }
   #define RGB_FROM_RGB555(Pixel, r, g, b) \
   { \
       (r) = ((((Pixel) & 0x7C00) >> 10) << 3); \
       (g) = ((((Pixel) & 0x03E0) >> 5) << 3); \
       (b) = (((Pixel) & 0x001F) << 3); \
   }
   #define RGB_FROM_RGB888(Pixel, r, g, b) \
   { \
       (r) = (((Pixel) & 0xFF0000) >> 16); \
       (g) = (((Pixel) & 0xFF00) >> 8); \
       (b) = ((Pixel) & 0xFF); \
   }
   /*
    * Load one pixel of bpp bytes from buf into Pixel.
    *
    *   bpp 2   read as one Uint16
    *   bpp 3   assembled from three bytes; byte order follows SDL_BYTEORDER
    *   bpp 4   read as one Uint32
    *   other   Pixel is set to 0, so it is never left indeterminate
    *
    * buf is cast as written, so for bpp 2 and 4 it must be suitably aligned
    * for the access.
    */
   #define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
   do { \
       switch (bpp) { \
       case 2: \
           (Pixel) = *((Uint16 *)(buf)); \
           break; \
       case 3: { \
           Uint8 *B = (Uint8 *)(buf); \
           if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
               (Pixel) = B[0] + (B[1] << 8) + (B[2] << 16); \
           } else { \
               (Pixel) = (B[0] << 16) + (B[1] << 8) + B[2]; \
           } \
       } \
           break; \
       case 4: \
           (Pixel) = *((Uint32 *)(buf)); \
           break; \
       default: \
           /* unsupported size: give the caller a defined value */ \
           (Pixel) = 0; \
           break; \
       } \
   } while (0)
   169 
   /*
    * Load one pixel of bpp bytes from buf and split it into r, g, b.
    *
    *   bpp 2/4  the pixel is read into Pixel as a Uint16/Uint32 and split
    *            with RGB_FROM_PIXEL
    *   bpp 3    each component is read directly from the byte selected by
    *            the channel's shift (byte order follows SDL_BYTEORDER);
    *            Pixel is not written in this case
    *   other    Pixel, r, g and b are all set to 0 so no output is left
    *            indeterminate
    *
    * buf must be a byte pointer expression for the 3-byte case.
    */
   #define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
   do { \
       switch (bpp) { \
       case 2: \
           (Pixel) = *((Uint16 *)(buf)); \
           RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b)); \
           break; \
       case 3: \
           if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
               (r) = *((buf) + (fmt)->Rshift / 8); \
               (g) = *((buf) + (fmt)->Gshift / 8); \
               (b) = *((buf) + (fmt)->Bshift / 8); \
           } else { \
               (r) = *((buf) + 2 - (fmt)->Rshift / 8); \
               (g) = *((buf) + 2 - (fmt)->Gshift / 8); \
               (b) = *((buf) + 2 - (fmt)->Bshift / 8); \
           } \
           break; \
       case 4: \
           (Pixel) = *((Uint32 *)(buf)); \
           RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b)); \
           break; \
       default: \
           /* unsupported size: give the caller defined values */ \
           (Pixel) = 0; \
           (r) = 0; \
           (g) = 0; \
           (b) = 0; \
           break; \
       } \
   } while (0)
   201 
   /*
    * Assemble R-G-B values into a pixel value of the format pointed to by fmt
    * and assign it to Pixel.
    *
    *   Pixel    lvalue that receives the packed pixel
    *   fmt      pointer expression to the pixel format
    *   r, g, b  component values (any integer expressions)
    *
    * Each component is shifted right by the channel's loss count and then
    * left to the channel's position.  The left shift is carried out in
    * Uint32 so a component placed in the top byte (shift 24) cannot overflow
    * a signed int, and all parameters are parenthesised.
    */
   #define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
   { \
       (Pixel) = ((Uint32)((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
                 ((Uint32)((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
                 ((Uint32)((b) >> (fmt)->Bloss) << (fmt)->Bshift); \
   }
   /*
    * Fixed-format packers: combine component values into Pixel using
    * hard-coded bit positions.
    *
    *   RGB565_FROM_RGB      r>>3 at bit 11, g>>2 at bit 5, b>>3 at bit 0
    *   RGB555_FROM_RGB      r>>3 at bit 10, g>>3 at bit 5, b>>3 at bit 0
    *   RGB888_FROM_RGB      r at bit 16, g at bit 8, b at bit 0
    *   ARGB8888_FROM_RGBA   a,r,g,b from the top byte down
    *   RGBA8888_FROM_RGBA   r,g,b,a from the top byte down
    *   ABGR8888_FROM_RGBA   a,b,g,r from the top byte down
    *   BGRA8888_FROM_RGBA   b,g,r,a from the top byte down
    *
    * Components are not masked; callers pass values that fit their channel.
    * All parameters are parenthesised, and the component that lands in the
    * top byte is widened to Uint32 before the shift by 24 so that values of
    * 0x80 and above do not overflow a signed int.
    */
   #define RGB565_FROM_RGB(Pixel, r, g, b) \
   { \
       (Pixel) = (((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3); \
   }
   #define RGB555_FROM_RGB(Pixel, r, g, b) \
   { \
       (Pixel) = (((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3); \
   }
   #define RGB888_FROM_RGB(Pixel, r, g, b) \
   { \
       (Pixel) = ((r) << 16) | ((g) << 8) | (b); \
   }
   #define ARGB8888_FROM_RGBA(Pixel, r, g, b, a) \
   { \
       (Pixel) = ((Uint32)(a) << 24) | ((r) << 16) | ((g) << 8) | (b); \
   }
   #define RGBA8888_FROM_RGBA(Pixel, r, g, b, a) \
   { \
       (Pixel) = ((Uint32)(r) << 24) | ((g) << 16) | ((b) << 8) | (a); \
   }
   #define ABGR8888_FROM_RGBA(Pixel, r, g, b, a) \
   { \
       (Pixel) = ((Uint32)(a) << 24) | ((b) << 16) | ((g) << 8) | (r); \
   }
   #define BGRA8888_FROM_RGBA(Pixel, r, g, b, a) \
   { \
       (Pixel) = ((Uint32)(b) << 24) | ((g) << 16) | ((r) << 8) | (a); \
   }
   /*
    * Pack r, g, b into the format pointed to by fmt and store the pixel,
    * bpp bytes wide, at buf.
    *
    *   bpp 2/4  the pixel is built with PIXEL_FROM_RGB and stored as one
    *            Uint16/Uint32
    *   bpp 3    each component is stored in the byte selected by its shift
    *            (byte order follows SDL_BYTEORDER); buf must be a byte
    *            pointer expression
    *   other    nothing is written
    *
    * Parameters are parenthesised, also when forwarded to PIXEL_FROM_RGB,
    * so expression arguments keep their intended precedence.
    */
   #define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
   { \
       switch (bpp) { \
       case 2: { \
           Uint16 Pixel; \
           PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
           *((Uint16 *)(buf)) = Pixel; \
       } \
           break; \
       case 3: \
           if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
               *((buf) + (fmt)->Rshift / 8) = (r); \
               *((buf) + (fmt)->Gshift / 8) = (g); \
               *((buf) + (fmt)->Bshift / 8) = (b); \
           } else { \
               *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
               *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
               *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
           } \
           break; \
       case 4: { \
           Uint32 Pixel; \
           PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
           *((Uint32 *)(buf)) = Pixel; \
       } \
           break; \
       } \
   }
   /*
    * Pack r, g, b into the format pointed to by fmt and store the pixel at
    * buf while keeping the destination bits selected by Amask.
    *
    *   bpp 2/4  the pixel is built with PIXEL_FROM_RGB, OR-ed with
    *            (old destination pixel & Amask) and stored as one
    *            Uint16/Uint32
    *   bpp 3    only the R, G and B bytes are written (byte order follows
    *            SDL_BYTEORDER); Amask is not used
    *   other    nothing is written
    *
    * buf is cast as a whole, so a pointer expression such as `dst + 4`
    * addresses byte offset 4 rather than element 4 of the cast type.
    */
   #define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask) \
   { \
       switch (bpp) { \
       case 2: { \
           Uint16 *bufp = (Uint16 *)(buf); \
           Uint16 Pixel; \
           PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
           *bufp = Pixel | (*bufp & (Amask)); \
       } \
           break; \
       case 3: \
           if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
               *((buf) + (fmt)->Rshift / 8) = (r); \
               *((buf) + (fmt)->Gshift / 8) = (g); \
               *((buf) + (fmt)->Bshift / 8) = (b); \
           } else { \
               *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
               *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
               *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
           } \
           break; \
       case 4: { \
           Uint32 *bufp = (Uint32 *)(buf); \
           Uint32 Pixel; \
           PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
           *bufp = Pixel | (*bufp & (Amask)); \
       } \
           break; \
       } \
   }
   307 
   /*
    * Split a pixel value into r, g, b, a using the format pointed to by fmt.
    *
    *   RGBA_FROM_PIXEL  masks each channel, shifts it down to bit 0 and then
    *                    left by the channel's loss count
    *                    FIXME: Should we rescale alpha into 0..255 here?
    *   RGBA_FROM_8888   masks and shifts only; no loss adjustment is applied
    *
    * All parameters are parenthesised so that expression arguments such as
    * `hi | lo` or `&format` expand with the intended precedence.
    */
   #define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
   { \
       (r) = (((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss; \
       (g) = (((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss; \
       (b) = (((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss; \
       (a) = (((Pixel) & (fmt)->Amask) >> (fmt)->Ashift) << (fmt)->Aloss; \
   }
   #define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
   { \
       (r) = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift; \
       (g) = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift; \
       (b) = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift; \
       (a) = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift; \
   }
   /*
    * Fixed-format unpackers for 32-bit pixels: split Pixel into four 8-bit
    * values.  The macro name gives the channel order from the top byte down:
    *
    *   RGBA_FROM_RGBA8888   r = bits 31..24, g = 23..16, b = 15..8, a = 7..0
    *   RGBA_FROM_ARGB8888   a = bits 31..24, r = 23..16, g = 15..8, b = 7..0
    *   RGBA_FROM_ABGR8888   a = bits 31..24, b = 23..16, g = 15..8, r = 7..0
    *   RGBA_FROM_BGRA8888   b = bits 31..24, g = 23..16, r = 15..8, a = 7..0
    *
    * The top byte is taken with a plain shift and is not masked.  Pixel and
    * the outputs are parenthesised so expression arguments keep their
    * intended precedence.
    */
   #define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
   { \
       (r) = ((Pixel) >> 24); \
       (g) = (((Pixel) >> 16) & 0xFF); \
       (b) = (((Pixel) >> 8) & 0xFF); \
       (a) = ((Pixel) & 0xFF); \
   }
   #define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
   { \
       (r) = (((Pixel) >> 16) & 0xFF); \
       (g) = (((Pixel) >> 8) & 0xFF); \
       (b) = ((Pixel) & 0xFF); \
       (a) = ((Pixel) >> 24); \
   }
   #define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
   { \
       (r) = ((Pixel) & 0xFF); \
       (g) = (((Pixel) >> 8) & 0xFF); \
       (b) = (((Pixel) >> 16) & 0xFF); \
       (a) = ((Pixel) >> 24); \
   }
   #define RGBA_FROM_BGRA8888(Pixel, r, g, b, a) \
   { \
       (r) = (((Pixel) >> 8) & 0xFF); \
       (g) = (((Pixel) >> 16) & 0xFF); \
       (b) = ((Pixel) >> 24); \
       (a) = ((Pixel) & 0xFF); \
   }
   /*
    * Load one pixel of bpp bytes from buf and split it into r, g, b, a.
    *
    *   bpp 2/4  the pixel is read into Pixel as a Uint16/Uint32 and split
    *            with RGBA_FROM_PIXEL
    *   bpp 3    r, g, b are read from the bytes selected by the channel
    *            shifts (byte order follows SDL_BYTEORDER) and a is set to
    *            0xFF; Pixel is not written in this case
    *   other    Pixel, r, g, b and a are all set to 0 so no output is left
    *            indeterminate
    *
    * buf must be a byte pointer expression for the 3-byte case.
    */
   #define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
   do { \
       switch (bpp) { \
       case 2: \
           (Pixel) = *((Uint16 *)(buf)); \
           RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a)); \
           break; \
       case 3: \
           if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
               (r) = *((buf) + (fmt)->Rshift / 8); \
               (g) = *((buf) + (fmt)->Gshift / 8); \
               (b) = *((buf) + (fmt)->Bshift / 8); \
           } else { \
               (r) = *((buf) + 2 - (fmt)->Rshift / 8); \
               (g) = *((buf) + 2 - (fmt)->Gshift / 8); \
               (b) = *((buf) + 2 - (fmt)->Bshift / 8); \
           } \
           (a) = 0xFF; \
           break; \
       case 4: \
           (Pixel) = *((Uint32 *)(buf)); \
           RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a)); \
           break; \
       default: \
           /* unsupported size: give the caller defined values */ \
           (Pixel) = 0; \
           (r) = 0; \
           (g) = 0; \
           (b) = 0; \
           (a) = 0; \
           break; \
       } \
   } while (0)
   383 
   /*
    * Assemble R-G-B-A values into a pixel value of the format pointed to by
    * fmt and assign it to Pixel.
    *
    * Each component is shifted right by the channel's loss count and then
    * left to the channel's position.
    * FIXME: this isn't correct, especially for Alpha (maximum != 255)
    *
    * The left shift is carried out in Uint32 so a component placed in the
    * top byte (shift 24) cannot overflow a signed int, and all parameters
    * are parenthesised.
    */
   #define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
   { \
       (Pixel) = ((Uint32)((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
                 ((Uint32)((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
                 ((Uint32)((b) >> (fmt)->Bloss) << (fmt)->Bshift) | \
                 ((Uint32)((a) >> (fmt)->Aloss) << (fmt)->Ashift); \
   }
   /*
    * Pack r, g, b, a into the format pointed to by fmt and store the pixel,
    * bpp bytes wide, at buf.
    *
    *   bpp 2/4  the pixel is built with PIXEL_FROM_RGBA and stored as one
    *            Uint16/Uint32
    *   bpp 3    only r, g and b are stored, each in the byte selected by its
    *            shift (byte order follows SDL_BYTEORDER); a is not used and
    *            buf must be a byte pointer expression
    *   other    nothing is written
    *
    * Parameters are parenthesised, also when forwarded to PIXEL_FROM_RGBA,
    * so expression arguments keep their intended precedence.
    */
   #define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
   { \
       switch (bpp) { \
       case 2: { \
           Uint16 Pixel; \
           PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
           *((Uint16 *)(buf)) = Pixel; \
       } \
           break; \
       case 3: \
           if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
               *((buf) + (fmt)->Rshift / 8) = (r); \
               *((buf) + (fmt)->Gshift / 8) = (g); \
               *((buf) + (fmt)->Bshift / 8) = (b); \
           } else { \
               *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
               *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
               *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
           } \
           break; \
       case 4: { \
           Uint32 Pixel; \
           PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
           *((Uint32 *)(buf)) = Pixel; \
       } \
           break; \
       } \
   }
   425 
   /*
    * Blend the RGB values of two pixels based on a source alpha value.
    *
    *   sR, sG, sB  source components (any integer expressions)
    *   A           source alpha; 255 replaces the destination with the
    *               source, 0 leaves the destination unchanged
    *   dR, dG, dB  destination components; lvalues, updated in place
    *
    * For each channel: d = ((s - d) * A) / 255 + d, computed in int with the
    * division truncating toward zero.
    *
    * All parameters are parenthesised so that expression arguments (for
    * example `hi | lo` as a source component) are subtracted, cast and
    * multiplied as a unit.
    */
   #define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB) \
   do { \
       (dR) = ((((int)((sR) - (dR)) * (int)(A)) / 255) + (dR)); \
       (dG) = ((((int)((sG) - (dG)) * (int)(A)) / 255) + (dG)); \
       (dB) = ((((int)((sB) - (dB)) * (int)(A)) / 255) + (dB)); \
   } while (0)
   433 
   434 
   /*
    * The unrolled Duff's-device loops are a very useful optimisation for
    * blitters, so they are enabled everywhere except when _MSC_VER is
    * exactly 1300: there's a bug in the Visual C++ 7 optimizer when
    * compiling this code.
    */
   #if !(defined(_MSC_VER) && (_MSC_VER == 1300))
   #define USE_DUFFS_LOOP
   #endif
   441 #ifdef USE_DUFFS_LOOP
   442 
   /*
    * Duff's-device loops: execute pixel_copy_increment exactly `width` times.
    *
    *   DUFFS_LOOP8  8-times unrolled
    *   DUFFS_LOOP4  4-times unrolled
    *   DUFFS_LOOP   default entry point; uses the 8-times version
    *
    * The switch jumps into the middle of the unrolled body to handle the
    * remainder (width modulo the unroll factor); every case label falls
    * through to the next on purpose.
    *
    * A width of zero or less executes nothing, matching the plain counted
    * loop used when USE_DUFFS_LOOP is not defined.  width is parenthesised
    * but still evaluated more than once, so it must not have side effects.
    * The macro declares a local `n`; pixel_copy_increment must not rely on
    * an outer variable of that name.
    */
   #define DUFFS_LOOP8(pixel_copy_increment, width) \
   { int n = ((width) + 7) / 8; \
       if (n > 0) { \
           switch ((width) & 7) { \
           case 0: do {    pixel_copy_increment; /* fallthrough */ \
           case 7:         pixel_copy_increment; /* fallthrough */ \
           case 6:         pixel_copy_increment; /* fallthrough */ \
           case 5:         pixel_copy_increment; /* fallthrough */ \
           case 4:         pixel_copy_increment; /* fallthrough */ \
           case 3:         pixel_copy_increment; /* fallthrough */ \
           case 2:         pixel_copy_increment; /* fallthrough */ \
           case 1:         pixel_copy_increment; \
                   } while (--n > 0); \
           } \
       } \
   }

   #define DUFFS_LOOP4(pixel_copy_increment, width) \
   { int n = ((width) + 3) / 4; \
       if (n > 0) { \
           switch ((width) & 3) { \
           case 0: do {    pixel_copy_increment; /* fallthrough */ \
           case 3:         pixel_copy_increment; /* fallthrough */ \
           case 2:         pixel_copy_increment; /* fallthrough */ \
           case 1:         pixel_copy_increment; \
                   } while (--n > 0); \
           } \
       } \
   }

   /* Use the 8-times version of the loop by default */
   #define DUFFS_LOOP(pixel_copy_increment, width) \
       DUFFS_LOOP8(pixel_copy_increment, width)
   474 
   /*
    * Special version of Duff's device for even more optimization: process
    * `width` pixels using three different step sizes.
    *
    *   pixel_copy_increment1  statement that handles 1 pixel
    *   pixel_copy_increment2  statement that handles 2 pixels
    *   pixel_copy_increment4  statement that handles 4 pixels
    *
    * Bits 0, 1 and 2 of the width are peeled off with at most one 1-pixel,
    * one 2-pixel and one 4-pixel step; what remains is a multiple of 8 and
    * is handled by pairs of 4-pixel steps.  The total handled is exactly
    * `width` pixels, and a width of zero or less executes nothing.
    *
    * The macro declares a local `n`; the statements must not rely on an
    * outer variable of that name.
    */
   #define DUFFS_LOOP_124(pixel_copy_increment1, \
                          pixel_copy_increment2, \
                          pixel_copy_increment4, width) \
   { int n = (width); \
       if (n > 0) { \
           if (n & 1) { \
               pixel_copy_increment1; n -= 1; \
           } \
           if (n & 2) { \
               pixel_copy_increment2; n -= 2; \
           } \
           if (n & 4) { \
               pixel_copy_increment4; n -= 4; \
           } \
           if (n) { \
               /* n is now a positive multiple of 8 */ \
               n /= 8; \
               do { \
                   pixel_copy_increment4; \
                   pixel_copy_increment4; \
               } while (--n > 0); \
           } \
       } \
   }
   495 
   496 #else
   497 
   /*
    * Don't use Duff's device to unroll loops: DUFFS_LOOP is a plain counted
    * loop that runs pixel_copy_increment `width` times (not at all when
    * width is zero or negative).
    *
    * DUFFS_LOOP8 and DUFFS_LOOP4 are aliases for it, and DUFFS_LOOP_124 runs
    * only its 1-pixel statement, once per pixel.
    */
   #define DUFFS_LOOP(pixel_copy_increment, width) \
   { int n = (width); \
       for (; n > 0; --n) { \
           pixel_copy_increment; \
       } \
   }
   #define DUFFS_LOOP8(pixel_copy_increment, width) \
       DUFFS_LOOP(pixel_copy_increment, width)
   #define DUFFS_LOOP4(pixel_copy_increment, width) \
       DUFFS_LOOP(pixel_copy_increment, width)
   #define DUFFS_LOOP_124(pixel_copy_increment1, \
                          pixel_copy_increment2, \
                          pixel_copy_increment4, width) \
       DUFFS_LOOP(pixel_copy_increment1, width)
   513 
   514 #endif /* USE_DUFFS_LOOP */
   515 
   /*
    * Prevent Visual C++ 6.0 from printing out stupid warnings: compiler
    * warning 4550 is disabled whenever _MSC_VER is 600 or higher.
    */
   #ifdef _MSC_VER
   #  if _MSC_VER >= 600
   #    pragma warning(disable: 4550)
   #  endif
   #endif
   520 
   521 #endif /* _SDL_blit_h */
   522 
   523 /* vi: set ts=4 sw=4 expandtab: */