src/video/SDL_blit.h
author Sam Lantinga <slouken@libsdl.org>
Wed, 15 Aug 2007 08:21:10 +0000
changeset 2247 93994f65c74c
parent 1895 c121d94672cb
child 2249 5a58b57b6724
permissions -rw-r--r--
Removed hermes since it's LGPL and not compatible with a commercial license.

Prepping for using MMX and SSE intrinsics instead of inline assembly.
.. except for memcpy equivalents which only get faster if they can
exploit the parallelism of loading into multiple SIMD registers. :)
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #ifndef _SDL_blit_h
    25 #define _SDL_blit_h
    26 
    27 #include "SDL_endian.h"
    28 
    29 /* The structure passed to the low level blit functions */
    30 typedef struct
    31 {
    32     Uint8 *s_pixels;
    33     int s_width;
    34     int s_height;
    35     int s_skip;
    36     Uint8 *d_pixels;
    37     int d_width;
    38     int d_height;
    39     int d_skip;
    40     void *aux_data;
    41     SDL_PixelFormat *src;
    42     Uint8 *table;
    43     SDL_PixelFormat *dst;
    44 } SDL_BlitInfo;
    45 
    46 /* The type definition for the low level blit functions */
    47 typedef void (*SDL_loblit) (SDL_BlitInfo * info);
    48 
    49 /* This is the private info structure for software accelerated blits */
    50 struct private_swaccel
    51 {
    52     SDL_loblit blit;
    53     void *aux_data;
    54 };
    55 
    56 /* Blit mapping definition */
    57 typedef struct SDL_BlitMap
    58 {
    59     SDL_Surface *dst;
    60     int identity;
    61     Uint8 *table;
    62     SDL_blit sw_blit;
    63     struct private_swaccel *sw_data;
    64 
    65     /* the version count matches the destination; mismatch indicates
    66        an invalid mapping */
    67     unsigned int format_version;
    68 } SDL_BlitMap;
    69 
    70 #define SDL_BLIT_ANY                0x00000000
    71 #define SDL_BLIT_MMX                0x00000001
    72 #define SDL_BLIT_SSE                0x00000002
    73 #define SDL_BLIT_ALTIVEC_PREFETCH   0x00000004
    74 #define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008
    75 
    76 typedef struct SDL_BlitEntry
    77 {
    78     Uint32 features;
    79     SDL_loblit blit;
    80 } SDL_BlitEntry;
    81 
    82 /* Functions found in SDL_blit.c */
    83 extern int SDL_CalculateBlit(SDL_Surface * surface);
    84 
    85 /* Functions found in SDL_blit_{0,1,N,A}.c */
    86 extern SDL_loblit SDL_CalculateBlit0(SDL_Surface * surface, int complex);
    87 extern SDL_loblit SDL_CalculateBlit1(SDL_Surface * surface, int complex);
    88 extern SDL_loblit SDL_CalculateBlitN(SDL_Surface * surface, int complex);
    89 extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface * surface, int complex);
    90 
    91 /*
    92  * Useful macros for blitting routines
    93  */
    94 
    95 #define FORMAT_EQUAL(A, B)						\
    96     ((A)->BitsPerPixel == (B)->BitsPerPixel				\
    97      && ((A)->Rmask == (B)->Rmask) && ((A)->Amask == (B)->Amask))
    98 
    99 /* Load pixel of the specified format from a buffer and get its R-G-B values */
   100 /* FIXME: rescale values to 0..255 here? */
   101 #define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)				\
   102 {									\
   103 	r = (((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss); 		\
   104 	g = (((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss); 		\
   105 	b = (((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss); 		\
   106 }
   107 #define RGB_FROM_RGB565(Pixel, r, g, b)					\
   108 {									\
   109 	r = (((Pixel&0xF800)>>11)<<3);		 			\
   110 	g = (((Pixel&0x07E0)>>5)<<2); 					\
   111 	b = ((Pixel&0x001F)<<3); 					\
   112 }
   113 #define RGB_FROM_RGB555(Pixel, r, g, b)					\
   114 {									\
   115 	r = (((Pixel&0x7C00)>>10)<<3);		 			\
   116 	g = (((Pixel&0x03E0)>>5)<<3); 					\
   117 	b = ((Pixel&0x001F)<<3); 					\
   118 }
   119 #define RGB_FROM_RGB888(Pixel, r, g, b)					\
   120 {									\
   121 	r = ((Pixel&0xFF0000)>>16);		 			\
   122 	g = ((Pixel&0xFF00)>>8);		 			\
   123 	b = (Pixel&0xFF);			 			\
   124 }
   125 #define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)				   \
   126 do {									   \
   127 	switch (bpp) {							   \
   128 		case 2:							   \
   129 			Pixel = *((Uint16 *)(buf));			   \
   130 		break;							   \
   131 									   \
   132 		case 3: {						   \
   133 		        Uint8 *B = (Uint8 *)(buf);			   \
   134 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   135 			        Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
   136 			} else {					   \
   137 			        Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
   138 			}						   \
   139 		}							   \
   140 		break;							   \
   141 									   \
   142 		case 4:							   \
   143 			Pixel = *((Uint32 *)(buf));			   \
   144 		break;							   \
   145 									   \
   146 		default:						   \
   147 			Pixel = 0; /* appease gcc */			   \
   148 		break;							   \
   149 	}								   \
   150 } while(0)
   151 
   152 #define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)			   \
   153 do {									   \
   154 	switch (bpp) {							   \
   155 		case 2:							   \
   156 			Pixel = *((Uint16 *)(buf));			   \
   157 		break;							   \
   158 									   \
   159 		case 3: {						   \
   160 		        Uint8 *B = (Uint8 *)buf;			   \
   161 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   162 			        Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
   163 			} else {					   \
   164 			        Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
   165 			}						   \
   166 		}							   \
   167 		break;							   \
   168 									   \
   169 		case 4:							   \
   170 			Pixel = *((Uint32 *)(buf));			   \
   171 		break;							   \
   172 									   \
   173 	        default:						   \
   174 		        Pixel = 0;	/* prevent gcc from complaining */ \
   175 		break;							   \
   176 	}								   \
   177 	RGB_FROM_PIXEL(Pixel, fmt, r, g, b);				   \
   178 } while(0)
   179 
   180 /* Assemble R-G-B values into a specified pixel format and store them */
   181 #define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)				\
   182 {									\
   183 	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|				\
   184 		((g>>fmt->Gloss)<<fmt->Gshift)|				\
   185 		((b>>fmt->Bloss)<<fmt->Bshift);				\
   186 }
   187 #define RGB565_FROM_RGB(Pixel, r, g, b)					\
   188 {									\
   189 	Pixel = ((r>>3)<<11)|((g>>2)<<5)|(b>>3);			\
   190 }
   191 #define RGB555_FROM_RGB(Pixel, r, g, b)					\
   192 {									\
   193 	Pixel = ((r>>3)<<10)|((g>>3)<<5)|(b>>3);			\
   194 }
   195 #define RGB888_FROM_RGB(Pixel, r, g, b)					\
   196 {									\
   197 	Pixel = (r<<16)|(g<<8)|b;					\
   198 }
   199 #define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) 				\
   200 {									\
   201 	switch (bpp) {							\
   202 		case 2: {						\
   203 			Uint16 Pixel;					\
   204 									\
   205 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   206 			*((Uint16 *)(buf)) = Pixel;			\
   207 		}							\
   208 		break;							\
   209 									\
   210 		case 3: {						\
   211                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   212 			        *((buf)+fmt->Rshift/8) = r;		\
   213 				*((buf)+fmt->Gshift/8) = g;		\
   214 				*((buf)+fmt->Bshift/8) = b;		\
   215 			} else {					\
   216 			        *((buf)+2-fmt->Rshift/8) = r;		\
   217 				*((buf)+2-fmt->Gshift/8) = g;		\
   218 				*((buf)+2-fmt->Bshift/8) = b;		\
   219 			}						\
   220 		}							\
   221 		break;							\
   222 									\
   223 		case 4: {						\
   224 			Uint32 Pixel;					\
   225 									\
   226 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   227 			*((Uint32 *)(buf)) = Pixel;			\
   228 		}							\
   229 		break;							\
   230 	}								\
   231 }
   232 #define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)		\
   233 {									\
   234 	switch (bpp) {							\
   235 		case 2: {						\
   236 			Uint16 *bufp;					\
   237 			Uint16 Pixel;					\
   238 									\
   239 			bufp = (Uint16 *)buf;				\
   240 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   241 			*bufp = Pixel | (*bufp & Amask);		\
   242 		}							\
   243 		break;							\
   244 									\
   245 		case 3: {						\
   246                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   247 			        *((buf)+fmt->Rshift/8) = r;		\
   248 				*((buf)+fmt->Gshift/8) = g;		\
   249 				*((buf)+fmt->Bshift/8) = b;		\
   250 			} else {					\
   251 			        *((buf)+2-fmt->Rshift/8) = r;		\
   252 				*((buf)+2-fmt->Gshift/8) = g;		\
   253 				*((buf)+2-fmt->Bshift/8) = b;		\
   254 			}						\
   255 		}							\
   256 		break;							\
   257 									\
   258 		case 4: {						\
   259 			Uint32 *bufp;					\
   260 			Uint32 Pixel;					\
   261 									\
   262 			bufp = (Uint32 *)buf;				\
   263 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   264 			*bufp = Pixel | (*bufp & Amask);		\
   265 		}							\
   266 		break;							\
   267 	}								\
   268 }
   269 
   270 /* FIXME: Should we rescale alpha into 0..255 here? */
   271 #define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)				\
   272 {									\
   273 	r = ((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss; 		\
   274 	g = ((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss; 		\
   275 	b = ((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss; 		\
   276 	a = ((Pixel&fmt->Amask)>>fmt->Ashift)<<fmt->Aloss;	 	\
   277 }
   278 #define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)	\
   279 {						\
   280 	r = (Pixel&fmt->Rmask)>>fmt->Rshift;	\
   281 	g = (Pixel&fmt->Gmask)>>fmt->Gshift;	\
   282 	b = (Pixel&fmt->Bmask)>>fmt->Bshift;	\
   283 	a = (Pixel&fmt->Amask)>>fmt->Ashift;	\
   284 }
   285 #define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)				\
   286 {									\
   287 	r = (Pixel>>24);						\
   288 	g = ((Pixel>>16)&0xFF);						\
   289 	b = ((Pixel>>8)&0xFF);						\
   290 	a = (Pixel&0xFF);						\
   291 }
   292 #define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)				\
   293 {									\
   294 	r = ((Pixel>>16)&0xFF);						\
   295 	g = ((Pixel>>8)&0xFF);						\
   296 	b = (Pixel&0xFF);						\
   297 	a = (Pixel>>24);						\
   298 }
   299 #define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)				\
   300 {									\
   301 	r = (Pixel&0xFF);						\
   302 	g = ((Pixel>>8)&0xFF);						\
   303 	b = ((Pixel>>16)&0xFF);						\
   304 	a = (Pixel>>24);						\
   305 }
   306 #define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)			   \
   307 do {									   \
   308 	switch (bpp) {							   \
   309 		case 2:							   \
   310 			Pixel = *((Uint16 *)(buf));			   \
   311 		break;							   \
   312 									   \
   313 		case 3:	{/* FIXME: broken code (no alpha) */		   \
   314 		        Uint8 *b = (Uint8 *)buf;			   \
   315 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   316 			        Pixel = b[0] + (b[1] << 8) + (b[2] << 16); \
   317 			} else {					   \
   318 			        Pixel = (b[0] << 16) + (b[1] << 8) + b[2]; \
   319 			}						   \
   320 		}							   \
   321 		break;							   \
   322 									   \
   323 		case 4:							   \
   324 			Pixel = *((Uint32 *)(buf));			   \
   325 		break;							   \
   326 									   \
   327 		default:						   \
   328 		        Pixel = 0; /* stop gcc complaints */		   \
   329 		break;							   \
   330 	}								   \
   331 	RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a);			   \
   332 	Pixel &= ~fmt->Amask;						   \
   333 } while(0)
   334 
   335 /* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
   336 #define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)				\
   337 {									\
   338 	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|				\
   339 		((g>>fmt->Gloss)<<fmt->Gshift)|				\
   340 		((b>>fmt->Bloss)<<fmt->Bshift)|				\
   341 		((a>>fmt->Aloss)<<fmt->Ashift);				\
   342 }
   343 #define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)			\
   344 {									\
   345 	switch (bpp) {							\
   346 		case 2: {						\
   347 			Uint16 Pixel;					\
   348 									\
   349 			PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a);	\
   350 			*((Uint16 *)(buf)) = Pixel;			\
   351 		}							\
   352 		break;							\
   353 									\
   354 		case 3: { /* FIXME: broken code (no alpha) */		\
   355                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   356 			        *((buf)+fmt->Rshift/8) = r;		\
   357 				*((buf)+fmt->Gshift/8) = g;		\
   358 				*((buf)+fmt->Bshift/8) = b;		\
   359 			} else {					\
   360 			        *((buf)+2-fmt->Rshift/8) = r;		\
   361 				*((buf)+2-fmt->Gshift/8) = g;		\
   362 				*((buf)+2-fmt->Bshift/8) = b;		\
   363 			}						\
   364 		}							\
   365 		break;							\
   366 									\
   367 		case 4: {						\
   368 			Uint32 Pixel;					\
   369 									\
   370 			PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a);	\
   371 			*((Uint32 *)(buf)) = Pixel;			\
   372 		}							\
   373 		break;							\
   374 	}								\
   375 }
   376 
   377 /* Blend the RGB values of two Pixels based on a source alpha value */
   378 #define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)	\
   379 do {						\
   380 	dR = (((sR-dR)*(A))>>8)+dR;		\
   381 	dG = (((sG-dG)*(A))>>8)+dG;		\
   382 	dB = (((sB-dB)*(A))>>8)+dB;		\
   383 } while(0)
   384 
   385 /* Blend the RGB values of two Pixels based on a source alpha value */
   386 #define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB)	\
   387 do {						\
   388     unsigned tR, tG, tB, tA; \
   389     tA = 255 - sA; \
   390     tR = 1 + (sR * sA) + (dR * tA); \
   391     dR = (tR + (tR >> 8)) >> 8; \
   392     tG = 1 + (sG * sA) + (dG * tA); \
   393     dG = (tG + (tG >> 8)) >> 8; \
   394     tB = 1 + (sB * sA) + (dB * tA); \
   395     dB = (tB + (tB >> 8)) >> 8; \
   396 } while(0)
   397 
   398 
   399 /* This is a very useful loop for optimizing blitters */
   400 #if defined(_MSC_VER) && (_MSC_VER == 1300)
   401 /* There's a bug in the Visual C++ 7 optimizer when compiling this code */
   402 #else
   403 #define USE_DUFFS_LOOP
   404 #endif
   405 #ifdef USE_DUFFS_LOOP
   406 
   407 /* 8-times unrolled loop */
   408 #define DUFFS_LOOP8(pixel_copy_increment, width)			\
   409 { int n = (width+7)/8;							\
   410 	switch (width & 7) {						\
   411 	case 0: do {	pixel_copy_increment;				\
   412 	case 7:		pixel_copy_increment;				\
   413 	case 6:		pixel_copy_increment;				\
   414 	case 5:		pixel_copy_increment;				\
   415 	case 4:		pixel_copy_increment;				\
   416 	case 3:		pixel_copy_increment;				\
   417 	case 2:		pixel_copy_increment;				\
   418 	case 1:		pixel_copy_increment;				\
   419 		} while ( --n > 0 );					\
   420 	}								\
   421 }
   422 
   423 /* 4-times unrolled loop */
   424 #define DUFFS_LOOP4(pixel_copy_increment, width)			\
   425 { int n = (width+3)/4;							\
   426 	switch (width & 3) {						\
   427 	case 0: do {	pixel_copy_increment;				\
   428 	case 3:		pixel_copy_increment;				\
   429 	case 2:		pixel_copy_increment;				\
   430 	case 1:		pixel_copy_increment;				\
   431 		} while ( --n > 0 );					\
   432 	}								\
   433 }
   434 
   435 /* 2 - times unrolled loop */
   436 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
   437 				double_pixel_copy_increment, width)	\
   438 { int n, w = width;							\
   439 	if( w & 1 ) {							\
   440 	    pixel_copy_increment;					\
   441 	    w--;							\
   442 	}								\
   443 	if ( w > 0 )	{						\
   444 	    n = ( w + 2) / 4;						\
   445 	    switch( w & 2 ) {						\
   446 	    case 0: do {	double_pixel_copy_increment;		\
   447 	    case 2:		double_pixel_copy_increment;		\
   448 		    } while ( --n > 0 );					\
   449 	    }								\
   450 	}								\
   451 }
   452 
   453 /* 2 - times unrolled loop 4 pixels */
   454 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
   455 				double_pixel_copy_increment,		\
   456 				quatro_pixel_copy_increment, width)	\
   457 { int n, w = width;								\
   458         if(w & 1) {							\
   459 	  pixel_copy_increment;						\
   460 	  w--;								\
   461 	}								\
   462 	if(w & 2) {							\
   463 	  double_pixel_copy_increment;					\
   464 	  w -= 2;							\
   465 	}								\
   466 	if ( w > 0 ) {							\
   467 	    n = ( w + 7 ) / 8;						\
   468 	    switch( w & 4 ) {						\
   469 	    case 0: do {	quatro_pixel_copy_increment;		\
   470 	    case 4:		quatro_pixel_copy_increment;		\
   471 		    } while ( --n > 0 );					\
   472 	    }								\
   473 	}								\
   474 }
   475 
   476 /* Use the 8-times version of the loop by default */
   477 #define DUFFS_LOOP(pixel_copy_increment, width)				\
   478 	DUFFS_LOOP8(pixel_copy_increment, width)
   479 
   480 #else
   481 
   482 /* Don't use Duff's device to unroll loops */
   483 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
   484 			 double_pixel_copy_increment, width)		\
   485 { int n = width;								\
   486     if( n & 1 ) {							\
   487 	pixel_copy_increment;						\
   488 	n--;								\
   489     }									\
   490     n=n>>1;								\
   491     for(; n > 0; --n) {   						\
   492 	double_pixel_copy_increment;					\
   493     }									\
   494 }
   495 
   496 /* Don't use Duff's device to unroll loops */
   497 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
   498 				double_pixel_copy_increment,		\
   499 				quatro_pixel_copy_increment, width)	\
   500 { int n = width;								\
   501         if(n & 1) {							\
   502 	  pixel_copy_increment;						\
   503 	  n--;								\
   504 	}								\
   505 	if(n & 2) {							\
   506 	  double_pixel_copy_increment;					\
   507 	  n -= 2;							\
   508 	}								\
   509 	n=n>>2;								\
   510 	for(; n > 0; --n) {   						\
   511 	  quatro_pixel_copy_increment;					\
   512         }								\
   513 }
   514 
   515 /* Don't use Duff's device to unroll loops */
   516 #define DUFFS_LOOP(pixel_copy_increment, width)				\
   517 { int n;								\
   518 	for ( n=width; n > 0; --n ) {					\
   519 		pixel_copy_increment;					\
   520 	}								\
   521 }
   522 #define DUFFS_LOOP8(pixel_copy_increment, width)			\
   523 	DUFFS_LOOP(pixel_copy_increment, width)
   524 #define DUFFS_LOOP4(pixel_copy_increment, width)			\
   525 	DUFFS_LOOP(pixel_copy_increment, width)
   526 
   527 #endif /* USE_DUFFS_LOOP */
   528 
   529 /* Prevent Visual C++ 6.0 from printing out stupid warnings */
   530 #if defined(_MSC_VER) && (_MSC_VER >= 600)
   531 #pragma warning(disable: 4550)
   532 #endif
   533 
   534 #endif /* _SDL_blit_h */
   535 /* vi: set ts=4 sw=4 expandtab: */