src/video/SDL_blit.h
author Sam Lantinga <slouken@libsdl.org>
Fri, 17 Aug 2007 00:54:53 +0000
changeset 2257 340942cfda48
parent 2251 292bee385630
child 2260 202ddfd1cfb1
permissions -rw-r--r--
Moved the colorkey and per-surface alpha into the blit info,
in preparation for support for general color channel modulation.

Removed and consolidated some data in the blit info.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #ifndef _SDL_blit_h
    25 #define _SDL_blit_h
    26 
    27 #ifdef __MMX__
    28 #include <mmintrin.h>
    29 #endif
    30 #ifdef __SSE__
    31 #include <xmmintrin.h>
    32 #endif
    33 
    34 #include "SDL_cpuinfo.h"
    35 #include "SDL_endian.h"
    36 
    37 /* The structure passed to the low level blit functions.
        * NOTE(review): the per-field notes below are inferred from the field
        * names (s_ presumably = source, d_ presumably = destination); confirm
        * them against the blitters before relying on them. */
    38 typedef struct
    39 {
    40     Uint8 *s_pixels;            /* presumably the source pixel data */
    41     int s_width;                /* presumably the source width */
    42     int s_height;               /* presumably the source height */
    43     int s_skip;                 /* presumably per-row padding to skip in the source -- TODO confirm units */
    44     Uint8 *d_pixels;            /* presumably the destination pixel data */
    45     int d_width;                /* presumably the destination width */
    46     int d_height;               /* presumably the destination height */
    47     int d_skip;                 /* presumably per-row padding to skip in the destination -- TODO confirm units */
    48     SDL_PixelFormat *src;       /* presumably the source pixel format */
    49     Uint8 *table;               /* NOTE(review): same name as SDL_BlitMap.table; contents not visible here */
    50     SDL_PixelFormat *dst;       /* presumably the destination pixel format */
    51     Uint32 ckey, cmod;          /* same names as SDL_BlitMap: colorkey and ARGB modulation */
    52 } SDL_BlitInfo;
    53 
    54 /* The type definition for the low level blit functions:
        * a pointer to a function that takes an SDL_BlitInfo * and returns nothing. */
    55 typedef void (*SDL_loblit) (SDL_BlitInfo * info);
    56 
    57 /* Blit mapping definition.
        * NOTE(review): the notes on dst, identity, table, blit and data are
        * inferred from names only; their exact use is not visible in this header. */
    58 typedef struct SDL_BlitMap
    59 {
    60     SDL_Surface *dst;       /* presumably the destination surface this mapping refers to */
    61     int identity;           /* NOTE(review): meaning not visible here -- confirm */
    62     Uint8 *table;           /* NOTE(review): same name as SDL_BlitInfo.table -- confirm contents */
    63     SDL_blit blit;          /* SDL_blit is declared outside this header */
    64     void *data;             /* opaque pointer; ownership not visible here */
    65     Uint32 ckey;    /* colorkey */
    66     Uint32 cmod;    /* ARGB modulation */
    67 
    68     /* the version count matches the destination; mismatch indicates
    69        an invalid mapping */
    70     unsigned int format_version;
    71 } SDL_BlitMap;
    72 
    73 #define SDL_BLIT_ANY                0x00000000  /* no bit set; NOTE(review): presumably compared with SDL_BlitEntry.features -- confirm */
    74 #define SDL_BLIT_MMX                0x00000001  /* bit 0 */
    75 #define SDL_BLIT_SSE                0x00000002  /* bit 1 */
    76 #define SDL_BLIT_ALTIVEC_PREFETCH   0x00000004  /* bit 2 */
    77 #define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008  /* bit 3 */
    78 
    79 typedef struct SDL_BlitEntry    /* pairs a feature word with a low level blit function */
    80 {
    81     Uint32 features;            /* NOTE(review): presumably a combination of the SDL_BLIT_* values above -- confirm */
    82     SDL_loblit blit;            /* low level blit function pointer */
    83 } SDL_BlitEntry;
    84 
    85 /* Functions found in SDL_blit.c */
    86 extern int SDL_CalculateBlit(SDL_Surface * surface);    /* returns int; NOTE(review): status convention not visible here */
    87 
    88 /* Functions found in SDL_blit_{0,1,N,A}.c
        * Each takes a surface plus an int named complex and returns an SDL_loblit.
        * NOTE(review): the meaning of complex is not visible in this header -- confirm. */
    89 extern SDL_loblit SDL_CalculateBlit0(SDL_Surface * surface, int complex);
    90 extern SDL_loblit SDL_CalculateBlit1(SDL_Surface * surface, int complex);
    91 extern SDL_loblit SDL_CalculateBlitN(SDL_Surface * surface, int complex);
    92 extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface * surface, int complex);
    93 
    94 /*
    95  * Useful macros for blitting routines
    96  */
    97 
    98 #if defined(__GNUC__)
    99 #define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
   100 #elif defined(_MSC_VER)
   101 #define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
   102 #else
   103 #define DECLARE_ALIGNED(t,v,a)  t v
   104 #endif
   105 
   106 #define FORMAT_EQUAL(A, B)						\
   107     ((A)->BitsPerPixel == (B)->BitsPerPixel				\
   108      && ((A)->Rmask == (B)->Rmask) && ((A)->Amask == (B)->Amask))
   109 
   110 /* Load pixel of the specified format from a buffer and get its R-G-B values */
   111 /* FIXME: rescale values to 0..255 here? */
   112 #define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)				\
   113 {									\
   114 	r = (((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss); 		\
   115 	g = (((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss); 		\
   116 	b = (((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss); 		\
   117 }
   118 #define RGB_FROM_RGB565(Pixel, r, g, b)					\
   119 {									\
   120 	r = (((Pixel&0xF800)>>11)<<3);		 			\
   121 	g = (((Pixel&0x07E0)>>5)<<2); 					\
   122 	b = ((Pixel&0x001F)<<3); 					\
   123 }
   124 #define RGB_FROM_RGB555(Pixel, r, g, b)					\
   125 {									\
   126 	r = (((Pixel&0x7C00)>>10)<<3);		 			\
   127 	g = (((Pixel&0x03E0)>>5)<<3); 					\
   128 	b = ((Pixel&0x001F)<<3); 					\
   129 }
   130 #define RGB_FROM_RGB888(Pixel, r, g, b)					\
   131 {									\
   132 	r = ((Pixel&0xFF0000)>>16);		 			\
   133 	g = ((Pixel&0xFF00)>>8);		 			\
   134 	b = (Pixel&0xFF);			 			\
   135 }
   136 #define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)				   \
   137 do {									   \
   138 	switch (bpp) {							   \
   139 		case 2:							   \
   140 			Pixel = *((Uint16 *)(buf));			   \
   141 		break;							   \
   142 									   \
   143 		case 3: {						   \
   144 		        Uint8 *B = (Uint8 *)(buf);			   \
   145 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   146 			        Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
   147 			} else {					   \
   148 			        Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
   149 			}						   \
   150 		}							   \
   151 		break;							   \
   152 									   \
   153 		case 4:							   \
   154 			Pixel = *((Uint32 *)(buf));			   \
   155 		break;							   \
   156 									   \
   157 		default:						   \
   158 			Pixel = 0; /* appease gcc */			   \
   159 		break;							   \
   160 	}								   \
   161 } while(0)
   162 
   163 #define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)			   \
   164 do {									   \
   165 	switch (bpp) {							   \
   166 		case 2:							   \
   167 			Pixel = *((Uint16 *)(buf));			   \
   168 		break;							   \
   169 									   \
   170 		case 3: {						   \
   171 		        Uint8 *B = (Uint8 *)buf;			   \
   172 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   173 			        Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
   174 			} else {					   \
   175 			        Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
   176 			}						   \
   177 		}							   \
   178 		break;							   \
   179 									   \
   180 		case 4:							   \
   181 			Pixel = *((Uint32 *)(buf));			   \
   182 		break;							   \
   183 									   \
   184 	        default:						   \
   185 		        Pixel = 0;	/* prevent gcc from complaining */ \
   186 		break;							   \
   187 	}								   \
   188 	RGB_FROM_PIXEL(Pixel, fmt, r, g, b);				   \
   189 } while(0)
   190 
   191 /* Assemble R-G-B values into a specified pixel format and store them */
   192 #define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)				\
   193 {									\
   194 	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|				\
   195 		((g>>fmt->Gloss)<<fmt->Gshift)|				\
   196 		((b>>fmt->Bloss)<<fmt->Bshift);				\
   197 }
   198 #define RGB565_FROM_RGB(Pixel, r, g, b)					\
   199 {									\
   200 	Pixel = ((r>>3)<<11)|((g>>2)<<5)|(b>>3);			\
   201 }
   202 #define RGB555_FROM_RGB(Pixel, r, g, b)					\
   203 {									\
   204 	Pixel = ((r>>3)<<10)|((g>>3)<<5)|(b>>3);			\
   205 }
   206 #define RGB888_FROM_RGB(Pixel, r, g, b)					\
   207 {									\
   208 	Pixel = (r<<16)|(g<<8)|b;					\
   209 }
   210 #define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) 				\
   211 {									\
   212 	switch (bpp) {							\
   213 		case 2: {						\
   214 			Uint16 Pixel;					\
   215 									\
   216 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   217 			*((Uint16 *)(buf)) = Pixel;			\
   218 		}							\
   219 		break;							\
   220 									\
   221 		case 3: {						\
   222                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   223 			        *((buf)+fmt->Rshift/8) = r;		\
   224 				*((buf)+fmt->Gshift/8) = g;		\
   225 				*((buf)+fmt->Bshift/8) = b;		\
   226 			} else {					\
   227 			        *((buf)+2-fmt->Rshift/8) = r;		\
   228 				*((buf)+2-fmt->Gshift/8) = g;		\
   229 				*((buf)+2-fmt->Bshift/8) = b;		\
   230 			}						\
   231 		}							\
   232 		break;							\
   233 									\
   234 		case 4: {						\
   235 			Uint32 Pixel;					\
   236 									\
   237 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   238 			*((Uint32 *)(buf)) = Pixel;			\
   239 		}							\
   240 		break;							\
   241 	}								\
   242 }
   243 #define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)		\
   244 {									\
   245 	switch (bpp) {							\
   246 		case 2: {						\
   247 			Uint16 *bufp;					\
   248 			Uint16 Pixel;					\
   249 									\
   250 			bufp = (Uint16 *)buf;				\
   251 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   252 			*bufp = Pixel | (*bufp & Amask);		\
   253 		}							\
   254 		break;							\
   255 									\
   256 		case 3: {						\
   257                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   258 			        *((buf)+fmt->Rshift/8) = r;		\
   259 				*((buf)+fmt->Gshift/8) = g;		\
   260 				*((buf)+fmt->Bshift/8) = b;		\
   261 			} else {					\
   262 			        *((buf)+2-fmt->Rshift/8) = r;		\
   263 				*((buf)+2-fmt->Gshift/8) = g;		\
   264 				*((buf)+2-fmt->Bshift/8) = b;		\
   265 			}						\
   266 		}							\
   267 		break;							\
   268 									\
   269 		case 4: {						\
   270 			Uint32 *bufp;					\
   271 			Uint32 Pixel;					\
   272 									\
   273 			bufp = (Uint32 *)buf;				\
   274 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   275 			*bufp = Pixel | (*bufp & Amask);		\
   276 		}							\
   277 		break;							\
   278 	}								\
   279 }
   280 
   281 /* FIXME: Should we rescale alpha into 0..255 here? */
   282 #define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)				\
   283 {									\
   284 	r = ((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss; 		\
   285 	g = ((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss; 		\
   286 	b = ((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss; 		\
   287 	a = ((Pixel&fmt->Amask)>>fmt->Ashift)<<fmt->Aloss;	 	\
   288 }
   289 #define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)	\
   290 {						\
   291 	r = (Pixel&fmt->Rmask)>>fmt->Rshift;	\
   292 	g = (Pixel&fmt->Gmask)>>fmt->Gshift;	\
   293 	b = (Pixel&fmt->Bmask)>>fmt->Bshift;	\
   294 	a = (Pixel&fmt->Amask)>>fmt->Ashift;	\
   295 }
   296 #define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)				\
   297 {									\
   298 	r = (Pixel>>24);						\
   299 	g = ((Pixel>>16)&0xFF);						\
   300 	b = ((Pixel>>8)&0xFF);						\
   301 	a = (Pixel&0xFF);						\
   302 }
   303 #define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)				\
   304 {									\
   305 	r = ((Pixel>>16)&0xFF);						\
   306 	g = ((Pixel>>8)&0xFF);						\
   307 	b = (Pixel&0xFF);						\
   308 	a = (Pixel>>24);						\
   309 }
   310 #define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)				\
   311 {									\
   312 	r = (Pixel&0xFF);						\
   313 	g = ((Pixel>>8)&0xFF);						\
   314 	b = ((Pixel>>16)&0xFF);						\
   315 	a = (Pixel>>24);						\
   316 }
   317 #define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)			   \
   318 do {									   \
   319 	switch (bpp) {							   \
   320 		case 2:							   \
   321 			Pixel = *((Uint16 *)(buf));			   \
   322 		break;							   \
   323 									   \
   324 		case 3:	{/* FIXME: broken code (no alpha) */		   \
   325 		        Uint8 *b = (Uint8 *)buf;			   \
   326 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   327 			        Pixel = b[0] + (b[1] << 8) + (b[2] << 16); \
   328 			} else {					   \
   329 			        Pixel = (b[0] << 16) + (b[1] << 8) + b[2]; \
   330 			}						   \
   331 		}							   \
   332 		break;							   \
   333 									   \
   334 		case 4:							   \
   335 			Pixel = *((Uint32 *)(buf));			   \
   336 		break;							   \
   337 									   \
   338 		default:						   \
   339 		        Pixel = 0; /* stop gcc complaints */		   \
   340 		break;							   \
   341 	}								   \
   342 	RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a);			   \
   343 	Pixel &= ~fmt->Amask;						   \
   344 } while(0)
   345 
   346 /* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
   347 #define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)				\
   348 {									\
   349 	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|				\
   350 		((g>>fmt->Gloss)<<fmt->Gshift)|				\
   351 		((b>>fmt->Bloss)<<fmt->Bshift)|				\
   352 		((a>>fmt->Aloss)<<fmt->Ashift);				\
   353 }
   354 #define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)			\
   355 {									\
   356 	switch (bpp) {							\
   357 		case 2: {						\
   358 			Uint16 Pixel;					\
   359 									\
   360 			PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a);	\
   361 			*((Uint16 *)(buf)) = Pixel;			\
   362 		}							\
   363 		break;							\
   364 									\
   365 		case 3: { /* FIXME: broken code (no alpha) */		\
   366                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   367 			        *((buf)+fmt->Rshift/8) = r;		\
   368 				*((buf)+fmt->Gshift/8) = g;		\
   369 				*((buf)+fmt->Bshift/8) = b;		\
   370 			} else {					\
   371 			        *((buf)+2-fmt->Rshift/8) = r;		\
   372 				*((buf)+2-fmt->Gshift/8) = g;		\
   373 				*((buf)+2-fmt->Bshift/8) = b;		\
   374 			}						\
   375 		}							\
   376 		break;							\
   377 									\
   378 		case 4: {						\
   379 			Uint32 Pixel;					\
   380 									\
   381 			PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a);	\
   382 			*((Uint32 *)(buf)) = Pixel;			\
   383 		}							\
   384 		break;							\
   385 	}								\
   386 }
   387 
   388 /* Blend the RGB values of two Pixels based on a source alpha value */
   389 #define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)	\
   390 do {						\
   391 	dR = (((sR-dR)*(A))>>8)+dR;		\
   392 	dG = (((sG-dG)*(A))>>8)+dG;		\
   393 	dB = (((sB-dB)*(A))>>8)+dB;		\
   394 } while(0)
   395 
   396 /* Blend the RGB values of two Pixels based on a source alpha value */
   397 #define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB)	\
   398 do {						\
   399     unsigned tR, tG, tB, tA; \
   400     tA = 255 - sA; \
   401     tR = 1 + (sR * sA) + (dR * tA); \
   402     dR = (tR + (tR >> 8)) >> 8; \
   403     tG = 1 + (sG * sA) + (dG * tA); \
   404     dG = (tG + (tG >> 8)) >> 8; \
   405     tB = 1 + (sB * sA) + (dB * tA); \
   406     dB = (tB + (tB >> 8)) >> 8; \
   407 } while(0)
   408 
   409 
   410 /* This is a very useful loop for optimizing blitters */
   411 #if defined(_MSC_VER) && (_MSC_VER == 1300)
   412 /* There's a bug in the Visual C++ 7 optimizer when compiling this code */
   413 #else
   414 #define USE_DUFFS_LOOP
   415 #endif
   416 #ifdef USE_DUFFS_LOOP
   417 
   418 /* 8-times unrolled loop */
   419 #define DUFFS_LOOP8(pixel_copy_increment, width)			\
   420 { int n = (width+7)/8;							\
   421 	switch (width & 7) {						\
   422 	case 0: do {	pixel_copy_increment;				\
   423 	case 7:		pixel_copy_increment;				\
   424 	case 6:		pixel_copy_increment;				\
   425 	case 5:		pixel_copy_increment;				\
   426 	case 4:		pixel_copy_increment;				\
   427 	case 3:		pixel_copy_increment;				\
   428 	case 2:		pixel_copy_increment;				\
   429 	case 1:		pixel_copy_increment;				\
   430 		} while ( --n > 0 );					\
   431 	}								\
   432 }
   433 
   434 /* 4-times unrolled loop */
   435 #define DUFFS_LOOP4(pixel_copy_increment, width)			\
   436 { int n = (width+3)/4;							\
   437 	switch (width & 3) {						\
   438 	case 0: do {	pixel_copy_increment;				\
   439 	case 3:		pixel_copy_increment;				\
   440 	case 2:		pixel_copy_increment;				\
   441 	case 1:		pixel_copy_increment;				\
   442 		} while ( --n > 0 );					\
   443 	}								\
   444 }
   445 
   446 /* 2 - times unrolled loop */
   447 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
   448 				double_pixel_copy_increment, width)	\
   449 { int n, w = width;							\
   450 	if( w & 1 ) {							\
   451 	    pixel_copy_increment;					\
   452 	    w--;							\
   453 	}								\
   454 	if ( w > 0 )	{						\
   455 	    n = ( w + 2) / 4;						\
   456 	    switch( w & 2 ) {						\
   457 	    case 0: do {	double_pixel_copy_increment;		\
   458 	    case 2:		double_pixel_copy_increment;		\
   459 		    } while ( --n > 0 );					\
   460 	    }								\
   461 	}								\
   462 }
   463 
   464 /* 2 - times unrolled loop 4 pixels */
   465 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
   466 				double_pixel_copy_increment,		\
   467 				quatro_pixel_copy_increment, width)	\
   468 { int n, w = width;								\
   469         if(w & 1) {							\
   470 	  pixel_copy_increment;						\
   471 	  w--;								\
   472 	}								\
   473 	if(w & 2) {							\
   474 	  double_pixel_copy_increment;					\
   475 	  w -= 2;							\
   476 	}								\
   477 	if ( w > 0 ) {							\
   478 	    n = ( w + 7 ) / 8;						\
   479 	    switch( w & 4 ) {						\
   480 	    case 0: do {	quatro_pixel_copy_increment;		\
   481 	    case 4:		quatro_pixel_copy_increment;		\
   482 		    } while ( --n > 0 );					\
   483 	    }								\
   484 	}								\
   485 }
   486 
   487 /* Use the 8-times version of the loop by default:
        * DUFFS_LOOP forwards both of its arguments unchanged to DUFFS_LOOP8. */
   488 #define DUFFS_LOOP(pixel_copy_increment, width)				\
   489 	DUFFS_LOOP8(pixel_copy_increment, width)
   490 
   491 #else
   492 
   493 /* Don't use Duff's device to unroll loops */
   494 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
   495 			 double_pixel_copy_increment, width)		\
   496 { int n = width;								\
   497     if( n & 1 ) {							\
   498 	pixel_copy_increment;						\
   499 	n--;								\
   500     }									\
   501     n=n>>1;								\
   502     for(; n > 0; --n) {   						\
   503 	double_pixel_copy_increment;					\
   504     }									\
   505 }
   506 
   507 /* Don't use Duff's device to unroll loops */
   508 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
   509 				double_pixel_copy_increment,		\
   510 				quatro_pixel_copy_increment, width)	\
   511 { int n = width;								\
   512         if(n & 1) {							\
   513 	  pixel_copy_increment;						\
   514 	  n--;								\
   515 	}								\
   516 	if(n & 2) {							\
   517 	  double_pixel_copy_increment;					\
   518 	  n -= 2;							\
   519 	}								\
   520 	n=n>>2;								\
   521 	for(; n > 0; --n) {   						\
   522 	  quatro_pixel_copy_increment;					\
   523         }								\
   524 }
   525 
   526 /* Don't use Duff's device to unroll loops */
   527 #define DUFFS_LOOP(pixel_copy_increment, width)				\
   528 { int n;								\
   529 	for ( n=width; n > 0; --n ) {					\
   530 		pixel_copy_increment;					\
   531 	}								\
   532 }
   533 #define DUFFS_LOOP8(pixel_copy_increment, width)			\
   534 	DUFFS_LOOP(pixel_copy_increment, width)  /* in this branch the 8-way name forwards to the plain loop */
   535 #define DUFFS_LOOP4(pixel_copy_increment, width)			\
   536 	DUFFS_LOOP(pixel_copy_increment, width)  /* in this branch the 4-way name forwards to the plain loop */
   537 
   538 #endif /* USE_DUFFS_LOOP */
   539 
   540 /* Prevent Visual C++ 6.0 from printing out stupid warnings:
        * disables Microsoft compiler warning 4550 whenever _MSC_VER >= 600. */
   541 #if defined(_MSC_VER) && (_MSC_VER >= 600)
   542 #pragma warning(disable: 4550)
   543 #endif
   544 
   545 #endif /* _SDL_blit_h */
   546 /* vi: set ts=4 sw=4 expandtab: */