src/video/SDL_blit.h
author Sam Lantinga <slouken@libsdl.org>
Fri, 17 Aug 2007 03:22:03 +0000
changeset 2261 c20476d7d7b3
parent 2260 202ddfd1cfb1
child 2262 bee005ace1bf
permissions -rw-r--r--
Enabled 3DNow! intrinsic support
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #ifndef _SDL_blit_h
    25 #define _SDL_blit_h
    26 
    27 #ifdef __MMX__
    28 #include <mmintrin.h>
    29 #endif
    30 #ifdef __3dNOW__
    31 #include <mm3dnow.h>
    32 #endif
    33 #ifdef __SSE__
    34 #include <xmmintrin.h>
    35 #endif
    36 
    37 #include "SDL_cpuinfo.h"
    38 #include "SDL_endian.h"
    39 
    40 /* The structure passed to the low level blit functions */
    41 typedef struct
    42 {
    43     Uint8 *s_pixels;
    44     int s_width;
    45     int s_height;
    46     int s_skip;
    47     Uint8 *d_pixels;
    48     int d_width;
    49     int d_height;
    50     int d_skip;
    51     SDL_PixelFormat *src;
    52     Uint8 *table;
    53     SDL_PixelFormat *dst;
    54     Uint32 ckey, cmod;
    55 } SDL_BlitInfo;
    56 
    57 /* The type definition for the low level blit functions */
    58 typedef void (*SDL_loblit) (SDL_BlitInfo * info);
    59 
    60 /* Blit mapping definition */
    61 typedef struct SDL_BlitMap
    62 {
    63     SDL_Surface *dst;
    64     int identity;
    65     Uint8 *table;
    66     SDL_blit blit;
    67     void *data;
    68     Uint32 ckey;                /* colorkey */
    69     Uint32 cmod;                /* ARGB modulation */
    70 
    71     /* the version count matches the destination; mismatch indicates
    72        an invalid mapping */
    73     unsigned int format_version;
    74 } SDL_BlitMap;
    75 
    76 #define SDL_BLIT_ANY                0x00000000
    77 #define SDL_BLIT_MMX                0x00000001
    78 #define SDL_BLIT_SSE                0x00000002
    79 #define SDL_BLIT_ALTIVEC_PREFETCH   0x00000004
    80 #define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008
    81 
    82 typedef struct SDL_BlitEntry
    83 {
    84     Uint32 features;
    85     SDL_loblit blit;
    86 } SDL_BlitEntry;
    87 
    88 /* Functions found in SDL_blit.c */
    89 extern int SDL_CalculateBlit(SDL_Surface * surface);
    90 
    91 /* Functions found in SDL_blit_{0,1,N,A}.c */
    92 extern SDL_loblit SDL_CalculateBlit0(SDL_Surface * surface, int complex);
    93 extern SDL_loblit SDL_CalculateBlit1(SDL_Surface * surface, int complex);
    94 extern SDL_loblit SDL_CalculateBlitN(SDL_Surface * surface, int complex);
    95 extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface * surface, int complex);
    96 
    97 /*
    98  * Useful macros for blitting routines
    99  */
   100 
   101 #if defined(__GNUC__)
   102 #define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
   103 #elif defined(_MSC_VER)
   104 #define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
   105 #else
   106 #define DECLARE_ALIGNED(t,v,a)  t v
   107 #endif
   108 
   109 #define FORMAT_EQUAL(A, B)						\
   110     ((A)->BitsPerPixel == (B)->BitsPerPixel				\
   111      && ((A)->Rmask == (B)->Rmask) && ((A)->Amask == (B)->Amask))
   112 
   113 /* Load pixel of the specified format from a buffer and get its R-G-B values */
   114 /* FIXME: rescale values to 0..255 here? */
   115 #define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)				\
   116 {									\
   117 	r = (((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss); 		\
   118 	g = (((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss); 		\
   119 	b = (((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss); 		\
   120 }
   121 #define RGB_FROM_RGB565(Pixel, r, g, b)					\
   122 {									\
   123 	r = (((Pixel&0xF800)>>11)<<3);		 			\
   124 	g = (((Pixel&0x07E0)>>5)<<2); 					\
   125 	b = ((Pixel&0x001F)<<3); 					\
   126 }
   127 #define RGB_FROM_RGB555(Pixel, r, g, b)					\
   128 {									\
   129 	r = (((Pixel&0x7C00)>>10)<<3);		 			\
   130 	g = (((Pixel&0x03E0)>>5)<<3); 					\
   131 	b = ((Pixel&0x001F)<<3); 					\
   132 }
   133 #define RGB_FROM_RGB888(Pixel, r, g, b)					\
   134 {									\
   135 	r = ((Pixel&0xFF0000)>>16);		 			\
   136 	g = ((Pixel&0xFF00)>>8);		 			\
   137 	b = (Pixel&0xFF);			 			\
   138 }
   139 #define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)				   \
   140 do {									   \
   141 	switch (bpp) {							   \
   142 		case 2:							   \
   143 			Pixel = *((Uint16 *)(buf));			   \
   144 		break;							   \
   145 									   \
   146 		case 3: {						   \
   147 		        Uint8 *B = (Uint8 *)(buf);			   \
   148 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   149 			        Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
   150 			} else {					   \
   151 			        Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
   152 			}						   \
   153 		}							   \
   154 		break;							   \
   155 									   \
   156 		case 4:							   \
   157 			Pixel = *((Uint32 *)(buf));			   \
   158 		break;							   \
   159 									   \
   160 		default:						   \
   161 			Pixel = 0; /* appease gcc */			   \
   162 		break;							   \
   163 	}								   \
   164 } while(0)
   165 
   166 #define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)			   \
   167 do {									   \
   168 	switch (bpp) {							   \
   169 		case 2:							   \
   170 			Pixel = *((Uint16 *)(buf));			   \
   171 		break;							   \
   172 									   \
   173 		case 3: {						   \
   174 		        Uint8 *B = (Uint8 *)buf;			   \
   175 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   176 			        Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
   177 			} else {					   \
   178 			        Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
   179 			}						   \
   180 		}							   \
   181 		break;							   \
   182 									   \
   183 		case 4:							   \
   184 			Pixel = *((Uint32 *)(buf));			   \
   185 		break;							   \
   186 									   \
   187 	        default:						   \
   188 		        Pixel = 0;	/* prevent gcc from complaining */ \
   189 		break;							   \
   190 	}								   \
   191 	RGB_FROM_PIXEL(Pixel, fmt, r, g, b);				   \
   192 } while(0)
   193 
   194 /* Assemble R-G-B values into a specified pixel format and store them */
   195 #define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)				\
   196 {									\
   197 	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|				\
   198 		((g>>fmt->Gloss)<<fmt->Gshift)|				\
   199 		((b>>fmt->Bloss)<<fmt->Bshift);				\
   200 }
   201 #define RGB565_FROM_RGB(Pixel, r, g, b)					\
   202 {									\
   203 	Pixel = ((r>>3)<<11)|((g>>2)<<5)|(b>>3);			\
   204 }
   205 #define RGB555_FROM_RGB(Pixel, r, g, b)					\
   206 {									\
   207 	Pixel = ((r>>3)<<10)|((g>>3)<<5)|(b>>3);			\
   208 }
   209 #define RGB888_FROM_RGB(Pixel, r, g, b)					\
   210 {									\
   211 	Pixel = (r<<16)|(g<<8)|b;					\
   212 }
   213 #define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) 				\
   214 {									\
   215 	switch (bpp) {							\
   216 		case 2: {						\
   217 			Uint16 Pixel;					\
   218 									\
   219 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   220 			*((Uint16 *)(buf)) = Pixel;			\
   221 		}							\
   222 		break;							\
   223 									\
   224 		case 3: {						\
   225                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   226 			        *((buf)+fmt->Rshift/8) = r;		\
   227 				*((buf)+fmt->Gshift/8) = g;		\
   228 				*((buf)+fmt->Bshift/8) = b;		\
   229 			} else {					\
   230 			        *((buf)+2-fmt->Rshift/8) = r;		\
   231 				*((buf)+2-fmt->Gshift/8) = g;		\
   232 				*((buf)+2-fmt->Bshift/8) = b;		\
   233 			}						\
   234 		}							\
   235 		break;							\
   236 									\
   237 		case 4: {						\
   238 			Uint32 Pixel;					\
   239 									\
   240 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   241 			*((Uint32 *)(buf)) = Pixel;			\
   242 		}							\
   243 		break;							\
   244 	}								\
   245 }
   246 #define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)		\
   247 {									\
   248 	switch (bpp) {							\
   249 		case 2: {						\
   250 			Uint16 *bufp;					\
   251 			Uint16 Pixel;					\
   252 									\
   253 			bufp = (Uint16 *)buf;				\
   254 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   255 			*bufp = Pixel | (*bufp & Amask);		\
   256 		}							\
   257 		break;							\
   258 									\
   259 		case 3: {						\
   260                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   261 			        *((buf)+fmt->Rshift/8) = r;		\
   262 				*((buf)+fmt->Gshift/8) = g;		\
   263 				*((buf)+fmt->Bshift/8) = b;		\
   264 			} else {					\
   265 			        *((buf)+2-fmt->Rshift/8) = r;		\
   266 				*((buf)+2-fmt->Gshift/8) = g;		\
   267 				*((buf)+2-fmt->Bshift/8) = b;		\
   268 			}						\
   269 		}							\
   270 		break;							\
   271 									\
   272 		case 4: {						\
   273 			Uint32 *bufp;					\
   274 			Uint32 Pixel;					\
   275 									\
   276 			bufp = (Uint32 *)buf;				\
   277 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   278 			*bufp = Pixel | (*bufp & Amask);		\
   279 		}							\
   280 		break;							\
   281 	}								\
   282 }
   283 
   284 /* FIXME: Should we rescale alpha into 0..255 here? */
   285 #define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)				\
   286 {									\
   287 	r = ((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss; 		\
   288 	g = ((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss; 		\
   289 	b = ((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss; 		\
   290 	a = ((Pixel&fmt->Amask)>>fmt->Ashift)<<fmt->Aloss;	 	\
   291 }
   292 #define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)	\
   293 {						\
   294 	r = (Pixel&fmt->Rmask)>>fmt->Rshift;	\
   295 	g = (Pixel&fmt->Gmask)>>fmt->Gshift;	\
   296 	b = (Pixel&fmt->Bmask)>>fmt->Bshift;	\
   297 	a = (Pixel&fmt->Amask)>>fmt->Ashift;	\
   298 }
   299 #define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)				\
   300 {									\
   301 	r = (Pixel>>24);						\
   302 	g = ((Pixel>>16)&0xFF);						\
   303 	b = ((Pixel>>8)&0xFF);						\
   304 	a = (Pixel&0xFF);						\
   305 }
   306 #define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)				\
   307 {									\
   308 	r = ((Pixel>>16)&0xFF);						\
   309 	g = ((Pixel>>8)&0xFF);						\
   310 	b = (Pixel&0xFF);						\
   311 	a = (Pixel>>24);						\
   312 }
   313 #define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)				\
   314 {									\
   315 	r = (Pixel&0xFF);						\
   316 	g = ((Pixel>>8)&0xFF);						\
   317 	b = ((Pixel>>16)&0xFF);						\
   318 	a = (Pixel>>24);						\
   319 }
   320 #define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)			   \
   321 do {									   \
   322 	switch (bpp) {							   \
   323 		case 2:							   \
   324 			Pixel = *((Uint16 *)(buf));			   \
   325 		break;							   \
   326 									   \
   327 		case 3:	{/* FIXME: broken code (no alpha) */		   \
   328 		        Uint8 *b = (Uint8 *)buf;			   \
   329 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   330 			        Pixel = b[0] + (b[1] << 8) + (b[2] << 16); \
   331 			} else {					   \
   332 			        Pixel = (b[0] << 16) + (b[1] << 8) + b[2]; \
   333 			}						   \
   334 		}							   \
   335 		break;							   \
   336 									   \
   337 		case 4:							   \
   338 			Pixel = *((Uint32 *)(buf));			   \
   339 		break;							   \
   340 									   \
   341 		default:						   \
   342 		        Pixel = 0; /* stop gcc complaints */		   \
   343 		break;							   \
   344 	}								   \
   345 	RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a);			   \
   346 	Pixel &= ~fmt->Amask;						   \
   347 } while(0)
   348 
   349 /* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
   350 #define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)				\
   351 {									\
   352 	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|				\
   353 		((g>>fmt->Gloss)<<fmt->Gshift)|				\
   354 		((b>>fmt->Bloss)<<fmt->Bshift)|				\
   355 		((a>>fmt->Aloss)<<fmt->Ashift);				\
   356 }
   357 #define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)			\
   358 {									\
   359 	switch (bpp) {							\
   360 		case 2: {						\
   361 			Uint16 Pixel;					\
   362 									\
   363 			PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a);	\
   364 			*((Uint16 *)(buf)) = Pixel;			\
   365 		}							\
   366 		break;							\
   367 									\
   368 		case 3: { /* FIXME: broken code (no alpha) */		\
   369                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   370 			        *((buf)+fmt->Rshift/8) = r;		\
   371 				*((buf)+fmt->Gshift/8) = g;		\
   372 				*((buf)+fmt->Bshift/8) = b;		\
   373 			} else {					\
   374 			        *((buf)+2-fmt->Rshift/8) = r;		\
   375 				*((buf)+2-fmt->Gshift/8) = g;		\
   376 				*((buf)+2-fmt->Bshift/8) = b;		\
   377 			}						\
   378 		}							\
   379 		break;							\
   380 									\
   381 		case 4: {						\
   382 			Uint32 Pixel;					\
   383 									\
   384 			PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a);	\
   385 			*((Uint32 *)(buf)) = Pixel;			\
   386 		}							\
   387 		break;							\
   388 	}								\
   389 }
   390 
   391 /* Blend the RGB values of two Pixels based on a source alpha value */
   392 #define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)	\
   393 do {						\
   394 	dR = (((sR-dR)*(A))>>8)+dR;		\
   395 	dG = (((sG-dG)*(A))>>8)+dG;		\
   396 	dB = (((sB-dB)*(A))>>8)+dB;		\
   397 } while(0)
   398 
   399 /* Blend the RGB values of two Pixels based on a source alpha value */
   400 #define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB)	\
   401 do {						\
   402     unsigned tR, tG, tB, tA; \
   403     tA = 255 - sA; \
   404     tR = 1 + (sR * sA) + (dR * tA); \
   405     dR = (tR + (tR >> 8)) >> 8; \
   406     tG = 1 + (sG * sA) + (dG * tA); \
   407     dG = (tG + (tG >> 8)) >> 8; \
   408     tB = 1 + (sB * sA) + (dB * tA); \
   409     dB = (tB + (tB >> 8)) >> 8; \
   410 } while(0)
   411 
   412 
   413 /* This is a very useful loop for optimizing blitters */
   414 #if defined(_MSC_VER) && (_MSC_VER == 1300)
   415 /* There's a bug in the Visual C++ 7 optimizer when compiling this code */
   416 #else
   417 #define USE_DUFFS_LOOP
   418 #endif
   419 #ifdef USE_DUFFS_LOOP
   420 
   421 /* 8-times unrolled loop */
   422 #define DUFFS_LOOP8(pixel_copy_increment, width)			\
   423 { int n = (width+7)/8;							\
   424 	switch (width & 7) {						\
   425 	case 0: do {	pixel_copy_increment;				\
   426 	case 7:		pixel_copy_increment;				\
   427 	case 6:		pixel_copy_increment;				\
   428 	case 5:		pixel_copy_increment;				\
   429 	case 4:		pixel_copy_increment;				\
   430 	case 3:		pixel_copy_increment;				\
   431 	case 2:		pixel_copy_increment;				\
   432 	case 1:		pixel_copy_increment;				\
   433 		} while ( --n > 0 );					\
   434 	}								\
   435 }
   436 
   437 /* 4-times unrolled loop */
   438 #define DUFFS_LOOP4(pixel_copy_increment, width)			\
   439 { int n = (width+3)/4;							\
   440 	switch (width & 3) {						\
   441 	case 0: do {	pixel_copy_increment;				\
   442 	case 3:		pixel_copy_increment;				\
   443 	case 2:		pixel_copy_increment;				\
   444 	case 1:		pixel_copy_increment;				\
   445 		} while ( --n > 0 );					\
   446 	}								\
   447 }
   448 
   449 /* 2 - times unrolled loop */
   450 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
   451 				double_pixel_copy_increment, width)	\
   452 { int n, w = width;							\
   453 	if( w & 1 ) {							\
   454 	    pixel_copy_increment;					\
   455 	    w--;							\
   456 	}								\
   457 	if ( w > 0 )	{						\
   458 	    n = ( w + 2) / 4;						\
   459 	    switch( w & 2 ) {						\
   460 	    case 0: do {	double_pixel_copy_increment;		\
   461 	    case 2:		double_pixel_copy_increment;		\
   462 		    } while ( --n > 0 );					\
   463 	    }								\
   464 	}								\
   465 }
   466 
   467 /* 2 - times unrolled loop 4 pixels */
   468 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
   469 				double_pixel_copy_increment,		\
   470 				quatro_pixel_copy_increment, width)	\
   471 { int n, w = width;								\
   472         if(w & 1) {							\
   473 	  pixel_copy_increment;						\
   474 	  w--;								\
   475 	}								\
   476 	if(w & 2) {							\
   477 	  double_pixel_copy_increment;					\
   478 	  w -= 2;							\
   479 	}								\
   480 	if ( w > 0 ) {							\
   481 	    n = ( w + 7 ) / 8;						\
   482 	    switch( w & 4 ) {						\
   483 	    case 0: do {	quatro_pixel_copy_increment;		\
   484 	    case 4:		quatro_pixel_copy_increment;		\
   485 		    } while ( --n > 0 );					\
   486 	    }								\
   487 	}								\
   488 }
   489 
   490 /* Use the 8-times version of the loop by default */
   491 #define DUFFS_LOOP(pixel_copy_increment, width)				\
   492 	DUFFS_LOOP8(pixel_copy_increment, width)
   493 
   494 #else
   495 
   496 /* Don't use Duff's device to unroll loops */
   497 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
   498 			 double_pixel_copy_increment, width)		\
   499 { int n = width;								\
   500     if( n & 1 ) {							\
   501 	pixel_copy_increment;						\
   502 	n--;								\
   503     }									\
   504     n=n>>1;								\
   505     for(; n > 0; --n) {   						\
   506 	double_pixel_copy_increment;					\
   507     }									\
   508 }
   509 
   510 /* Don't use Duff's device to unroll loops */
   511 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
   512 				double_pixel_copy_increment,		\
   513 				quatro_pixel_copy_increment, width)	\
   514 { int n = width;								\
   515         if(n & 1) {							\
   516 	  pixel_copy_increment;						\
   517 	  n--;								\
   518 	}								\
   519 	if(n & 2) {							\
   520 	  double_pixel_copy_increment;					\
   521 	  n -= 2;							\
   522 	}								\
   523 	n=n>>2;								\
   524 	for(; n > 0; --n) {   						\
   525 	  quatro_pixel_copy_increment;					\
   526         }								\
   527 }
   528 
   529 /* Don't use Duff's device to unroll loops */
   530 #define DUFFS_LOOP(pixel_copy_increment, width)				\
   531 { int n;								\
   532 	for ( n=width; n > 0; --n ) {					\
   533 		pixel_copy_increment;					\
   534 	}								\
   535 }
   536 #define DUFFS_LOOP8(pixel_copy_increment, width)			\
   537 	DUFFS_LOOP(pixel_copy_increment, width)
   538 #define DUFFS_LOOP4(pixel_copy_increment, width)			\
   539 	DUFFS_LOOP(pixel_copy_increment, width)
   540 
   541 #endif /* USE_DUFFS_LOOP */
   542 
   543 /* Prevent Visual C++ 6.0 from printing out stupid warnings */
   544 #if defined(_MSC_VER) && (_MSC_VER >= 600)
   545 #pragma warning(disable: 4550)
   546 #endif
   547 
   548 #endif /* _SDL_blit_h */
   549 /* vi: set ts=4 sw=4 expandtab: */