src/video/SDL_blit.h
author Sam Lantinga <slouken@libsdl.org>
Thu, 16 Aug 2007 06:37:22 +0000
changeset 2251 292bee385630
parent 2250 e1d228456537
child 2257 340942cfda48
permissions -rw-r--r--
SSE and MMX intrinsics work with Visual Studio now...
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #ifndef _SDL_blit_h
    25 #define _SDL_blit_h
    26 
    27 #ifdef __MMX__
    28 #include <mmintrin.h>
    29 #endif
    30 #ifdef __SSE__
    31 #include <xmmintrin.h>
    32 #endif
    33 
    34 #include "SDL_cpuinfo.h"
    35 #include "SDL_endian.h"
    36 
    37 /* The structure passed to the low level blit functions */
    38 typedef struct
    39 {
    40     Uint8 *s_pixels;
    41     int s_width;
    42     int s_height;
    43     int s_skip;
    44     Uint8 *d_pixels;
    45     int d_width;
    46     int d_height;
    47     int d_skip;
    48     void *aux_data;
    49     SDL_PixelFormat *src;
    50     Uint8 *table;
    51     SDL_PixelFormat *dst;
    52 } SDL_BlitInfo;
    53 
    54 /* The type definition for the low level blit functions */
    55 typedef void (*SDL_loblit) (SDL_BlitInfo * info);
    56 
    57 /* This is the private info structure for software accelerated blits */
    58 struct private_swaccel
    59 {
    60     SDL_loblit blit;
    61     void *aux_data;
    62 };
    63 
    64 /* Blit mapping definition */
    65 typedef struct SDL_BlitMap
    66 {
    67     SDL_Surface *dst;
    68     int identity;
    69     Uint8 *table;
    70     SDL_blit sw_blit;
    71     struct private_swaccel *sw_data;
    72 
    73     /* the version count matches the destination; mismatch indicates
    74        an invalid mapping */
    75     unsigned int format_version;
    76 } SDL_BlitMap;
    77 
    78 #define SDL_BLIT_ANY                0x00000000
    79 #define SDL_BLIT_MMX                0x00000001
    80 #define SDL_BLIT_SSE                0x00000002
    81 #define SDL_BLIT_ALTIVEC_PREFETCH   0x00000004
    82 #define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008
    83 
    84 typedef struct SDL_BlitEntry
    85 {
    86     Uint32 features;
    87     SDL_loblit blit;
    88 } SDL_BlitEntry;
    89 
    90 /* Functions found in SDL_blit.c */
    91 extern int SDL_CalculateBlit(SDL_Surface * surface);
    92 
    93 /* Functions found in SDL_blit_{0,1,N,A}.c */
    94 extern SDL_loblit SDL_CalculateBlit0(SDL_Surface * surface, int complex);
    95 extern SDL_loblit SDL_CalculateBlit1(SDL_Surface * surface, int complex);
    96 extern SDL_loblit SDL_CalculateBlitN(SDL_Surface * surface, int complex);
    97 extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface * surface, int complex);
    98 
    99 /*
   100  * Useful macros for blitting routines
   101  */
   102 
   103 #if defined(__GNUC__)
   104 #define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
   105 #elif defined(_MSC_VER)
   106 #define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
   107 #else
   108 #define DECLARE_ALIGNED(t,v,a)  t v
   109 #endif
   110 
   111 #define FORMAT_EQUAL(A, B)						\
   112     ((A)->BitsPerPixel == (B)->BitsPerPixel				\
   113      && ((A)->Rmask == (B)->Rmask) && ((A)->Amask == (B)->Amask))
   114 
   115 /* Load pixel of the specified format from a buffer and get its R-G-B values */
   116 /* FIXME: rescale values to 0..255 here? */
   117 #define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)				\
   118 {									\
   119 	r = (((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss); 		\
   120 	g = (((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss); 		\
   121 	b = (((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss); 		\
   122 }
   123 #define RGB_FROM_RGB565(Pixel, r, g, b)					\
   124 {									\
   125 	r = (((Pixel&0xF800)>>11)<<3);		 			\
   126 	g = (((Pixel&0x07E0)>>5)<<2); 					\
   127 	b = ((Pixel&0x001F)<<3); 					\
   128 }
   129 #define RGB_FROM_RGB555(Pixel, r, g, b)					\
   130 {									\
   131 	r = (((Pixel&0x7C00)>>10)<<3);		 			\
   132 	g = (((Pixel&0x03E0)>>5)<<3); 					\
   133 	b = ((Pixel&0x001F)<<3); 					\
   134 }
   135 #define RGB_FROM_RGB888(Pixel, r, g, b)					\
   136 {									\
   137 	r = ((Pixel&0xFF0000)>>16);		 			\
   138 	g = ((Pixel&0xFF00)>>8);		 			\
   139 	b = (Pixel&0xFF);			 			\
   140 }
   141 #define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)				   \
   142 do {									   \
   143 	switch (bpp) {							   \
   144 		case 2:							   \
   145 			Pixel = *((Uint16 *)(buf));			   \
   146 		break;							   \
   147 									   \
   148 		case 3: {						   \
   149 		        Uint8 *B = (Uint8 *)(buf);			   \
   150 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   151 			        Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
   152 			} else {					   \
   153 			        Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
   154 			}						   \
   155 		}							   \
   156 		break;							   \
   157 									   \
   158 		case 4:							   \
   159 			Pixel = *((Uint32 *)(buf));			   \
   160 		break;							   \
   161 									   \
   162 		default:						   \
   163 			Pixel = 0; /* appease gcc */			   \
   164 		break;							   \
   165 	}								   \
   166 } while(0)
   167 
   168 #define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)			   \
   169 do {									   \
   170 	switch (bpp) {							   \
   171 		case 2:							   \
   172 			Pixel = *((Uint16 *)(buf));			   \
   173 		break;							   \
   174 									   \
   175 		case 3: {						   \
   176 		        Uint8 *B = (Uint8 *)buf;			   \
   177 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   178 			        Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
   179 			} else {					   \
   180 			        Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
   181 			}						   \
   182 		}							   \
   183 		break;							   \
   184 									   \
   185 		case 4:							   \
   186 			Pixel = *((Uint32 *)(buf));			   \
   187 		break;							   \
   188 									   \
   189 	        default:						   \
   190 		        Pixel = 0;	/* prevent gcc from complaining */ \
   191 		break;							   \
   192 	}								   \
   193 	RGB_FROM_PIXEL(Pixel, fmt, r, g, b);				   \
   194 } while(0)
   195 
   196 /* Assemble R-G-B values into a specified pixel format and store them */
   197 #define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)				\
   198 {									\
   199 	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|				\
   200 		((g>>fmt->Gloss)<<fmt->Gshift)|				\
   201 		((b>>fmt->Bloss)<<fmt->Bshift);				\
   202 }
   203 #define RGB565_FROM_RGB(Pixel, r, g, b)					\
   204 {									\
   205 	Pixel = ((r>>3)<<11)|((g>>2)<<5)|(b>>3);			\
   206 }
   207 #define RGB555_FROM_RGB(Pixel, r, g, b)					\
   208 {									\
   209 	Pixel = ((r>>3)<<10)|((g>>3)<<5)|(b>>3);			\
   210 }
   211 #define RGB888_FROM_RGB(Pixel, r, g, b)					\
   212 {									\
   213 	Pixel = (r<<16)|(g<<8)|b;					\
   214 }
   215 #define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) 				\
   216 {									\
   217 	switch (bpp) {							\
   218 		case 2: {						\
   219 			Uint16 Pixel;					\
   220 									\
   221 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   222 			*((Uint16 *)(buf)) = Pixel;			\
   223 		}							\
   224 		break;							\
   225 									\
   226 		case 3: {						\
   227                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   228 			        *((buf)+fmt->Rshift/8) = r;		\
   229 				*((buf)+fmt->Gshift/8) = g;		\
   230 				*((buf)+fmt->Bshift/8) = b;		\
   231 			} else {					\
   232 			        *((buf)+2-fmt->Rshift/8) = r;		\
   233 				*((buf)+2-fmt->Gshift/8) = g;		\
   234 				*((buf)+2-fmt->Bshift/8) = b;		\
   235 			}						\
   236 		}							\
   237 		break;							\
   238 									\
   239 		case 4: {						\
   240 			Uint32 Pixel;					\
   241 									\
   242 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   243 			*((Uint32 *)(buf)) = Pixel;			\
   244 		}							\
   245 		break;							\
   246 	}								\
   247 }
   248 #define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)		\
   249 {									\
   250 	switch (bpp) {							\
   251 		case 2: {						\
   252 			Uint16 *bufp;					\
   253 			Uint16 Pixel;					\
   254 									\
   255 			bufp = (Uint16 *)buf;				\
   256 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   257 			*bufp = Pixel | (*bufp & Amask);		\
   258 		}							\
   259 		break;							\
   260 									\
   261 		case 3: {						\
   262                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   263 			        *((buf)+fmt->Rshift/8) = r;		\
   264 				*((buf)+fmt->Gshift/8) = g;		\
   265 				*((buf)+fmt->Bshift/8) = b;		\
   266 			} else {					\
   267 			        *((buf)+2-fmt->Rshift/8) = r;		\
   268 				*((buf)+2-fmt->Gshift/8) = g;		\
   269 				*((buf)+2-fmt->Bshift/8) = b;		\
   270 			}						\
   271 		}							\
   272 		break;							\
   273 									\
   274 		case 4: {						\
   275 			Uint32 *bufp;					\
   276 			Uint32 Pixel;					\
   277 									\
   278 			bufp = (Uint32 *)buf;				\
   279 			PIXEL_FROM_RGB(Pixel, fmt, r, g, b);		\
   280 			*bufp = Pixel | (*bufp & Amask);		\
   281 		}							\
   282 		break;							\
   283 	}								\
   284 }
   285 
   286 /* FIXME: Should we rescale alpha into 0..255 here? */
   287 #define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)				\
   288 {									\
   289 	r = ((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss; 		\
   290 	g = ((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss; 		\
   291 	b = ((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss; 		\
   292 	a = ((Pixel&fmt->Amask)>>fmt->Ashift)<<fmt->Aloss;	 	\
   293 }
   294 #define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)	\
   295 {						\
   296 	r = (Pixel&fmt->Rmask)>>fmt->Rshift;	\
   297 	g = (Pixel&fmt->Gmask)>>fmt->Gshift;	\
   298 	b = (Pixel&fmt->Bmask)>>fmt->Bshift;	\
   299 	a = (Pixel&fmt->Amask)>>fmt->Ashift;	\
   300 }
   301 #define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)				\
   302 {									\
   303 	r = (Pixel>>24);						\
   304 	g = ((Pixel>>16)&0xFF);						\
   305 	b = ((Pixel>>8)&0xFF);						\
   306 	a = (Pixel&0xFF);						\
   307 }
   308 #define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)				\
   309 {									\
   310 	r = ((Pixel>>16)&0xFF);						\
   311 	g = ((Pixel>>8)&0xFF);						\
   312 	b = (Pixel&0xFF);						\
   313 	a = (Pixel>>24);						\
   314 }
   315 #define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)				\
   316 {									\
   317 	r = (Pixel&0xFF);						\
   318 	g = ((Pixel>>8)&0xFF);						\
   319 	b = ((Pixel>>16)&0xFF);						\
   320 	a = (Pixel>>24);						\
   321 }
   322 #define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)			   \
   323 do {									   \
   324 	switch (bpp) {							   \
   325 		case 2:							   \
   326 			Pixel = *((Uint16 *)(buf));			   \
   327 		break;							   \
   328 									   \
   329 		case 3:	{/* FIXME: broken code (no alpha) */		   \
   330 		        Uint8 *b = (Uint8 *)buf;			   \
   331 			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		   \
   332 			        Pixel = b[0] + (b[1] << 8) + (b[2] << 16); \
   333 			} else {					   \
   334 			        Pixel = (b[0] << 16) + (b[1] << 8) + b[2]; \
   335 			}						   \
   336 		}							   \
   337 		break;							   \
   338 									   \
   339 		case 4:							   \
   340 			Pixel = *((Uint32 *)(buf));			   \
   341 		break;							   \
   342 									   \
   343 		default:						   \
   344 		        Pixel = 0; /* stop gcc complaints */		   \
   345 		break;							   \
   346 	}								   \
   347 	RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a);			   \
   348 	Pixel &= ~fmt->Amask;						   \
   349 } while(0)
   350 
   351 /* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
   352 #define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)				\
   353 {									\
   354 	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|				\
   355 		((g>>fmt->Gloss)<<fmt->Gshift)|				\
   356 		((b>>fmt->Bloss)<<fmt->Bshift)|				\
   357 		((a>>fmt->Aloss)<<fmt->Ashift);				\
   358 }
   359 #define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)			\
   360 {									\
   361 	switch (bpp) {							\
   362 		case 2: {						\
   363 			Uint16 Pixel;					\
   364 									\
   365 			PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a);	\
   366 			*((Uint16 *)(buf)) = Pixel;			\
   367 		}							\
   368 		break;							\
   369 									\
   370 		case 3: { /* FIXME: broken code (no alpha) */		\
   371                         if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
   372 			        *((buf)+fmt->Rshift/8) = r;		\
   373 				*((buf)+fmt->Gshift/8) = g;		\
   374 				*((buf)+fmt->Bshift/8) = b;		\
   375 			} else {					\
   376 			        *((buf)+2-fmt->Rshift/8) = r;		\
   377 				*((buf)+2-fmt->Gshift/8) = g;		\
   378 				*((buf)+2-fmt->Bshift/8) = b;		\
   379 			}						\
   380 		}							\
   381 		break;							\
   382 									\
   383 		case 4: {						\
   384 			Uint32 Pixel;					\
   385 									\
   386 			PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a);	\
   387 			*((Uint32 *)(buf)) = Pixel;			\
   388 		}							\
   389 		break;							\
   390 	}								\
   391 }
   392 
   393 /* Blend the RGB values of two Pixels based on a source alpha value */
   394 #define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)	\
   395 do {						\
   396 	dR = (((sR-dR)*(A))>>8)+dR;		\
   397 	dG = (((sG-dG)*(A))>>8)+dG;		\
   398 	dB = (((sB-dB)*(A))>>8)+dB;		\
   399 } while(0)
   400 
   401 /* Blend the RGB values of two Pixels based on a source alpha value */
   402 #define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB)	\
   403 do {						\
   404     unsigned tR, tG, tB, tA; \
   405     tA = 255 - sA; \
   406     tR = 1 + (sR * sA) + (dR * tA); \
   407     dR = (tR + (tR >> 8)) >> 8; \
   408     tG = 1 + (sG * sA) + (dG * tA); \
   409     dG = (tG + (tG >> 8)) >> 8; \
   410     tB = 1 + (sB * sA) + (dB * tA); \
   411     dB = (tB + (tB >> 8)) >> 8; \
   412 } while(0)
   413 
   414 
   415 /* This is a very useful loop for optimizing blitters */
   416 #if defined(_MSC_VER) && (_MSC_VER == 1300)
   417 /* There's a bug in the Visual C++ 7 optimizer when compiling this code */
   418 #else
   419 #define USE_DUFFS_LOOP
   420 #endif
   421 #ifdef USE_DUFFS_LOOP
   422 
   423 /* 8-times unrolled loop */
   424 #define DUFFS_LOOP8(pixel_copy_increment, width)			\
   425 { int n = (width+7)/8;							\
   426 	switch (width & 7) {						\
   427 	case 0: do {	pixel_copy_increment;				\
   428 	case 7:		pixel_copy_increment;				\
   429 	case 6:		pixel_copy_increment;				\
   430 	case 5:		pixel_copy_increment;				\
   431 	case 4:		pixel_copy_increment;				\
   432 	case 3:		pixel_copy_increment;				\
   433 	case 2:		pixel_copy_increment;				\
   434 	case 1:		pixel_copy_increment;				\
   435 		} while ( --n > 0 );					\
   436 	}								\
   437 }
   438 
   439 /* 4-times unrolled loop */
   440 #define DUFFS_LOOP4(pixel_copy_increment, width)			\
   441 { int n = (width+3)/4;							\
   442 	switch (width & 3) {						\
   443 	case 0: do {	pixel_copy_increment;				\
   444 	case 3:		pixel_copy_increment;				\
   445 	case 2:		pixel_copy_increment;				\
   446 	case 1:		pixel_copy_increment;				\
   447 		} while ( --n > 0 );					\
   448 	}								\
   449 }
   450 
   451 /* 2 - times unrolled loop */
   452 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
   453 				double_pixel_copy_increment, width)	\
   454 { int n, w = width;							\
   455 	if( w & 1 ) {							\
   456 	    pixel_copy_increment;					\
   457 	    w--;							\
   458 	}								\
   459 	if ( w > 0 )	{						\
   460 	    n = ( w + 2) / 4;						\
   461 	    switch( w & 2 ) {						\
   462 	    case 0: do {	double_pixel_copy_increment;		\
   463 	    case 2:		double_pixel_copy_increment;		\
   464 		    } while ( --n > 0 );					\
   465 	    }								\
   466 	}								\
   467 }
   468 
   469 /* 2 - times unrolled loop 4 pixels */
   470 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
   471 				double_pixel_copy_increment,		\
   472 				quatro_pixel_copy_increment, width)	\
   473 { int n, w = width;								\
   474         if(w & 1) {							\
   475 	  pixel_copy_increment;						\
   476 	  w--;								\
   477 	}								\
   478 	if(w & 2) {							\
   479 	  double_pixel_copy_increment;					\
   480 	  w -= 2;							\
   481 	}								\
   482 	if ( w > 0 ) {							\
   483 	    n = ( w + 7 ) / 8;						\
   484 	    switch( w & 4 ) {						\
   485 	    case 0: do {	quatro_pixel_copy_increment;		\
   486 	    case 4:		quatro_pixel_copy_increment;		\
   487 		    } while ( --n > 0 );					\
   488 	    }								\
   489 	}								\
   490 }
   491 
   492 /* Use the 8-times version of the loop by default */
   493 #define DUFFS_LOOP(pixel_copy_increment, width)				\
   494 	DUFFS_LOOP8(pixel_copy_increment, width)
   495 
   496 #else
   497 
   498 /* Don't use Duff's device to unroll loops */
   499 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
   500 			 double_pixel_copy_increment, width)		\
   501 { int n = width;								\
   502     if( n & 1 ) {							\
   503 	pixel_copy_increment;						\
   504 	n--;								\
   505     }									\
   506     n=n>>1;								\
   507     for(; n > 0; --n) {   						\
   508 	double_pixel_copy_increment;					\
   509     }									\
   510 }
   511 
   512 /* Don't use Duff's device to unroll loops */
   513 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
   514 				double_pixel_copy_increment,		\
   515 				quatro_pixel_copy_increment, width)	\
   516 { int n = width;								\
   517         if(n & 1) {							\
   518 	  pixel_copy_increment;						\
   519 	  n--;								\
   520 	}								\
   521 	if(n & 2) {							\
   522 	  double_pixel_copy_increment;					\
   523 	  n -= 2;							\
   524 	}								\
   525 	n=n>>2;								\
   526 	for(; n > 0; --n) {   						\
   527 	  quatro_pixel_copy_increment;					\
   528         }								\
   529 }
   530 
   531 /* Don't use Duff's device to unroll loops */
   532 #define DUFFS_LOOP(pixel_copy_increment, width)				\
   533 { int n;								\
   534 	for ( n=width; n > 0; --n ) {					\
   535 		pixel_copy_increment;					\
   536 	}								\
   537 }
   538 #define DUFFS_LOOP8(pixel_copy_increment, width)			\
   539 	DUFFS_LOOP(pixel_copy_increment, width)
   540 #define DUFFS_LOOP4(pixel_copy_increment, width)			\
   541 	DUFFS_LOOP(pixel_copy_increment, width)
   542 
   543 #endif /* USE_DUFFS_LOOP */
   544 
   545 /* Prevent Visual C++ 6.0 from printing out stupid warnings */
   546 #if defined(_MSC_VER) && (_MSC_VER >= 600)
   547 #pragma warning(disable: 4550)
   548 #endif
   549 
   550 #endif /* _SDL_blit_h */
   551 /* vi: set ts=4 sw=4 expandtab: */