src/video/SDL_blit.h
author Sam Lantinga <slouken@libsdl.org>
Thu, 16 Aug 2007 05:56:24 +0000
changeset 2249 5a58b57b6724
parent 2247 93994f65c74c
child 2250 e1d228456537
permissions -rw-r--r--
Added SSE and MMX optimization for SDL_FillRect()
slouken@0
     1
/*
slouken@0
     2
    SDL - Simple DirectMedia Layer
slouken@1312
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@0
     4
slouken@0
     5
    This library is free software; you can redistribute it and/or
slouken@1312
     6
    modify it under the terms of the GNU Lesser General Public
slouken@0
     7
    License as published by the Free Software Foundation; either
slouken@1312
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@0
     9
slouken@0
    10
    This library is distributed in the hope that it will be useful,
slouken@0
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@0
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1312
    13
    Lesser General Public License for more details.
slouken@0
    14
slouken@1312
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1312
    16
    License along with this library; if not, write to the Free Software
slouken@1312
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@0
    18
slouken@0
    19
    Sam Lantinga
slouken@252
    20
    slouken@libsdl.org
slouken@0
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
#ifndef _SDL_blit_h
slouken@0
    25
#define _SDL_blit_h
slouken@0
    26
slouken@2249
    27
#ifdef __MMX__
slouken@2249
    28
#include <mmintrin.h>
slouken@2249
    29
#endif
slouken@2249
    30
#ifdef __SSE__
slouken@2249
    31
#include <xmmintrin.h>
slouken@2249
    32
#endif
slouken@2249
    33
slouken@0
    34
#include "SDL_endian.h"
slouken@0
    35
slouken@0
    36
/* The structure passed to the low level blit functions */
slouken@1895
    37
typedef struct
slouken@1895
    38
{
slouken@1895
    39
    Uint8 *s_pixels;
slouken@1895
    40
    int s_width;
slouken@1895
    41
    int s_height;
slouken@1895
    42
    int s_skip;
slouken@1895
    43
    Uint8 *d_pixels;
slouken@1895
    44
    int d_width;
slouken@1895
    45
    int d_height;
slouken@1895
    46
    int d_skip;
slouken@1895
    47
    void *aux_data;
slouken@1895
    48
    SDL_PixelFormat *src;
slouken@1895
    49
    Uint8 *table;
slouken@1895
    50
    SDL_PixelFormat *dst;
slouken@0
    51
} SDL_BlitInfo;
slouken@0
    52
slouken@0
    53
/* The type definition for the low level blit functions */
slouken@1895
    54
typedef void (*SDL_loblit) (SDL_BlitInfo * info);
slouken@0
    55
slouken@0
    56
/* This is the private info structure for software accelerated blits */
slouken@1895
    57
struct private_swaccel
slouken@1895
    58
{
slouken@1895
    59
    SDL_loblit blit;
slouken@1895
    60
    void *aux_data;
slouken@0
    61
};
slouken@0
    62
slouken@0
    63
/* Blit mapping definition */
slouken@1895
    64
typedef struct SDL_BlitMap
slouken@1895
    65
{
slouken@1895
    66
    SDL_Surface *dst;
slouken@1895
    67
    int identity;
slouken@1895
    68
    Uint8 *table;
slouken@1895
    69
    SDL_blit sw_blit;
slouken@1895
    70
    struct private_swaccel *sw_data;
slouken@0
    71
slouken@1895
    72
    /* the version count matches the destination; mismatch indicates
slouken@1895
    73
       an invalid mapping */
slouken@1895
    74
    unsigned int format_version;
slouken@0
    75
} SDL_BlitMap;
slouken@0
    76
slouken@2247
    77
/*
 * Blitter feature flags.  SDL_BLIT_ANY is zero; every other value is a
 * distinct single bit, so the flags can be OR-ed together.
 * NOTE(review): presumably these are the values stored in
 * SDL_BlitEntry.features -- confirm against the blitter tables.
 */
#define SDL_BLIT_ANY                0x00000000
#define SDL_BLIT_MMX                0x00000001
#define SDL_BLIT_SSE                0x00000002
#define SDL_BLIT_ALTIVEC_PREFETCH   0x00000004
#define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008
slouken@2247
    82
slouken@2247
    83
typedef struct SDL_BlitEntry
slouken@2247
    84
{
slouken@2247
    85
    Uint32 features;
slouken@2247
    86
    SDL_loblit blit;
slouken@2247
    87
} SDL_BlitEntry;
slouken@0
    88
slouken@0
    89
/* Functions found in SDL_blit.c */
slouken@1895
    90
extern int SDL_CalculateBlit(SDL_Surface * surface);
slouken@0
    91
slouken@0
    92
/* Functions found in SDL_blit_{0,1,N,A}.c */
slouken@1895
    93
extern SDL_loblit SDL_CalculateBlit0(SDL_Surface * surface, int complex);
slouken@1895
    94
extern SDL_loblit SDL_CalculateBlit1(SDL_Surface * surface, int complex);
slouken@1895
    95
extern SDL_loblit SDL_CalculateBlitN(SDL_Surface * surface, int complex);
slouken@1895
    96
extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface * surface, int complex);
slouken@0
    97
slouken@0
    98
/*
slouken@0
    99
 * Useful macros for blitting routines
slouken@0
   100
 */
slouken@0
   101
slouken@2249
   102
/*
 * DECLARE_ALIGNED(t, v, a): declare variable v of type t, asking the
 * compiler to align it to 'a' using the GCC or MSVC extension.
 * 't' is a type name and 'v' a declarator (e.g. buf[4]), so neither can be
 * parenthesized.  On any other compiler the alignment request is dropped
 * and a plain declaration is emitted.
 */
#if defined(__GNUC__)
#define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(t,v,a)  t __declspec(align(a)) v
#else
#define DECLARE_ALIGNED(t,v,a)  t v
#endif
slouken@2249
   109
slouken@0
   110
/*
 * FORMAT_EQUAL(A, B): non-zero when the pixel formats pointed to by A and B
 * have the same BitsPerPixel, Rmask and Amask.  Only those three fields are
 * compared.
 */
#define FORMAT_EQUAL(A, B)                                              \
    (((A)->BitsPerPixel == (B)->BitsPerPixel) &&                        \
     ((A)->Rmask == (B)->Rmask) &&                                      \
     ((A)->Amask == (B)->Amask))
slouken@0
   113
slouken@0
   114
/*
 * Load pixel of the specified format from a buffer and get its R-G-B values.
 *
 * RGB_FROM_PIXEL(Pixel, fmt, r, g, b): for each channel, mask Pixel with the
 * format's mask, shift right by the channel shift, then shift left by the
 * channel loss, and store the result in r, g and b (which must be lvalues).
 * Every argument is parenthesized so expressions such as 'hi | lo' or '&f'
 * can be passed safely.  The brace form is kept so existing call sites
 * (with or without a trailing ';') keep compiling.
 */
/* FIXME: rescale values to 0..255 here? */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)                             \
{                                                                       \
    (r) = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss); \
    (g) = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss); \
    (b) = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss); \
}
icculus@1162
   122
/*
 * RGB_FROM_RGB565(Pixel, r, g, b): split a 5-6-5 pixel into r, g, b, each
 * left-aligned in 8 bits (red/blue << 3, green << 2).  r, g, b must be
 * lvalues; Pixel may be any integer expression.
 */
#define RGB_FROM_RGB565(Pixel, r, g, b)                                 \
{                                                                       \
    (r) = ((((Pixel) & 0xF800) >> 11) << 3);                            \
    (g) = ((((Pixel) & 0x07E0) >> 5) << 2);                             \
    (b) = (((Pixel) & 0x001F) << 3);                                    \
}
icculus@1162
   128
/*
 * RGB_FROM_RGB555(Pixel, r, g, b): split a 5-5-5 pixel into r, g, b, each
 * left-aligned in 8 bits (<< 3).  r, g, b must be lvalues; Pixel may be any
 * integer expression.
 */
#define RGB_FROM_RGB555(Pixel, r, g, b)                                 \
{                                                                       \
    (r) = ((((Pixel) & 0x7C00) >> 10) << 3);                            \
    (g) = ((((Pixel) & 0x03E0) >> 5) << 3);                             \
    (b) = (((Pixel) & 0x001F) << 3);                                    \
}
icculus@1162
   134
/*
 * RGB_FROM_RGB888(Pixel, r, g, b): split a pixel laid out as 0x00RRGGBB into
 * its three 8-bit channels.  r, g, b must be lvalues; Pixel may be any
 * integer expression.
 */
#define RGB_FROM_RGB888(Pixel, r, g, b)                                 \
{                                                                       \
    (r) = (((Pixel) & 0xFF0000) >> 16);                                 \
    (g) = (((Pixel) & 0xFF00) >> 8);                                    \
    (b) = ((Pixel) & 0xFF);                                             \
}
icculus@1162
   140
/*
 * RETRIEVE_RGB_PIXEL(buf, bpp, Pixel): read one pixel of bpp bytes from buf
 * into Pixel (an lvalue).
 *   bpp 2 / 4: a single Uint16 / Uint32 load through a cast of buf.
 *   bpp 3:     three byte loads combined according to SDL_BYTEORDER.
 *   other:     Pixel is set to 0.
 * The scratch pointer has a trailing-underscore name so that a caller buffer
 * called 'B' is no longer captured by the macro's own local.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)                             \
do {                                                                    \
    switch (bpp) {                                                      \
    case 2:                                                             \
        (Pixel) = *((Uint16 *)(buf));                                   \
        break;                                                          \
                                                                        \
    case 3: {                                                           \
        Uint8 *bytes_ = (Uint8 *)(buf);                                 \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            (Pixel) = bytes_[0] + (bytes_[1] << 8) + (bytes_[2] << 16); \
        } else {                                                        \
            (Pixel) = (bytes_[0] << 16) + (bytes_[1] << 8) + bytes_[2]; \
        }                                                               \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 4:                                                             \
        (Pixel) = *((Uint32 *)(buf));                                   \
        break;                                                          \
                                                                        \
    default:                                                            \
        (Pixel) = 0;    /* appease gcc */                               \
        break;                                                          \
    }                                                                   \
} while (0)
slouken@0
   166
icculus@1162
   167
/*
 * DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b): read one pixel of bpp bytes
 * from buf into Pixel (same rules as RETRIEVE_RGB_PIXEL: 2/4 byte loads,
 * 3 bytes combined per SDL_BYTEORDER, anything else gives 0), then split it
 * into r, g, b with RGB_FROM_PIXEL.  Pixel, r, g, b must be lvalues.
 *
 * buf is parenthesized before the byte cast so 'ptr + 1' advances by whole
 * elements of ptr's own type rather than by one byte.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)                     \
do {                                                                    \
    switch (bpp) {                                                      \
    case 2:                                                             \
        (Pixel) = *((Uint16 *)(buf));                                   \
        break;                                                          \
                                                                        \
    case 3: {                                                           \
        Uint8 *bytes_ = (Uint8 *)(buf);                                 \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            (Pixel) = bytes_[0] + (bytes_[1] << 8) + (bytes_[2] << 16); \
        } else {                                                        \
            (Pixel) = (bytes_[0] << 16) + (bytes_[1] << 8) + bytes_[2]; \
        }                                                               \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 4:                                                             \
        (Pixel) = *((Uint32 *)(buf));                                   \
        break;                                                          \
                                                                        \
    default:                                                            \
        (Pixel) = 0;    /* prevent gcc from complaining */              \
        break;                                                          \
    }                                                                   \
    RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b));                      \
} while (0)
slouken@0
   194
slouken@0
   195
/*
 * Assemble R-G-B values into a specified pixel format and store them.
 *
 * PIXEL_FROM_RGB(Pixel, fmt, r, g, b): drop each channel's low 'loss' bits,
 * shift it to the channel position given by fmt and OR the three results
 * into Pixel (an lvalue).  Arguments are parenthesized so channel
 * expressions such as 'a | b' are shifted as a whole.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)                             \
{                                                                       \
    (Pixel) = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) |                \
              (((g) >> (fmt)->Gloss) << (fmt)->Gshift) |                \
              (((b) >> (fmt)->Bloss) << (fmt)->Bshift);                 \
}
icculus@1162
   202
/*
 * RGB565_FROM_RGB(Pixel, r, g, b): pack 8-bit r, g, b into a 5-6-5 pixel
 * (red in bits 11-15, green in 5-10, blue in 0-4) and store it in Pixel.
 */
#define RGB565_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    (Pixel) = (((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3);      \
}
icculus@1162
   206
/*
 * RGB555_FROM_RGB(Pixel, r, g, b): pack 8-bit r, g, b into a 5-5-5 pixel
 * (red in bits 10-14, green in 5-9, blue in 0-4) and store it in Pixel.
 */
#define RGB555_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    (Pixel) = (((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3);      \
}
icculus@1162
   210
/*
 * RGB888_FROM_RGB(Pixel, r, g, b): pack 8-bit r, g, b as 0x00RRGGBB and
 * store the result in Pixel.  No masking is applied to the inputs.
 */
#define RGB888_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    (Pixel) = ((r) << 16) | ((g) << 8) | (b);                           \
}
slouken@0
   214
/*
 * ASSEMBLE_RGB(buf, bpp, fmt, r, g, b): build a pixel from r, g, b and store
 * it at buf.
 *   bpp 2 / 4: PIXEL_FROM_RGB into a Uint16 / Uint32, stored through a cast.
 *   bpp 3:     r, g, b are stored as individual elements of buf at index
 *              shift/8 (little endian) or 2 - shift/8 (big endian); this
 *              indexes buf directly, so buf must be a byte pointer here.
 *   other:     nothing is written.
 * The scratch pixel uses a trailing-underscore name so it cannot capture a
 * caller identifier called 'Pixel'.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b)                            \
{                                                                       \
    switch (bpp) {                                                      \
    case 2: {                                                           \
        Uint16 px16_;                                                   \
                                                                        \
        PIXEL_FROM_RGB(px16_, (fmt), (r), (g), (b));                    \
        *((Uint16 *)(buf)) = px16_;                                     \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 3: {                                                           \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            *((buf) + (fmt)->Rshift / 8) = (r);                         \
            *((buf) + (fmt)->Gshift / 8) = (g);                         \
            *((buf) + (fmt)->Bshift / 8) = (b);                         \
        } else {                                                        \
            *((buf) + 2 - (fmt)->Rshift / 8) = (r);                     \
            *((buf) + 2 - (fmt)->Gshift / 8) = (g);                     \
            *((buf) + 2 - (fmt)->Bshift / 8) = (b);                     \
        }                                                               \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 4: {                                                           \
        Uint32 px32_;                                                   \
                                                                        \
        PIXEL_FROM_RGB(px32_, (fmt), (r), (g), (b));                    \
        *((Uint32 *)(buf)) = px32_;                                     \
    }                                                                   \
        break;                                                          \
    }                                                                   \
}
slouken@0
   247
/*
 * ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask): like ASSEMBLE_RGB, but
 * for bpp 2 and 4 the bits of the existing destination pixel selected by
 * Amask are kept and OR-ed with the new RGB value.  The bpp 3 path stores
 * the three channel bytes only and does not use Amask.
 *
 * buf and Amask are parenthesized so pointer arithmetic in buf happens
 * before the cast and a compound mask such as 'a | b' is applied as a whole.
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)               \
{                                                                       \
    switch (bpp) {                                                      \
    case 2: {                                                           \
        Uint16 *dst16_ = (Uint16 *)(buf);                               \
        Uint16 px16_;                                                   \
                                                                        \
        PIXEL_FROM_RGB(px16_, (fmt), (r), (g), (b));                    \
        *dst16_ = px16_ | (*dst16_ & (Amask));                          \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 3: {                                                           \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            *((buf) + (fmt)->Rshift / 8) = (r);                         \
            *((buf) + (fmt)->Gshift / 8) = (g);                         \
            *((buf) + (fmt)->Bshift / 8) = (b);                         \
        } else {                                                        \
            *((buf) + 2 - (fmt)->Rshift / 8) = (r);                     \
            *((buf) + 2 - (fmt)->Gshift / 8) = (g);                     \
            *((buf) + 2 - (fmt)->Bshift / 8) = (b);                     \
        }                                                               \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 4: {                                                           \
        Uint32 *dst32_ = (Uint32 *)(buf);                               \
        Uint32 px32_;                                                   \
                                                                        \
        PIXEL_FROM_RGB(px32_, (fmt), (r), (g), (b));                    \
        *dst32_ = px32_ | (*dst32_ & (Amask));                          \
    }                                                                   \
        break;                                                          \
    }                                                                   \
}
slouken@0
   284
slouken@0
   285
/*
 * RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a): for each of the four channels,
 * mask Pixel with the format's mask, shift right by the channel shift, then
 * left by the channel loss; store the results in r, g, b, a (lvalues).
 * Arguments are parenthesized so compound expressions are handled as a whole.
 */
/* FIXME: Should we rescale alpha into 0..255 here? */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)                         \
{                                                                       \
    (r) = (((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss;  \
    (g) = (((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss;  \
    (b) = (((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss;  \
    (a) = (((Pixel) & (fmt)->Amask) >> (fmt)->Ashift) << (fmt)->Aloss;  \
}
icculus@1162
   293
/*
 * RGBA_FROM_8888(Pixel, fmt, r, g, b, a): mask-and-shift each channel out of
 * Pixel using fmt's masks and shifts.  Unlike RGBA_FROM_PIXEL, no 'loss'
 * shift is applied.  r, g, b, a must be lvalues.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)                          \
{                                                                       \
    (r) = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift;                    \
    (g) = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift;                    \
    (b) = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift;                    \
    (a) = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift;                    \
}
icculus@1162
   300
/*
 * RGBA_FROM_RGBA8888(Pixel, r, g, b, a): split a pixel laid out as
 * 0xRRGGBBAA.  The red channel is taken as Pixel >> 24 with no mask, so
 * Pixel is expected to be an unsigned 32-bit value.
 */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)                           \
{                                                                       \
    (r) = ((Pixel) >> 24);                                              \
    (g) = (((Pixel) >> 16) & 0xFF);                                     \
    (b) = (((Pixel) >> 8) & 0xFF);                                      \
    (a) = ((Pixel) & 0xFF);                                             \
}
icculus@1162
   307
/*
 * RGBA_FROM_ARGB8888(Pixel, r, g, b, a): split a pixel laid out as
 * 0xAARRGGBB.  Alpha is taken as Pixel >> 24 with no mask, so Pixel is
 * expected to be an unsigned 32-bit value.
 */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)                           \
{                                                                       \
    (r) = (((Pixel) >> 16) & 0xFF);                                     \
    (g) = (((Pixel) >> 8) & 0xFF);                                      \
    (b) = ((Pixel) & 0xFF);                                             \
    (a) = ((Pixel) >> 24);                                              \
}
icculus@1162
   314
/*
 * RGBA_FROM_ABGR8888(Pixel, r, g, b, a): split a pixel laid out as
 * 0xAABBGGRR.  Alpha is taken as Pixel >> 24 with no mask, so Pixel is
 * expected to be an unsigned 32-bit value.
 */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)                           \
{                                                                       \
    (r) = ((Pixel) & 0xFF);                                             \
    (g) = (((Pixel) >> 8) & 0xFF);                                      \
    (b) = (((Pixel) >> 16) & 0xFF);                                     \
    (a) = ((Pixel) >> 24);                                              \
}
icculus@1162
   321
/*
 * DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a): read one pixel of bpp
 * bytes from buf into Pixel (2/4 byte loads, 3 bytes combined per
 * SDL_BYTEORDER, anything else gives 0), split it into r, g, b, a with
 * RGBA_FROM_PIXEL, and finally clear the fmt->Amask bits in Pixel.
 * Pixel, r, g, b, a must be lvalues.
 *
 * The 3-byte scratch pointer used to be named 'b', which is also a macro
 * parameter: it expanded into a declaration of the caller's blue argument
 * and only compiled when that argument was a plain identifier.  It now has
 * its own name, and buf is parenthesized before the byte cast.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)                 \
do {                                                                    \
    switch (bpp) {                                                      \
    case 2:                                                             \
        (Pixel) = *((Uint16 *)(buf));                                   \
        break;                                                          \
                                                                        \
    case 3: { /* FIXME: broken code (no alpha) */                       \
        Uint8 *bytes_ = (Uint8 *)(buf);                                 \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            (Pixel) = bytes_[0] + (bytes_[1] << 8) + (bytes_[2] << 16); \
        } else {                                                        \
            (Pixel) = (bytes_[0] << 16) + (bytes_[1] << 8) + bytes_[2]; \
        }                                                               \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 4:                                                             \
        (Pixel) = *((Uint32 *)(buf));                                   \
        break;                                                          \
                                                                        \
    default:                                                            \
        (Pixel) = 0;    /* stop gcc complaints */                       \
        break;                                                          \
    }                                                                   \
    RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a));                \
    (Pixel) &= ~(fmt)->Amask;                                           \
} while (0)
slouken@0
   349
slouken@0
   350
/*
 * PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a): drop each channel's low 'loss'
 * bits, shift it to its position in fmt and OR the four results into Pixel
 * (an lvalue).  Arguments are parenthesized so compound channel expressions
 * are shifted as a whole.
 */
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                         \
{                                                                       \
    (Pixel) = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) |                \
              (((g) >> (fmt)->Gloss) << (fmt)->Gshift) |                \
              (((b) >> (fmt)->Bloss) << (fmt)->Bshift) |                \
              (((a) >> (fmt)->Aloss) << (fmt)->Ashift);                 \
}
slouken@0
   358
/*
 * ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a): build a pixel from r, g, b, a
 * and store it at buf.
 *   bpp 2 / 4: PIXEL_FROM_RGBA into a Uint16 / Uint32, stored through a cast.
 *   bpp 3:     only r, g, b are stored (alpha is dropped), as individual
 *              elements of buf at index shift/8 (little endian) or
 *              2 - shift/8 (big endian); buf must be a byte pointer here.
 *   other:     nothing is written.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)                        \
{                                                                       \
    switch (bpp) {                                                      \
    case 2: {                                                           \
        Uint16 px16_;                                                   \
                                                                        \
        PIXEL_FROM_RGBA(px16_, (fmt), (r), (g), (b), (a));              \
        *((Uint16 *)(buf)) = px16_;                                     \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 3: { /* FIXME: broken code (no alpha) */                       \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            *((buf) + (fmt)->Rshift / 8) = (r);                         \
            *((buf) + (fmt)->Gshift / 8) = (g);                         \
            *((buf) + (fmt)->Bshift / 8) = (b);                         \
        } else {                                                        \
            *((buf) + 2 - (fmt)->Rshift / 8) = (r);                     \
            *((buf) + 2 - (fmt)->Gshift / 8) = (g);                     \
            *((buf) + 2 - (fmt)->Bshift / 8) = (b);                     \
        }                                                               \
    }                                                                   \
        break;                                                          \
                                                                        \
    case 4: {                                                           \
        Uint32 px32_;                                                   \
                                                                        \
        PIXEL_FROM_RGBA(px32_, (fmt), (r), (g), (b), (a));              \
        *((Uint32 *)(buf)) = px32_;                                     \
    }                                                                   \
        break;                                                          \
    }                                                                   \
}
slouken@0
   391
icculus@1162
   392
/*
 * Blend the RGB values of two Pixels based on a source alpha value.
 *
 * ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB): each destination channel d becomes
 * (((s - d) * A) >> 8) + d.  dR, dG, dB must be lvalues and are updated in
 * place.  The division is by 256, not 255, so A == 255 does not reproduce
 * the source exactly.  When s < d the intermediate is negative, so signed
 * operands are required for that case.
 * All operands are parenthesized so compound expressions are blended as a
 * whole.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)                          \
do {                                                                    \
    (dR) = ((((sR) - (dR)) * (A)) >> 8) + (dR);                         \
    (dG) = ((((sG) - (dG)) * (A)) >> 8) + (dG);                         \
    (dB) = ((((sB) - (dB)) * (A)) >> 8) + (dB);                         \
} while (0)
slouken@0
   399
icculus@1162
   400
/*
 * Blend the RGB values of two Pixels based on a source alpha value.
 *
 * ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB): for each channel,
 *     t = 1 + s * sA + d * (255 - sA);   d = (t + (t >> 8)) >> 8;
 * computed in unsigned arithmetic.  dR, dG, dB must be lvalues and are
 * updated in place.  With sA == 255 the result is the source channel and
 * with sA == 0 the destination is unchanged.
 * Operands are parenthesized and the temporaries carry a trailing
 * underscore so they cannot collide with caller variables named tR..tA.
 */
#define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB)                \
do {                                                                    \
    unsigned tR_, tG_, tB_, tA_;                                        \
    tA_ = 255 - (sA);                                                   \
    tR_ = 1 + ((sR) * (sA)) + ((dR) * tA_);                             \
    (dR) = (tR_ + (tR_ >> 8)) >> 8;                                     \
    tG_ = 1 + ((sG) * (sA)) + ((dG) * tA_);                             \
    (dG) = (tG_ + (tG_ >> 8)) >> 8;                                     \
    tB_ = 1 + ((sB) * (sA)) + ((dB) * tA_);                             \
    (dB) = (tB_ + (tB_ >> 8)) >> 8;                                     \
} while (0)
icculus@1047
   412
icculus@1047
   413
slouken@0
   414
/* This is a very useful loop for optimizing blitters */
/*
 * USE_DUFFS_LOOP selects the unrolled (Duff's device) DUFFS_LOOP* macros.
 * It is defined for every compiler except _MSC_VER == 1300:
 * there's a bug in the Visual C++ 7 optimizer when compiling this code.
 */
#if !(defined(_MSC_VER) && (_MSC_VER == 1300))
#define USE_DUFFS_LOOP
#endif
slouken@0
   420
#ifdef USE_DUFFS_LOOP
slouken@0
   421
slouken@0
   422
/*
 * 8-times unrolled loop.
 *
 * DUFFS_LOOP8(pixel_copy_increment, width): execute pixel_copy_increment
 * exactly 'width' times using Duff's device (the switch jumps into the
 * middle of the do/while to handle width % 8 first).
 *
 * width is evaluated once.  A width <= 0 runs the body zero times, matching
 * the non-Duff DUFFS_LOOP; previously a width of 0 entered at 'case 0' and
 * ran the body 8 times.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width)                        \
{ int duffs_width_ = (width);                                           \
  int n = (duffs_width_ + 7) / 8;                                       \
    if (duffs_width_ > 0) {                                             \
        switch (duffs_width_ & 7) {                                     \
        case 0: do {    pixel_copy_increment;                           \
        case 7:         pixel_copy_increment;                           \
        case 6:         pixel_copy_increment;                           \
        case 5:         pixel_copy_increment;                           \
        case 4:         pixel_copy_increment;                           \
        case 3:         pixel_copy_increment;                           \
        case 2:         pixel_copy_increment;                           \
        case 1:         pixel_copy_increment;                           \
                } while (--n > 0);                                      \
        }                                                               \
    }                                                                   \
}
slouken@0
   437
slouken@0
   438
/*
 * 4-times unrolled loop.
 *
 * DUFFS_LOOP4(pixel_copy_increment, width): execute pixel_copy_increment
 * exactly 'width' times using a 4-way Duff's device.
 *
 * width is evaluated once.  A width <= 0 runs the body zero times, matching
 * the non-Duff DUFFS_LOOP; previously a width of 0 entered at 'case 0' and
 * ran the body 4 times.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width)                        \
{ int duffs_width_ = (width);                                           \
  int n = (duffs_width_ + 3) / 4;                                       \
    if (duffs_width_ > 0) {                                             \
        switch (duffs_width_ & 3) {                                     \
        case 0: do {    pixel_copy_increment;                           \
        case 3:         pixel_copy_increment;                           \
        case 2:         pixel_copy_increment;                           \
        case 1:         pixel_copy_increment;                           \
                } while (--n > 0);                                      \
        }                                                               \
    }                                                                   \
}
slouken@0
   449
slouken@689
   450
/*
 * 2 - times unrolled loop.
 *
 * DUFFS_LOOP_DOUBLE2(pixel_copy_increment, double_pixel_copy_increment,
 * width): if width is odd, run pixel_copy_increment once; then run
 * double_pixel_copy_increment width / 2 times through a 2-way Duff's device.
 *
 * A width <= 0 runs nothing; previously a negative odd width still ran the
 * single-pixel step once.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,                        \
                           double_pixel_copy_increment, width)          \
{ int n, w = (width);                                                   \
    if (w > 0) {                                                        \
        if (w & 1) {                                                    \
            pixel_copy_increment;                                       \
            w--;                                                        \
        }                                                               \
        if (w > 0) {                                                    \
            n = (w + 2) / 4;                                            \
            switch (w & 2) {                                            \
            case 0: do {    double_pixel_copy_increment;                \
            case 2:         double_pixel_copy_increment;                \
                    } while (--n > 0);                                  \
            }                                                           \
        }                                                               \
    }                                                                   \
}
slouken@689
   467
slouken@689
   468
/*
 * 2 - times unrolled loop 4 pixels.
 *
 * DUFFS_LOOP_QUATRO2(pixel_copy_increment, double_pixel_copy_increment,
 * quatro_pixel_copy_increment, width): run the single step if bit 0 of
 * width is set, the double step if bit 1 is set, then the quad step
 * width / 4 times through a 2-way Duff's device.
 *
 * A width <= 0 runs nothing; previously negative widths could still run the
 * single and double steps.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment,                        \
                           double_pixel_copy_increment,                 \
                           quatro_pixel_copy_increment, width)          \
{ int n, w = (width);                                                   \
    if (w > 0) {                                                        \
        if (w & 1) {                                                    \
            pixel_copy_increment;                                       \
            w--;                                                        \
        }                                                               \
        if (w & 2) {                                                    \
            double_pixel_copy_increment;                                \
            w -= 2;                                                     \
        }                                                               \
        if (w > 0) {                                                    \
            n = (w + 7) / 8;                                            \
            switch (w & 4) {                                            \
            case 0: do {    quatro_pixel_copy_increment;                \
            case 4:         quatro_pixel_copy_increment;                \
                    } while (--n > 0);                                  \
            }                                                           \
        }                                                               \
    }                                                                   \
}
slouken@689
   490
slouken@0
   491
/* Use the 8-times version of the loop by default */
/* width is forwarded in parentheses so DUFFS_LOOP8 sees it as one operand. */
#define DUFFS_LOOP(pixel_copy_increment, width)                         \
    DUFFS_LOOP8(pixel_copy_increment, (width))
slouken@0
   494
slouken@0
   495
#else
slouken@0
   496
slouken@0
   497
/*
 * Don't use Duff's device to unroll loops.
 *
 * DUFFS_LOOP_DOUBLE2(pixel_copy_increment, double_pixel_copy_increment,
 * width): if width is odd, run pixel_copy_increment once; then run
 * double_pixel_copy_increment width / 2 times in a plain loop.
 *
 * A width <= 0 runs nothing; previously a negative odd width still ran the
 * single-pixel step and then right-shifted a negative count.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,                        \
                           double_pixel_copy_increment, width)          \
{ int n = (width);                                                      \
    if (n > 0) {                                                        \
        if (n & 1) {                                                    \
            pixel_copy_increment;                                       \
            n--;                                                        \
        }                                                               \
        n = n >> 1;                                                     \
        for (; n > 0; --n) {                                            \
            double_pixel_copy_increment;                                \
        }                                                               \
    }                                                                   \
}
slouken@689
   510
slouken@689
   511
/*
 * Don't use Duff's device to unroll loops.
 *
 * DUFFS_LOOP_QUATRO2(pixel_copy_increment, double_pixel_copy_increment,
 * quatro_pixel_copy_increment, width): run the single step if bit 0 of
 * width is set, the double step if bit 1 is set, then the quad step
 * width / 4 times in a plain loop.
 *
 * A width <= 0 runs nothing; previously negative widths could still run the
 * single and double steps and then right-shifted a negative count.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment,                        \
                           double_pixel_copy_increment,                 \
                           quatro_pixel_copy_increment, width)          \
{ int n = (width);                                                      \
    if (n > 0) {                                                        \
        if (n & 1) {                                                    \
            pixel_copy_increment;                                       \
            n--;                                                        \
        }                                                               \
        if (n & 2) {                                                    \
            double_pixel_copy_increment;                                \
            n -= 2;                                                     \
        }                                                               \
        n = n >> 2;                                                     \
        for (; n > 0; --n) {                                            \
            quatro_pixel_copy_increment;                                \
        }                                                               \
    }                                                                   \
}
slouken@689
   529
slouken@689
   530
/*
 * Don't use Duff's device to unroll loops.
 *
 * DUFFS_LOOP(pixel_copy_increment, width): run pixel_copy_increment 'width'
 * times in a plain counted loop; a width <= 0 runs it zero times.
 * DUFFS_LOOP8 and DUFFS_LOOP4 are aliases of DUFFS_LOOP in this
 * configuration.
 */
#define DUFFS_LOOP(pixel_copy_increment, width)                         \
{ int n;                                                                \
    for (n = (width); n > 0; --n) {                                     \
        pixel_copy_increment;                                           \
    }                                                                   \
}
#define DUFFS_LOOP8(pixel_copy_increment, width)                        \
    DUFFS_LOOP(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width)                        \
    DUFFS_LOOP(pixel_copy_increment, width)
slouken@0
   541
slouken@0
   542
#endif /* USE_DUFFS_LOOP */
slouken@0
   543
slouken@0
   544
/* Prevent Visual C++ 6.0 from printing out stupid warnings */
slouken@0
   545
#if defined(_MSC_VER) && (_MSC_VER >= 600)
slouken@0
   546
#pragma warning(disable: 4550)
slouken@0
   547
#endif
slouken@0
   548
slouken@0
   549
#endif /* _SDL_blit_h */
slouken@1895
   550
/* vi: set ts=4 sw=4 expandtab: */