src/video/SDL_blit.h
author Sam Lantinga <slouken@libsdl.org>
Fri, 17 Aug 2007 03:22:03 +0000
changeset 2261 c20476d7d7b3
parent 2260 202ddfd1cfb1
child 2262 bee005ace1bf
permissions -rw-r--r--
Enabled 3DNow! intrinsic support
slouken@0
     1
/*
slouken@0
     2
    SDL - Simple DirectMedia Layer
slouken@1312
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@0
     4
slouken@0
     5
    This library is free software; you can redistribute it and/or
slouken@1312
     6
    modify it under the terms of the GNU Lesser General Public
slouken@0
     7
    License as published by the Free Software Foundation; either
slouken@1312
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@0
     9
slouken@0
    10
    This library is distributed in the hope that it will be useful,
slouken@0
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@0
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1312
    13
    Lesser General Public License for more details.
slouken@0
    14
slouken@1312
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1312
    16
    License along with this library; if not, write to the Free Software
slouken@1312
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@0
    18
slouken@0
    19
    Sam Lantinga
slouken@252
    20
    slouken@libsdl.org
slouken@0
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
#ifndef _SDL_blit_h
slouken@0
    25
#define _SDL_blit_h
slouken@0
    26
slouken@2249
    27
#ifdef __MMX__
slouken@2249
    28
#include <mmintrin.h>
slouken@2249
    29
#endif
slouken@2261
    30
#ifdef __3dNOW__
slouken@2261
    31
#include <mm3dnow.h>
slouken@2261
    32
#endif
slouken@2249
    33
#ifdef __SSE__
slouken@2249
    34
#include <xmmintrin.h>
slouken@2249
    35
#endif
slouken@2249
    36
slouken@2250
    37
#include "SDL_cpuinfo.h"
slouken@0
    38
#include "SDL_endian.h"
slouken@0
    39
slouken@0
    40
/* The structure passed to the low level blit functions */
slouken@1895
    41
typedef struct
slouken@1895
    42
{
slouken@1895
    43
    Uint8 *s_pixels;
slouken@1895
    44
    int s_width;
slouken@1895
    45
    int s_height;
slouken@1895
    46
    int s_skip;
slouken@1895
    47
    Uint8 *d_pixels;
slouken@1895
    48
    int d_width;
slouken@1895
    49
    int d_height;
slouken@1895
    50
    int d_skip;
slouken@1895
    51
    SDL_PixelFormat *src;
slouken@1895
    52
    Uint8 *table;
slouken@1895
    53
    SDL_PixelFormat *dst;
slouken@2257
    54
    Uint32 ckey, cmod;
slouken@0
    55
} SDL_BlitInfo;
slouken@0
    56
slouken@0
    57
/* The type definition for the low level blit functions */
slouken@1895
    58
typedef void (*SDL_loblit) (SDL_BlitInfo * info);
slouken@0
    59
slouken@0
    60
/* Blit mapping definition */
slouken@1895
    61
typedef struct SDL_BlitMap
slouken@1895
    62
{
slouken@1895
    63
    SDL_Surface *dst;
slouken@1895
    64
    int identity;
slouken@1895
    65
    Uint8 *table;
slouken@2257
    66
    SDL_blit blit;
slouken@2257
    67
    void *data;
slouken@2260
    68
    Uint32 ckey;                /* colorkey */
slouken@2260
    69
    Uint32 cmod;                /* ARGB modulation */
slouken@0
    70
slouken@1895
    71
    /* the version count matches the destination; mismatch indicates
slouken@1895
    72
       an invalid mapping */
slouken@1895
    73
    unsigned int format_version;
slouken@0
    74
} SDL_BlitMap;
slouken@0
    75
/*
 * CPU feature bits; each non-zero flag is a distinct single bit so flags
 * can be OR-ed together.  SDL_BLIT_ANY (0) names no feature at all.
 * NOTE(review): presumably the values stored in SDL_BlitEntry.features --
 * confirm against the blitter tables.
 */
#define SDL_BLIT_ANY                0x00000000
#define SDL_BLIT_MMX                0x00000001
#define SDL_BLIT_SSE                0x00000002
#define SDL_BLIT_ALTIVEC_PREFETCH   0x00000004
#define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008
slouken@2247
    81
slouken@2247
    82
typedef struct SDL_BlitEntry
slouken@2247
    83
{
slouken@2247
    84
    Uint32 features;
slouken@2247
    85
    SDL_loblit blit;
slouken@2247
    86
} SDL_BlitEntry;
slouken@0
    87
slouken@0
    88
/* Functions found in SDL_blit.c */
slouken@1895
    89
extern int SDL_CalculateBlit(SDL_Surface * surface);
slouken@0
    90
slouken@0
    91
/* Functions found in SDL_blit_{0,1,N,A}.c */
slouken@1895
    92
extern SDL_loblit SDL_CalculateBlit0(SDL_Surface * surface, int complex);
slouken@1895
    93
extern SDL_loblit SDL_CalculateBlit1(SDL_Surface * surface, int complex);
slouken@1895
    94
extern SDL_loblit SDL_CalculateBlitN(SDL_Surface * surface, int complex);
slouken@1895
    95
extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface * surface, int complex);
slouken@0
    96
slouken@0
    97
/*
slouken@0
    98
 * Useful macros for blitting routines
slouken@0
    99
 */
slouken@0
   100
/*
 * DECLARE_ALIGNED(t, v, a): declare variable v of type t aligned to a
 * bytes.  Uses the GCC attribute or the MSVC __declspec; on any other
 * compiler it expands to a plain declaration with no alignment request.
 */
#if defined(__GNUC__)
#define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
#else
#define DECLARE_ALIGNED(t,v,a)  t v
#endif
slouken@2249
   108
/*
 * Non-zero when two pixel formats (pointers A and B) have the same
 * BitsPerPixel, Rmask and Amask.  Only these three fields are compared;
 * the green and blue masks are not checked.
 */
#define FORMAT_EQUAL(A, B) \
    ((A)->BitsPerPixel == (B)->BitsPerPixel \
     && ((A)->Rmask == (B)->Rmask) && ((A)->Amask == (B)->Amask))
slouken@0
   112
/* Load pixel of the specified format from a buffer and get its R-G-B values */
/* FIXME: rescale values to 0..255 here? */
/*
 * Each channel is masked out of Pixel, shifted down by the format's
 * shift and back up by its loss.  fmt must point to something with
 * {R,G,B}mask, {R,G,B}shift and {R,G,B}loss members; r, g, b are
 * assigned.  All arguments are parenthesized so expressions such as
 * "hi | lo" or "&format" are safe.  Expands to a brace block, as before.
 */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
{ \
    (r) = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss); \
    (g) = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss); \
    (b) = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss); \
}
/*
 * Split a pixel laid out as 5 red, 6 green, 5 blue bits (masks 0xF800,
 * 0x07E0, 0x001F) into r, g, b, each shifted up to occupy the high bits
 * of an 8-bit value.
 */
#define RGB_FROM_RGB565(Pixel, r, g, b) \
{ \
    (r) = ((((Pixel) & 0xF800) >> 11) << 3); \
    (g) = ((((Pixel) & 0x07E0) >> 5) << 2); \
    (b) = (((Pixel) & 0x001F) << 3); \
}
/*
 * Split a pixel laid out as 5 red, 5 green, 5 blue bits (masks 0x7C00,
 * 0x03E0, 0x001F) into r, g, b, each shifted left by 3.
 */
#define RGB_FROM_RGB555(Pixel, r, g, b) \
{ \
    (r) = ((((Pixel) & 0x7C00) >> 10) << 3); \
    (g) = ((((Pixel) & 0x03E0) >> 5) << 3); \
    (b) = (((Pixel) & 0x001F) << 3); \
}
/*
 * Split a pixel with red in bits 16-23, green in bits 8-15 and blue in
 * bits 0-7 into r, g, b.
 */
#define RGB_FROM_RGB888(Pixel, r, g, b) \
{ \
    (r) = (((Pixel) & 0xFF0000) >> 16); \
    (g) = (((Pixel) & 0xFF00) >> 8); \
    (b) = ((Pixel) & 0xFF); \
}
/*
 * Read one pixel of bpp bytes from buf into Pixel.
 *   bpp 2 / 4: read as a Uint16 / Uint32 through a pointer cast.
 *   bpp 3:     combine three bytes according to SDL_BYTEORDER.
 *   other:     Pixel is set to 0.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
do { \
    switch (bpp) { \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
        break; \
 \
        case 3: { \
            Uint8 *B = (Uint8 *)(buf); \
            if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (Pixel) = B[0] + (B[1] << 8) + (B[2] << 16); \
            } else { \
                (Pixel) = (B[0] << 16) + (B[1] << 8) + B[2]; \
            } \
        } \
        break; \
 \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
        break; \
 \
        default: \
            (Pixel) = 0; /* appease gcc */ \
        break; \
    } \
} while(0)
slouken@0
   165
/*
 * Read one pixel of bpp bytes from buf into Pixel (same rules as
 * RETRIEVE_RGB_PIXEL: 2 and 4 via pointer cast, 3 byte-by-byte per
 * SDL_BYTEORDER, anything else yields 0), then split it into r, g, b
 * with RGB_FROM_PIXEL using fmt.
 *
 * buf is parenthesized before the Uint8 cast so that an argument such
 * as "ptr + 1" is offset in units of ptr's own type, matching the
 * 2- and 4-byte cases.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
do { \
    switch (bpp) { \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
        break; \
 \
        case 3: { \
            Uint8 *B = (Uint8 *)(buf); \
            if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (Pixel) = B[0] + (B[1] << 8) + (B[2] << 16); \
            } else { \
                (Pixel) = (B[0] << 16) + (B[1] << 8) + B[2]; \
            } \
        } \
        break; \
 \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
        break; \
 \
        default: \
            (Pixel) = 0; /* prevent gcc from complaining */ \
        break; \
    } \
    RGB_FROM_PIXEL(Pixel, fmt, r, g, b); \
} while(0)
slouken@0
   193
/* Assemble R-G-B values into a specified pixel format and store them */
/*
 * Each channel is shifted down by the format's loss and up by its shift,
 * and the three results are OR-ed into Pixel.  Arguments are
 * parenthesized so channel expressions such as "x + y" are shifted as a
 * whole.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
{ \
    (Pixel) = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
              (((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
              (((b) >> (fmt)->Bloss) << (fmt)->Bshift); \
}
/* Pack r, g, b into a 5-6-5 pixel: red in bits 11-15, green in bits 5-10,
   blue in bits 0-4. */
#define RGB565_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = (((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3); \
}
/* Pack r, g, b into a 5-5-5 pixel: red in bits 10-14, green in bits 5-9,
   blue in bits 0-4. */
#define RGB555_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = (((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3); \
}
/* Pack r, g, b into a pixel with red in bits 16-23, green in bits 8-15
   and blue in bits 0-7. */
#define RGB888_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = ((r) << 16) | ((g) << 8) | (b); \
}
/*
 * Build a pixel from r, g, b in format fmt and store it at buf.
 *   bpp 2 / 4: pack with PIXEL_FROM_RGB and store as Uint16 / Uint32.
 *   bpp 3:     store each channel in the byte selected by its shift / 8
 *              (mirrored for big-endian builds); buf must be a byte
 *              pointer here because it is indexed directly.
 *   other:     nothing is written.
 * Expands to a brace block, as before.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 Pixel; \
 \
            PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
            *((Uint16 *)(buf)) = Pixel; \
        } \
        break; \
 \
        case 3: { \
            if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf)+(fmt)->Rshift/8) = (r); \
                *((buf)+(fmt)->Gshift/8) = (g); \
                *((buf)+(fmt)->Bshift/8) = (b); \
            } else { \
                *((buf)+2-(fmt)->Rshift/8) = (r); \
                *((buf)+2-(fmt)->Gshift/8) = (g); \
                *((buf)+2-(fmt)->Bshift/8) = (b); \
            } \
        } \
        break; \
 \
        case 4: { \
            Uint32 Pixel; \
 \
            PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
            *((Uint32 *)(buf)) = Pixel; \
        } \
        break; \
 \
        default: \
        break; \
    } \
}
/*
 * Like ASSEMBLE_RGB, but for bpp 2 and 4 the bits selected by Amask in
 * the pixel already stored at buf are kept and OR-ed into the new value.
 * The 3-byte case writes only the three channel bytes.
 *
 * buf is parenthesized before being cast, so an argument such as
 * "bytes + 4" addresses the same pixel in every case; Amask is
 * parenthesized for the same reason.  The scratch pointers carry a
 * trailing underscore to make a clash with caller names unlikely.
 * Expands to a brace block, as before.
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 *dst16_ = (Uint16 *)(buf); \
            Uint16 Pixel; \
 \
            PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
            *dst16_ = Pixel | (*dst16_ & (Amask)); \
        } \
        break; \
 \
        case 3: { \
            if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf)+(fmt)->Rshift/8) = (r); \
                *((buf)+(fmt)->Gshift/8) = (g); \
                *((buf)+(fmt)->Bshift/8) = (b); \
            } else { \
                *((buf)+2-(fmt)->Rshift/8) = (r); \
                *((buf)+2-(fmt)->Gshift/8) = (g); \
                *((buf)+2-(fmt)->Bshift/8) = (b); \
            } \
        } \
        break; \
 \
        case 4: { \
            Uint32 *dst32_ = (Uint32 *)(buf); \
            Uint32 Pixel; \
 \
            PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
            *dst32_ = Pixel | (*dst32_ & (Amask)); \
        } \
        break; \
 \
        default: \
        break; \
    } \
}
slouken@0
   283
/* FIXME: Should we rescale alpha into 0..255 here? */
/*
 * Split Pixel into r, g, b, a using fmt's masks, shifts and losses:
 * each channel is masked, shifted down by its shift and up by its loss.
 * Arguments are parenthesized so expression arguments are safe.
 */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
{ \
    (r) = (((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss; \
    (g) = (((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss; \
    (b) = (((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss; \
    (a) = (((Pixel) & (fmt)->Amask) >> (fmt)->Ashift) << (fmt)->Aloss; \
}
/*
 * Split Pixel into r, g, b, a using only fmt's masks and shifts (no
 * loss correction is applied).
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
{ \
    (r) = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift; \
    (g) = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift; \
    (b) = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift; \
    (a) = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift; \
}
/* Split a pixel with red in bits 24-31, green in 16-23, blue in 8-15 and
   alpha in 0-7.  Red is taken as Pixel >> 24 without masking. */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
{ \
    (r) = ((Pixel) >> 24); \
    (g) = (((Pixel) >> 16) & 0xFF); \
    (b) = (((Pixel) >> 8) & 0xFF); \
    (a) = ((Pixel) & 0xFF); \
}
/* Split a pixel with alpha in bits 24-31, red in 16-23, green in 8-15 and
   blue in 0-7.  Alpha is taken as Pixel >> 24 without masking. */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
{ \
    (r) = (((Pixel) >> 16) & 0xFF); \
    (g) = (((Pixel) >> 8) & 0xFF); \
    (b) = ((Pixel) & 0xFF); \
    (a) = ((Pixel) >> 24); \
}
/* Split a pixel with alpha in bits 24-31, blue in 16-23, green in 8-15 and
   red in 0-7.  Alpha is taken as Pixel >> 24 without masking. */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
{ \
    (r) = ((Pixel) & 0xFF); \
    (g) = (((Pixel) >> 8) & 0xFF); \
    (b) = (((Pixel) >> 16) & 0xFF); \
    (a) = ((Pixel) >> 24); \
}
/*
 * Read one pixel of bpp bytes from buf into Pixel, split it into
 * r, g, b, a with RGBA_FROM_PIXEL, then clear fmt's alpha bits in Pixel.
 *   bpp 2 / 4: read as Uint16 / Uint32 through a pointer cast.
 *   bpp 3:     combine three bytes per SDL_BYTEORDER (no alpha stored).
 *   other:     Pixel starts as 0.
 *
 * The byte pointer used for the 3-byte case is named px_bytes_ rather
 * than "b": a local called "b" is replaced by the caller's argument for
 * parameter b, which only compiles when that argument is a plain
 * identifier and silently shadows the caller's variable.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
do { \
    switch (bpp) { \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
        break; \
 \
        case 3: {/* FIXME: broken code (no alpha) */ \
            Uint8 *px_bytes_ = (Uint8 *)(buf); \
            if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (Pixel) = px_bytes_[0] + (px_bytes_[1] << 8) + (px_bytes_[2] << 16); \
            } else { \
                (Pixel) = (px_bytes_[0] << 16) + (px_bytes_[1] << 8) + px_bytes_[2]; \
            } \
        } \
        break; \
 \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
        break; \
 \
        default: \
            (Pixel) = 0; /* stop gcc complaints */ \
        break; \
    } \
    RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a); \
    (Pixel) &= ~(fmt)->Amask; \
} while(0)
slouken@0
   348
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
/*
 * Pack r, g, b, a into Pixel: each channel is shifted down by fmt's loss
 * and up by fmt's shift, and the four results are OR-ed together.
 * Arguments are parenthesized so channel expressions are shifted as a
 * whole.
 */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
{ \
    (Pixel) = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
              (((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
              (((b) >> (fmt)->Bloss) << (fmt)->Bshift) | \
              (((a) >> (fmt)->Aloss) << (fmt)->Ashift); \
}
/*
 * Build a pixel from r, g, b, a in format fmt and store it at buf.
 *   bpp 2 / 4: pack with PIXEL_FROM_RGBA and store as Uint16 / Uint32.
 *   bpp 3:     store only the three colour bytes (alpha is dropped);
 *              buf must be a byte pointer here because it is indexed
 *              directly.
 *   other:     nothing is written.
 * Expands to a brace block, as before.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 Pixel; \
 \
            PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a); \
            *((Uint16 *)(buf)) = Pixel; \
        } \
        break; \
 \
        case 3: { /* FIXME: broken code (no alpha) */ \
            if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf)+(fmt)->Rshift/8) = (r); \
                *((buf)+(fmt)->Gshift/8) = (g); \
                *((buf)+(fmt)->Bshift/8) = (b); \
            } else { \
                *((buf)+2-(fmt)->Rshift/8) = (r); \
                *((buf)+2-(fmt)->Gshift/8) = (g); \
                *((buf)+2-(fmt)->Bshift/8) = (b); \
            } \
        } \
        break; \
 \
        case 4: { \
            Uint32 Pixel; \
 \
            PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a); \
            *((Uint32 *)(buf)) = Pixel; \
        } \
        break; \
 \
        default: \
        break; \
    } \
}
slouken@0
   390
/* Blend the RGB values of two Pixels based on a source alpha value */
/*
 * For each channel: d = (((s - d) * A) >> 8) + d, written back to the
 * destination argument.  dR, dG, dB must be lvalues and are each read
 * twice.  Source and destination arguments are parenthesized so that
 * expressions such as "hi | lo" are subtracted as a whole.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB) \
do { \
    (dR) = ((((sR)-(dR))*(A))>>8)+(dR); \
    (dG) = ((((sG)-(dG))*(A))>>8)+(dG); \
    (dB) = ((((sB)-(dB))*(A))>>8)+(dB); \
} while(0)
slouken@0
   398
/* Blend the RGB values of two Pixels based on a source alpha value */
/*
 * For each channel: t = 1 + s*sA + d*(255 - sA); d = (t + (t >> 8)) >> 8.
 * Intermediate values are held in unsigned locals tR, tG, tB, tA, so the
 * caller's arguments must not use those names.  dR, dG, dB must be
 * lvalues.  Arguments are parenthesized so an alpha such as "x + y" is
 * subtracted and multiplied as a whole.
 */
#define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB) \
do { \
    unsigned tR, tG, tB, tA; \
    tA = 255 - (sA); \
    tR = 1 + ((sR) * (sA)) + ((dR) * tA); \
    (dR) = (tR + (tR >> 8)) >> 8; \
    tG = 1 + ((sG) * (sA)) + ((dG) * tA); \
    (dG) = (tG + (tG >> 8)) >> 8; \
    tB = 1 + ((sB) * (sA)) + ((dB) * tA); \
    (dB) = (tB + (tB >> 8)) >> 8; \
} while(0)
icculus@1047
   411
icculus@1047
   412
/* This is a very useful loop for optimizing blitters */
/* USE_DUFFS_LOOP is defined for every compiler except _MSC_VER == 1300:
   there's a bug in the Visual C++ 7 optimizer when compiling this code */
#if !(defined(_MSC_VER) && (_MSC_VER == 1300))
#define USE_DUFFS_LOOP
#endif
slouken@0
   419
#ifdef USE_DUFFS_LOOP
slouken@0
   420
/* 8-times unrolled loop */
/*
 * Executes pixel_copy_increment exactly `width` times using a switch
 * that jumps into an 8-way unrolled do/while.
 *
 * width is parenthesized (so "a + b" is masked as a whole) and is
 * evaluated twice, as before.  When width <= 0 the computed pass count
 * n is <= 0 and the body is skipped entirely, matching the plain-loop
 * fallback; previously a zero width ran the body eight times.
 * The macro declares a local named n that is visible to
 * pixel_copy_increment.  Expands to a brace block, as before.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width) \
{ int n = ((width)+7)/8; \
    if (n > 0) { \
        switch ((width) & 7) { \
        case 0: do {    pixel_copy_increment; \
        case 7:         pixel_copy_increment; \
        case 6:         pixel_copy_increment; \
        case 5:         pixel_copy_increment; \
        case 4:         pixel_copy_increment; \
        case 3:         pixel_copy_increment; \
        case 2:         pixel_copy_increment; \
        case 1:         pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
slouken@0
   436
/* 4-times unrolled loop */
/*
 * Executes pixel_copy_increment exactly `width` times using a switch
 * that jumps into a 4-way unrolled do/while.
 *
 * width is parenthesized and evaluated twice, as before.  When
 * width <= 0 the computed pass count n is <= 0 and the body is skipped,
 * matching the plain-loop fallback; previously a zero width ran the
 * body four times.  The macro declares a local named n that is visible
 * to pixel_copy_increment.  Expands to a brace block, as before.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width) \
{ int n = ((width)+3)/4; \
    if (n > 0) { \
        switch ((width) & 3) { \
        case 0: do {    pixel_copy_increment; \
        case 3:         pixel_copy_increment; \
        case 2:         pixel_copy_increment; \
        case 1:         pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
slouken@0
   448
/* 2 - times unrolled loop */
/*
 * Covers `width` pixels: if width is odd, pixel_copy_increment runs
 * once; the remaining even count is handled two pixels at a time by
 * double_pixel_copy_increment, through a switch into a 2-way unrolled
 * do/while.  width is evaluated once into the local w; locals n and w
 * are visible to the bodies.  Expands to a brace block, as before.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
                           double_pixel_copy_increment, width) \
{ int n, w = (width); \
    if( w & 1 ) { \
        pixel_copy_increment; \
        w--; \
    } \
    if ( w > 0 ) { \
        n = ( w + 2 ) / 4; \
        switch( w & 2 ) { \
        case 0: do {    double_pixel_copy_increment; \
        case 2:         double_pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
slouken@689
   466
/* 2 - times unrolled loop 4 pixels */
/*
 * Covers `width` pixels: one single-pixel step if bit 0 of width is set,
 * one double-pixel step if bit 1 is set, and the remaining multiple of
 * four handled by quatro_pixel_copy_increment through a switch into a
 * 2-way unrolled do/while.  width is evaluated once into the local w;
 * locals n and w are visible to the bodies.  Expands to a brace block,
 * as before.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
                           double_pixel_copy_increment, \
                           quatro_pixel_copy_increment, width) \
{ int n, w = (width); \
    if(w & 1) { \
        pixel_copy_increment; \
        w--; \
    } \
    if(w & 2) { \
        double_pixel_copy_increment; \
        w -= 2; \
    } \
    if ( w > 0 ) { \
        n = ( w + 7 ) / 8; \
        switch( w & 4 ) { \
        case 0: do {    quatro_pixel_copy_increment; \
        case 4:         quatro_pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
slouken@689
   489
/* Use the 8-times version of the loop by default */
#define DUFFS_LOOP(pixel_copy_increment, width) \
    DUFFS_LOOP8(pixel_copy_increment, width)
slouken@0
   493
slouken@0
   494
#else
slouken@0
   495
/* Don't use Duff's device to unroll loops */
/*
 * Plain-loop form: if width is odd, pixel_copy_increment runs once;
 * double_pixel_copy_increment then runs once per remaining pair.
 * width is evaluated once into the local n, which is visible to the
 * bodies.  Expands to a brace block, as before.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
                           double_pixel_copy_increment, width) \
{ int n = (width); \
    if( n & 1 ) { \
        pixel_copy_increment; \
        n--; \
    } \
    n=n>>1; \
    for(; n > 0; --n) { \
        double_pixel_copy_increment; \
    } \
}
slouken@689
   509
/* Don't use Duff's device to unroll loops */
/*
 * Plain-loop form: one single-pixel step if bit 0 of width is set, one
 * double-pixel step if bit 1 is set, then quatro_pixel_copy_increment
 * once per remaining group of four.  width is evaluated once into the
 * local n, which is visible to the bodies.  Expands to a brace block,
 * as before.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
                           double_pixel_copy_increment, \
                           quatro_pixel_copy_increment, width) \
{ int n = (width); \
    if(n & 1) { \
        pixel_copy_increment; \
        n--; \
    } \
    if(n & 2) { \
        double_pixel_copy_increment; \
        n -= 2; \
    } \
    n=n>>2; \
    for(; n > 0; --n) { \
        quatro_pixel_copy_increment; \
    } \
}
slouken@689
   528
/* Don't use Duff's device to unroll loops */
/*
 * Plain-loop form: runs pixel_copy_increment once per pixel, counting a
 * local n down from width to 1; nothing runs when width <= 0.
 * Expands to a brace block, as before.
 */
#define DUFFS_LOOP(pixel_copy_increment, width) \
{ int n; \
    for ( n=(width); n > 0; --n ) { \
        pixel_copy_increment; \
    } \
}
/* Without Duff's device the 8-times variant is the plain loop. */
#define DUFFS_LOOP8(pixel_copy_increment, width) \
    DUFFS_LOOP(pixel_copy_increment, width)
/* Without Duff's device the 4-times variant is the plain loop. */
#define DUFFS_LOOP4(pixel_copy_increment, width) \
    DUFFS_LOOP(pixel_copy_increment, width)
slouken@0
   540
slouken@0
   541
#endif /* USE_DUFFS_LOOP */
slouken@0
   542
/* Prevent Visual C++ 6.0 from printing out stupid warnings */
/* Applies to every MSVC with _MSC_VER >= 600; disables warning 4550. */
#if defined(_MSC_VER) && (_MSC_VER >= 600)
#pragma warning(disable: 4550)
#endif
slouken@0
   547
slouken@0
   548
#endif /* _SDL_blit_h */
slouken@1895
   549
/* vi: set ts=4 sw=4 expandtab: */