src/video/SDL_blit.h
author Sam Lantinga
Wed, 15 Aug 2007 08:21:10 +0000
changeset 2247 93994f65c74c
parent 1895 c121d94672cb
child 2249 5a58b57b6724
permissions -rw-r--r--
Removed hermes since it's LGPL and not compatible with a commercial license.

Prepping for using MMX and SSE intrinsics instead of inline assembly.
.. except for memcpy equivalents which only get faster if they can
exploit the parallelism of loading into multiple SIMD registers. :)
slouken@0
     1
/*
slouken@0
     2
    SDL - Simple DirectMedia Layer
slouken@1312
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@0
     4
slouken@0
     5
    This library is free software; you can redistribute it and/or
slouken@1312
     6
    modify it under the terms of the GNU Lesser General Public
slouken@0
     7
    License as published by the Free Software Foundation; either
slouken@1312
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@0
     9
slouken@0
    10
    This library is distributed in the hope that it will be useful,
slouken@0
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@0
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1312
    13
    Lesser General Public License for more details.
slouken@0
    14
slouken@1312
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1312
    16
    License along with this library; if not, write to the Free Software
slouken@1312
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@0
    18
slouken@0
    19
    Sam Lantinga
slouken@252
    20
    slouken@libsdl.org
slouken@0
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
#ifndef _SDL_blit_h
slouken@0
    25
#define _SDL_blit_h
slouken@0
    26
slouken@0
    27
#include "SDL_endian.h"
slouken@0
    28
slouken@0
    29
/* The structure passed to the low level blit functions */
slouken@1895
    30
typedef struct
slouken@1895
    31
{
slouken@1895
    32
    Uint8 *s_pixels;
slouken@1895
    33
    int s_width;
slouken@1895
    34
    int s_height;
slouken@1895
    35
    int s_skip;
slouken@1895
    36
    Uint8 *d_pixels;
slouken@1895
    37
    int d_width;
slouken@1895
    38
    int d_height;
slouken@1895
    39
    int d_skip;
slouken@1895
    40
    void *aux_data;
slouken@1895
    41
    SDL_PixelFormat *src;
slouken@1895
    42
    Uint8 *table;
slouken@1895
    43
    SDL_PixelFormat *dst;
slouken@0
    44
} SDL_BlitInfo;
slouken@0
    45
slouken@0
    46
/* The type definition for the low level blit functions */
slouken@1895
    47
/* Pointer to a low-level blit routine: receives the SDL_BlitInfo describing
   the transfer and returns nothing. */
typedef void (*SDL_loblit) (SDL_BlitInfo * info);
slouken@0
    48
slouken@0
    49
/* This is the private info structure for software accelerated blits */
slouken@1895
    50
struct private_swaccel
slouken@1895
    51
{
slouken@1895
    52
    SDL_loblit blit;
slouken@1895
    53
    void *aux_data;
slouken@0
    54
};
slouken@0
    55
slouken@0
    56
/* Blit mapping definition */
slouken@1895
    57
typedef struct SDL_BlitMap
slouken@1895
    58
{
slouken@1895
    59
    SDL_Surface *dst;
slouken@1895
    60
    int identity;
slouken@1895
    61
    Uint8 *table;
slouken@1895
    62
    SDL_blit sw_blit;
slouken@1895
    63
    struct private_swaccel *sw_data;
slouken@0
    64
slouken@1895
    65
    /* the version count matches the destination; mismatch indicates
slouken@1895
    66
       an invalid mapping */
slouken@1895
    67
    unsigned int format_version;
slouken@0
    68
} SDL_BlitMap;
slouken@0
    69
slouken@2247
    70
/*
 * Feature bits stored in SDL_BlitEntry.features.  Each non-zero value is a
 * distinct single bit so that values can be OR-ed together; SDL_BLIT_ANY (0)
 * carries no feature bit.
 */
#define SDL_BLIT_ANY                0x00000000
#define SDL_BLIT_MMX                0x00000001
#define SDL_BLIT_SSE                0x00000002
#define SDL_BLIT_ALTIVEC_PREFETCH   0x00000004
#define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008
slouken@2247
    75
slouken@2247
    76
typedef struct SDL_BlitEntry
slouken@2247
    77
{
slouken@2247
    78
    Uint32 features;
slouken@2247
    79
    SDL_loblit blit;
slouken@2247
    80
} SDL_BlitEntry;
slouken@0
    81
slouken@0
    82
/* Functions found in SDL_blit.c */
slouken@1895
    83
/* Defined in SDL_blit.c (per the section comment).  Presumably selects the
   blit routine for `surface`; the meaning of the int result is not visible
   in this header -- TODO confirm. */
extern int SDL_CalculateBlit(SDL_Surface * surface);
slouken@0
    84
slouken@0
    85
/* Functions found in SDL_blit_{0,1,N,A}.c */
slouken@1895
    86
/*
 * Blitter selectors, one per SDL_blit_{0,1,N,A}.c source file.  Each takes
 * a surface plus an int named `complex` and returns a low-level blit
 * routine.  The meaning of `complex` and of a NULL result are not shown in
 * this header -- confirm in the defining files.
 */
extern SDL_loblit SDL_CalculateBlit0(SDL_Surface * surface, int complex);
extern SDL_loblit SDL_CalculateBlit1(SDL_Surface * surface, int complex);
extern SDL_loblit SDL_CalculateBlitN(SDL_Surface * surface, int complex);
extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface * surface, int complex);
slouken@0
    90
slouken@0
    91
/*
slouken@0
    92
 * Useful macros for blitting routines
slouken@0
    93
 */
slouken@0
    94
slouken@0
    95
/*
 * Non-zero when the pixel formats pointed to by A and B agree on
 * BitsPerPixel, Rmask and Amask.  Green and blue masks are NOT compared.
 * Each argument is evaluated more than once, so pass side-effect-free
 * expressions.
 */
#define FORMAT_EQUAL(A, B) \
    (((A)->BitsPerPixel == (B)->BitsPerPixel) && \
     ((A)->Rmask == (B)->Rmask) && \
     ((A)->Amask == (B)->Amask))
slouken@0
    98
slouken@0
    99
/* Load pixel of the specified format from a buffer and get its R-G-B values */
slouken@0
   100
/* FIXME: rescale values to 0..255 here? */
icculus@1162
   101
/*
 * Split a packed pixel into R, G and B using the masks, shifts and losses of
 * the format pointed to by fmt.  Each channel is masked, shifted down to bit
 * 0 and then shifted left by its loss; low bits are zero-filled, not
 * replicated, so the maximum of an n-bit channel is below 255.
 *
 * Pixel and fmt are fully parenthesized so that expression arguments such as
 * `hi | lo` or `&format` expand correctly.  r, g and b must be lvalues.
 * The expansion is a brace block (kept for compatibility with call sites).
 */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
{ \
    r = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss); \
    g = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss); \
    b = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss); \
}
icculus@1162
   107
/*
 * Split an RGB565 pixel (5 red bits at 11..15, 6 green bits at 5..10,
 * 5 blue bits at 0..4) into components left-aligned in 8 bits.
 * Pixel is parenthesized so expression arguments expand correctly;
 * r, g and b must be lvalues.
 */
#define RGB_FROM_RGB565(Pixel, r, g, b) \
{ \
    r = ((((Pixel) & 0xF800) >> 11) << 3); \
    g = ((((Pixel) & 0x07E0) >> 5) << 2); \
    b = (((Pixel) & 0x001F) << 3); \
}
icculus@1162
   113
/*
 * Split an RGB555 pixel (5 red bits at 10..14, 5 green bits at 5..9,
 * 5 blue bits at 0..4) into components left-aligned in 8 bits.
 * Pixel is parenthesized so expression arguments expand correctly;
 * r, g and b must be lvalues.
 */
#define RGB_FROM_RGB555(Pixel, r, g, b) \
{ \
    r = ((((Pixel) & 0x7C00) >> 10) << 3); \
    g = ((((Pixel) & 0x03E0) >> 5) << 3); \
    b = (((Pixel) & 0x001F) << 3); \
}
icculus@1162
   119
/*
 * Split an RGB888 pixel (red in bits 16..23, green in 8..15, blue in 0..7)
 * into its three 8-bit components.  Bits above 23 are ignored.
 * Pixel is parenthesized so expression arguments expand correctly;
 * r, g and b must be lvalues.
 */
#define RGB_FROM_RGB888(Pixel, r, g, b) \
{ \
    r = (((Pixel) & 0xFF0000) >> 16); \
    g = (((Pixel) & 0xFF00) >> 8); \
    b = ((Pixel) & 0xFF); \
}
icculus@1162
   125
/*
 * Load one pixel of bpp bytes from buf into Pixel.
 *
 *   bpp 2 - read through a Uint16 pointer
 *   bpp 3 - assemble the three bytes according to SDL_BYTEORDER
 *   bpp 4 - read through a Uint32 pointer
 *   other - Pixel is set to 0 (keeps compilers quiet about uninitialized use)
 *
 * buf is evaluated in only one of the branches at run time but appears in
 * several; pass a side-effect-free expression.  Pixel must be an lvalue.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
do { \
    switch (bpp) { \
    case 2: \
        Pixel = *((Uint16 *)(buf)); \
        break; \
    case 3: { \
        Uint8 *B = (Uint8 *)(buf); \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
        } else { \
            Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
        } \
    } \
        break; \
    case 4: \
        Pixel = *((Uint32 *)(buf)); \
        break; \
    default: \
        Pixel = 0; \
        break; \
    } \
} while (0)
slouken@0
   151
icculus@1162
   152
/*
 * Load one pixel of bpp bytes from buf into Pixel, then split it into
 * r, g and b with RGB_FROM_PIXEL using the format pointed to by fmt.
 *
 *   bpp 2 - read through a Uint16 pointer
 *   bpp 3 - assemble the three bytes according to SDL_BYTEORDER
 *   bpp 4 - read through a Uint32 pointer
 *   other - Pixel is set to 0
 *
 * buf is parenthesized before every cast so that pointer expressions such
 * as `row + 1` address the intended element.  fmt is forwarded in
 * parentheses.  Pixel, r, g and b must be lvalues.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
do { \
    switch (bpp) { \
    case 2: \
        Pixel = *((Uint16 *)(buf)); \
        break; \
    case 3: { \
        Uint8 *B = (Uint8 *)(buf); \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
        } else { \
            Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
        } \
    } \
        break; \
    case 4: \
        Pixel = *((Uint32 *)(buf)); \
        break; \
    default: \
        Pixel = 0; \
        break; \
    } \
    RGB_FROM_PIXEL(Pixel, (fmt), r, g, b); \
} while (0)
slouken@0
   179
slouken@0
   180
/* Assemble R-G-B values into a specified pixel format and store them */
icculus@1162
   181
/*
 * Pack r, g and b into Pixel using the losses and shifts of the format
 * pointed to by fmt: each component is shifted right by its loss and then
 * left by its shift, and the three results are OR-ed together.
 *
 * All inputs are parenthesized so expression arguments expand correctly.
 * Components are converted to Uint32 before shifting so that a channel
 * placed in bits 24..31 does not left-shift a promoted int into its sign
 * bit.  Pixel must be an lvalue.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
{ \
    Pixel = (((Uint32)(r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
            (((Uint32)(g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
            (((Uint32)(b) >> (fmt)->Bloss) << (fmt)->Bshift); \
}
icculus@1162
   187
/*
 * Pack 8-bit r, g and b into an RGB565 value (top 5/6/5 bits of each
 * component are kept).  Inputs are parenthesized so expression arguments
 * expand correctly.  Pixel must be an lvalue.
 */
#define RGB565_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = (((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3); \
}
icculus@1162
   191
/*
 * Pack 8-bit r, g and b into an RGB555 value (top 5 bits of each component
 * are kept).  Inputs are parenthesized so expression arguments expand
 * correctly.  Pixel must be an lvalue.
 */
#define RGB555_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = (((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3); \
}
icculus@1162
   195
/*
 * Pack 8-bit r, g and b into an RGB888 value (red in bits 16..23, green in
 * 8..15, blue in 0..7).  Inputs are parenthesized so expression arguments
 * expand correctly.  Pixel must be an lvalue.
 */
#define RGB888_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = ((r) << 16) | ((g) << 8) | (b); \
}
slouken@0
   199
/*
 * Pack r, g and b for the format pointed to by fmt and store the result as
 * one pixel of bpp bytes at buf.
 *
 *   bpp 2 / 4 - build the value with PIXEL_FROM_RGB and store it through a
 *               Uint16 / Uint32 pointer
 *   bpp 3     - store each component in the byte selected by its shift / 8,
 *               mirrored (2 - index) when SDL_BYTEORDER is not little endian
 *   other     - nothing is written
 *
 * buf is treated as a byte pointer in the 3-byte case; fmt, r, g and b are
 * parenthesized so expression arguments expand correctly.  The expansion is
 * a brace block (kept for compatibility with call sites).
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
{ \
    switch (bpp) { \
    case 2: { \
        Uint16 Pixel; \
        PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
        *((Uint16 *)(buf)) = Pixel; \
    } \
        break; \
    case 3: \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            *((Uint8 *)(buf) + (fmt)->Rshift / 8) = (r); \
            *((Uint8 *)(buf) + (fmt)->Gshift / 8) = (g); \
            *((Uint8 *)(buf) + (fmt)->Bshift / 8) = (b); \
        } else { \
            *((Uint8 *)(buf) + 2 - (fmt)->Rshift / 8) = (r); \
            *((Uint8 *)(buf) + 2 - (fmt)->Gshift / 8) = (g); \
            *((Uint8 *)(buf) + 2 - (fmt)->Bshift / 8) = (b); \
        } \
        break; \
    case 4: { \
        Uint32 Pixel; \
        PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
        *((Uint32 *)(buf)) = Pixel; \
    } \
        break; \
    default: \
        break; \
    } \
}
slouken@0
   232
/*
 * Like ASSEMBLE_RGB, but for 2- and 4-byte pixels the bits of the existing
 * destination pixel selected by Amask are preserved and OR-ed into the new
 * value.  The 3-byte case writes only the three colour bytes; for any other
 * bpp nothing is written.
 *
 * buf, fmt, r, g, b and Amask are parenthesized so expression arguments
 * expand correctly.  The expansion is a brace block (kept for compatibility
 * with call sites).
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask) \
{ \
    switch (bpp) { \
    case 2: { \
        Uint16 *bufp; \
        Uint16 Pixel; \
        bufp = (Uint16 *)(buf); \
        PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
        *bufp = Pixel | (*bufp & (Amask)); \
    } \
        break; \
    case 3: \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            *((Uint8 *)(buf) + (fmt)->Rshift / 8) = (r); \
            *((Uint8 *)(buf) + (fmt)->Gshift / 8) = (g); \
            *((Uint8 *)(buf) + (fmt)->Bshift / 8) = (b); \
        } else { \
            *((Uint8 *)(buf) + 2 - (fmt)->Rshift / 8) = (r); \
            *((Uint8 *)(buf) + 2 - (fmt)->Gshift / 8) = (g); \
            *((Uint8 *)(buf) + 2 - (fmt)->Bshift / 8) = (b); \
        } \
        break; \
    case 4: { \
        Uint32 *bufp; \
        Uint32 Pixel; \
        bufp = (Uint32 *)(buf); \
        PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
        *bufp = Pixel | (*bufp & (Amask)); \
    } \
        break; \
    default: \
        break; \
    } \
}
slouken@0
   269
slouken@0
   270
/* FIXME: Should we rescale alpha into 0..255 here? */
icculus@1162
   271
/*
 * Split a packed pixel into R, G, B and A using the masks, shifts and losses
 * of the format pointed to by fmt.  Each channel is masked, shifted down to
 * bit 0 and then shifted left by its loss (low bits are zero-filled).
 *
 * Pixel and fmt are parenthesized so expression arguments expand correctly.
 * r, g, b and a must be lvalues.
 */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
{ \
    r = (((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss; \
    g = (((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss; \
    b = (((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss; \
    a = (((Pixel) & (fmt)->Amask) >> (fmt)->Ashift) << (fmt)->Aloss; \
}
icculus@1162
   278
/*
 * Split a packed pixel into R, G, B and A using only the masks and shifts
 * of the format pointed to by fmt (no loss correction is applied, so this
 * suits formats whose channels are all 8 bits wide).
 *
 * Pixel and fmt are parenthesized so expression arguments expand correctly.
 * r, g, b and a must be lvalues.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
{ \
    r = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift; \
    g = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift; \
    b = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift; \
    a = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift; \
}
icculus@1162
   285
/*
 * Split an RGBA8888 pixel (red in bits 24..31, green in 16..23, blue in
 * 8..15, alpha in 0..7) into its four components.  Red is taken as
 * Pixel >> 24 without masking, so Pixel is expected to be an unsigned
 * 32-bit value.  Pixel is parenthesized so expression arguments expand
 * correctly; r, g, b and a must be lvalues.
 */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
{ \
    r = ((Pixel) >> 24); \
    g = (((Pixel) >> 16) & 0xFF); \
    b = (((Pixel) >> 8) & 0xFF); \
    a = ((Pixel) & 0xFF); \
}
icculus@1162
   292
/*
 * Split an ARGB8888 pixel (alpha in bits 24..31, red in 16..23, green in
 * 8..15, blue in 0..7) into its four components.  Alpha is taken as
 * Pixel >> 24 without masking, so Pixel is expected to be an unsigned
 * 32-bit value.  Pixel is parenthesized so expression arguments expand
 * correctly; r, g, b and a must be lvalues.
 */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
{ \
    r = (((Pixel) >> 16) & 0xFF); \
    g = (((Pixel) >> 8) & 0xFF); \
    b = ((Pixel) & 0xFF); \
    a = ((Pixel) >> 24); \
}
icculus@1162
   299
/*
 * Split an ABGR8888 pixel (alpha in bits 24..31, blue in 16..23, green in
 * 8..15, red in 0..7) into its four components.  Alpha is taken as
 * Pixel >> 24 without masking, so Pixel is expected to be an unsigned
 * 32-bit value.  Pixel is parenthesized so expression arguments expand
 * correctly; r, g, b and a must be lvalues.
 */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
{ \
    r = ((Pixel) & 0xFF); \
    g = (((Pixel) >> 8) & 0xFF); \
    b = (((Pixel) >> 16) & 0xFF); \
    a = ((Pixel) >> 24); \
}
icculus@1162
   306
/*
 * Load one pixel of bpp bytes from buf into Pixel, split it into r, g, b
 * and a with RGBA_FROM_PIXEL using the format pointed to by fmt, and finally
 * clear the format's alpha bits in Pixel.
 *
 *   bpp 2 - read through a Uint16 pointer
 *   bpp 3 - assemble three bytes according to SDL_BYTEORDER
 *           (FIXME carried over from the original: no alpha in this case)
 *   bpp 4 - read through a Uint32 pointer
 *   other - Pixel is set to 0
 *
 * The byte pointer used for the 3-byte case is named B, not b: `b` is a
 * macro parameter, so a local of that name is replaced by the caller's
 * argument text, which shadows the caller's variable and does not compile
 * for arguments such as `colour.b`.  buf and fmt are parenthesized so
 * expression arguments expand correctly.  Pixel, r, g, b and a must be
 * lvalues.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
do { \
    switch (bpp) { \
    case 2: \
        Pixel = *((Uint16 *)(buf)); \
        break; \
    case 3: { \
        Uint8 *B = (Uint8 *)(buf); \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
        } else { \
            Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
        } \
    } \
        break; \
    case 4: \
        Pixel = *((Uint32 *)(buf)); \
        break; \
    default: \
        Pixel = 0; \
        break; \
    } \
    RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a); \
    Pixel &= ~(fmt)->Amask; \
} while (0)
slouken@0
   334
slouken@0
   335
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
icculus@1162
   336
/*
 * Pack r, g, b and a into Pixel using the losses and shifts of the format
 * pointed to by fmt: each component is shifted right by its loss and then
 * left by its shift, and the four results are OR-ed together.
 *
 * All inputs are parenthesized so expression arguments expand correctly.
 * Components are converted to Uint32 before shifting so that a channel
 * placed in bits 24..31 (typically alpha) does not left-shift a promoted
 * int into its sign bit.  Pixel must be an lvalue.
 */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
{ \
    Pixel = (((Uint32)(r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
            (((Uint32)(g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
            (((Uint32)(b) >> (fmt)->Bloss) << (fmt)->Bshift) | \
            (((Uint32)(a) >> (fmt)->Aloss) << (fmt)->Ashift); \
}
slouken@0
   343
/*
 * Pack r, g, b and a for the format pointed to by fmt and store the result
 * as one pixel of bpp bytes at buf.
 *
 *   bpp 2 / 4 - build the value with PIXEL_FROM_RGBA and store it through a
 *               Uint16 / Uint32 pointer
 *   bpp 3     - store only r, g and b, each in the byte selected by its
 *               shift / 8, mirrored when SDL_BYTEORDER is not little endian
 *               (FIXME carried over from the original: alpha is dropped)
 *   other     - nothing is written
 *
 * buf is treated as a byte pointer in the 3-byte case; fmt and the
 * components are parenthesized so expression arguments expand correctly.
 * The expansion is a brace block (kept for compatibility with call sites).
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
{ \
    switch (bpp) { \
    case 2: { \
        Uint16 Pixel; \
        PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
        *((Uint16 *)(buf)) = Pixel; \
    } \
        break; \
    case 3: \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            *((Uint8 *)(buf) + (fmt)->Rshift / 8) = (r); \
            *((Uint8 *)(buf) + (fmt)->Gshift / 8) = (g); \
            *((Uint8 *)(buf) + (fmt)->Bshift / 8) = (b); \
        } else { \
            *((Uint8 *)(buf) + 2 - (fmt)->Rshift / 8) = (r); \
            *((Uint8 *)(buf) + 2 - (fmt)->Gshift / 8) = (g); \
            *((Uint8 *)(buf) + 2 - (fmt)->Bshift / 8) = (b); \
        } \
        break; \
    case 4: { \
        Uint32 Pixel; \
        PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
        *((Uint32 *)(buf)) = Pixel; \
    } \
        break; \
    default: \
        break; \
    } \
}
slouken@0
   376
icculus@1162
   377
/* Blend the RGB values of two Pixels based on a source alpha value */
slouken@0
   378
/*
 * Blend a source colour into a destination colour in place:
 *
 *     d = (((s - d) * A) >> 8) + d        for each of R, G and B
 *
 * A is used as an 8.8 fixed-point weight, so A == 0 leaves the destination
 * unchanged and larger A moves it toward the source (A == 255 does not
 * reach the source exactly because of the >> 8).
 *
 * The source components are parenthesized so expression arguments expand
 * correctly.  dR, dG and dB must be lvalues; each is read twice and written
 * once.  The difference s - d can be negative, so the operands should have
 * a signed type after promotion.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB) \
do { \
    dR = ((((sR) - (dR)) * (A)) >> 8) + (dR); \
    dG = ((((sG) - (dG)) * (A)) >> 8) + (dG); \
    dB = ((((sB) - (dB)) * (A)) >> 8) + (dB); \
} while (0)
slouken@0
   384
icculus@1162
   385
/* Blend the RGB values of two Pixels based on a source alpha value */
icculus@1047
   386
/*
 * Blend a source colour into a destination colour in place with weights
 * sA and 255 - sA.  For each channel:
 *
 *     t = 1 + s * sA + d * (255 - sA)
 *     d = (t + (t >> 8)) >> 8
 *
 * which approximates division by 255 with shifts only; sA == 255 yields the
 * source and sA == 0 leaves the destination unchanged.
 *
 * All inputs are parenthesized so expression arguments expand correctly.
 * The temporaries tR, tG, tB and tA are declared inside the expansion, so
 * arguments must not use those names.  dR, dG and dB must be lvalues.
 */
#define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB) \
do { \
    unsigned tR, tG, tB, tA; \
    tA = 255 - (sA); \
    tR = 1 + ((sR) * (sA)) + ((dR) * tA); \
    dR = (tR + (tR >> 8)) >> 8; \
    tG = 1 + ((sG) * (sA)) + ((dG) * tA); \
    dG = (tG + (tG >> 8)) >> 8; \
    tB = 1 + ((sB) * (sA)) + ((dB) * tA); \
    dB = (tB + (tB >> 8)) >> 8; \
} while (0)
icculus@1047
   397
icculus@1047
   398
slouken@0
   399
/* This is a very useful loop for optimizing blitters */
slouken@553
   400
/*
 * Enable the Duff's-device loop macros everywhere except Visual C++ 7
 * (_MSC_VER == 1300): there is a bug in that compiler's optimizer when
 * compiling this code.
 */
#if !(defined(_MSC_VER) && (_MSC_VER == 1300))
#define USE_DUFFS_LOOP
#endif
slouken@0
   405
#ifdef USE_DUFFS_LOOP
slouken@0
   406
slouken@0
   407
/* 8-times unrolled loop */
slouken@0
   408
/*
 * Execute pixel_copy_increment exactly `width` times using an 8-way
 * unrolled Duff's device: the switch jumps into the middle of the unrolled
 * body to handle width % 8 first, after which every pass of the do/while
 * performs eight copies.  Cases fall through intentionally.
 *
 * A width of zero or less executes nothing; without the guard the
 * `case 0` entry would run a full pass of eight copies.
 *
 * `width` is evaluated twice and a local named `n` is declared inside the
 * expansion, so pass a side-effect-free expression that does not mention n.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width) \
{ int n = ((width) + 7) / 8; \
    if (n > 0) { \
        switch ((width) & 7) { \
        case 0: do { pixel_copy_increment; \
        case 7:      pixel_copy_increment; \
        case 6:      pixel_copy_increment; \
        case 5:      pixel_copy_increment; \
        case 4:      pixel_copy_increment; \
        case 3:      pixel_copy_increment; \
        case 2:      pixel_copy_increment; \
        case 1:      pixel_copy_increment; \
                } while (--n > 0); \
        } \
    } \
}
slouken@0
   422
slouken@0
   423
/* 4-times unrolled loop */
slouken@0
   424
/*
 * Execute pixel_copy_increment exactly `width` times using a 4-way
 * unrolled Duff's device: the switch jumps into the unrolled body to handle
 * width % 4 first, after which every pass of the do/while performs four
 * copies.  Cases fall through intentionally.
 *
 * A width of zero or less executes nothing; without the guard the
 * `case 0` entry would run a full pass of four copies.
 *
 * `width` is evaluated twice and a local named `n` is declared inside the
 * expansion, so pass a side-effect-free expression that does not mention n.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width) \
{ int n = ((width) + 3) / 4; \
    if (n > 0) { \
        switch ((width) & 3) { \
        case 0: do { pixel_copy_increment; \
        case 3:      pixel_copy_increment; \
        case 2:      pixel_copy_increment; \
        case 1:      pixel_copy_increment; \
                } while (--n > 0); \
        } \
    } \
}
slouken@0
   434
slouken@689
   435
/* 2 - times unrolled loop */
slouken@689
   436
/*
 * Process `width` pixels with two statement blocks: when width is odd,
 * pixel_copy_increment runs once first; the remaining even count is then
 * handled two pixels at a time by double_pixel_copy_increment, in a 2-way
 * unrolled Duff's device (cases fall through intentionally).
 *
 * Nothing runs for a width of zero.  Locals named `n` and `w` are declared
 * inside the expansion, so the width expression must not mention them.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
                           double_pixel_copy_increment, width) \
{ int n, w = (width); \
    if (w & 1) { \
        pixel_copy_increment; \
        w--; \
    } \
    if (w > 0) { \
        n = (w + 2) / 4; \
        switch (w & 2) { \
        case 0: do { double_pixel_copy_increment; \
        case 2:      double_pixel_copy_increment; \
                } while (--n > 0); \
        } \
    } \
}
slouken@689
   452
slouken@689
   453
/* 2 - times unrolled loop 4 pixels */
slouken@689
   454
/*
 * Process `width` pixels with three statement blocks: one pixel via
 * pixel_copy_increment when width is odd, then two pixels via
 * double_pixel_copy_increment when bit 1 of the remainder is set, and the
 * rest four pixels at a time via quatro_pixel_copy_increment in a 2-way
 * unrolled Duff's device (cases fall through intentionally).
 *
 * Nothing runs for a width of zero.  Locals named `n` and `w` are declared
 * inside the expansion, so the width expression must not mention them.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
                           double_pixel_copy_increment, \
                           quatro_pixel_copy_increment, width) \
{ int n, w = (width); \
    if (w & 1) { \
        pixel_copy_increment; \
        w--; \
    } \
    if (w & 2) { \
        double_pixel_copy_increment; \
        w -= 2; \
    } \
    if (w > 0) { \
        n = (w + 7) / 8; \
        switch (w & 4) { \
        case 0: do { quatro_pixel_copy_increment; \
        case 4:      quatro_pixel_copy_increment; \
                } while (--n > 0); \
        } \
    } \
}
slouken@689
   475
slouken@0
   476
/* Use the 8-times version of the loop by default */
slouken@0
   477
/* Default loop macro: forwards to the 8-times unrolled DUFFS_LOOP8. */
#define DUFFS_LOOP(pixel_copy_increment, width) \
    DUFFS_LOOP8(pixel_copy_increment, width)
slouken@0
   479
slouken@0
   480
#else
slouken@0
   481
slouken@0
   482
/* Don't use Duff's device to unroll loops */
slouken@689
   483
/*
 * Plain-loop variant used when USE_DUFFS_LOOP is not defined.  When width
 * is odd, pixel_copy_increment runs once; double_pixel_copy_increment then
 * runs once for every remaining pair of pixels.
 *
 * A local named `n` is declared inside the expansion, so the width
 * expression must not mention it.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
                           double_pixel_copy_increment, width) \
{ int n = (width); \
    if (n & 1) { \
        pixel_copy_increment; \
        n--; \
    } \
    for (n >>= 1; n > 0; --n) { \
        double_pixel_copy_increment; \
    } \
}
slouken@689
   495
slouken@689
   496
/* Don't use Duff's device to unroll loops */
slouken@689
   497
/*
 * Plain-loop variant used when USE_DUFFS_LOOP is not defined.  One pixel is
 * handled by pixel_copy_increment when width is odd, two more by
 * double_pixel_copy_increment when bit 1 of the remainder is set, and
 * quatro_pixel_copy_increment then runs once for every remaining group of
 * four pixels.
 *
 * A local named `n` is declared inside the expansion, so the width
 * expression must not mention it.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
                           double_pixel_copy_increment, \
                           quatro_pixel_copy_increment, width) \
{ int n = (width); \
    if (n & 1) { \
        pixel_copy_increment; \
        n--; \
    } \
    if (n & 2) { \
        double_pixel_copy_increment; \
        n -= 2; \
    } \
    for (n >>= 2; n > 0; --n) { \
        quatro_pixel_copy_increment; \
    } \
}
slouken@689
   514
slouken@689
   515
/* Don't use Duff's device to unroll loops */
slouken@0
   516
/*
 * Plain-loop variants used when USE_DUFFS_LOOP is not defined.
 * DUFFS_LOOP executes pixel_copy_increment once per pixel, `width` times
 * (never for a width of zero or less); DUFFS_LOOP8 and DUFFS_LOOP4 are
 * aliases for it.
 *
 * A local named `n` is declared inside the expansion, so the width
 * expression must not mention it.
 */
#define DUFFS_LOOP(pixel_copy_increment, width) \
{ int n; \
    for (n = (width); n > 0; --n) { \
        pixel_copy_increment; \
    } \
}
#define DUFFS_LOOP8(pixel_copy_increment, width) \
    DUFFS_LOOP(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width) \
    DUFFS_LOOP(pixel_copy_increment, width)
slouken@0
   526
slouken@0
   527
#endif /* USE_DUFFS_LOOP */
slouken@0
   528
slouken@0
   529
/* Prevent Visual C++ 6.0 from printing out stupid warnings */
slouken@0
   530
/* Disable Microsoft compiler warning 4550 whenever _MSC_VER is 600 or
   newer; other compilers never see the pragma. */
#if defined(_MSC_VER) && (_MSC_VER >= 600)
#pragma warning(disable: 4550)
#endif
slouken@0
   533
slouken@0
   534
#endif /* _SDL_blit_h */
slouken@1895
   535
/* vi: set ts=4 sw=4 expandtab: */