src/video/SDL_blit.h
author Sam Lantinga <slouken@libsdl.org>
Tue, 02 Dec 2008 17:14:04 +0000
changeset 2824 4dba7aa7ea77
parent 2267 c785543d1843
child 2853 6258fa7cd300
permissions -rw-r--r--
Added slow but complete blit fallback
Don't try to RLE encode surfaces that have alpha channel and alpha modulation
Don't turn on blending when converting an RGB surface to RGBA format
Do turn on blending when converting colorkey to alpha channel
slouken@0
     1
/*
slouken@0
     2
    SDL - Simple DirectMedia Layer
slouken@1312
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@0
     4
slouken@0
     5
    This library is free software; you can redistribute it and/or
slouken@1312
     6
    modify it under the terms of the GNU Lesser General Public
slouken@0
     7
    License as published by the Free Software Foundation; either
slouken@1312
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@0
     9
slouken@0
    10
    This library is distributed in the hope that it will be useful,
slouken@0
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@0
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1312
    13
    Lesser General Public License for more details.
slouken@0
    14
slouken@1312
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1312
    16
    License along with this library; if not, write to the Free Software
slouken@1312
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@0
    18
slouken@0
    19
    Sam Lantinga
slouken@252
    20
    slouken@libsdl.org
slouken@0
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
#ifndef _SDL_blit_h
slouken@0
    25
#define _SDL_blit_h
slouken@0
    26
slouken@2249
    27
#ifdef __MMX__
slouken@2249
    28
#include <mmintrin.h>
slouken@2249
    29
#endif
slouken@2261
    30
#ifdef __3dNOW__
slouken@2261
    31
#include <mm3dnow.h>
slouken@2261
    32
#endif
slouken@2249
    33
#ifdef __SSE__
slouken@2249
    34
#include <xmmintrin.h>
slouken@2249
    35
#endif
slouken@2262
    36
#ifdef __SSE2__
slouken@2262
    37
#include <emmintrin.h>
slouken@2262
    38
#endif
slouken@2249
    39
slouken@2250
    40
#include "SDL_cpuinfo.h"
slouken@0
    41
#include "SDL_endian.h"
slouken@0
    42
slouken@2262
    43
/* SDL blit copy flags */
slouken@2266
    44
/*
 * Each value is a distinct bit so that several flags can be OR-ed together.
 * NOTE(review): presumably these are the values carried in the `flags`
 * members declared further down in this header -- confirm against callers.
 */
#define SDL_COPY_MODULATE_COLOR     0x00000001
#define SDL_COPY_MODULATE_ALPHA     0x00000002
#define SDL_COPY_MASK               0x00000010
#define SDL_COPY_BLEND              0x00000020
#define SDL_COPY_ADD                0x00000040
#define SDL_COPY_MOD                0x00000080
#define SDL_COPY_COLORKEY           0x00000100
#define SDL_COPY_NEAREST            0x00000200
#define SDL_COPY_RLE_DESIRED        0x00001000
#define SDL_COPY_RLE_COLORKEY       0x00002000
#define SDL_COPY_RLE_ALPHAKEY       0x00004000
slouken@2262
    55
slouken@2262
    56
/* SDL blit CPU flags */
slouken@2266
    57
/*
 * SDL_CPU_ANY is zero; every other value is a distinct bit.
 * NOTE(review): presumably matched against the `cpu` member of
 * SDL_BlitFuncEntry -- confirm against the blit selection code.
 */
#define SDL_CPU_ANY                 0x00000000
#define SDL_CPU_MMX                 0x00000001
#define SDL_CPU_3DNOW               0x00000002
#define SDL_CPU_SSE                 0x00000004
#define SDL_CPU_SSE2                0x00000008
#define SDL_CPU_ALTIVEC_PREFETCH    0x00000010
#define SDL_CPU_ALTIVEC_NOPREFETCH  0x00000020
slouken@2262
    64
slouken@2267
    65
typedef struct
slouken@2267
    66
{
slouken@2262
    67
    Uint8 *src;
slouken@2262
    68
    int src_w, src_h;
slouken@2262
    69
    int src_pitch;
slouken@2267
    70
    int src_skip;
slouken@2262
    71
    Uint8 *dst;
slouken@2262
    72
    int dst_w, dst_h;
slouken@2262
    73
    int dst_pitch;
slouken@2267
    74
    int dst_skip;
slouken@2262
    75
    SDL_PixelFormat *src_fmt;
slouken@2262
    76
    SDL_PixelFormat *dst_fmt;
slouken@1895
    77
    Uint8 *table;
slouken@2262
    78
    int flags;
slouken@2262
    79
    Uint32 colorkey;
slouken@2262
    80
    Uint8 r, g, b, a;
slouken@0
    81
} SDL_BlitInfo;
slouken@0
    82
slouken@2267
    83
/* Pointer to a blit routine: takes the SDL_BlitInfo describing the copy
   and returns nothing. */
typedef void (SDLCALL * SDL_BlitFunc) (SDL_BlitInfo * info);
slouken@2262
    84
slouken@2267
    85
typedef struct
slouken@2267
    86
{
slouken@2262
    87
    Uint32 src_format;
slouken@2262
    88
    Uint32 dst_format;
slouken@2262
    89
    int flags;
slouken@2262
    90
    int cpu;
slouken@2262
    91
    SDL_BlitFunc func;
slouken@2262
    92
} SDL_BlitFuncEntry;
slouken@0
    93
slouken@0
    94
/* Blit mapping definition */
slouken@1895
    95
typedef struct SDL_BlitMap
slouken@1895
    96
{
slouken@1895
    97
    SDL_Surface *dst;
slouken@1895
    98
    int identity;
slouken@2257
    99
    SDL_blit blit;
slouken@2257
   100
    void *data;
slouken@2262
   101
    SDL_BlitInfo info;
slouken@0
   102
slouken@1895
   103
    /* the version count matches the destination; mismatch indicates
slouken@1895
   104
       an invalid mapping */
slouken@1895
   105
    unsigned int format_version;
slouken@0
   106
} SDL_BlitMap;
slouken@0
   107
slouken@0
   108
/* Functions found in SDL_blit.c */
slouken@1895
   109
extern int SDL_CalculateBlit(SDL_Surface * surface);
slouken@0
   110
slouken@2267
   111
/* Functions found in SDL_blit_*.c */
slouken@2267
   112
extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface * surface);
slouken@2267
   113
extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface * surface);
slouken@2267
   114
extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface * surface);
slouken@2267
   115
extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface * surface);
slouken@2263
   116
slouken@0
   117
/*
slouken@0
   118
 * Useful macros for blitting routines
slouken@0
   119
 */
slouken@0
   120
slouken@2249
   121
/*
 * DECLARE_ALIGNED(t, v, a): declare variable `v` of type `t` aligned to
 * `a` bytes.  On compilers that are neither GCC-compatible nor MSVC the
 * alignment request is silently dropped and a plain declaration results.
 */
#if defined(__GNUC__)
#define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
#else
#define DECLARE_ALIGNED(t,v,a)  t v
#endif
slouken@2249
   128
slouken@0
   129
/*
 * Non-zero when the two pixel formats (passed as pointers) have the same
 * BitsPerPixel, Rmask and Amask.  Only those three members are compared;
 * Gmask/Bmask are not looked at.
 */
#define FORMAT_EQUAL(A, B)                                              \
    ((A)->BitsPerPixel == (B)->BitsPerPixel                             \
     && ((A)->Rmask == (B)->Rmask) && ((A)->Amask == (B)->Amask))
slouken@0
   132
slouken@0
   133
/* Load pixel of the specified format from a buffer and get its R-G-B values */
slouken@0
   134
/* FIXME: rescale values to 0..255 here? */
icculus@1162
   135
/*
 * Split `Pixel` into r/g/b using the masks, shifts and losses of `fmt`
 * (pointer to a pixel format).  Each component is masked, shifted down,
 * then shifted left by its loss; the low `loss` bits are left as zero.
 * r, g and b must be assignable.  Arguments are evaluated more than once.
 * Expands to a braced block (not do/while) to stay compatible with
 * existing call sites.
 */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)                             \
{                                                                       \
    r = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss);  \
    g = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss);  \
    b = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss);  \
}
icculus@1162
   141
/*
 * Split a 5-6-5 pixel into r/g/b.  Components are left-aligned in 8 bits
 * (low bits zero), so full intensity is 248/252/248, not 255.
 * `Pixel` is evaluated three times.
 */
#define RGB_FROM_RGB565(Pixel, r, g, b)                                 \
{                                                                       \
    r = ((((Pixel) & 0xF800) >> 11) << 3);                              \
    g = ((((Pixel) & 0x07E0) >> 5) << 2);                               \
    b = (((Pixel) & 0x001F) << 3);                                      \
}
icculus@1162
   147
/*
 * Split a 5-5-5 pixel into r/g/b.  Components are left-aligned in 8 bits
 * (low three bits zero), so full intensity is 248.
 * `Pixel` is evaluated three times.
 */
#define RGB_FROM_RGB555(Pixel, r, g, b)                                 \
{                                                                       \
    r = ((((Pixel) & 0x7C00) >> 10) << 3);                              \
    g = ((((Pixel) & 0x03E0) >> 5) << 3);                               \
    b = (((Pixel) & 0x001F) << 3);                                      \
}
icculus@1162
   153
/*
 * Split a pixel laid out as 0x00RRGGBB into r/g/b (8 bits each).
 * `Pixel` is evaluated three times.
 */
#define RGB_FROM_RGB888(Pixel, r, g, b)                                 \
{                                                                       \
    r = (((Pixel) & 0xFF0000) >> 16);                                   \
    g = (((Pixel) & 0xFF00) >> 8);                                      \
    b = ((Pixel) & 0xFF);                                               \
}
icculus@1162
   159
/*
 * Load one pixel of `bpp` bytes from `buf` into `Pixel`.
 *   bpp 2 / 4: a native Uint16 / Uint32 load through a cast of `buf`
 *              (so `buf` must be suitably aligned for that access).
 *   bpp 3:     the three bytes are combined according to SDL_BYTEORDER so
 *              the result equals what a native 24-bit load would give.
 *   other:     `Pixel` is left untouched.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)                             \
do {                                                                    \
    switch (bpp) {                                                      \
        case 2:                                                         \
            Pixel = *((Uint16 *)(buf));                                 \
        break;                                                          \
                                                                        \
        case 3: {                                                       \
            Uint8 *B = (Uint8 *)(buf);                                  \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                      \
                Pixel = B[0] + (B[1] << 8) + (B[2] << 16);              \
            } else {                                                    \
                Pixel = (B[0] << 16) + (B[1] << 8) + B[2];              \
            }                                                           \
        }                                                               \
        break;                                                          \
                                                                        \
        case 4:                                                         \
            Pixel = *((Uint32 *)(buf));                                 \
        break;                                                          \
                                                                        \
        default:                                                        \
            (void) (Pixel); /* stop gcc complaints */                   \
        break;                                                          \
    }                                                                   \
} while (0)
slouken@0
   185
icculus@1162
   186
/*
 * Load one pixel of `bpp` bytes from `buf` and split it into r/g/b using
 * the pixel format `fmt` (pointer).
 *   bpp 2 / 4: native Uint16 / Uint32 load into `Pixel`, then
 *              RGB_FROM_PIXEL.
 *   bpp 3:     each component is read directly from the byte selected by
 *              its shift (shift/8, mirrored on big-endian); `Pixel` is NOT
 *              written in this case.
 *   other:     nothing is written.
 * `buf` is addressed as bytes in the 24-bit case regardless of its
 * declared pointer type.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)                     \
do {                                                                    \
    switch (bpp) {                                                      \
        case 2:                                                         \
            Pixel = *((Uint16 *)(buf));                                 \
            RGB_FROM_PIXEL(Pixel, (fmt), r, g, b);                      \
        break;                                                          \
                                                                        \
        case 3: {                                                       \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                      \
                r = *(((Uint8 *)(buf)) + (fmt)->Rshift / 8);            \
                g = *(((Uint8 *)(buf)) + (fmt)->Gshift / 8);            \
                b = *(((Uint8 *)(buf)) + (fmt)->Bshift / 8);            \
            } else {                                                    \
                r = *(((Uint8 *)(buf)) + 2 - (fmt)->Rshift / 8);        \
                g = *(((Uint8 *)(buf)) + 2 - (fmt)->Gshift / 8);        \
                b = *(((Uint8 *)(buf)) + 2 - (fmt)->Bshift / 8);        \
            }                                                           \
        }                                                               \
        break;                                                          \
                                                                        \
        case 4:                                                         \
            Pixel = *((Uint32 *)(buf));                                 \
            RGB_FROM_PIXEL(Pixel, (fmt), r, g, b);                      \
        break;                                                          \
                                                                        \
        default:                                                        \
            (void) (Pixel); /* stop gcc complaints */                   \
        break;                                                          \
    }                                                                   \
} while (0)
slouken@0
   217
slouken@0
   218
/* Assemble R-G-B values into a specified pixel format and store them */
icculus@1162
   219
/*
 * Pack r/g/b into `Pixel` for the pixel format `fmt` (pointer): each
 * component is shifted right by its loss, then left by its shift, and the
 * three results are OR-ed together.  Arguments are evaluated once each
 * except `fmt`, which is evaluated several times.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)                             \
{                                                                       \
    Pixel = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) |                  \
            (((g) >> (fmt)->Gloss) << (fmt)->Gshift) |                  \
            (((b) >> (fmt)->Bloss) << (fmt)->Bshift);                   \
}
icculus@1162
   225
/* Pack 8-bit r/g/b into a 5-6-5 pixel; the low 3/2/3 bits are discarded. */
#define RGB565_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    Pixel = (((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3);        \
}
icculus@1162
   229
/* Pack 8-bit r/g/b into a 5-5-5 pixel; the low 3 bits of each are discarded. */
#define RGB555_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    Pixel = (((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3);        \
}
icculus@1162
   233
/* Pack 8-bit r/g/b into a pixel laid out as 0x00RRGGBB. */
#define RGB888_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    Pixel = ((r) << 16) | ((g) << 8) | (b);                             \
}
slouken@0
   237
/*
 * Pack r/g/b for pixel format `fmt` (pointer) and store the result as one
 * pixel of `bpp` bytes at `buf`.
 *   bpp 2 / 4: PIXEL_FROM_RGB into a temporary, then a native Uint16 /
 *              Uint32 store through a cast of `buf`.
 *   bpp 3:     each component is written to the byte selected by its
 *              shift (shift/8, mirrored on big-endian); `buf` is addressed
 *              as bytes regardless of its declared pointer type.
 *   other:     nothing is stored.
 * Arguments are evaluated more than once.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b)                            \
{                                                                       \
    switch (bpp) {                                                      \
        case 2: {                                                       \
            Uint16 Pixel;                                               \
                                                                        \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));                \
            *((Uint16 *)(buf)) = Pixel;                                 \
        }                                                               \
        break;                                                          \
                                                                        \
        case 3: {                                                       \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                      \
                *(((Uint8 *)(buf)) + (fmt)->Rshift / 8) = (r);          \
                *(((Uint8 *)(buf)) + (fmt)->Gshift / 8) = (g);          \
                *(((Uint8 *)(buf)) + (fmt)->Bshift / 8) = (b);          \
            } else {                                                    \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Rshift / 8) = (r);      \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Gshift / 8) = (g);      \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Bshift / 8) = (b);      \
            }                                                           \
        }                                                               \
        break;                                                          \
                                                                        \
        case 4: {                                                       \
            Uint32 Pixel;                                               \
                                                                        \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));                \
            *((Uint32 *)(buf)) = Pixel;                                 \
        }                                                               \
        break;                                                          \
                                                                        \
        default:                                                        \
        break;                                                          \
    }                                                                   \
}
slouken@0
   270
/*
 * Like ASSEMBLE_RGB, but for bpp 2 and 4 the bits of the existing
 * destination pixel selected by `Amask` are preserved and OR-ed into the
 * newly packed value.  The 24-bit case writes the three component bytes
 * only.  Other bpp values store nothing.
 * `buf` may be any pointer expression; it is fully parenthesized before
 * being cast.  Arguments are evaluated more than once.
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)               \
{                                                                       \
    switch (bpp) {                                                      \
        case 2: {                                                       \
            Uint16 *bufp;                                               \
            Uint16 Pixel;                                               \
                                                                        \
            bufp = (Uint16 *)(buf);                                     \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));                \
            *bufp = Pixel | (*bufp & (Amask));                          \
        }                                                               \
        break;                                                          \
                                                                        \
        case 3: {                                                       \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                      \
                *(((Uint8 *)(buf)) + (fmt)->Rshift / 8) = (r);          \
                *(((Uint8 *)(buf)) + (fmt)->Gshift / 8) = (g);          \
                *(((Uint8 *)(buf)) + (fmt)->Bshift / 8) = (b);          \
            } else {                                                    \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Rshift / 8) = (r);      \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Gshift / 8) = (g);      \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Bshift / 8) = (b);      \
            }                                                           \
        }                                                               \
        break;                                                          \
                                                                        \
        case 4: {                                                       \
            Uint32 *bufp;                                               \
            Uint32 Pixel;                                               \
                                                                        \
            bufp = (Uint32 *)(buf);                                     \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));                \
            *bufp = Pixel | (*bufp & (Amask));                          \
        }                                                               \
        break;                                                          \
                                                                        \
        default:                                                        \
        break;                                                          \
    }                                                                   \
}
slouken@0
   307
slouken@0
   308
/* FIXME: Should we rescale alpha into 0..255 here? */
icculus@1162
   309
/*
 * Split `Pixel` into r/g/b/a using the masks, shifts and losses of `fmt`
 * (pointer to a pixel format).  Each component is masked, shifted down,
 * then shifted left by its loss; no rescaling to the full 0..255 range is
 * done.  Arguments are evaluated more than once.
 */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)                         \
{                                                                       \
    r = (((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss;    \
    g = (((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss;    \
    b = (((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss;    \
    a = (((Pixel) & (fmt)->Amask) >> (fmt)->Ashift) << (fmt)->Aloss;    \
}
icculus@1162
   316
/*
 * Split `Pixel` into r/g/b/a using only the masks and shifts of `fmt`
 * (pointer); the loss members are not applied.
 * Arguments are evaluated more than once.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)                          \
{                                                                       \
    r = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift;                      \
    g = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift;                      \
    b = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift;                      \
    a = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift;                      \
}
icculus@1162
   323
/*
 * Split a pixel laid out as 0xRRGGBBAA.  `r` takes everything above bit
 * 23 without masking, so `Pixel` is expected to be a 32-bit unsigned
 * value.  `Pixel` is evaluated four times.
 */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)                           \
{                                                                       \
    r = ((Pixel) >> 24);                                                \
    g = (((Pixel) >> 16) & 0xFF);                                       \
    b = (((Pixel) >> 8) & 0xFF);                                        \
    a = ((Pixel) & 0xFF);                                               \
}
icculus@1162
   330
/*
 * Split a pixel laid out as 0xAARRGGBB.  `a` takes everything above bit
 * 23 without masking, so `Pixel` is expected to be a 32-bit unsigned
 * value.  `Pixel` is evaluated four times.
 */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)                           \
{                                                                       \
    r = (((Pixel) >> 16) & 0xFF);                                       \
    g = (((Pixel) >> 8) & 0xFF);                                        \
    b = ((Pixel) & 0xFF);                                               \
    a = ((Pixel) >> 24);                                                \
}
icculus@1162
   337
/*
 * Split a pixel laid out as 0xAABBGGRR.  `a` takes everything above bit
 * 23 without masking, so `Pixel` is expected to be a 32-bit unsigned
 * value.  `Pixel` is evaluated four times.
 */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)                           \
{                                                                       \
    r = ((Pixel) & 0xFF);                                               \
    g = (((Pixel) >> 8) & 0xFF);                                        \
    b = (((Pixel) >> 16) & 0xFF);                                       \
    a = ((Pixel) >> 24);                                                \
}
icculus@1162
   344
/*
 * Load one pixel of `bpp` bytes from `buf` and split it into r/g/b/a
 * using the pixel format `fmt` (pointer).
 *   bpp 2 / 4: native Uint16 / Uint32 load into `Pixel`, then
 *              RGBA_FROM_PIXEL.
 *   bpp 3:     r/g/b are read directly from the byte selected by each
 *              shift (shift/8, mirrored on big-endian) and `a` is set to
 *              0xFF; `Pixel` is NOT written in this case.
 *   other:     nothing is written.
 * `buf` is addressed as bytes in the 24-bit case regardless of its
 * declared pointer type.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)                 \
do {                                                                    \
    switch (bpp) {                                                      \
        case 2:                                                         \
            Pixel = *((Uint16 *)(buf));                                 \
            RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a);                  \
        break;                                                          \
                                                                        \
        case 3: {                                                       \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                      \
                r = *(((Uint8 *)(buf)) + (fmt)->Rshift / 8);            \
                g = *(((Uint8 *)(buf)) + (fmt)->Gshift / 8);            \
                b = *(((Uint8 *)(buf)) + (fmt)->Bshift / 8);            \
            } else {                                                    \
                r = *(((Uint8 *)(buf)) + 2 - (fmt)->Rshift / 8);        \
                g = *(((Uint8 *)(buf)) + 2 - (fmt)->Gshift / 8);        \
                b = *(((Uint8 *)(buf)) + 2 - (fmt)->Bshift / 8);        \
            }                                                           \
            a = 0xFF;                                                   \
        }                                                               \
        break;                                                          \
                                                                        \
        case 4:                                                         \
            Pixel = *((Uint32 *)(buf));                                 \
            RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a);                  \
        break;                                                          \
                                                                        \
        default:                                                        \
            (void) (Pixel); /* stop gcc complaints */                   \
        break;                                                          \
    }                                                                   \
} while (0)
slouken@0
   376
slouken@0
   377
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
icculus@1162
   378
/*
 * Pack r/g/b/a into `Pixel` for the pixel format `fmt` (pointer): each
 * component is shifted right by its loss, then left by its shift, and the
 * four results are OR-ed together.  `fmt` is evaluated several times.
 */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                         \
{                                                                       \
    Pixel = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) |                  \
            (((g) >> (fmt)->Gloss) << (fmt)->Gshift) |                  \
            (((b) >> (fmt)->Bloss) << (fmt)->Bshift) |                  \
            (((a) >> (fmt)->Aloss) << (fmt)->Ashift);                   \
}
slouken@0
   385
/*
 * Pack r/g/b/a for pixel format `fmt` (pointer) and store the result as
 * one pixel of `bpp` bytes at `buf`.
 *   bpp 2 / 4: PIXEL_FROM_RGBA into a temporary, then a native Uint16 /
 *              Uint32 store through a cast of `buf`.
 *   bpp 3:     only r/g/b are written (one byte each, position taken from
 *              the shift); `a` is ignored.  `buf` is addressed as bytes
 *              regardless of its declared pointer type.
 *   other:     nothing is stored.
 * Arguments are evaluated more than once.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)                        \
{                                                                       \
    switch (bpp) {                                                      \
        case 2: {                                                       \
            Uint16 Pixel;                                               \
                                                                        \
            PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a));          \
            *((Uint16 *)(buf)) = Pixel;                                 \
        }                                                               \
        break;                                                          \
                                                                        \
        case 3: {                                                       \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                      \
                *(((Uint8 *)(buf)) + (fmt)->Rshift / 8) = (r);          \
                *(((Uint8 *)(buf)) + (fmt)->Gshift / 8) = (g);          \
                *(((Uint8 *)(buf)) + (fmt)->Bshift / 8) = (b);          \
            } else {                                                    \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Rshift / 8) = (r);      \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Gshift / 8) = (g);      \
                *(((Uint8 *)(buf)) + 2 - (fmt)->Bshift / 8) = (b);      \
            }                                                           \
        }                                                               \
        break;                                                          \
                                                                        \
        case 4: {                                                       \
            Uint32 Pixel;                                               \
                                                                        \
            PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a));          \
            *((Uint32 *)(buf)) = Pixel;                                 \
        }                                                               \
        break;                                                          \
                                                                        \
        default:                                                        \
        break;                                                          \
    }                                                                   \
}
slouken@0
   418
icculus@1162
   419
/* Blend the RGB values of two Pixels based on a source alpha value */
slouken@0
   420
/*
 * Fast approximate blend: d = d + ((s - d) * A) >> 8 for each of R, G, B.
 * dR/dG/dB are both inputs and outputs.  Because the divisor is 256
 * rather than 255, A == 255 does not reproduce the source exactly.
 * NOTE(review): when s < d the intermediate is negative, so the result
 * depends on the operand types and on how the compiler right-shifts
 * negative values -- callers should use signed ints consistently.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)                          \
do {                                                                    \
    dR = ((((sR) - (dR)) * (A)) >> 8) + (dR);                           \
    dG = ((((sG) - (dG)) * (A)) >> 8) + (dG);                           \
    dB = ((((sB) - (dB)) * (A)) >> 8) + (dB);                           \
} while(0)
slouken@0
   426
icculus@1162
   427
/* Blend the RGB values of two Pixels based on a source alpha value */
icculus@1047
   428
/*
 * Blend s over d with weight sA/255 for each of R, G, B:
 *     t = 1 + s*sA + d*(255 - sA);   d = (t + (t >> 8)) >> 8
 * The second line is a shift-only approximation of t/255, so sA == 255
 * yields the source and sA == 0 leaves the destination unchanged.
 * dR/dG/dB are both inputs and outputs.  Arguments are evaluated more
 * than once; the macro declares locals tR, tG, tB, tA.
 */
#define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB)                \
do {                                                                    \
    unsigned tR, tG, tB, tA;                                            \
    tA = 255 - (sA);                                                    \
    tR = 1 + ((sR) * (sA)) + ((dR) * tA);                               \
    dR = (tR + (tR >> 8)) >> 8;                                         \
    tG = 1 + ((sG) * (sA)) + ((dG) * tA);                               \
    dG = (tG + (tG >> 8)) >> 8;                                         \
    tB = 1 + ((sB) * (sA)) + ((dB) * tA);                               \
    dB = (tB + (tB >> 8)) >> 8;                                         \
} while(0)
icculus@1047
   439
icculus@1047
   440
slouken@0
   441
/* This is a very useful loop for optimizing blitters */
slouken@553
   442
/* USE_DUFFS_LOOP selects the unrolled (Duff's device) loop macros.  It is
   defined for every compiler except the one reporting _MSC_VER == 1300. */
#if defined(_MSC_VER) && (_MSC_VER == 1300)
/* There's a bug in the Visual C++ 7 optimizer when compiling this code */
#else
#define USE_DUFFS_LOOP
#endif
slouken@0
   447
#ifdef USE_DUFFS_LOOP
slouken@0
   448
slouken@0
   449
/* 8-times unrolled loop */
slouken@0
   450
/*
 * Execute `pixel_copy_increment` exactly `width` times using an 8-way
 * unrolled Duff's device: the switch jumps into the middle of the
 * do/while body to handle the (width % 8) leftover first, then the loop
 * runs whole groups of eight.
 * A `width` of zero or less executes nothing, matching the non-unrolled
 * fallback.  `width` is evaluated twice; the macro declares a local `n`.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width)                        \
{ int n = ((width) + 7) / 8;                                            \
    if (n > 0) {                                                        \
        switch ((width) & 7) {                                          \
        case 0: do {    pixel_copy_increment;                           \
        case 7:         pixel_copy_increment;                           \
        case 6:         pixel_copy_increment;                           \
        case 5:         pixel_copy_increment;                           \
        case 4:         pixel_copy_increment;                           \
        case 3:         pixel_copy_increment;                           \
        case 2:         pixel_copy_increment;                           \
        case 1:         pixel_copy_increment;                           \
                } while (--n > 0);                                      \
        }                                                               \
    }                                                                   \
}
slouken@0
   464
slouken@0
   465
/* 4-times unrolled loop */
slouken@0
   466
/*
 * Execute `pixel_copy_increment` exactly `width` times using a 4-way
 * unrolled Duff's device (the switch enters the do/while body part-way to
 * handle the width % 4 leftover first).
 * A `width` of zero or less executes nothing, matching the non-unrolled
 * fallback.  `width` is evaluated twice; the macro declares a local `n`.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width)                        \
{ int n = ((width) + 3) / 4;                                            \
    if (n > 0) {                                                        \
        switch ((width) & 3) {                                          \
        case 0: do {    pixel_copy_increment;                           \
        case 3:         pixel_copy_increment;                           \
        case 2:         pixel_copy_increment;                           \
        case 1:         pixel_copy_increment;                           \
                } while (--n > 0);                                      \
        }                                                               \
    }                                                                   \
}
slouken@0
   476
slouken@689
   477
/* 2 - times unrolled loop */
slouken@689
   478
/*
 * Process `width` pixels with a one-pixel step and a two-pixel step:
 * if `width` is odd, `pixel_copy_increment` runs once first; the
 * remaining even count is then consumed by `double_pixel_copy_increment`
 * in a 2-way unrolled Duff's device (so the single-pixel step always
 * precedes the double-pixel steps).
 * `width` is evaluated once; the macro declares locals `n` and `w`.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,                        \
                           double_pixel_copy_increment, width)          \
{ int n, w = (width);                                                   \
    if (w & 1) {                                                        \
        pixel_copy_increment;                                           \
        w--;                                                            \
    }                                                                   \
    if (w > 0) {                                                        \
        n = (w + 2) / 4;                                                \
        switch (w & 2) {                                                \
        case 0: do {    double_pixel_copy_increment;                    \
        case 2:         double_pixel_copy_increment;                    \
                } while (--n > 0);                                      \
        }                                                               \
    }                                                                   \
}
slouken@689
   494
slouken@689
   495
/* 2 - times unrolled loop 4 pixels */
slouken@689
   496
/*
 * Process `width` pixels with one-, two- and four-pixel steps:
 * an odd count runs `pixel_copy_increment` once, a remaining count with
 * bit 1 set runs `double_pixel_copy_increment` once, and what is left (a
 * multiple of four) is consumed by `quatro_pixel_copy_increment` in a
 * 2-way unrolled Duff's device.
 * `width` is evaluated once; the macro declares locals `n` and `w`.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment,                        \
                           double_pixel_copy_increment,                 \
                           quatro_pixel_copy_increment, width)          \
{ int n, w = (width);                                                   \
    if (w & 1) {                                                        \
        pixel_copy_increment;                                           \
        w--;                                                            \
    }                                                                   \
    if (w & 2) {                                                        \
        double_pixel_copy_increment;                                    \
        w -= 2;                                                         \
    }                                                                   \
    if (w > 0) {                                                        \
        n = (w + 7) / 8;                                                \
        switch (w & 4) {                                                \
        case 0: do {    quatro_pixel_copy_increment;                    \
        case 4:         quatro_pixel_copy_increment;                    \
                } while (--n > 0);                                      \
        }                                                               \
    }                                                                   \
}
slouken@689
   517
slouken@0
   518
/* Use the 8-times version of the loop by default */
slouken@0
   519
/* Default unrolled loop: forwards unchanged to the 8-way DUFFS_LOOP8. */
#define DUFFS_LOOP(pixel_copy_increment, width)                         \
    DUFFS_LOOP8(pixel_copy_increment, width)
slouken@0
   521
slouken@0
   522
#else
slouken@0
   523
slouken@0
   524
/* Don't use Duff's device to unroll loops */
slouken@689
   525
/*
 * Plain-loop version of DUFFS_LOOP_DOUBLE2: an odd `width` runs
 * `pixel_copy_increment` once, then `double_pixel_copy_increment` runs
 * once for every remaining pair of pixels.
 * `width` is evaluated once; the macro declares a local `n`.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,                        \
                           double_pixel_copy_increment, width)          \
{ int n = (width);                                                      \
    if (n & 1) {                                                        \
        pixel_copy_increment;                                           \
        n--;                                                            \
    }                                                                   \
    n = n >> 1;                                                         \
    for (; n > 0; --n) {                                                \
        double_pixel_copy_increment;                                    \
    }                                                                   \
}
slouken@689
   537
slouken@689
   538
/* Don't use Duff's device to unroll loops */
slouken@689
   539
/*
 * Plain-loop version of DUFFS_LOOP_QUATRO2: an odd `width` runs
 * `pixel_copy_increment` once, a remaining count with bit 1 set runs
 * `double_pixel_copy_increment` once, then `quatro_pixel_copy_increment`
 * runs once for every remaining group of four pixels.
 * `width` is evaluated once; the macro declares a local `n`.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment,                        \
                           double_pixel_copy_increment,                 \
                           quatro_pixel_copy_increment, width)          \
{ int n = (width);                                                      \
    if (n & 1) {                                                        \
        pixel_copy_increment;                                           \
        n--;                                                            \
    }                                                                   \
    if (n & 2) {                                                        \
        double_pixel_copy_increment;                                    \
        n -= 2;                                                         \
    }                                                                   \
    n = n >> 2;                                                         \
    for (; n > 0; --n) {                                                \
        quatro_pixel_copy_increment;                                    \
    }                                                                   \
}
slouken@689
   556
slouken@689
   557
/* Don't use Duff's device to unroll loops */
slouken@0
   558
/*
 * Plain-loop version: execute `pixel_copy_increment` `width` times
 * (nothing for width <= 0).  `width` is evaluated once; the macro
 * declares a local `n`.  DUFFS_LOOP8 and DUFFS_LOOP4 are aliases so that
 * callers can name an unroll factor whether or not unrolling is enabled.
 */
#define DUFFS_LOOP(pixel_copy_increment, width)                         \
{ int n;                                                                \
    for (n = (width); n > 0; --n) {                                     \
        pixel_copy_increment;                                           \
    }                                                                   \
}
#define DUFFS_LOOP8(pixel_copy_increment, width)                        \
    DUFFS_LOOP(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width)                        \
    DUFFS_LOOP(pixel_copy_increment, width)
slouken@0
   568
slouken@0
   569
#endif /* USE_DUFFS_LOOP */
slouken@0
   570
slouken@0
   571
/* Prevent Visual C++ 6.0 from printing out stupid warnings */
slouken@0
   572
#if defined(_MSC_VER) && (_MSC_VER >= 600)
slouken@0
   573
#pragma warning(disable: 4550)
slouken@0
   574
#endif
slouken@0
   575
slouken@0
   576
#endif /* _SDL_blit_h */
slouken@1895
   577
/* vi: set ts=4 sw=4 expandtab: */