src/video/SDL_blit.h
author Sam Lantinga <slouken@libsdl.org>
Sun, 07 Dec 2008 22:37:40 +0000
changeset 2853 6258fa7cd300
parent 2824 4dba7aa7ea77
child 2859 99210400e8b9
permissions -rw-r--r--
Fixed picking blit function when RLE fails
slouken@0
     1
/*
slouken@0
     2
    SDL - Simple DirectMedia Layer
slouken@1312
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@0
     4
slouken@0
     5
    This library is free software; you can redistribute it and/or
slouken@1312
     6
    modify it under the terms of the GNU Lesser General Public
slouken@0
     7
    License as published by the Free Software Foundation; either
slouken@1312
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@0
     9
slouken@0
    10
    This library is distributed in the hope that it will be useful,
slouken@0
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@0
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1312
    13
    Lesser General Public License for more details.
slouken@0
    14
slouken@1312
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1312
    16
    License along with this library; if not, write to the Free Software
slouken@1312
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@0
    18
slouken@0
    19
    Sam Lantinga
slouken@252
    20
    slouken@libsdl.org
slouken@0
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
#ifndef _SDL_blit_h
slouken@0
    25
#define _SDL_blit_h
slouken@0
    26
slouken@2249
    27
#ifdef __MMX__
slouken@2249
    28
#include <mmintrin.h>
slouken@2249
    29
#endif
slouken@2261
    30
#ifdef __3dNOW__
slouken@2261
    31
#include <mm3dnow.h>
slouken@2261
    32
#endif
slouken@2249
    33
#ifdef __SSE__
slouken@2249
    34
#include <xmmintrin.h>
slouken@2249
    35
#endif
slouken@2262
    36
#ifdef __SSE2__
slouken@2262
    37
#include <emmintrin.h>
slouken@2262
    38
#endif
slouken@2249
    39
slouken@2250
    40
#include "SDL_cpuinfo.h"
slouken@0
    41
#include "SDL_endian.h"
slouken@0
    42
slouken@2262
    43
/* SDL blit copy flags */
slouken@2266
    44
/* Each flag occupies a single bit so that flags can be OR-ed together. */
#define SDL_COPY_MODULATE_COLOR     (1 << 0)
#define SDL_COPY_MODULATE_ALPHA     (1 << 1)
#define SDL_COPY_MASK               (1 << 4)
#define SDL_COPY_BLEND              (1 << 5)
#define SDL_COPY_ADD                (1 << 6)
#define SDL_COPY_MOD                (1 << 7)
#define SDL_COPY_COLORKEY           (1 << 8)
#define SDL_COPY_NEAREST            (1 << 9)
#define SDL_COPY_RLE_DESIRED        (1 << 12)
#define SDL_COPY_RLE_COLORKEY       (1 << 13)
#define SDL_COPY_RLE_ALPHAKEY       (1 << 14)
/* Union of the three RLE bits, for testing or clearing them together. */
#define SDL_COPY_RLE_MASK           (SDL_COPY_RLE_DESIRED | SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY)
slouken@2262
    56
slouken@2262
    57
/* SDL blit CPU flags */
slouken@2266
    58
/* SDL_CPU_ANY has no bits set; every other value is a single bit. */
#define SDL_CPU_ANY                 0
#define SDL_CPU_MMX                 (1 << 0)
#define SDL_CPU_3DNOW               (1 << 1)
#define SDL_CPU_SSE                 (1 << 2)
#define SDL_CPU_SSE2                (1 << 3)
#define SDL_CPU_ALTIVEC_PREFETCH    (1 << 4)
#define SDL_CPU_ALTIVEC_NOPREFETCH  (1 << 5)
slouken@2262
    65
slouken@2267
    66
typedef struct
slouken@2267
    67
{
slouken@2262
    68
    Uint8 *src;
slouken@2262
    69
    int src_w, src_h;
slouken@2262
    70
    int src_pitch;
slouken@2267
    71
    int src_skip;
slouken@2262
    72
    Uint8 *dst;
slouken@2262
    73
    int dst_w, dst_h;
slouken@2262
    74
    int dst_pitch;
slouken@2267
    75
    int dst_skip;
slouken@2262
    76
    SDL_PixelFormat *src_fmt;
slouken@2262
    77
    SDL_PixelFormat *dst_fmt;
slouken@1895
    78
    Uint8 *table;
slouken@2262
    79
    int flags;
slouken@2262
    80
    Uint32 colorkey;
slouken@2262
    81
    Uint8 r, g, b, a;
slouken@0
    82
} SDL_BlitInfo;
slouken@0
    83
slouken@2267
    84
/* Pointer to a blit routine: it receives the SDL_BlitInfo describing the
   operation and returns nothing. */
typedef void (SDLCALL * SDL_BlitFunc) (SDL_BlitInfo * info);
slouken@2262
    85
slouken@2267
    86
typedef struct
slouken@2267
    87
{
slouken@2262
    88
    Uint32 src_format;
slouken@2262
    89
    Uint32 dst_format;
slouken@2262
    90
    int flags;
slouken@2262
    91
    int cpu;
slouken@2262
    92
    SDL_BlitFunc func;
slouken@2262
    93
} SDL_BlitFuncEntry;
slouken@0
    94
slouken@0
    95
/* Blit mapping definition */
slouken@1895
    96
typedef struct SDL_BlitMap
slouken@1895
    97
{
slouken@1895
    98
    SDL_Surface *dst;
slouken@1895
    99
    int identity;
slouken@2257
   100
    SDL_blit blit;
slouken@2257
   101
    void *data;
slouken@2262
   102
    SDL_BlitInfo info;
slouken@0
   103
slouken@1895
   104
    /* the version count matches the destination; mismatch indicates
slouken@1895
   105
       an invalid mapping */
slouken@1895
   106
    unsigned int format_version;
slouken@0
   107
} SDL_BlitMap;
slouken@0
   108
slouken@0
   109
/* Functions found in SDL_blit.c */
slouken@1895
   110
extern int SDL_CalculateBlit(SDL_Surface * surface);
slouken@0
   111
slouken@2267
   112
/* Functions found in SDL_blit_*.c */
slouken@2267
   113
extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface * surface);
slouken@2267
   114
extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface * surface);
slouken@2267
   115
extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface * surface);
slouken@2267
   116
extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface * surface);
slouken@2263
   117
slouken@0
   118
/*
slouken@0
   119
 * Useful macros for blitting routines
slouken@0
   120
 */
slouken@0
   121
slouken@2249
   122
/*
 * DECLARE_ALIGNED(t, v, a): declare variable v of type t aligned on an
 * a-byte boundary, using whichever syntax the compiler offers.
 * Compilers that are neither GCC-compatible nor MSVC now use C11 _Alignas
 * when available; only as a last resort is the alignment request dropped
 * (previously every such compiler lost it silently).
 */
#if defined(__GNUC__)
#define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#define DECLARE_ALIGNED(t,v,a)  _Alignas(a) t v
#else
#define DECLARE_ALIGNED(t,v,a)  t v
#endif
slouken@2249
   129
slouken@0
   130
/*
 * Non-zero when formats A and B (pointers) agree on BitsPerPixel, Rmask and
 * Amask.  No other field is compared.
 */
#define FORMAT_EQUAL(A, B) \
    (!((A)->BitsPerPixel != (B)->BitsPerPixel \
       || (A)->Rmask != (B)->Rmask \
       || (A)->Amask != (B)->Amask))
slouken@0
   133
slouken@0
   134
/* Load pixel of the specified format from a buffer and get its R-G-B values */
slouken@0
   135
/* FIXME: rescale values to 0..255 here? */
icculus@1162
   136
/*
 * Split Pixel into r, g, b using the masks, shifts and loss values of fmt
 * (a pointer).  r, g and b must be assignable.  Pixel and fmt are
 * parenthesized so that compound arguments such as `hi | lo` or `&format`
 * expand correctly; both are evaluated more than once.
 */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
{ \
    r = ((((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift)<<(fmt)->Rloss); \
    g = ((((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift)<<(fmt)->Gloss); \
    b = ((((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift)<<(fmt)->Bloss); \
}
icculus@1162
   142
/*
 * Split a 5-6-5 pixel into r, g, b, each shifted up to occupy the top bits
 * of an 8-bit value.  Pixel is parenthesized so compound arguments work.
 */
#define RGB_FROM_RGB565(Pixel, r, g, b) \
{ \
    r = ((((Pixel)&0xF800)>>11)<<3); \
    g = ((((Pixel)&0x07E0)>>5)<<2); \
    b = (((Pixel)&0x001F)<<3); \
}
icculus@1162
   148
/*
 * Split a 5-5-5 pixel into r, g, b, each shifted up to occupy the top bits
 * of an 8-bit value.  Pixel is parenthesized so compound arguments work.
 */
#define RGB_FROM_RGB555(Pixel, r, g, b) \
{ \
    r = ((((Pixel)&0x7C00)>>10)<<3); \
    g = ((((Pixel)&0x03E0)>>5)<<3); \
    b = (((Pixel)&0x001F)<<3); \
}
icculus@1162
   154
/*
 * Split a pixel laid out as 0x00RRGGBB into r, g, b.
 * Pixel is parenthesized so compound arguments work.
 */
#define RGB_FROM_RGB888(Pixel, r, g, b) \
{ \
    r = (((Pixel)&0xFF0000)>>16); \
    g = (((Pixel)&0xFF00)>>8); \
    b = ((Pixel)&0xFF); \
}
icculus@1162
   160
/*
 * Load one pixel of bpp bytes from buf into Pixel.
 *   bpp 2 / 4: read through a Uint16 / Uint32 pointer.
 *   bpp 3:     assemble the three bytes according to SDL_BYTEORDER.
 *   other:     Pixel is set to 0.  The former `Pixel;` statement had no
 *              effect and left Pixel indeterminate.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
do { \
    switch (bpp) { \
        case 2: \
            Pixel = *((Uint16 *)(buf)); \
        break; \
 \
        case 3: { \
            Uint8 *B = (Uint8 *)(buf); \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
            } else { \
                Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
            } \
        } \
        break; \
 \
        case 4: \
            Pixel = *((Uint32 *)(buf)); \
        break; \
 \
        default: \
            Pixel = 0; /* unsupported bpp: give Pixel a defined value */ \
        break; \
    } \
} while (0)
slouken@0
   186
icculus@1162
   187
/*
 * Read one pixel of bpp bytes from buf and split it into r, g, b.
 *   bpp 2 / 4: load into Pixel, then decode with RGB_FROM_PIXEL.
 *   bpp 3:     fetch each channel byte directly, located via fmt's shifts
 *              and SDL_BYTEORDER; Pixel is set to 0 (it used to be left
 *              untouched on this path).
 *   other:     Pixel, r, g and b are all set to 0 (previously they were
 *              left indeterminate).
 * fmt is parenthesized so that arguments such as `&format` expand correctly.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
do { \
    switch (bpp) { \
        case 2: \
            Pixel = *((Uint16 *)(buf)); \
            RGB_FROM_PIXEL(Pixel, (fmt), r, g, b); \
        break; \
 \
        case 3: { \
            Pixel = 0; \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                r = *((buf)+(fmt)->Rshift/8); \
                g = *((buf)+(fmt)->Gshift/8); \
                b = *((buf)+(fmt)->Bshift/8); \
            } else { \
                r = *((buf)+2-(fmt)->Rshift/8); \
                g = *((buf)+2-(fmt)->Gshift/8); \
                b = *((buf)+2-(fmt)->Bshift/8); \
            } \
        } \
        break; \
 \
        case 4: \
            Pixel = *((Uint32 *)(buf)); \
            RGB_FROM_PIXEL(Pixel, (fmt), r, g, b); \
        break; \
 \
        default: \
            Pixel = 0; /* unsupported bpp: produce defined values */ \
            r = g = b = 0; \
        break; \
    } \
} while (0)
slouken@0
   218
slouken@0
   219
/* Assemble R-G-B values into a specified pixel format and store them */
icculus@1162
   220
/*
 * Pack r, g, b into Pixel using fmt's loss and shift values (fmt is a
 * pointer).  All inputs are parenthesized so that compound arguments such
 * as `hi | lo` are shifted as a whole.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
{ \
    Pixel = (((r)>>(fmt)->Rloss)<<(fmt)->Rshift)| \
            (((g)>>(fmt)->Gloss)<<(fmt)->Gshift)| \
            (((b)>>(fmt)->Bloss)<<(fmt)->Bshift); \
}
icculus@1162
   226
/* Pack 8-bit r, g, b into a 5-6-5 Pixel; arguments are parenthesized so
   compound expressions are shifted as a whole. */
#define RGB565_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = (((r)>>3)<<11)|(((g)>>2)<<5)|((b)>>3); \
}
icculus@1162
   230
/* Pack 8-bit r, g, b into a 5-5-5 Pixel; arguments are parenthesized so
   compound expressions are shifted as a whole. */
#define RGB555_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = (((r)>>3)<<10)|(((g)>>3)<<5)|((b)>>3); \
}
icculus@1162
   234
/* Pack 8-bit r, g, b into a 0x00RRGGBB Pixel; arguments are parenthesized
   so compound expressions are shifted as a whole. */
#define RGB888_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = ((r)<<16)|((g)<<8)|(b); \
}
slouken@0
   238
/*
 * Pack r, g, b according to fmt and store the result at buf.
 *   bpp 2 / 4: build the pixel with PIXEL_FROM_RGB and store it through a
 *              Uint16 / Uint32 pointer.
 *   bpp 3:     store each channel byte directly, located via fmt's shifts
 *              and SDL_BYTEORDER.
 *   other:     nothing is written.
 * fmt and the channel values are parenthesized so compound arguments
 * expand correctly.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 Pixel; \
 \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
            *((Uint16 *)(buf)) = Pixel; \
        } \
        break; \
 \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf)+(fmt)->Rshift/8) = (r); \
                *((buf)+(fmt)->Gshift/8) = (g); \
                *((buf)+(fmt)->Bshift/8) = (b); \
            } else { \
                *((buf)+2-(fmt)->Rshift/8) = (r); \
                *((buf)+2-(fmt)->Gshift/8) = (g); \
                *((buf)+2-(fmt)->Bshift/8) = (b); \
            } \
        } \
        break; \
 \
        case 4: { \
            Uint32 Pixel; \
 \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
            *((Uint32 *)(buf)) = Pixel; \
        } \
        break; \
 \
        default: \
        break; \
    } \
}
slouken@0
   271
/*
 * Like ASSEMBLE_RGB, but for bpp 2 and 4 the bits of the existing
 * destination pixel selected by Amask are preserved and OR-ed into the
 * new pixel.  For bpp 3 the three channel bytes are simply overwritten.
 *
 * buf is parenthesized before being cast: with the old `(Uint16 *)buf`
 * form an argument such as `dst + 4` was cast first and then advanced by
 * four *pixels* instead of four bytes.  Amask, fmt and the channel values
 * are parenthesized as well.
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 *bufp; \
            Uint16 Pixel; \
 \
            bufp = (Uint16 *)(buf); \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
            *bufp = Pixel | (*bufp & (Amask)); \
        } \
        break; \
 \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf)+(fmt)->Rshift/8) = (r); \
                *((buf)+(fmt)->Gshift/8) = (g); \
                *((buf)+(fmt)->Bshift/8) = (b); \
            } else { \
                *((buf)+2-(fmt)->Rshift/8) = (r); \
                *((buf)+2-(fmt)->Gshift/8) = (g); \
                *((buf)+2-(fmt)->Bshift/8) = (b); \
            } \
        } \
        break; \
 \
        case 4: { \
            Uint32 *bufp; \
            Uint32 Pixel; \
 \
            bufp = (Uint32 *)(buf); \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
            *bufp = Pixel | (*bufp & (Amask)); \
        } \
        break; \
 \
        default: \
        break; \
    } \
}
slouken@0
   308
slouken@0
   309
/* FIXME: Should we rescale alpha into 0..255 here? */
icculus@1162
   310
/*
 * Split Pixel into r, g, b, a using the masks, shifts and loss values of
 * fmt (a pointer).  Pixel and fmt are parenthesized so compound arguments
 * expand correctly; both are evaluated more than once.
 */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
{ \
    r = (((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift)<<(fmt)->Rloss; \
    g = (((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift)<<(fmt)->Gloss; \
    b = (((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift)<<(fmt)->Bloss; \
    a = (((Pixel)&(fmt)->Amask)>>(fmt)->Ashift)<<(fmt)->Aloss; \
}
icculus@1162
   317
/*
 * Split Pixel into r, g, b, a using fmt's masks and shifts only (no loss
 * correction is applied).  Pixel and fmt are parenthesized so compound
 * arguments expand correctly.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
{ \
    r = ((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift; \
    g = ((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift; \
    b = ((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift; \
    a = ((Pixel)&(fmt)->Amask)>>(fmt)->Ashift; \
}
icculus@1162
   324
/* Split a 0xRRGGBBAA pixel into r, g, b, a.  Pixel is parenthesized so
   compound arguments are shifted as a whole. */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
{ \
    r = ((Pixel)>>24); \
    g = (((Pixel)>>16)&0xFF); \
    b = (((Pixel)>>8)&0xFF); \
    a = ((Pixel)&0xFF); \
}
icculus@1162
   331
/* Split a 0xAARRGGBB pixel into r, g, b, a.  Pixel is parenthesized so
   compound arguments are shifted as a whole. */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
{ \
    r = (((Pixel)>>16)&0xFF); \
    g = (((Pixel)>>8)&0xFF); \
    b = ((Pixel)&0xFF); \
    a = ((Pixel)>>24); \
}
icculus@1162
   338
/* Split a 0xAABBGGRR pixel into r, g, b, a.  Pixel is parenthesized so
   compound arguments are shifted as a whole. */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
{ \
    r = ((Pixel)&0xFF); \
    g = (((Pixel)>>8)&0xFF); \
    b = (((Pixel)>>16)&0xFF); \
    a = ((Pixel)>>24); \
}
icculus@1162
   345
/*
 * Read one pixel of bpp bytes from buf and split it into r, g, b, a.
 *   bpp 2 / 4: load into Pixel, then decode with RGBA_FROM_PIXEL.
 *   bpp 3:     fetch each channel byte directly, located via fmt's shifts
 *              and SDL_BYTEORDER; a is set to 0xFF and Pixel to 0 (Pixel
 *              used to be left untouched on this path).
 *   other:     Pixel, r, g, b and a are all set to 0 (previously they were
 *              left indeterminate).
 * fmt is parenthesized so that arguments such as `&format` expand correctly.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
do { \
    switch (bpp) { \
        case 2: \
            Pixel = *((Uint16 *)(buf)); \
            RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a); \
        break; \
 \
        case 3: { \
            Pixel = 0; \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                r = *((buf)+(fmt)->Rshift/8); \
                g = *((buf)+(fmt)->Gshift/8); \
                b = *((buf)+(fmt)->Bshift/8); \
            } else { \
                r = *((buf)+2-(fmt)->Rshift/8); \
                g = *((buf)+2-(fmt)->Gshift/8); \
                b = *((buf)+2-(fmt)->Bshift/8); \
            } \
            a = 0xFF; \
        } \
        break; \
 \
        case 4: \
            Pixel = *((Uint32 *)(buf)); \
            RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a); \
        break; \
 \
        default: \
            Pixel = 0; /* unsupported bpp: produce defined values */ \
            r = g = b = a = 0; \
        break; \
    } \
} while (0)
slouken@0
   377
slouken@0
   378
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
icculus@1162
   379
/*
 * Pack r, g, b, a into Pixel using fmt's loss and shift values (fmt is a
 * pointer).  All inputs are parenthesized so that compound arguments such
 * as `hi | lo` are shifted as a whole.
 */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
{ \
    Pixel = (((r)>>(fmt)->Rloss)<<(fmt)->Rshift)| \
            (((g)>>(fmt)->Gloss)<<(fmt)->Gshift)| \
            (((b)>>(fmt)->Bloss)<<(fmt)->Bshift)| \
            (((a)>>(fmt)->Aloss)<<(fmt)->Ashift); \
}
slouken@0
   386
/*
 * Pack r, g, b, a according to fmt and store the result at buf.
 *   bpp 2 / 4: build the pixel with PIXEL_FROM_RGBA and store it through a
 *              Uint16 / Uint32 pointer.
 *   bpp 3:     store the r, g, b bytes directly (a is not stored).
 *   other:     nothing is written.
 * fmt and the channel values are parenthesized so compound arguments
 * expand correctly.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 Pixel; \
 \
            PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
            *((Uint16 *)(buf)) = Pixel; \
        } \
        break; \
 \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf)+(fmt)->Rshift/8) = (r); \
                *((buf)+(fmt)->Gshift/8) = (g); \
                *((buf)+(fmt)->Bshift/8) = (b); \
            } else { \
                *((buf)+2-(fmt)->Rshift/8) = (r); \
                *((buf)+2-(fmt)->Gshift/8) = (g); \
                *((buf)+2-(fmt)->Bshift/8) = (b); \
            } \
        } \
        break; \
 \
        case 4: { \
            Uint32 Pixel; \
 \
            PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
            *((Uint32 *)(buf)) = Pixel; \
        } \
        break; \
 \
        default: \
        break; \
    } \
}
slouken@0
   419
icculus@1162
   420
/* Blend the RGB values of two Pixels based on a source alpha value */
slouken@0
   421
/*
 * Blend source channels sR/sG/sB over destination channels dR/dG/dB with
 * weight A, updating the destination values in place:
 *     d = (((s - d) * A) >> 8) + d
 * The shift by 8 approximates division by 255.  All inputs are
 * parenthesized so compound arguments (e.g. `hi | lo`) are subtracted as a
 * whole.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB) \
do { \
    dR = ((((sR)-(dR))*(A))>>8)+(dR); \
    dG = ((((sG)-(dG))*(A))>>8)+(dG); \
    dB = ((((sB)-(dB))*(A))>>8)+(dB); \
} while(0)
slouken@0
   427
icculus@1162
   428
/* Blend the RGB values of two Pixels based on a source alpha value */
icculus@1047
   429
/*
 * Blend source channels over destination channels with source alpha sA
 * (0..255), updating dR/dG/dB in place.  Each channel computes
 *     t = 1 + s * sA + d * (255 - sA);   d = (t + (t >> 8)) >> 8
 * which divides by 255 without an actual division.
 * All inputs are parenthesized so compound arguments (e.g. `x + y` for sA)
 * are complemented and multiplied as a whole.  The temporaries are named
 * tR, tG, tB, tA, so the arguments must not use those names.
 */
#define ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB) \
do { \
    unsigned tR, tG, tB, tA; \
    tA = 255 - (sA); \
    tR = 1 + ((sR) * (sA)) + ((dR) * tA); \
    dR = (tR + (tR >> 8)) >> 8; \
    tG = 1 + ((sG) * (sA)) + ((dG) * tA); \
    dG = (tG + (tG >> 8)) >> 8; \
    tB = 1 + ((sB) * (sA)) + ((dB) * tA); \
    dB = (tB + (tB >> 8)) >> 8; \
} while(0)
icculus@1047
   440
icculus@1047
   441
slouken@0
   442
/* This is a very useful loop for optimizing blitters */
slouken@553
   443
/* Enable the Duff's-device loops everywhere except Visual C++ 7
   (_MSC_VER 1300), whose optimizer has a bug when compiling this code. */
#if !defined(_MSC_VER) || (_MSC_VER != 1300)
#define USE_DUFFS_LOOP
#endif
slouken@0
   448
#ifdef USE_DUFFS_LOOP
slouken@0
   449
slouken@0
   450
/* 8-times unrolled loop */
slouken@0
   451
/*
 * Execute pixel_copy_increment exactly `width` times, unrolled eight times
 * with Duff's device.
 *
 * Changes from the previous form:
 *  - a width <= 0 now runs the body zero times (it used to enter at
 *    `case 0` and run a full pass of eight copies);
 *  - width is evaluated once and parenthesized.
 * The counter `n` holds the iterations still owed before the current pass;
 * the switch jumps into the middle of the first pass to absorb width % 8.
 * The width argument must not itself be a variable named n.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width) \
{ int n = (width); \
    if (n > 0) { \
        switch (n & 7) { \
        case 0: do {    pixel_copy_increment; \
        case 7:         pixel_copy_increment; \
        case 6:         pixel_copy_increment; \
        case 5:         pixel_copy_increment; \
        case 4:         pixel_copy_increment; \
        case 3:         pixel_copy_increment; \
        case 2:         pixel_copy_increment; \
        case 1:         pixel_copy_increment; \
                } while ( (n -= 8) > 0 ); \
        } \
    } \
}
slouken@0
   465
slouken@0
   466
/* 4-times unrolled loop */
slouken@0
   467
/*
 * Execute pixel_copy_increment exactly `width` times, unrolled four times
 * with Duff's device.
 *
 * A width <= 0 now runs the body zero times (it used to run a full pass of
 * four copies), and width is evaluated once and parenthesized.
 * The width argument must not itself be a variable named n.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width) \
{ int n = (width); \
    if (n > 0) { \
        switch (n & 3) { \
        case 0: do {    pixel_copy_increment; \
        case 3:         pixel_copy_increment; \
        case 2:         pixel_copy_increment; \
        case 1:         pixel_copy_increment; \
                } while ( (n -= 4) > 0 ); \
        } \
    } \
}
slouken@0
   477
slouken@689
   478
/* 2 - times unrolled loop */
slouken@689
   479
/*
 * Process `width` pixels: one pixel_copy_increment if width is odd, then
 * width / 2 executions of double_pixel_copy_increment, unrolled twice with
 * Duff's device.
 *
 * A negative width is now clamped to 0 so nothing runs; previously a
 * negative odd width still executed the single-pixel step.
 * The width argument must not itself be a variable named n or w.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
                double_pixel_copy_increment, width) \
{ int n, w = (width); \
    if ( w < 0 ) { \
        w = 0; \
    } \
    if ( w & 1 ) { \
        pixel_copy_increment; \
        w--; \
    } \
    if ( w > 0 ) { \
        n = ( w + 2 ) / 4; \
        switch ( w & 2 ) { \
        case 0: do {    double_pixel_copy_increment; \
        case 2:         double_pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
slouken@689
   495
slouken@689
   496
/* 2 - times unrolled loop 4 pixels */
slouken@689
   497
/*
 * Process `width` pixels: one pixel_copy_increment if bit 0 of width is
 * set, one double_pixel_copy_increment if bit 1 is set, then width / 4
 * executions of quatro_pixel_copy_increment, unrolled twice with Duff's
 * device.
 *
 * A negative width is now clamped to 0 so nothing runs; previously the
 * single and double steps could still execute for negative widths.
 * The width argument must not itself be a variable named n or w.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
                double_pixel_copy_increment, \
                quatro_pixel_copy_increment, width) \
{ int n, w = (width); \
    if ( w < 0 ) { \
        w = 0; \
    } \
    if ( w & 1 ) { \
        pixel_copy_increment; \
        w--; \
    } \
    if ( w & 2 ) { \
        double_pixel_copy_increment; \
        w -= 2; \
    } \
    if ( w > 0 ) { \
        n = ( w + 7 ) / 8; \
        switch ( w & 4 ) { \
        case 0: do {    quatro_pixel_copy_increment; \
        case 4:         quatro_pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
slouken@689
   518
slouken@0
   519
/* Use the 8-times version of the loop by default */
slouken@0
   520
/* DUFFS_LOOP is simply the 8-times unrolled variant. */
#define DUFFS_LOOP(copy_step, count) DUFFS_LOOP8(copy_step, count)
slouken@0
   522
slouken@0
   523
#else
slouken@0
   524
slouken@0
   525
/* Don't use Duff's device to unroll loops */
slouken@689
   526
/*
 * Plain-loop version of DUFFS_LOOP_DOUBLE2: one pixel_copy_increment if
 * width is odd, then width / 2 executions of double_pixel_copy_increment.
 *
 * A negative width is now clamped to 0 so nothing runs; previously a
 * negative odd width still executed the single-pixel step.
 * The width argument must not itself be a variable named n.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
             double_pixel_copy_increment, width) \
{ int n = (width); \
    if ( n < 0 ) { \
        n = 0; \
    } \
    if ( n & 1 ) { \
        pixel_copy_increment; \
        n--; \
    } \
    n = n >> 1; \
    for (; n > 0; --n) { \
        double_pixel_copy_increment; \
    } \
}
slouken@689
   538
slouken@689
   539
/* Don't use Duff's device to unroll loops */
slouken@689
   540
/*
 * Plain-loop version of DUFFS_LOOP_QUATRO2: one pixel_copy_increment if
 * bit 0 of width is set, one double_pixel_copy_increment if bit 1 is set,
 * then width / 4 executions of quatro_pixel_copy_increment.
 *
 * A negative width is now clamped to 0 so nothing runs; previously the
 * single and double steps could still execute for negative widths.
 * The width argument must not itself be a variable named n.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
                double_pixel_copy_increment, \
                quatro_pixel_copy_increment, width) \
{ int n = (width); \
    if ( n < 0 ) { \
        n = 0; \
    } \
    if ( n & 1 ) { \
        pixel_copy_increment; \
        n--; \
    } \
    if ( n & 2 ) { \
        double_pixel_copy_increment; \
        n -= 2; \
    } \
    n = n >> 2; \
    for (; n > 0; --n) { \
        quatro_pixel_copy_increment; \
    } \
}
slouken@689
   557
slouken@689
   558
/* Don't use Duff's device to unroll loops */
slouken@0
   559
/* Plain counted loop: runs pixel_copy_increment `width` times, and not at
   all when width is zero or negative. */
#define DUFFS_LOOP(pixel_copy_increment, width) \
{ int n = width; \
    for ( ; n > 0; --n ) { \
        pixel_copy_increment; \
    } \
}
slouken@0
   565
/* Without Duff's device the "unrolled" names are aliases of the plain loop. */
#define DUFFS_LOOP8(copy_step, count) DUFFS_LOOP(copy_step, count)
#define DUFFS_LOOP4(copy_step, count) DUFFS_LOOP(copy_step, count)
slouken@0
   569
slouken@0
   570
#endif /* USE_DUFFS_LOOP */
slouken@0
   571
slouken@0
   572
/* Prevent Visual C++ 6.0 from printing out stupid warnings */
slouken@0
   573
#if defined(_MSC_VER) && (_MSC_VER >= 600)
slouken@0
   574
#pragma warning(disable: 4550)
slouken@0
   575
#endif
slouken@0
   576
slouken@0
   577
#endif /* _SDL_blit_h */
slouken@1895
   578
/* vi: set ts=4 sw=4 expandtab: */