src/video/SDL_blit.h
author Sam Lantinga
Sun, 06 Mar 2011 21:12:19 -0800
changeset 5423 b69fa50e80d7
parent 5389 24903690f48a
child 5426 7a3982c50af8
permissions -rw-r--r--
a Nintendo ds update

Frank Zago to SDL

For those interested, here's a snapshot of the current port. I did away with
most of the previous attempt, which was based on the sprite engine, because the
support is limited to 128 64x64 sprites. Instead I'm using the gl engine.
The drawback is that either the frame buffer or the gl engine can be used, but
not both, because there's not that much video memory on a DS.

With minimal changes to their code, it can now run the following tests:
testspriteminimal, testscale and testsprite2. The last 2 only run under the
emulator for some reason. The tests are not included in this patch for size
reasons.

In 16-bit mode, the 16th bit indicates transparency/opacity. If it is 0, the
color is not displayed. So I had to patch a few core files to set that bit to 1.
See the patch for src/video/SDL_RLEaccel.c and src/video/SDL_blit.h. Is that ok,
or is there a better way?

The nds also doesn't support windowed mode, so I force the fullscreen in
src/video/SDL_video.c. Is that ok, or is there a better way ?

To get a smaller library, I also tried to not compile the software renderer
when the hardware renderer is compiled in, and to define SDL_NO_COMPAT; however,
the compilation eventually fails in SDL_surface.c because SDL_SRCCOLORKEY is
defined in SDL_compat.h. Is SDL_NO_COMPAT only for applications and not for SDL
itself?
slouken@0
     1
/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2011 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    Sam Lantinga
    slouken@libsdl.org
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
#ifndef _SDL_blit_h
slouken@0
    25
#define _SDL_blit_h
slouken@0
    26
slouken@2250
    27
#include "SDL_cpuinfo.h"
slouken@0
    28
#include "SDL_endian.h"
slouken@5163
    29
#include "SDL_surface.h"
slouken@0
    30
slouken@2262
    31
/* SDL blit copy flags */
slouken@2266
    32
/* Each flag below occupies its own bit, so flags can be OR'd together. */
#define SDL_COPY_MODULATE_COLOR     0x00000001
#define SDL_COPY_MODULATE_ALPHA     0x00000002
#define SDL_COPY_BLEND              0x00000010
#define SDL_COPY_ADD                0x00000020
#define SDL_COPY_MOD                0x00000040
#define SDL_COPY_COLORKEY           0x00000100
#define SDL_COPY_NEAREST            0x00000200
#define SDL_COPY_RLE_DESIRED        0x00001000
#define SDL_COPY_RLE_COLORKEY       0x00002000
#define SDL_COPY_RLE_ALPHAKEY       0x00004000
/* Union of the three RLE flags above. */
#define SDL_COPY_RLE_MASK           (SDL_COPY_RLE_DESIRED|SDL_COPY_RLE_COLORKEY|SDL_COPY_RLE_ALPHAKEY)
slouken@2262
    43
slouken@2262
    44
/* SDL blit CPU flags */
slouken@2266
    45
/* SDL_CPU_ANY is zero (no feature bit); every other value is a single bit. */
#define SDL_CPU_ANY                 0x00000000
#define SDL_CPU_MMX                 0x00000001
#define SDL_CPU_3DNOW               0x00000002
#define SDL_CPU_SSE                 0x00000004
#define SDL_CPU_SSE2                0x00000008
#define SDL_CPU_ALTIVEC_PREFETCH    0x00000010
#define SDL_CPU_ALTIVEC_NOPREFETCH  0x00000020
slouken@2262
    52
slouken@2267
    53
typedef struct
slouken@2267
    54
{
slouken@2262
    55
    Uint8 *src;
slouken@2262
    56
    int src_w, src_h;
slouken@2262
    57
    int src_pitch;
slouken@2267
    58
    int src_skip;
slouken@2262
    59
    Uint8 *dst;
slouken@2262
    60
    int dst_w, dst_h;
slouken@2262
    61
    int dst_pitch;
slouken@2267
    62
    int dst_skip;
slouken@2262
    63
    SDL_PixelFormat *src_fmt;
slouken@2262
    64
    SDL_PixelFormat *dst_fmt;
slouken@1895
    65
    Uint8 *table;
slouken@2262
    66
    int flags;
slouken@2262
    67
    Uint32 colorkey;
slouken@2262
    68
    Uint8 r, g, b, a;
slouken@0
    69
} SDL_BlitInfo;
slouken@0
    70
slouken@2267
    71
/* Pointer to a function that takes one SDL_BlitInfo and returns nothing. */
typedef void (SDLCALL * SDL_BlitFunc) (SDL_BlitInfo * info);
slouken@2262
    72
slouken@2267
    73
typedef struct
slouken@2267
    74
{
slouken@2262
    75
    Uint32 src_format;
slouken@2262
    76
    Uint32 dst_format;
slouken@2262
    77
    int flags;
slouken@2262
    78
    int cpu;
slouken@2262
    79
    SDL_BlitFunc func;
slouken@2262
    80
} SDL_BlitFuncEntry;
slouken@0
    81
slouken@0
    82
/* Blit mapping definition */
slouken@1895
    83
typedef struct SDL_BlitMap
slouken@1895
    84
{
slouken@1895
    85
    SDL_Surface *dst;
slouken@1895
    86
    int identity;
slouken@2257
    87
    SDL_blit blit;
slouken@2257
    88
    void *data;
slouken@2262
    89
    SDL_BlitInfo info;
slouken@0
    90
slouken@1895
    91
    /* the version count matches the destination; mismatch indicates
slouken@1895
    92
       an invalid mapping */
slouken@5288
    93
    Uint32 palette_version;
slouken@0
    94
} SDL_BlitMap;
slouken@0
    95
slouken@0
    96
/* Functions found in SDL_blit.c */
slouken@1895
    97
extern int SDL_CalculateBlit(SDL_Surface * surface);
slouken@0
    98
slouken@2267
    99
/* Functions found in SDL_blit_*.c */
slouken@2267
   100
extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface * surface);
slouken@2267
   101
extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface * surface);
slouken@2267
   102
extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface * surface);
slouken@2267
   103
extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface * surface);
slouken@2263
   104
slouken@0
   105
/*
 * Useful macros for blitting routines
 */
slouken@0
   108
slouken@2249
   109
/* DECLARE_ALIGNED(t, v, a): declare variable v of type t with alignment a.
 * On compilers that are neither GNU-compatible nor MSVC the alignment
 * request is dropped and a plain declaration is emitted.
 */
#if defined(__GNUC__)
#define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
#else
#define DECLARE_ALIGNED(t,v,a)  t v
#endif
slouken@2249
   116
slouken@5423
   117
/* The Nintendo surfaces are special. Bit 15 is the transparency
 * bit. It must be set for the pixel to be displayed. By setting that
 * value to 0 for other platforms, their compiler should optimize it
 * out. */
slouken@5423
   121
/* Value OR'd into 16-bit pixels by the ASSEMBLE_* macros: bit 15 when
 * __NDS__ is defined, zero otherwise. */
#ifdef __NDS__
#define NDS_BIT15 0x8000
#else
#define NDS_BIT15 0
#endif
slouken@5423
   126
slouken@0
   127
/* Load pixel of the specified format from a buffer and get its R-G-B values */
slouken@0
   128
/* FIXME: rescale values to 0..255 here? */
icculus@1162
   129
/* Extract r, g, b from Pixel using the masks, shifts and loss values of fmt.
 * Each channel is masked, shifted down by its shift, then shifted up by its
 * loss; no rescaling to 0..255 is done. Arguments are parenthesized so that
 * expressions (e.g. `hi | lo`, `&format`) can be passed safely.
 * The brace-block form is kept for compatibility with existing call sites.
 */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
{ \
    (r) = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss); \
    (g) = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss); \
    (b) = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss); \
}
icculus@1162
   135
/* Extract r, g, b from a 5-6-5 pixel (masks 0xF800 / 0x07E0 / 0x001F).
 * The low bits of each result are zero; no rescaling to 0..255 is done.
 */
#define RGB_FROM_RGB565(Pixel, r, g, b) \
{ \
    (r) = ((((Pixel) & 0xF800) >> 11) << 3); \
    (g) = ((((Pixel) & 0x07E0) >> 5) << 2); \
    (b) = (((Pixel) & 0x001F) << 3); \
}
icculus@1162
   141
/* Extract r, g, b from a 5-5-5 pixel (masks 0x7C00 / 0x03E0 / 0x001F).
 * The low three bits of each result are zero.
 */
#define RGB_FROM_RGB555(Pixel, r, g, b) \
{ \
    (r) = ((((Pixel) & 0x7C00) >> 10) << 3); \
    (g) = ((((Pixel) & 0x03E0) >> 5) << 3); \
    (b) = (((Pixel) & 0x001F) << 3); \
}
icculus@1162
   147
/* Extract r, g, b from a pixel laid out as 0x00RRGGBB. */
#define RGB_FROM_RGB888(Pixel, r, g, b) \
{ \
    (r) = (((Pixel) & 0xFF0000) >> 16); \
    (g) = (((Pixel) & 0xFF00) >> 8); \
    (b) = ((Pixel) & 0xFF); \
}
icculus@1162
   153
/* Read one pixel of bpp bytes from buf into Pixel.
 *   bpp 2: a Uint16 load;  bpp 4: a Uint32 load;
 *   bpp 3: three bytes combined according to SDL_BYTEORDER.
 * Any other bpp stores 0 in Pixel so the caller never sees an
 * indeterminate value (the old code only "mentioned" Pixel there).
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
do { \
    switch (bpp) { \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
        break; \
        case 3: { \
            Uint8 *B = (Uint8 *)(buf); \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (Pixel) = B[0] + (B[1] << 8) + (B[2] << 16); \
            } else { \
                (Pixel) = (B[0] << 16) + (B[1] << 8) + B[2]; \
            } \
        } \
        break; \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
        break; \
        default: \
            (Pixel) = 0; /* unsupported bpp */ \
        break; \
    } \
} while (0)
slouken@0
   179
icculus@1162
   180
/* Read one pixel of bpp bytes from buf and split it into r, g, b.
 *   bpp 2 / 4: load into Pixel, then decode with RGB_FROM_PIXEL.
 *   bpp 3: read each channel byte directly, located via fmt's shifts and
 *          SDL_BYTEORDER; Pixel is not written in this case.
 * Any other bpp zeroes Pixel and the three channels so that callers never
 * read indeterminate values.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
do { \
    switch (bpp) { \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
            RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b)); \
        break; \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (r) = *((buf) + (fmt)->Rshift / 8); \
                (g) = *((buf) + (fmt)->Gshift / 8); \
                (b) = *((buf) + (fmt)->Bshift / 8); \
            } else { \
                (r) = *((buf) + 2 - (fmt)->Rshift / 8); \
                (g) = *((buf) + 2 - (fmt)->Gshift / 8); \
                (b) = *((buf) + 2 - (fmt)->Bshift / 8); \
            } \
        } \
        break; \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
            RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b)); \
        break; \
        default: \
            /* unsupported bpp */ \
            (Pixel) = 0; \
            (r) = 0; (g) = 0; (b) = 0; \
        break; \
    } \
} while (0)
slouken@0
   211
slouken@0
   212
/* Assemble R-G-B values into a specified pixel format and store them */
icculus@1162
   213
/* Pack r, g, b into Pixel using fmt's loss and shift values.
 * Each channel is converted to Uint32 before the left shift so that a
 * channel shifted into bit 31 does not overflow a signed int.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
{ \
    (Pixel) = ((Uint32)((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
              ((Uint32)((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
              ((Uint32)((b) >> (fmt)->Bloss) << (fmt)->Bshift); \
}
icculus@1162
   219
/* Pack r, g, b into a 5-6-5 pixel, dropping the low 3/2/3 bits. */
#define RGB565_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = (((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3); \
}
icculus@1162
   223
/* Pack r, g, b into a 5-5-5 pixel, dropping the low 3 bits of each. */
#define RGB555_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = (((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3); \
}
icculus@1162
   227
/* Pack r, g, b into a pixel laid out as 0x00RRGGBB. */
#define RGB888_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = ((r) << 16) | ((g) << 8) | (b); \
}
slouken@2899
   231
/* Pack r, g, b, a into a pixel laid out as 0xAARRGGBB.
 * Channels are converted to Uint32 first: shifting a promoted int of 0x80 or
 * more left by 24 would overflow a signed int.
 */
#define ARGB8888_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)(a) << 24) | ((Uint32)(r) << 16) | \
              ((Uint32)(g) << 8) | (Uint32)(b); \
}
slouken@3054
   235
/* Pack r, g, b, a into a pixel laid out as 0xRRGGBBAA.
 * Channels are converted to Uint32 first so the 24-bit shift cannot overflow
 * a signed int.
 */
#define RGBA8888_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)(r) << 24) | ((Uint32)(g) << 16) | \
              ((Uint32)(b) << 8) | (Uint32)(a); \
}
slouken@3054
   239
/* Pack r, g, b, a into a pixel laid out as 0xAABBGGRR.
 * Channels are converted to Uint32 first so the 24-bit shift cannot overflow
 * a signed int.
 */
#define ABGR8888_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)(a) << 24) | ((Uint32)(b) << 16) | \
              ((Uint32)(g) << 8) | (Uint32)(r); \
}
slouken@3054
   243
/* Pack r, g, b, a into a pixel laid out as 0xBBGGRRAA.
 * Channels are converted to Uint32 first so the 24-bit shift cannot overflow
 * a signed int.
 */
#define BGRA8888_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)(b) << 24) | ((Uint32)(g) << 16) | \
              ((Uint32)(r) << 8) | (Uint32)(a); \
}
slouken@0
   247
/* Pack r, g, b per fmt and store the result as one pixel of bpp bytes at buf.
 *   bpp 2: build with PIXEL_FROM_RGB and store as Uint16, OR'ing in NDS_BIT15.
 *   bpp 3: store each channel byte directly, located via fmt's shifts and
 *          SDL_BYTEORDER.
 *   bpp 4: build with PIXEL_FROM_RGB and store as Uint32.
 * Other bpp values store nothing.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 Pixel; \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
            *((Uint16 *)(buf)) = Pixel | NDS_BIT15; \
        } \
        break; \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf) + (fmt)->Rshift / 8) = (r); \
                *((buf) + (fmt)->Gshift / 8) = (g); \
                *((buf) + (fmt)->Bshift / 8) = (b); \
            } else { \
                *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
                *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
                *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
            } \
        } \
        break; \
        case 4: { \
            Uint32 Pixel; \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
            *((Uint32 *)(buf)) = Pixel; \
        } \
        break; \
    } \
}
slouken@0
   280
/* Like ASSEMBLE_RGB, but for bpp 2 and 4 the bits of the existing destination
 * pixel selected by Amask are kept and OR'd with the new colour bits.
 * The bpp 3 case stores the three channel bytes directly and ignores Amask.
 * Other bpp values store nothing.
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 *bufp; \
            Uint16 Pixel; \
            bufp = (Uint16 *)(buf); \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
            *bufp = Pixel | (*bufp & (Amask)); \
        } \
        break; \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf) + (fmt)->Rshift / 8) = (r); \
                *((buf) + (fmt)->Gshift / 8) = (g); \
                *((buf) + (fmt)->Bshift / 8) = (b); \
            } else { \
                *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
                *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
                *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
            } \
        } \
        break; \
        case 4: { \
            Uint32 *bufp; \
            Uint32 Pixel; \
            bufp = (Uint32 *)(buf); \
            PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b)); \
            *bufp = Pixel | (*bufp & (Amask)); \
        } \
        break; \
    } \
}
slouken@0
   317
slouken@0
   318
/* FIXME: Should we rescale alpha into 0..255 here? */
icculus@1162
   319
/* Extract r, g, b, a from Pixel using fmt's masks, shifts and loss values.
 * Each channel is masked, shifted down by its shift, then shifted up by its
 * loss; no rescaling to 0..255 is done.
 */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
{ \
    (r) = (((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss; \
    (g) = (((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss; \
    (b) = (((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss; \
    (a) = (((Pixel) & (fmt)->Amask) >> (fmt)->Ashift) << (fmt)->Aloss; \
}
icculus@1162
   326
/* Extract r, g, b, a from Pixel using only fmt's masks and shifts
 * (the loss values are not applied).
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
{ \
    (r) = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift; \
    (g) = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift; \
    (b) = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift; \
    (a) = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift; \
}
icculus@1162
   333
/* Split a pixel laid out as 0xRRGGBBAA into r, g, b, a. */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
{ \
    (r) = ((Pixel) >> 24); \
    (g) = (((Pixel) >> 16) & 0xFF); \
    (b) = (((Pixel) >> 8) & 0xFF); \
    (a) = ((Pixel) & 0xFF); \
}
icculus@1162
   340
/* Split a pixel laid out as 0xAARRGGBB into r, g, b, a. */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
{ \
    (r) = (((Pixel) >> 16) & 0xFF); \
    (g) = (((Pixel) >> 8) & 0xFF); \
    (b) = ((Pixel) & 0xFF); \
    (a) = ((Pixel) >> 24); \
}
icculus@1162
   347
/* Split a pixel laid out as 0xAABBGGRR into r, g, b, a. */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
{ \
    (r) = ((Pixel) & 0xFF); \
    (g) = (((Pixel) >> 8) & 0xFF); \
    (b) = (((Pixel) >> 16) & 0xFF); \
    (a) = ((Pixel) >> 24); \
}
slouken@3054
   354
/* Split a pixel laid out as 0xBBGGRRAA into r, g, b, a. */
#define RGBA_FROM_BGRA8888(Pixel, r, g, b, a) \
{ \
    (r) = (((Pixel) >> 8) & 0xFF); \
    (g) = (((Pixel) >> 16) & 0xFF); \
    (b) = ((Pixel) >> 24); \
    (a) = ((Pixel) & 0xFF); \
}
icculus@1162
   361
/* Read one pixel of bpp bytes from buf and split it into r, g, b, a.
 *   bpp 2 / 4: load into Pixel, then decode with RGBA_FROM_PIXEL.
 *   bpp 3: read each colour byte directly (located via fmt's shifts and
 *          SDL_BYTEORDER) and set a to 0xFF; Pixel is not written.
 * Any other bpp zeroes Pixel and all four channels so that callers never
 * read indeterminate values.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
do { \
    switch (bpp) { \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
            RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a)); \
        break; \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (r) = *((buf) + (fmt)->Rshift / 8); \
                (g) = *((buf) + (fmt)->Gshift / 8); \
                (b) = *((buf) + (fmt)->Bshift / 8); \
            } else { \
                (r) = *((buf) + 2 - (fmt)->Rshift / 8); \
                (g) = *((buf) + 2 - (fmt)->Gshift / 8); \
                (b) = *((buf) + 2 - (fmt)->Bshift / 8); \
            } \
            (a) = 0xFF; \
        } \
        break; \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
            RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a)); \
        break; \
        default: \
            /* unsupported bpp */ \
            (Pixel) = 0; \
            (r) = 0; (g) = 0; (b) = 0; (a) = 0; \
        break; \
    } \
} while (0)
slouken@0
   393
slouken@0
   394
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
icculus@1162
   395
/* Pack r, g, b, a into Pixel using fmt's loss and shift values.
 * Each channel is converted to Uint32 before the left shift so that a
 * channel shifted into bit 31 (e.g. alpha with Ashift 24) does not overflow
 * a signed int.
 */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
              ((Uint32)((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
              ((Uint32)((b) >> (fmt)->Bloss) << (fmt)->Bshift) | \
              ((Uint32)((a) >> (fmt)->Aloss) << (fmt)->Ashift); \
}
slouken@0
   402
/* Pack r, g, b, a per fmt and store the result as one pixel of bpp bytes at buf.
 *   bpp 2: build with PIXEL_FROM_RGBA and store as Uint16, OR'ing in NDS_BIT15.
 *   bpp 3: store the three colour bytes directly (a is not stored).
 *   bpp 4: build with PIXEL_FROM_RGBA and store as Uint32.
 * Other bpp values store nothing.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
{ \
    switch (bpp) { \
        case 2: { \
            Uint16 Pixel; \
            PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
            *((Uint16 *)(buf)) = Pixel | NDS_BIT15; \
        } \
        break; \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((buf) + (fmt)->Rshift / 8) = (r); \
                *((buf) + (fmt)->Gshift / 8) = (g); \
                *((buf) + (fmt)->Bshift / 8) = (b); \
            } else { \
                *((buf) + 2 - (fmt)->Rshift / 8) = (r); \
                *((buf) + 2 - (fmt)->Gshift / 8) = (g); \
                *((buf) + 2 - (fmt)->Bshift / 8) = (b); \
            } \
        } \
        break; \
        case 4: { \
            Uint32 Pixel; \
            PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
            *((Uint32 *)(buf)) = Pixel; \
        } \
        break; \
    } \
}
slouken@0
   435
icculus@1162
   436
/* Blend the RGB values of two Pixels based on a source alpha value */
slouken@0
   437
/* Blend source channels sR/sG/sB into dR/dG/dB in place:
 *     d = d + (s - d) * A / 255
 * The difference and A are cast to int so a negative (s - d) divides as a
 * signed value. dR, dG, dB must be assignable.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB) \
do { \
    (dR) = ((((int)((sR) - (dR)) * (int)(A)) / 255) + (dR)); \
    (dG) = ((((int)((sG) - (dG)) * (int)(A)) / 255) + (dG)); \
    (dB) = ((((int)((sB) - (dB)) * (int)(A)) / 255) + (dB)); \
} while(0)
icculus@1047
   443
icculus@1047
   444
slouken@0
   445
/* This is a very useful loop for optimizing blitters */
slouken@553
   446
/* USE_DUFFS_LOOP selects the unrolled DUFFS_LOOP* variants; it is left
 * undefined only for _MSC_VER == 1300. */
#if defined(_MSC_VER) && (_MSC_VER == 1300)
/* There's a bug in the Visual C++ 7 optimizer when compiling this code */
#else
#define USE_DUFFS_LOOP
#endif
slouken@0
   451
#ifdef USE_DUFFS_LOOP
slouken@0
   452
slouken@0
   453
/* 8-times unrolled loop */
slouken@0
   454
/* Execute pixel_copy_increment exactly `width` times using an 8-way unrolled
 * Duff's device. The switch jumps into the middle of the do/while so the
 * first pass handles width % 8 copies; the case labels fall through on
 * purpose.
 * A width of zero or less executes nothing (a bare Duff's device would run
 * eight copies for width 0), matching the non-unrolled fallback.
 * Note: `width` is evaluated twice and the local `n` is visible to
 * pixel_copy_increment.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width) \
{ int n = ((width) + 7) / 8; \
    if (n > 0) { \
        switch ((width) & 7) { \
        case 0: do {    pixel_copy_increment; \
        case 7:         pixel_copy_increment; \
        case 6:         pixel_copy_increment; \
        case 5:         pixel_copy_increment; \
        case 4:         pixel_copy_increment; \
        case 3:         pixel_copy_increment; \
        case 2:         pixel_copy_increment; \
        case 1:         pixel_copy_increment; \
                } while (--n > 0); \
        } \
    } \
}
slouken@0
   468
slouken@0
   469
/* 4-times unrolled loop */
slouken@0
   470
/* Execute pixel_copy_increment exactly `width` times using a 4-way unrolled
 * Duff's device; the case labels fall through on purpose.
 * A width of zero or less executes nothing (a bare Duff's device would run
 * four copies for width 0), matching the non-unrolled fallback.
 * Note: `width` is evaluated twice and the local `n` is visible to
 * pixel_copy_increment.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width) \
{ int n = ((width) + 3) / 4; \
    if (n > 0) { \
        switch ((width) & 3) { \
        case 0: do {    pixel_copy_increment; \
        case 3:         pixel_copy_increment; \
        case 2:         pixel_copy_increment; \
        case 1:         pixel_copy_increment; \
                } while (--n > 0); \
        } \
    } \
}
slouken@689
   480
slouken@0
   481
/* Use the 8-times version of the loop by default */
slouken@0
   482
#define DUFFS_LOOP(pixel_copy_increment, width) \
    DUFFS_LOOP8(pixel_copy_increment, width)
slouken@0
   484
slouken@3035
   485
/* Special version of Duff's device for even more optimization */
slouken@3035
   486
/* Process `width` pixels using three statements that handle 1, 2 and 4
 * pixels respectively:
 *   - one pixel_copy_increment1 if width is odd,
 *   - one pixel_copy_increment2 if bit 1 of the remainder is set,
 *   - one pixel_copy_increment4 if bit 2 of the remainder is set,
 *   - then the rest (a multiple of 8) as pairs of pixel_copy_increment4.
 * The previous version divided the remainder by 8 *before* testing bit 2, so
 * widths such as 4, 12 or 32 processed the wrong number of pixels.
 * A width of zero or less executes nothing.
 * Note: the local `n` is visible to the statements passed in.
 */
#define DUFFS_LOOP_124(pixel_copy_increment1, \
                       pixel_copy_increment2, \
                       pixel_copy_increment4, width) \
{ int n = (width); \
    if (n & 1) { \
        pixel_copy_increment1; n -= 1; \
    } \
    if (n & 2) { \
        pixel_copy_increment2; n -= 2; \
    } \
    if (n & 4) { \
        pixel_copy_increment4; n -= 4; \
    } \
    if (n > 0) { \
        n /= 8; \
        do { \
            pixel_copy_increment4; \
            pixel_copy_increment4; \
        } while (--n > 0); \
    } \
}
slouken@689
   505
slouken@3035
   506
#else
slouken@689
   507
slouken@689
   508
/* Don't use Duff's device to unroll loops */
slouken@0
   509
/* Plain counted loop: runs pixel_copy_increment `width` times (never for
 * width <= 0). The local `n` is visible to pixel_copy_increment. */
#define DUFFS_LOOP(pixel_copy_increment, width) \
{ int n; \
    for (n = (width); n > 0; --n) { \
        pixel_copy_increment; \
    } \
}
#define DUFFS_LOOP8(pixel_copy_increment, width) \
    DUFFS_LOOP(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width) \
    DUFFS_LOOP(pixel_copy_increment, width)
/* Only the one-pixel statement is used here; the 2- and 4-pixel statements
 * are ignored. */
#define DUFFS_LOOP_124(pixel_copy_increment1, \
                       pixel_copy_increment2, \
                       pixel_copy_increment4, width) \
    DUFFS_LOOP(pixel_copy_increment1, width)
slouken@0
   523
slouken@0
   524
#endif /* USE_DUFFS_LOOP */
slouken@0
   525
slouken@0
   526
/* Prevent Visual C++ 6.0 from printing out stupid warnings */
slouken@0
   527
/* Disable MSVC warning 4550 for files that include this header
 * (applies only when _MSC_VER >= 600). */
#if defined(_MSC_VER) && (_MSC_VER >= 600)
#pragma warning(disable: 4550)
#endif
slouken@0
   530
slouken@0
   531
#endif /* _SDL_blit_h */
slouken@2898
   532
slouken@1895
   533
/* vi: set ts=4 sw=4 expandtab: */