This repository has been archived by the owner on Feb 11, 2021. It is now read-only.
/
SDL_blit.h
549 lines (511 loc) · 15.1 KB
1
2
/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2010 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    Sam Lantinga
    slouken@libsdl.org
*/
#include "SDL_config.h"

#ifndef _SDL_blit_h
#define _SDL_blit_h

/*
 * Pull in the compiler intrinsics headers that match the instruction sets
 * enabled at compile time.  MinGW-w64 builds include <intrin.h> instead of
 * the individual headers.
 */
#ifdef __MINGW32__
#include <_mingw.h>
#endif

#if defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR)
#include <intrin.h>
#else
#ifdef __MMX__
#include <mmintrin.h>
#endif
#ifdef __3dNOW__
#include <mm3dnow.h>
#endif
#ifdef __SSE__
#include <xmmintrin.h>
#endif
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#endif

#include "SDL_cpuinfo.h"
#include "SDL_endian.h"
#include "SDL_video.h"

/* SDL blit copy flags (bit values, combinable with |) */
#define SDL_COPY_MODULATE_COLOR     0x00000001
#define SDL_COPY_MODULATE_ALPHA     0x00000002
#define SDL_COPY_MASK               0x00000010
#define SDL_COPY_BLEND              0x00000020
#define SDL_COPY_ADD                0x00000040
#define SDL_COPY_MOD                0x00000080
#define SDL_COPY_COLORKEY           0x00000100
#define SDL_COPY_NEAREST            0x00000200
#define SDL_COPY_RLE_DESIRED        0x00001000
#define SDL_COPY_RLE_COLORKEY       0x00002000
#define SDL_COPY_RLE_ALPHAKEY       0x00004000
/* Union of the three SDL_COPY_RLE_* bits above */
#define SDL_COPY_RLE_MASK           (SDL_COPY_RLE_DESIRED|SDL_COPY_RLE_COLORKEY|SDL_COPY_RLE_ALPHAKEY)

/* SDL blit CPU flags (bit values; SDL_CPU_ANY is the empty set) */
#define SDL_CPU_ANY                 0x00000000
#define SDL_CPU_MMX                 0x00000001
#define SDL_CPU_3DNOW               0x00000002
#define SDL_CPU_SSE                 0x00000004
#define SDL_CPU_SSE2                0x00000008
#define SDL_CPU_ALTIVEC_PREFETCH    0x00000010
#define SDL_CPU_ALTIVEC_NOPREFETCH  0x00000020

typedef struct
{
77
78
79
Uint8 *src;
int src_w, src_h;
int src_pitch;
80
int src_skip;
81
82
83
Uint8 *dst;
int dst_w, dst_h;
int dst_pitch;
84
int dst_skip;
85
86
SDL_PixelFormat *src_fmt;
SDL_PixelFormat *dst_fmt;
87
Uint8 *table;
88
89
90
int flags;
Uint32 colorkey;
Uint8 r, g, b, a;
91
92
} SDL_BlitInfo;
93
/* Signature shared by all blit routines: takes the blit description, returns nothing. */
typedef void (SDLCALL * SDL_BlitFunc) (SDL_BlitInfo * info);

typedef struct
{
97
98
99
100
101
102
Uint32 src_format;
Uint32 dst_format;
int flags;
int cpu;
SDL_BlitFunc func;
} SDL_BlitFuncEntry;
103
104
/* Blit mapping definition */
105
106
107
108
typedef struct SDL_BlitMap
{
SDL_Surface *dst;
int identity;
109
110
SDL_blit blit;
void *data;
111
SDL_BlitInfo info;
112
113
114
115
/* the version count matches the destination; mismatch indicates
an invalid mapping */
unsigned int format_version;
116
117
118
} SDL_BlitMap;
/* Functions found in SDL_blit.c */
extern int SDL_CalculateBlit(SDL_Surface * surface);

/*
 * Functions found in SDL_blit_*.c
 * Each returns an SDL_BlitFunc for the given surface.
 * NOTE(review): the 0/1/N/A suffixes presumably name the source-format
 * family each file handles - confirm in the corresponding SDL_blit_*.c.
 */
extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface * surface);
extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface * surface);
extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface * surface);
extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface * surface);

/*
 * Useful macros for blitting routines
 */

/*
 * DECLARE_ALIGNED(t, v, a): declare variable `v` of type `t` aligned to `a`
 * bytes.  Uses the GCC attribute or the MSVC declspec; on any other compiler
 * the alignment request is silently dropped and a plain `t v` is declared.
 */
#if defined(__GNUC__)
#define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
#else
#define DECLARE_ALIGNED(t,v,a)  t v
#endif

/*
 * Nonzero when pixel formats A and B (pointers) agree on bit depth, red mask
 * and alpha mask.  Only these three members are compared.
 */
#define FORMAT_EQUAL(A, B)                              \
    (((A)->BitsPerPixel == (B)->BitsPerPixel) &&        \
     ((A)->Rmask == (B)->Rmask) &&                      \
     ((A)->Amask == (B)->Amask))
/*
 * Split an already-loaded pixel value into R, G and B using the masks,
 * shifts and losses of `fmt` (pointer to a pixel format).  Each component is
 * masked, shifted down by its shift, then shifted up by its loss.
 * All arguments are parenthesized so expressions may be passed safely.
 */
/* FIXME: rescale values to 0..255 here? */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)                             \
{                                                                       \
    (r) = ((((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift)<<(fmt)->Rloss);      \
    (g) = ((((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift)<<(fmt)->Gloss);      \
    (b) = ((((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift)<<(fmt)->Bloss);      \
}
/* Split an RGB565 pixel value; results are left-aligned in 8 bits (low bits zero). */
#define RGB_FROM_RGB565(Pixel, r, g, b)                                 \
{                                                                       \
    (r) = ((((Pixel)&0xF800)>>11)<<3);                                  \
    (g) = ((((Pixel)&0x07E0)>>5)<<2);                                   \
    (b) = (((Pixel)&0x001F)<<3);                                        \
}
/* Split an RGB555 pixel value; results are left-aligned in 8 bits (low 3 bits zero). */
#define RGB_FROM_RGB555(Pixel, r, g, b)                                 \
{                                                                       \
    (r) = ((((Pixel)&0x7C00)>>10)<<3);                                  \
    (g) = ((((Pixel)&0x03E0)>>5)<<3);                                   \
    (b) = (((Pixel)&0x001F)<<3);                                        \
}
/* Split a 0xRRGGBB pixel value into its three bytes. */
#define RGB_FROM_RGB888(Pixel, r, g, b)                                 \
{                                                                       \
    (r) = (((Pixel)&0xFF0000)>>16);                                     \
    (g) = (((Pixel)&0xFF00)>>8);                                        \
    (b) = ((Pixel)&0xFF);                                               \
}
/*
 * Load one pixel of `bpp` bytes from `buf` into `Pixel`.
 *   bpp 2 / 4: read through a Uint16 / Uint32 pointer cast of buf
 *              (NOTE(review): assumes buf is suitably aligned).
 *   bpp 3:     assembled byte by byte according to SDL_BYTEORDER.
 *   other:     Pixel is set to 0 so it always has a defined value.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)                             \
do {                                                                    \
    switch (bpp) {                                                      \
    case 2:                                                             \
        (Pixel) = *((Uint16 *)(buf));                                   \
        break;                                                          \
    case 3: {                                                           \
        Uint8 *B = (Uint8 *)(buf);                                      \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            (Pixel) = B[0] + (B[1] << 8) + (B[2] << 16);                \
        } else {                                                        \
            (Pixel) = (B[0] << 16) + (B[1] << 8) + B[2];                \
        }                                                               \
    }                                                                   \
    break;                                                              \
    case 4:                                                             \
        (Pixel) = *((Uint32 *)(buf));                                   \
        break;                                                          \
    default:                                                            \
        (Pixel) = 0; /* stop gcc complaints */                          \
        break;                                                          \
    }                                                                   \
} while (0)

/*
 * Load one pixel of `bpp` bytes from `buf` and split it into r, g, b.
 *   bpp 2 / 4: the raw value is stored in Pixel, then split with
 *              RGB_FROM_PIXEL.
 *   bpp 3:     each component byte is read directly at offset shift/8
 *              (mirrored on big-endian); Pixel is NOT written.
 *   other:     Pixel, r, g and b are all set to 0 so they are defined.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)                     \
do {                                                                    \
    switch (bpp) {                                                      \
    case 2:                                                             \
        (Pixel) = *((Uint16 *)(buf));                                   \
        RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b));                  \
        break;                                                          \
    case 3: {                                                           \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            (r) = *((buf)+(fmt)->Rshift/8);                             \
            (g) = *((buf)+(fmt)->Gshift/8);                             \
            (b) = *((buf)+(fmt)->Bshift/8);                             \
        } else {                                                        \
            (r) = *((buf)+2-(fmt)->Rshift/8);                           \
            (g) = *((buf)+2-(fmt)->Gshift/8);                           \
            (b) = *((buf)+2-(fmt)->Bshift/8);                           \
        }                                                               \
    }                                                                   \
    break;                                                              \
    case 4:                                                             \
        (Pixel) = *((Uint32 *)(buf));                                   \
        RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b));                  \
        break;                                                          \
    default:                                                            \
        /* stop gcc complaints */                                       \
        (Pixel) = 0;                                                    \
        (r) = (g) = (b) = 0;                                            \
        break;                                                          \
    }                                                                   \
} while (0)

/*
 * Assemble R-G-B values into a pixel value of the format `fmt`.
 * Each component is reduced by its loss, widened to Uint32 (so a shift of
 * 24 cannot overflow a signed int) and moved to its shift position.
 * Nothing is stored to memory here; the result is assigned to `Pixel`.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)                             \
{                                                                       \
    (Pixel) = ((Uint32)((r)>>(fmt)->Rloss)<<(fmt)->Rshift)|             \
              ((Uint32)((g)>>(fmt)->Gloss)<<(fmt)->Gshift)|             \
              ((Uint32)((b)>>(fmt)->Bloss)<<(fmt)->Bshift);             \
}
/* Pack 8-bit r, g, b into an RGB565 value (drops the low 3/2/3 bits). */
#define RGB565_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    (Pixel) = (((r)>>3)<<11)|(((g)>>2)<<5)|((b)>>3);                    \
}
/* Pack 8-bit r, g, b into an RGB555 value (drops the low 3 bits of each). */
#define RGB555_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    (Pixel) = (((r)>>3)<<10)|(((g)>>3)<<5)|((b)>>3);                    \
}
/* Pack 8-bit r, g, b into a 0xRRGGBB value. */
#define RGB888_FROM_RGB(Pixel, r, g, b)                                 \
{                                                                       \
    (Pixel) = ((r)<<16)|((g)<<8)|(b);                                   \
}

/*
 * Pack 8-bit r, g, b, a into a 0xAARRGGBB value.
 * The top byte is widened to Uint32 before shifting so that a signed int
 * argument >= 0x80 does not overflow.
 */
#define ARGB8888_FROM_RGBA(Pixel, r, g, b, a)                           \
{                                                                       \
    (Pixel) = ((Uint32)(a)<<24)|((r)<<16)|((g)<<8)|(b);                 \
}

/* Pack 8-bit r, g, b, a into a 0xRRGGBBAA value (top byte widened to Uint32 before the shift). */
#define RGBA8888_FROM_RGBA(Pixel, r, g, b, a)                           \
{                                                                       \
    (Pixel) = ((Uint32)(r)<<24)|((g)<<16)|((b)<<8)|(a);                 \
}
/* Pack 8-bit r, g, b, a into a 0xAABBGGRR value (top byte widened to Uint32 before the shift). */
#define ABGR8888_FROM_RGBA(Pixel, r, g, b, a)                           \
{                                                                       \
    (Pixel) = ((Uint32)(a)<<24)|((b)<<16)|((g)<<8)|(r);                 \
}
/* Pack 8-bit r, g, b, a into a 0xBBGGRRAA value (top byte widened to Uint32 before the shift). */
#define BGRA8888_FROM_RGBA(Pixel, r, g, b, a)                           \
{                                                                       \
    (Pixel) = ((Uint32)(b)<<24)|((g)<<16)|((r)<<8)|(a);                 \
}

/*
 * Store r, g, b as one pixel of `bpp` bytes at `buf`, using format `fmt`.
 *   bpp 2 / 4: the value is built with PIXEL_FROM_RGB and written through a
 *              Uint16 / Uint32 pointer cast of buf.
 *   bpp 3:     each component is written to the byte at offset shift/8
 *              (mirrored on big-endian).
 *   other:     nothing is written.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b)                            \
{                                                                       \
    switch (bpp) {                                                      \
    case 2: {                                                           \
        Uint16 Pixel;                                                   \
        PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));                    \
        *((Uint16 *)(buf)) = Pixel;                                     \
    }                                                                   \
    break;                                                              \
    case 3: {                                                           \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            *((buf)+(fmt)->Rshift/8) = (r);                             \
            *((buf)+(fmt)->Gshift/8) = (g);                             \
            *((buf)+(fmt)->Bshift/8) = (b);                             \
        } else {                                                        \
            *((buf)+2-(fmt)->Rshift/8) = (r);                           \
            *((buf)+2-(fmt)->Gshift/8) = (g);                           \
            *((buf)+2-(fmt)->Bshift/8) = (b);                           \
        }                                                               \
    }                                                                   \
    break;                                                              \
    case 4: {                                                           \
        Uint32 Pixel;                                                   \
        PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));                    \
        *((Uint32 *)(buf)) = Pixel;                                     \
    }                                                                   \
    break;                                                              \
    }                                                                   \
}
/*
 * Like ASSEMBLE_RGB, but for bpp 2 and 4 the bits selected by `Amask` that
 * are already stored at `buf` are kept and OR-ed into the new pixel value.
 * The 3-byte case writes only the three component bytes and ignores Amask.
 * `buf` and `Amask` are parenthesized so pointer/bit expressions bind as
 * written by the caller.
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)               \
{                                                                       \
    switch (bpp) {                                                      \
    case 2: {                                                           \
        Uint16 *bufp;                                                   \
        Uint16 Pixel;                                                   \
        bufp = (Uint16 *)(buf);                                         \
        PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));                    \
        *bufp = Pixel | (*bufp & (Amask));                              \
    }                                                                   \
    break;                                                              \
    case 3: {                                                           \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            *((buf)+(fmt)->Rshift/8) = (r);                             \
            *((buf)+(fmt)->Gshift/8) = (g);                             \
            *((buf)+(fmt)->Bshift/8) = (b);                             \
        } else {                                                        \
            *((buf)+2-(fmt)->Rshift/8) = (r);                           \
            *((buf)+2-(fmt)->Gshift/8) = (g);                           \
            *((buf)+2-(fmt)->Bshift/8) = (b);                           \
        }                                                               \
    }                                                                   \
    break;                                                              \
    case 4: {                                                           \
        Uint32 *bufp;                                                   \
        Uint32 Pixel;                                                   \
        bufp = (Uint32 *)(buf);                                         \
        PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));                    \
        *bufp = Pixel | (*bufp & (Amask));                              \
    }                                                                   \
    break;                                                              \
    }                                                                   \
}
/*
 * Split a pixel value into R, G, B and A using the masks, shifts and losses
 * of `fmt`.  Each component is masked, shifted down by its shift, then
 * shifted up by its loss.
 */
/* FIXME: Should we rescale alpha into 0..255 here? */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)                         \
{                                                                       \
    (r) = (((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift)<<(fmt)->Rloss;        \
    (g) = (((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift)<<(fmt)->Gloss;        \
    (b) = (((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift)<<(fmt)->Bloss;        \
    (a) = (((Pixel)&(fmt)->Amask)>>(fmt)->Ashift)<<(fmt)->Aloss;        \
}
/*
 * Split a pixel value into R, G, B and A using only the masks and shifts of
 * `fmt`; the loss members are not applied.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)                          \
{                                                                       \
    (r) = ((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift;                        \
    (g) = ((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift;                        \
    (b) = ((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift;                        \
    (a) = ((Pixel)&(fmt)->Amask)>>(fmt)->Ashift;                        \
}
/* Split a 0xRRGGBBAA pixel value into its four bytes. */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)                           \
{                                                                       \
    (r) = ((Pixel)>>24);                                                \
    (g) = (((Pixel)>>16)&0xFF);                                         \
    (b) = (((Pixel)>>8)&0xFF);                                          \
    (a) = ((Pixel)&0xFF);                                               \
}
/* Split a 0xAARRGGBB pixel value into its four bytes. */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)                           \
{                                                                       \
    (r) = (((Pixel)>>16)&0xFF);                                         \
    (g) = (((Pixel)>>8)&0xFF);                                          \
    (b) = ((Pixel)&0xFF);                                               \
    (a) = ((Pixel)>>24);                                                \
}
/* Split a 0xAABBGGRR pixel value into its four bytes. */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)                           \
{                                                                       \
    (r) = ((Pixel)&0xFF);                                               \
    (g) = (((Pixel)>>8)&0xFF);                                          \
    (b) = (((Pixel)>>16)&0xFF);                                         \
    (a) = ((Pixel)>>24);                                                \
}

/* Split a 0xBBGGRRAA pixel value into its four bytes. */
#define RGBA_FROM_BGRA8888(Pixel, r, g, b, a)                           \
{                                                                       \
    (r) = (((Pixel)>>8)&0xFF);                                          \
    (g) = (((Pixel)>>16)&0xFF);                                         \
    (b) = ((Pixel)>>24);                                                \
    (a) = ((Pixel)&0xFF);                                               \
}
/*
 * Load one pixel of `bpp` bytes from `buf` and split it into r, g, b, a.
 *   bpp 2 / 4: the raw value is stored in Pixel, then split with
 *              RGBA_FROM_PIXEL.
 *   bpp 3:     component bytes are read directly at offset shift/8
 *              (mirrored on big-endian), a is set to 0xFF and Pixel is NOT
 *              written.
 *   other:     Pixel, r, g, b and a are all set to 0 so they are defined.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)                 \
do {                                                                    \
    switch (bpp) {                                                      \
    case 2:                                                             \
        (Pixel) = *((Uint16 *)(buf));                                   \
        RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a));            \
        break;                                                          \
    case 3: {                                                           \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            (r) = *((buf)+(fmt)->Rshift/8);                             \
            (g) = *((buf)+(fmt)->Gshift/8);                             \
            (b) = *((buf)+(fmt)->Bshift/8);                             \
        } else {                                                        \
            (r) = *((buf)+2-(fmt)->Rshift/8);                           \
            (g) = *((buf)+2-(fmt)->Gshift/8);                           \
            (b) = *((buf)+2-(fmt)->Bshift/8);                           \
        }                                                               \
        (a) = 0xFF;                                                     \
    }                                                                   \
    break;                                                              \
    case 4:                                                             \
        (Pixel) = *((Uint32 *)(buf));                                   \
        RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a));            \
        break;                                                          \
    default:                                                            \
        /* stop gcc complaints */                                       \
        (Pixel) = 0;                                                    \
        (r) = (g) = (b) = (a) = 0;                                      \
        break;                                                          \
    }                                                                   \
} while (0)

/*
 * Assemble R-G-B-A values into a pixel value of the format `fmt`.
 * Each component is reduced by its loss, widened to Uint32 (so a shift of
 * 24 cannot overflow a signed int) and moved to its shift position.
 */
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                         \
{                                                                       \
    (Pixel) = ((Uint32)((r)>>(fmt)->Rloss)<<(fmt)->Rshift)|             \
              ((Uint32)((g)>>(fmt)->Gloss)<<(fmt)->Gshift)|             \
              ((Uint32)((b)>>(fmt)->Bloss)<<(fmt)->Bshift)|             \
              ((Uint32)((a)>>(fmt)->Aloss)<<(fmt)->Ashift);             \
}
/*
 * Store r, g, b, a as one pixel of `bpp` bytes at `buf`, using format `fmt`.
 *   bpp 2 / 4: the value is built with PIXEL_FROM_RGBA and written through
 *              a Uint16 / Uint32 pointer cast of buf.
 *   bpp 3:     only r, g and b are written (byte at offset shift/8,
 *              mirrored on big-endian); `a` is ignored.
 *   other:     nothing is written.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)                        \
{                                                                       \
    switch (bpp) {                                                      \
    case 2: {                                                           \
        Uint16 Pixel;                                                   \
        PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a));              \
        *((Uint16 *)(buf)) = Pixel;                                     \
    }                                                                   \
    break;                                                              \
    case 3: {                                                           \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                          \
            *((buf)+(fmt)->Rshift/8) = (r);                             \
            *((buf)+(fmt)->Gshift/8) = (g);                             \
            *((buf)+(fmt)->Bshift/8) = (b);                             \
        } else {                                                        \
            *((buf)+2-(fmt)->Rshift/8) = (r);                           \
            *((buf)+2-(fmt)->Gshift/8) = (g);                           \
            *((buf)+2-(fmt)->Bshift/8) = (b);                           \
        }                                                               \
    }                                                                   \
    break;                                                              \
    case 4: {                                                           \
        Uint32 Pixel;                                                   \
        PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a));              \
        *((Uint32 *)(buf)) = Pixel;                                     \
    }                                                                   \
    break;                                                              \
    }                                                                   \
}

/*
 * Blend the RGB values of two Pixels based on a source alpha value.
 * For each channel: d = d + (s - d) * A / 255, computed in int arithmetic.
 * The destination arguments are both read and written.  Every argument is
 * parenthesized so that expression arguments (e.g. `a1 + a2` for A) bind as
 * the caller wrote them.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)                          \
do {                                                                    \
    (dR) = ((((int)((sR)-(dR))*(int)(A))/255)+(dR));                    \
    (dG) = ((((int)((sG)-(dG))*(int)(A))/255)+(dG));                    \
    (dB) = ((((int)((sB)-(dB))*(int)(A))/255)+(dB));                    \
} while(0)

/* This is a very useful loop for optimizing blitters */
#if defined(_MSC_VER) && (_MSC_VER == 1300)
/* There's a bug in the Visual C++ 7 optimizer when compiling this code */
#else
#define USE_DUFFS_LOOP
#endif

/*
 * All DUFFS_LOOP* macros execute the statement `pixel_copy_increment`
 * exactly `width` times (no times when width <= 0 for the 8/4/plain
 * variants).  `width` is evaluated more than once and a local `int n` is
 * declared inside the expansion, so the statement must not use its own `n`.
 */
#ifdef USE_DUFFS_LOOP

/* 8-times unrolled loop */
#define DUFFS_LOOP8(pixel_copy_increment, width)                        \
{ int n = ((width)+7)/8;                                                \
    if (n > 0) {    /* width <= 0: do nothing, like the plain loop */   \
        switch ((width) & 7) {                                          \
        case 0: do {    pixel_copy_increment;                           \
        case 7:         pixel_copy_increment;                           \
        case 6:         pixel_copy_increment;                           \
        case 5:         pixel_copy_increment;                           \
        case 4:         pixel_copy_increment;                           \
        case 3:         pixel_copy_increment;                           \
        case 2:         pixel_copy_increment;                           \
        case 1:         pixel_copy_increment;                           \
                } while ( --n > 0 );                                    \
        }                                                               \
    }                                                                   \
}

/* 4-times unrolled loop */
#define DUFFS_LOOP4(pixel_copy_increment, width)                        \
{ int n = ((width)+3)/4;                                                \
    if (n > 0) {    /* width <= 0: do nothing, like the plain loop */   \
        switch ((width) & 3) {                                          \
        case 0: do {    pixel_copy_increment;                           \
        case 3:         pixel_copy_increment;                           \
        case 2:         pixel_copy_increment;                           \
        case 1:         pixel_copy_increment;                           \
                } while (--n > 0);                                      \
        }                                                               \
    }                                                                   \
}

/* Use the 8-times version of the loop by default */
#define DUFFS_LOOP(pixel_copy_increment, width)                         \
    DUFFS_LOOP8(pixel_copy_increment, width)

/*
 * Special version of Duff's device for even more optimization.
 * pixel_copy_increment1/2/4 must process 1, 2 and 4 pixels respectively.
 * One single step and one double step absorb the low two bits of the width,
 * one quad step absorbs bit 2, and the remaining multiple of 8 is handled
 * two quad steps at a time.  (The previous form tested `n & 4` after n had
 * already been replaced by the iteration count, so the number of quad steps
 * was wrong - e.g. a width of 4 processed 8 pixels.)
 */
#define DUFFS_LOOP_124(pixel_copy_increment1,                           \
                       pixel_copy_increment2,                           \
                       pixel_copy_increment4, width)                    \
{ int n = (width);                                                      \
    if (n & 1) {                                                        \
        pixel_copy_increment1; n -= 1;                                  \
    }                                                                   \
    if (n & 2) {                                                        \
        pixel_copy_increment2; n -= 2;                                  \
    }                                                                   \
    if (n & 4) {                                                        \
        pixel_copy_increment4; n -= 4;                                  \
    }                                                                   \
    if (n) {                                                            \
        n /= 8;                                                         \
        do {                                                            \
            pixel_copy_increment4;                                      \
            pixel_copy_increment4;                                      \
        } while (--n > 0);                                              \
    }                                                                   \
}

#else

/* Don't use Duff's device to unroll loops */
#define DUFFS_LOOP(pixel_copy_increment, width)                         \
{ int n;                                                                \
    for ( n=(width); n > 0; --n ) {                                     \
        pixel_copy_increment;                                           \
    }                                                                   \
}
#define DUFFS_LOOP8(pixel_copy_increment, width)                        \
    DUFFS_LOOP(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width)                        \
    DUFFS_LOOP(pixel_copy_increment, width)
/* Without unrolling, only the single-pixel step is used, `width` times. */
#define DUFFS_LOOP_124(pixel_copy_increment1,                           \
                       pixel_copy_increment2,                           \
                       pixel_copy_increment4, width)                    \
    DUFFS_LOOP(pixel_copy_increment1, width)

#endif /* USE_DUFFS_LOOP */
/*
 * Prevent Visual C++ from printing out warning C4550 in every file that
 * includes this header.  The original note mentions Visual C++ 6.0, but the
 * guard below is `_MSC_VER >= 600`, so the pragma is active on all later
 * MSVC versions as well.
 * NOTE(review): the warning is never re-enabled, so it stays disabled for
 * the rest of the including translation unit - confirm that is intended.
 */
#if defined(_MSC_VER) && (_MSC_VER >= 600)
#pragma warning(disable: 4550)
#endif
#endif /* _SDL_blit_h */
/* vi: set ts=4 sw=4 expandtab: */