slouken@0
|
1 |
/*
|
slouken@5535
|
2 |
Simple DirectMedia Layer
|
slouken@6885
|
3 |
Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
|
slouken@0
|
4 |
|
slouken@5535
|
5 |
This software is provided 'as-is', without any express or implied
|
slouken@5535
|
6 |
warranty. In no event will the authors be held liable for any damages
|
slouken@5535
|
7 |
arising from the use of this software.
|
slouken@0
|
8 |
|
slouken@5535
|
9 |
Permission is granted to anyone to use this software for any purpose,
|
slouken@5535
|
10 |
including commercial applications, and to alter it and redistribute it
|
slouken@5535
|
11 |
freely, subject to the following restrictions:
|
slouken@0
|
12 |
|
slouken@5535
|
13 |
1. The origin of this software must not be misrepresented; you must not
|
slouken@5535
|
14 |
claim that you wrote the original software. If you use this software
|
slouken@5535
|
15 |
in a product, an acknowledgment in the product documentation would be
|
slouken@5535
|
16 |
appreciated but is not required.
|
slouken@5535
|
17 |
2. Altered source versions must be plainly marked as such, and must not be
|
slouken@5535
|
18 |
misrepresented as being the original software.
|
slouken@5535
|
19 |
3. This notice may not be removed or altered from any source distribution.
|
slouken@0
|
20 |
*/
|
slouken@1402
|
21 |
#include "SDL_config.h"
|
slouken@0
|
22 |
|
slouken@0
|
23 |
#include "SDL_video.h"
|
slouken@0
|
24 |
#include "SDL_blit.h"
|
slouken@0
|
25 |
|
slouken@0
|
26 |
/* Functions to perform alpha blended blitting */
|
slouken@0
|
27 |
|
slouken@0
|
28 |
/* N->1 blending with per-surface alpha */
|
slouken@1895
|
29 |
static void
|
slouken@1895
|
30 |
BlitNto1SurfaceAlpha(SDL_BlitInfo * info)
|
slouken@0
|
31 |
{
|
slouken@2262
|
32 |
int width = info->dst_w;
|
slouken@2262
|
33 |
int height = info->dst_h;
|
slouken@2262
|
34 |
Uint8 *src = info->src;
|
slouken@2267
|
35 |
int srcskip = info->src_skip;
|
slouken@2262
|
36 |
Uint8 *dst = info->dst;
|
slouken@2267
|
37 |
int dstskip = info->dst_skip;
|
slouken@1895
|
38 |
Uint8 *palmap = info->table;
|
slouken@2267
|
39 |
SDL_PixelFormat *srcfmt = info->src_fmt;
|
slouken@2267
|
40 |
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
slouken@1895
|
41 |
int srcbpp = srcfmt->BytesPerPixel;
|
slouken@7502
|
42 |
Uint32 Pixel;
|
slouken@7502
|
43 |
unsigned sR, sG, sB;
|
slouken@7502
|
44 |
unsigned dR, dG, dB;
|
slouken@2267
|
45 |
const unsigned A = info->a;
|
slouken@0
|
46 |
|
slouken@1895
|
47 |
while (height--) {
|
slouken@1895
|
48 |
/* *INDENT-OFF* */
|
slouken@0
|
49 |
DUFFS_LOOP4(
|
slouken@0
|
50 |
{
|
icculus@1162
|
51 |
DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
|
slouken@0
|
52 |
dR = dstfmt->palette->colors[*dst].r;
|
slouken@0
|
53 |
dG = dstfmt->palette->colors[*dst].g;
|
slouken@0
|
54 |
dB = dstfmt->palette->colors[*dst].b;
|
slouken@7502
|
55 |
ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
|
slouken@0
|
56 |
dR &= 0xff;
|
slouken@0
|
57 |
dG &= 0xff;
|
slouken@0
|
58 |
dB &= 0xff;
|
slouken@0
|
59 |
/* Pack RGB into 8bit pixel */
|
slouken@0
|
60 |
if ( palmap == NULL ) {
|
slouken@7502
|
61 |
*dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
|
slouken@0
|
62 |
} else {
|
slouken@7502
|
63 |
*dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
|
slouken@0
|
64 |
}
|
slouken@0
|
65 |
dst++;
|
slouken@0
|
66 |
src += srcbpp;
|
slouken@0
|
67 |
},
|
slouken@0
|
68 |
width);
|
slouken@1895
|
69 |
/* *INDENT-ON* */
|
slouken@1895
|
70 |
src += srcskip;
|
slouken@1895
|
71 |
dst += dstskip;
|
slouken@1895
|
72 |
}
|
slouken@0
|
73 |
}
|
slouken@0
|
74 |
|
slouken@0
|
75 |
/* N->1 blending with pixel alpha */
|
slouken@1895
|
76 |
static void
|
slouken@1895
|
77 |
BlitNto1PixelAlpha(SDL_BlitInfo * info)
|
slouken@0
|
78 |
{
|
slouken@2262
|
79 |
int width = info->dst_w;
|
slouken@2262
|
80 |
int height = info->dst_h;
|
slouken@2262
|
81 |
Uint8 *src = info->src;
|
slouken@2267
|
82 |
int srcskip = info->src_skip;
|
slouken@2262
|
83 |
Uint8 *dst = info->dst;
|
slouken@2267
|
84 |
int dstskip = info->dst_skip;
|
slouken@1895
|
85 |
Uint8 *palmap = info->table;
|
slouken@2267
|
86 |
SDL_PixelFormat *srcfmt = info->src_fmt;
|
slouken@2267
|
87 |
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
slouken@1895
|
88 |
int srcbpp = srcfmt->BytesPerPixel;
|
slouken@7502
|
89 |
Uint32 Pixel;
|
slouken@7502
|
90 |
unsigned sR, sG, sB, sA;
|
slouken@7502
|
91 |
unsigned dR, dG, dB;
|
slouken@0
|
92 |
|
slouken@1895
|
93 |
while (height--) {
|
slouken@1895
|
94 |
/* *INDENT-OFF* */
|
slouken@0
|
95 |
DUFFS_LOOP4(
|
slouken@0
|
96 |
{
|
icculus@1162
|
97 |
DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA);
|
slouken@0
|
98 |
dR = dstfmt->palette->colors[*dst].r;
|
slouken@0
|
99 |
dG = dstfmt->palette->colors[*dst].g;
|
slouken@0
|
100 |
dB = dstfmt->palette->colors[*dst].b;
|
slouken@7502
|
101 |
ALPHA_BLEND_RGB(sR, sG, sB, sA, dR, dG, dB);
|
slouken@0
|
102 |
dR &= 0xff;
|
slouken@0
|
103 |
dG &= 0xff;
|
slouken@0
|
104 |
dB &= 0xff;
|
slouken@0
|
105 |
/* Pack RGB into 8bit pixel */
|
slouken@0
|
106 |
if ( palmap == NULL ) {
|
slouken@7502
|
107 |
*dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
|
slouken@0
|
108 |
} else {
|
slouken@7502
|
109 |
*dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
|
slouken@0
|
110 |
}
|
slouken@0
|
111 |
dst++;
|
slouken@0
|
112 |
src += srcbpp;
|
slouken@0
|
113 |
},
|
slouken@0
|
114 |
width);
|
slouken@1895
|
115 |
/* *INDENT-ON* */
|
slouken@1895
|
116 |
src += srcskip;
|
slouken@1895
|
117 |
dst += dstskip;
|
slouken@1895
|
118 |
}
|
slouken@0
|
119 |
}
|
slouken@0
|
120 |
|
slouken@0
|
121 |
/* colorkeyed N->1 blending with per-surface alpha */
|
slouken@1895
|
122 |
static void
|
slouken@1895
|
123 |
BlitNto1SurfaceAlphaKey(SDL_BlitInfo * info)
|
slouken@0
|
124 |
{
|
slouken@2262
|
125 |
int width = info->dst_w;
|
slouken@2262
|
126 |
int height = info->dst_h;
|
slouken@2262
|
127 |
Uint8 *src = info->src;
|
slouken@2267
|
128 |
int srcskip = info->src_skip;
|
slouken@2262
|
129 |
Uint8 *dst = info->dst;
|
slouken@2267
|
130 |
int dstskip = info->dst_skip;
|
slouken@1895
|
131 |
Uint8 *palmap = info->table;
|
slouken@2267
|
132 |
SDL_PixelFormat *srcfmt = info->src_fmt;
|
slouken@2267
|
133 |
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
slouken@1895
|
134 |
int srcbpp = srcfmt->BytesPerPixel;
|
slouken@2267
|
135 |
Uint32 ckey = info->colorkey;
|
slouken@7502
|
136 |
Uint32 Pixel;
|
slouken@7502
|
137 |
unsigned sR, sG, sB;
|
slouken@7502
|
138 |
unsigned dR, dG, dB;
|
slouken@7502
|
139 |
const unsigned A = info->a;
|
slouken@0
|
140 |
|
slouken@1895
|
141 |
while (height--) {
|
slouken@1895
|
142 |
/* *INDENT-OFF* */
|
slouken@0
|
143 |
DUFFS_LOOP(
|
slouken@0
|
144 |
{
|
icculus@1162
|
145 |
DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
|
icculus@1162
|
146 |
if ( Pixel != ckey ) {
|
slouken@0
|
147 |
dR = dstfmt->palette->colors[*dst].r;
|
slouken@0
|
148 |
dG = dstfmt->palette->colors[*dst].g;
|
slouken@0
|
149 |
dB = dstfmt->palette->colors[*dst].b;
|
slouken@7502
|
150 |
ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
|
slouken@0
|
151 |
dR &= 0xff;
|
slouken@0
|
152 |
dG &= 0xff;
|
slouken@0
|
153 |
dB &= 0xff;
|
slouken@0
|
154 |
/* Pack RGB into 8bit pixel */
|
slouken@0
|
155 |
if ( palmap == NULL ) {
|
slouken@7502
|
156 |
*dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
|
slouken@0
|
157 |
} else {
|
slouken@7502
|
158 |
*dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
|
slouken@0
|
159 |
}
|
slouken@0
|
160 |
}
|
slouken@0
|
161 |
dst++;
|
slouken@0
|
162 |
src += srcbpp;
|
slouken@0
|
163 |
},
|
slouken@0
|
164 |
width);
|
slouken@1895
|
165 |
/* *INDENT-ON* */
|
slouken@1895
|
166 |
src += srcskip;
|
slouken@1895
|
167 |
dst += dstskip;
|
slouken@1895
|
168 |
}
|
slouken@0
|
169 |
}
|
slouken@0
|
170 |
|
slouken@2255
|
171 |
#ifdef __MMX__
|
slouken@1542
|
172 |
|
slouken@1542
|
173 |
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
slouken@1895
|
174 |
static void
|
slouken@1895
|
175 |
BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info)
|
slouken@1542
|
176 |
{
|
slouken@2262
|
177 |
int width = info->dst_w;
|
slouken@2262
|
178 |
int height = info->dst_h;
|
slouken@2262
|
179 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@2267
|
180 |
int srcskip = info->src_skip >> 2;
|
slouken@2262
|
181 |
Uint32 *dstp = (Uint32 *) info->dst;
|
slouken@2267
|
182 |
int dstskip = info->dst_skip >> 2;
|
slouken@2267
|
183 |
Uint32 dalpha = info->dst_fmt->Amask;
|
slouken@1542
|
184 |
|
slouken@1895
|
185 |
__m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
|
slouken@1542
|
186 |
|
slouken@1895
|
187 |
hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */
|
slouken@1895
|
188 |
lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */
|
slouken@1895
|
189 |
dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
|
slouken@1542
|
190 |
|
slouken@1895
|
191 |
while (height--) {
|
slouken@1895
|
192 |
int n = width;
|
slouken@1895
|
193 |
if (n & 1) {
|
slouken@1895
|
194 |
Uint32 s = *srcp++;
|
slouken@1895
|
195 |
Uint32 d = *dstp;
|
slouken@1895
|
196 |
*dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
|
slouken@1895
|
197 |
+ (s & d & 0x00010101)) | dalpha;
|
slouken@1895
|
198 |
n--;
|
slouken@1895
|
199 |
}
|
slouken@1542
|
200 |
|
slouken@1895
|
201 |
for (n >>= 1; n > 0; --n) {
|
slouken@1895
|
202 |
dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
|
slouken@1895
|
203 |
dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
|
slouken@1542
|
204 |
|
slouken@1895
|
205 |
src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
|
slouken@1895
|
206 |
src2 = src1; /* 2 x src -> src2(ARGBARGB) */
|
slouken@1895
|
207 |
|
slouken@1895
|
208 |
dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */
|
slouken@1895
|
209 |
src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */
|
slouken@1895
|
210 |
src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */
|
slouken@1895
|
211 |
src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */
|
slouken@1895
|
212 |
|
slouken@1895
|
213 |
dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */
|
slouken@1895
|
214 |
dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */
|
slouken@1895
|
215 |
dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */
|
slouken@1895
|
216 |
dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */
|
slouken@1895
|
217 |
|
slouken@1895
|
218 |
*(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */
|
slouken@1895
|
219 |
dstp += 2;
|
slouken@1895
|
220 |
srcp += 2;
|
slouken@1895
|
221 |
}
|
slouken@1895
|
222 |
|
slouken@1895
|
223 |
srcp += srcskip;
|
slouken@1895
|
224 |
dstp += dstskip;
|
slouken@1895
|
225 |
}
|
slouken@1895
|
226 |
_mm_empty();
|
slouken@1542
|
227 |
}
|
slouken@1542
|
228 |
|
slouken@1542
|
229 |
/* fast RGB888->(A)RGB888 blending with surface alpha */
|
slouken@1895
|
230 |
static void
|
slouken@1895
|
231 |
BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info)
|
slouken@1542
|
232 |
{
|
slouken@2267
|
233 |
SDL_PixelFormat *df = info->dst_fmt;
|
slouken@6863
|
234 |
Uint32 chanmask;
|
slouken@2267
|
235 |
unsigned alpha = info->a;
|
slouken@1542
|
236 |
|
slouken@1895
|
237 |
if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
|
slouken@1895
|
238 |
/* only call a128 version when R,G,B occupy lower bits */
|
slouken@1895
|
239 |
BlitRGBtoRGBSurfaceAlpha128MMX(info);
|
slouken@1895
|
240 |
} else {
|
slouken@2262
|
241 |
int width = info->dst_w;
|
slouken@2262
|
242 |
int height = info->dst_h;
|
slouken@2262
|
243 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@2267
|
244 |
int srcskip = info->src_skip >> 2;
|
slouken@2262
|
245 |
Uint32 *dstp = (Uint32 *) info->dst;
|
slouken@2267
|
246 |
int dstskip = info->dst_skip >> 2;
|
slouken@1895
|
247 |
Uint32 dalpha = df->Amask;
|
slouken@1895
|
248 |
Uint32 amult;
|
slouken@1542
|
249 |
|
slouken@1895
|
250 |
__m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
|
slouken@1542
|
251 |
|
slouken@1895
|
252 |
mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
|
slouken@1895
|
253 |
/* form the alpha mult */
|
slouken@1895
|
254 |
amult = alpha | (alpha << 8);
|
slouken@1895
|
255 |
amult = amult | (amult << 16);
|
slouken@1895
|
256 |
chanmask =
|
slouken@3013
|
257 |
(0xff << df->Rshift) | (0xff << df->
|
slouken@3013
|
258 |
Gshift) | (0xff << df->Bshift);
|
slouken@1895
|
259 |
mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */
|
slouken@1895
|
260 |
mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */
|
slouken@1895
|
261 |
/* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */
|
slouken@1895
|
262 |
dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
|
slouken@1542
|
263 |
|
slouken@1895
|
264 |
while (height--) {
|
slouken@1895
|
265 |
int n = width;
|
slouken@1895
|
266 |
if (n & 1) {
|
slouken@1895
|
267 |
/* One Pixel Blend */
|
slouken@1895
|
268 |
src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */
|
slouken@1895
|
269 |
src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */
|
slouken@1542
|
270 |
|
slouken@1895
|
271 |
dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
|
slouken@1895
|
272 |
dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
|
slouken@1542
|
273 |
|
slouken@1895
|
274 |
src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */
|
slouken@1895
|
275 |
src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
|
slouken@1895
|
276 |
src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
|
slouken@1895
|
277 |
dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */
|
slouken@1542
|
278 |
|
slouken@1895
|
279 |
dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */
|
slouken@1895
|
280 |
dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
|
slouken@1895
|
281 |
*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
|
slouken@1542
|
282 |
|
slouken@1895
|
283 |
++srcp;
|
slouken@1895
|
284 |
++dstp;
|
slouken@1542
|
285 |
|
slouken@1895
|
286 |
n--;
|
slouken@1895
|
287 |
}
|
slouken@1542
|
288 |
|
slouken@1895
|
289 |
for (n >>= 1; n > 0; --n) {
|
slouken@1895
|
290 |
/* Two Pixels Blend */
|
slouken@1895
|
291 |
src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
|
slouken@1895
|
292 |
src2 = src1; /* 2 x src -> src2(ARGBARGB) */
|
slouken@1895
|
293 |
src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */
|
slouken@1895
|
294 |
src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */
|
slouken@1542
|
295 |
|
slouken@1895
|
296 |
dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
|
slouken@1895
|
297 |
dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
|
slouken@1895
|
298 |
dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */
|
slouken@1895
|
299 |
dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */
|
slouken@1895
|
300 |
|
slouken@1895
|
301 |
src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */
|
slouken@1895
|
302 |
src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */
|
slouken@1895
|
303 |
src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */
|
slouken@1895
|
304 |
dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */
|
slouken@1895
|
305 |
|
slouken@1895
|
306 |
src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */
|
slouken@1895
|
307 |
src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
|
slouken@1895
|
308 |
src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
|
slouken@1895
|
309 |
dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */
|
slouken@1895
|
310 |
|
slouken@1895
|
311 |
dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */
|
slouken@1895
|
312 |
dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
|
slouken@1895
|
313 |
|
slouken@1895
|
314 |
*(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */
|
slouken@1895
|
315 |
|
slouken@1895
|
316 |
srcp += 2;
|
slouken@1895
|
317 |
dstp += 2;
|
slouken@1895
|
318 |
}
|
slouken@1895
|
319 |
srcp += srcskip;
|
slouken@1895
|
320 |
dstp += dstskip;
|
slouken@1895
|
321 |
}
|
slouken@1895
|
322 |
_mm_empty();
|
slouken@1895
|
323 |
}
|
slouken@1542
|
324 |
}
|
slouken@1542
|
325 |
|
slouken@1542
|
326 |
/* fast ARGB888->(A)RGB888 blending with pixel alpha */
|
slouken@1895
|
327 |
static void
|
slouken@1895
|
328 |
BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
|
slouken@1542
|
329 |
{
|
slouken@2262
|
330 |
int width = info->dst_w;
|
slouken@2262
|
331 |
int height = info->dst_h;
|
slouken@2262
|
332 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@2267
|
333 |
int srcskip = info->src_skip >> 2;
|
slouken@2262
|
334 |
Uint32 *dstp = (Uint32 *) info->dst;
|
slouken@2267
|
335 |
int dstskip = info->dst_skip >> 2;
|
slouken@2267
|
336 |
SDL_PixelFormat *sf = info->src_fmt;
|
slouken@1895
|
337 |
Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
|
slouken@1895
|
338 |
Uint32 amask = sf->Amask;
|
slouken@1895
|
339 |
Uint32 ashift = sf->Ashift;
|
slouken@7640
|
340 |
Uint64 multmask, multmask2;
|
slouken@1542
|
341 |
|
slouken@7640
|
342 |
__m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
|
slouken@1542
|
343 |
|
slouken@1895
|
344 |
mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
|
slouken@7640
|
345 |
multmask = 0x00FF;
|
slouken@7640
|
346 |
multmask <<= (ashift * 2);
|
slouken@7640
|
347 |
multmask2 = 0x00FF00FF00FF00FF;
|
slouken@1542
|
348 |
|
slouken@1895
|
349 |
while (height--) {
|
slouken@1895
|
350 |
/* *INDENT-OFF* */
|
slouken@1542
|
351 |
DUFFS_LOOP4({
|
slouken@1542
|
352 |
Uint32 alpha = *srcp & amask;
|
slouken@1542
|
353 |
if (alpha == 0) {
|
slouken@1542
|
354 |
/* do nothing */
|
slouken@7641
|
355 |
} else if (alpha == amask) {
|
slouken@7640
|
356 |
*dstp = *srcp;
|
slouken@1542
|
357 |
} else {
|
slouken@1542
|
358 |
src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
|
slouken@1542
|
359 |
src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
|
slouken@1542
|
360 |
|
slouken@1542
|
361 |
dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
|
slouken@1542
|
362 |
dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
|
slouken@1542
|
363 |
|
slouken@1542
|
364 |
mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
|
slouken@1542
|
365 |
mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
|
slouken@1542
|
366 |
mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
|
slouken@7640
|
367 |
mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
|
slouken@7640
|
368 |
mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha*/
|
slouken@7640
|
369 |
mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha*/
|
slouken@1542
|
370 |
|
slouken@1542
|
371 |
/* blend */
|
slouken@7640
|
372 |
src1 = _mm_mullo_pi16(src1, mm_alpha);
|
slouken@7640
|
373 |
src1 = _mm_srli_pi16(src1, 8);
|
slouken@7640
|
374 |
dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
|
slouken@7640
|
375 |
dst1 = _mm_srli_pi16(dst1, 8);
|
slouken@7640
|
376 |
dst1 = _mm_add_pi16(src1, dst1);
|
slouken@7640
|
377 |
dst1 = _mm_packs_pu16(dst1, mm_zero);
|
slouken@1542
|
378 |
|
slouken@1542
|
379 |
*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
|
slouken@1542
|
380 |
}
|
slouken@1542
|
381 |
++srcp;
|
slouken@1542
|
382 |
++dstp;
|
slouken@1542
|
383 |
}, width);
|
slouken@1895
|
384 |
/* *INDENT-ON* */
|
slouken@1895
|
385 |
srcp += srcskip;
|
slouken@1895
|
386 |
dstp += dstskip;
|
slouken@1895
|
387 |
}
|
slouken@1895
|
388 |
_mm_empty();
|
slouken@1542
|
389 |
}
|
slouken@1895
|
390 |
|
slouken@2255
|
391 |
#endif /* __MMX__ */
|
slouken@689
|
392 |
|
slouken@1
|
393 |
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
slouken@1895
|
394 |
static void
|
slouken@1895
|
395 |
BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
|
slouken@0
|
396 |
{
|
slouken@2262
|
397 |
int width = info->dst_w;
|
slouken@2262
|
398 |
int height = info->dst_h;
|
slouken@2262
|
399 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@2267
|
400 |
int srcskip = info->src_skip >> 2;
|
slouken@2262
|
401 |
Uint32 *dstp = (Uint32 *) info->dst;
|
slouken@2267
|
402 |
int dstskip = info->dst_skip >> 2;
|
slouken@0
|
403 |
|
slouken@1895
|
404 |
while (height--) {
|
slouken@1895
|
405 |
/* *INDENT-OFF* */
|
slouken@0
|
406 |
DUFFS_LOOP4({
|
slouken@1
|
407 |
Uint32 s = *srcp++;
|
slouken@1
|
408 |
Uint32 d = *dstp;
|
slouken@1
|
409 |
*dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
|
slouken@1
|
410 |
+ (s & d & 0x00010101)) | 0xff000000;
|
slouken@0
|
411 |
}, width);
|
slouken@1895
|
412 |
/* *INDENT-ON* */
|
slouken@1895
|
413 |
srcp += srcskip;
|
slouken@1895
|
414 |
dstp += dstskip;
|
slouken@1895
|
415 |
}
|
slouken@0
|
416 |
}
|
slouken@0
|
417 |
|
slouken@1
|
418 |
/* fast RGB888->(A)RGB888 blending with surface alpha */
|
slouken@1895
|
419 |
static void
|
slouken@1895
|
420 |
BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo * info)
|
slouken@1
|
421 |
{
|
slouken@2267
|
422 |
unsigned alpha = info->a;
|
slouken@1895
|
423 |
if (alpha == 128) {
|
slouken@1895
|
424 |
BlitRGBtoRGBSurfaceAlpha128(info);
|
slouken@1895
|
425 |
} else {
|
slouken@2262
|
426 |
int width = info->dst_w;
|
slouken@2262
|
427 |
int height = info->dst_h;
|
slouken@2262
|
428 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@2267
|
429 |
int srcskip = info->src_skip >> 2;
|
slouken@2262
|
430 |
Uint32 *dstp = (Uint32 *) info->dst;
|
slouken@2267
|
431 |
int dstskip = info->dst_skip >> 2;
|
slouken@1895
|
432 |
Uint32 s;
|
slouken@1895
|
433 |
Uint32 d;
|
slouken@1895
|
434 |
Uint32 s1;
|
slouken@1895
|
435 |
Uint32 d1;
|
slouken@1
|
436 |
|
slouken@1895
|
437 |
while (height--) {
|
slouken@1895
|
438 |
/* *INDENT-OFF* */
|
slouken@3035
|
439 |
DUFFS_LOOP4({
|
slouken@1
|
440 |
s = *srcp;
|
slouken@1
|
441 |
d = *dstp;
|
slouken@1
|
442 |
s1 = s & 0xff00ff;
|
slouken@1
|
443 |
d1 = d & 0xff00ff;
|
slouken@1
|
444 |
d1 = (d1 + ((s1 - d1) * alpha >> 8))
|
slouken@1
|
445 |
& 0xff00ff;
|
slouken@1
|
446 |
s &= 0xff00;
|
slouken@1
|
447 |
d &= 0xff00;
|
slouken@1
|
448 |
d = (d + ((s - d) * alpha >> 8)) & 0xff00;
|
slouken@1
|
449 |
*dstp = d1 | d | 0xff000000;
|
slouken@1
|
450 |
++srcp;
|
slouken@1
|
451 |
++dstp;
|
slouken@1
|
452 |
}, width);
|
slouken@1895
|
453 |
/* *INDENT-ON* */
|
slouken@1895
|
454 |
srcp += srcskip;
|
slouken@1895
|
455 |
dstp += dstskip;
|
slouken@1895
|
456 |
}
|
slouken@1895
|
457 |
}
|
slouken@1
|
458 |
}
|
slouken@1
|
459 |
|
slouken@0
|
460 |
/* fast ARGB888->(A)RGB888 blending with pixel alpha */
|
slouken@1895
|
461 |
static void
|
slouken@1895
|
462 |
BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info)
|
slouken@0
|
463 |
{
|
slouken@2262
|
464 |
int width = info->dst_w;
|
slouken@2262
|
465 |
int height = info->dst_h;
|
slouken@2262
|
466 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@2267
|
467 |
int srcskip = info->src_skip >> 2;
|
slouken@2262
|
468 |
Uint32 *dstp = (Uint32 *) info->dst;
|
slouken@2267
|
469 |
int dstskip = info->dst_skip >> 2;
|
slouken@0
|
470 |
|
slouken@1895
|
471 |
while (height--) {
|
slouken@1895
|
472 |
/* *INDENT-OFF* */
|
slouken@0
|
473 |
DUFFS_LOOP4({
|
slouken@0
|
474 |
Uint32 dalpha;
|
slouken@0
|
475 |
Uint32 d;
|
slouken@0
|
476 |
Uint32 s1;
|
slouken@0
|
477 |
Uint32 d1;
|
slouken@0
|
478 |
Uint32 s = *srcp;
|
slouken@0
|
479 |
Uint32 alpha = s >> 24;
|
slouken@0
|
480 |
/* FIXME: Here we special-case opaque alpha since the
|
slouken@0
|
481 |
compositioning used (>>8 instead of /255) doesn't handle
|
slouken@0
|
482 |
it correctly. Also special-case alpha=0 for speed?
|
slouken@0
|
483 |
Benchmark this! */
|
slouken@7640
|
484 |
if (alpha) {
|
slouken@7640
|
485 |
if (alpha == SDL_ALPHA_OPAQUE) {
|
slouken@7640
|
486 |
*dstp = *srcp;
|
slouken@689
|
487 |
} else {
|
slouken@0
|
488 |
/*
|
slouken@0
|
489 |
* take out the middle component (green), and process
|
slouken@0
|
490 |
* the other two in parallel. One multiply less.
|
slouken@0
|
491 |
*/
|
slouken@0
|
492 |
d = *dstp;
|
slouken@7640
|
493 |
dalpha = d >> 24;
|
slouken@0
|
494 |
s1 = s & 0xff00ff;
|
slouken@0
|
495 |
d1 = d & 0xff00ff;
|
slouken@0
|
496 |
d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
|
slouken@0
|
497 |
s &= 0xff00;
|
slouken@0
|
498 |
d &= 0xff00;
|
slouken@0
|
499 |
d = (d + ((s - d) * alpha >> 8)) & 0xff00;
|
slouken@7640
|
500 |
dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8);
|
slouken@7640
|
501 |
*dstp = d1 | d | (dalpha << 24);
|
slouken@689
|
502 |
}
|
slouken@0
|
503 |
}
|
slouken@0
|
504 |
++srcp;
|
slouken@0
|
505 |
++dstp;
|
slouken@0
|
506 |
}, width);
|
slouken@1895
|
507 |
/* *INDENT-ON* */
|
slouken@1895
|
508 |
srcp += srcskip;
|
slouken@1895
|
509 |
dstp += dstskip;
|
slouken@1895
|
510 |
}
|
slouken@0
|
511 |
}
|
slouken@0
|
512 |
|
slouken@5389
|
513 |
#ifdef __3dNOW__
|
slouken@5389
|
514 |
/* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
|
slouken@5389
|
515 |
static void
|
slouken@5389
|
516 |
BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
|
slouken@5389
|
517 |
{
|
slouken@5389
|
518 |
int width = info->dst_w;
|
slouken@5389
|
519 |
int height = info->dst_h;
|
slouken@5389
|
520 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@5389
|
521 |
int srcskip = info->src_skip >> 2;
|
slouken@5389
|
522 |
Uint32 *dstp = (Uint32 *) info->dst;
|
slouken@5389
|
523 |
int dstskip = info->dst_skip >> 2;
|
slouken@5389
|
524 |
SDL_PixelFormat *sf = info->src_fmt;
|
slouken@5389
|
525 |
Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
|
slouken@5389
|
526 |
Uint32 amask = sf->Amask;
|
slouken@5389
|
527 |
Uint32 ashift = sf->Ashift;
|
slouken@7640
|
528 |
Uint64 multmask, multmask2;
|
slouken@5389
|
529 |
|
slouken@7640
|
530 |
__m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
|
slouken@5389
|
531 |
|
slouken@5389
|
532 |
mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
|
slouken@7640
|
533 |
multmask = 0x00FF;
|
slouken@5389
|
534 |
multmask <<= (ashift * 2);
|
slouken@7640
|
535 |
multmask2 = 0x00FF00FF00FF00FF;
|
slouken@5389
|
536 |
|
slouken@5389
|
537 |
while (height--) {
|
slouken@5389
|
538 |
/* *INDENT-OFF* */
|
slouken@5389
|
539 |
DUFFS_LOOP4({
|
slouken@5389
|
540 |
Uint32 alpha;
|
slouken@5389
|
541 |
|
slouken@5389
|
542 |
_m_prefetch(srcp + 16);
|
slouken@5389
|
543 |
_m_prefetch(dstp + 16);
|
slouken@5389
|
544 |
|
slouken@5389
|
545 |
alpha = *srcp & amask;
|
slouken@5389
|
546 |
if (alpha == 0) {
|
slouken@5389
|
547 |
/* do nothing */
|
slouken@7641
|
548 |
} else if (alpha == amask) {
|
slouken@7640
|
549 |
*dstp = *srcp;
|
slouken@5389
|
550 |
} else {
|
slouken@5389
|
551 |
src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
|
slouken@5389
|
552 |
src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
|
slouken@5389
|
553 |
|
slouken@5389
|
554 |
dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
|
slouken@5389
|
555 |
dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
|
slouken@5389
|
556 |
|
slouken@5389
|
557 |
mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
|
slouken@5389
|
558 |
mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
|
slouken@5389
|
559 |
mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
|
slouken@7640
|
560 |
mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
|
slouken@7640
|
561 |
mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha*/
|
slouken@7640
|
562 |
mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha*/
|
slouken@7640
|
563 |
|
slouken@5389
|
564 |
|
slouken@5389
|
565 |
/* blend */
|
slouken@7640
|
566 |
src1 = _mm_mullo_pi16(src1, mm_alpha);
|
slouken@7640
|
567 |
src1 = _mm_srli_pi16(src1, 8);
|
slouken@7640
|
568 |
dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
|
slouken@7640
|
569 |
dst1 = _mm_srli_pi16(dst1, 8);
|
slouken@7640
|
570 |
dst1 = _mm_add_pi16(src1, dst1);
|
slouken@7640
|
571 |
dst1 = _mm_packs_pu16(dst1, mm_zero);
|
slouken@5389
|
572 |
|
slouken@5389
|
573 |
*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
|
slouken@5389
|
574 |
}
|
slouken@5389
|
575 |
++srcp;
|
slouken@5389
|
576 |
++dstp;
|
slouken@5389
|
577 |
}, width);
|
slouken@5389
|
578 |
/* *INDENT-ON* */
|
slouken@5389
|
579 |
srcp += srcskip;
|
slouken@5389
|
580 |
dstp += dstskip;
|
slouken@5389
|
581 |
}
|
slouken@5389
|
582 |
_mm_empty();
|
slouken@5389
|
583 |
}
|
slouken@5389
|
584 |
|
slouken@5389
|
585 |
#endif /* __3dNOW__ */
|
slouken@5389
|
586 |
|
slouken@1
|
587 |
/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
|
slouken@1
|
588 |
|
slouken@1
|
589 |
/*
 * Blend a single 16 bit pixel at 50%.
 * `mask` selects every channel bit except the lowest bit of each channel;
 * the low bits are added back only where both pixels have them set.
 * Arguments are evaluated more than once, so pass side-effect-free values.
 */
#define BLEND16_50(d, s, mask) \
    (((((s) & (mask)) + ((d) & (mask))) >> 1) \
     + ((s) & (d) & (~(mask) & 0xffff)))

/*
 * Blend two 16 bit pixels, packed in one 32 bit word, at 50%.
 * The 16 bit mask is widened to Uint32 before being duplicated into the
 * upper half so that the shift never overflows a signed int.
 * Arguments are evaluated more than once, so pass side-effect-free values.
 */
#define BLEND2x16_50(d, s, mask) \
    ((((s) & ((Uint32) (mask) | ((Uint32) (mask) << 16))) >> 1) \
     + (((d) & ((Uint32) (mask) | ((Uint32) (mask) << 16))) >> 1) \
     + ((s) & (d) & (~((Uint32) (mask) | ((Uint32) (mask) << 16)))))
|
slouken@1
|
597 |
|
slouken@1895
|
598 |
static void
|
slouken@1895
|
599 |
Blit16to16SurfaceAlpha128(SDL_BlitInfo * info, Uint16 mask)
|
slouken@0
|
600 |
{
|
slouken@2262
|
601 |
int width = info->dst_w;
|
slouken@2262
|
602 |
int height = info->dst_h;
|
slouken@2262
|
603 |
Uint16 *srcp = (Uint16 *) info->src;
|
slouken@2267
|
604 |
int srcskip = info->src_skip >> 1;
|
slouken@2262
|
605 |
Uint16 *dstp = (Uint16 *) info->dst;
|
slouken@2267
|
606 |
int dstskip = info->dst_skip >> 1;
|
slouken@0
|
607 |
|
slouken@1895
|
608 |
while (height--) {
|
slouken@1895
|
609 |
if (((uintptr_t) srcp ^ (uintptr_t) dstp) & 2) {
|
slouken@1895
|
610 |
/*
|
slouken@1895
|
611 |
* Source and destination not aligned, pipeline it.
|
slouken@1895
|
612 |
* This is mostly a win for big blits but no loss for
|
slouken@1895
|
613 |
* small ones
|
slouken@1895
|
614 |
*/
|
slouken@1895
|
615 |
Uint32 prev_sw;
|
slouken@1895
|
616 |
int w = width;
|
slouken@1
|
617 |
|
slouken@1895
|
618 |
/* handle odd destination */
|
slouken@1895
|
619 |
if ((uintptr_t) dstp & 2) {
|
slouken@1895
|
620 |
Uint16 d = *dstp, s = *srcp;
|
slouken@1895
|
621 |
*dstp = BLEND16_50(d, s, mask);
|
slouken@1895
|
622 |
dstp++;
|
slouken@1895
|
623 |
srcp++;
|
slouken@1895
|
624 |
w--;
|
slouken@1895
|
625 |
}
|
slouken@1895
|
626 |
srcp++; /* srcp is now 32-bit aligned */
|
slouken@1
|
627 |
|
slouken@1895
|
628 |
/* bootstrap pipeline with first halfword */
|
slouken@1895
|
629 |
prev_sw = ((Uint32 *) srcp)[-1];
|
slouken@1
|
630 |
|
slouken@1895
|
631 |
while (w > 1) {
|
slouken@1895
|
632 |
Uint32 sw, dw, s;
|
slouken@1895
|
633 |
sw = *(Uint32 *) srcp;
|
slouken@1895
|
634 |
dw = *(Uint32 *) dstp;
|
slouken@1443
|
635 |
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
|
slouken@1895
|
636 |
s = (prev_sw << 16) + (sw >> 16);
|
slouken@1443
|
637 |
#else
|
slouken@1895
|
638 |
s = (prev_sw >> 16) + (sw << 16);
|
slouken@1443
|
639 |
#endif
|
slouken@1895
|
640 |
prev_sw = sw;
|
slouken@1895
|
641 |
*(Uint32 *) dstp = BLEND2x16_50(dw, s, mask);
|
slouken@1895
|
642 |
dstp += 2;
|
slouken@1895
|
643 |
srcp += 2;
|
slouken@1895
|
644 |
w -= 2;
|
slouken@1895
|
645 |
}
|
slouken@1
|
646 |
|
slouken@1895
|
647 |
/* final pixel if any */
|
slouken@1895
|
648 |
if (w) {
|
slouken@1895
|
649 |
Uint16 d = *dstp, s;
|
slouken@1443
|
650 |
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
|
slouken@1895
|
651 |
s = (Uint16) prev_sw;
|
slouken@1443
|
652 |
#else
|
slouken@1895
|
653 |
s = (Uint16) (prev_sw >> 16);
|
slouken@1443
|
654 |
#endif
|
slouken@1895
|
655 |
*dstp = BLEND16_50(d, s, mask);
|
slouken@1895
|
656 |
srcp++;
|
slouken@1895
|
657 |
dstp++;
|
slouken@1895
|
658 |
}
|
slouken@1895
|
659 |
srcp += srcskip - 1;
|
slouken@1895
|
660 |
dstp += dstskip;
|
slouken@1895
|
661 |
} else {
|
slouken@1895
|
662 |
/* source and destination are aligned */
|
slouken@1895
|
663 |
int w = width;
|
slouken@1
|
664 |
|
slouken@1895
|
665 |
/* first odd pixel? */
|
slouken@1895
|
666 |
if ((uintptr_t) srcp & 2) {
|
slouken@1895
|
667 |
Uint16 d = *dstp, s = *srcp;
|
slouken@1895
|
668 |
*dstp = BLEND16_50(d, s, mask);
|
slouken@1895
|
669 |
srcp++;
|
slouken@1895
|
670 |
dstp++;
|
slouken@1895
|
671 |
w--;
|
slouken@1895
|
672 |
}
|
slouken@1895
|
673 |
/* srcp and dstp are now 32-bit aligned */
|
slouken@1
|
674 |
|
slouken@1895
|
675 |
while (w > 1) {
|
slouken@1895
|
676 |
Uint32 sw = *(Uint32 *) srcp;
|
slouken@1895
|
677 |
Uint32 dw = *(Uint32 *) dstp;
|
slouken@1895
|
678 |
*(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask);
|
slouken@1895
|
679 |
srcp += 2;
|
slouken@1895
|
680 |
dstp += 2;
|
slouken@1895
|
681 |
w -= 2;
|
slouken@1895
|
682 |
}
|
slouken@1
|
683 |
|
slouken@1895
|
684 |
/* last odd pixel? */
|
slouken@1895
|
685 |
if (w) {
|
slouken@1895
|
686 |
Uint16 d = *dstp, s = *srcp;
|
slouken@1895
|
687 |
*dstp = BLEND16_50(d, s, mask);
|
slouken@1895
|
688 |
srcp++;
|
slouken@1895
|
689 |
dstp++;
|
slouken@1895
|
690 |
}
|
slouken@1895
|
691 |
srcp += srcskip;
|
slouken@1895
|
692 |
dstp += dstskip;
|
slouken@1895
|
693 |
}
|
slouken@1895
|
694 |
}
|
slouken@1
|
695 |
}
|
slouken@1
|
696 |
|
slouken@2255
|
697 |
#ifdef __MMX__
|
slouken@689
|
698 |
|
slouken@1542
|
699 |
/* fast RGB565->RGB565 blending with surface alpha */
|
slouken@1895
|
700 |
static void
|
slouken@1895
|
701 |
Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info)
|
slouken@1542
|
702 |
{
|
slouken@2267
|
703 |
unsigned alpha = info->a;
|
slouken@1895
|
704 |
if (alpha == 128) {
|
slouken@1895
|
705 |
Blit16to16SurfaceAlpha128(info, 0xf7de);
|
slouken@1895
|
706 |
} else {
|
slouken@2262
|
707 |
int width = info->dst_w;
|
slouken@2262
|
708 |
int height = info->dst_h;
|
slouken@2262
|
709 |
Uint16 *srcp = (Uint16 *) info->src;
|
slouken@2267
|
710 |
int srcskip = info->src_skip >> 1;
|
slouken@2262
|
711 |
Uint16 *dstp = (Uint16 *) info->dst;
|
slouken@2267
|
712 |
int dstskip = info->dst_skip >> 1;
|
slouken@1895
|
713 |
Uint32 s, d;
|
slouken@1542
|
714 |
|
slouken@1895
|
715 |
__m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
|
slouken@1542
|
716 |
|
slouken@1895
|
717 |
alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
|
slouken@1895
|
718 |
mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
|
slouken@1895
|
719 |
alpha >>= 3; /* downscale alpha to 5 bits */
|
slouken@1895
|
720 |
|
slouken@1895
|
721 |
mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
|
slouken@1895
|
722 |
mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
|
slouken@1895
|
723 |
/* position alpha to allow for mullo and mulhi on diff channels
|
slouken@1895
|
724 |
to reduce the number of operations */
|
slouken@1895
|
725 |
mm_alpha = _mm_slli_si64(mm_alpha, 3);
|
slouken@1895
|
726 |
|
slouken@1895
|
727 |
/* Setup the 565 color channel masks */
|
slouken@1895
|
728 |
gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */
|
slouken@1895
|
729 |
bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
|
slouken@1895
|
730 |
|
slouken@1895
|
731 |
while (height--) {
|
slouken@1895
|
732 |
/* *INDENT-OFF* */
|
slouken@3035
|
733 |
DUFFS_LOOP_124(
|
slouken@1542
|
734 |
{
|
slouken@1542
|
735 |
s = *srcp++;
|
slouken@1542
|
736 |
d = *dstp;
|
slouken@1542
|
737 |
/*
|
slouken@1542
|
738 |
* shift out the middle component (green) to
|
slouken@1542
|
739 |
* the high 16 bits, and process all three RGB
|
slouken@1542
|
740 |
* components at the same time.
|
slouken@1542
|
741 |
*/
|
slouken@1542
|
742 |
s = (s | s << 16) & 0x07e0f81f;
|
slouken@1542
|
743 |
d = (d | d << 16) & 0x07e0f81f;
|
slouken@1542
|
744 |
d += (s - d) * alpha >> 5;
|
slouken@1542
|
745 |
d &= 0x07e0f81f;
|
slouken@1546
|
746 |
*dstp++ = (Uint16)(d | d >> 16);
|
slouken@1542
|
747 |
},{
|
slouken@1542
|
748 |
s = *srcp++;
|
slouken@1542
|
749 |
d = *dstp;
|
slouken@1542
|
750 |
/*
|
slouken@1542
|
751 |
* shift out the middle component (green) to
|
slouken@1542
|
752 |
* the high 16 bits, and process all three RGB
|
slouken@1542
|
753 |
* components at the same time.
|
slouken@1542
|
754 |
*/
|
slouken@1542
|
755 |
s = (s | s << 16) & 0x07e0f81f;
|
slouken@1542
|
756 |
d = (d | d << 16) & 0x07e0f81f;
|
slouken@1542
|
757 |
d += (s - d) * alpha >> 5;
|
slouken@1542
|
758 |
d &= 0x07e0f81f;
|
slouken@1546
|
759 |
*dstp++ = (Uint16)(d | d >> 16);
|
slouken@1542
|
760 |
s = *srcp++;
|
slouken@1542
|
761 |
d = *dstp;
|
slouken@1542
|
762 |
/*
|
slouken@1542
|
763 |
* shift out the middle component (green) to
|
slouken@1542
|
764 |
* the high 16 bits, and process all three RGB
|
slouken@1542
|
765 |
* components at the same time.
|
slouken@1542
|
766 |
*/
|
slouken@1542
|
767 |
s = (s | s << 16) & 0x07e0f81f;
|
slouken@1542
|
768 |
d = (d | d << 16) & 0x07e0f81f;
|
slouken@1542
|
769 |
d += (s - d) * alpha >> 5;
|
slouken@1542
|
770 |
d &= 0x07e0f81f;
|
slouken@1546
|
771 |
*dstp++ = (Uint16)(d | d >> 16);
|
slouken@1542
|
772 |
},{
|
slouken@1542
|
773 |
src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */
|
slouken@1542
|
774 |
dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */
|
slouken@1542
|
775 |
|
slouken@1542
|
776 |
/* red */
|
slouken@1542
|
777 |
src2 = src1;
|
slouken@1542
|
778 |
src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */
|
slouken@1542
|
779 |
|
slouken@1542
|
780 |
dst2 = dst1;
|
slouken@1542
|
781 |
dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */
|
slouken@1542
|
782 |
|
slouken@1542
|
783 |
/* blend */
|
slouken@1542
|
784 |
src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
|
slouken@1542
|
785 |
src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
|
slouken@1542
|
786 |
src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
|
slouken@1542
|
787 |
dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
|
slouken@1542
|
788 |
dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */
|
slouken@1542
|
789 |
|
slouken@1542
|
790 |
mm_res = dst2; /* RED -> mm_res */
|
slouken@1542
|
791 |
|
slouken@1542
|
792 |
/* green -- process the bits in place */
|
slouken@1542
|
793 |
src2 = src1;
|
slouken@1542
|
794 |
src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */
|
slouken@1542
|
795 |
|
slouken@1542
|
796 |
dst2 = dst1;
|
slouken@1542
|
797 |
dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */
|
slouken@1542
|
798 |
|
slouken@1542
|
799 |
/* blend */
|
slouken@1542
|
800 |
src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
|
slouken@1542
|
801 |
src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
|
slouken@1542
|
802 |
src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
|
slouken@1542
|
803 |
dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
|
slouken@1542
|
804 |
|
slouken@1542
|
805 |
mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */
|
slouken@1542
|
806 |
|
slouken@1542
|
807 |
/* blue */
|
slouken@1542
|
808 |
src2 = src1;
|
slouken@1542
|
809 |
src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */
|
slouken@1542
|
810 |
|
slouken@1542
|
811 |
dst2 = dst1;
|
slouken@1542
|
812 |
dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */
|
slouken@1542
|
813 |
|
slouken@1542
|
814 |
/* blend */
|
slouken@1542
|
815 |
src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
|
slouken@1542
|
816 |
src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
|
slouken@1542
|
817 |
src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
|
slouken@1542
|
818 |
dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
|
slouken@1542
|
819 |
dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */
|
slouken@1542
|
820 |
|
slouken@1542
|
821 |
mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */
|
slouken@1542
|
822 |
|
slouken@1542
|
823 |
*(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
|
slouken@1542
|
824 |
|
slouken@1542
|
825 |
srcp += 4;
|
slouken@1542
|
826 |
dstp += 4;
|
slouken@1895
|
827 |
}, width);
|
slouken@1895
|
828 |
/* *INDENT-ON* */
|
slouken@1895
|
829 |
srcp += srcskip;
|
slouken@1895
|
830 |
dstp += dstskip;
|
slouken@1895
|
831 |
}
|
slouken@1895
|
832 |
_mm_empty();
|
slouken@1895
|
833 |
}
|
slouken@1542
|
834 |
}
|
slouken@1542
|
835 |
|
slouken@1542
|
836 |
/* fast RGB555->RGB555 blending with surface alpha */
|
slouken@1895
|
837 |
static void
|
slouken@1895
|
838 |
Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info)
|
slouken@1542
|
839 |
{
|
slouken@2267
|
840 |
unsigned alpha = info->a;
|
slouken@1895
|
841 |
if (alpha == 128) {
|
slouken@1895
|
842 |
Blit16to16SurfaceAlpha128(info, 0xfbde);
|
slouken@1895
|
843 |
} else {
|
slouken@2262
|
844 |
int width = info->dst_w;
|
slouken@2262
|
845 |
int height = info->dst_h;
|
slouken@2262
|
846 |
Uint16 *srcp = (Uint16 *) info->src;
|
slouken@2267
|
847 |
int srcskip = info->src_skip >> 1;
|
slouken@2262
|
848 |
Uint16 *dstp = (Uint16 *) info->dst;
|
slouken@2267
|
849 |
int dstskip = info->dst_skip >> 1;
|
slouken@1895
|
850 |
Uint32 s, d;
|
slouken@1542
|
851 |
|
slouken@1895
|
852 |
__m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
|
slouken@1542
|
853 |
|
slouken@1895
|
854 |
alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
|
slouken@1895
|
855 |
mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
|
slouken@1895
|
856 |
alpha >>= 3; /* downscale alpha to 5 bits */
|
slouken@1542
|
857 |
|
slouken@1895
|
858 |
mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
|
slouken@1895
|
859 |
mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
|
slouken@1895
|
860 |
/* position alpha to allow for mullo and mulhi on diff channels
|
slouken@1895
|
861 |
to reduce the number of operations */
|
slouken@1895
|
862 |
mm_alpha = _mm_slli_si64(mm_alpha, 3);
|
slouken@1895
|
863 |
|
slouken@1895
|
864 |
/* Setup the 555 color channel masks */
|
slouken@1895
|
865 |
rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */
|
slouken@1895
|
866 |
gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */
|
slouken@1895
|
867 |
bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
|
slouken@1895
|
868 |
|
slouken@1895
|
869 |
while (height--) {
|
slouken@1895
|
870 |
/* *INDENT-OFF* */
|
slouken@3035
|
871 |
DUFFS_LOOP_124(
|
slouken@1542
|
872 |
{
|
slouken@1542
|
873 |
s = *srcp++;
|
slouken@1542
|
874 |
d = *dstp;
|
slouken@1542
|
875 |
/*
|
slouken@1542
|
876 |
* shift out the middle component (green) to
|
slouken@1542
|
877 |
* the high 16 bits, and process all three RGB
|
slouken@1542
|
878 |
* components at the same time.
|
slouken@1542
|
879 |
*/
|
slouken@1542
|
880 |
s = (s | s << 16) & 0x03e07c1f;
|
slouken@1542
|
881 |
d = (d | d << 16) & 0x03e07c1f;
|
slouken@1542
|
882 |
d += (s - d) * alpha >> 5;
|
slouken@1542
|
883 |
d &= 0x03e07c1f;
|
slouken@1546
|
884 |
*dstp++ = (Uint16)(d | d >> 16);
|
slouken@1542
|
885 |
},{
|
slouken@1542
|
886 |
s = *srcp++;
|
slouken@1542
|
887 |
d = *dstp;
|
slouken@1542
|
888 |
/*
|
slouken@1542
|
889 |
* shift out the middle component (green) to
|
slouken@1542
|
890 |
* the high 16 bits, and process all three RGB
|
slouken@1542
|
891 |
* components at the same time.
|
slouken@1542
|
892 |
*/
|
slouken@1542
|
893 |
s = (s | s << 16) & 0x03e07c1f;
|
slouken@1542
|
894 |
d = (d | d << 16) & 0x03e07c1f;
|
slouken@1542
|
895 |
d += (s - d) * alpha >> 5;
|
slouken@1542
|
896 |
d &= 0x03e07c1f;
|
slouken@1546
|
897 |
*dstp++ = (Uint16)(d | d >> 16);
|
slouken@1542
|
898 |
s = *srcp++;
|
slouken@1542
|
899 |
d = *dstp;
|
slouken@1542
|
900 |
/*
|
slouken@1542
|
901 |
* shift out the middle component (green) to
|
slouken@1542
|
902 |
* the high 16 bits, and process all three RGB
|
slouken@1542
|
903 |
* components at the same time.
|
slouken@1542
|
904 |
*/
|
slouken@1542
|
905 |
s = (s | s << 16) & 0x03e07c1f;
|
slouken@1542
|
906 |
d = (d | d << 16) & 0x03e07c1f;
|
slouken@1542
|
907 |
d += (s - d) * alpha >> 5;
|
slouken@1542
|
908 |
d &= 0x03e07c1f;
|
slouken@1546
|
909 |
*dstp++ = (Uint16)(d | d >> 16);
|
slouken@1542
|
910 |
},{
|
slouken@1542
|
911 |
src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */
|
slouken@1542
|
912 |
dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */
|
slouken@1542
|
913 |
|
slouken@1542
|
914 |
/* red -- process the bits in place */
|
slouken@1542
|
915 |
src2 = src1;
|
slouken@1542
|
916 |
src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */
|
slouken@1542
|
917 |
|
slouken@1542
|
918 |
dst2 = dst1;
|
slouken@1542
|
919 |
dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */
|
slouken@1542
|
920 |
|
slouken@1542
|
921 |
/* blend */
|
slouken@1542
|
922 |
src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
|
slouken@1542
|
923 |
src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
|
slouken@1542
|
924 |
src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
|
slouken@1542
|
925 |
dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
|
slouken@1542
|
926 |
dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */
|
slouken@1542
|
927 |
|
slouken@1542
|
928 |
mm_res = dst2; /* RED -> mm_res */
|
slouken@1542
|
929 |
|
slouken@1542
|
930 |
/* green -- process the bits in place */
|
slouken@1542
|
931 |
src2 = src1;
|
slouken@1542
|
932 |
src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */
|
slouken@1542
|
933 |
|
slouken@1542
|
934 |
dst2 = dst1;
|
slouken@1542
|
935 |
dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */
|
slouken@1542
|
936 |
|
slouken@1542
|
937 |
/* blend */
|
slouken@1542
|
938 |
src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
|
slouken@1542
|
939 |
src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
|
slouken@1542
|
940 |
src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
|
slouken@1542
|
941 |
dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
|
slouken@1542
|
942 |
|
slouken@1542
|
943 |
mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */
|
slouken@1542
|
944 |
|
slouken@1542
|
945 |
/* blue */
|
slouken@1542
|
946 |
src2 = src1; /* src -> src2 */
|
slouken@1542
|
947 |
src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */
|
slouken@1542
|
948 |
|
slouken@1542
|
949 |
dst2 = dst1; /* dst -> dst2 */
|
slouken@1542
|
950 |
dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */
|
slouken@1542
|
951 |
|
slouken@1542
|
952 |
/* blend */
|
slouken@1542
|
953 |
src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
|
slouken@1542
|
954 |
src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
|
slouken@1542
|
955 |
src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
|
slouken@1542
|
956 |
dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
|
slouken@1542
|
957 |
dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */
|
slouken@1542
|
958 |
|
slouken@1542
|
959 |
mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */
|
slouken@1542
|
960 |
|
slouken@1542
|
961 |
*(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
|
slouken@1542
|
962 |
|
slouken@1542
|
963 |
srcp += 4;
|
slouken@1542
|
964 |
dstp += 4;
|
slouken@1895
|
965 |
}, width);
|
slouken@1895
|
966 |
/* *INDENT-ON* */
|
slouken@1895
|
967 |
srcp += srcskip;
|
slouken@1895
|
968 |
dstp += dstskip;
|
slouken@1895
|
969 |
}
|
slouken@1895
|
970 |
_mm_empty();
|
slouken@1895
|
971 |
}
|
slouken@1542
|
972 |
}
|
slouken@2255
|
973 |
|
slouken@2255
|
974 |
#endif /* __MMX__ */
|
slouken@689
|
975 |
|
slouken@1
|
976 |
/* fast RGB565->RGB565 blending with surface alpha */
|
slouken@1895
|
977 |
static void
|
slouken@1895
|
978 |
Blit565to565SurfaceAlpha(SDL_BlitInfo * info)
|
slouken@1
|
979 |
{
|
slouken@2267
|
980 |
unsigned alpha = info->a;
|
slouken@1895
|
981 |
if (alpha == 128) {
|
slouken@1895
|
982 |
Blit16to16SurfaceAlpha128(info, 0xf7de);
|
slouken@1895
|
983 |
} else {
|
slouken@2262
|
984 |
int width = info->dst_w;
|
slouken@2262
|
985 |
int height = info->dst_h;
|
slouken@2262
|
986 |
Uint16 *srcp = (Uint16 *) info->src;
|
slouken@2267
|
987 |
int srcskip = info->src_skip >> 1;
|
slouken@2262
|
988 |
Uint16 *dstp = (Uint16 *) info->dst;
|
slouken@2267
|
989 |
int dstskip = info->dst_skip >> 1;
|
slouken@1895
|
990 |
alpha >>= 3; /* downscale alpha to 5 bits */
|
slouken@1
|
991 |
|
slouken@1895
|
992 |
while (height--) {
|
slouken@1895
|
993 |
/* *INDENT-OFF* */
|
slouken@1
|
994 |
DUFFS_LOOP4({
|
slouken@1
|
995 |
Uint32 s = *srcp++;
|
slouken@1
|
996 |
Uint32 d = *dstp;
|
slouken@1
|
997 |
/*
|
slouken@1
|
998 |
* shift out the middle component (green) to
|
slouken@1
|
999 |
* the high 16 bits, and process all three RGB
|
slouken@1
|
1000 |
* components at the same time.
|
slouken@1
|
1001 |
*/
|
slouken@1
|
1002 |
s = (s | s << 16) & 0x07e0f81f;
|
slouken@1
|
1003 |
d = (d | d << 16) & 0x07e0f81f;
|
slouken@1
|
1004 |
d += (s - d) * alpha >> 5;
|
slouken@1
|
1005 |
d &= 0x07e0f81f;
|
slouken@1428
|
1006 |
*dstp++ = (Uint16)(d | d >> 16);
|
slouken@1
|
1007 |
}, width);
|
slouken@1895
|
1008 |
/* *INDENT-ON* */
|
slouken@1895
|
1009 |
srcp += srcskip;
|
slouken@1895
|
1010 |
dstp += dstskip;
|
slouken@1895
|
1011 |
}
|
slouken@1895
|
1012 |
}
|
slouken@0
|
1013 |
}
|
slouken@0
|
1014 |
|
slouken@0
|
1015 |
/* fast RGB555->RGB555 blending with surface alpha */
|
slouken@1895
|
1016 |
static void
|
slouken@1895
|
1017 |
Blit555to555SurfaceAlpha(SDL_BlitInfo * info)
|
slouken@0
|
1018 |
{
|
slouken@2267
|
1019 |
unsigned alpha = info->a; /* downscale alpha to 5 bits */
|
slouken@1895
|
1020 |
if (alpha == 128) {
|
slouken@1895
|
1021 |
Blit16to16SurfaceAlpha128(info, 0xfbde);
|
slouken@1895
|
1022 |
} else {
|
slouken@2262
|
1023 |
int width = info->dst_w;
|
slouken@2262
|
1024 |
int height = info->dst_h;
|
slouken@2262
|
1025 |
Uint16 *srcp = (Uint16 *) info->src;
|
slouken@2267
|
1026 |
int srcskip = info->src_skip >> 1;
|
slouken@2262
|
1027 |
Uint16 *dstp = (Uint16 *) info->dst;
|
slouken@2267
|
1028 |
int dstskip = info->dst_skip >> 1;
|
slouken@1895
|
1029 |
alpha >>= 3; /* downscale alpha to 5 bits */
|
slouken@0
|
1030 |
|
slouken@1895
|
1031 |
while (height--) {
|
slouken@1895
|
1032 |
/* *INDENT-OFF* */
|
slouken@1
|
1033 |
DUFFS_LOOP4({
|
slouken@1
|
1034 |
Uint32 s = *srcp++;
|
slouken@1
|
1035 |
Uint32 d = *dstp;
|
slouken@1
|
1036 |
/*
|
slouken@1
|
1037 |
* shift out the middle component (green) to
|
slouken@1
|
1038 |
* the high 16 bits, and process all three RGB
|
slouken@1
|
1039 |
* components at the same time.
|
slouken@1
|
1040 |
*/
|
slouken@1
|
1041 |
s = (s | s << 16) & 0x03e07c1f;
|
slouken@1
|
1042 |
d = (d | d << 16) & 0x03e07c1f;
|
slouken@1
|
1043 |
d += (s - d) * alpha >> 5;
|
slouken@1
|
1044 |
d &= 0x03e07c1f;
|
slouken@1428
|
1045 |
*dstp++ = (Uint16)(d | d >> 16);
|
slouken@1
|
1046 |
}, width);
|
slouken@1895
|
1047 |
/* *INDENT-ON* */
|
slouken@1895
|
1048 |
srcp += srcskip;
|
slouken@1895
|
1049 |
dstp += dstskip;
|
slouken@1895
|
1050 |
}
|
slouken@1895
|
1051 |
}
|
slouken@0
|
1052 |
}
|
slouken@0
|
1053 |
|
slouken@0
|
1054 |
/* fast ARGB8888->RGB565 blending with pixel alpha */
|
slouken@1895
|
1055 |
static void
|
slouken@1895
|
1056 |
BlitARGBto565PixelAlpha(SDL_BlitInfo * info)
|
slouken@0
|
1057 |
{
|
slouken@2262
|
1058 |
int width = info->dst_w;
|
slouken@2262
|
1059 |
int height = info->dst_h;
|
slouken@2262
|
1060 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@2267
|
1061 |
int srcskip = info->src_skip >> 2;
|
slouken@2262
|
1062 |
Uint16 *dstp = (Uint16 *) info->dst;
|
slouken@2267
|
1063 |
int dstskip = info->dst_skip >> 1;
|
slouken@0
|
1064 |
|
slouken@1895
|
1065 |
while (height--) {
|
slouken@1895
|
1066 |
/* *INDENT-OFF* */
|
slouken@0
|
1067 |
DUFFS_LOOP4({
|
slouken@0
|
1068 |
Uint32 s = *srcp;
|
slouken@0
|
1069 |
unsigned alpha = s >> 27; /* downscale alpha to 5 bits */
|
slouken@0
|
1070 |
/* FIXME: Here we special-case opaque alpha since the
|
slouken@0
|
1071 |
compositioning used (>>8 instead of /255) doesn't handle
|
slouken@0
|
1072 |
it correctly. Also special-case alpha=0 for speed?
|
slouken@0
|
1073 |
Benchmark this! */
|
slouken@689
|
1074 |
if(alpha) {
|
slouken@689
|
1075 |
if(alpha == (SDL_ALPHA_OPAQUE >> 3)) {
|
slouken@1428
|
1076 |
*dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f));
|
slouken@689
|
1077 |
} else {
|
slouken@0
|
1078 |
Uint32 d = *dstp;
|
slouken@0
|
1079 |
/*
|
slouken@0
|
1080 |
* convert source and destination to G0RAB65565
|
slouken@0
|
1081 |
* and blend all components at the same time
|
slouken@0
|
1082 |
*/
|
slouken@0
|
1083 |
s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800)
|
slouken@0
|
1084 |
+ (s >> 3 & 0x1f);
|
slouken@0
|
1085 |
d = (d | d << 16) & 0x07e0f81f;
|
slouken@0
|
1086 |
d += (s - d) * alpha >> 5;
|
slouken@0
|
1087 |
d &= 0x07e0f81f;
|
slouken@1428
|
1088 |
*dstp = (Uint16)(d | d >> 16);
|
slouken@689
|
1089 |
}
|
slouken@0
|
1090 |
}
|
slouken@0
|
1091 |
srcp++;
|
slouken@0
|
1092 |
dstp++;
|
slouken@0
|
1093 |
}, width);
|
slouken@1895
|
1094 |
/* *INDENT-ON* */
|
slouken@1895
|
1095 |
srcp += srcskip;
|
slouken@1895
|
1096 |
dstp += dstskip;
|
slouken@1895
|
1097 |
}
|
slouken@0
|
1098 |
}
|
slouken@0
|
1099 |
|
slouken@0
|
1100 |
/* fast ARGB8888->RGB555 blending with pixel alpha */
|
slouken@1895
|
1101 |
static void
|
slouken@1895
|
1102 |
BlitARGBto555PixelAlpha(SDL_BlitInfo * info)
|
slouken@0
|
1103 |
{
|
slouken@2262
|
1104 |
int width = info->dst_w;
|
slouken@2262
|
1105 |
int height = info->dst_h;
|
slouken@2262
|
1106 |
Uint32 *srcp = (Uint32 *) info->src;
|
slouken@2267
|
1107 |
int srcskip = info->src_skip >> 2;
|
slouken@2262
|
1108 |
Uint16 *dstp = (Uint16 *) info->dst;
|
slouken@2267
|
1109 |
int dstskip = info->dst_skip >> 1;
|
slouken@0
|
1110 |
|
slouken@1895
|
1111 |
while (height--) {
|
slouken@1895
|
1112 |
/* *INDENT-OFF* */
|
slouken@0
|
1113 |
DUFFS_LOOP4({
|
slouken@0
|
1114 |
unsigned alpha;
|
slouken@0
|
1115 |
Uint32 s = *srcp;
|
slouken@0
|
1116 |
alpha = s >> 27; /* downscale alpha to 5 bits */
|
slouken@0
|
1117 |
/* FIXME: Here we special-case opaque alpha since the
|
slouken@0
|
1118 |
compositioning used (>>8 instead of /255) doesn't handle
|
slouken@0
|
1119 |
it correctly. Also special-case alpha=0 for speed?
|
slouken@0
|
1120 |
Benchmark this! */
|
slouken@689
|
1121 |
if(alpha) {
|
slouken@689
|
1122 |
if(alpha == (SDL_ALPHA_OPAQUE >> 3)) {
|
slouken@1428
|
1123 |
*dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f));
|
slouken@689
|
1124 |
} else {
|
slouken@0
|
1125 |
Uint32 d = *dstp;
|
slouken@0
|
1126 |
/*
|
slouken@0
|
1127 |
* convert source and destination to G0RAB65565
|
slouken@0
|
1128 |
* and blend all components at the same time
|
slouken@0
|
1129 |
*/
|
slouken@0
|
1130 |
s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00)
|
slouken@0
|
1131 |
+ (s >> 3 & 0x1f);
|
slouken@0
|
1132 |
d = (d | d << 16) & 0x03e07c1f;
|
slouken@0
|
1133 |
d += (s - d) * alpha >> 5;
|
slouken@0
|
1134 |
d &= 0x03e07c1f;
|
slouken@1428
|
1135 |
*dstp = (Uint16)(d | d >> 16);
|
slouken@689
|
1136 |
}
|
slouken@0
|
1137 |
}
|
slouken@0
|
1138 |
srcp++;
|
slouken@0
|
1139 |
dstp++;
|
slouken@0
|
1140 |
}, width);
|
slouken@1895
|
1141 |
/* *INDENT-ON* */
|
slouken@1895
|
1142 |
srcp += srcskip;
|
slouken@1895
|
1143 |
dstp += dstskip;
|
slouken@1895
|
1144 |
}
|
slouken@0
|
1145 |
}
|
slouken@0
|
1146 |
|
slouken@0
|
1147 |
/* General (slow) N->N blending with per-surface alpha */
|
slouken@1895
|
1148 |
static void
|
slouken@1895
|
1149 |
BlitNtoNSurfaceAlpha(SDL_BlitInfo * info)
|
slouken@0
|
1150 |
{
|
slouken@2262
|
1151 |
int width = info->dst_w;
|
slouken@2262
|
1152 |
int height = info->dst_h;
|
slouken@2262
|
1153 |
Uint8 *src = info->src;
|
slouken@2267
|
1154 |
int srcskip = info->src_skip;
|
slouken@2262
|
1155 |
Uint8 *dst = info->dst;
|
slouken@2267
|
1156 |
int dstskip = info->dst_skip;
|
slouken@2267
|
1157 |
SDL_PixelFormat *srcfmt = info->src_fmt;
|
slouken@2267
|
1158 |
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
slouken@1895
|
1159 |
int srcbpp = srcfmt->BytesPerPixel;
|
slouken@1895
|
1160 |
int dstbpp = dstfmt->BytesPerPixel;
|
slouken@7502
|
1161 |
Uint32 Pixel;
|
slouken@7502
|
1162 |
unsigned sR, sG, sB;
|
slouken@7502
|
1163 |
unsigned dR, dG, dB, dA;
|
slouken@7502
|
1164 |
const unsigned sA = info->a;
|
slouken@0
|
1165 |
|
slouken@1895
|
1166 |
if (sA) {
|
slouken@1895
|
1167 |
while (height--) {
|
slouken@1895
|
1168 |
/* *INDENT-OFF* */
|
slouken@0
|
1169 |
DUFFS_LOOP4(
|
slouken@0
|
1170 |
{
|
icculus@1162
|
1171 |
DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
|
slouken@7502
|
1172 |
DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
|
slouken@7502
|
1173 |
ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
|
slouken@0
|
1174 |
ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
|
slouken@0
|
1175 |
src += srcbpp;
|
slouken@0
|
1176 |
dst += dstbpp;
|
slouken@0
|
1177 |
},
|
slouken@0
|
1178 |
width);
|
slouken@1895
|
1179 |
/* *INDENT-ON* */
|
slouken@1895
|
1180 |
src += srcskip;
|
slouken@1895
|
1181 |
dst += dstskip;
|
slouken@1895
|
1182 |
}
|
slouken@1895
|
1183 |
}
|
slouken@0
|
1184 |
}
|
slouken@0
|
1185 |
|
slouken@0
|
1186 |
/* General (slow) colorkeyed N->N blending with per-surface alpha */
|
slouken@1895
|
1187 |
static void
|
slouken@1895
|
1188 |
BlitNtoNSurfaceAlphaKey(SDL_BlitInfo * info)
|
slouken@0
|
1189 |
{
|
slouken@2262
|
1190 |
int width = info->dst_w;
|
slouken@2262
|
1191 |
int height = info->dst_h;
|
slouken@2262
|
1192 |
Uint8 *src = info->src;
|
slouken@2267
|
1193 |
int srcskip = info->src_skip;
|
slouken@2262
|
1194 |
Uint8 *dst = info->dst;
|
slouken@2267
|
1195 |
int dstskip = info->dst_skip;
|
slouken@2267
|
1196 |
SDL_PixelFormat *srcfmt = info->src_fmt;
|
slouken@2267
|
1197 |
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
slouken@2267
|
1198 |
Uint32 ckey = info->colorkey;
|
slouken@1895
|
1199 |
int srcbpp = srcfmt->BytesPerPixel;
|
slouken@1895
|
1200 |
int dstbpp = dstfmt->BytesPerPixel;
|
slouken@7502
|
1201 |
Uint32 Pixel;
|
slouken@7502
|
1202 |
unsigned sR, sG, sB;
|
slouken@7502
|
1203 |
unsigned dR, dG, dB, dA;
|
slouken@7502
|
1204 |
const unsigned sA = info->a;
|
slouken@0
|
1205 |
|
slouken@1895
|
1206 |
while (height--) {
|
slouken@1895
|
1207 |
/* *INDENT-OFF* */
|
slouken@0
|
1208 |
DUFFS_LOOP4(
|
slouken@0
|
1209 |
{
|
icculus@1162
|
1210 |
RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
|
icculus@1162
|
1211 |
if(sA && Pixel != ckey) {
|
icculus@1162
|
1212 |
RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
|
slouken@7502
|
1213 |
DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
|
slouken@7502
|
1214 |
ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
|
slouken@0
|
1215 |
ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
|
slouken@0
|
1216 |
}
|
slouken@0
|
1217 |
src += srcbpp;
|
slouken@0
|
1218 |
dst += dstbpp;
|
slouken@0
|
1219 |
},
|
slouken@0
|
1220 |
width);
|
slouken@1895
|
1221 |
/* *INDENT-ON* */
|
slouken@1895
|
1222 |
src += srcskip;
|
slouken@1895
|
1223 |
dst += dstskip;
|
slouken@1895
|
1224 |
}
|
slouken@0
|
1225 |
}
|
slouken@0
|
1226 |
|
slouken@0
|
1227 |
/* General (slow) N->N blending with pixel alpha */
|
slouken@1895
|
1228 |
static void
|
slouken@1895
|
1229 |
BlitNtoNPixelAlpha(SDL_BlitInfo * info)
|
slouken@0
|
1230 |
{
|
slouken@2262
|
1231 |
int width = info->dst_w;
|
slouken@2262
|
1232 |
int height = info->dst_h;
|
slouken@2262
|
1233 |
Uint8 *src = info->src;
|
slouken@2267
|
1234 |
int srcskip = info->src_skip;
|
slouken@2262
|
1235 |
Uint8 *dst = info->dst;
|
slouken@2267
|
1236 |
int dstskip = info->dst_skip;
|
slouken@2267
|
1237 |
SDL_PixelFormat *srcfmt = info->src_fmt;
|
slouken@2267
|
1238 |
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
slouken@1895
|
1239 |
int srcbpp;
|
slouken@1895
|
1240 |
int dstbpp;
|
slouken@7502
|
1241 |
Uint32 Pixel;
|
slouken@7502
|
1242 |
unsigned sR, sG, sB, sA;
|
slouken@7502
|
1243 |
unsigned dR, dG, dB, dA;
|
slouken@0
|
1244 |
|
slouken@1895
|
1245 |
/* Set up some basic variables */
|
slouken@1895
|
1246 |
srcbpp = srcfmt->BytesPerPixel;
|
slouken@1895
|
1247 |
dstbpp = dstfmt->BytesPerPixel;
|
slouken@0
|
1248 |
|
slouken@1895
|
1249 |
while (height--) {
|
slouken@1895
|
1250 |
/* *INDENT-OFF* */
|
slouken@0
|
1251 |
DUFFS_LOOP4(
|
slouken@0
|
1252 |
{
|
icculus@1162
|
1253 |
DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
|
slouken@689
|
1254 |
if(sA) {
|
slouken@7502
|
1255 |
DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
|
slouken@7502
|
1256 |
ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
|
slouken@7502
|
1257 |
ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
|
slouken@689
|
1258 |
}
|
slouken@0
|
1259 |
src += srcbpp;
|
slouken@0
|
1260 |
dst += dstbpp;
|
slouken@0
|
1261 |
},
|
slouken@0
|
1262 |
width);
|
slouken@1895
|
1263 |
/* *INDENT-ON* */
|
slouken@1895
|
1264 |
src += srcskip;
|
slouken@1895
|
1265 |
dst += dstskip;
|
slouken@1895
|
1266 |
}
|
slouken@0
|
1267 |
}
|
slouken@0
|
1268 |
|
slouken@0
|
1269 |
|
slouken@2267
|
1270 |
SDL_BlitFunc
|
slouken@2267
|
1271 |
SDL_CalculateBlitA(SDL_Surface * surface)
|
slouken@0
|
1272 |
{
|
slouken@0
|
1273 |
SDL_PixelFormat *sf = surface->format;
|
slouken@0
|
1274 |
SDL_PixelFormat *df = surface->map->dst->format;
|
slouken@0
|
1275 |
|
slouken@2853
|
1276 |
switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
|
slouken@2267
|
1277 |
case SDL_COPY_BLEND:
|
slouken@1895
|
1278 |
/* Per-pixel alpha blits */
|
slouken@1895
|
1279 |
switch (df->BytesPerPixel) {
|
slouken@1895
|
1280 |
case 1:
|
slouken@1895
|
1281 |
return BlitNto1PixelAlpha;
|
slouken@0
|
1282 |
|
slouken@1895
|
1283 |
case 2:
|
slouken@5389
|
1284 |
if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
|
slouken@5389
|
1285 |
&& sf->Gmask == 0xff00
|
slouken@5389
|
1286 |
&& ((sf->Rmask == 0xff && df->Rmask == 0x1f)
|
slouken@5389
|
1287 |
|| (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
|
slouken@1895
|
1288 |
if (df->Gmask == 0x7e0)
|
slouken@1895
|
1289 |
return BlitARGBto565PixelAlpha;
|
slouken@1895
|
1290 |
else if (df->Gmask == 0x3e0)
|
slouken@1895
|
1291 |
return BlitARGBto555PixelAlpha;
|
slouken@1895
|
1292 |
}
|
slouken@1895
|
1293 |
return BlitNtoNPixelAlpha;
|
slouken@0
|
1294 |
|
slouken@1895
|
1295 |
case 4:
|
slouken@1895
|
1296 |
if (sf->Rmask == df->Rmask
|
slouken@1895
|
1297 |
&& sf->Gmask == df->Gmask
|
slouken@1895
|
1298 |
&& sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
slouken@5389
|
1299 |
#if defined(__MMX__) || defined(__3dNOW__)
|
slouken@1895
|
1300 |
if (sf->Rshift % 8 == 0
|
slouken@1895
|
1301 |
&& sf->Gshift % 8 == 0
|
slouken@1895
|
1302 |
&& sf->Bshift % 8 == 0
|
slouken@1895
|
1303 |
&& sf->Ashift % 8 == 0 && sf->Aloss == 0) {
|
slouken@5389
|
1304 |
#ifdef __3dNOW__
|
slouken@5389
|
1305 |
if (SDL_Has3DNow())
|
slouken@5389
|
1306 |
return BlitRGBtoRGBPixelAlphaMMX3DNOW;
|
slouken@5389
|
1307 |
#endif
|
slouken@5389
|
1308 |
#ifdef __MMX__
|
slouken@1895
|
1309 |
if (SDL_HasMMX())
|
slouken@1895
|
1310 |
return BlitRGBtoRGBPixelAlphaMMX;
|
slouken@5389
|
1311 |
#endif
|
slouken@1895
|
1312 |
}
|
slouken@5389
|
1313 |
#endif /* __MMX__ || __3dNOW__ */
|
slouken@1895
|
1314 |
if (sf->Amask == 0xff000000) {
|
slouken@1895
|
1315 |
return BlitRGBtoRGBPixelAlpha;
|
slouken@1895
|
1316 |
}
|
slouken@1895
|
1317 |
}
|
slouken@7502
|
1318 |
return BlitNtoNPixelAlpha;
|
slouken@0
|
1319 |
|
slouken@1895
|
1320 |
case 3:
|
slouken@1895
|
1321 |
default:
|
slouken@1895
|
1322 |
return BlitNtoNPixelAlpha;
|
slouken@1895
|
1323 |
}
|
slouken@2267
|
1324 |
break;
|
slouken@2267
|
1325 |
|
slouken@2267
|
1326 |
case SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND:
|
slouken@2267
|
1327 |
if (sf->Amask == 0) {
|
slouken@2267
|
1328 |
/* Per-surface alpha blits */
|
slouken@2267
|
1329 |
switch (df->BytesPerPixel) {
|
slouken@2267
|
1330 |
case 1:
|
slouken@2267
|
1331 |
return BlitNto1SurfaceAlpha;
|
slouken@2267
|
1332 |
|
slouken@2267
|
1333 |
case 2:
|
slouken@2267
|
1334 |
if (surface->map->identity) {
|
slouken@2267
|
1335 |
if (df->Gmask == 0x7e0) {
|
slouken@2267
|
1336 |
#ifdef __MMX__
|
slouken@2267
|
1337 |
if (SDL_HasMMX())
|
slouken@2267
|
1338 |
return Blit565to565SurfaceAlphaMMX;
|
slouken@2267
|
1339 |
else
|
slouken@2267
|
1340 |
#endif
|
slouken@2267
|
1341 |
return Blit565to565SurfaceAlpha;
|
slouken@2267
|
1342 |
} else if (df->Gmask == 0x3e0) {
|
slouken@2267
|
1343 |
#ifdef __MMX__
|
slouken@2267
|
1344 |
if (SDL_HasMMX())
|
slouken@2267
|
1345 |
return Blit555to555SurfaceAlphaMMX;
|
slouken@2267
|
1346 |
else
|
slouken@2267
|
1347 |
#endif
|
slouken@2267
|
1348 |
return Blit555to555SurfaceAlpha;
|
slouken@2267
|
1349 |
}
|
slouken@2267
|
1350 |
}
|
slouken@2267
|
1351 |
return BlitNtoNSurfaceAlpha;
|
slouken@2267
|
1352 |
|
slouken@2267
|
1353 |
case 4:
|
slouken@2267
|
1354 |
if (sf->Rmask == df->Rmask
|
slouken@2267
|
1355 |
&& sf->Gmask == df->Gmask
|
slouken@2267
|
1356 |
&& sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
slouken@2267
|
1357 |
#ifdef __MMX__
|
slouken@2267
|
1358 |
if (sf->Rshift % 8 == 0
|
slouken@2267
|
1359 |
&& sf->Gshift % 8 == 0
|
slouken@2267
|
1360 |
&& sf->Bshift % 8 == 0 && SDL_HasMMX())
|
slouken@2267
|
1361 |
return BlitRGBtoRGBSurfaceAlphaMMX;
|
slouken@2267
|
1362 |
#endif
|
slouken@2267
|
1363 |
if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
|
slouken@2267
|
1364 |
return BlitRGBtoRGBSurfaceAlpha;
|
slouken@2267
|
1365 |
}
|
slouken@2267
|
1366 |
}
|
slouken@7502
|
1367 |
return BlitNtoNSurfaceAlpha;
|
slouken@2267
|
1368 |
|
slouken@2267
|
1369 |
case 3:
|
slouken@2267
|
1370 |
default:
|
slouken@2267
|
1371 |
return BlitNtoNSurfaceAlpha;
|
slouken@2267
|
1372 |
}
|
slouken@2267
|
1373 |
}
|
slouken@2267
|
1374 |
break;
|
slouken@2267
|
1375 |
|
slouken@2267
|
1376 |
case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND:
|
slouken@2267
|
1377 |
if (sf->Amask == 0) {
|
slouken@7502
|
1378 |
if (df->BytesPerPixel == 1) {
|
slouken@2267
|
1379 |
return BlitNto1SurfaceAlphaKey;
|
slouken@7502
|
1380 |
} else {
|
slouken@2267
|
1381 |
return BlitNtoNSurfaceAlphaKey;
|
slouken@7502
|
1382 |
}
|
slouken@2267
|
1383 |
}
|
slouken@2267
|
1384 |
break;
|
slouken@0
|
1385 |
}
|
slouken@2267
|
1386 |
|
slouken@2267
|
1387 |
return NULL;
|
slouken@0
|
1388 |
}
|
slouken@0
|
1389 |
|
slouken@1895
|
1390 |
/* vi: set ts=4 sw=4 expandtab: */
|