slouken@2247
|
1 |
/*
|
slouken@2247
|
2 |
SDL - Simple DirectMedia Layer
|
slouken@2247
|
3 |
Copyright (C) 1997-2006 Sam Lantinga
|
slouken@2247
|
4 |
|
slouken@2247
|
5 |
This library is free software; you can redistribute it and/or
|
slouken@2247
|
6 |
modify it under the terms of the GNU Lesser General Public
|
slouken@2247
|
7 |
License as published by the Free Software Foundation; either
|
slouken@2247
|
8 |
version 2.1 of the License, or (at your option) any later version.
|
slouken@2247
|
9 |
|
slouken@2247
|
10 |
This library is distributed in the hope that it will be useful,
|
slouken@2247
|
11 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
slouken@2247
|
12 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
slouken@2247
|
13 |
Lesser General Public License for more details.
|
slouken@2247
|
14 |
|
slouken@2247
|
15 |
You should have received a copy of the GNU Lesser General Public
|
slouken@2247
|
16 |
License along with this library; if not, write to the Free Software
|
slouken@2247
|
17 |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
slouken@2247
|
18 |
|
slouken@2247
|
19 |
Sam Lantinga
|
slouken@2247
|
20 |
slouken@libsdl.org
|
slouken@2247
|
21 |
*/
|
slouken@2247
|
22 |
#include "SDL_config.h"
|
slouken@2247
|
23 |
|
slouken@2250
|
24 |
#include "SDL_cpuinfo.h"
|
slouken@2247
|
25 |
#include "SDL_video.h"
|
slouken@2247
|
26 |
#include "SDL_blit.h"
|
slouken@2249
|
27 |
#include "SDL_blit_copy.h"
|
slouken@2247
|
28 |
|
slouken@2247
|
29 |
|
slouken@2247
|
30 |
#ifdef __MMX__
|
slouken@2247
|
31 |
static __inline__ void
|
slouken@2248
|
32 |
SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
|
slouken@2247
|
33 |
{
|
slouken@2247
|
34 |
int i;
|
slouken@2247
|
35 |
|
slouken@2248
|
36 |
__m64 values[8];
|
slouken@2247
|
37 |
for (i = len / 64; i--;) {
|
slouken@2248
|
38 |
_mm_prefetch(src, _MM_HINT_NTA);
|
slouken@2248
|
39 |
values[0] = *(__m64 *) (src + 0);
|
slouken@2248
|
40 |
values[1] = *(__m64 *) (src + 8);
|
slouken@2248
|
41 |
values[2] = *(__m64 *) (src + 16);
|
slouken@2248
|
42 |
values[3] = *(__m64 *) (src + 24);
|
slouken@2248
|
43 |
values[4] = *(__m64 *) (src + 32);
|
slouken@2248
|
44 |
values[5] = *(__m64 *) (src + 40);
|
slouken@2248
|
45 |
values[6] = *(__m64 *) (src + 48);
|
slouken@2248
|
46 |
values[7] = *(__m64 *) (src + 56);
|
slouken@2248
|
47 |
_mm_stream_pi((__m64 *) (dst + 0), values[0]);
|
slouken@2248
|
48 |
_mm_stream_pi((__m64 *) (dst + 8), values[1]);
|
slouken@2248
|
49 |
_mm_stream_pi((__m64 *) (dst + 16), values[2]);
|
slouken@2248
|
50 |
_mm_stream_pi((__m64 *) (dst + 24), values[3]);
|
slouken@2248
|
51 |
_mm_stream_pi((__m64 *) (dst + 32), values[4]);
|
slouken@2248
|
52 |
_mm_stream_pi((__m64 *) (dst + 40), values[5]);
|
slouken@2248
|
53 |
_mm_stream_pi((__m64 *) (dst + 48), values[6]);
|
slouken@2248
|
54 |
_mm_stream_pi((__m64 *) (dst + 56), values[7]);
|
slouken@2247
|
55 |
src += 64;
|
slouken@2247
|
56 |
dst += 64;
|
slouken@2247
|
57 |
}
|
slouken@2248
|
58 |
|
slouken@2247
|
59 |
if (len & 63)
|
slouken@2247
|
60 |
SDL_memcpy(dst, src, len & 63);
|
slouken@2247
|
61 |
}
|
slouken@2247
|
62 |
#endif /* __MMX__ */
|
slouken@2247
|
63 |
|
slouken@2247
|
64 |
#ifdef __SSE__
|
slouken@2247
|
65 |
static __inline__ void
|
slouken@2248
|
66 |
SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
|
slouken@2247
|
67 |
{
|
slouken@2247
|
68 |
int i;
|
slouken@2247
|
69 |
|
slouken@2248
|
70 |
__m128 values[4];
|
slouken@2247
|
71 |
for (i = len / 64; i--;) {
|
slouken@2248
|
72 |
_mm_prefetch(src, _MM_HINT_NTA);
|
slouken@2248
|
73 |
values[0] = *(__m128 *) (src + 0);
|
slouken@2248
|
74 |
values[1] = *(__m128 *) (src + 16);
|
slouken@2248
|
75 |
values[2] = *(__m128 *) (src + 32);
|
slouken@2248
|
76 |
values[3] = *(__m128 *) (src + 48);
|
slouken@2248
|
77 |
_mm_stream_ps((float *) (dst + 0), values[0]);
|
slouken@2248
|
78 |
_mm_stream_ps((float *) (dst + 16), values[1]);
|
slouken@2248
|
79 |
_mm_stream_ps((float *) (dst + 32), values[2]);
|
slouken@2248
|
80 |
_mm_stream_ps((float *) (dst + 48), values[3]);
|
slouken@2247
|
81 |
src += 64;
|
slouken@2247
|
82 |
dst += 64;
|
slouken@2247
|
83 |
}
|
slouken@2248
|
84 |
|
slouken@2247
|
85 |
if (len & 63)
|
slouken@2247
|
86 |
SDL_memcpy(dst, src, len & 63);
|
slouken@2247
|
87 |
}
|
slouken@2247
|
88 |
#endif /* __SSE__ */
|
slouken@2247
|
89 |
|
slouken@2247
|
90 |
void
|
slouken@2247
|
91 |
SDL_BlitCopy(SDL_BlitInfo * info)
|
slouken@2247
|
92 |
{
|
slouken@2247
|
93 |
Uint8 *src, *dst;
|
slouken@2247
|
94 |
int w, h;
|
slouken@2247
|
95 |
int srcskip, dstskip;
|
slouken@2247
|
96 |
|
slouken@2247
|
97 |
w = info->d_width * info->dst->BytesPerPixel;
|
slouken@2247
|
98 |
h = info->d_height;
|
slouken@2247
|
99 |
src = info->s_pixels;
|
slouken@2247
|
100 |
dst = info->d_pixels;
|
slouken@2247
|
101 |
srcskip = w + info->s_skip;
|
slouken@2247
|
102 |
dstskip = w + info->d_skip;
|
slouken@2247
|
103 |
|
slouken@2247
|
104 |
#ifdef __SSE__
|
slouken@2248
|
105 |
if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
|
slouken@2247
|
106 |
while (h--) {
|
slouken@2247
|
107 |
SDL_memcpySSE(dst, src, w);
|
slouken@2247
|
108 |
src += srcskip;
|
slouken@2247
|
109 |
dst += dstskip;
|
slouken@2247
|
110 |
}
|
slouken@2247
|
111 |
return;
|
slouken@2247
|
112 |
}
|
slouken@2247
|
113 |
#endif
|
slouken@2247
|
114 |
|
slouken@2247
|
115 |
#ifdef __MMX__
|
slouken@2248
|
116 |
if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
|
slouken@2247
|
117 |
while (h--) {
|
slouken@2247
|
118 |
SDL_memcpyMMX(dst, src, w);
|
slouken@2247
|
119 |
src += srcskip;
|
slouken@2247
|
120 |
dst += dstskip;
|
slouken@2247
|
121 |
}
|
slouken@2248
|
122 |
_mm_empty();
|
slouken@2247
|
123 |
return;
|
slouken@2247
|
124 |
}
|
slouken@2247
|
125 |
#endif
|
slouken@2247
|
126 |
|
slouken@2247
|
127 |
while (h--) {
|
slouken@2247
|
128 |
SDL_memcpy(dst, src, w);
|
slouken@2247
|
129 |
src += srcskip;
|
slouken@2247
|
130 |
dst += dstskip;
|
slouken@2247
|
131 |
}
|
slouken@2247
|
132 |
}
|
slouken@2247
|
133 |
|
slouken@2247
|
134 |
void
|
slouken@2247
|
135 |
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
|
slouken@2247
|
136 |
{
|
slouken@2247
|
137 |
Uint8 *src, *dst;
|
slouken@2247
|
138 |
int w, h;
|
slouken@2247
|
139 |
int skip;
|
slouken@2247
|
140 |
|
slouken@2247
|
141 |
w = info->d_width * info->dst->BytesPerPixel;
|
slouken@2247
|
142 |
h = info->d_height;
|
slouken@2247
|
143 |
src = info->s_pixels;
|
slouken@2247
|
144 |
dst = info->d_pixels;
|
slouken@2247
|
145 |
skip = w + info->s_skip;
|
slouken@2248
|
146 |
if ((dst < src) || (dst >= (src + h * skip))) {
|
slouken@2247
|
147 |
SDL_BlitCopy(info);
|
slouken@2247
|
148 |
} else {
|
slouken@2247
|
149 |
src += ((h - 1) * skip);
|
slouken@2247
|
150 |
dst += ((h - 1) * skip);
|
slouken@2247
|
151 |
while (h--) {
|
slouken@2247
|
152 |
SDL_revcpy(dst, src, w);
|
slouken@2247
|
153 |
src -= skip;
|
slouken@2247
|
154 |
dst -= skip;
|
slouken@2247
|
155 |
}
|
slouken@2247
|
156 |
}
|
slouken@2247
|
157 |
}
|
slouken@2247
|
158 |
|
slouken@2247
|
159 |
/* vi: set ts=4 sw=4 expandtab: */
|