src/video/SDL_copy.c
changeset 2253 6d99edd791bf
parent 2252 b80e3d57941f
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/video/SDL_copy.c	Thu Aug 16 21:43:19 2007 +0000
     1.3 @@ -0,0 +1,160 @@
     1.4 +/*
     1.5 +    SDL - Simple DirectMedia Layer
     1.6 +    Copyright (C) 1997-2006 Sam Lantinga
     1.7 +
     1.8 +    This library is free software; you can redistribute it and/or
     1.9 +    modify it under the terms of the GNU Lesser General Public
    1.10 +    License as published by the Free Software Foundation; either
    1.11 +    version 2.1 of the License, or (at your option) any later version.
    1.12 +
    1.13 +    This library is distributed in the hope that it will be useful,
    1.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.16 +    Lesser General Public License for more details.
    1.17 +
    1.18 +    You should have received a copy of the GNU Lesser General Public
    1.19 +    License along with this library; if not, write to the Free Software
    1.20 +    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    1.21 +
    1.22 +    Sam Lantinga
    1.23 +    slouken@libsdl.org
    1.24 +*/
    1.25 +#include "SDL_config.h"
    1.26 +
    1.27 +#include "SDL_video.h"
    1.28 +#include "SDL_blit.h"
    1.29 +#include "SDL_copy.h"
    1.30 +
    1.31 +
    1.32 +#ifdef __SSE__
    1.33 +/* This assumes 16-byte aligned src and dst */
    1.34 +static __inline__ void
    1.35 +SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
    1.36 +{
    1.37 +    int i;
    1.38 +
    1.39 +    __m128 values[4];
    1.40 +    for (i = len / 64; i--;) {
    1.41 +        _mm_prefetch(src, _MM_HINT_NTA);
    1.42 +        values[0] = *(__m128 *) (src + 0);
    1.43 +        values[1] = *(__m128 *) (src + 16);
    1.44 +        values[2] = *(__m128 *) (src + 32);
    1.45 +        values[3] = *(__m128 *) (src + 48);
    1.46 +        _mm_stream_ps((float *) (dst + 0), values[0]);
    1.47 +        _mm_stream_ps((float *) (dst + 16), values[1]);
    1.48 +        _mm_stream_ps((float *) (dst + 32), values[2]);
    1.49 +        _mm_stream_ps((float *) (dst + 48), values[3]);
    1.50 +        src += 64;
    1.51 +        dst += 64;
    1.52 +    }
    1.53 +
    1.54 +    if (len & 63)
    1.55 +        SDL_memcpy(dst, src, len & 63);
    1.56 +}
    1.57 +#endif /* __SSE__ */
    1.58 +
    1.59 +#ifdef __MMX__
    1.60 +/* This assumes 8-byte aligned src and dst */
    1.61 +static __inline__ void
    1.62 +SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
    1.63 +{
    1.64 +    int i;
    1.65 +
    1.66 +    __m64 values[8];
    1.67 +    for (i = len / 64; i--;) {
    1.68 +        _mm_prefetch(src, _MM_HINT_NTA);
    1.69 +        values[0] = *(__m64 *) (src + 0);
    1.70 +        values[1] = *(__m64 *) (src + 8);
    1.71 +        values[2] = *(__m64 *) (src + 16);
    1.72 +        values[3] = *(__m64 *) (src + 24);
    1.73 +        values[4] = *(__m64 *) (src + 32);
    1.74 +        values[5] = *(__m64 *) (src + 40);
    1.75 +        values[6] = *(__m64 *) (src + 48);
    1.76 +        values[7] = *(__m64 *) (src + 56);
    1.77 +        _mm_stream_pi((__m64 *) (dst + 0), values[0]);
    1.78 +        _mm_stream_pi((__m64 *) (dst + 8), values[1]);
    1.79 +        _mm_stream_pi((__m64 *) (dst + 16), values[2]);
    1.80 +        _mm_stream_pi((__m64 *) (dst + 24), values[3]);
    1.81 +        _mm_stream_pi((__m64 *) (dst + 32), values[4]);
    1.82 +        _mm_stream_pi((__m64 *) (dst + 40), values[5]);
    1.83 +        _mm_stream_pi((__m64 *) (dst + 48), values[6]);
    1.84 +        _mm_stream_pi((__m64 *) (dst + 56), values[7]);
    1.85 +        src += 64;
    1.86 +        dst += 64;
    1.87 +    }
    1.88 +
    1.89 +    if (len & 63)
    1.90 +        SDL_memcpy(dst, src, len & 63);
    1.91 +}
    1.92 +#endif /* __MMX__ */
    1.93 +
    1.94 +void
    1.95 +SDL_BlitCopy(SDL_BlitInfo * info)
    1.96 +{
    1.97 +    Uint8 *src, *dst;
    1.98 +    int w, h;
    1.99 +    int srcskip, dstskip;
   1.100 +
   1.101 +    w = info->d_width * info->dst->BytesPerPixel;
   1.102 +    h = info->d_height;
   1.103 +    src = info->s_pixels;
   1.104 +    dst = info->d_pixels;
   1.105 +    srcskip = w + info->s_skip;
   1.106 +    dstskip = w + info->d_skip;
   1.107 +
   1.108 +#ifdef __SSE__
   1.109 +    if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
   1.110 +        while (h--) {
   1.111 +            SDL_memcpySSE(dst, src, w);
   1.112 +            src += srcskip;
   1.113 +            dst += dstskip;
   1.114 +        }
   1.115 +        return;
   1.116 +    }
   1.117 +#endif
   1.118 +
   1.119 +#ifdef __MMX__
   1.120 +    if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
   1.121 +        while (h--) {
   1.122 +            SDL_memcpyMMX(dst, src, w);
   1.123 +            src += srcskip;
   1.124 +            dst += dstskip;
   1.125 +        }
   1.126 +        _mm_empty();
   1.127 +        return;
   1.128 +    }
   1.129 +#endif
   1.130 +
   1.131 +    while (h--) {
   1.132 +        SDL_memcpy(dst, src, w);
   1.133 +        src += srcskip;
   1.134 +        dst += dstskip;
   1.135 +    }
   1.136 +}
   1.137 +
   1.138 +void
   1.139 +SDL_BlitCopyOverlap(SDL_BlitInfo * info)
   1.140 +{
   1.141 +    Uint8 *src, *dst;
   1.142 +    int w, h;
   1.143 +    int skip;
   1.144 +
   1.145 +    w = info->d_width * info->dst->BytesPerPixel;
   1.146 +    h = info->d_height;
   1.147 +    src = info->s_pixels;
   1.148 +    dst = info->d_pixels;
   1.149 +    skip = w + info->s_skip;
   1.150 +    if ((dst < src) || (dst >= (src + h * skip))) {
   1.151 +        SDL_BlitCopy(info);
   1.152 +    } else {
   1.153 +        src += ((h - 1) * skip);
   1.154 +        dst += ((h - 1) * skip);
   1.155 +        while (h--) {
   1.156 +            SDL_revcpy(dst, src, w);
   1.157 +            src -= skip;
   1.158 +            dst -= skip;
   1.159 +        }
   1.160 +    }
   1.161 +}
   1.162 +
   1.163 +/* vi: set ts=4 sw=4 expandtab: */