src/video/SDL_copy.c
author Sam Lantinga <slouken@libsdl.org>
Thu, 16 Aug 2007 21:43:19 +0000
changeset 2253 6d99edd791bf
parent 2252 src/video/SDL_blit_copy.c@b80e3d57941f
permissions -rw-r--r--
Added notes on the next steps for SDL 1.3
Moved fill and copy routines to their own files.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_blit.h"
    26 #include "SDL_copy.h"
    27 
    28 
    29 #ifdef __SSE__
    30 /* This assumes 16-byte aligned src and dst */
    31 static __inline__ void
    32 SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
    33 {
    34     int i;
    35 
    36     __m128 values[4];
    37     for (i = len / 64; i--;) {
    38         _mm_prefetch(src, _MM_HINT_NTA);
    39         values[0] = *(__m128 *) (src + 0);
    40         values[1] = *(__m128 *) (src + 16);
    41         values[2] = *(__m128 *) (src + 32);
    42         values[3] = *(__m128 *) (src + 48);
    43         _mm_stream_ps((float *) (dst + 0), values[0]);
    44         _mm_stream_ps((float *) (dst + 16), values[1]);
    45         _mm_stream_ps((float *) (dst + 32), values[2]);
    46         _mm_stream_ps((float *) (dst + 48), values[3]);
    47         src += 64;
    48         dst += 64;
    49     }
    50 
    51     if (len & 63)
    52         SDL_memcpy(dst, src, len & 63);
    53 }
    54 #endif /* __SSE__ */
    55 
    56 #ifdef __MMX__
    57 /* This assumes 8-byte aligned src and dst */
    58 static __inline__ void
    59 SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
    60 {
    61     int i;
    62 
    63     __m64 values[8];
    64     for (i = len / 64; i--;) {
    65         _mm_prefetch(src, _MM_HINT_NTA);
    66         values[0] = *(__m64 *) (src + 0);
    67         values[1] = *(__m64 *) (src + 8);
    68         values[2] = *(__m64 *) (src + 16);
    69         values[3] = *(__m64 *) (src + 24);
    70         values[4] = *(__m64 *) (src + 32);
    71         values[5] = *(__m64 *) (src + 40);
    72         values[6] = *(__m64 *) (src + 48);
    73         values[7] = *(__m64 *) (src + 56);
    74         _mm_stream_pi((__m64 *) (dst + 0), values[0]);
    75         _mm_stream_pi((__m64 *) (dst + 8), values[1]);
    76         _mm_stream_pi((__m64 *) (dst + 16), values[2]);
    77         _mm_stream_pi((__m64 *) (dst + 24), values[3]);
    78         _mm_stream_pi((__m64 *) (dst + 32), values[4]);
    79         _mm_stream_pi((__m64 *) (dst + 40), values[5]);
    80         _mm_stream_pi((__m64 *) (dst + 48), values[6]);
    81         _mm_stream_pi((__m64 *) (dst + 56), values[7]);
    82         src += 64;
    83         dst += 64;
    84     }
    85 
    86     if (len & 63)
    87         SDL_memcpy(dst, src, len & 63);
    88 }
    89 #endif /* __MMX__ */
    90 
    91 void
    92 SDL_BlitCopy(SDL_BlitInfo * info)
    93 {
    94     Uint8 *src, *dst;
    95     int w, h;
    96     int srcskip, dstskip;
    97 
    98     w = info->d_width * info->dst->BytesPerPixel;
    99     h = info->d_height;
   100     src = info->s_pixels;
   101     dst = info->d_pixels;
   102     srcskip = w + info->s_skip;
   103     dstskip = w + info->d_skip;
   104 
   105 #ifdef __SSE__
   106     if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
   107         while (h--) {
   108             SDL_memcpySSE(dst, src, w);
   109             src += srcskip;
   110             dst += dstskip;
   111         }
   112         return;
   113     }
   114 #endif
   115 
   116 #ifdef __MMX__
   117     if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
   118         while (h--) {
   119             SDL_memcpyMMX(dst, src, w);
   120             src += srcskip;
   121             dst += dstskip;
   122         }
   123         _mm_empty();
   124         return;
   125     }
   126 #endif
   127 
   128     while (h--) {
   129         SDL_memcpy(dst, src, w);
   130         src += srcskip;
   131         dst += dstskip;
   132     }
   133 }
   134 
   135 void
   136 SDL_BlitCopyOverlap(SDL_BlitInfo * info)
   137 {
   138     Uint8 *src, *dst;
   139     int w, h;
   140     int skip;
   141 
   142     w = info->d_width * info->dst->BytesPerPixel;
   143     h = info->d_height;
   144     src = info->s_pixels;
   145     dst = info->d_pixels;
   146     skip = w + info->s_skip;
   147     if ((dst < src) || (dst >= (src + h * skip))) {
   148         SDL_BlitCopy(info);
   149     } else {
   150         src += ((h - 1) * skip);
   151         dst += ((h - 1) * skip);
   152         while (h--) {
   153             SDL_revcpy(dst, src, w);
   154             src -= skip;
   155             dst -= skip;
   156         }
   157     }
   158 }
   159 
   160 /* vi: set ts=4 sw=4 expandtab: */