src/video/SDL_blit_copy.c
author Sam Lantinga
Sat, 19 Sep 2009 13:29:40 +0000
changeset 3280 00cace2d9080
parent 2859 99210400e8b9
child 3577 72024425b437
permissions -rw-r--r--
Merged a cleaned up version of Jiang's code changes from Google Summer of Code 2009
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2009 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_blit.h"
    26 #include "SDL_blit_copy.h"
    27 
    28 
    29 #ifdef __SSE__
    30 /* This assumes 16-byte aligned src and dst */
    31 static __inline__ void
    32 SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
    33 {
    34     int i;
    35 
    36     __m128 values[4];
    37     for (i = len / 64; i--;) {
    38         _mm_prefetch(src, _MM_HINT_NTA);
    39         values[0] = *(__m128 *) (src + 0);
    40         values[1] = *(__m128 *) (src + 16);
    41         values[2] = *(__m128 *) (src + 32);
    42         values[3] = *(__m128 *) (src + 48);
    43         _mm_stream_ps((float *) (dst + 0), values[0]);
    44         _mm_stream_ps((float *) (dst + 16), values[1]);
    45         _mm_stream_ps((float *) (dst + 32), values[2]);
    46         _mm_stream_ps((float *) (dst + 48), values[3]);
    47         src += 64;
    48         dst += 64;
    49     }
    50 
    51     if (len & 63)
    52         SDL_memcpy(dst, src, len & 63);
    53 }
    54 #endif /* __SSE__ */
    55 
    56 #ifdef __MMX__
    57 #ifdef _MSC_VER
    58 #pragma warning(disable:4799)
    59 #endif
    60 /* This assumes 8-byte aligned src and dst */
    61 static __inline__ void
    62 SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
    63 {
    64     int i;
    65 
    66     __m64 values[8];
    67     for (i = len / 64; i--;) {
    68         _mm_prefetch(src, _MM_HINT_NTA);
    69         values[0] = *(__m64 *) (src + 0);
    70         values[1] = *(__m64 *) (src + 8);
    71         values[2] = *(__m64 *) (src + 16);
    72         values[3] = *(__m64 *) (src + 24);
    73         values[4] = *(__m64 *) (src + 32);
    74         values[5] = *(__m64 *) (src + 40);
    75         values[6] = *(__m64 *) (src + 48);
    76         values[7] = *(__m64 *) (src + 56);
    77         _mm_stream_pi((__m64 *) (dst + 0), values[0]);
    78         _mm_stream_pi((__m64 *) (dst + 8), values[1]);
    79         _mm_stream_pi((__m64 *) (dst + 16), values[2]);
    80         _mm_stream_pi((__m64 *) (dst + 24), values[3]);
    81         _mm_stream_pi((__m64 *) (dst + 32), values[4]);
    82         _mm_stream_pi((__m64 *) (dst + 40), values[5]);
    83         _mm_stream_pi((__m64 *) (dst + 48), values[6]);
    84         _mm_stream_pi((__m64 *) (dst + 56), values[7]);
    85         src += 64;
    86         dst += 64;
    87     }
    88 
    89     if (len & 63)
    90         SDL_memcpy(dst, src, len & 63);
    91 }
    92 #endif /* __MMX__ */
    93 
    94 void
    95 SDL_BlitCopy(SDL_BlitInfo * info)
    96 {
    97     Uint8 *src, *dst;
    98     int w, h;
    99     int srcskip, dstskip;
   100 
   101     w = info->dst_w * info->dst_fmt->BytesPerPixel;
   102     h = info->dst_h;
   103     src = info->src;
   104     dst = info->dst;
   105     srcskip = info->src_pitch;
   106     dstskip = info->dst_pitch;
   107 
   108 #ifdef __SSE__
   109     if (SDL_HasSSE() &&
   110         !((uintptr_t) src & 15) && !(srcskip & 15) &&
   111         !((uintptr_t) dst & 15) && !(dstskip & 15)) {
   112         while (h--) {
   113             SDL_memcpySSE(dst, src, w);
   114             src += srcskip;
   115             dst += dstskip;
   116         }
   117         return;
   118     }
   119 #endif
   120 
   121 #ifdef __MMX__
   122     if (SDL_HasMMX() &&
   123         !((uintptr_t) src & 7) && !(srcskip & 7) &&
   124         !((uintptr_t) dst & 7) && !(dstskip & 7)) {
   125         while (h--) {
   126             SDL_memcpyMMX(dst, src, w);
   127             src += srcskip;
   128             dst += dstskip;
   129         }
   130         _mm_empty();
   131         return;
   132     }
   133 #endif
   134 
   135     while (h--) {
   136         SDL_memcpy(dst, src, w);
   137         src += srcskip;
   138         dst += dstskip;
   139     }
   140 }
   141 
   142 void
   143 SDL_BlitCopyOverlap(SDL_BlitInfo * info)
   144 {
   145     Uint8 *src, *dst;
   146     int w, h;
   147     int skip;
   148 
   149     w = info->dst_w * info->dst_fmt->BytesPerPixel;
   150     h = info->dst_h;
   151     src = info->src;
   152     dst = info->dst;
   153     skip = info->src_pitch;
   154     if ((dst < src) || (dst >= (src + h * skip))) {
   155         SDL_BlitCopy(info);
   156     } else {
   157         src += ((h - 1) * skip);
   158         dst += ((h - 1) * skip);
   159         while (h--) {
   160             SDL_revcpy(dst, src, w);
   161             src -= skip;
   162             dst -= skip;
   163         }
   164     }
   165 }
   166 
   167 /* vi: set ts=4 sw=4 expandtab: */