src/video/SDL_blit_copy.c
author Sam Lantinga <slouken@libsdl.org>
Tue, 08 Feb 2011 16:50:51 -0800
changeset 5229 c015d3e63631
parent 3697 f7b03b6838cb
child 5262 b530ef003506
permissions -rw-r--r--
Fixed setting the texture unit, still doesn't work.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2010 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_video.h"
    25 #include "SDL_blit.h"
    26 #include "SDL_blit_copy.h"
    27 
    28 
    29 #ifdef __SSE__
    30 /* This assumes 16-byte aligned src and dst */
    31 static __inline__ void
    32 SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
    33 {
    34     int i;
    35 
    36     __m128 values[4];
    37     for (i = len / 64; i--;) {
    38         _mm_prefetch(src, _MM_HINT_NTA);
    39         values[0] = *(__m128 *) (src + 0);
    40         values[1] = *(__m128 *) (src + 16);
    41         values[2] = *(__m128 *) (src + 32);
    42         values[3] = *(__m128 *) (src + 48);
    43         _mm_stream_ps((float *) (dst + 0), values[0]);
    44         _mm_stream_ps((float *) (dst + 16), values[1]);
    45         _mm_stream_ps((float *) (dst + 32), values[2]);
    46         _mm_stream_ps((float *) (dst + 48), values[3]);
    47         src += 64;
    48         dst += 64;
    49     }
    50 
    51     if (len & 63)
    52         SDL_memcpy(dst, src, len & 63);
    53 }
    54 #endif /* __SSE__ */
    55 
    56 #ifdef __MMX__
    57 #ifdef _MSC_VER
    58 #pragma warning(disable:4799)
    59 #endif
    60 /* This assumes 8-byte aligned src and dst */
    61 static __inline__ void
    62 SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
    63 {
    64     int i;
    65 
    66     __m64 values[8];
    67     for (i = len / 64; i--;) {
    68 #ifdef __SSE__
    69         _mm_prefetch(src, _MM_HINT_NTA);
    70 #endif
    71         values[0] = *(__m64 *) (src + 0);
    72         values[1] = *(__m64 *) (src + 8);
    73         values[2] = *(__m64 *) (src + 16);
    74         values[3] = *(__m64 *) (src + 24);
    75         values[4] = *(__m64 *) (src + 32);
    76         values[5] = *(__m64 *) (src + 40);
    77         values[6] = *(__m64 *) (src + 48);
    78         values[7] = *(__m64 *) (src + 56);
    79         _mm_stream_pi((__m64 *) (dst + 0), values[0]);
    80         _mm_stream_pi((__m64 *) (dst + 8), values[1]);
    81         _mm_stream_pi((__m64 *) (dst + 16), values[2]);
    82         _mm_stream_pi((__m64 *) (dst + 24), values[3]);
    83         _mm_stream_pi((__m64 *) (dst + 32), values[4]);
    84         _mm_stream_pi((__m64 *) (dst + 40), values[5]);
    85         _mm_stream_pi((__m64 *) (dst + 48), values[6]);
    86         _mm_stream_pi((__m64 *) (dst + 56), values[7]);
    87         src += 64;
    88         dst += 64;
    89     }
    90 
    91     if (len & 63)
    92         SDL_memcpy(dst, src, len & 63);
    93 }
    94 #endif /* __MMX__ */
    95 
    96 void
    97 SDL_BlitCopy(SDL_BlitInfo * info)
    98 {
    99     Uint8 *src, *dst;
   100     int w, h;
   101     int srcskip, dstskip;
   102 
   103     w = info->dst_w * info->dst_fmt->BytesPerPixel;
   104     h = info->dst_h;
   105     src = info->src;
   106     dst = info->dst;
   107     srcskip = info->src_pitch;
   108     dstskip = info->dst_pitch;
   109 
   110 #ifdef __SSE__
   111     if (SDL_HasSSE() &&
   112         !((uintptr_t) src & 15) && !(srcskip & 15) &&
   113         !((uintptr_t) dst & 15) && !(dstskip & 15)) {
   114         while (h--) {
   115             SDL_memcpySSE(dst, src, w);
   116             src += srcskip;
   117             dst += dstskip;
   118         }
   119         return;
   120     }
   121 #endif
   122 
   123 #ifdef __MMX__
   124     if (SDL_HasMMX() &&
   125         !((uintptr_t) src & 7) && !(srcskip & 7) &&
   126         !((uintptr_t) dst & 7) && !(dstskip & 7)) {
   127         while (h--) {
   128             SDL_memcpyMMX(dst, src, w);
   129             src += srcskip;
   130             dst += dstskip;
   131         }
   132         _mm_empty();
   133         return;
   134     }
   135 #endif
   136 
   137     while (h--) {
   138         SDL_memcpy(dst, src, w);
   139         src += srcskip;
   140         dst += dstskip;
   141     }
   142 }
   143 
   144 void
   145 SDL_BlitCopyOverlap(SDL_BlitInfo * info)
   146 {
   147     Uint8 *src, *dst;
   148     int w, h;
   149     int skip;
   150 
   151     w = info->dst_w * info->dst_fmt->BytesPerPixel;
   152     h = info->dst_h;
   153     src = info->src;
   154     dst = info->dst;
   155     skip = info->src_pitch;
   156     if ((dst < src) || (dst >= (src + h * skip))) {
   157         SDL_BlitCopy(info);
   158     } else {
   159         src += ((h - 1) * skip);
   160         dst += ((h - 1) * skip);
   161         while (h--) {
   162             SDL_revcpy(dst, src, w);
   163             src -= skip;
   164             dst -= skip;
   165         }
   166     }
   167 }
   168 
   169 /* vi: set ts=4 sw=4 expandtab: */