src/video/SDL_blit_copy.c
author Sam Lantinga <slouken@libsdl.org>
Tue, 08 Feb 2011 16:50:51 -0800
changeset 5229 c015d3e63631
parent 3697 f7b03b6838cb
child 5262 b530ef003506
permissions -rw-r--r--
Fixed setting the texture unit, still doesn't work.
slouken@2247
     1
/*
slouken@2247
     2
    SDL - Simple DirectMedia Layer
slouken@3697
     3
    Copyright (C) 1997-2010 Sam Lantinga
slouken@2247
     4
slouken@2247
     5
    This library is free software; you can redistribute it and/or
slouken@2247
     6
    modify it under the terms of the GNU Lesser General Public
slouken@2247
     7
    License as published by the Free Software Foundation; either
slouken@2247
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@2247
     9
slouken@2247
    10
    This library is distributed in the hope that it will be useful,
slouken@2247
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@2247
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@2247
    13
    Lesser General Public License for more details.
slouken@2247
    14
slouken@2247
    15
    You should have received a copy of the GNU Lesser General Public
slouken@2247
    16
    License along with this library; if not, write to the Free Software
slouken@2247
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@2247
    18
slouken@2247
    19
    Sam Lantinga
slouken@2247
    20
    slouken@libsdl.org
slouken@2247
    21
*/
slouken@2247
    22
#include "SDL_config.h"
slouken@2247
    23
slouken@2247
    24
#include "SDL_video.h"
slouken@2247
    25
#include "SDL_blit.h"
slouken@2254
    26
#include "SDL_blit_copy.h"
slouken@2247
    27
slouken@2247
    28
slouken@2253
    29
#ifdef __SSE__
slouken@2253
    30
/* This assumes 16-byte aligned src and dst */
slouken@2253
    31
static __inline__ void
slouken@2253
    32
SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
slouken@2253
    33
{
slouken@2253
    34
    int i;
slouken@2253
    35
slouken@2253
    36
    __m128 values[4];
slouken@2253
    37
    for (i = len / 64; i--;) {
slouken@2253
    38
        _mm_prefetch(src, _MM_HINT_NTA);
slouken@2253
    39
        values[0] = *(__m128 *) (src + 0);
slouken@2253
    40
        values[1] = *(__m128 *) (src + 16);
slouken@2253
    41
        values[2] = *(__m128 *) (src + 32);
slouken@2253
    42
        values[3] = *(__m128 *) (src + 48);
slouken@2253
    43
        _mm_stream_ps((float *) (dst + 0), values[0]);
slouken@2253
    44
        _mm_stream_ps((float *) (dst + 16), values[1]);
slouken@2253
    45
        _mm_stream_ps((float *) (dst + 32), values[2]);
slouken@2253
    46
        _mm_stream_ps((float *) (dst + 48), values[3]);
slouken@2253
    47
        src += 64;
slouken@2253
    48
        dst += 64;
slouken@2253
    49
    }
slouken@2253
    50
slouken@2253
    51
    if (len & 63)
slouken@2253
    52
        SDL_memcpy(dst, src, len & 63);
slouken@2253
    53
}
slouken@2253
    54
#endif /* __SSE__ */
slouken@2253
    55
slouken@2247
    56
#ifdef __MMX__
slouken@2780
    57
#ifdef _MSC_VER
slouken@2780
    58
#pragma warning(disable:4799)
slouken@2780
    59
#endif
slouken@2253
    60
/* This assumes 8-byte aligned src and dst */
slouken@2247
    61
static __inline__ void
slouken@2248
    62
SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
slouken@2247
    63
{
slouken@2247
    64
    int i;
slouken@2247
    65
slouken@2248
    66
    __m64 values[8];
slouken@2247
    67
    for (i = len / 64; i--;) {
slouken@3577
    68
#ifdef __SSE__
slouken@2248
    69
        _mm_prefetch(src, _MM_HINT_NTA);
slouken@3577
    70
#endif
slouken@2248
    71
        values[0] = *(__m64 *) (src + 0);
slouken@2248
    72
        values[1] = *(__m64 *) (src + 8);
slouken@2248
    73
        values[2] = *(__m64 *) (src + 16);
slouken@2248
    74
        values[3] = *(__m64 *) (src + 24);
slouken@2248
    75
        values[4] = *(__m64 *) (src + 32);
slouken@2248
    76
        values[5] = *(__m64 *) (src + 40);
slouken@2248
    77
        values[6] = *(__m64 *) (src + 48);
slouken@2248
    78
        values[7] = *(__m64 *) (src + 56);
slouken@2248
    79
        _mm_stream_pi((__m64 *) (dst + 0), values[0]);
slouken@2248
    80
        _mm_stream_pi((__m64 *) (dst + 8), values[1]);
slouken@2248
    81
        _mm_stream_pi((__m64 *) (dst + 16), values[2]);
slouken@2248
    82
        _mm_stream_pi((__m64 *) (dst + 24), values[3]);
slouken@2248
    83
        _mm_stream_pi((__m64 *) (dst + 32), values[4]);
slouken@2248
    84
        _mm_stream_pi((__m64 *) (dst + 40), values[5]);
slouken@2248
    85
        _mm_stream_pi((__m64 *) (dst + 48), values[6]);
slouken@2248
    86
        _mm_stream_pi((__m64 *) (dst + 56), values[7]);
slouken@2247
    87
        src += 64;
slouken@2247
    88
        dst += 64;
slouken@2247
    89
    }
slouken@2248
    90
slouken@2247
    91
    if (len & 63)
slouken@2247
    92
        SDL_memcpy(dst, src, len & 63);
slouken@2247
    93
}
slouken@2247
    94
#endif /* __MMX__ */
slouken@2247
    95
slouken@2247
    96
void
slouken@2247
    97
SDL_BlitCopy(SDL_BlitInfo * info)
slouken@2247
    98
{
slouken@2247
    99
    Uint8 *src, *dst;
slouken@2247
   100
    int w, h;
slouken@2247
   101
    int srcskip, dstskip;
slouken@2247
   102
slouken@2267
   103
    w = info->dst_w * info->dst_fmt->BytesPerPixel;
slouken@2262
   104
    h = info->dst_h;
slouken@2262
   105
    src = info->src;
slouken@2262
   106
    dst = info->dst;
slouken@2267
   107
    srcskip = info->src_pitch;
slouken@2267
   108
    dstskip = info->dst_pitch;
slouken@2247
   109
slouken@2247
   110
#ifdef __SSE__
slouken@2826
   111
    if (SDL_HasSSE() &&
slouken@2826
   112
        !((uintptr_t) src & 15) && !(srcskip & 15) &&
slouken@2826
   113
        !((uintptr_t) dst & 15) && !(dstskip & 15)) {
slouken@2247
   114
        while (h--) {
slouken@2247
   115
            SDL_memcpySSE(dst, src, w);
slouken@2247
   116
            src += srcskip;
slouken@2247
   117
            dst += dstskip;
slouken@2247
   118
        }
slouken@2247
   119
        return;
slouken@2247
   120
    }
slouken@2247
   121
#endif
slouken@2247
   122
slouken@2247
   123
#ifdef __MMX__
slouken@2826
   124
    if (SDL_HasMMX() &&
slouken@2826
   125
        !((uintptr_t) src & 7) && !(srcskip & 7) &&
slouken@2826
   126
        !((uintptr_t) dst & 7) && !(dstskip & 7)) {
slouken@2247
   127
        while (h--) {
slouken@2247
   128
            SDL_memcpyMMX(dst, src, w);
slouken@2247
   129
            src += srcskip;
slouken@2247
   130
            dst += dstskip;
slouken@2247
   131
        }
slouken@2248
   132
        _mm_empty();
slouken@2247
   133
        return;
slouken@2247
   134
    }
slouken@2247
   135
#endif
slouken@2247
   136
slouken@2247
   137
    while (h--) {
slouken@2247
   138
        SDL_memcpy(dst, src, w);
slouken@2247
   139
        src += srcskip;
slouken@2247
   140
        dst += dstskip;
slouken@2247
   141
    }
slouken@2247
   142
}
slouken@2247
   143
slouken@2247
   144
void
slouken@2247
   145
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
slouken@2247
   146
{
slouken@2247
   147
    Uint8 *src, *dst;
slouken@2247
   148
    int w, h;
slouken@2247
   149
    int skip;
slouken@2247
   150
slouken@2267
   151
    w = info->dst_w * info->dst_fmt->BytesPerPixel;
slouken@2262
   152
    h = info->dst_h;
slouken@2262
   153
    src = info->src;
slouken@2262
   154
    dst = info->dst;
slouken@2267
   155
    skip = info->src_pitch;
slouken@2248
   156
    if ((dst < src) || (dst >= (src + h * skip))) {
slouken@2247
   157
        SDL_BlitCopy(info);
slouken@2247
   158
    } else {
slouken@2247
   159
        src += ((h - 1) * skip);
slouken@2247
   160
        dst += ((h - 1) * skip);
slouken@2247
   161
        while (h--) {
slouken@2247
   162
            SDL_revcpy(dst, src, w);
slouken@2247
   163
            src -= skip;
slouken@2247
   164
            dst -= skip;
slouken@2247
   165
        }
slouken@2247
   166
    }
slouken@2247
   167
}
slouken@2247
   168
slouken@2247
   169
/* vi: set ts=4 sw=4 expandtab: */