src/video/SDL_blit_copy.c
author Sam Lantinga
Thu, 16 Aug 2007 06:20:51 +0000
changeset 2250 e1d228456537
parent 2249 5a58b57b6724
child 2252 b80e3d57941f
permissions -rw-r--r--
Fixed a few compiler warnings.
Added SDL_blit_copy.c to the Visual C++ project

The SSE and MMX intrinsics don't compile on Visual Studio yet...
slouken@2247
     1
/*
slouken@2247
     2
    SDL - Simple DirectMedia Layer
slouken@2247
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@2247
     4
slouken@2247
     5
    This library is free software; you can redistribute it and/or
slouken@2247
     6
    modify it under the terms of the GNU Lesser General Public
slouken@2247
     7
    License as published by the Free Software Foundation; either
slouken@2247
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@2247
     9
slouken@2247
    10
    This library is distributed in the hope that it will be useful,
slouken@2247
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@2247
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@2247
    13
    Lesser General Public License for more details.
slouken@2247
    14
slouken@2247
    15
    You should have received a copy of the GNU Lesser General Public
slouken@2247
    16
    License along with this library; if not, write to the Free Software
slouken@2247
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@2247
    18
slouken@2247
    19
    Sam Lantinga
slouken@2247
    20
    slouken@libsdl.org
slouken@2247
    21
*/
slouken@2247
    22
#include "SDL_config.h"
slouken@2247
    23
slouken@2250
    24
#include "SDL_cpuinfo.h"
slouken@2247
    25
#include "SDL_video.h"
slouken@2247
    26
#include "SDL_blit.h"
slouken@2249
    27
#include "SDL_blit_copy.h"
slouken@2247
    28
slouken@2247
    29
slouken@2247
    30
#ifdef __MMX__
slouken@2247
    31
static __inline__ void
slouken@2248
    32
SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
slouken@2247
    33
{
slouken@2247
    34
    int i;
slouken@2247
    35
slouken@2248
    36
    __m64 values[8];
slouken@2247
    37
    for (i = len / 64; i--;) {
slouken@2248
    38
        _mm_prefetch(src, _MM_HINT_NTA);
slouken@2248
    39
        values[0] = *(__m64 *) (src + 0);
slouken@2248
    40
        values[1] = *(__m64 *) (src + 8);
slouken@2248
    41
        values[2] = *(__m64 *) (src + 16);
slouken@2248
    42
        values[3] = *(__m64 *) (src + 24);
slouken@2248
    43
        values[4] = *(__m64 *) (src + 32);
slouken@2248
    44
        values[5] = *(__m64 *) (src + 40);
slouken@2248
    45
        values[6] = *(__m64 *) (src + 48);
slouken@2248
    46
        values[7] = *(__m64 *) (src + 56);
slouken@2248
    47
        _mm_stream_pi((__m64 *) (dst + 0), values[0]);
slouken@2248
    48
        _mm_stream_pi((__m64 *) (dst + 8), values[1]);
slouken@2248
    49
        _mm_stream_pi((__m64 *) (dst + 16), values[2]);
slouken@2248
    50
        _mm_stream_pi((__m64 *) (dst + 24), values[3]);
slouken@2248
    51
        _mm_stream_pi((__m64 *) (dst + 32), values[4]);
slouken@2248
    52
        _mm_stream_pi((__m64 *) (dst + 40), values[5]);
slouken@2248
    53
        _mm_stream_pi((__m64 *) (dst + 48), values[6]);
slouken@2248
    54
        _mm_stream_pi((__m64 *) (dst + 56), values[7]);
slouken@2247
    55
        src += 64;
slouken@2247
    56
        dst += 64;
slouken@2247
    57
    }
slouken@2248
    58
slouken@2247
    59
    if (len & 63)
slouken@2247
    60
        SDL_memcpy(dst, src, len & 63);
slouken@2247
    61
}
slouken@2247
    62
#endif /* __MMX__ */
slouken@2247
    63
slouken@2247
    64
#ifdef __SSE__
slouken@2247
    65
static __inline__ void
slouken@2248
    66
SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
slouken@2247
    67
{
slouken@2247
    68
    int i;
slouken@2247
    69
slouken@2248
    70
    __m128 values[4];
slouken@2247
    71
    for (i = len / 64; i--;) {
slouken@2248
    72
        _mm_prefetch(src, _MM_HINT_NTA);
slouken@2248
    73
        values[0] = *(__m128 *) (src + 0);
slouken@2248
    74
        values[1] = *(__m128 *) (src + 16);
slouken@2248
    75
        values[2] = *(__m128 *) (src + 32);
slouken@2248
    76
        values[3] = *(__m128 *) (src + 48);
slouken@2248
    77
        _mm_stream_ps((float *) (dst + 0), values[0]);
slouken@2248
    78
        _mm_stream_ps((float *) (dst + 16), values[1]);
slouken@2248
    79
        _mm_stream_ps((float *) (dst + 32), values[2]);
slouken@2248
    80
        _mm_stream_ps((float *) (dst + 48), values[3]);
slouken@2247
    81
        src += 64;
slouken@2247
    82
        dst += 64;
slouken@2247
    83
    }
slouken@2248
    84
slouken@2247
    85
    if (len & 63)
slouken@2247
    86
        SDL_memcpy(dst, src, len & 63);
slouken@2247
    87
}
slouken@2247
    88
#endif /* __SSE__ */
slouken@2247
    89
slouken@2247
    90
void
slouken@2247
    91
SDL_BlitCopy(SDL_BlitInfo * info)
slouken@2247
    92
{
slouken@2247
    93
    Uint8 *src, *dst;
slouken@2247
    94
    int w, h;
slouken@2247
    95
    int srcskip, dstskip;
slouken@2247
    96
slouken@2247
    97
    w = info->d_width * info->dst->BytesPerPixel;
slouken@2247
    98
    h = info->d_height;
slouken@2247
    99
    src = info->s_pixels;
slouken@2247
   100
    dst = info->d_pixels;
slouken@2247
   101
    srcskip = w + info->s_skip;
slouken@2247
   102
    dstskip = w + info->d_skip;
slouken@2247
   103
slouken@2247
   104
#ifdef __SSE__
slouken@2248
   105
    if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
slouken@2247
   106
        while (h--) {
slouken@2247
   107
            SDL_memcpySSE(dst, src, w);
slouken@2247
   108
            src += srcskip;
slouken@2247
   109
            dst += dstskip;
slouken@2247
   110
        }
slouken@2247
   111
        return;
slouken@2247
   112
    }
slouken@2247
   113
#endif
slouken@2247
   114
slouken@2247
   115
#ifdef __MMX__
slouken@2248
   116
    if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
slouken@2247
   117
        while (h--) {
slouken@2247
   118
            SDL_memcpyMMX(dst, src, w);
slouken@2247
   119
            src += srcskip;
slouken@2247
   120
            dst += dstskip;
slouken@2247
   121
        }
slouken@2248
   122
        _mm_empty();
slouken@2247
   123
        return;
slouken@2247
   124
    }
slouken@2247
   125
#endif
slouken@2247
   126
slouken@2247
   127
    while (h--) {
slouken@2247
   128
        SDL_memcpy(dst, src, w);
slouken@2247
   129
        src += srcskip;
slouken@2247
   130
        dst += dstskip;
slouken@2247
   131
    }
slouken@2247
   132
}
slouken@2247
   133
slouken@2247
   134
void
slouken@2247
   135
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
slouken@2247
   136
{
slouken@2247
   137
    Uint8 *src, *dst;
slouken@2247
   138
    int w, h;
slouken@2247
   139
    int skip;
slouken@2247
   140
slouken@2247
   141
    w = info->d_width * info->dst->BytesPerPixel;
slouken@2247
   142
    h = info->d_height;
slouken@2247
   143
    src = info->s_pixels;
slouken@2247
   144
    dst = info->d_pixels;
slouken@2247
   145
    skip = w + info->s_skip;
slouken@2248
   146
    if ((dst < src) || (dst >= (src + h * skip))) {
slouken@2247
   147
        SDL_BlitCopy(info);
slouken@2247
   148
    } else {
slouken@2247
   149
        src += ((h - 1) * skip);
slouken@2247
   150
        dst += ((h - 1) * skip);
slouken@2247
   151
        while (h--) {
slouken@2247
   152
            SDL_revcpy(dst, src, w);
slouken@2247
   153
            src -= skip;
slouken@2247
   154
            dst -= skip;
slouken@2247
   155
        }
slouken@2247
   156
    }
slouken@2247
   157
}
slouken@2247
   158
slouken@2247
   159
/* vi: set ts=4 sw=4 expandtab: */