src/video/SDL_blit_copy.c
author Edgar Simo <bobbens@gmail.com>
Sun, 06 Jul 2008 17:06:37 +0000
branch gsoc2008_force_feedback
changeset 2498 ab567bd667bf
parent 2267 c785543d1843
child 2780 cc15254e44b6
permissions -rw-r--r--
Fixed various mistakes in the doxygen.
slouken@2247
     1
/*
slouken@2247
     2
    SDL - Simple DirectMedia Layer
slouken@2247
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@2247
     4
slouken@2247
     5
    This library is free software; you can redistribute it and/or
slouken@2247
     6
    modify it under the terms of the GNU Lesser General Public
slouken@2247
     7
    License as published by the Free Software Foundation; either
slouken@2247
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@2247
     9
slouken@2247
    10
    This library is distributed in the hope that it will be useful,
slouken@2247
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@2247
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@2247
    13
    Lesser General Public License for more details.
slouken@2247
    14
slouken@2247
    15
    You should have received a copy of the GNU Lesser General Public
slouken@2247
    16
    License along with this library; if not, write to the Free Software
slouken@2247
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@2247
    18
slouken@2247
    19
    Sam Lantinga
slouken@2247
    20
    slouken@libsdl.org
slouken@2247
    21
*/
slouken@2247
    22
#include "SDL_config.h"
slouken@2247
    23
slouken@2247
    24
#include "SDL_video.h"
slouken@2247
    25
#include "SDL_blit.h"
slouken@2254
    26
#include "SDL_blit_copy.h"
slouken@2247
    27
slouken@2247
    28
slouken@2253
    29
#ifdef __SSE__
slouken@2253
    30
/* This assumes 16-byte aligned src and dst */
slouken@2253
    31
static __inline__ void
slouken@2253
    32
SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
slouken@2253
    33
{
slouken@2253
    34
    int i;
slouken@2253
    35
slouken@2253
    36
    __m128 values[4];
slouken@2253
    37
    for (i = len / 64; i--;) {
slouken@2253
    38
        _mm_prefetch(src, _MM_HINT_NTA);
slouken@2253
    39
        values[0] = *(__m128 *) (src + 0);
slouken@2253
    40
        values[1] = *(__m128 *) (src + 16);
slouken@2253
    41
        values[2] = *(__m128 *) (src + 32);
slouken@2253
    42
        values[3] = *(__m128 *) (src + 48);
slouken@2253
    43
        _mm_stream_ps((float *) (dst + 0), values[0]);
slouken@2253
    44
        _mm_stream_ps((float *) (dst + 16), values[1]);
slouken@2253
    45
        _mm_stream_ps((float *) (dst + 32), values[2]);
slouken@2253
    46
        _mm_stream_ps((float *) (dst + 48), values[3]);
slouken@2253
    47
        src += 64;
slouken@2253
    48
        dst += 64;
slouken@2253
    49
    }
slouken@2253
    50
slouken@2253
    51
    if (len & 63)
slouken@2253
    52
        SDL_memcpy(dst, src, len & 63);
slouken@2253
    53
}
slouken@2253
    54
#endif /* __SSE__ */
slouken@2253
    55
slouken@2247
    56
#ifdef __MMX__
slouken@2253
    57
/* This assumes 8-byte aligned src and dst */
slouken@2247
    58
static __inline__ void
slouken@2248
    59
SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
slouken@2247
    60
{
slouken@2247
    61
    int i;
slouken@2247
    62
slouken@2248
    63
    __m64 values[8];
slouken@2247
    64
    for (i = len / 64; i--;) {
slouken@2248
    65
        _mm_prefetch(src, _MM_HINT_NTA);
slouken@2248
    66
        values[0] = *(__m64 *) (src + 0);
slouken@2248
    67
        values[1] = *(__m64 *) (src + 8);
slouken@2248
    68
        values[2] = *(__m64 *) (src + 16);
slouken@2248
    69
        values[3] = *(__m64 *) (src + 24);
slouken@2248
    70
        values[4] = *(__m64 *) (src + 32);
slouken@2248
    71
        values[5] = *(__m64 *) (src + 40);
slouken@2248
    72
        values[6] = *(__m64 *) (src + 48);
slouken@2248
    73
        values[7] = *(__m64 *) (src + 56);
slouken@2248
    74
        _mm_stream_pi((__m64 *) (dst + 0), values[0]);
slouken@2248
    75
        _mm_stream_pi((__m64 *) (dst + 8), values[1]);
slouken@2248
    76
        _mm_stream_pi((__m64 *) (dst + 16), values[2]);
slouken@2248
    77
        _mm_stream_pi((__m64 *) (dst + 24), values[3]);
slouken@2248
    78
        _mm_stream_pi((__m64 *) (dst + 32), values[4]);
slouken@2248
    79
        _mm_stream_pi((__m64 *) (dst + 40), values[5]);
slouken@2248
    80
        _mm_stream_pi((__m64 *) (dst + 48), values[6]);
slouken@2248
    81
        _mm_stream_pi((__m64 *) (dst + 56), values[7]);
slouken@2247
    82
        src += 64;
slouken@2247
    83
        dst += 64;
slouken@2247
    84
    }
slouken@2248
    85
slouken@2247
    86
    if (len & 63)
slouken@2247
    87
        SDL_memcpy(dst, src, len & 63);
slouken@2247
    88
}
slouken@2247
    89
#endif /* __MMX__ */
slouken@2247
    90
slouken@2247
    91
void
slouken@2247
    92
SDL_BlitCopy(SDL_BlitInfo * info)
slouken@2247
    93
{
slouken@2247
    94
    Uint8 *src, *dst;
slouken@2247
    95
    int w, h;
slouken@2247
    96
    int srcskip, dstskip;
slouken@2247
    97
slouken@2267
    98
    w = info->dst_w * info->dst_fmt->BytesPerPixel;
slouken@2262
    99
    h = info->dst_h;
slouken@2262
   100
    src = info->src;
slouken@2262
   101
    dst = info->dst;
slouken@2267
   102
    srcskip = info->src_pitch;
slouken@2267
   103
    dstskip = info->dst_pitch;
slouken@2247
   104
slouken@2247
   105
#ifdef __SSE__
slouken@2248
   106
    if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) {
slouken@2247
   107
        while (h--) {
slouken@2247
   108
            SDL_memcpySSE(dst, src, w);
slouken@2247
   109
            src += srcskip;
slouken@2247
   110
            dst += dstskip;
slouken@2247
   111
        }
slouken@2247
   112
        return;
slouken@2247
   113
    }
slouken@2247
   114
#endif
slouken@2247
   115
slouken@2247
   116
#ifdef __MMX__
slouken@2248
   117
    if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) {
slouken@2247
   118
        while (h--) {
slouken@2247
   119
            SDL_memcpyMMX(dst, src, w);
slouken@2247
   120
            src += srcskip;
slouken@2247
   121
            dst += dstskip;
slouken@2247
   122
        }
slouken@2248
   123
        _mm_empty();
slouken@2247
   124
        return;
slouken@2247
   125
    }
slouken@2247
   126
#endif
slouken@2247
   127
slouken@2247
   128
    while (h--) {
slouken@2247
   129
        SDL_memcpy(dst, src, w);
slouken@2247
   130
        src += srcskip;
slouken@2247
   131
        dst += dstskip;
slouken@2247
   132
    }
slouken@2247
   133
}
slouken@2247
   134
slouken@2247
   135
void
slouken@2247
   136
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
slouken@2247
   137
{
slouken@2247
   138
    Uint8 *src, *dst;
slouken@2247
   139
    int w, h;
slouken@2247
   140
    int skip;
slouken@2247
   141
slouken@2267
   142
    w = info->dst_w * info->dst_fmt->BytesPerPixel;
slouken@2262
   143
    h = info->dst_h;
slouken@2262
   144
    src = info->src;
slouken@2262
   145
    dst = info->dst;
slouken@2267
   146
    skip = info->src_pitch;
slouken@2248
   147
    if ((dst < src) || (dst >= (src + h * skip))) {
slouken@2247
   148
        SDL_BlitCopy(info);
slouken@2247
   149
    } else {
slouken@2247
   150
        src += ((h - 1) * skip);
slouken@2247
   151
        dst += ((h - 1) * skip);
slouken@2247
   152
        while (h--) {
slouken@2247
   153
            SDL_revcpy(dst, src, w);
slouken@2247
   154
            src -= skip;
slouken@2247
   155
            dst -= skip;
slouken@2247
   156
        }
slouken@2247
   157
    }
slouken@2247
   158
}
slouken@2247
   159
slouken@2247
   160
/* vi: set ts=4 sw=4 expandtab: */