src/video/SDL_blit_copy.c
author Ryan C. Gordon <icculus@icculus.org>
Tue, 23 Oct 2018 01:34:03 -0400
branch SDL-ryan-batching-renderer
changeset 12352 2b1707cc57fc
parent 11811 5d94cb6b24d3
child 12503 806492103856
permissions -rw-r--r--
render: Add floating point versions of various draw APIs.
slouken@2247
     1
/*
slouken@5535
     2
  Simple DirectMedia Layer
slouken@11811
     3
  Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
slouken@2247
     4
slouken@5535
     5
  This software is provided 'as-is', without any express or implied
slouken@5535
     6
  warranty.  In no event will the authors be held liable for any damages
slouken@5535
     7
  arising from the use of this software.
slouken@2247
     8
slouken@5535
     9
  Permission is granted to anyone to use this software for any purpose,
slouken@5535
    10
  including commercial applications, and to alter it and redistribute it
slouken@5535
    11
  freely, subject to the following restrictions:
slouken@2247
    12
slouken@5535
    13
  1. The origin of this software must not be misrepresented; you must not
slouken@5535
    14
     claim that you wrote the original software. If you use this software
slouken@5535
    15
     in a product, an acknowledgment in the product documentation would be
slouken@5535
    16
     appreciated but is not required.
slouken@5535
    17
  2. Altered source versions must be plainly marked as such, and must not be
slouken@5535
    18
     misrepresented as being the original software.
slouken@5535
    19
  3. This notice may not be removed or altered from any source distribution.
slouken@2247
    20
*/
icculus@8093
    21
#include "../SDL_internal.h"
slouken@2247
    22
slouken@2247
    23
#include "SDL_video.h"
slouken@2247
    24
#include "SDL_blit.h"
slouken@2254
    25
#include "SDL_blit_copy.h"
slouken@2247
    26
slouken@2247
    27
slouken@2253
    28
#ifdef __SSE__
slouken@2253
    29
/* This assumes 16-byte aligned src and dst */
slouken@7860
    30
static SDL_INLINE void
slouken@2253
    31
SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
slouken@2253
    32
{
slouken@2253
    33
    int i;
slouken@2253
    34
slouken@2253
    35
    __m128 values[4];
slouken@2253
    36
    for (i = len / 64; i--;) {
slouken@2253
    37
        _mm_prefetch(src, _MM_HINT_NTA);
slouken@2253
    38
        values[0] = *(__m128 *) (src + 0);
slouken@2253
    39
        values[1] = *(__m128 *) (src + 16);
slouken@2253
    40
        values[2] = *(__m128 *) (src + 32);
slouken@2253
    41
        values[3] = *(__m128 *) (src + 48);
slouken@2253
    42
        _mm_stream_ps((float *) (dst + 0), values[0]);
slouken@2253
    43
        _mm_stream_ps((float *) (dst + 16), values[1]);
slouken@2253
    44
        _mm_stream_ps((float *) (dst + 32), values[2]);
slouken@2253
    45
        _mm_stream_ps((float *) (dst + 48), values[3]);
slouken@2253
    46
        src += 64;
slouken@2253
    47
        dst += 64;
slouken@2253
    48
    }
slouken@2253
    49
slouken@2253
    50
    if (len & 63)
slouken@2253
    51
        SDL_memcpy(dst, src, len & 63);
slouken@2253
    52
}
slouken@2253
    53
#endif /* __SSE__ */
slouken@2253
    54
slouken@2247
    55
#ifdef __MMX__
slouken@2780
    56
#ifdef _MSC_VER
slouken@2780
    57
#pragma warning(disable:4799)
slouken@2780
    58
#endif
slouken@7860
    59
static SDL_INLINE void
slouken@2248
    60
SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
slouken@2247
    61
{
icculus@6034
    62
    const int remain = (len & 63);
slouken@2247
    63
    int i;
slouken@2247
    64
icculus@6032
    65
    __m64* d64 = (__m64*)dst;
icculus@6032
    66
    __m64* s64 = (__m64*)src;
icculus@6032
    67
icculus@6032
    68
    for(i= len / 64; i--;) {
icculus@6032
    69
        d64[0] = s64[0];
icculus@6032
    70
        d64[1] = s64[1];
icculus@6032
    71
        d64[2] = s64[2];
icculus@6032
    72
        d64[3] = s64[3];
icculus@6032
    73
        d64[4] = s64[4];
icculus@6032
    74
        d64[5] = s64[5];
icculus@6032
    75
        d64[6] = s64[6];
icculus@6032
    76
        d64[7] = s64[7];
icculus@6032
    77
icculus@6032
    78
        d64 += 8;
icculus@6032
    79
        s64 += 8;
slouken@2247
    80
    }
slouken@2248
    81
icculus@6034
    82
    if (remain)
icculus@6034
    83
    {
icculus@6034
    84
        const int skip = len - remain;
icculus@6034
    85
        SDL_memcpy(dst + skip, src + skip, remain);
icculus@6034
    86
    }
slouken@2247
    87
}
slouken@2247
    88
#endif /* __MMX__ */
slouken@2247
    89
slouken@2247
    90
void
slouken@2247
    91
SDL_BlitCopy(SDL_BlitInfo * info)
slouken@2247
    92
{
slouken@5325
    93
    SDL_bool overlap;
slouken@2247
    94
    Uint8 *src, *dst;
slouken@2247
    95
    int w, h;
slouken@2247
    96
    int srcskip, dstskip;
slouken@2247
    97
slouken@2267
    98
    w = info->dst_w * info->dst_fmt->BytesPerPixel;
slouken@2262
    99
    h = info->dst_h;
slouken@2262
   100
    src = info->src;
slouken@2262
   101
    dst = info->dst;
slouken@2267
   102
    srcskip = info->src_pitch;
slouken@2267
   103
    dstskip = info->dst_pitch;
slouken@2247
   104
slouken@5325
   105
    /* Properly handle overlapping blits */
slouken@5325
   106
    if (src < dst) {
slouken@5325
   107
        overlap = (dst < (src + h*srcskip));
slouken@5325
   108
    } else {
slouken@5325
   109
        overlap = (src < (dst + h*dstskip));
slouken@5325
   110
    }
slouken@5325
   111
    if (overlap) {
kratz00@10509
   112
        if ( dst < src ) {
kratz00@10509
   113
                while ( h-- ) {
kratz00@10509
   114
                        SDL_memmove(dst, src, w);
kratz00@10509
   115
                        src += srcskip;
kratz00@10509
   116
                        dst += dstskip;
kratz00@10509
   117
                }
kratz00@10509
   118
        } else {
kratz00@10509
   119
                src += ((h-1) * srcskip);
kratz00@10509
   120
                dst += ((h-1) * dstskip);
kratz00@10509
   121
                while ( h-- ) {
kratz00@10509
   122
                        SDL_memmove(dst, src, w);
kratz00@10509
   123
                        src -= srcskip;
kratz00@10509
   124
                        dst -= dstskip;
kratz00@10509
   125
                }
slouken@5325
   126
        }
slouken@5325
   127
        return;
slouken@5325
   128
    }
slouken@5325
   129
slouken@2247
   130
#ifdef __SSE__
slouken@2826
   131
    if (SDL_HasSSE() &&
slouken@2826
   132
        !((uintptr_t) src & 15) && !(srcskip & 15) &&
slouken@2826
   133
        !((uintptr_t) dst & 15) && !(dstskip & 15)) {
slouken@2247
   134
        while (h--) {
slouken@2247
   135
            SDL_memcpySSE(dst, src, w);
slouken@2247
   136
            src += srcskip;
slouken@2247
   137
            dst += dstskip;
slouken@2247
   138
        }
slouken@2247
   139
        return;
slouken@2247
   140
    }
slouken@2247
   141
#endif
slouken@2247
   142
slouken@2247
   143
#ifdef __MMX__
icculus@6033
   144
    if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
slouken@2247
   145
        while (h--) {
slouken@2247
   146
            SDL_memcpyMMX(dst, src, w);
slouken@2247
   147
            src += srcskip;
slouken@2247
   148
            dst += dstskip;
slouken@2247
   149
        }
slouken@2248
   150
        _mm_empty();
slouken@2247
   151
        return;
slouken@2247
   152
    }
slouken@2247
   153
#endif
slouken@2247
   154
slouken@2247
   155
    while (h--) {
slouken@2247
   156
        SDL_memcpy(dst, src, w);
slouken@2247
   157
        src += srcskip;
slouken@2247
   158
        dst += dstskip;
slouken@2247
   159
    }
slouken@2247
   160
}
slouken@2247
   161
slouken@2247
   162
/* vi: set ts=4 sw=4 expandtab: */