src/video/SDL_fillrect.c
author Sam Lantinga
Sat, 16 Nov 2013 11:54:16 -0800
changeset 7998 097e3fac1979
parent 7682 f182a912606a
child 8093 b43765095a6f
permissions -rw-r--r--
Fixed bug 2241 - SSE intrinsic in fillrect MMX path

norfanin

The MMX path in SDL_fillrect.c uses the SSE intrinsic _mm_stream_pi. The function or symbol provided by the compiler will not be present because the SSE header may not get included. The linker will complain about an undefined reference.

Since this is the only intrinsic used here (and someone forgot to create one for MOVQ), I think the MMX path can be removed completely. At least I don't see another way to move 64-bits from an MMX register to memory.
slouken@2888
     1
/*
slouken@5535
     2
  Simple DirectMedia Layer
slouken@6885
     3
  Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
slouken@2888
     4
slouken@5535
     5
  This software is provided 'as-is', without any express or implied
slouken@5535
     6
  warranty.  In no event will the authors be held liable for any damages
slouken@5535
     7
  arising from the use of this software.
slouken@2888
     8
slouken@5535
     9
  Permission is granted to anyone to use this software for any purpose,
slouken@5535
    10
  including commercial applications, and to alter it and redistribute it
slouken@5535
    11
  freely, subject to the following restrictions:
slouken@2888
    12
slouken@5535
    13
  1. The origin of this software must not be misrepresented; you must not
slouken@5535
    14
     claim that you wrote the original software. If you use this software
slouken@5535
    15
     in a product, an acknowledgment in the product documentation would be
slouken@5535
    16
     appreciated but is not required.
slouken@5535
    17
  2. Altered source versions must be plainly marked as such, and must not be
slouken@5535
    18
     misrepresented as being the original software.
slouken@5535
    19
  3. This notice may not be removed or altered from any source distribution.
slouken@2888
    20
*/
slouken@2888
    21
#include "SDL_config.h"
slouken@2888
    22
slouken@2888
    23
#include "SDL_video.h"
slouken@2888
    24
#include "SDL_blit.h"
slouken@2888
    25
slouken@2888
    26
slouken@2888
    27
#ifdef __SSE__
slouken@2888
    28
/* *INDENT-OFF* */
slouken@2888
    29
slouken@2888
    30
/*
 * SSE_BEGIN declares the local `c128` and loads the 32-bit `color` into each
 * of its four 32-bit lanes.  It expects a Uint32 named `color` in scope and
 * must appear where a declaration is still allowed.
 */
#if defined(_MSC_VER)
/* MSVC: the lanes are written directly through the m128_u32 member. */
#define SSE_BEGIN \
    __m128 c128; \
    c128.m128_u32[0] = c128.m128_u32[1] = \
    c128.m128_u32[2] = c128.m128_u32[3] = color;
#else
/*
 * Other compilers: build the pattern in a scratch array declared through
 * DECLARE_ALIGNED (presumably 16-byte aligned storage -- confirm against its
 * definition) and copy it into c128 in one go.
 */
#define SSE_BEGIN \
    __m128 c128; \
    DECLARE_ALIGNED(Uint32, color_x4[4], 16); \
    color_x4[0] = color_x4[1] = color_x4[2] = color_x4[3] = color; \
    c128 = *(__m128 *)color_x4;
#endif
slouken@2888
    47
slouken@2888
    48
/*
 * SSE_WORK stores c128 over every complete 64-byte chunk of the current row.
 * It uses the caller's locals: `n` (byte count, left unmodified), `i` (loop
 * counter) and `p` (write cursor, advanced by 64 bytes per chunk).
 * NOTE(review): _mm_stream_ps requires a 16-byte aligned address, so callers
 * must align `p` before expanding this macro.
 */
#define SSE_WORK \
    for (i = n / 64; i--; p += 64) { \
        _mm_stream_ps((float *)(p +  0), c128); \
        _mm_stream_ps((float *)(p + 16), c128); \
        _mm_stream_ps((float *)(p + 32), c128); \
        _mm_stream_ps((float *)(p + 48), c128); \
    }

/* SSE_END is the counterpart of SSE_BEGIN; it expands to nothing. */
#define SSE_END
slouken@2888
    58
slouken@2888
    59
/*
 * DEFINE_SSE_FILLRECT(bpp, type) generates SDL_FillRect<bpp>SSE(), which
 * fills a w x h rectangle of `bpp`-byte pixels starting at `pixels`, with
 * rows `pitch` bytes apart.
 *
 * Each row is handled in three steps:
 *   1. rows of 64 bytes or more get scalar stores up to the next 16-byte
 *      boundary,
 *   2. SSE_WORK then streams the complete 64-byte chunks,
 *   3. the remaining (n & 63) bytes are written with scalar stores.
 *
 * `color` is stored through `type` for the scalar parts and as a full
 * 32-bit pattern by the SSE part.
 */
#define DEFINE_SSE_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
    int i, n; \
    Uint8 *p = NULL; \
 \
    SSE_BEGIN; \
 \
    for (; h--; pixels += pitch) { \
        int count; \
 \
        p = pixels; \
        n = w * bpp; \
 \
        if (n > 63) { \
            int misalign = (int)((uintptr_t)p & 15); \
            if (misalign != 0) { \
                n -= 16 - misalign; \
                for (count = (16 - misalign) / bpp; count--; p += bpp) { \
                    *((type *)p) = (type)color; \
                } \
            } \
            SSE_WORK; \
        } \
 \
        for (count = (n & 63) / bpp; count--; p += bpp) { \
            *((type *)p) = (type)color; \
        } \
    } \
 \
    SSE_END; \
}
slouken@2888
    97
slouken@3012
    98
static void
slouken@3012
    99
SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
slouken@3012
   100
{
gabomdq@7679
   101
    int i, n;
gabomdq@7679
   102
    Uint8 *p = NULL;
gabomdq@7679
   103
    
slouken@3012
   104
    SSE_BEGIN;
slouken@3012
   105
    while (h--) {
gabomdq@7679
   106
        n = w;
gabomdq@7679
   107
        p = pixels;
slouken@3012
   108
slouken@3012
   109
        if (n > 63) {
slouken@3012
   110
            int adjust = 16 - ((uintptr_t)p & 15);
slouken@3012
   111
            if (adjust) {
slouken@3012
   112
                n -= adjust;
slouken@3012
   113
                SDL_memset(p, color, adjust);
slouken@3012
   114
                p += adjust;
slouken@3012
   115
            }
slouken@3012
   116
            SSE_WORK;
slouken@3012
   117
        }
slouken@3012
   118
        if (n & 63) {
slouken@3012
   119
            int remainder = (n & 63);
slouken@3012
   120
            SDL_memset(p, color, remainder);
slouken@3012
   121
            p += remainder;
slouken@3012
   122
        }
slouken@3012
   123
        pixels += pitch;
slouken@3012
   124
    }
slouken@3012
   125
slouken@3012
   126
    SSE_END;
slouken@3012
   127
}
gabomdq@7678
   128
/* DEFINE_SSE_FILLRECT(1, Uint8) */
slouken@2888
   129
DEFINE_SSE_FILLRECT(2, Uint16)
slouken@2888
   130
DEFINE_SSE_FILLRECT(4, Uint32)
slouken@2888
   131
slouken@2888
   132
/* *INDENT-ON* */
slouken@2888
   133
#endif /* __SSE__ */
slouken@2888
   134
slouken@2888
   135
static void
slouken@2888
   136
SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
slouken@2888
   137
{
gabomdq@7680
   138
    int n;
gabomdq@7680
   139
    Uint8 *p = NULL;
gabomdq@7680
   140
    
slouken@2888
   141
    while (h--) {
gabomdq@7680
   142
        n = w;
gabomdq@7680
   143
        p = pixels;
slouken@2888
   144
slouken@2888
   145
        if (n > 3) {
slouken@2888
   146
            switch ((uintptr_t) p & 3) {
slouken@2888
   147
            case 1:
slouken@2888
   148
                *p++ = (Uint8) color;
slouken@2888
   149
                --n;
slouken@2888
   150
            case 2:
slouken@2888
   151
                *p++ = (Uint8) color;
slouken@2888
   152
                --n;
slouken@2888
   153
            case 3:
slouken@2888
   154
                *p++ = (Uint8) color;
slouken@2888
   155
                --n;
slouken@2888
   156
            }
slouken@2888
   157
            SDL_memset4(p, color, (n >> 2));
slouken@2888
   158
        }
slouken@2888
   159
        if (n & 3) {
slouken@2888
   160
            p += (n & ~3);
slouken@2888
   161
            switch (n & 3) {
slouken@2888
   162
            case 3:
slouken@2888
   163
                *p++ = (Uint8) color;
slouken@2888
   164
            case 2:
slouken@2888
   165
                *p++ = (Uint8) color;
slouken@2888
   166
            case 1:
slouken@2888
   167
                *p++ = (Uint8) color;
slouken@2888
   168
            }
slouken@2888
   169
        }
slouken@2888
   170
        pixels += pitch;
slouken@2888
   171
    }
slouken@2888
   172
}
slouken@2888
   173
slouken@2888
   174
static void
slouken@2888
   175
SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
slouken@2888
   176
{
gabomdq@7680
   177
    int n;
gabomdq@7682
   178
    Uint16 *p = NULL;
gabomdq@7680
   179
    
slouken@2888
   180
    while (h--) {
gabomdq@7680
   181
        n = w;
gabomdq@7680
   182
        p = (Uint16 *) pixels;
slouken@2888
   183
slouken@2888
   184
        if (n > 1) {
slouken@2888
   185
            if ((uintptr_t) p & 2) {
slouken@2888
   186
                *p++ = (Uint16) color;
slouken@2888
   187
                --n;
slouken@2888
   188
            }
slouken@2888
   189
            SDL_memset4(p, color, (n >> 1));
slouken@2888
   190
        }
slouken@2888
   191
        if (n & 1) {
slouken@2888
   192
            p[n - 1] = (Uint16) color;
slouken@2888
   193
        }
slouken@2888
   194
        pixels += pitch;
slouken@2888
   195
    }
slouken@2888
   196
}
slouken@2888
   197
slouken@2888
   198
static void
slouken@2888
   199
SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
slouken@2888
   200
{
slouken@2888
   201
    Uint8 r = (Uint8) ((color >> 16) & 0xFF);
slouken@2888
   202
    Uint8 g = (Uint8) ((color >> 8) & 0xFF);
slouken@2888
   203
    Uint8 b = (Uint8) (color & 0xFF);
gabomdq@7680
   204
    int n;
gabomdq@7680
   205
    Uint8 *p = NULL;
slouken@2888
   206
slouken@2888
   207
    while (h--) {
gabomdq@7680
   208
        n = w;
gabomdq@7680
   209
        p = pixels;
slouken@2888
   210
slouken@2888
   211
        while (n--) {
slouken@2888
   212
            *p++ = r;
slouken@2888
   213
            *p++ = g;
slouken@2888
   214
            *p++ = b;
slouken@2888
   215
        }
slouken@2888
   216
        pixels += pitch;
slouken@2888
   217
    }
slouken@2888
   218
}
slouken@2888
   219
slouken@2888
   220
static void
slouken@2888
   221
SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
slouken@2888
   222
{
slouken@2888
   223
    while (h--) {
slouken@2888
   224
        SDL_memset4(pixels, color, w);
slouken@2888
   225
        pixels += pitch;
slouken@2888
   226
    }
slouken@2888
   227
}
slouken@2888
   228
slouken@2888
   229
/* 
slouken@2888
   230
 * This function performs a fast fill of the given rectangle with 'color'
slouken@2888
   231
 */
slouken@2888
   232
int
slouken@3536
   233
SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
slouken@2888
   234
{
slouken@3536
   235
    SDL_Rect clipped;
slouken@2888
   236
    Uint8 *pixels;
slouken@2888
   237
slouken@3536
   238
    if (!dst) {
icculus@7037
   239
        return SDL_SetError("Passed NULL destination surface");
slouken@3536
   240
    }
slouken@3536
   241
slouken@2888
   242
    /* This function doesn't work on surfaces < 8 bpp */
slouken@2888
   243
    if (dst->format->BitsPerPixel < 8) {
icculus@7037
   244
        return SDL_SetError("SDL_FillRect(): Unsupported surface format");
slouken@2888
   245
    }
slouken@2888
   246
slouken@3536
   247
    /* If 'rect' == NULL, then fill the whole surface */
slouken@3536
   248
    if (rect) {
slouken@2888
   249
        /* Perform clipping */
slouken@3536
   250
        if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) {
slouken@3536
   251
            return 0;
slouken@2888
   252
        }
slouken@3536
   253
        rect = &clipped;
slouken@2888
   254
    } else {
slouken@3536
   255
        rect = &dst->clip_rect;
slouken@2888
   256
    }
slouken@2888
   257
slouken@2888
   258
    /* Perform software fill */
slouken@2888
   259
    if (!dst->pixels) {
icculus@7037
   260
        return SDL_SetError("SDL_FillRect(): You must lock the surface");
slouken@2888
   261
    }
slouken@2888
   262
slouken@3536
   263
    pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch +
slouken@3536
   264
                                     rect->x * dst->format->BytesPerPixel;
slouken@2888
   265
slouken@2888
   266
    switch (dst->format->BytesPerPixel) {
slouken@2888
   267
    case 1:
slouken@2888
   268
        {
slouken@2888
   269
            color |= (color << 8);
slouken@2888
   270
            color |= (color << 16);
slouken@2888
   271
#ifdef __SSE__
slouken@2888
   272
            if (SDL_HasSSE()) {
slouken@3536
   273
                SDL_FillRect1SSE(pixels, dst->pitch, color, rect->w, rect->h);
slouken@2888
   274
                break;
slouken@2888
   275
            }
slouken@2888
   276
#endif
slouken@3536
   277
            SDL_FillRect1(pixels, dst->pitch, color, rect->w, rect->h);
slouken@2888
   278
            break;
slouken@2888
   279
        }
slouken@2888
   280
slouken@2888
   281
    case 2:
slouken@2888
   282
        {
slouken@2888
   283
            color |= (color << 16);
slouken@2888
   284
#ifdef __SSE__
slouken@2888
   285
            if (SDL_HasSSE()) {
slouken@3536
   286
                SDL_FillRect2SSE(pixels, dst->pitch, color, rect->w, rect->h);
slouken@2888
   287
                break;
slouken@2888
   288
            }
slouken@2888
   289
#endif
slouken@3536
   290
            SDL_FillRect2(pixels, dst->pitch, color, rect->w, rect->h);
slouken@2888
   291
            break;
slouken@2888
   292
        }
slouken@2888
   293
slouken@2888
   294
    case 3:
slouken@2888
   295
        /* 24-bit RGB is a slow path, at least for now. */
slouken@2888
   296
        {
slouken@3536
   297
            SDL_FillRect3(pixels, dst->pitch, color, rect->w, rect->h);
slouken@2888
   298
            break;
slouken@2888
   299
        }
slouken@2888
   300
slouken@2888
   301
    case 4:
slouken@2888
   302
        {
slouken@2888
   303
#ifdef __SSE__
slouken@2888
   304
            if (SDL_HasSSE()) {
slouken@3536
   305
                SDL_FillRect4SSE(pixels, dst->pitch, color, rect->w, rect->h);
slouken@2888
   306
                break;
slouken@2888
   307
            }
slouken@2888
   308
#endif
slouken@3536
   309
            SDL_FillRect4(pixels, dst->pitch, color, rect->w, rect->h);
slouken@2888
   310
            break;
slouken@2888
   311
        }
slouken@2888
   312
    }
slouken@2888
   313
slouken@2888
   314
    /* We're done! */
slouken@3536
   315
    return 0;
slouken@3536
   316
}
slouken@3536
   317
slouken@3536
   318
int
slouken@5297
   319
SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
slouken@3536
   320
              Uint32 color)
slouken@3536
   321
{
slouken@3536
   322
    int i;
slouken@3536
   323
    int status = 0;
slouken@3536
   324
slouken@5297
   325
    if (!rects) {
icculus@7037
   326
        return SDL_SetError("SDL_FillRects() passed NULL rects");
slouken@5297
   327
    }
slouken@5297
   328
slouken@3536
   329
    for (i = 0; i < count; ++i) {
slouken@5297
   330
        status += SDL_FillRect(dst, &rects[i], color);
slouken@3536
   331
    }
slouken@3536
   332
    return status;
slouken@2888
   333
}
slouken@2888
   334
slouken@2888
   335
/* vi: set ts=4 sw=4 expandtab: */