src/video/SDL_fillrect.c
author Sam Lantinga
Sat, 16 Nov 2013 11:54:16 -0800
changeset 7998 097e3fac1979
parent 7682 f182a912606a
child 8093 b43765095a6f
permissions -rw-r--r--
Fixed bug 2241 - SSE intrinsic in fillrect MMX path

norfanin

The MMX path in SDL_fillrect.c uses the SSE intrinsic _mm_stream_pi. The function or symbol provided by the compiler will not be present because the SSE header may not get included. The linker will complain about an undefined reference.

Since this is the only intrinsic used here (and someone forgot to create one for MOVQ), I think the MMX path can be removed completely. At least I don't see another way to move 64 bits from an MMX register to memory.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
#include "SDL_config.h"

#include "SDL_endian.h"
#include "SDL_video.h"
#include "SDL_blit.h"
    25 
    26 
    27 #ifdef __SSE__
    28 /* *INDENT-OFF* */
    29 
    30 #ifdef _MSC_VER
    31 #define SSE_BEGIN \
    32     __m128 c128; \
    33     c128.m128_u32[0] = color; \
    34     c128.m128_u32[1] = color; \
    35     c128.m128_u32[2] = color; \
    36     c128.m128_u32[3] = color;
    37 #else
    38 #define SSE_BEGIN \
    39     __m128 c128; \
    40     DECLARE_ALIGNED(Uint32, cccc[4], 16); \
    41     cccc[0] = color; \
    42     cccc[1] = color; \
    43     cccc[2] = color; \
    44     cccc[3] = color; \
    45     c128 = *(__m128 *)cccc;
    46 #endif
    47 
    48 #define SSE_WORK \
    49     for (i = n / 64; i--;) { \
    50         _mm_stream_ps((float *)(p+0), c128); \
    51         _mm_stream_ps((float *)(p+16), c128); \
    52         _mm_stream_ps((float *)(p+32), c128); \
    53         _mm_stream_ps((float *)(p+48), c128); \
    54         p += 64; \
    55     }
    56 
    57 #define SSE_END
    58 
    59 #define DEFINE_SSE_FILLRECT(bpp, type) \
    60 static void \
    61 SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
    62 { \
    63     int i, n; \
    64     Uint8 *p = NULL; \
    65  \
    66     SSE_BEGIN; \
    67  \
    68     while (h--) { \
    69         n = w * bpp; \
    70         p = pixels; \
    71  \
    72         if (n > 63) { \
    73             int adjust = 16 - ((uintptr_t)p & 15); \
    74             if (adjust < 16) { \
    75                 n -= adjust; \
    76                 adjust /= bpp; \
    77                 while (adjust--) { \
    78                     *((type *)p) = (type)color; \
    79                     p += bpp; \
    80                 } \
    81             } \
    82             SSE_WORK; \
    83         } \
    84         if (n & 63) { \
    85             int remainder = (n & 63); \
    86             remainder /= bpp; \
    87             while (remainder--) { \
    88                 *((type *)p) = (type)color; \
    89                 p += bpp; \
    90             } \
    91         } \
    92         pixels += pitch; \
    93     } \
    94  \
    95     SSE_END; \
    96 }
    97 
    98 static void
    99 SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
   100 {
   101     int i, n;
   102     Uint8 *p = NULL;
   103     
   104     SSE_BEGIN;
   105     while (h--) {
   106         n = w;
   107         p = pixels;
   108 
   109         if (n > 63) {
   110             int adjust = 16 - ((uintptr_t)p & 15);
   111             if (adjust) {
   112                 n -= adjust;
   113                 SDL_memset(p, color, adjust);
   114                 p += adjust;
   115             }
   116             SSE_WORK;
   117         }
   118         if (n & 63) {
   119             int remainder = (n & 63);
   120             SDL_memset(p, color, remainder);
   121             p += remainder;
   122         }
   123         pixels += pitch;
   124     }
   125 
   126     SSE_END;
   127 }
   128 /* No DEFINE_SSE_FILLRECT(1, Uint8): the 1 byte-per-pixel variant is hand-written above and uses SDL_memset for the row edges. */
   129 DEFINE_SSE_FILLRECT(2, Uint16)
   130 DEFINE_SSE_FILLRECT(4, Uint32)
   131 
   132 /* *INDENT-ON* */
   133 #endif /* __SSE__ */
   134 
   135 static void
   136 SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
   137 {
   138     int n;
   139     Uint8 *p = NULL;
   140     
   141     while (h--) {
   142         n = w;
   143         p = pixels;
   144 
   145         if (n > 3) {
   146             switch ((uintptr_t) p & 3) {
   147             case 1:
   148                 *p++ = (Uint8) color;
   149                 --n;
   150             case 2:
   151                 *p++ = (Uint8) color;
   152                 --n;
   153             case 3:
   154                 *p++ = (Uint8) color;
   155                 --n;
   156             }
   157             SDL_memset4(p, color, (n >> 2));
   158         }
   159         if (n & 3) {
   160             p += (n & ~3);
   161             switch (n & 3) {
   162             case 3:
   163                 *p++ = (Uint8) color;
   164             case 2:
   165                 *p++ = (Uint8) color;
   166             case 1:
   167                 *p++ = (Uint8) color;
   168             }
   169         }
   170         pixels += pitch;
   171     }
   172 }
   173 
   174 static void
   175 SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
   176 {
   177     int n;
   178     Uint16 *p = NULL;
   179     
   180     while (h--) {
   181         n = w;
   182         p = (Uint16 *) pixels;
   183 
   184         if (n > 1) {
   185             if ((uintptr_t) p & 2) {
   186                 *p++ = (Uint16) color;
   187                 --n;
   188             }
   189             SDL_memset4(p, color, (n >> 1));
   190         }
   191         if (n & 1) {
   192             p[n - 1] = (Uint16) color;
   193         }
   194         pixels += pitch;
   195     }
   196 }
   197 
   198 static void
   199 SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
   200 {
   201     Uint8 r = (Uint8) ((color >> 16) & 0xFF);
   202     Uint8 g = (Uint8) ((color >> 8) & 0xFF);
   203     Uint8 b = (Uint8) (color & 0xFF);
   204     int n;
   205     Uint8 *p = NULL;
   206 
   207     while (h--) {
   208         n = w;
   209         p = pixels;
   210 
   211         while (n--) {
   212             *p++ = r;
   213             *p++ = g;
   214             *p++ = b;
   215         }
   216         pixels += pitch;
   217     }
   218 }
   219 
   220 static void
   221 SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
   222 {
   223     while (h--) {
   224         SDL_memset4(pixels, color, w);
   225         pixels += pitch;
   226     }
   227 }
   228 
   229 /* 
   230  * This function performs a fast fill of the given rectangle with 'color'
   231  */
   232 int
   233 SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
   234 {
   235     SDL_Rect clipped;
   236     Uint8 *pixels;
   237 
   238     if (!dst) {
   239         return SDL_SetError("Passed NULL destination surface");
   240     }
   241 
   242     /* This function doesn't work on surfaces < 8 bpp */
   243     if (dst->format->BitsPerPixel < 8) {
   244         return SDL_SetError("SDL_FillRect(): Unsupported surface format");
   245     }
   246 
   247     /* If 'rect' == NULL, then fill the whole surface */
   248     if (rect) {
   249         /* Perform clipping */
   250         if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) {
   251             return 0;
   252         }
   253         rect = &clipped;
   254     } else {
   255         rect = &dst->clip_rect;
   256     }
   257 
   258     /* Perform software fill */
   259     if (!dst->pixels) {
   260         return SDL_SetError("SDL_FillRect(): You must lock the surface");
   261     }
   262 
   263     pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch +
   264                                      rect->x * dst->format->BytesPerPixel;
   265 
   266     switch (dst->format->BytesPerPixel) {
   267     case 1:
   268         {
   269             color |= (color << 8);
   270             color |= (color << 16);
   271 #ifdef __SSE__
   272             if (SDL_HasSSE()) {
   273                 SDL_FillRect1SSE(pixels, dst->pitch, color, rect->w, rect->h);
   274                 break;
   275             }
   276 #endif
   277             SDL_FillRect1(pixels, dst->pitch, color, rect->w, rect->h);
   278             break;
   279         }
   280 
   281     case 2:
   282         {
   283             color |= (color << 16);
   284 #ifdef __SSE__
   285             if (SDL_HasSSE()) {
   286                 SDL_FillRect2SSE(pixels, dst->pitch, color, rect->w, rect->h);
   287                 break;
   288             }
   289 #endif
   290             SDL_FillRect2(pixels, dst->pitch, color, rect->w, rect->h);
   291             break;
   292         }
   293 
   294     case 3:
   295         /* 24-bit RGB is a slow path, at least for now. */
   296         {
   297             SDL_FillRect3(pixels, dst->pitch, color, rect->w, rect->h);
   298             break;
   299         }
   300 
   301     case 4:
   302         {
   303 #ifdef __SSE__
   304             if (SDL_HasSSE()) {
   305                 SDL_FillRect4SSE(pixels, dst->pitch, color, rect->w, rect->h);
   306                 break;
   307             }
   308 #endif
   309             SDL_FillRect4(pixels, dst->pitch, color, rect->w, rect->h);
   310             break;
   311         }
   312     }
   313 
   314     /* We're done! */
   315     return 0;
   316 }
   317 
   318 int
   319 SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
   320               Uint32 color)
   321 {
   322     int i;
   323     int status = 0;
   324 
   325     if (!rects) {
   326         return SDL_SetError("SDL_FillRects() passed NULL rects");
   327     }
   328 
   329     for (i = 0; i < count; ++i) {
   330         status += SDL_FillRect(dst, &rects[i], color);
   331     }
   332     return status;
   333 }
   334 
   335 /* vi: set ts=4 sw=4 expandtab: */