src/video/SDL_fill.c
changeset 2253 6d99edd791bf
child 2830 af3dd02cf043
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/video/SDL_fill.c	Thu Aug 16 21:43:19 2007 +0000
     1.3 @@ -0,0 +1,365 @@
     1.4 +/*
     1.5 +    SDL - Simple DirectMedia Layer
     1.6 +    Copyright (C) 1997-2006 Sam Lantinga
     1.7 +
     1.8 +    This library is free software; you can redistribute it and/or
     1.9 +    modify it under the terms of the GNU Lesser General Public
    1.10 +    License as published by the Free Software Foundation; either
    1.11 +    version 2.1 of the License, or (at your option) any later version.
    1.12 +
    1.13 +    This library is distributed in the hope that it will be useful,
    1.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.16 +    Lesser General Public License for more details.
    1.17 +
    1.18 +    You should have received a copy of the GNU Lesser General Public
    1.19 +    License along with this library; if not, write to the Free Software
    1.20 +    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    1.21 +
    1.22 +    Sam Lantinga
    1.23 +    slouken@libsdl.org
    1.24 +*/
#include "SDL_config.h"

#include "SDL_video.h"
#include "SDL_endian.h"
#include "SDL_blit.h"
    1.29 +
    1.30 +
    1.31 +#ifdef __SSE__
    1.32 +/* *INDENT-OFF* */
    1.33 +
    1.34 +#ifdef _MSC_VER
    1.35 +#define SSE_BEGIN \
    1.36 +    __m128 c128; \
    1.37 +	c128.m128_u32[0] = color; \
    1.38 +	c128.m128_u32[1] = color; \
    1.39 +	c128.m128_u32[2] = color; \
    1.40 +	c128.m128_u32[3] = color;
    1.41 +#else
    1.42 +#define SSE_BEGIN \
    1.43 +    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
    1.44 +    cccc[0] = color; \
    1.45 +    cccc[1] = color; \
    1.46 +    cccc[2] = color; \
    1.47 +    cccc[3] = color; \
    1.48 +    __m128 c128 = *(__m128 *)cccc;
    1.49 +#endif
    1.50 +
    1.51 +#define SSE_WORK \
    1.52 +    for (i = n / 64; i--;) { \
    1.53 +        _mm_stream_ps((float *)(p+0), c128); \
    1.54 +        _mm_stream_ps((float *)(p+16), c128); \
    1.55 +        _mm_stream_ps((float *)(p+32), c128); \
    1.56 +        _mm_stream_ps((float *)(p+48), c128); \
    1.57 +        p += 64; \
    1.58 +    }
    1.59 +
    1.60 +#define SSE_END
    1.61 +
    1.62 +#define DEFINE_SSE_FILLRECT(bpp, type) \
    1.63 +static void \
    1.64 +SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
    1.65 +{ \
    1.66 +    SSE_BEGIN; \
    1.67 + \
    1.68 +    while (h--) { \
    1.69 +        int i, n = w * bpp; \
    1.70 +        Uint8 *p = pixels; \
    1.71 + \
    1.72 +        if (n > 15) { \
    1.73 +            int adjust = 16 - ((uintptr_t)p & 15); \
    1.74 +            if (adjust < 16) { \
    1.75 +                n -= adjust; \
    1.76 +                adjust /= bpp; \
    1.77 +                while(adjust--) { \
    1.78 +                    *((type *)p) = (type)color; \
    1.79 +                    p += bpp; \
    1.80 +                } \
    1.81 +            } \
    1.82 +            SSE_WORK; \
    1.83 +        } \
    1.84 +        if (n & 63) { \
    1.85 +            int remainder = (n & 63); \
    1.86 +            remainder /= bpp; \
    1.87 +            while(remainder--) { \
    1.88 +                *((type *)p) = (type)color; \
    1.89 +                p += bpp; \
    1.90 +            } \
    1.91 +        } \
    1.92 +        pixels += pitch; \
    1.93 +    } \
    1.94 + \
    1.95 +    SSE_END; \
    1.96 +}
    1.97 +
    1.98 +DEFINE_SSE_FILLRECT(1, Uint8)
    1.99 +DEFINE_SSE_FILLRECT(2, Uint16)
   1.100 +DEFINE_SSE_FILLRECT(4, Uint32)
   1.101 +
   1.102 +/* *INDENT-ON* */
   1.103 +#endif /* __SSE__ */
   1.104 +
   1.105 +#ifdef __MMX__
   1.106 +/* *INDENT-OFF* */
   1.107 +
   1.108 +#define MMX_BEGIN \
   1.109 +    __m64 c64 = _mm_set_pi32(color, color)
   1.110 +
   1.111 +#define MMX_WORK \
   1.112 +    for (i = n / 64; i--;) { \
   1.113 +        _mm_stream_pi((__m64 *)(p+0), c64); \
   1.114 +        _mm_stream_pi((__m64 *)(p+8), c64); \
   1.115 +        _mm_stream_pi((__m64 *)(p+16), c64); \
   1.116 +        _mm_stream_pi((__m64 *)(p+24), c64); \
   1.117 +        _mm_stream_pi((__m64 *)(p+32), c64); \
   1.118 +        _mm_stream_pi((__m64 *)(p+40), c64); \
   1.119 +        _mm_stream_pi((__m64 *)(p+48), c64); \
   1.120 +        _mm_stream_pi((__m64 *)(p+56), c64); \
   1.121 +        p += 64; \
   1.122 +    }
   1.123 +
   1.124 +#define MMX_END \
   1.125 +    _mm_empty()
   1.126 +
   1.127 +#define DEFINE_MMX_FILLRECT(bpp, type) \
   1.128 +static void \
   1.129 +SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
   1.130 +{ \
   1.131 +    MMX_BEGIN; \
   1.132 + \
   1.133 +    while (h--) { \
   1.134 +        int i, n = w * bpp; \
   1.135 +        Uint8 *p = pixels; \
   1.136 + \
   1.137 +        if (n > 7) { \
   1.138 +            int adjust = 8 - ((uintptr_t)p & 7); \
   1.139 +            if (adjust < 8) { \
   1.140 +                n -= adjust; \
   1.141 +                adjust /= bpp; \
   1.142 +                while(adjust--) { \
   1.143 +                    *((type *)p) = (type)color; \
   1.144 +                    p += bpp; \
   1.145 +                } \
   1.146 +            } \
   1.147 +            MMX_WORK; \
   1.148 +        } \
   1.149 +        if (n & 63) { \
   1.150 +            int remainder = (n & 63); \
   1.151 +            remainder /= bpp; \
   1.152 +            while(remainder--) { \
   1.153 +                *((type *)p) = (type)color; \
   1.154 +                p += bpp; \
   1.155 +            } \
   1.156 +        } \
   1.157 +        pixels += pitch; \
   1.158 +    } \
   1.159 + \
   1.160 +    MMX_END; \
   1.161 +}
   1.162 +
   1.163 +DEFINE_MMX_FILLRECT(1, Uint8)
   1.164 +DEFINE_MMX_FILLRECT(2, Uint16)
   1.165 +DEFINE_MMX_FILLRECT(4, Uint32)
   1.166 +
   1.167 +/* *INDENT-ON* */
   1.168 +#endif /* __MMX__ */
   1.169 +
   1.170 +static void
   1.171 +SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
   1.172 +{
   1.173 +    while (h--) {
   1.174 +        int n = w;
   1.175 +        Uint8 *p = pixels;
   1.176 +
   1.177 +        if (n > 3) {
   1.178 +            switch ((uintptr_t) p & 3) {
   1.179 +            case 1:
   1.180 +                *p++ = (Uint8) color;
   1.181 +                --n;
   1.182 +            case 2:
   1.183 +                *p++ = (Uint8) color;
   1.184 +                --n;
   1.185 +            case 3:
   1.186 +                *p++ = (Uint8) color;
   1.187 +                --n;
   1.188 +            }
   1.189 +            SDL_memset4(p, color, (n >> 2));
   1.190 +        }
   1.191 +        if (n & 3) {
   1.192 +            p += (n & ~3);
   1.193 +            switch (n & 3) {
   1.194 +            case 3:
   1.195 +                *p++ = (Uint8) color;
   1.196 +            case 2:
   1.197 +                *p++ = (Uint8) color;
   1.198 +            case 1:
   1.199 +                *p++ = (Uint8) color;
   1.200 +            }
   1.201 +        }
   1.202 +        pixels += pitch;
   1.203 +    }
   1.204 +}
   1.205 +
   1.206 +static void
   1.207 +SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
   1.208 +{
   1.209 +    while (h--) {
   1.210 +        int n = w;
   1.211 +        Uint16 *p = (Uint16 *) pixels;
   1.212 +
   1.213 +        if (n > 1) {
   1.214 +            if ((uintptr_t) p & 2) {
   1.215 +                *p++ = (Uint16) color;
   1.216 +                --n;
   1.217 +            }
   1.218 +            SDL_memset4(p, color, (n >> 1));
   1.219 +        }
   1.220 +        if (n & 1) {
   1.221 +            p[n - 1] = (Uint16) color;
   1.222 +        }
   1.223 +        pixels += pitch;
   1.224 +    }
   1.225 +}
   1.226 +
   1.227 +static void
   1.228 +SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
   1.229 +{
   1.230 +    Uint8 r = (Uint8) ((color >> 16) & 0xFF);
   1.231 +    Uint8 g = (Uint8) ((color >> 8) & 0xFF);
   1.232 +    Uint8 b = (Uint8) (color & 0xFF);
   1.233 +
   1.234 +    while (h--) {
   1.235 +        int n = w;
   1.236 +        Uint8 *p = pixels;
   1.237 +
   1.238 +        while (n--) {
   1.239 +            *p++ = r;
   1.240 +            *p++ = g;
   1.241 +            *p++ = b;
   1.242 +        }
   1.243 +        pixels += pitch;
   1.244 +    }
   1.245 +}
   1.246 +
   1.247 +static void
   1.248 +SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
   1.249 +{
   1.250 +    while (h--) {
   1.251 +        SDL_memset4(pixels, color, w);
   1.252 +        pixels += pitch;
   1.253 +    }
   1.254 +}
   1.255 +
   1.256 +/* 
   1.257 + * This function performs a fast fill of the given rectangle with 'color'
   1.258 + */
   1.259 +int
   1.260 +SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
   1.261 +{
   1.262 +    Uint8 *pixels;
   1.263 +
   1.264 +    /* This function doesn't work on surfaces < 8 bpp */
   1.265 +    if (dst->format->BitsPerPixel < 8) {
   1.266 +        SDL_SetError("SDL_FillRect(): Unsupported surface format");
   1.267 +        return (-1);
   1.268 +    }
   1.269 +
   1.270 +    /* If 'dstrect' == NULL, then fill the whole surface */
   1.271 +    if (dstrect) {
   1.272 +        /* Perform clipping */
   1.273 +        if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
   1.274 +            return (0);
   1.275 +        }
   1.276 +    } else {
   1.277 +        dstrect = &dst->clip_rect;
   1.278 +    }
   1.279 +
   1.280 +    /* Perform software fill */
   1.281 +    if (!dst->pixels) {
   1.282 +        SDL_SetError("SDL_FillRect(): You must lock the surface");
   1.283 +        return (-1);
   1.284 +    }
   1.285 +
   1.286 +    pixels =
   1.287 +        (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
   1.288 +        dstrect->x * dst->format->BytesPerPixel;
   1.289 +
   1.290 +    switch (dst->format->BytesPerPixel) {
   1.291 +    case 1:
   1.292 +        {
   1.293 +            color |= (color << 8);
   1.294 +            color |= (color << 16);
   1.295 +#ifdef __SSE__
   1.296 +            if (SDL_HasSSE()) {
   1.297 +                SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
   1.298 +                                 dstrect->h);
   1.299 +                break;
   1.300 +            }
   1.301 +#endif
   1.302 +#ifdef __MMX__
   1.303 +            if (SDL_HasMMX()) {
   1.304 +                SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
   1.305 +                                 dstrect->h);
   1.306 +                break;
   1.307 +            }
   1.308 +#endif
   1.309 +            SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
   1.310 +            break;
   1.311 +        }
   1.312 +
   1.313 +    case 2:
   1.314 +        {
   1.315 +            color |= (color << 16);
   1.316 +#ifdef __SSE__
   1.317 +            if (SDL_HasSSE()) {
   1.318 +                SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
   1.319 +                                 dstrect->h);
   1.320 +                break;
   1.321 +            }
   1.322 +#endif
   1.323 +#ifdef __MMX__
   1.324 +            if (SDL_HasMMX()) {
   1.325 +                SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
   1.326 +                                 dstrect->h);
   1.327 +                break;
   1.328 +            }
   1.329 +#endif
   1.330 +            SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
   1.331 +            break;
   1.332 +        }
   1.333 +
   1.334 +    case 3:
   1.335 +        /* 24-bit RGB is a slow path, at least for now. */
   1.336 +        {
   1.337 +            SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
   1.338 +            break;
   1.339 +        }
   1.340 +
   1.341 +    case 4:
   1.342 +        {
   1.343 +#ifdef __SSE__
   1.344 +            if (SDL_HasSSE()) {
   1.345 +                SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
   1.346 +                                 dstrect->h);
   1.347 +                break;
   1.348 +            }
   1.349 +#endif
   1.350 +#ifdef __MMX__
   1.351 +            if (SDL_HasMMX()) {
   1.352 +                SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
   1.353 +                                 dstrect->h);
   1.354 +                break;
   1.355 +            }
   1.356 +#endif
   1.357 +            SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
   1.358 +            break;
   1.359 +        }
   1.360 +    }
   1.361 +
   1.362 +    SDL_UnlockSurface(dst);
   1.363 +
   1.364 +    /* We're done! */
   1.365 +    return (0);
   1.366 +}
   1.367 +
   1.368 +/* vi: set ts=4 sw=4 expandtab: */