From 1946a4e0577d9451c4b5b20d1f345e2764f168a1 Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Thu, 16 Aug 2007 21:43:19 +0000 Subject: [PATCH] Added notes on the next steps for SDL 1.3 Moved fill and copy routines to their own files. --- NOTES | 10 + src/video/SDL_blit.c | 2 +- src/video/{SDL_blit_copy.c => SDL_copy.c} | 56 ++-- src/video/{SDL_blit_copy.h => SDL_copy.h} | 0 src/video/SDL_fill.c | 365 ++++++++++++++++++++++ src/video/SDL_surface.c | 336 -------------------- 6 files changed, 405 insertions(+), 364 deletions(-) rename src/video/{SDL_blit_copy.c => SDL_copy.c} (97%) rename src/video/{SDL_blit_copy.h => SDL_copy.h} (100%) create mode 100644 src/video/SDL_fill.c diff --git a/NOTES b/NOTES index e0c512605..49dd2d058 100644 --- a/NOTES +++ b/NOTES @@ -157,6 +157,16 @@ Change textures to static/streaming. Static textures are not lockable, streaming textures are lockable and may have system memory pixels available. SDL_compat will use a streaming video texture, and will never be HWSURFACE, but may be PREALLOC, if system memory pixels are available. +*** DONE Thu Aug 16 14:18:42 PDT 2007 The software renderer will be abstracted so the surface management can be used by any renderer that provides functions to copy surfaces to the window. + +Blitters... +---- +Copy blit and fill rect are optimized with MMX and SSE now. 
+ +Here are the pieces we still need: +- Merging SDL texture capabilities into the SDL surface system +- Generic fallback blitter architecture +- Custom fast path blitters diff --git a/src/video/SDL_blit.c b/src/video/SDL_blit.c index cbc7e3f1f..d3384a761 100644 --- a/src/video/SDL_blit.c +++ b/src/video/SDL_blit.c @@ -24,7 +24,7 @@ #include "SDL_video.h" #include "SDL_sysvideo.h" #include "SDL_blit.h" -#include "SDL_blit_copy.h" +#include "SDL_copy.h" #include "SDL_RLEaccel_c.h" #include "SDL_pixels_c.h" diff --git a/src/video/SDL_blit_copy.c b/src/video/SDL_copy.c similarity index 97% rename from src/video/SDL_blit_copy.c rename to src/video/SDL_copy.c index 01be9ecc5..7fd902ca9 100644 --- a/src/video/SDL_blit_copy.c +++ b/src/video/SDL_copy.c @@ -23,10 +23,38 @@ #include "SDL_video.h" #include "SDL_blit.h" -#include "SDL_blit_copy.h" +#include "SDL_copy.h" +#ifdef __SSE__ +/* This assumes 16-byte aligned src and dst */ +static __inline__ void +SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len) +{ + int i; + + __m128 values[4]; + for (i = len / 64; i--;) { + _mm_prefetch(src, _MM_HINT_NTA); + values[0] = *(__m128 *) (src + 0); + values[1] = *(__m128 *) (src + 16); + values[2] = *(__m128 *) (src + 32); + values[3] = *(__m128 *) (src + 48); + _mm_stream_ps((float *) (dst + 0), values[0]); + _mm_stream_ps((float *) (dst + 16), values[1]); + _mm_stream_ps((float *) (dst + 32), values[2]); + _mm_stream_ps((float *) (dst + 48), values[3]); + src += 64; + dst += 64; + } + + if (len & 63) + SDL_memcpy(dst, src, len & 63); +} +#endif /* __SSE__ */ + #ifdef __MMX__ +/* This assumes 8-byte aligned src and dst */ static __inline__ void SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len) { @@ -60,32 +88,6 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len) } #endif /* __MMX__ */ -#ifdef __SSE__ -static __inline__ void -SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len) -{ - int i; - - __m128 values[4]; - for (i = len / 64; i--;) { - _mm_prefetch(src, 
_MM_HINT_NTA); - values[0] = *(__m128 *) (src + 0); - values[1] = *(__m128 *) (src + 16); - values[2] = *(__m128 *) (src + 32); - values[3] = *(__m128 *) (src + 48); - _mm_stream_ps((float *) (dst + 0), values[0]); - _mm_stream_ps((float *) (dst + 16), values[1]); - _mm_stream_ps((float *) (dst + 32), values[2]); - _mm_stream_ps((float *) (dst + 48), values[3]); - src += 64; - dst += 64; - } - - if (len & 63) - SDL_memcpy(dst, src, len & 63); -} -#endif /* __SSE__ */ - void SDL_BlitCopy(SDL_BlitInfo * info) { diff --git a/src/video/SDL_blit_copy.h b/src/video/SDL_copy.h similarity index 100% rename from src/video/SDL_blit_copy.h rename to src/video/SDL_copy.h diff --git a/src/video/SDL_fill.c b/src/video/SDL_fill.c new file mode 100644 index 000000000..ce74e286d --- /dev/null +++ b/src/video/SDL_fill.c @@ -0,0 +1,365 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2006 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +#include "SDL_video.h" +#include "SDL_blit.h" + + +#ifdef __SSE__ +/* *INDENT-OFF* */ + +#ifdef _MSC_VER +#define SSE_BEGIN \ + __m128 c128; \ + c128.m128_u32[0] = color; \ + c128.m128_u32[1] = color; \ + c128.m128_u32[2] = color; \ + c128.m128_u32[3] = color; +#else +#define SSE_BEGIN \ + DECLARE_ALIGNED(Uint32, cccc[4], 16); \ + cccc[0] = color; \ + cccc[1] = color; \ + cccc[2] = color; \ + cccc[3] = color; \ + __m128 c128 = *(__m128 *)cccc; +#endif + +#define SSE_WORK \ + for (i = n / 64; i--;) { \ + _mm_stream_ps((float *)(p+0), c128); \ + _mm_stream_ps((float *)(p+16), c128); \ + _mm_stream_ps((float *)(p+32), c128); \ + _mm_stream_ps((float *)(p+48), c128); \ + p += 64; \ + } + +#define SSE_END + +#define DEFINE_SSE_FILLRECT(bpp, type) \ +static void \ +SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ +{ \ + SSE_BEGIN; \ + \ + while (h--) { \ + int i, n = w * bpp; \ + Uint8 *p = pixels; \ + \ + if (n > 15) { \ + int adjust = 16 - ((uintptr_t)p & 15); \ + if (adjust < 16) { \ + n -= adjust; \ + adjust /= bpp; \ + while(adjust--) { \ + *((type *)p) = (type)color; \ + p += bpp; \ + } \ + } \ + SSE_WORK; \ + } \ + if (n & 63) { \ + int remainder = (n & 63); \ + remainder /= bpp; \ + while(remainder--) { \ + *((type *)p) = (type)color; \ + p += bpp; \ + } \ + } \ + pixels += pitch; \ + } \ + \ + SSE_END; \ +} + +DEFINE_SSE_FILLRECT(1, Uint8) +DEFINE_SSE_FILLRECT(2, Uint16) +DEFINE_SSE_FILLRECT(4, Uint32) + +/* *INDENT-ON* */ +#endif /* __SSE__ */ + +#ifdef __MMX__ +/* *INDENT-OFF* */ + +#define MMX_BEGIN \ + __m64 c64 = _mm_set_pi32(color, color) + +#define MMX_WORK \ + for (i = n / 64; i--;) { \ + _mm_stream_pi((__m64 *)(p+0), c64); \ + 
_mm_stream_pi((__m64 *)(p+8), c64); \ + _mm_stream_pi((__m64 *)(p+16), c64); \ + _mm_stream_pi((__m64 *)(p+24), c64); \ + _mm_stream_pi((__m64 *)(p+32), c64); \ + _mm_stream_pi((__m64 *)(p+40), c64); \ + _mm_stream_pi((__m64 *)(p+48), c64); \ + _mm_stream_pi((__m64 *)(p+56), c64); \ + p += 64; \ + } + +#define MMX_END \ + _mm_empty() + +#define DEFINE_MMX_FILLRECT(bpp, type) \ +static void \ +SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ +{ \ + MMX_BEGIN; \ + \ + while (h--) { \ + int i, n = w * bpp; \ + Uint8 *p = pixels; \ + \ + if (n > 7) { \ + int adjust = 8 - ((uintptr_t)p & 7); \ + if (adjust < 8) { \ + n -= adjust; \ + adjust /= bpp; \ + while(adjust--) { \ + *((type *)p) = (type)color; \ + p += bpp; \ + } \ + } \ + MMX_WORK; \ + } \ + if (n & 63) { \ + int remainder = (n & 63); \ + remainder /= bpp; \ + while(remainder--) { \ + *((type *)p) = (type)color; \ + p += bpp; \ + } \ + } \ + pixels += pitch; \ + } \ + \ + MMX_END; \ +} + +DEFINE_MMX_FILLRECT(1, Uint8) +DEFINE_MMX_FILLRECT(2, Uint16) +DEFINE_MMX_FILLRECT(4, Uint32) + +/* *INDENT-ON* */ +#endif /* __MMX__ */ + +static void +SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h) +{ + while (h--) { + int n = w; + Uint8 *p = pixels; + + if (n > 3) { + switch ((uintptr_t) p & 3) { + case 1: + *p++ = (Uint8) color; + --n; + case 2: + *p++ = (Uint8) color; + --n; + case 3: + *p++ = (Uint8) color; + --n; + } + SDL_memset4(p, color, (n >> 2)); + } + if (n & 3) { + p += (n & ~3); + switch (n & 3) { + case 3: + *p++ = (Uint8) color; + case 2: + *p++ = (Uint8) color; + case 1: + *p++ = (Uint8) color; + } + } + pixels += pitch; + } +} + +static void +SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h) +{ + while (h--) { + int n = w; + Uint16 *p = (Uint16 *) pixels; + + if (n > 1) { + if ((uintptr_t) p & 2) { + *p++ = (Uint16) color; + --n; + } + SDL_memset4(p, color, (n >> 1)); + } + if (n & 1) { + p[n - 1] = (Uint16) color; + } + pixels += pitch; 
+ } +} + +static void +SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h) +{ + Uint8 r = (Uint8) ((color >> 16) & 0xFF); + Uint8 g = (Uint8) ((color >> 8) & 0xFF); + Uint8 b = (Uint8) (color & 0xFF); + + while (h--) { + int n = w; + Uint8 *p = pixels; + + while (n--) { + *p++ = r; + *p++ = g; + *p++ = b; + } + pixels += pitch; + } +} + +static void +SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h) +{ + while (h--) { + SDL_memset4(pixels, color, w); + pixels += pitch; + } +} + +/* + * Fill 'dstrect' of 'dst' (the whole clip rect if 'dstrect' is NULL) with 'color'; 'dst' must already be locked by the caller. Returns 0 on success, -1 on error. + */ +int +SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color) +{ + Uint8 *pixels; + + /* This function doesn't work on surfaces < 8 bpp */ + if (dst->format->BitsPerPixel < 8) { + SDL_SetError("SDL_FillRect(): Unsupported surface format"); + return (-1); + } + + /* If 'dstrect' == NULL, then fill the whole surface */ + if (dstrect) { + /* Perform clipping */ + if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) { + return (0); + } + } else { + dstrect = &dst->clip_rect; + } + + /* Perform software fill */ + if (!dst->pixels) { + SDL_SetError("SDL_FillRect(): You must lock the surface"); + return (-1); + } + + pixels = + (Uint8 *) dst->pixels + dstrect->y * dst->pitch + + dstrect->x * dst->format->BytesPerPixel; + + switch (dst->format->BytesPerPixel) { + case 1: + { + color |= (color << 8); + color |= (color << 16); +#ifdef __SSE__ + if (SDL_HasSSE()) { + SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif +#ifdef __MMX__ + if (SDL_HasMMX()) { + SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif + SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h); + break; + } + + case 2: + { + color |= (color << 16); +#ifdef __SSE__ + if (SDL_HasSSE()) { + SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif +#ifdef __MMX__ + if 
(SDL_HasMMX()) { + SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif + SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h); + break; + } + + case 3: + /* 24-bit RGB is a slow path, at least for now. */ + { + SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h); + break; + } + + case 4: + { +#ifdef __SSE__ + if (SDL_HasSSE()) { + SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif +#ifdef __MMX__ + if (SDL_HasMMX()) { + SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif + SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h); + break; + } + } + + /* Do not unlock here: this function never locks 'dst', so the lock belongs to the caller. */ + + /* We're done! */ + return (0); +} + +/* vi: set ts=4 sw=4 expandtab: */ diff --git a/src/video/SDL_surface.c b/src/video/SDL_surface.c index fa1daaf0e..e3614cd70 100644 --- a/src/video/SDL_surface.c +++ b/src/video/SDL_surface.c @@ -509,342 +509,6 @@ SDL_UpperBlit(SDL_Surface * src, SDL_Rect * srcrect, return 0; } -#ifdef __SSE__ -/* *INDENT-OFF* */ - -#ifdef _MSC_VER -#define SSE_BEGIN \ - __m128 c128; \ - c128.m128_u32[0] = color; \ - c128.m128_u32[1] = color; \ - c128.m128_u32[2] = color; \ - c128.m128_u32[3] = color; -#else -#define SSE_BEGIN \ - DECLARE_ALIGNED(Uint32, cccc[4], 16); \ - cccc[0] = color; \ - cccc[1] = color; \ - cccc[2] = color; \ - cccc[3] = color; \ - __m128 c128 = *(__m128 *)cccc; -#endif - -#define SSE_WORK \ - for (i = n / 64; i--;) { \ - _mm_stream_ps((float *)(p+0), c128); \ - _mm_stream_ps((float *)(p+16), c128); \ - _mm_stream_ps((float *)(p+32), c128); \ - _mm_stream_ps((float *)(p+48), c128); \ - p += 64; \ - } - -#define SSE_END - -#define DEFINE_SSE_FILLRECT(bpp, type) \ -static void \ -SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ -{ \ - SSE_BEGIN; \ - \ - while (h--) { \ - int i, n = w * bpp; \ - Uint8 *p = pixels; \ - \ - if (n > 15) { \ - int adjust = 16 - ((uintptr_t)p 
& 15); \ - if (adjust < 16) { \ - n -= adjust; \ - adjust /= bpp; \ - while(adjust--) { \ - *((type *)p) = (type)color; \ - p += bpp; \ - } \ - } \ - SSE_WORK; \ - } \ - if (n & 63) { \ - int remainder = (n & 63); \ - remainder /= bpp; \ - while(remainder--) { \ - *((type *)p) = (type)color; \ - p += bpp; \ - } \ - } \ - pixels += pitch; \ - } \ - \ - SSE_END; \ -} - -DEFINE_SSE_FILLRECT(1, Uint8) -DEFINE_SSE_FILLRECT(2, Uint16) -DEFINE_SSE_FILLRECT(4, Uint32) - -/* *INDENT-ON* */ -#endif /* __SSE__ */ - -#ifdef __MMX__ -/* *INDENT-OFF* */ - -#define MMX_BEGIN \ - __m64 c64 = _mm_set_pi32(color, color) - -#define MMX_WORK \ - for (i = n / 64; i--;) { \ - _mm_stream_pi((__m64 *)(p+0), c64); \ - _mm_stream_pi((__m64 *)(p+8), c64); \ - _mm_stream_pi((__m64 *)(p+16), c64); \ - _mm_stream_pi((__m64 *)(p+24), c64); \ - _mm_stream_pi((__m64 *)(p+32), c64); \ - _mm_stream_pi((__m64 *)(p+40), c64); \ - _mm_stream_pi((__m64 *)(p+48), c64); \ - _mm_stream_pi((__m64 *)(p+56), c64); \ - p += 64; \ - } - -#define MMX_END \ - _mm_empty() - -#define DEFINE_MMX_FILLRECT(bpp, type) \ -static void \ -SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ -{ \ - MMX_BEGIN; \ - \ - while (h--) { \ - int i, n = w * bpp; \ - Uint8 *p = pixels; \ - \ - if (n > 7) { \ - int adjust = 8 - ((uintptr_t)p & 7); \ - if (adjust < 8) { \ - n -= adjust; \ - adjust /= bpp; \ - while(adjust--) { \ - *((type *)p) = (type)color; \ - p += bpp; \ - } \ - } \ - MMX_WORK; \ - } \ - if (n & 63) { \ - int remainder = (n & 63); \ - remainder /= bpp; \ - while(remainder--) { \ - *((type *)p) = (type)color; \ - p += bpp; \ - } \ - } \ - pixels += pitch; \ - } \ - \ - MMX_END; \ -} - -DEFINE_MMX_FILLRECT(1, Uint8) -DEFINE_MMX_FILLRECT(2, Uint16) -DEFINE_MMX_FILLRECT(4, Uint32) - -/* *INDENT-ON* */ -#endif /* __MMX__ */ - -static void -SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h) -{ - while (h--) { - int n = w; - Uint8 *p = pixels; - - if (n > 3) { - switch 
((uintptr_t) p & 3) { - case 1: - *p++ = (Uint8) color; - --n; - case 2: - *p++ = (Uint8) color; - --n; - case 3: - *p++ = (Uint8) color; - --n; - } - SDL_memset4(p, color, (n >> 2)); - } - if (n & 3) { - p += (n & ~3); - switch (n & 3) { - case 3: - *p++ = (Uint8) color; - case 2: - *p++ = (Uint8) color; - case 1: - *p++ = (Uint8) color; - } - } - pixels += pitch; - } -} - -static void -SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h) -{ - while (h--) { - int n = w; - Uint16 *p = (Uint16 *) pixels; - - if (n > 1) { - if ((uintptr_t) p & 2) { - *p++ = (Uint16) color; - --n; - } - SDL_memset4(p, color, (n >> 1)); - } - if (n & 1) { - p[n - 1] = (Uint16) color; - } - pixels += pitch; - } -} - -static void -SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h) -{ - Uint8 r = (Uint8) (color & 0xFF); - Uint8 g = (Uint8) ((color >> 8) & 0xFF); - Uint8 b = (Uint8) ((color >> 16) & 0xFF); - - while (h--) { - int n = w; - Uint8 *p = pixels; - - while (n--) { - *p++ = r; - *p++ = g; - *p++ = b; - } - pixels += pitch; - } -} - -static void -SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h) -{ - while (h--) { - SDL_memset4(pixels, color, w); - pixels += pitch; - } -} - -/* - * This function performs a fast fill of the given rectangle with 'color' - */ -int -SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color) -{ - Uint8 *pixels; - - /* This function doesn't work on surfaces < 8 bpp */ - if (dst->format->BitsPerPixel < 8) { - SDL_SetError("Fill rect on unsupported surface format"); - return (-1); - } - - /* If 'dstrect' == NULL, then fill the whole surface */ - if (dstrect) { - /* Perform clipping */ - if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) { - return (0); - } - } else { - dstrect = &dst->clip_rect; - } - - /* Perform software fill */ - if (SDL_LockSurface(dst) != 0) { - return (-1); - } - - pixels = - (Uint8 *) dst->pixels + dstrect->y * dst->pitch + - dstrect->x * 
dst->format->BytesPerPixel; - - switch (dst->format->BytesPerPixel) { - case 1: - { - color |= (color << 8); - color |= (color << 16); -#ifdef __SSE__ - if (SDL_HasSSE()) { - SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif -#ifdef __MMX__ - if (SDL_HasMMX()) { - SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif - SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h); - break; - } - - case 2: - { - color |= (color << 16); -#ifdef __SSE__ - if (SDL_HasSSE()) { - SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif -#ifdef __MMX__ - if (SDL_HasMMX()) { - SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif - SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h); - break; - } - - case 3: - /* 24-bit RGB is a slow path, at least for now. */ - { - SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h); - break; - } - - case 4: - { -#ifdef __SSE__ - if (SDL_HasSSE()) { - SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif -#ifdef __MMX__ - if (SDL_HasMMX()) { - SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif - SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h); - break; - } - } - - SDL_UnlockSurface(dst); - - /* We're done! */ - return (0); -} - /* * Lock a surface to directly access the pixels */