From c91f0539529434e843806bef4b4432c2d24be7e6 Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Wed, 16 Feb 2011 15:25:10 -0800 Subject: [PATCH] Fixed bug #1090 (SDL_BlitCopyOverlap() assumes memcpy() operates in order) Even if we're blitting between two different surfaces their pixels might still overlap, because of SDL_CreateRGBSurfaceFrom(), so always use SDL_BlitCopy() and check for overlap in that function. When handling overlapping surfaces, don't assume that memcpy() iterates forward, instead use memmove() correctly, and provide a fallback implementation of SDL_memmove() that handles the different cases. Fixed a bug with SDL_memset() not completely filling lengths that aren't a multiple of 4. Optimized SDL_memcpy() a bit using the same technique as SDL_memset(). --- include/SDL_stdinc.h | 56 ++++---------------------- src/stdlib/SDL_string.c | 84 ++++++++++++++++++++++++--------------- src/video/SDL_blit.c | 7 +--- src/video/SDL_blit_copy.c | 41 ++++++++----------- src/video/SDL_blit_copy.h | 1 - 5 files changed, 77 insertions(+), 112 deletions(-) diff --git a/include/SDL_stdinc.h b/include/SDL_stdinc.h index e06656a62..9f134c703 100644 --- a/include/SDL_stdinc.h +++ b/include/SDL_stdinc.h @@ -352,8 +352,8 @@ do { \ #endif /* We can count on memcpy existing on Mac OS X and being well-tuned. */ -#if defined(__MACH__) && defined(__APPLE__) -#define SDL_memcpy(dst, src, len) memcpy(dst, src, len) +#if defined(__MACOSX__) +#define SDL_memcpy memcpy #elif defined(__GNUC__) && defined(i386) #define SDL_memcpy(dst, src, len) \ do { \ @@ -385,8 +385,8 @@ extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src, #endif /* We can count on memcpy existing on Mac OS X and being well-tuned. */ -#if defined(__MACH__) && defined(__APPLE__) -#define SDL_memcpy4(dst, src, len) memcpy(dst, src, (len)*4) +#if defined(__MACOSX__) +#define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2) #elif defined(__GNUC__) && defined(i386) #define SDL_memcpy4(dst, src, len) \ do { \ @@ -400,54 +400,14 @@ do { \ } while(0) #endif #ifndef SDL_memcpy4 -#define SDL_memcpy4(dst, src, len) SDL_memcpy(dst, src, (len) << 2) -#endif - -#if defined(__GNUC__) && defined(i386) -#define SDL_revcpy(dst, src, len) \ -do { \ - int u0, u1, u2; \ - char *dstp = SDL_static_cast(char *, dst); \ - char *srcp = SDL_static_cast(char *, src); \ - int n = (len); \ - if ( n >= 4 ) { \ - __asm__ __volatile__ ( \ - "std\n\t" \ - "rep ; movsl\n\t" \ - "cld\n\t" \ - : "=&c" (u0), "=&D" (u1), "=&S" (u2) \ - : "0" (n >> 2), \ - "1" (dstp+(n-4)), "2" (srcp+(n-4)) \ - : "memory" ); \ - } \ - switch (n & 3) { \ - case 3: dstp[2] = srcp[2]; \ - case 2: dstp[1] = srcp[1]; \ - case 1: dstp[0] = srcp[0]; \ - break; \ - default: \ - break; \ - } \ -} while(0) -#endif -#ifndef SDL_revcpy -extern DECLSPEC void *SDLCALL SDL_revcpy(void *dst, const void *src, - size_t len); +#define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2) #endif #ifdef HAVE_MEMMOVE #define SDL_memmove memmove -#elif defined(HAVE_BCOPY) -#define SDL_memmove(d, s, n) bcopy((s), (d), (n)) -#else -#define SDL_memmove(dst, src, len) \ -do { \ - if ( dst < src ) { \ - SDL_memcpy(dst, src, len); \ - } else { \ - SDL_revcpy(dst, src, len); \ - } \ -} while(0) +#else +extern DECLSPEC void *SDLCALL SDL_memmove(void *dst, const void *src, + size_t len); #endif #ifdef HAVE_MEMCMP diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c index a8e7c5067..00d351d3a 100644 --- a/src/stdlib/SDL_string.c +++ b/src/stdlib/SDL_string.c @@ -265,31 +265,27 @@ void * SDL_memset(void *dst, int c, size_t len) { size_t left = (len % 4); - if (len >= 4) { - Uint32 value = 0; - Uint32 *dstp = (Uint32 *) dst; - int i; - for (i = 0; i < 4; ++i) { - value <<= 8; - value |= c; - } - len /= 4; - while (len--) { - *dstp++ = value; - } + Uint32 *dstp4; + Uint8 *dstp1; + Uint32 value4 = (c | (c << 8) | (c << 16) | (c << 24)); + Uint8 value1 = (Uint8) c; + + dstp4 = (Uint32 *) dst; + len /= 4; + while (len--) { + *dstp4++ = value4; } - if (left > 0) { - Uint8 value = (Uint8) c; - Uint8 *dstp = (Uint8 *) dst; - switch (left) { - case 3: - *dstp++ = value; - case 2: - *dstp++ = value; - case 1: - *dstp++ = value; - } + + dstp1 = (Uint8 *) dstp4; + switch (left) { + case 3: + *dstp1++ = value1; + case 2: + *dstp1++ = value1; + case 1: + *dstp1++ = value1; } + return dst; } #endif @@ -298,25 +294,49 @@ SDL_memset(void *dst, int c, size_t len) void * SDL_memcpy(void *dst, const void *src, size_t len) { - char *srcp = (char *) src; - char *dstp = (char *) dst; + size_t left = (len % 4); + Uint32 *srcp4, *dstp4; + Uint8 *srcp1, *dstp1; + + srcp4 = (Uint32 *) src; + dstp4 = (Uint32 *) dst; + len /= 4; while (len--) { - *dstp++ = *srcp++; + *dstp4++ = *srcp4++; + } + + srcp1 = (Uint8 *) srcp4; + dstp1 = (Uint8 *) dstp4; + switch (left) { + case 3: + *dstp1++ = *srcp1++; + case 2: + *dstp1++ = *srcp1++; + case 1: + *dstp1++ = *srcp1++; } + return dst; } #endif -#ifndef SDL_revcpy +#ifndef SDL_memmove void * -SDL_revcpy(void *dst, const void *src, size_t len) +SDL_memmove(void *dst, const void *src, size_t len) { char *srcp = (char *) src; char *dstp = (char *) dst; - srcp += len - 1; - dstp += len - 1; - while (len--) { - *dstp-- = *srcp--; + + if (src < dst) { + srcp += len - 1; + dstp += len - 1; + while (len--) { + *dstp-- = *srcp--; + } + } else { + while (len--) { + *dstp++ = *srcp++; + } } return dst; } diff --git a/src/video/SDL_blit.c b/src/video/SDL_blit.c index 1040d63e2..f1bcbfee5 100644 --- a/src/video/SDL_blit.c +++ b/src/video/SDL_blit.c @@ -205,12 +205,7 @@ SDL_CalculateBlit(SDL_Surface * surface) /* Choose a standard blit function */ if (map->identity && !(map->info.flags & ~SDL_COPY_RLE_DESIRED)) { - /* Handle overlapping blits on the same surface */ - if (surface == dst) { - blit = SDL_BlitCopyOverlap; - } else { - blit = SDL_BlitCopy; - } + blit = SDL_BlitCopy; } else if (surface->format->BitsPerPixel < 8) { blit = SDL_CalculateBlit0(surface); } else if (surface->format->BytesPerPixel == 1) { diff --git a/src/video/SDL_blit_copy.c b/src/video/SDL_blit_copy.c index c23d6bba4..f750d2e32 100644 --- a/src/video/SDL_blit_copy.c +++ b/src/video/SDL_blit_copy.c @@ -96,6 +96,7 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len) void SDL_BlitCopy(SDL_BlitInfo * info) { + SDL_bool overlap; Uint8 *src, *dst; int w, h; int srcskip, dstskip; @@ -107,6 +108,21 @@ SDL_BlitCopy(SDL_BlitInfo * info) srcskip = info->src_pitch; dstskip = info->dst_pitch; + /* Properly handle overlapping blits */ + if (src < dst) { + overlap = (dst < (src + h*srcskip)); + } else { + overlap = (src < (dst + h*dstskip)); + } + if (overlap) { + while (h--) { + SDL_memmove(dst, src, w); + src += srcskip; + dst += dstskip; + } + return; + } + #ifdef __SSE__ if (SDL_HasSSE() && !((uintptr_t) src & 15) && !(srcskip & 15) && @@ -141,29 +157,4 @@ SDL_BlitCopy(SDL_BlitInfo * info) } } -void -SDL_BlitCopyOverlap(SDL_BlitInfo * info) -{ - Uint8 *src, *dst; - int w, h; - int skip; - - w = info->dst_w * info->dst_fmt->BytesPerPixel; - h = info->dst_h; - src = info->src; - dst = info->dst; - skip = info->src_pitch; - if ((dst < src) || (dst >= (src + h * skip))) { - SDL_BlitCopy(info); - } else { - src += ((h - 1) * skip); - dst += ((h - 1) * skip); - while (h--) { - SDL_revcpy(dst, src, w); - src -= skip; - dst -= skip; - } - } -} - /* vi: set ts=4 sw=4 expandtab: */ diff --git a/src/video/SDL_blit_copy.h b/src/video/SDL_blit_copy.h index 8817e5045..068d5e0b6 100644 --- a/src/video/SDL_blit_copy.h +++ b/src/video/SDL_blit_copy.h @@ -21,6 +21,5 @@ */ void SDL_BlitCopy(SDL_BlitInfo * info); -void SDL_BlitCopyOverlap(SDL_BlitInfo * info); /* vi: set ts=4 sw=4 expandtab: */