Skip to content
This repository has been archived by the owner on Feb 11, 2021. It is now read-only.

Commit

Permalink
Fixed crash with movaps instruction in SDL_memcpy(), due to unaligned Uint32* cast and -O3 vectorization optimizations with gcc 4.9.0
Browse files Browse the repository at this point in the history
  • Loading branch information
slouken committed May 27, 2013
1 parent 1763e2f commit 0823b69
Showing 1 changed file with 39 additions and 19 deletions.
58 changes: 39 additions & 19 deletions src/stdlib/SDL_string.c
Expand Up @@ -310,29 +310,49 @@ void *SDL_memcpy(void *dst, const void *src, size_t len) { return SDL_memcpy_inl
/*
 * Copy `len` bytes from `src` to `dst` and return `dst`.
 *
 * With GCC-compatible compilers the work is delegated to __builtin_memcpy().
 * Otherwise a portable fallback is used:
 *   - if either pointer is not 4-byte aligned, copy one byte at a time;
 *   - if both are aligned, copy 32-bit words and then the 0-3 trailing bytes.
 *
 * The fallback copies front to back only, so (as with memcpy) the caller is
 * expected to pass non-overlapping regions.
 */
void *
SDL_memcpy(void *dst, const void *src, size_t len)
{
#ifdef __GNUC__
    /* Presumably this is well tuned for speed.
       On my machine this is twice as fast as the C code below.
     */
    return __builtin_memcpy(dst, src, len);
#else
    /* GCC 4.9.0 with -O3 will generate movaps instructions with the loop
       using Uint32* pointers, so we need to make sure the pointers are
       aligned before we loop using them.
     */
    if (((intptr_t)src & 0x3) || ((intptr_t)dst & 0x3)) {
        /* Do an unaligned byte copy: never touch the buffers through a
           Uint32 pointer here, that is exactly what triggered the crash. */
        const Uint8 *srcp1 = (const Uint8 *)src;
        Uint8 *dstp1 = (Uint8 *)dst;

        while (len--) {
            *dstp1++ = *srcp1++;
        }
    } else {
        /* Both pointers are 4-byte aligned: copy whole words first. */
        size_t left = (len % 4);    /* trailing bytes after the word copy */
        const Uint32 *srcp4 = (const Uint32 *)src;
        Uint32 *dstp4 = (Uint32 *)dst;
        const Uint8 *srcp1;
        Uint8 *dstp1;

        len /= 4;                   /* from here on, len counts words */
        while (len--) {
            *dstp4++ = *srcp4++;
        }

        /* Continue byte-wise from where the word copy stopped. */
        srcp1 = (const Uint8 *)srcp4;
        dstp1 = (Uint8 *)dstp4;
        switch (left) {
        case 3:
            *dstp1++ = *srcp1++;
            /* fallthrough */
        case 2:
            *dstp1++ = *srcp1++;
            /* fallthrough */
        case 1:
            *dstp1++ = *srcp1++;
            break;
        default:
            break;
        }
    }
    return dst;
#endif /* __GNUC__ */
}
#endif

Expand Down

0 comments on commit 0823b69

Please sign in to comment.