From 77a223741a276580cf80ad2165d728a3fd4451fb Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Fri, 6 Sep 2019 08:50:40 -0700 Subject: [PATCH] SDL_blit_N.c: Correct vec_perm() application on little-endian 64-bit PowerPC The LE transformation for vec_perm has an implicit assumption that the permutation is being used to reorder vector elements (in this case 4-byte integer word elements), not to reorder bytes within those elements. Although this is legal behavior, it is not anticipated by the transformation performed by the compilers. This causes pygame-1.9.1 test failure on PPC64LE because blitted pixmaps are corrupted there due to how SDL uses vec_perm(). From RedHat / Fedora: https://bugzilla.redhat.com/show_bug.cgi?id=1392465 Original patch was provided by: Menanteau Guy --- src/video/SDL_blit_N.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index 146a10007..cfd3e8008 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -146,6 +146,34 @@ static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, return(vswiz); } +#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN) +/* reorder bytes for PowerPC little endian */ +static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute) +{ + /* The result vector of calc_swizzle32 reorder bytes using vec_perm. + The LE transformation for vec_perm has an implicit assumption + that the permutation is being used to reorder vector elements, + not to reorder bytes within those elements. + Unfortunatly the result order is not the expected one for powerpc + little endian when the two first vector parameters of vec_perm are + not of type 'vector char'. This is because the numbering from the + left for BE, and numbering from the right for LE, produces a + different interpretation of what the odd and even lanes are. + Refer to fedora bug 1392465 + */ + + const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL( + 0x01, 0x00, 0x03, 0x02, + 0x05, 0x04, 0x07, 0x06, + 0x09, 0x08, 0x0B, 0x0A, + 0x0D, 0x0C, 0x0F, 0x0E ); + + vector unsigned char vswiz_ppc64le; + vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder); + return(vswiz_ppc64le); +} +#endif + static void Blit_RGB888_RGB565(SDL_BlitInfo *info); static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) { int height = info->d_height; @@ -630,6 +658,10 @@ static void Blit32to32KeyAltivec(SDL_BlitInfo *info) /* vsel is set for items that match the key */ vsel = (vector unsigned char)vec_and(vs, vrgbmask); vsel = (vector unsigned char)vec_cmpeq(vs, vckey); +#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN) + /* reorder bytes for PowerPC little endian */ + vpermute = reorder_ppc64le_vec(vpermute); +#endif /* permute the src vec to the dest format */ vs = vec_perm(vs, valpha, vpermute); /* load the destination vec */ @@ -706,6 +738,10 @@ static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info) src += 4; width -= 4; vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */ +#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN) + /* reorder bytes for PowerPC little endian */ + vpermute = reorder_ppc64le_vec(vpermute); +#endif vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */ vec_st(vbits, 0, dst); /* store it back out. */ dst += 4; @@ -792,6 +828,10 @@ static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info) src += 4; width -= 4; vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */ +#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN) + /* reorder bytes for PowerPC little endian */ + vpermute = reorder_ppc64le_vec(vpermute); +#endif vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */ vec_st(vbits, 0, dst); /* store it back out. */ dst += 4;