SDL_blit_N.c: Correct vec_perm() application on little-endian 64-bit PowerPC
author Sam Lantinga <slouken@libsdl.org>
Fri, 06 Sep 2019 08:50:19 -0700
changeset 13064 9aa7aea27dec
parent 13063 33bcd45f0447
child 13069 f0b633817471
SDL_blit_N.c: Correct vec_perm() application on little-endian 64-bit PowerPC

The LE transformation for vec_perm has an implicit assumption that the
permutation is being used to reorder vector elements (in this case 4-byte
integer word elements), not to reorder bytes within those elements. Although
this is legal behavior, it is not anticipated by the transformation performed
by the compilers.

This causes pygame-1.9.1 test failure on PPC64LE because blitted pixmaps are
corrupted there due to how SDL uses vec_perm().

From RedHat / Fedora: https://bugzilla.redhat.com/show_bug.cgi?id=1392465
Original patch was provided by: Menanteau Guy <menantea@linux.vnet.ibm.com>
src/video/SDL_blit_N.c
     1.1 --- a/src/video/SDL_blit_N.c	Fri Sep 06 08:44:46 2019 -0700
     1.2 +++ b/src/video/SDL_blit_N.c	Fri Sep 06 08:50:19 2019 -0700
     1.3 @@ -165,6 +165,34 @@
     1.4      return (vswiz);
     1.5  }
     1.6  
     1.7 +#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
     1.8 +/* Reorder the bytes of a permute vector for PowerPC little endian. */
     1.9 +static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
    1.10 +{
    1.11 +    /* The result vector of calc_swizzle32 reorders bytes using vec_perm.
    1.12 +       The LE transformation for vec_perm has an implicit assumption
    1.13 +       that the permutation is being used to reorder vector elements,
    1.14 +       not to reorder bytes within those elements.
    1.15 +       Unfortunately, the resulting order is not the expected one on PowerPC
    1.16 +       little endian when the first two vector parameters of vec_perm are
    1.17 +       not of type 'vector char'. This is because numbering from the
    1.18 +       left for BE, and numbering from the right for LE, produces a
    1.19 +       different interpretation of what the odd and even lanes are.
    1.20 +       Refer to Fedora bug 1392465.
    1.21 +     */
    1.22 +    /* Mask literal: byte indices 0..15 with each adjacent pair exchanged. */
    1.23 +    const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL(
    1.24 +                                      0x01, 0x00, 0x03, 0x02,
    1.25 +                                      0x05, 0x04, 0x07, 0x06,
    1.26 +                                      0x09, 0x08, 0x0B, 0x0A,
    1.27 +                                      0x0D, 0x0C, 0x0F, 0x0E );
    1.28 +
    1.29 +    vector unsigned char vswiz_ppc64le;
    1.30 +    vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder);
    1.31 +    return(vswiz_ppc64le);
    1.32 +}
    1.33 +#endif
    1.34 +
    1.35  static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
    1.36  static void
    1.37  Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
    1.38 @@ -658,6 +686,10 @@
    1.39                  /* vsel is set for items that match the key */
    1.40                  vsel = (vector unsigned char) vec_and(vs, vrgbmask);
    1.41                  vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
    1.42 +#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
    1.43 +                /* reorder bytes for PowerPC little endian */
    1.44 +                vpermute = reorder_ppc64le_vec(vpermute);
    1.45 +#endif
    1.46                  /* permute the src vec to the dest format */
    1.47                  vs = vec_perm(vs, valpha, vpermute);
    1.48                  /* load the destination vec */
    1.49 @@ -737,6 +769,10 @@
    1.50              src += 4;
    1.51              width -= 4;
    1.52              vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
    1.53 +#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
    1.54 +            /* reorder bytes for PowerPC little endian */
    1.55 +            vpermute = reorder_ppc64le_vec(vpermute);
    1.56 +#endif
    1.57              vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
    1.58              vec_st(vbits, 0, dst);      /* store it back out. */
    1.59              dst += 4;
    1.60 @@ -828,6 +864,10 @@
    1.61              src += 4;
    1.62              width -= 4;
    1.63              vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
    1.64 +#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
    1.65 +            /* reorder bytes for PowerPC little endian */
    1.66 +            vpermute = reorder_ppc64le_vec(vpermute);
    1.67 +#endif
    1.68              vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
    1.69              vec_st(vbits, 0, dst);      /* store it back out. */
    1.70              dst += 4;