Fix invalid memory access and optimise Blit_3or4_to_3or4__*
author Sylvain Becker <sylvain.becker@gmail.com>
Sun, 17 Feb 2019 16:20:23 +0100
changeset 12610 587922004ea0
parent 12609 48e26b5d4f56
child 12612 07c39cbbeacf
Fix invalid memory access and optimise Blit_3or4_to_3or4__*

Fix an invalid write at the last pixel of the surface. It occurs
when the surface has no padding (pitch == w * bpp) and bpp is 3,
for a blit with no colorkey in the NO_ALPHA case (same or inverse RGB triplet).

Optimise by using int32 access:

BGR24 -> ARGB8888 : faster x1.897875 (362405 -> 190953)
RGB24 -> ABGR8888 : faster x1.660416 (363304 -> 218803)

ABGR8888 -> RGB24 : faster x1.686319 (334962 -> 198635)
ARGB8888 -> BGR24 : faster x1.691868 (324524 -> 191814)
BGR24 -> RGB888 : faster x1.678459 (326811 -> 194709)
BGR888 -> RGB24 : faster x1.731772 (327724 -> 189242)
RGB24 -> BGR888 : faster x1.690989 (328916 -> 194511)
RGB888 -> BGR24 : faster x1.698333 (326175 -> 192056)
src/video/SDL_blit_N.c
     1.1 --- a/src/video/SDL_blit_N.c	Sat Feb 09 17:40:32 2019 +0100
     1.2 +++ b/src/video/SDL_blit_N.c	Sun Feb 17 16:20:23 2019 +0100
     1.3 @@ -2938,15 +2938,54 @@
     1.4      if (dstfmt->Amask) {
     1.5          /* SET_ALPHA */
     1.6          Uint32 mask = info->a << dstfmt->Ashift;
     1.7 +        int last_line = 0;
     1.8 +        if (srcbpp == 3 && height) {
     1.9 +            height -= 1;
    1.10 +            last_line = 1;
    1.11 +        }
    1.12 +
    1.13          while (height--) {
    1.14              /* *INDENT-OFF* */
    1.15              DUFFS_LOOP(
    1.16              {
    1.17                  Uint32  *dst32 = (Uint32*)dst;
    1.18 +                Uint32  *src32 = (Uint32*)src;
    1.19 +                *dst32 = *src32 | mask;
    1.20 +                dst += 4;
    1.21 +                src += srcbpp;
    1.22 +            }, width);
    1.23 +            /* *INDENT-ON* */
    1.24 +            src += srcskip;
    1.25 +            dst += dstskip;
    1.26 +        }
    1.27 +
    1.28 +        if (last_line) {
    1.29 +            while (width--) {
    1.30 +                Uint32  *dst32 = (Uint32*)dst;
    1.31                  Uint8 s0 = src[0];
    1.32                  Uint8 s1 = src[1];
    1.33                  Uint8 s2 = src[2];
    1.34                  *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
    1.35 +                dst += 4;
    1.36 +                src += srcbpp;
    1.37 +            }
    1.38 +        }
    1.39 +    } else {
    1.40 +        /* NO_ALPHA */
    1.41 +        int mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    1.42 +        int last_line = 0;
    1.43 +        if ((dstbpp == 3 || srcbpp == 3) && height) {
    1.44 +            height -= 1;
    1.45 +            last_line = 1;
    1.46 +        }
    1.47 +
    1.48 +        while (height--) {
    1.49 +            /* *INDENT-OFF* */
    1.50 +            DUFFS_LOOP(
    1.51 +            {
    1.52 +                Uint32  *dst32 = (Uint32*)dst;
    1.53 +                Uint32  *src32 = (Uint32*)src;
    1.54 +                *dst32 = *src32 & mask;
    1.55                  dst += dstbpp;
    1.56                  src += srcbpp;
    1.57              }, width);
    1.58 @@ -2954,23 +2993,18 @@
    1.59              src += srcskip;
    1.60              dst += dstskip;
    1.61          }
    1.62 -    } else {
    1.63 -        /* NO_ALPHA */
    1.64 -        while (height--) {
    1.65 -            /* *INDENT-OFF* */
    1.66 -            DUFFS_LOOP(
    1.67 -            {
    1.68 -                Uint32  *dst32 = (Uint32*)dst;
    1.69 +
    1.70 +        if (last_line) {
    1.71 +            while (width--) {
    1.72                  Uint8 s0 = src[0];
    1.73                  Uint8 s1 = src[1];
    1.74                  Uint8 s2 = src[2];
    1.75 -                *dst32 = (s0) | (s1 << 8) | (s2 << 16);
    1.76 +                dst[0] = s0;
    1.77 +                dst[1] = s1;
    1.78 +                dst[2] = s2;
    1.79                  dst += dstbpp;
    1.80                  src += srcbpp;
    1.81 -            }, width);
    1.82 -            /* *INDENT-ON* */
    1.83 -            src += srcskip;
    1.84 -            dst += dstskip;
    1.85 +            }
    1.86          }
    1.87      }
    1.88      return;
    1.89 @@ -3036,6 +3070,12 @@
    1.90          }
    1.91      } else {
    1.92          /* NO_ALPHA */
    1.93 +        int last_line = 0;
    1.94 +        if (dstbpp == 3 && height) {
    1.95 +            height -= 1;
    1.96 +            last_line = 1;
    1.97 +        }
    1.98 +
    1.99          while (height--) {
   1.100              /* *INDENT-OFF* */
   1.101              DUFFS_LOOP(
   1.102 @@ -3053,6 +3093,20 @@
   1.103              src += srcskip;
   1.104              dst += dstskip;
   1.105          }
   1.106 +
   1.107 +        if (last_line) {
   1.108 +            while (width--) {
   1.109 +                Uint8 s0 = src[0];
   1.110 +                Uint8 s1 = src[1];
   1.111 +                Uint8 s2 = src[2];
   1.112 +                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   1.113 +                dst[0] = s2;
   1.114 +                dst[1] = s1;
   1.115 +                dst[2] = s0;
   1.116 +                dst += dstbpp;
   1.117 +                src += srcbpp;
   1.118 +            }
   1.119 +        }
   1.120      }
   1.121      return;
   1.122  }