Faster blit when using No Alpha or Set Alpha, + ColorKey
author Sylvain Becker <sylvain.becker@gmail.com>
Thu, 07 Feb 2019 17:52:28 +0100
changeset 12601 22056748671d
parent 12600 be2c1d4ee0d1
child 12602 75f13d4bfa9a
Faster blit when using No Alpha or Set Alpha, + ColorKey
Applied to the following formats:

ABGR8888 -> BGRX8888 : faster x4 (2794295 -> 610587)
ABGR8888 -> RGB888 : faster x4 (2835693 -> 615561)
ABGR8888 -> RGBX8888 : faster x4 (2880475 -> 610479)

ARGB8888 -> BGR888 : faster x4 (2802718 -> 610702)
ARGB8888 -> BGRX8888 : faster x4 (2792481 -> 606311)
ARGB8888 -> RGBX8888 : faster x4 (2821621 -> 624745)

BGR888 -> ARGB8888 : faster x4 (2791705 -> 637889)
BGR888 -> BGRA8888 : faster x4 (2793195 -> 652299)
BGR888 -> BGRX8888 : faster x4 (2800713 -> 609326)
BGR888 -> RGB888 : faster x4 (2812260 -> 610471)
BGR888 -> RGBA8888 : faster x4 (2792327 -> 629288)
BGR888 -> RGBX8888 : faster x4 (2799224 -> 607073)

BGRA8888 -> BGR888 : faster x4 (2800520 -> 606897)
BGRA8888 -> RGB888 : faster x4 (2825274 -> 616156)
BGRA8888 -> RGBX8888 : faster x4 (2812530 -> 610340)

BGRX8888 -> ABGR8888 : faster x4 (2793940 -> 628596)
BGRX8888 -> ARGB8888 : faster x4 (2822686 -> 638899)
BGRX8888 -> BGR888 : faster x4 (2818141 -> 613659)
BGRX8888 -> RGB888 : faster x4 (2929017 -> 611794)
BGRX8888 -> RGBA8888 : faster x4 (2799709 -> 629750)
BGRX8888 -> RGBX8888 : faster x4 (2911010 -> 605640)

RGB888 -> ABGR8888 : faster x4 (2800671 -> 631542)
RGB888 -> BGR888 : faster x4 (2802644 -> 604461)
RGB888 -> BGRA8888 : faster x4 (2801919 -> 628729)
RGB888 -> BGRX8888 : faster x4 (2938244 -> 604135)
RGB888 -> RGBA8888 : faster x4 (2912447 -> 642185)
RGB888 -> RGBX8888 : faster x4 (2831676 -> 634293)

RGBA8888 -> BGR888 : faster x4 (2928896 -> 614960)
RGBA8888 -> BGRX8888 : faster x4 (2821422 -> 608146)
RGBA8888 -> RGB888 : faster x4 (2825927 -> 617184)

RGBX8888 -> ABGR8888 : faster x4 (2803852 -> 654129)
RGBX8888 -> ARGB8888 : faster x4 (2923615 -> 642644)
RGBX8888 -> BGR888 : faster x4 (2806523 -> 610447)
RGBX8888 -> BGRA8888 : faster x4 (2813388 -> 630305)
RGBX8888 -> BGRX8888 : faster x4 (2800052 -> 607881)
RGBX8888 -> RGB888 : faster x4 (2807722 -> 610263)
src/video/SDL_blit_N.c
     1.1 --- a/src/video/SDL_blit_N.c	Thu Feb 07 16:13:25 2019 +0100
     1.2 +++ b/src/video/SDL_blit_N.c	Thu Feb 07 17:52:28 2019 +0100
     1.3 @@ -2379,6 +2379,123 @@
     1.4          }
     1.5      }
     1.6  
     1.7 +    /* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */
     1.8 +    if (srcbpp == 4 && dstbpp == 4 &&
     1.9 +        srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
    1.10 +        dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.11 +
    1.12 +        Uint32 *src32 = (Uint32*)src;
    1.13 +        Uint32 *dst32 = (Uint32*)dst;
    1.14 +        
    1.15 +        if (dstfmt->Amask) {
    1.16 +            /* Find the appropriate permutation */
    1.17 +            int missing = 0, r, g, b, a;
    1.18 +            int Pixel = 0x03020100;
    1.19 +            RGB_FROM_PIXEL(Pixel, srcfmt, r, g, b);
    1.20 +            PIXEL_FROM_RGBA(Pixel, dstfmt, r, g, b, 255);
    1.21 +            r = Pixel & 0xFF;
    1.22 +            g = (Pixel >> 8) & 0xFF;
    1.23 +            b = (Pixel >> 16) & 0xFF;
    1.24 +            a = (Pixel >> 24) & 0xFF;
    1.25 +
    1.26 +            {
    1.27 +                int val;
    1.28 +                for (val = 0; val <= 3; val++) {
    1.29 +                    if (r != val && g != val && b != val && a != val) {
    1.30 +                        missing = val;
    1.31 +                    }
    1.32 +                }
    1.33 +            }
    1.34 +
    1.35 +            if (r == 255) {
    1.36 +                r = missing;
    1.37 +                missing = 0;
    1.38 +            } else if (g == 255) {
    1.39 +                g = missing;
    1.40 +                missing = 1;
    1.41 +            } else if (b == 255) {
    1.42 +                b = missing;
    1.43 +                missing = 2;
    1.44 +            } else if (a == 255) {
    1.45 +                a = missing;
    1.46 +                missing = 3;
    1.47 +            }
    1.48 +
    1.49 +            while (height--) {
    1.50 +                /* *INDENT-OFF* */
    1.51 +                DUFFS_LOOP(
    1.52 +                {
    1.53 +                    if ((*src32 & rgbmask) != ckey) {
    1.54 +                        Uint8 *s8 = (Uint8 *)src32;
    1.55 +                        Uint8 *d8 = (Uint8 *)dst32;
    1.56 +                        d8[0] = s8[r];
    1.57 +                        d8[1] = s8[g];
    1.58 +                        d8[2] = s8[b];
    1.59 +                        d8[3] = s8[a];
    1.60 +                        d8[missing] = alpha;
    1.61 +                    }
    1.62 +                    ++src32;
    1.63 +                    ++dst32;
    1.64 +                }, width);
    1.65 +                /* *INDENT-ON* */
    1.66 +                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
    1.67 +                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
    1.68 +            }
    1.69 +        
    1.70 +            return;
    1.71 +        } else {
    1.72 +            /* Find the appropriate permutation */
    1.73 +            int missing = 0, r, g, b, a;
    1.74 +            int Pixel = 0x04030201; /* +1 */
    1.75 +            RGBA_FROM_PIXEL(Pixel, srcfmt, r, g, b, a);
    1.76 +            missing = a;
    1.77 +            PIXEL_FROM_RGB(Pixel, dstfmt, r, g, b);
    1.78 +            r = Pixel & 0xFF;
    1.79 +            g = (Pixel >> 8) & 0xFF;
    1.80 +            b = (Pixel >> 16) & 0xFF;
    1.81 +            a = (Pixel >> 24) & 0xFF;
    1.82 +
    1.83 +            if (r == 0) {
    1.84 +                r = missing;
    1.85 +                missing = 0;
    1.86 +            } else if (g == 0) {
    1.87 +                g = missing;
    1.88 +                missing = 1;
    1.89 +            } else if (b == 0) {
    1.90 +                b = missing;
    1.91 +                missing = 2;
    1.92 +            } else if (a == 0) {
    1.93 +                a = missing;
    1.94 +                missing = 3;
    1.95 +            }
    1.96 +        
    1.97 +            /* -1 */
    1.98 +            r -= 1; g -= 1; b -= 1; a -= 1;
    1.99 +
   1.100 +            while (height--) {
   1.101 +                /* *INDENT-OFF* */
   1.102 +                DUFFS_LOOP(
   1.103 +                {
   1.104 +                    if ((*src32 & rgbmask) != ckey) {
   1.105 +                        Uint8 *s8 = (Uint8 *)src32;
   1.106 +                        Uint8 *d8 = (Uint8 *)dst32;
   1.107 +                        d8[0] = s8[r];
   1.108 +                        d8[1] = s8[g];
   1.109 +                        d8[2] = s8[b];
   1.110 +                        d8[3] = s8[a];
   1.111 +                        d8[missing] = 0;
   1.112 +                    }
   1.113 +                    ++src32;
   1.114 +                    ++dst32;
   1.115 +                }, width);
   1.116 +                /* *INDENT-ON* */
   1.117 +                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
   1.118 +                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
   1.119 +            }
   1.120 +            return;
   1.121 +        }
   1.122 +    }
   1.123 +
   1.124      /* BPP 3, same rgb triplet */
   1.125      if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) ||
   1.126          (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {