Faster blit with no ColorKey
author Sylvain Becker <sylvain.becker@gmail.com>
Thu, 07 Feb 2019 18:51:14 +0100
changeset 12602 75f13d4bfa9a
parent 12601 22056748671d
child 12603 0e163b7a3a2d
Faster blit with no ColorKey
Applied to the following formats:

ABGR8888 -> BGRX8888 : faster x5 (3177493 -> 630439)
ABGR8888 -> RGBX8888 : faster x5 (3178104 -> 628925)

ARGB8888 -> BGRX8888 : faster x4 (3141089 -> 629448)
ARGB8888 -> RGBX8888 : faster x5 (3216413 -> 630465)

BGR888 -> BGRA8888 : faster x4 (3145403 -> 637701)
BGR888 -> BGRX8888 : faster x4 (3142106 -> 630144)
BGR888 -> RGBA8888 : faster x4 (3202685 -> 649384)
BGR888 -> RGBX8888 : faster x4 (3170617 -> 658670)

BGRA8888 -> BGR888 : faster x4 (3203308 -> 657697)
BGRA8888 -> RGB888 : faster x5 (3201475 -> 631747)
BGRA8888 -> RGBX8888 : faster x5 (3274544 -> 630409)

BGRX8888 -> ABGR8888 : faster x4 (3149753 -> 638682)
BGRX8888 -> ARGB8888 : faster x5 (3164101 -> 631273)
BGRX8888 -> BGR888 : faster x4 (3144454 -> 630712)
BGRX8888 -> RGB888 : faster x4 (3160490 -> 638047)
BGRX8888 -> RGBA8888 : faster x5 (3308988 -> 631232)
BGRX8888 -> RGBX8888 : faster x5 (3216775 -> 638065)

RGB888 -> BGRA8888 : faster x4 (3143135 -> 655146)
RGB888 -> BGRX8888 : faster x4 (3141790 -> 653771)
RGB888 -> RGBA8888 : faster x5 (3214402 -> 637001)
RGB888 -> RGBX8888 : faster x4 (3143082 -> 630009)

RGBA8888 -> BGR888 : faster x3 (3157048 -> 920375)
RGBA8888 -> BGRX8888 : faster x5 (3196692 -> 632996)
RGBA8888 -> RGB888 : faster x4 (3141570 -> 652151)

RGBX8888 -> ABGR8888 : faster x5 (3175401 -> 631218)
RGBX8888 -> ARGB8888 : faster x4 (3144690 -> 639440)
RGBX8888 -> BGR888 : faster x4 (3144250 -> 630171)
RGBX8888 -> BGRA8888 : faster x5 (3220321 -> 630731)
RGBX8888 -> BGRX8888 : faster x4 (3178453 -> 637445)
RGBX8888 -> RGB888 : faster x5 (3203623 -> 632596)
src/video/SDL_blit_N.c
     1.1 --- a/src/video/SDL_blit_N.c	Thu Feb 07 17:52:28 2019 +0100
     1.2 +++ b/src/video/SDL_blit_N.c	Thu Feb 07 18:51:14 2019 +0100
     1.3 @@ -2163,7 +2163,77 @@
     1.4      SDL_PixelFormat *dstfmt = info->dst_fmt;
     1.5      int dstbpp = dstfmt->BytesPerPixel;
     1.6      unsigned alpha = dstfmt->Amask ? info->a : 0;
     1.7 +    
     1.8 +    /* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */
     1.9 +    if (srcbpp == 4 && dstbpp == 4 &&
    1.10 +        srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
    1.11 +        dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.12  
    1.13 +        Uint32 *src32 = (Uint32*)src;
    1.14 +        Uint32 *dst32 = (Uint32*)dst;
    1.15 +
    1.16 +        /* Find the appropriate permutation */
    1.17 +        int missing = 0, r, g, b, a;
    1.18 +        int Pixel = 0x04030201; /* +1 */
    1.19 +        RGB_FROM_PIXEL(Pixel, srcfmt, r, g, b);
    1.20 +
    1.21 +        if (dstfmt->Amask) {
    1.22 +            PIXEL_FROM_RGBA(Pixel, dstfmt, r, g, b, 0);
    1.23 +        } else {
    1.24 +            PIXEL_FROM_RGB(Pixel, dstfmt, r, g, b);
    1.25 +        }
    1.26 +        r = Pixel & 0xFF;
    1.27 +        g = (Pixel >> 8) & 0xFF;
    1.28 +        b = (Pixel >> 16) & 0xFF;
    1.29 +        a = (Pixel >> 24) & 0xFF;
    1.30 +
    1.31 +        {
    1.32 +            int val;
    1.33 +            for (val = 0; val <= 3; val++) {
    1.34 +                if (r != val && g != val && b != val && a != val) {
    1.35 +                    missing = val;
    1.36 +                }
    1.37 +            }
    1.38 +        }
    1.39 +
    1.40 +        if (r == 0) {
    1.41 +            r = missing;
    1.42 +            missing = 0;
    1.43 +        } else if (g == 0) {
    1.44 +            g = missing;
    1.45 +            missing = 1;
    1.46 +        } else if (b == 0) {
    1.47 +            b = missing;
    1.48 +            missing = 2;
    1.49 +        } else if (a == 0) {
    1.50 +            a = missing;
    1.51 +            missing = 3;
    1.52 +        }
    1.53 +
    1.54 +        /* -1 */
    1.55 +        r -= 1; g -= 1; b -= 1; a -= 1;
    1.56 +
    1.57 +        while (height--) {
    1.58 +            /* *INDENT-OFF* */
    1.59 +            DUFFS_LOOP(
    1.60 +            {
    1.61 +                Uint8 *s8 = (Uint8 *)src32;
    1.62 +                Uint8 *d8 = (Uint8 *)dst32;
    1.63 +                d8[0] = s8[r];
    1.64 +                d8[1] = s8[g];
    1.65 +                d8[2] = s8[b];
    1.66 +                d8[3] = s8[a];
    1.67 +                d8[missing] = alpha;
    1.68 +                ++src32;
    1.69 +                ++dst32;
    1.70 +            }, width);
    1.71 +            /* *INDENT-ON* */
    1.72 +            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
    1.73 +            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
    1.74 +        }
    1.75 +        return;
    1.76 +    }
    1.77 +    
    1.78      while (height--) {
    1.79          /* *INDENT-OFF* */
    1.80          DUFFS_LOOP(