Faster blit when using CopyAlpha + ColorKey
author: Sylvain Becker <sylvain.becker@gmail.com>
Thu, 07 Feb 2019 15:12:17 +0100
changeset 12599 e9be1aa42927
parent 12598 d3acc7299a95
child 12600 be2c1d4ee0d1
Faster blit when using CopyAlpha + ColorKey
Applied to the following formats:

ABGR8888 -> ARGB8888 : faster x7 (3959672 -> 537227)
ABGR8888 -> BGRA8888 : faster x7 (4008716 -> 532064)
ABGR8888 -> RGBA8888 : faster x7 (3998576 -> 530964)

ARGB8888 -> ABGR8888 : faster x7 (3942420 -> 532503)
ARGB8888 -> BGRA8888 : faster x7 (3995382 -> 527722)
ARGB8888 -> RGBA8888 : faster x7 (4259330 -> 543033)

BGRA8888 -> ABGR8888 : faster x7 (4110411 -> 529402)
BGRA8888 -> ARGB8888 : faster x7 (4071906 -> 538393)
BGRA8888 -> RGBA8888 : faster x6 (4038320 -> 585141)

RGBA8888 -> ABGR8888 : faster x7 (3937018 -> 534127)
RGBA8888 -> ARGB8888 : faster x7 (3979577 -> 537810)
RGBA8888 -> BGRA8888 : faster x7 (3975656 -> 528355)
src/video/SDL_blit_N.c
     1.1 --- a/src/video/SDL_blit_N.c	Tue Feb 05 15:14:15 2019 +0100
     1.2 +++ b/src/video/SDL_blit_N.c	Thu Feb 07 15:12:17 2019 +0100
     1.3 @@ -2339,7 +2339,7 @@
     1.4      if (srcbpp == 4 && dstbpp == 4 && srcfmt->Rmask == dstfmt->Rmask && srcfmt->Gmask == dstfmt->Gmask && srcfmt->Bmask == dstfmt->Bmask) {
     1.5          Uint32 *src32 = (Uint32*)src;
     1.6          Uint32 *dst32 = (Uint32*)dst;
     1.7 - 
     1.8 +
     1.9          if (dstfmt->Amask) {
    1.10              /* RGB->RGBA, SET_ALPHA */
    1.11              Uint32 mask = info->a << dstfmt->Ashift;
    1.12 @@ -2441,8 +2441,8 @@
    1.13              dst += dstskip;
    1.14          }
    1.15          return;
    1.16 -    } 
    1.17 -   
    1.18 +    }
    1.19 +
    1.20      while (height--) {
    1.21          /* *INDENT-OFF* */
    1.22          DUFFS_LOOP(
    1.23 @@ -2519,6 +2519,46 @@
    1.24          return;
    1.25      }
    1.26  
    1.27 +    /* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */
    1.28 +    if (srcbpp == 4 && dstbpp == 4 &&
    1.29 +        srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
    1.30 +        dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.31 +
    1.32 +        Uint32 *src32 = (Uint32*)src;
    1.33 +        Uint32 *dst32 = (Uint32*)dst;
    1.34 +
    1.35 +        /* Find the appropriate permutation */
    1.36 +        int r, g, b, a;
    1.37 +        Pixel = 0x03020100;
    1.38 +        RGBA_FROM_PIXEL(Pixel, srcfmt, r, g, b, a);
    1.39 +        PIXEL_FROM_RGBA(Pixel, dstfmt, r, g, b, a);
    1.40 +        r = Pixel & 0xFF;
    1.41 +        g = (Pixel >> 8) & 0xFF;
    1.42 +        b = (Pixel >> 16) & 0xFF;
    1.43 +        a = (Pixel >> 24) & 0xFF;
    1.44 +
    1.45 +        while (height--) {
    1.46 +            /* *INDENT-OFF* */
    1.47 +            DUFFS_LOOP(
    1.48 +            {
    1.49 +                if ((*src32 & rgbmask) != ckey) {
    1.50 +                    Uint8 *s8 = src32;
    1.51 +                    Uint8 *d8 = dst32;
    1.52 +                    d8[0] = s8[r];
    1.53 +                    d8[1] = s8[g];
    1.54 +                    d8[2] = s8[b];
    1.55 +                    d8[3] = s8[a];
    1.56 +                }
    1.57 +                ++src32;
    1.58 +                ++dst32;
    1.59 +            }, width);
    1.60 +            /* *INDENT-ON* */
    1.61 +            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
    1.62 +            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
    1.63 +        }
    1.64 +        return;
    1.65 +    }
    1.66 +
    1.67      while (height--) {
    1.68          /* *INDENT-OFF* */
    1.69          DUFFS_LOOP(