Faster blits, with or without colorkey, for 3->4 and 4->3 bytes-per-pixel conversions
author Sylvain Becker <sylvain.becker@gmail.com>
Sat, 09 Feb 2019 17:20:53 +0100
changeset 12608 b3a02787a4dd
parent 12607 4fb583571d7c
child 12609 48e26b5d4f56
Faster blits, with or without colorkey, for 3->4 and 4->3 bytes-per-pixel conversions

===== BlitNtoNKey ========
ABGR8888 -> BGR24 : faster x3 (2168709 -> 562738)
ABGR8888 -> RGB24 : faster x3 (2165055 -> 567458)

ARGB8888 -> BGR24 : faster x3 (2169109 -> 564338)
ARGB8888 -> RGB24 : faster x3 (2165266 -> 567081)

BGR24 -> ABGR8888 : faster x3 (2997675 -> 891636)
BGR24 -> ARGB8888 : faster x3 (2985449 -> 892028)
BGR24 -> BGR888 : faster x3 (2961611 -> 891913)
BGR24 -> BGRA8888 : faster x3 (3116305 -> 891534)
BGR24 -> BGRX8888 : faster x3 (3179654 -> 896978)
BGR24 -> RGB888 : faster x3 (2968191 -> 895112)
BGR24 -> RGBA8888 : faster x3 (2998428 -> 893147)
BGR24 -> RGBX8888 : faster x3 (2976529 -> 914853)

BGR888 -> BGR24 : faster x3 (2161906 -> 563921)
BGR888 -> RGB24 : faster x3 (2168228 -> 566634)

BGRA8888 -> BGR24 : faster x4 (2270501 -> 561873)
BGRA8888 -> RGB24 : faster x3 (2163179 -> 567330)

BGRX8888 -> BGR24 : faster x3 (2162911 -> 562322)
BGRX8888 -> RGB24 : faster x3 (2169617 -> 570927)

RGB24 -> ABGR8888 : faster x3 (2977061 -> 925975)
RGB24 -> ARGB8888 : faster x3 (2978148 -> 923680)
RGB24 -> BGR888 : faster x3 (3001413 -> 935074)
RGB24 -> BGRA8888 : faster x3 (2959003 -> 924096)
RGB24 -> BGRX8888 : faster x3 (2965240 -> 927100)
RGB24 -> RGB888 : faster x3 (2983921 -> 926063)
RGB24 -> RGBA8888 : faster x3 (2963908 -> 925457)
RGB24 -> RGBX8888 : faster x3 (2967957 -> 931700)

RGB888 -> BGR24 : faster x3 (2173299 -> 563226)
RGB888 -> RGB24 : faster x3 (2218374 -> 566164)

RGBA8888 -> BGR24 : faster x3 (2166355 -> 561381)
RGBA8888 -> RGB24 : faster x3 (2170322 -> 566729)

RGBX8888 -> BGR24 : faster x3 (2168524 -> 564072)
RGBX8888 -> RGB24 : faster x3 (2163680 -> 566956)

===== BlitNtoN ========

BGR24 -> BGRA8888 : faster x3 (2458958 -> 797557)
BGR24 -> BGRX8888 : faster x3 (2486085 -> 797745)
BGR24 -> RGBA8888 : faster x3 (2422116 -> 797637)
BGR24 -> RGBX8888 : faster x3 (2454426 -> 799085)

BGRA8888 -> BGR24 : faster x4 (2468206 -> 524486)
BGRA8888 -> RGB24 : faster x4 (2463581 -> 525561)

BGRX8888 -> BGR24 : faster x4 (2583355 -> 524468)
BGRX8888 -> RGB24 : faster x4 (2477242 -> 524284)

RGB24 -> BGRA8888 : faster x2 (2453414 -> 818415)
RGB24 -> BGRX8888 : faster x3 (2414915 -> 800863)
RGB24 -> RGBA8888 : faster x3 (2461114 -> 798148)
RGB24 -> RGBX8888 : faster x3 (2400922 -> 799203)

RGBA8888 -> BGR24 : faster x4 (2494472 -> 526428)
RGBA8888 -> RGB24 : faster x4 (2462260 -> 526791)

RGBX8888 -> BGR24 : faster x4 (2541115 -> 524390)
RGBX8888 -> RGB24 : faster x4 (2469059 -> 525416)
src/video/SDL_blit_N.c
     1.1 --- a/src/video/SDL_blit_N.c	Fri Feb 08 17:15:30 2019 +0100
     1.2 +++ b/src/video/SDL_blit_N.c	Sat Feb 09 17:20:53 2019 +0100
     1.3 @@ -2219,7 +2219,7 @@
     1.4      int dstbpp = dstfmt->BytesPerPixel;
     1.5      unsigned alpha = dstfmt->Amask ? info->a : 0;
     1.6  
     1.7 -    /* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */
     1.8 +    /* Blit with permutation: 4->4 */
     1.9      if (srcbpp == 4 && dstbpp == 4 &&
    1.10          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
    1.11          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.12 @@ -2252,6 +2252,64 @@
    1.13          return;
    1.14      }
    1.15  
    1.16 +    /* Blit with permutation: 4->3 */
    1.17 +    if (srcbpp == 4 && dstbpp == 3 &&
    1.18 +        srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.19 +
    1.20 +        Uint32 *src32 = (Uint32*)src;
    1.21 +
    1.22 +        /* Find the appropriate permutation */
    1.23 +        int r, g, b, a;
    1.24 +        get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, NULL);
    1.25 +
    1.26 +        while (height--) {
    1.27 +            /* *INDENT-OFF* */
    1.28 +            DUFFS_LOOP(
    1.29 +            {
    1.30 +                Uint8 *s8 = (Uint8 *)src32;
    1.31 +                dst[0] = s8[r];
    1.32 +                dst[1] = s8[g];
    1.33 +                dst[2] = s8[b];
    1.34 +                ++src32;
    1.35 +                dst += 3;
    1.36 +            }, width);
    1.37 +            /* *INDENT-ON* */
    1.38 +            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
    1.39 +            dst += dstskip;
    1.40 +        }
    1.41 +        return;
    1.42 +    }
    1.43 +
    1.44 +    /* Blit with permutation: 3->4 */
    1.45 +    if (srcbpp == 3 && dstbpp == 4 &&
    1.46 +        dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.47 +
    1.48 +        Uint32 *dst32 = (Uint32*)dst;
    1.49 +
    1.50 +        /* Find the appropriate permutation */
    1.51 +        int missing = 0, r, g, b, a;
    1.52 +        get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, &missing);
    1.53 +
    1.54 +        while (height--) {
    1.55 +            /* *INDENT-OFF* */
    1.56 +            DUFFS_LOOP(
    1.57 +            {
    1.58 +                Uint8 *d8 = (Uint8 *)dst32;
    1.59 +                d8[0] = src[r];
    1.60 +                d8[1] = src[g];
    1.61 +                d8[2] = src[b];
    1.62 +                d8[3] = src[a];
    1.63 +                d8[missing] = alpha;
    1.64 +                src += 3;
    1.65 +                ++dst32;
    1.66 +            }, width);
    1.67 +            /* *INDENT-ON* */
    1.68 +            src += srcskip;
    1.69 +            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
    1.70 +        }
    1.71 +        return;
    1.72 +    }
    1.73 +
    1.74      while (height--) {
    1.75          /* *INDENT-OFF* */
    1.76          DUFFS_LOOP(
    1.77 @@ -2287,7 +2345,7 @@
    1.78      int dstbpp = dstfmt->BytesPerPixel;
    1.79      int c;
    1.80  
    1.81 -    /* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */
    1.82 +    /* Blit with permutation: 4->4 */
    1.83      if (srcbpp == 4 && dstbpp == 4 &&
    1.84          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
    1.85          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.86 @@ -2499,7 +2557,7 @@
    1.87          }
    1.88      }
    1.89  
    1.90 -    /* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */
    1.91 +    /* Blit with permutation: 4->4 */
    1.92      if (srcbpp == 4 && dstbpp == 4 &&
    1.93          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
    1.94          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.95 @@ -2598,6 +2656,77 @@
    1.96          return;
    1.97      }
    1.98  
    1.99 +    /* Blit with permutation: 4->3 */
   1.100 +    if (srcbpp == 4 && dstbpp == 3 &&
   1.101 +        srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   1.102 +
   1.103 +        Uint32 *src32 = (Uint32*)src;
   1.104 +
   1.105 +        /* Find the appropriate permutation */
   1.106 +        int r, g, b, a;
   1.107 +        get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, NULL);
   1.108 +
   1.109 +        while (height--) {
   1.110 +            /* *INDENT-OFF* */
   1.111 +            DUFFS_LOOP(
   1.112 +            {
   1.113 +                if ((*src32 & rgbmask) != ckey) {
   1.114 +                    Uint8 *s8 = (Uint8 *)src32;
   1.115 +                    dst[0] = s8[r];
   1.116 +                    dst[1] = s8[g];
   1.117 +                    dst[2] = s8[b];
   1.118 +                }
   1.119 +                ++src32;
   1.120 +                dst += 3;
   1.121 +            }, width);
   1.122 +            /* *INDENT-ON* */
   1.123 +            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
   1.124 +            dst += dstskip;
   1.125 +        }
   1.126 +        return;
   1.127 +    }
   1.128 +
   1.129 +    /* Blit with permutation: 3->4 */
   1.130 +    if (srcbpp == 3 && dstbpp == 4 &&
   1.131 +        dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   1.132 +
   1.133 +        Uint32 *dst32 = (Uint32*)dst;
   1.134 +
   1.135 +        Uint8 k0 = ckey & 0xFF;
   1.136 +        Uint8 k1 = (ckey >> 8)  & 0xFF;
   1.137 +        Uint8 k2 = (ckey >> 16) & 0xFF;
   1.138 +
   1.139 +        /* Find the appropriate permutation */
   1.140 +        int missing = 0, r, g, b, a;
   1.141 +        get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, &missing);
   1.142 +
   1.143 +        while (height--) {
   1.144 +            /* *INDENT-OFF* */
   1.145 +            DUFFS_LOOP(
   1.146 +            {
   1.147 +                Uint8 s0 = src[0];
   1.148 +                Uint8 s1 = src[1];
   1.149 +                Uint8 s2 = src[2];
   1.150 +
   1.151 +                if (k0 != s0 || k1 != s1 || k2 != s2) {
   1.152 +                    Uint8 *d8 = (Uint8 *)dst32;
   1.153 +                    d8[0] = src[r];
   1.154 +                    d8[1] = src[g];
   1.155 +                    d8[2] = src[b];
   1.156 +                    d8[3] = src[a];
   1.157 +                    d8[missing] = alpha;
   1.158 +                }
   1.159 +                src += 3;
   1.160 +                ++dst32;
   1.161 +            }, width);
   1.162 +            /* *INDENT-ON* */
   1.163 +            src += srcskip;
   1.164 +            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
   1.165 +
   1.166 +        }
   1.167 +        return;
   1.168 +    }
   1.169 +
   1.170      while (height--) {
   1.171          /* *INDENT-OFF* */
   1.172          DUFFS_LOOP(
   1.173 @@ -2674,7 +2803,7 @@
   1.174          return;
   1.175      }
   1.176  
   1.177 -    /* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */
   1.178 +    /* Blit with permutation: 4->4 */
   1.179      if (srcbpp == 4 && dstbpp == 4 &&
   1.180          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
   1.181          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {