From f6a2ae60078f6bf717788442144e5ad57676e0b6 Mon Sep 17 00:00:00 2001 From: Sylvain Becker Date: Sat, 9 Feb 2019 17:20:53 +0100 Subject: [PATCH] Faster blit colorkey or not, applied to bpp: 3->4 and 4->3 ===== BlitNtoNKey ======== ABGR8888 -> BGR24 : faster x3 (2168709 -> 562738) ABGR8888 -> RGB24 : faster x3 (2165055 -> 567458) ARGB8888 -> BGR24 : faster x3 (2169109 -> 564338) ARGB8888 -> RGB24 : faster x3 (2165266 -> 567081) BGR24 -> ABGR8888 : faster x3 (2997675 -> 891636) BGR24 -> ARGB8888 : faster x3 (2985449 -> 892028) BGR24 -> BGR888 : faster x3 (2961611 -> 891913) BGR24 -> BGRA8888 : faster x3 (3116305 -> 891534) BGR24 -> BGRX8888 : faster x3 (3179654 -> 896978) BGR24 -> RGB888 : faster x3 (2968191 -> 895112) BGR24 -> RGBA8888 : faster x3 (2998428 -> 893147) BGR24 -> RGBX8888 : faster x3 (2976529 -> 914853) BGR888 -> BGR24 : faster x3 (2161906 -> 563921) BGR888 -> RGB24 : faster x3 (2168228 -> 566634) BGRA8888 -> BGR24 : faster x4 (2270501 -> 561873) BGRA8888 -> RGB24 : faster x3 (2163179 -> 567330) BGRX8888 -> BGR24 : faster x3 (2162911 -> 562322) BGRX8888 -> RGB24 : faster x3 (2169617 -> 570927) RGB24 -> ABGR8888 : faster x3 (2977061 -> 925975) RGB24 -> ARGB8888 : faster x3 (2978148 -> 923680) RGB24 -> BGR888 : faster x3 (3001413 -> 935074) RGB24 -> BGRA8888 : faster x3 (2959003 -> 924096) RGB24 -> BGRX8888 : faster x3 (2965240 -> 927100) RGB24 -> RGB888 : faster x3 (2983921 -> 926063) RGB24 -> RGBA8888 : faster x3 (2963908 -> 925457) RGB24 -> RGBX8888 : faster x3 (2967957 -> 931700) RGB888 -> BGR24 : faster x3 (2173299 -> 563226) RGB888 -> RGB24 : faster x3 (2218374 -> 566164) RGBA8888 -> BGR24 : faster x3 (2166355 -> 561381) RGBA8888 -> RGB24 : faster x3 (2170322 -> 566729) RGBX8888 -> BGR24 : faster x3 (2168524 -> 564072) RGBX8888 -> RGB24 : faster x3 (2163680 -> 566956) ===== BlitNtoN ======== BGR24 -> BGRA8888 : faster x3 (2458958 -> 797557) BGR24 -> BGRX8888 : faster x3 (2486085 -> 797745) BGR24 -> RGBA8888 : faster x3 (2422116 -> 797637) BGR24 -> RGBX8888 : faster x3 (2454426 -> 799085) BGRA8888 -> BGR24 : faster x4 (2468206 -> 524486) BGRA8888 -> RGB24 : faster x4 (2463581 -> 525561) BGRX8888 -> BGR24 : faster x4 (2583355 -> 524468) BGRX8888 -> RGB24 : faster x4 (2477242 -> 524284) RGB24 -> BGRA8888 : faster x2 (2453414 -> 818415) RGB24 -> BGRX8888 : faster x3 (2414915 -> 800863) RGB24 -> RGBA8888 : faster x3 (2461114 -> 798148) RGB24 -> RGBX8888 : faster x3 (2400922 -> 799203) RGBA8888 -> BGR24 : faster x4 (2494472 -> 526428) RGBA8888 -> RGB24 : faster x4 (2462260 -> 526791) RGBX8888 -> BGR24 : faster x4 (2541115 -> 524390) RGBX8888 -> RGB24 : faster x4 (2469059 -> 525416) --- src/video/SDL_blit_N.c | 137 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 4 deletions(-) diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index b566f7e823754..5908285743100 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -2219,7 +2219,7 @@ BlitNtoN(SDL_BlitInfo * info) int dstbpp = dstfmt->BytesPerPixel; unsigned alpha = dstfmt->Amask ? info->a : 0; - /* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */ + /* Blit with permutation: 4->4 */ if (srcbpp == 4 && dstbpp == 4 && srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 && dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) { @@ -2252,6 +2252,64 @@ BlitNtoN(SDL_BlitInfo * info) return; } + /* Blit with permutation: 4->3 */ + if (srcbpp == 4 && dstbpp == 3 && + srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) { + + Uint32 *src32 = (Uint32*)src; + + /* Find the appropriate permutation */ + int r, g, b, a; + get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, NULL); + + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + Uint8 *s8 = (Uint8 *)src32; + dst[0] = s8[r]; + dst[1] = s8[g]; + dst[2] = s8[b]; + ++src32; + dst += 3; + }, width); + /* *INDENT-ON* */ + src32 = (Uint32 *)((Uint8 *)src32 + srcskip); + dst += dstskip; + } + return; + } + + /* Blit with permutation: 3->4 */ + if (srcbpp == 3 && dstbpp == 4 && + dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) { + + Uint32 *dst32 = (Uint32*)dst; + + /* Find the appropriate permutation */ + int missing = 0, r, g, b, a; + get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, &missing); + + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + Uint8 *d8 = (Uint8 *)dst32; + d8[0] = src[r]; + d8[1] = src[g]; + d8[2] = src[b]; + d8[3] = src[a]; + d8[missing] = alpha; + src += 3; + ++dst32; + }, width); + /* *INDENT-ON* */ + src += srcskip; + dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip); + } + return; + } + while (height--) { /* *INDENT-OFF* */ DUFFS_LOOP( @@ -2287,7 +2345,7 @@ BlitNtoNCopyAlpha(SDL_BlitInfo * info) int dstbpp = dstfmt->BytesPerPixel; int c; - /* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */ + /* Blit with permutation: 4->4 */ if (srcbpp == 4 && dstbpp == 4 && srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 && dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) { @@ -2499,7 +2557,7 @@ BlitNtoNKey(SDL_BlitInfo * info) } } - /* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */ + /* Blit with permutation: 4->4 */ if (srcbpp == 4 && dstbpp == 4 && srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 && dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) { @@ -2598,6 +2656,77 @@ BlitNtoNKey(SDL_BlitInfo * info) return; } + /* Blit with permutation: 4->3 */ + if (srcbpp == 4 && dstbpp == 3 && + srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) { + + Uint32 *src32 = (Uint32*)src; + + /* Find the appropriate permutation */ + int r, g, b, a; + get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, NULL); + + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + if ((*src32 & rgbmask) != ckey) { + Uint8 *s8 = (Uint8 *)src32; + dst[0] = s8[r]; + dst[1] = s8[g]; + dst[2] = s8[b]; + } + ++src32; + dst += 3; + }, width); + /* *INDENT-ON* */ + src32 = (Uint32 *)((Uint8 *)src32 + srcskip); + dst += dstskip; + } + return; + } + + /* Blit with permutation: 3->4 */ + if (srcbpp == 3 && dstbpp == 4 && + dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) { + + Uint32 *dst32 = (Uint32*)dst; + + Uint8 k0 = ckey & 0xFF; + Uint8 k1 = (ckey >> 8) & 0xFF; + Uint8 k2 = (ckey >> 16) & 0xFF; + + /* Find the appropriate permutation */ + int missing = 0, r, g, b, a; + get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, &missing); + + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + Uint8 s0 = src[0]; + Uint8 s1 = src[1]; + Uint8 s2 = src[2]; + + if (k0 != s0 || k1 != s1 || k2 != s2) { + Uint8 *d8 = (Uint8 *)dst32; + d8[0] = src[r]; + d8[1] = src[g]; + d8[2] = src[b]; + d8[3] = src[a]; + d8[missing] = alpha; + } + src += 3; + ++dst32; + }, width); + /* *INDENT-ON* */ + src += srcskip; + dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip); + + } + return; + } + while (height--) { /* *INDENT-OFF* */ DUFFS_LOOP( @@ -2674,7 +2803,7 @@ BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info) return; } - /* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */ + /* Blit with permutation: 4->4 */ if (srcbpp == 4 && dstbpp == 4 && srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 && dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {