From 7372295ec916abe098754544850a9bd31a90b426 Mon Sep 17 00:00:00 2001 From: Sylvain Becker Date: Thu, 7 Feb 2019 17:52:28 +0100 Subject: [PATCH] Faster blit when using No Alpha or Set Alpha, + ColorKey Applied to following formats: ABGR8888 -> BGRX8888 : faster x4 (2794295 -> 610587) ABGR8888 -> RGB888 : faster x4 (2835693 -> 615561) ABGR8888 -> RGBX8888 : faster x4 (2880475 -> 610479) ARGB8888 -> BGR888 : faster x4 (2802718 -> 610702) ARGB8888 -> BGRX8888 : faster x4 (2792481 -> 606311) ARGB8888 -> RGBX8888 : faster x4 (2821621 -> 624745) BGR888 -> ARGB8888 : faster x4 (2791705 -> 637889) BGR888 -> BGRA8888 : faster x4 (2793195 -> 652299) BGR888 -> BGRX8888 : faster x4 (2800713 -> 609326) BGR888 -> RGB888 : faster x4 (2812260 -> 610471) BGR888 -> RGBA8888 : faster x4 (2792327 -> 629288) BGR888 -> RGBX8888 : faster x4 (2799224 -> 607073) BGRA8888 -> BGR888 : faster x4 (2800520 -> 606897) BGRA8888 -> RGB888 : faster x4 (2825274 -> 616156) BGRA8888 -> RGBX8888 : faster x4 (2812530 -> 610340) BGRX8888 -> ABGR8888 : faster x4 (2793940 -> 628596) BGRX8888 -> ARGB8888 : faster x4 (2822686 -> 638899) BGRX8888 -> BGR888 : faster x4 (2818141 -> 613659) BGRX8888 -> RGB888 : faster x4 (2929017 -> 611794) BGRX8888 -> RGBA8888 : faster x4 (2799709 -> 629750) BGRX8888 -> RGBX8888 : faster x4 (2911010 -> 605640) RGB888 -> ABGR8888 : faster x4 (2800671 -> 631542) RGB888 -> BGR888 : faster x4 (2802644 -> 604461) RGB888 -> BGRA8888 : faster x4 (2801919 -> 628729) RGB888 -> BGRX8888 : faster x4 (2938244 -> 604135) RGB888 -> RGBA8888 : faster x4 (2912447 -> 642185) RGB888 -> RGBX8888 : faster x4 (2831676 -> 634293) RGBA8888 -> BGR888 : faster x4 (2928896 -> 614960) RGBA8888 -> BGRX8888 : faster x4 (2821422 -> 608146) RGBA8888 -> RGB888 : faster x4 (2825927 -> 617184) RGBX8888 -> ABGR8888 : faster x4 (2803852 -> 654129) RGBX8888 -> ARGB8888 : faster x4 (2923615 -> 642644) RGBX8888 -> BGR888 : faster x4 (2806523 -> 610447) RGBX8888 -> BGRA8888 : faster x4 (2813388 -> 630305) RGBX8888 -> BGRX8888 : faster x4 (2800052 -> 607881) RGBX8888 -> RGB888 : faster x4 (2807722 -> 610263) --- src/video/SDL_blit_N.c | 117 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index 2067dfbe0cef1..0886bedb587e0 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -2379,6 +2379,123 @@ BlitNtoNKey(SDL_BlitInfo * info) } } + /* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */ + if (srcbpp == 4 && dstbpp == 4 && + srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 && + dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) { + + Uint32 *src32 = (Uint32*)src; + Uint32 *dst32 = (Uint32*)dst; + + if (dstfmt->Amask) { + /* Find the appropriate permutation */ + int missing = 0, r, g, b, a; + int Pixel = 0x03020100; + RGB_FROM_PIXEL(Pixel, srcfmt, r, g, b); + PIXEL_FROM_RGBA(Pixel, dstfmt, r, g, b, 255); + r = Pixel & 0xFF; + g = (Pixel >> 8) & 0xFF; + b = (Pixel >> 16) & 0xFF; + a = (Pixel >> 24) & 0xFF; + + { + int val; + for (val = 0; val <= 3; val++) { + if (r != val && g != val && b != val && a != val) { + missing = val; + } + } + } + + if (r == 255) { + r = missing; + missing = 0; + } else if (g == 255) { + g = missing; + missing = 1; + } else if (b == 255) { + b = missing; + missing = 2; + } else if (a == 255) { + a = missing; + missing = 3; + } + + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + if ((*src32 & rgbmask) != ckey) { + Uint8 *s8 = (Uint8 *)src32; + Uint8 *d8 = (Uint8 *)dst32; + d8[0] = s8[r]; + d8[1] = s8[g]; + d8[2] = s8[b]; + d8[3] = s8[a]; + d8[missing] = alpha; + } + ++src32; + ++dst32; + }, width); + /* *INDENT-ON* */ + src32 = (Uint32 *)((Uint8 *)src32 + srcskip); + dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip); + } + + return; + } else { + /* Find the appropriate permutation */ + int missing = 0, r, g, b, a; + int Pixel = 0x04030201; /* +1 */ + RGBA_FROM_PIXEL(Pixel, srcfmt, r, g, b, a); + missing = a; + PIXEL_FROM_RGB(Pixel, dstfmt, r, g, b); + r = Pixel & 0xFF; + g = (Pixel >> 8) & 0xFF; + b = (Pixel >> 16) & 0xFF; + a = (Pixel >> 24) & 0xFF; + + if (r == 0) { + r = missing; + missing = 0; + } else if (g == 0) { + g = missing; + missing = 1; + } else if (b == 0) { + b = missing; + missing = 2; + } else if (a == 0) { + a = missing; + missing = 3; + } + + /* -1 */ + r -= 1; g -= 1; b -= 1; a -= 1; + + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + if ((*src32 & rgbmask) != ckey) { + Uint8 *s8 = (Uint8 *)src32; + Uint8 *d8 = (Uint8 *)dst32; + d8[0] = s8[r]; + d8[1] = s8[g]; + d8[2] = s8[b]; + d8[3] = s8[a]; + d8[missing] = 0; + } + ++src32; + ++dst32; + }, width); + /* *INDENT-ON* */ + src32 = (Uint32 *)((Uint8 *)src32 + srcskip); + dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip); + } + return; + } + } + /* BPP 3, same rgb triplet */ if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) || (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {