From 1128d57316ee62459ba2aa2e1d909ecbba14d652 Mon Sep 17 00:00:00 2001 From: Sylvain Becker Date: Wed, 30 Jan 2019 15:23:33 +0100 Subject: [PATCH] Fixed bug 4290 - add fastpaths for format conversion in BlitNtoN All following conversion are faster (no colorkey, no blending). (ratio isn't very accurate) ABGR8888 -> ARGB8888 : faster x6 (2655837 -> 416607) ABGR8888 -> BGR24 : faster x7 (2470117 -> 325693) ABGR8888 -> RGB24 : faster x7 (2478107 -> 335445) ABGR8888 -> RGB888 : faster x9 (3178524 -> 333859) ARGB8888 -> ABGR8888 : faster x6 (2648366 -> 406977) ARGB8888 -> BGR24 : faster x7 (2474978 -> 327819) ARGB8888 -> BGR888 : faster x9 (3189072 -> 326710) ARGB8888 -> RGB24 : faster x7 (2473689 -> 324729) BGR24 -> ABGR8888 : faster x6 (2268763 -> 359946) BGR24 -> ARGB8888 : faster x6 (2306393 -> 359213) BGR24 -> BGR888 : faster x6 (2231141 -> 324195) BGR24 -> RGB24 : faster x4 (1557835 -> 322033) BGR24 -> RGB888 : faster x6 (2229854 -> 323849) BGR888 -> ARGB8888 : faster x8 (3215202 -> 363137) BGR888 -> BGR24 : faster x7 (2474775 -> 347916) BGR888 -> RGB24 : faster x7 (2532783 -> 327354) BGR888 -> RGB888 : faster x9 (3134634 -> 344987) RGB24 -> ABGR8888 : faster x6 (2229486 -> 358919) RGB24 -> ARGB8888 : faster x6 (2271587 -> 358521) RGB24 -> BGR24 : faster x4 (1530913 -> 321149) RGB24 -> BGR888 : faster x6 (2227284 -> 327453) RGB24 -> RGB888 : faster x6 (2227125 -> 329061) RGB888 -> ABGR8888 : faster x8 (3163292 -> 362445) RGB888 -> BGR24 : faster x7 (2469489 -> 327127) RGB888 -> BGR888 : faster x9 (3190526 -> 326022) RGB888 -> RGB24 : faster x7 (2479084 -> 324982) --- src/video/SDL_blit_N.c | 177 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 1 deletion(-) diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index daa2779fb0448..1fe6a6bc54768 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -2515,6 +2515,146 @@ BlitNto2101010(SDL_BlitInfo * info) } } +/* Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet */ +static void +Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info) +{ + int width = info->dst_w; + int height = info->dst_h; + Uint8 *src = info->src; + int srcskip = info->src_skip; + Uint8 *dst = info->dst; + int dstskip = info->dst_skip; + SDL_PixelFormat *srcfmt = info->src_fmt; + int srcbpp = srcfmt->BytesPerPixel; + SDL_PixelFormat *dstfmt = info->dst_fmt; + int dstbpp = dstfmt->BytesPerPixel; + + if (dstfmt->Amask) { + /* SET_ALPHA */ + unsigned alpha = info->a; + int alphashift = alpha << 24; + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + Uint32 *dst32 = (Uint32*)dst; + unsigned s0 = src[0]; + unsigned s1 = src[1]; + unsigned s2 = src[2]; + *dst32 = (s0) | (s1 << 8) | (s2 << 16) | alphashift; + dst += dstbpp; + src += srcbpp; + }, width); + /* *INDENT-ON* */ + src += srcskip; + dst += dstskip; + } + } else { + /* NO_ALPHA */ + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + Uint32 *dst32 = (Uint32*)dst; + unsigned s0 = src[0]; + unsigned s1 = src[1]; + unsigned s2 = src[2]; + *dst32 = (s0) | (s1 << 8) | (s2 << 16); + dst += dstbpp; + src += srcbpp; + }, width); + /* *INDENT-ON* */ + src += srcskip; + dst += dstskip; + } + } + return; +} + +/* Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet */ +static void +Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info) +{ + int width = info->dst_w; + int height = info->dst_h; + Uint8 *src = info->src; + int srcskip = info->src_skip; + Uint8 *dst = info->dst; + int dstskip = info->dst_skip; + SDL_PixelFormat *srcfmt = info->src_fmt; + int srcbpp = srcfmt->BytesPerPixel; + SDL_PixelFormat *dstfmt = info->dst_fmt; + int dstbpp = dstfmt->BytesPerPixel; + + if (dstfmt->Amask) { + + if (srcfmt->Amask) { + /* COPY_ALPHA */ + /* Only to switch ABGR8888 <-> ARGB8888 */ + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + Uint32 *dst32 = (Uint32*)dst; + unsigned s0 = src[0]; + unsigned s1 = src[1]; + unsigned s2 = src[2]; + unsigned alphashift = src[3] << 24; + /* inversed, compared to Blit_3or4_to_3or4__same_rgb */ + *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift; + dst += dstbpp; + src += srcbpp; + }, width); + /* *INDENT-ON* */ + src += srcskip; + dst += dstskip; + } + } else { + /* SET_ALPHA */ + unsigned alpha = info->a; + int alphashift = alpha << 24; + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + Uint32 *dst32 = (Uint32*)dst; + unsigned s0 = src[0]; + unsigned s1 = src[1]; + unsigned s2 = src[2]; + /* inversed, compared to Blit_3or4_to_3or4__same_rgb */ + *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift; + dst += dstbpp; + src += srcbpp; + }, width); + /* *INDENT-ON* */ + src += srcskip; + dst += dstskip; + } + } + } else { + /* NO_ALPHA */ + while (height--) { + /* *INDENT-OFF* */ + DUFFS_LOOP( + { + Uint32 *dst32 = (Uint32*)dst; + unsigned s0 = src[0]; + unsigned s1 = src[1]; + unsigned s2 = src[2]; + /* inversed, compared to Blit_3or4_to_3or4__same_rgb */ + *dst32 = (s0 << 16) | (s1 << 8) | (s2); + dst += dstbpp; + src += srcbpp; + }, width); + /* *INDENT-ON* */ + src += srcskip; + dst += dstskip; + } + } + return; +} + /* Normal N to N optimized blitters */ #define NO_ALPHA 1 #define SET_ALPHA 2 @@ -2555,6 +2695,23 @@ static const struct blit_table normal_blit_2[] = { }; static const struct blit_table normal_blit_3[] = { +#if SDL_BYTEORDER == SDL_LIL_ENDIAN + /* 3->4 with same rgb triplet */ + {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000, + 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA}, + {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, + 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA}, + /* 3->4 with inversed rgb triplet */ + {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, + 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA}, + {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000, + 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA}, + /* 3->3 to switch RGB 24 <-> BGR 24 */ + {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF, + 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA }, + {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000, + 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA }, +#endif /* Default for 24-bit RGB source, never optimized */ {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0} }; @@ -2571,6 +2728,24 @@ static const struct blit_table normal_blit_4[] = { {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F, 2, Blit_RGB888_RGB565Altivec, NO_ALPHA}, #endif +#if SDL_BYTEORDER == SDL_LIL_ENDIAN + /* 4->3 with same rgb triplet */ + {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000, + 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA}, + {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF, + 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA}, + /* 4->3 with inversed rgb triplet */ + {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF, + 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA}, + {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000, + 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA}, +#endif + /* 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 */ + {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, + 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA}, + {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000, + 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA}, + /* RBG 888 and RGB 565 */ {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F, 0, Blit_RGB888_RGB565, NO_ALPHA}, {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F, @@ -2623,7 +2798,7 @@ SDL_CalculateBlitN(SDL_Surface * surface) } } else { /* Now the meat, choose the blitter we want */ - int a_need = NO_ALPHA; + Uint32 a_need = NO_ALPHA; if (dstfmt->Amask) a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA; table = normal_blit[srcfmt->BytesPerPixel - 1];