Skip to content

Commit

Permalink
Fixed bug 4290 - add fastpaths for format conversion in BlitNtoN
Browse files Browse the repository at this point in the history
All of the following conversions are faster (no colorkey, no blending).
(The speed-up ratios are approximate.)

ABGR8888 -> ARGB8888 :  faster x6   (2655837 -> 416607)
ABGR8888 -> BGR24 :  faster x7   (2470117 -> 325693)
ABGR8888 -> RGB24 :  faster x7   (2478107 -> 335445)
ABGR8888 -> RGB888 :  faster x9   (3178524 -> 333859)

ARGB8888 -> ABGR8888 :  faster x6   (2648366 -> 406977)
ARGB8888 -> BGR24 :  faster x7   (2474978 -> 327819)
ARGB8888 -> BGR888 :  faster x9   (3189072 -> 326710)
ARGB8888 -> RGB24 :  faster x7   (2473689 -> 324729)

BGR24 -> ABGR8888 :  faster x6   (2268763 -> 359946)
BGR24 -> ARGB8888 :  faster x6   (2306393 -> 359213)
BGR24 -> BGR888 :  faster x6   (2231141 -> 324195)
BGR24 -> RGB24 :  faster x4   (1557835 -> 322033)
BGR24 -> RGB888 :  faster x6   (2229854 -> 323849)

BGR888 -> ARGB8888 :  faster x8   (3215202 -> 363137)
BGR888 -> BGR24 :  faster x7   (2474775 -> 347916)
BGR888 -> RGB24 :  faster x7   (2532783 -> 327354)
BGR888 -> RGB888 :  faster x9   (3134634 -> 344987)

RGB24 -> ABGR8888 :  faster x6   (2229486 -> 358919)
RGB24 -> ARGB8888 :  faster x6   (2271587 -> 358521)
RGB24 -> BGR24 :  faster x4   (1530913 -> 321149)
RGB24 -> BGR888 :  faster x6   (2227284 -> 327453)
RGB24 -> RGB888 :  faster x6   (2227125 -> 329061)

RGB888 -> ABGR8888 :  faster x8   (3163292 -> 362445)
RGB888 -> BGR24 :  faster x7   (2469489 -> 327127)
RGB888 -> BGR888 :  faster x9   (3190526 -> 326022)
RGB888 -> RGB24 :  faster x7   (2479084 -> 324982)
  • Loading branch information
1bsyl committed Jan 30, 2019
1 parent 3b4e369 commit 1128d57
Showing 1 changed file with 176 additions and 1 deletion.
177 changes: 176 additions & 1 deletion src/video/SDL_blit_N.c
Expand Up @@ -2515,6 +2515,146 @@ BlitNto2101010(SDL_BlitInfo * info)
}
}

/*
 * Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet
 *
 * Converts between two pixel formats of 3 or 4 bytes per pixel whose three
 * colour bytes are in the same order: source bytes 0, 1 and 2 become
 * bits 0-7, 8-15 and 16-23 of the destination pixel.
 *
 *  - Destination has an alpha mask (SET_ALPHA): bits 24-31 are filled with
 *    the constant info->a.
 *  - Destination has no alpha mask (NO_ALPHA): a 4-byte destination pixel
 *    gets 0 in bits 24-31; a 3-byte destination pixel gets exactly its
 *    three bytes and nothing more.
 *
 * info supplies the source/destination pointers, the per-row skips (added
 * to the pointers after each row), the width/height and both formats.
 *
 * NOTE(review): source bytes are addressed by memory offset, which matches
 * the pixel value only when the least significant byte is stored first.
 * The table rows that select this function appear to be compiled only for
 * little-endian builds - confirm before using it elsewhere.
 */
static void
Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->BytesPerPixel;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->BytesPerPixel;

    if (dstfmt->Amask) {
        /* SET_ALPHA */
        /* Destination is written as a whole 32-bit pixel; presumably a
           format with an alpha mask is always 4 bytes per pixel. */
        unsigned alpha = info->a;
        /* Kept unsigned: alpha >= 128 does not fit a signed 32-bit int
           once shifted into the top byte. */
        Uint32 alphashift = (Uint32) alpha << 24;
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint32 *dst32 = (Uint32*)dst;
                Uint32 s0 = src[0];
                Uint32 s1 = src[1];
                Uint32 s2 = src[2];
                *dst32 = (s0) | (s1 << 8) | (s2 << 16) | alphashift;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    } else if (dstbpp == 4) {
        /* NO_ALPHA, 4-byte destination: one 32-bit store, top byte is 0 */
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint32 *dst32 = (Uint32*)dst;
                Uint32 s0 = src[0];
                Uint32 s1 = src[1];
                Uint32 s2 = src[2];
                *dst32 = (s0) | (s1 << 8) | (s2 << 16);
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    } else {
        /* NO_ALPHA, 3-byte destination */
        /* Store byte by byte: a 32-bit store here would write one byte
           beyond each 3-byte pixel, clobbering the neighbouring pixel at
           the end of every row and running past the buffer on the last
           pixel, and it would also be a misaligned access. */
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint8 s0 = src[0];
                Uint8 s1 = src[1];
                Uint8 s2 = src[2];
                dst[0] = s0;
                dst[1] = s1;
                dst[2] = s2;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    }
    return;
}

/*
 * Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet
 *
 * Converts between two pixel formats of 3 or 4 bytes per pixel whose three
 * colour bytes are in opposite order: the source pixel's low, middle and
 * high colour bytes become bits 16-23, 8-15 and 0-7 of the destination
 * pixel.
 *
 *  - Both formats have an alpha mask (COPY_ALPHA): the source's fourth
 *    byte is copied into bits 24-31 (ABGR8888 <-> ARGB8888).
 *  - Only the destination has an alpha mask (SET_ALPHA): bits 24-31 are
 *    filled with the constant info->a.
 *  - Destination has no alpha mask (NO_ALPHA): a 4-byte destination pixel
 *    gets 0 in bits 24-31; a 3-byte destination pixel gets exactly its
 *    three bytes and nothing more.
 *
 * info supplies the source/destination pointers, the per-row skips (added
 * to the pointers after each row), the width/height and both formats.
 */
static void
Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->BytesPerPixel;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->BytesPerPixel;

    /* Memory offsets of a pixel's low / middle / high colour byte and of
       the byte above them. The 4->4 table rows that select this function
       are not restricted to little-endian builds, so the offsets must
       follow the host byte order rather than being fixed at 0, 1, 2, 3. */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    int s_lo = 0;
    int s_mid = 1;
    int s_hi = 2;
    int s_alpha = 3;
    int d_lo = 0;
    int d_mid = 1;
    int d_hi = 2;
#else
    /* NOTE(review): assumes big-endian pixels are stored most significant
       byte first, for 3-byte pixels as well - confirm against the pixel
       access macros used by the rest of the file. */
    int s_lo = srcbpp - 1;
    int s_mid = srcbpp - 2;
    int s_hi = srcbpp - 3;
    int s_alpha = srcbpp - 4;
    int d_lo = 2;
    int d_mid = 1;
    int d_hi = 0;
#endif

    if (dstfmt->Amask) {

        if (srcfmt->Amask) {
            /* COPY_ALPHA */
            /* Only to switch ABGR8888 <-> ARGB8888 */
            while (height--) {
                /* *INDENT-OFF* */
                DUFFS_LOOP(
                {
                    Uint32 *dst32 = (Uint32*)dst;
                    Uint32 s0 = src[s_lo];
                    Uint32 s1 = src[s_mid];
                    Uint32 s2 = src[s_hi];
                    /* Widen before shifting: shifting the promoted int
                       would overflow for alpha values >= 128. */
                    Uint32 alphashift = (Uint32) src[s_alpha] << 24;
                    /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
                    dst += dstbpp;
                    src += srcbpp;
                }, width);
                /* *INDENT-ON* */
                src += srcskip;
                dst += dstskip;
            }
        } else {
            /* SET_ALPHA */
            unsigned alpha = info->a;
            /* Kept unsigned: alpha >= 128 does not fit a signed 32-bit
               int once shifted into the top byte. */
            Uint32 alphashift = (Uint32) alpha << 24;
            while (height--) {
                /* *INDENT-OFF* */
                DUFFS_LOOP(
                {
                    Uint32 *dst32 = (Uint32*)dst;
                    Uint32 s0 = src[s_lo];
                    Uint32 s1 = src[s_mid];
                    Uint32 s2 = src[s_hi];
                    /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
                    dst += dstbpp;
                    src += srcbpp;
                }, width);
                /* *INDENT-ON* */
                src += srcskip;
                dst += dstskip;
            }
        }
    } else if (dstbpp == 4) {
        /* NO_ALPHA, 4-byte destination: one 32-bit store, top byte is 0 */
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint32 *dst32 = (Uint32*)dst;
                Uint32 s0 = src[s_lo];
                Uint32 s1 = src[s_mid];
                Uint32 s2 = src[s_hi];
                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
                *dst32 = (s0 << 16) | (s1 << 8) | (s2);
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    } else {
        /* NO_ALPHA, 3-byte destination */
        /* Store byte by byte: a 32-bit store here would write one byte
           beyond each 3-byte pixel, clobbering the neighbouring pixel at
           the end of every row and running past the buffer on the last
           pixel, and it would also be a misaligned access. */
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint8 s0 = src[s_lo];
                Uint8 s1 = src[s_mid];
                Uint8 s2 = src[s_hi];
                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
                dst[d_hi] = s0;
                dst[d_mid] = s1;
                dst[d_lo] = s2;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    }
    return;
}

/* Normal N to N optimized blitters */
#define NO_ALPHA 1
#define SET_ALPHA 2
Expand Down Expand Up @@ -2555,6 +2695,23 @@ static const struct blit_table normal_blit_2[] = {
};

/*
 * Fast-path blitters for 3-byte-per-pixel sources.
 *
 * Each row holds, in order: three source channel masks, the destination
 * bytes per pixel, three destination channel masks, a value that is 0 in
 * every row here, the blit function, and the alpha modes the function
 * handles (NO_ALPHA / SET_ALPHA flags OR-ed together).
 * NOTE(review): column meanings are read off the row comments and values;
 * the struct blit_table definition is not visible here - confirm.
 *
 * The last row (all masks 0, BlitNtoN) is the generic fallback.
 * NOTE(review): presumably rows are tried in order and the all-zero row
 * matches anything - verify against the lookup code.
 */
static const struct blit_table normal_blit_3[] = {
/* The fast paths below address pixels byte by byte, so they are only
   registered on little-endian builds. */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
/* 3->4 with same rgb triplet */
{0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
/* 3->4 with inversed rgb triplet */
{0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
/* 3->3 to switch RGB 24 <-> BGR 24 */
/* (a 3-byte destination cannot carry alpha, hence NO_ALPHA only) */
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
{0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
#endif
/* Default for 24-bit RGB source, never optimized */
{0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
};
Expand All @@ -2571,6 +2728,24 @@ static const struct blit_table normal_blit_4[] = {
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
#endif
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
/* 4->3 with same rgb triplet */
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
/* 4->3 with inversed rgb triplet */
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
#endif
/* 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 */
{0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA},
/* RGB 888 and RGB 565 */
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
0, Blit_RGB888_RGB565, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
Expand Down Expand Up @@ -2623,7 +2798,7 @@ SDL_CalculateBlitN(SDL_Surface * surface)
}
} else {
/* Now the meat, choose the blitter we want */
int a_need = NO_ALPHA;
Uint32 a_need = NO_ALPHA;
if (dstfmt->Amask)
a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
table = normal_blit[srcfmt->BytesPerPixel - 1];
Expand Down

0 comments on commit 1128d57

Please sign in to comment.