Skip to content

Commit

Permalink
Faster blit, with or without colorkey, for bytes-per-pixel conversions 3->4 and 4->3
Browse files Browse the repository at this point in the history
===== BlitNtoNKey ========
ABGR8888 -> BGR24 :  faster x3   (2168709 -> 562738)
ABGR8888 -> RGB24 :  faster x3   (2165055 -> 567458)

ARGB8888 -> BGR24 :  faster x3   (2169109 -> 564338)
ARGB8888 -> RGB24 :  faster x3   (2165266 -> 567081)

BGR24 -> ABGR8888 :  faster x3   (2997675 -> 891636)
BGR24 -> ARGB8888 :  faster x3   (2985449 -> 892028)
BGR24 -> BGR888 :  faster x3   (2961611 -> 891913)
BGR24 -> BGRA8888 :  faster x3   (3116305 -> 891534)
BGR24 -> BGRX8888 :  faster x3   (3179654 -> 896978)
BGR24 -> RGB888 :  faster x3   (2968191 -> 895112)
BGR24 -> RGBA8888 :  faster x3   (2998428 -> 893147)
BGR24 -> RGBX8888 :  faster x3   (2976529 -> 914853)

BGR888 -> BGR24 :  faster x3   (2161906 -> 563921)
BGR888 -> RGB24 :  faster x3   (2168228 -> 566634)

BGRA8888 -> BGR24 :  faster x4   (2270501 -> 561873)
BGRA8888 -> RGB24 :  faster x3   (2163179 -> 567330)

BGRX8888 -> BGR24 :  faster x3   (2162911 -> 562322)
BGRX8888 -> RGB24 :  faster x3   (2169617 -> 570927)

RGB24 -> ABGR8888 :  faster x3   (2977061 -> 925975)
RGB24 -> ARGB8888 :  faster x3   (2978148 -> 923680)
RGB24 -> BGR888 :  faster x3   (3001413 -> 935074)
RGB24 -> BGRA8888 :  faster x3   (2959003 -> 924096)
RGB24 -> BGRX8888 :  faster x3   (2965240 -> 927100)
RGB24 -> RGB888 :  faster x3   (2983921 -> 926063)
RGB24 -> RGBA8888 :  faster x3   (2963908 -> 925457)
RGB24 -> RGBX8888 :  faster x3   (2967957 -> 931700)

RGB888 -> BGR24 :  faster x3   (2173299 -> 563226)
RGB888 -> RGB24 :  faster x3   (2218374 -> 566164)

RGBA8888 -> BGR24 :  faster x3   (2166355 -> 561381)
RGBA8888 -> RGB24 :  faster x3   (2170322 -> 566729)

RGBX8888 -> BGR24 :  faster x3   (2168524 -> 564072)
RGBX8888 -> RGB24 :  faster x3   (2163680 -> 566956)

===== BlitNtoN ========

BGR24 -> BGRA8888 :  faster x3   (2458958 -> 797557)
BGR24 -> BGRX8888 :  faster x3   (2486085 -> 797745)
BGR24 -> RGBA8888 :  faster x3   (2422116 -> 797637)
BGR24 -> RGBX8888 :  faster x3   (2454426 -> 799085)

BGRA8888 -> BGR24 :  faster x4   (2468206 -> 524486)
BGRA8888 -> RGB24 :  faster x4   (2463581 -> 525561)

BGRX8888 -> BGR24 :  faster x4   (2583355 -> 524468)
BGRX8888 -> RGB24 :  faster x4   (2477242 -> 524284)

RGB24 -> BGRA8888 :  faster x2   (2453414 -> 818415)
RGB24 -> BGRX8888 :  faster x3   (2414915 -> 800863)
RGB24 -> RGBA8888 :  faster x3   (2461114 -> 798148)
RGB24 -> RGBX8888 :  faster x3   (2400922 -> 799203)

RGBA8888 -> BGR24 :  faster x4   (2494472 -> 526428)
RGBA8888 -> RGB24 :  faster x4   (2462260 -> 526791)

RGBX8888 -> BGR24 :  faster x4   (2541115 -> 524390)
RGBX8888 -> RGB24 :  faster x4   (2469059 -> 525416)
  • Loading branch information
1bsyl committed Feb 9, 2019
1 parent 604b44f commit f6a2ae6
Showing 1 changed file with 133 additions and 4 deletions.
137 changes: 133 additions & 4 deletions src/video/SDL_blit_N.c
Expand Up @@ -2219,7 +2219,7 @@ BlitNtoN(SDL_BlitInfo * info)
int dstbpp = dstfmt->BytesPerPixel;
unsigned alpha = dstfmt->Amask ? info->a : 0;

/* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */
/* Blit with permutation: 4->4 */
if (srcbpp == 4 && dstbpp == 4 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
Expand Down Expand Up @@ -2252,6 +2252,64 @@ BlitNtoN(SDL_BlitInfo * info)
return;
}

/* Blit with permutation: 4->3 */
/*
 * Fast path: 4-byte source pixels to 3-byte destination pixels.
 * Each destination byte is copied from one byte of the source pixel,
 * at the byte offsets r, g and b filled in by get_permutation().
 * ARGB2101010 sources are excluded (presumably because their channels
 * are not whole bytes - confirm against the pixel format definition).
 */
if (srcbpp == 4 && dstbpp == 3 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {

/* Walk the source one 32-bit pixel at a time */
Uint32 *src32 = (Uint32*)src;

/* Find the appropriate permutation */
/* 'a' is only an output slot for get_permutation(); it is not read in this branch */
int r, g, b, a;
get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, NULL);

while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
/* View the current source pixel as bytes and pick the three that are kept */
Uint8 *s8 = (Uint8 *)src32;
dst[0] = s8[r];
dst[1] = s8[g];
dst[2] = s8[b];
++src32;
dst += 3;
}, width);
/* *INDENT-ON* */
/* End of row: advance both pointers by their skip amounts (srcskip is applied as a byte count) */
src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
dst += dstskip;
}
return;
}

/* Blit with permutation: 3->4 */
/*
 * Fast path: 3-byte source pixels to 4-byte destination pixels.
 * The four destination bytes are first filled from the source at the
 * byte offsets r, g, b and a, then the byte at index 'missing' is
 * overwritten with 'alpha'.
 * ARGB2101010 destinations are excluded (presumably because their
 * channels are not whole bytes - confirm against the pixel format
 * definition).
 */
if (srcbpp == 3 && dstbpp == 4 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {

/* Walk the destination one 32-bit pixel at a time */
Uint32 *dst32 = (Uint32*)dst;

/* Find the appropriate permutation */
/* 'missing' is the destination byte index that receives 'alpha'; it stays 0 unless get_permutation() sets it */
int missing = 0, r, g, b, a;
get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, &missing);

while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
Uint8 *d8 = (Uint8 *)dst32;
d8[0] = src[r];
d8[1] = src[g];
d8[2] = src[b];
/* NOTE(review): source pixels are 3 bytes wide; this assumes get_permutation() never yields an offset above 2 here - confirm */
d8[3] = src[a];
/* Replace the byte that has no source channel with the constant alpha */
d8[missing] = alpha;
src += 3;
++dst32;
}, width);
/* *INDENT-ON* */
/* End of row: advance both pointers by their skip amounts (dstskip is applied as a byte count) */
src += srcskip;
dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
}
return;
}

while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
Expand Down Expand Up @@ -2287,7 +2345,7 @@ BlitNtoNCopyAlpha(SDL_BlitInfo * info)
int dstbpp = dstfmt->BytesPerPixel;
int c;

/* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */
/* Blit with permutation: 4->4 */
if (srcbpp == 4 && dstbpp == 4 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
Expand Down Expand Up @@ -2499,7 +2557,7 @@ BlitNtoNKey(SDL_BlitInfo * info)
}
}

/* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */
/* Blit with permutation: 4->4 */
if (srcbpp == 4 && dstbpp == 4 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
Expand Down Expand Up @@ -2598,6 +2656,77 @@ BlitNtoNKey(SDL_BlitInfo * info)
return;
}

/* Blit with permutation: 4->3 */
/*
 * Colorkey fast path: 4-byte source pixels to 3-byte destination pixels.
 * A source pixel is copied only when its value masked with rgbmask
 * differs from ckey; otherwise the destination pixel is left untouched.
 * Copied bytes come from the source pixel at the byte offsets r, g and b
 * filled in by get_permutation().
 * ARGB2101010 sources are excluded (presumably because their channels
 * are not whole bytes - confirm against the pixel format definition).
 */
if (srcbpp == 4 && dstbpp == 3 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {

/* Walk the source one 32-bit pixel at a time */
Uint32 *src32 = (Uint32*)src;

/* Find the appropriate permutation */
/* 'a' is only an output slot for get_permutation(); it is not read in this branch */
int r, g, b, a;
get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, NULL);

while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
/* Skip pixels that match the colorkey */
if ((*src32 & rgbmask) != ckey) {
Uint8 *s8 = (Uint8 *)src32;
dst[0] = s8[r];
dst[1] = s8[g];
dst[2] = s8[b];
}
/* Both pointers advance whether or not the pixel was copied */
++src32;
dst += 3;
}, width);
/* *INDENT-ON* */
/* End of row: advance both pointers by their skip amounts (srcskip is applied as a byte count) */
src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
dst += dstskip;
}
return;
}

/* Blit with permutation: 3->4 */
/*
 * Colorkey fast path: 3-byte source pixels to 4-byte destination pixels.
 * A source pixel is copied only when at least one of its three bytes
 * differs from the corresponding colorkey byte; otherwise the
 * destination pixel is left untouched. For copied pixels the four
 * destination bytes are filled from the source at the byte offsets
 * r, g, b and a, then the byte at index 'missing' is overwritten with
 * 'alpha'.
 * ARGB2101010 destinations are excluded (presumably because their
 * channels are not whole bytes - confirm against the pixel format
 * definition).
 */
if (srcbpp == 3 && dstbpp == 4 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {

/* Walk the destination one 32-bit pixel at a time */
Uint32 *dst32 = (Uint32*)dst;

/*
 * Split the colorkey into bytes, lowest byte first, for comparison with src[0..2].
 * NOTE(review): this matches the in-memory byte order of a source pixel only if
 * ckey was assembled low-byte-first - confirm the behavior on big-endian targets.
 */
Uint8 k0 = ckey & 0xFF;
Uint8 k1 = (ckey >> 8) & 0xFF;
Uint8 k2 = (ckey >> 16) & 0xFF;

/* Find the appropriate permutation */
/* 'missing' is the destination byte index that receives 'alpha'; it stays 0 unless get_permutation() sets it */
int missing = 0, r, g, b, a;
get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, &missing);

while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
Uint8 s0 = src[0];
Uint8 s1 = src[1];
Uint8 s2 = src[2];

/* Copy only when the pixel differs from the colorkey in at least one byte */
if (k0 != s0 || k1 != s1 || k2 != s2) {
Uint8 *d8 = (Uint8 *)dst32;
d8[0] = src[r];
d8[1] = src[g];
d8[2] = src[b];
/* NOTE(review): source pixels are 3 bytes wide; this assumes get_permutation() never yields an offset above 2 here - confirm */
d8[3] = src[a];
/* Replace the byte that has no source channel with the constant alpha */
d8[missing] = alpha;
}
/* Both pointers advance whether or not the pixel was copied */
src += 3;
++dst32;
}, width);
/* *INDENT-ON* */
/* End of row: advance both pointers by their skip amounts (dstskip is applied as a byte count) */
src += srcskip;
dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);

}
return;
}

while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
Expand Down Expand Up @@ -2674,7 +2803,7 @@ BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
return;
}

/* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */
/* Blit with permutation: 4->4 */
if (srcbpp == 4 && dstbpp == 4 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
Expand Down

0 comments on commit f6a2ae6

Please sign in to comment.