Skip to content

Commit

Permalink
ARM: SIMD assembly optimization for BGR-to-RGB 32bpp normal blits
Browse files Browse the repository at this point in the history
  • Loading branch information
bavison committed Oct 25, 2019
1 parent 8425d9d commit 7ac733f
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 2 deletions.
26 changes: 24 additions & 2 deletions src/video/SDL_blit_N.c
Expand Up @@ -41,7 +41,8 @@
/*
 * CPU capability flags used when picking a blitter.  Every value is a
 * distinct power of two, so several flags can be OR-ed into one bitmask
 * (as GetBlitFeatures() does) and a blit_table entry can name the single
 * feature bit it requires.
 */
enum blit_features {
BLIT_FEATURE_HAS_MMX = 1,
BLIT_FEATURE_HAS_ALTIVEC = 2,
BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH = 4
BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH = 4,
BLIT_FEATURE_HAS_ARM_SIMD = 8
};

#if SDL_ALTIVEC_BLITTERS
Expand Down Expand Up @@ -931,7 +932,24 @@ GetBlitFeatures(void)
#endif
#else
/* Feature 1 is has-MMX, feature 8 is has-ARM-SIMD */
#define GetBlitFeatures() (SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0)
#define GetBlitFeatures() ((SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0) | (SDL_HasARMSIMD() ? BLIT_FEATURE_HAS_ARM_SIMD : 0))
#endif

#if SDL_ARM_SIMD_BLITTERS
/*
 * Assembly-language blitter (instantiated in
 * src/video/arm/pixman-arm-simd-asm.S) that converts 32bpp BGR pixels to
 * 32bpp RGB.
 *
 *   w, h        - size of the rectangle to convert
 *   dst, src    - first destination / source pixel, as 32-bit words
 *   dst_stride,
 *   src_stride  - row-to-row distance; the C wrapper in this file passes
 *                 width + (skip >> 2) for each surface
 *                 NOTE(review): that looks like a stride in 32-bit pixels
 *                 rather than bytes - confirm against the assembly framework.
 */
void Blit_BGR888_RGB888ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);

/*
 * SDL blit entry point for the ARM SIMD BGR888 -> RGB888 path.
 *
 * Unpacks the rectangle geometry and pixel pointers from `info` and forwards
 * them to the hand-written assembly routine; no pixel work happens in C.
 */
static void
Blit_BGR888_RGB888ARMSIMD(SDL_BlitInfo * info)
{
    /* Both surfaces are addressed as 32-bit words. */
    uint32_t *src_pixels = (uint32_t *)info->src;
    uint32_t *dst_pixels = (uint32_t *)info->dst;

    /* The blit rectangle is described by the destination dimensions. */
    int32_t cols = info->dst_w;
    int32_t rows = info->dst_h;

    /*
     * Row stride = visible width plus the per-row skip shifted right by 2.
     * NOTE(review): the shift presumably converts a byte count into 32-bit
     * pixels - confirm against how SDL_BlitInfo skips are populated.
     */
    int32_t src_row_stride = cols + (info->src_skip >> 2);
    int32_t dst_row_stride = cols + (info->dst_skip >> 2);

    Blit_BGR888_RGB888ARMSIMDAsm(cols, rows,
                                 dst_pixels, dst_row_stride,
                                 src_pixels, src_row_stride);
}
#endif

/* This is now endian dependent */
Expand Down Expand Up @@ -3269,6 +3287,10 @@ static const struct blit_table normal_blit_4[] = {
/* has-altivec */
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB888_RGB565Altivec, NO_ALPHA},
#endif
#if SDL_ARM_SIMD_BLITTERS
{0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
BLIT_FEATURE_HAS_ARM_SIMD, Blit_BGR888_RGB888ARMSIMD, NO_ALPHA | COPY_ALPHA },
#endif
/* 4->3 with same rgb triplet */
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
Expand Down
42 changes: 42 additions & 0 deletions src/video/arm/pixman-arm-simd-asm.S
Expand Up @@ -363,3 +363,45 @@ generate_composite_function \
nop_macro, /* cleanup */ \
ARGBto565PixelAlpha_process_head, \
ARGBto565PixelAlpha_process_tail

/******************************************************************************/

/*
 * Swap bytes 0 and 2 of one 32-bit pixel held in work register WK<reg>,
 * leaving bytes 1 and 3 where they are.
 *
 *   cond - condition-code suffix pasted onto every instruction
 *   reg  - index of the WK register holding the pixel (updated in place)
 *   tmp  - scratch register, overwritten
 *
 * Writing the input as bytes b3:b2:b1:b0 (b0 least significant):
 */
.macro BGR888toRGB888_1pixel cond, reg, tmp
/* tmp = 00:b3:00:b1  (the two bytes that keep their position) */
uxtb16&cond tmp, WK&reg, ror #8
/* WK  = 00:b0:00:b2  (the two bytes that trade places) */
uxtb16&cond WK&reg, WK&reg, ror #16
/* WK  = b3:b0:b1:b2 */
orr&cond WK&reg, WK&reg, tmp, lsl #8
.endm

/*
 * Two-pixel version of the byte-0/byte-2 swap: applies the same
 * uxtb16 / uxtb16 / orr sequence to WK<reg1> and WK<reg2>, with the two
 * instruction streams interleaved.
 * NOTE(review): the interleaving is presumably for instruction scheduling -
 * confirm with the author if it matters.
 *
 *   cond        - condition-code suffix pasted onto every instruction
 *   reg1, reg2  - indices of the WK registers holding the pixels (in place)
 *   tmp1, tmp2  - scratch registers, one per pixel, overwritten
 */
.macro BGR888toRGB888_2pixels cond, reg1, reg2, tmp1, tmp2
/* Extract the stationary bytes (1 and 3) and the swapped bytes (2 and 0). */
uxtb16&cond tmp1, WK&reg1, ror #8
uxtb16&cond WK&reg1, WK&reg1, ror #16
uxtb16&cond tmp2, WK&reg2, ror #8
uxtb16&cond WK&reg2, WK&reg2, ror #16
/* Recombine: stationary bytes go back into the odd byte lanes. */
orr&cond WK&reg1, WK&reg1, tmp1, lsl #8
orr&cond WK&reg2, WK&reg2, tmp2, lsl #8
.endm

/*
 * "Head" stage handed to generate_composite_function: its only action is to
 * invoke pixld on SRC.
 * NOTE(review): pixld is defined by the surrounding framework, not here; it
 * presumably loads numbytes of source pixels into the WK registers starting
 * at firstreg - confirm against the framework header.
 *
 * The unaligned_mask and preload parameters are accepted but not used by
 * this macro body.
 */
.macro BGR888toRGB888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
pixld cond, numbytes, firstreg, SRC, unaligned_src
.endm

/*
 * "Tail" stage handed to generate_composite_function: converts the pixels
 * in the WK registers starting at firstreg, choosing the helper by block
 * size at assembly time:
 *
 *   numbytes >= 8  - one 2-pixel conversion on firstreg+0 / firstreg+1
 *   numbytes == 16 - a second 2-pixel conversion on firstreg+2 / firstreg+3
 *   otherwise      - a single 1-pixel conversion on firstreg+0
 *
 * MASK and STRIDE_M are passed to the helpers as their scratch registers,
 * so both are overwritten here.
 * NOTE(review): this relies on those registers being free for this
 * operation - confirm against the framework register assignments.
 */
.macro BGR888toRGB888_process_tail cond, numbytes, firstreg
.if numbytes >= 8
BGR888toRGB888_2pixels cond, %(firstreg+0), %(firstreg+1), MASK, STRIDE_M
.if numbytes == 16
BGR888toRGB888_2pixels cond, %(firstreg+2), %(firstreg+3), MASK, STRIDE_M
.endif
.else @ numbytes == 4
BGR888toRGB888_1pixel cond, %(firstreg+0), MASK
.endif
.endm

/*
 * Instantiate the exported routine Blit_BGR888_RGB888ARMSIMDAsm (declared
 * and called from src/video/SDL_blit_N.c) from the framework template.
 * No init, per-line or cleanup hooks are needed (all nop_macro); the work
 * is done by the head and tail macros named in the last two arguments.
 * NOTE(review): the "32, 0, 32" arguments are presumably the source, mask
 * and destination bits per pixel (0 = no mask) - confirm against the
 * definition of generate_composite_function.
 */
generate_composite_function \
Blit_BGR888_RGB888ARMSIMDAsm, 32, 0, 32, \
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
2, /* prefetch distance */ \
nop_macro, /* init */ \
nop_macro, /* newline */ \
nop_macro, /* cleanup */ \
BGR888toRGB888_process_head, \
BGR888toRGB888_process_tail

0 comments on commit 7ac733f

Please sign in to comment.