Skip to content

Commit

Permalink
ARM: assembly optimization for SDL_FillRect
Browse files Browse the repository at this point in the history
  • Loading branch information
bavison committed Oct 25, 2019
1 parent 7ac733f commit becc649
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 0 deletions.
23 changes: 23 additions & 0 deletions src/video/SDL_fillrect.c
Expand Up @@ -22,6 +22,7 @@

#include "SDL_video.h"
#include "SDL_blit.h"
#include "SDL_cpuinfo.h"


#ifdef __SSE__
Expand Down Expand Up @@ -280,6 +281,28 @@ SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
return SDL_SetError("SDL_FillRects() passed NULL rects");
}

#if SDL_ARM_SIMD_BLITTERS
if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3) {
void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
switch (dst->format->BytesPerPixel) {
case 1:
FillRect8ARMSIMDAsm(rect->w, rect->h, (uint8_t *) pixels, dst->pitch >> 0, color);
break;
case 2:
FillRect16ARMSIMDAsm(rect->w, rect->h, (uint16_t *) pixels, dst->pitch >> 1, color);
break;
case 4:
FillRect32ARMSIMDAsm(rect->w, rect->h, (uint32_t *) pixels, dst->pitch >> 2, color);
break;
}

SDL_UnlockSurface(dst);
return(0);
}
#endif

switch (dst->format->BytesPerPixel) {
case 1:
{
Expand Down
68 changes: 68 additions & 0 deletions src/video/arm/pixman-arm-simd-asm.S
Expand Up @@ -47,6 +47,74 @@

/******************************************************************************/

.macro FillRect32_init
ldr SRC, [sp, #ARGS_STACK_OFFSET]
mov STRIDE_S, SRC
mov MASK, SRC
mov STRIDE_M, SRC
.endm

.macro FillRect16_init
ldrh SRC, [sp, #ARGS_STACK_OFFSET]
orr SRC, SRC, lsl #16
mov STRIDE_S, SRC
mov MASK, SRC
mov STRIDE_M, SRC
.endm

.macro FillRect8_init
ldrb SRC, [sp, #ARGS_STACK_OFFSET]
orr SRC, SRC, lsl #8
orr SRC, SRC, lsl #16
mov STRIDE_S, SRC
mov MASK, SRC
mov STRIDE_M, SRC
.endm

.macro FillRect_process_tail cond, numbytes, firstreg
WK4 .req SRC
WK5 .req STRIDE_S
WK6 .req MASK
WK7 .req STRIDE_M
pixst cond, numbytes, 4, DST
.unreq WK4
.unreq WK5
.unreq WK6
.unreq WK7
.endm

generate_composite_function \
FillRect32ARMSIMDAsm, 0, 0, 32, \
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
0, /* prefetch distance doesn't apply */ \
FillRect32_init \
nop_macro, /* newline */ \
nop_macro /* cleanup */ \
nop_macro /* process head */ \
FillRect_process_tail

generate_composite_function \
FillRect16ARMSIMDAsm, 0, 0, 16, \
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
0, /* prefetch distance doesn't apply */ \
FillRect16_init \
nop_macro, /* newline */ \
nop_macro /* cleanup */ \
nop_macro /* process head */ \
FillRect_process_tail

generate_composite_function \
FillRect8ARMSIMDAsm, 0, 0, 8, \
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
0, /* prefetch distance doesn't apply */ \
FillRect8_init \
nop_macro, /* newline */ \
nop_macro /* cleanup */ \
nop_macro /* process head */ \
FillRect_process_tail

/******************************************************************************/

/* This differs from the over_8888_8888 routine in Pixman in that the destination
* alpha component is always left unchanged, and RGB components are not
* premultiplied by alpha. It differs from BlitRGBtoRGBPixelAlpha in that
Expand Down

0 comments on commit becc649

Please sign in to comment.