Skip to content

Commit

Permalink
metal: SDL_RenderFillRects uses one draw call per 16k rectangles (within the given FillRects call), instead of one draw call per rectangle. Reduces CPU usage when drawing many rectangles.
Browse files Browse the repository at this point in the history
  • Loading branch information
slime73 committed Nov 22, 2018
1 parent 5f98051 commit 4a58722
Showing 1 changed file with 61 additions and 18 deletions.
79 changes: 61 additions & 18 deletions src/render/metal/SDL_render_metal.m
Expand Up @@ -117,6 +117,7 @@ @interface METAL_RenderData : NSObject
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
@property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
@property (nonatomic, retain) id<MTLBuffer> mtlbufquadindices;
@property (nonatomic, retain) CAMetalLayer *mtllayer;
@property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
@property (nonatomic, assign) METAL_ShaderPipelines *activepipelines;
Expand All @@ -137,6 +138,7 @@ - (void)dealloc
[_mtlsamplernearest release];
[_mtlsamplerlinear release];
[_mtlbufconstants release];
[_mtlbufquadindices release];
[_mtllayer release];
[_mtlpassdesc release];
[super dealloc];
Expand Down Expand Up @@ -794,7 +796,6 @@ - (void)dealloc
static int
METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FRect * rects, int count)
{
// !!! FIXME: use an index buffer
const size_t vertlen = (sizeof (float) * 8) * count;
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
if (!verts) {
Expand All @@ -803,6 +804,11 @@ - (void)dealloc

cmd->data.draw.count = count;

/* Quads in the following vertex order (matches the quad index buffer):
* 1---3
* | \ |
* 0---2
*/
for (int i = 0; i < count; i++, rects++) {
if ((rects->w <= 0.0f) || (rects->h <= 0.0f)) {
cmd->data.draw.count--;
Expand All @@ -829,9 +835,8 @@ - (void)dealloc
METAL_QueueCopy(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
{
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
const float texw = (float) texturedata.mtltexture.width;
const float texh = (float) texturedata.mtltexture.height;
const float texw = (float) texture->w;
const float texh = (float) texture->h;
// !!! FIXME: use an index buffer
const size_t vertlen = (sizeof (float) * 16);
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
Expand Down Expand Up @@ -867,9 +872,8 @@ - (void)dealloc
const SDL_Rect * srcquad, const SDL_FRect * dstrect,
const double angle, const SDL_FPoint *center, const SDL_RendererFlip flip)
{
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
const float texw = (float) texturedata.mtltexture.width;
const float texh = (float) texturedata.mtltexture.height;
const float texw = (float) texture->w;
const float texh = (float) texture->h;
const float rads = (float)(M_PI * (float) angle / 180.0f);
const float c = cosf(rads), s = sinf(rads);
float minu, maxu, minv, maxv;
Expand Down Expand Up @@ -1159,10 +1163,19 @@ - (void)dealloc

case SDL_RENDERCMD_FILL_RECTS: {
const size_t count = cmd->data.draw.count;
size_t start = 0;
const size_t maxcount = UINT16_MAX / 6;
SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
for (size_t i = 0; i < count; i++, start += 4) { // !!! FIXME: can we do all of these this with a single draw call, using MTLPrimitiveTypeTriangle and an index buffer?
[data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:start vertexCount:4];
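/* Our index buffer has 16 bit indices, so a single draw call can address
* at most 65k vertices (16k rects). Each batch below is capped at maxcount
* (UINT16_MAX / 6 = 10922 rects), which stays within that limit. */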
for (size_t i = 0; i < count; i += maxcount) {
/* Set the vertex buffer offset for our current positions.
* The vertex buffer itself was bound in SetDrawState. */
[data.mtlcmdencoder setVertexBufferOffset:cmd->data.draw.first + i*sizeof(float)*8 atIndex:0];
[data.mtlcmdencoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
indexCount:SDL_min(maxcount, count - i) * 6
indexType:MTLIndexTypeUInt16
indexBuffer:data.mtlbufquadindices
indexBufferOffset:0];
}
break;
}
Expand Down Expand Up @@ -1424,11 +1437,6 @@ - (void)dealloc
#if !__has_feature(objc_arc)
[mtlbufconstantstaging autorelease];
#endif
mtlbufconstantstaging.label = @"SDL constant staging data";

id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
data.mtlbufconstants = mtlbufconstants;
data.mtlbufconstants.label = @"SDL constant data";

char *constantdata = [mtlbufconstantstaging contents];
SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
Expand All @@ -1437,10 +1445,42 @@ - (void)dealloc
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));

int quadcount = UINT16_MAX / 4;
size_t indicessize = sizeof(UInt16) * quadcount * 6;
id<MTLBuffer> mtlbufquadindicesstaging = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModeShared];
#if !__has_feature(objc_arc)
[mtlbufquadindicesstaging autorelease];
#endif

/* Quads in the following vertex order (matches the FillRects vertices):
* 1---3
* | \ |
* 0---2
*/
UInt16 *indexdata = [mtlbufquadindicesstaging contents];
for (int i = 0; i < quadcount; i++) {
indexdata[i * 6 + 0] = i * 4 + 0;
indexdata[i * 6 + 1] = i * 4 + 1;
indexdata[i * 6 + 2] = i * 4 + 2;

indexdata[i * 6 + 3] = i * 4 + 2;
indexdata[i * 6 + 4] = i * 4 + 1;
indexdata[i * 6 + 5] = i * 4 + 3;
}

id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
data.mtlbufconstants = mtlbufconstants;
data.mtlbufconstants.label = @"SDL constant data";

id<MTLBuffer> mtlbufquadindices = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModePrivate];
data.mtlbufquadindices = mtlbufquadindices;
data.mtlbufquadindices.label = @"SDL quad index buffer";

id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];

[blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:data.mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
[blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
[blitcmd copyFromBuffer:mtlbufquadindicesstaging sourceOffset:0 toBuffer:mtlbufquadindices destinationOffset:0 size:indicessize];

[blitcmd endEncoding];
[cmdbuffer commit];
Expand Down Expand Up @@ -1503,8 +1543,10 @@ - (void)dealloc
#endif
#else
#ifdef __IPHONE_11_0
if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
maxtexsize = 16384;
if (@available(iOS 11.0, *)) {
if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
maxtexsize = 16384;
}
} else
#endif
#ifdef __IPHONE_10_0
Expand All @@ -1529,6 +1571,7 @@ - (void)dealloc
[mtlsamplernearest release];
[mtlsamplerlinear release];
[mtlbufconstants release];
[mtlbufquadindices release];
[view release];
[data release];
[mtldevice release];
Expand Down

0 comments on commit 4a58722

Please sign in to comment.