metal: avoid an extra buffer allocation and GPU data copy in RunCommandQueue, it's not needed. Improves overall performance.
author Alex Szpakowski <slime73@gmail.com>
Thu, 01 Nov 2018 20:24:21 -0300
changeset 12385 45038f8422c9
parent 12384 b1f5162fd621
child 12386 a80816d0e9d2
metal: avoid an extra buffer allocation and GPU data copy in RunCommandQueue, it's not needed. Improves overall performance.
src/render/metal/SDL_render_metal.m
     1.1 --- a/src/render/metal/SDL_render_metal.m	Thu Nov 01 19:49:01 2018 -0300
     1.2 +++ b/src/render/metal/SDL_render_metal.m	Thu Nov 01 20:24:21 2018 -0300
     1.3 @@ -1073,24 +1073,19 @@
     1.4  
     1.5      // !!! FIXME: have a ring of pre-made MTLBuffers we cycle through? How expensive is creation?
     1.6      if (vertsize > 0) {
     1.7 -        id<MTLBuffer> mtlbufvertexstaging = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
     1.8 -        #if !__has_feature(objc_arc)
     1.9 -        [mtlbufvertexstaging autorelease];
    1.10 -        #endif
    1.11 -        mtlbufvertexstaging.label = @"SDL vertex staging data";
    1.12 -        SDL_memcpy([mtlbufvertexstaging contents], vertices, vertsize);
    1.13 -
    1.14 -        // Move our new vertex buffer from system RAM to GPU memory so any draw calls can use it.
    1.15 -        mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModePrivate];
    1.16 +        /* We can memcpy to a shared buffer from the CPU and read it from the GPU
    1.17 +         * without any extra copying. It's a bit slower on macOS to read shared
    1.18 +         * data from the GPU than to read managed/private data, but we avoid the
    1.19 +         * cost of copying the data and the code's simpler. Apple's best
    1.20 +         * practices guide recommends this approach for streamed vertex data.
    1.21 +         * TODO: this buffer is also used for constants. Is performance still
    1.22 +         * good for those, or should we have a managed buffer for them? */
    1.23 +        mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
    1.24          #if !__has_feature(objc_arc)
    1.25          [mtlbufvertex autorelease];
    1.26          #endif
    1.27          mtlbufvertex.label = @"SDL vertex data";
    1.28 -        id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
    1.29 -        id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];
    1.30 -        [blitcmd copyFromBuffer:mtlbufvertexstaging sourceOffset:0 toBuffer:mtlbufvertex destinationOffset:0 size:vertsize];
    1.31 -        [blitcmd endEncoding];
    1.32 -        [cmdbuffer commit];
    1.33 +        SDL_memcpy([mtlbufvertex contents], vertices, vertsize);
    1.34      }
    1.35  
    1.36      // If there's a command buffer here unexpectedly (app requested one?). Commit it so we can start fresh.