metal: avoid an extra buffer allocation and GPU data copy in RunCommandQueue, it's not needed. Improves overall performance.
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
21 #include "../../SDL_internal.h"
23 #if SDL_VIDEO_RENDER_METAL && !SDL_RENDER_DISABLED
25 #include "SDL_hints.h"
27 #include "SDL_assert.h"
28 #include "SDL_syswm.h"
29 #include "../SDL_sysrender.h"
32 #include "../../video/cocoa/SDL_cocoametalview.h"
34 #include "../../video/uikit/SDL_uikitmetalview.h"
36 #include <Availability.h>
37 #import <Metal/Metal.h>
38 #import <QuartzCore/CAMetalLayer.h>
40 /* Regenerate these with build-metal-shaders.sh */
42 #include "SDL_shaders_metal_osx.h"
44 #include "SDL_shaders_metal_ios.h"
47 /* Apple Metal renderer implementation */
/* macOS requires constants in a buffer to have a 256 byte alignment. */
/* NOTE(review): the platform guard was lost in extraction; __MACOSX__ is the
 * macro this file uses elsewhere for macOS-only code -- confirm. */
#ifdef __MACOSX__
#define CONSTANT_ALIGN 256
#else
#define CONSTANT_ALIGN 4
#endif

/* Round `size` up to the next multiple of CONSTANT_ALIGN (which must be a
 * power of two). The argument is parenthesized so that compound expressions
 * such as `a << b` or `a | b` expand with the intended precedence. */
#define ALIGN_CONSTANTS(size) (((size) + CONSTANT_ALIGN - 1) & (~(CONSTANT_ALIGN - 1)))
/* Byte offsets of each constant block inside the shared constants buffer.
 * Every block starts on a CONSTANT_ALIGN boundary. */
static const size_t CONSTANTS_OFFSET_INVALID = 0xFFFFFFFF; /* sentinel: "no constants bound" */
static const size_t CONSTANTS_OFFSET_IDENTITY = 0;
static const size_t CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM = ALIGN_CONSTANTS(CONSTANTS_OFFSET_IDENTITY + sizeof(float) * 16);
static const size_t CONSTANTS_OFFSET_DECODE_JPEG = ALIGN_CONSTANTS(CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16);
static const size_t CONSTANTS_OFFSET_DECODE_BT601 = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_JPEG + sizeof(float) * 4 * 4);
static const size_t CONSTANTS_OFFSET_DECODE_BT709 = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_BT601 + sizeof(float) * 4 * 4);
/* Total buffer size. The BT709 block is the same size as the JPEG and BT601
 * decode blocks (4 * 4 floats); reserving fewer floats here would make the
 * buffer too small for the last block. */
static const size_t CONSTANTS_LENGTH = CONSTANTS_OFFSET_DECODE_BT709 + sizeof(float) * 4 * 4;
/* Vertex shader entry points available in the compiled Metal library. */
typedef enum SDL_MetalVertexFunction
{
    SDL_METAL_VERTEX_SOLID,
    SDL_METAL_VERTEX_COPY,
} SDL_MetalVertexFunction;
/* Fragment shader entry points. The values double as indices into
 * METAL_ShaderPipelines.caches, so SDL_METAL_FRAGMENT_COUNT must stay last. */
typedef enum SDL_MetalFragmentFunction
{
    SDL_METAL_FRAGMENT_SOLID = 0,
    SDL_METAL_FRAGMENT_COPY,
    SDL_METAL_FRAGMENT_YUV,
    SDL_METAL_FRAGMENT_NV12,
    SDL_METAL_FRAGMENT_NV21,
    SDL_METAL_FRAGMENT_COUNT,
} SDL_MetalFragmentFunction;
/* One compiled render pipeline, keyed by the blend mode it was built for. */
typedef struct METAL_PipelineState
{
    SDL_BlendMode blendMode;
    void *pipe; /* retained id<MTLRenderPipelineState>, stored via CFBridgingRetain */
} METAL_PipelineState;
/* Growable array of pipeline states that share one shader combination and
 * one render target pixel format, differing only in blend mode. */
typedef struct METAL_PipelineCache
{
    METAL_PipelineState *states;  /* SDL_realloc'd array of `count` entries */
    int count;
    SDL_MetalVertexFunction vertexFunction;
    SDL_MetalFragmentFunction fragmentFunction;
    MTLPixelFormat renderTargetFormat;
    const char *label;            /* prefix for the pipeline's debug label; not copied */
} METAL_PipelineCache;
/* Each shader combination used by drawing functions has a separate pipeline
 * cache, and we have a separate list of caches for each render target pixel
 * format. This is more efficient than iterating over a global cache to find
 * the pipeline based on the specified shader combination and RT pixel format,
 * since we know what the RT pixel format is when we set the render target, and
 * we know what the shader combination is inside each drawing function's code. */
typedef struct METAL_ShaderPipelines
{
    MTLPixelFormat renderTargetFormat;
    METAL_PipelineCache caches[SDL_METAL_FRAGMENT_COUNT]; /* indexed by SDL_MetalFragmentFunction */
} METAL_ShaderPipelines;
/* Per-renderer state: the Metal device/queue, the command buffer and encoder
 * currently being recorded (nil when no render pass is open), shared sampler
 * and constant resources, and the pipeline caches. */
@interface METAL_RenderData : NSObject
    @property (nonatomic, retain) id<MTLDevice> mtldevice;
    @property (nonatomic, retain) id<MTLCommandQueue> mtlcmdqueue;
    @property (nonatomic, retain) id<MTLCommandBuffer> mtlcmdbuffer;
    @property (nonatomic, retain) id<MTLRenderCommandEncoder> mtlcmdencoder;
    @property (nonatomic, retain) id<MTLLibrary> mtllibrary;
    @property (nonatomic, retain) id<CAMetalDrawable> mtlbackbuffer;
    @property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
    @property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
    @property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
    @property (nonatomic, retain) CAMetalLayer *mtllayer;
    @property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
    /* Plain C pointers into the SDL_realloc'd pipeline array; not owned by ARC. */
    @property (nonatomic, assign) METAL_ShaderPipelines *activepipelines;
    @property (nonatomic, assign) METAL_ShaderPipelines *allpipelines;
    @property (nonatomic, assign) int pipelinescount;
@end

@implementation METAL_RenderData
#if !__has_feature(objc_arc)
/* Manual reference counting builds must release every retained property.
 * NOTE(review): the dealloc scaffolding and the mtllayer release were lost in
 * extraction and restored here -- confirm. */
- (void)dealloc
{
    [_mtldevice release];
    [_mtlcmdqueue release];
    [_mtlcmdbuffer release];
    [_mtlcmdencoder release];
    [_mtllibrary release];
    [_mtlbackbuffer release];
    [_mtlsamplernearest release];
    [_mtlsamplerlinear release];
    [_mtlbufconstants release];
    [_mtllayer release];
    [_mtlpassdesc release];
    [super dealloc];
}
#endif
@end
/* Per-texture state: the main Metal texture, an optional second texture for
 * the chroma planes of YUV/NV12/NV21 formats, the sampler to draw with, and
 * the fragment shader / colour-conversion constants to use. */
@interface METAL_TextureData : NSObject
    @property (nonatomic, retain) id<MTLTexture> mtltexture;
    @property (nonatomic, retain) id<MTLTexture> mtltexture_uv;
    @property (nonatomic, retain) id<MTLSamplerState> mtlsampler;
    @property (nonatomic, assign) SDL_MetalFragmentFunction fragmentFunction;
    @property (nonatomic, assign) BOOL yuv;   /* IYUV / YV12 */
    @property (nonatomic, assign) BOOL nv12;  /* NV12 / NV21 */
    @property (nonatomic, assign) size_t conversionBufferOffset; /* offset into the constants buffer */
@end

@implementation METAL_TextureData
#if !__has_feature(objc_arc)
/* NOTE(review): dealloc scaffolding restored after extraction loss -- confirm. */
- (void)dealloc
{
    [_mtltexture release];
    [_mtltexture_uv release];
    [_mtlsampler release];
    [super dealloc];
}
#endif
@end
/* Check whether the Metal renderer can be used with the given window system.
 * Returns 0 when available; otherwise returns the (negative) result of
 * SDL_SetError with a description of why not. */
static int
IsMetalAvailable(const SDL_SysWMinfo *syswm)
{
    if (syswm->subsystem != SDL_SYSWM_COCOA && syswm->subsystem != SDL_SYSWM_UIKIT) {
        return SDL_SetError("Metal render target only supports Cocoa and UIKit video targets at the moment.");
    }

    // this checks a weak symbol.
#if (defined(__MACOSX__) && (MAC_OS_X_VERSION_MIN_REQUIRED < 101100))
    if (MTLCreateSystemDefaultDevice == NULL) {  // probably on 10.10 or lower.
        return SDL_SetError("Metal framework not available on this system");
    }
#endif

    return 0;
}
/* Sentinels returned by GetBlendOperation / GetBlendFactor for SDL values
 * that have no Metal equivalent. */
static const MTLBlendOperation invalidBlendOperation = (MTLBlendOperation)0xFFFFFFFF;
static const MTLBlendFactor invalidBlendFactor = (MTLBlendFactor)0xFFFFFFFF;
/* Map an SDL blend operation to its Metal counterpart, or
 * invalidBlendOperation when there is none. */
static MTLBlendOperation
GetBlendOperation(SDL_BlendOperation operation)
{
    switch (operation) {
        case SDL_BLENDOPERATION_ADD: return MTLBlendOperationAdd;
        case SDL_BLENDOPERATION_SUBTRACT: return MTLBlendOperationSubtract;
        case SDL_BLENDOPERATION_REV_SUBTRACT: return MTLBlendOperationReverseSubtract;
        case SDL_BLENDOPERATION_MINIMUM: return MTLBlendOperationMin;
        case SDL_BLENDOPERATION_MAXIMUM: return MTLBlendOperationMax;
        default: return invalidBlendOperation;
    }
}
/* Map an SDL blend factor to its Metal counterpart, or invalidBlendFactor
 * when there is none. */
static MTLBlendFactor
GetBlendFactor(SDL_BlendFactor factor)
{
    switch (factor) {
        case SDL_BLENDFACTOR_ZERO: return MTLBlendFactorZero;
        case SDL_BLENDFACTOR_ONE: return MTLBlendFactorOne;
        case SDL_BLENDFACTOR_SRC_COLOR: return MTLBlendFactorSourceColor;
        case SDL_BLENDFACTOR_ONE_MINUS_SRC_COLOR: return MTLBlendFactorOneMinusSourceColor;
        case SDL_BLENDFACTOR_SRC_ALPHA: return MTLBlendFactorSourceAlpha;
        case SDL_BLENDFACTOR_ONE_MINUS_SRC_ALPHA: return MTLBlendFactorOneMinusSourceAlpha;
        case SDL_BLENDFACTOR_DST_COLOR: return MTLBlendFactorDestinationColor;
        case SDL_BLENDFACTOR_ONE_MINUS_DST_COLOR: return MTLBlendFactorOneMinusDestinationColor;
        case SDL_BLENDFACTOR_DST_ALPHA: return MTLBlendFactorDestinationAlpha;
        case SDL_BLENDFACTOR_ONE_MINUS_DST_ALPHA: return MTLBlendFactorOneMinusDestinationAlpha;
        default: return invalidBlendFactor;
    }
}
/* Name of the vertex shader entry point in the Metal library.
 * NOTE(review): return type and the nil default were lost in extraction and
 * restored here -- confirm. */
static NSString *
GetVertexFunctionName(SDL_MetalVertexFunction function)
{
    switch (function) {
        case SDL_METAL_VERTEX_SOLID: return @"SDL_Solid_vertex";
        case SDL_METAL_VERTEX_COPY: return @"SDL_Copy_vertex";
        default: return nil;
    }
}
/* Name of the fragment shader entry point in the Metal library.
 * NOTE(review): return type and the nil default were lost in extraction and
 * restored here -- confirm. */
static NSString *
GetFragmentFunctionName(SDL_MetalFragmentFunction function)
{
    switch (function) {
        case SDL_METAL_FRAGMENT_SOLID: return @"SDL_Solid_fragment";
        case SDL_METAL_FRAGMENT_COPY: return @"SDL_Copy_fragment";
        case SDL_METAL_FRAGMENT_YUV: return @"SDL_YUV_fragment";
        case SDL_METAL_FRAGMENT_NV12: return @"SDL_NV12_fragment";
        case SDL_METAL_FRAGMENT_NV21: return @"SDL_NV21_fragment";
        default: return nil;
    }
}
/* Build a render pipeline for `cache`'s shader pair and render target format
 * with the given blend mode, append it to the cache, and return it.
 *
 * blendlabel is appended to cache->label to form the pipeline's debug label.
 * On allocation failure the new pipeline is released, SDL_OutOfMemory() is
 * raised and NULL is returned.
 *
 * NOTE(review): the NSError declaration, the extra non-ARC releases and the
 * out-of-memory branch were lost in extraction and restored -- confirm. */
static id<MTLRenderPipelineState>
MakePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache,
                  NSString *blendlabel, SDL_BlendMode blendmode)
{
    id<MTLFunction> mtlvertfn = [data.mtllibrary newFunctionWithName:GetVertexFunctionName(cache->vertexFunction)];
    id<MTLFunction> mtlfragfn = [data.mtllibrary newFunctionWithName:GetFragmentFunctionName(cache->fragmentFunction)];
    SDL_assert(mtlvertfn != nil);
    SDL_assert(mtlfragfn != nil);

    MTLRenderPipelineDescriptor *mtlpipedesc = [[MTLRenderPipelineDescriptor alloc] init];
    mtlpipedesc.vertexFunction = mtlvertfn;
    mtlpipedesc.fragmentFunction = mtlfragfn;

    MTLRenderPipelineColorAttachmentDescriptor *rtdesc = mtlpipedesc.colorAttachments[0];

    rtdesc.pixelFormat = cache->renderTargetFormat;

    if (blendmode != SDL_BLENDMODE_NONE) {
        rtdesc.blendingEnabled = YES;
        rtdesc.sourceRGBBlendFactor = GetBlendFactor(SDL_GetBlendModeSrcColorFactor(blendmode));
        rtdesc.destinationRGBBlendFactor = GetBlendFactor(SDL_GetBlendModeDstColorFactor(blendmode));
        rtdesc.rgbBlendOperation = GetBlendOperation(SDL_GetBlendModeColorOperation(blendmode));
        rtdesc.sourceAlphaBlendFactor = GetBlendFactor(SDL_GetBlendModeSrcAlphaFactor(blendmode));
        rtdesc.destinationAlphaBlendFactor = GetBlendFactor(SDL_GetBlendModeDstAlphaFactor(blendmode));
        rtdesc.alphaBlendOperation = GetBlendOperation(SDL_GetBlendModeAlphaOperation(blendmode));
    } else {
        rtdesc.blendingEnabled = NO;
    }

    mtlpipedesc.label = [@(cache->label) stringByAppendingString:blendlabel];

    NSError *err = nil;
    id<MTLRenderPipelineState> state = [data.mtldevice newRenderPipelineStateWithDescriptor:mtlpipedesc error:&err];
    SDL_assert(err == nil);

    /* The cache stores the pipeline as a retained void* so it can live in a
     * plain C array. */
    METAL_PipelineState pipeline;
    pipeline.blendMode = blendmode;
    pipeline.pipe = (void *)CFBridgingRetain(state);

    METAL_PipelineState *states = SDL_realloc(cache->states, (cache->count + 1) * sizeof(pipeline));

#if !__has_feature(objc_arc)
    [mtlpipedesc release];  // !!! FIXME: can these be reused for each creation, or does the pipeline obtain it?
    [mtlvertfn release];
    [mtlfragfn release];
    [state release];
#endif

    if (states) {
        states[cache->count++] = pipeline;
        cache->states = states;
        return (__bridge id<MTLRenderPipelineState>)pipeline.pipe;
    } else {
        CFBridgingRelease(pipeline.pipe);
        SDL_OutOfMemory();
        return NULL;
    }
}
/* Initialise `cache` for one shader combination / render target format and
 * pre-build pipelines for SDL's four default blend modes.
 * NOTE(review): the leading SDL_zerop(cache) was lost in extraction and
 * restored -- confirm. */
static void
MakePipelineCache(METAL_RenderData *data, METAL_PipelineCache *cache, const char *label,
                  MTLPixelFormat rtformat, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn)
{
    SDL_zerop(cache);

    cache->vertexFunction = vertfn;
    cache->fragmentFunction = fragfn;
    cache->renderTargetFormat = rtformat;
    cache->label = label;

    /* Create pipeline states for the default blend modes. Custom blend modes
     * will be added to the cache on-demand. */
    MakePipelineState(data, cache, @" (blend=none)", SDL_BLENDMODE_NONE);
    MakePipelineState(data, cache, @" (blend=blend)", SDL_BLENDMODE_BLEND);
    MakePipelineState(data, cache, @" (blend=add)", SDL_BLENDMODE_ADD);
    MakePipelineState(data, cache, @" (blend=mod)", SDL_BLENDMODE_MOD);
}
/* Release every pipeline held by `cache` and free its array. A NULL cache is
 * ignored. */
static void
DestroyPipelineCache(METAL_PipelineCache *cache)
{
    if (cache != NULL) {
        for (int i = 0; i < cache->count; i++) {
            /* Balances the CFBridgingRetain done when the state was cached. */
            CFBridgingRelease(cache->states[i].pipe);
        }

        SDL_free(cache->states);
    }
}
/* Initialise `pipelines` for render target format `rtformat`, creating one
 * pipeline cache per fragment shader. */
static void
MakeShaderPipelines(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, MTLPixelFormat rtformat)
{
    SDL_zerop(pipelines);

    pipelines->renderTargetFormat = rtformat;

    MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_SOLID], "SDL primitives pipeline", rtformat, SDL_METAL_VERTEX_SOLID, SDL_METAL_FRAGMENT_SOLID);
    MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_COPY], "SDL copy pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_COPY);
    MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_YUV], "SDL YUV pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_YUV);
    MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_NV12], "SDL NV12 pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_NV12);
    MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_NV21], "SDL NV21 pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_NV21);
}
/* Return the pipeline set for render target format `rtformat`, creating and
 * appending one to data.allpipelines if none exists yet. Returns NULL (after
 * SDL_OutOfMemory) if the array cannot be grown.
 * NOTE(review): the body of the allocation-failure branch was lost in
 * extraction and restored -- confirm. */
static METAL_ShaderPipelines *
ChooseShaderPipelines(METAL_RenderData *data, MTLPixelFormat rtformat)
{
    METAL_ShaderPipelines *allpipelines = data.allpipelines;
    int count = data.pipelinescount;

    for (int i = 0; i < count; i++) {
        if (allpipelines[i].renderTargetFormat == rtformat) {
            return &allpipelines[i];
        }
    }

    allpipelines = SDL_realloc(allpipelines, (count + 1) * sizeof(METAL_ShaderPipelines));

    if (allpipelines == NULL) {
        SDL_OutOfMemory();
        return NULL;
    }

    MakeShaderPipelines(data, &allpipelines[count], rtformat);

    /* The realloc may have moved the array, so publish the new pointer. */
    data.allpipelines = allpipelines;
    data.pipelinescount = count + 1;

    return &data.allpipelines[count];
}
/* Destroy every pipeline cache in the `count`-element array `allpipelines`
 * and free the array itself. A NULL array is ignored. */
static void
DestroyAllPipelines(METAL_ShaderPipelines *allpipelines, int count)
{
    if (allpipelines != NULL) {
        for (int i = 0; i < count; i++) {
            for (int cache = 0; cache < SDL_METAL_FRAGMENT_COUNT; cache++) {
                DestroyPipelineCache(&allpipelines[i].caches[cache]);
            }
        }

        SDL_free(allpipelines);
    }
}
/* Look up the pipeline for fragment shader `fragfn` and `blendmode` in
 * `pipelines`; if the blend mode has not been seen before, build and cache a
 * new pipeline for it. */
static inline id<MTLRenderPipelineState>
ChoosePipelineState(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, SDL_MetalFragmentFunction fragfn, SDL_BlendMode blendmode)
{
    METAL_PipelineCache *cache = &pipelines->caches[fragfn];

    for (int i = 0; i < cache->count; i++) {
        if (cache->states[i].blendMode == blendmode) {
            return (__bridge id<MTLRenderPipelineState>)cache->states[i].pipe;
        }
    }

    return MakePipelineState(data, cache, [NSString stringWithFormat:@" (blend=custom 0x%x)", blendmode], blendmode);
}
/* Make sure a render command encoder is open for the current render target,
 * beginning a new render pass if necessary.
 *
 * load selects the colour attachment's load action; clear_color must be
 * non-NULL when load is MTLLoadActionClear. Does nothing if an encoder is
 * already active. */
static void
METAL_ActivateRenderCommandEncoder(SDL_Renderer * renderer, MTLLoadAction load, MTLClearColor *clear_color)
{
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;

    /* Our SetRenderTarget just signals that the next render operation should
     * set up a new render pass. This is where that work happens. */
    if (data.mtlcmdencoder == nil) {
        id<MTLTexture> mtltexture = nil;

        if (renderer->target != NULL) {
            METAL_TextureData *texdata = (__bridge METAL_TextureData *)renderer->target->driverdata;
            mtltexture = texdata.mtltexture;
        } else {
            if (data.mtlbackbuffer == nil) {
                /* The backbuffer's contents aren't guaranteed to persist after
                 * presenting, so we can leave it undefined when loading it. */
                data.mtlbackbuffer = [data.mtllayer nextDrawable];
                if (load == MTLLoadActionLoad) {
                    load = MTLLoadActionDontCare;
                }
            }
            mtltexture = data.mtlbackbuffer.texture;
        }

        SDL_assert(mtltexture);

        if (load == MTLLoadActionClear) {
            SDL_assert(clear_color != NULL);
            data.mtlpassdesc.colorAttachments[0].clearColor = *clear_color;
        }

        data.mtlpassdesc.colorAttachments[0].loadAction = load;
        data.mtlpassdesc.colorAttachments[0].texture = mtltexture;

        data.mtlcmdbuffer = [data.mtlcmdqueue commandBuffer];
        data.mtlcmdencoder = [data.mtlcmdbuffer renderCommandEncoderWithDescriptor:data.mtlpassdesc];

        if (data.mtlbackbuffer != nil && mtltexture == data.mtlbackbuffer.texture) {
            data.mtlcmdencoder.label = @"SDL metal renderer backbuffer";
        } else {
            data.mtlcmdencoder.label = @"SDL metal renderer render target";
        }

        data.activepipelines = ChooseShaderPipelines(data, mtltexture.pixelFormat);

        // make sure this has a definite place in the queue. This way it will
        //  execute reliably whether the app tries to make its own command buffers
        //  or whatever. This means we can _always_ batch rendering commands!
        [data.mtlcmdbuffer enqueue];
    }
}
/* Window event hook. Currently a placeholder: shown/hidden events are
 * recognised but nothing is done with them. */
static void
METAL_WindowEvent(SDL_Renderer * renderer, const SDL_WindowEvent *event)
{
    if (event->event == SDL_WINDOWEVENT_SHOWN ||
        event->event == SDL_WINDOWEVENT_HIDDEN) {
        // !!! FIXME: write me
    }
}
/* Report the Metal layer's drawable size in pixels. Either output pointer
 * may be NULL. Returns 0.
 * NOTE(review): the NULL guards, return statement and @autoreleasepool
 * wrapper (the convention METAL_RunCommandQueue uses) were lost in
 * extraction and restored -- confirm. */
static int
METAL_GetOutputSize(SDL_Renderer * renderer, int *w, int *h)
{ @autoreleasepool {
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    if (w) {
        *w = (int)data.mtllayer.drawableSize.width;
    }
    if (h) {
        *h = (int)data.mtllayer.drawableSize.height;
    }
    return 0;
}}
/* Return SDL_TRUE when every factor and operation of `blendMode` has a Metal
 * equivalent, SDL_FALSE otherwise. */
static SDL_bool
METAL_SupportsBlendMode(SDL_Renderer * renderer, SDL_BlendMode blendMode)
{
    SDL_BlendFactor srcColorFactor = SDL_GetBlendModeSrcColorFactor(blendMode);
    SDL_BlendFactor srcAlphaFactor = SDL_GetBlendModeSrcAlphaFactor(blendMode);
    SDL_BlendOperation colorOperation = SDL_GetBlendModeColorOperation(blendMode);
    SDL_BlendFactor dstColorFactor = SDL_GetBlendModeDstColorFactor(blendMode);
    SDL_BlendFactor dstAlphaFactor = SDL_GetBlendModeDstAlphaFactor(blendMode);
    SDL_BlendOperation alphaOperation = SDL_GetBlendModeAlphaOperation(blendMode);

    if (GetBlendFactor(srcColorFactor) == invalidBlendFactor ||
        GetBlendFactor(srcAlphaFactor) == invalidBlendFactor ||
        GetBlendOperation(colorOperation) == invalidBlendOperation ||
        GetBlendFactor(dstColorFactor) == invalidBlendFactor ||
        GetBlendFactor(dstAlphaFactor) == invalidBlendFactor ||
        GetBlendOperation(alphaOperation) == invalidBlendOperation) {
        return SDL_FALSE;
    }
    return SDL_TRUE;
}
/* Create the Metal resources backing an SDL texture and attach them as
 * texture->driverdata.
 *
 * ABGR8888/ARGB8888 map to a single RGBA/BGRA texture. The planar and
 * semi-planar YUV formats get an R8 luma texture plus a half-resolution
 * chroma texture: a two-slice R8 array for IYUV/YV12, or a single RG8
 * texture for NV12/NV21.
 *
 * Returns 0 on success, or the SDL_SetError result for an unsupported format
 * or a failed allocation.
 *
 * NOTE(review): `break`/`default`, the `if (yuv)` / `if (yuv || nv12)`
 * guards, the `offset` declaration, the final return and the
 * @autoreleasepool wrapper were lost in extraction and restored -- confirm. */
static int
METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{ @autoreleasepool {
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    MTLPixelFormat pixfmt;

    switch (texture->format) {
        case SDL_PIXELFORMAT_ABGR8888:
            pixfmt = MTLPixelFormatRGBA8Unorm;
            break;
        case SDL_PIXELFORMAT_ARGB8888:
            pixfmt = MTLPixelFormatBGRA8Unorm;
            break;
        case SDL_PIXELFORMAT_IYUV:
        case SDL_PIXELFORMAT_YV12:
        case SDL_PIXELFORMAT_NV12:
        case SDL_PIXELFORMAT_NV21:
            pixfmt = MTLPixelFormatR8Unorm;
            break;
        default:
            return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format));
    }

    MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pixfmt
                                            width:(NSUInteger)texture->w height:(NSUInteger)texture->h mipmapped:NO];

    /* Not available in iOS 8. */
    if ([mtltexdesc respondsToSelector:@selector(usage)]) {
        if (texture->access == SDL_TEXTUREACCESS_TARGET) {
            mtltexdesc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget;
        } else {
            mtltexdesc.usage = MTLTextureUsageShaderRead;
        }
    }

    id<MTLTexture> mtltexture = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
    if (mtltexture == nil) {
        return SDL_SetError("Texture allocation failed");
    }

    id<MTLTexture> mtltexture_uv = nil;

    BOOL yuv = (texture->format == SDL_PIXELFORMAT_IYUV) || (texture->format == SDL_PIXELFORMAT_YV12);
    BOOL nv12 = (texture->format == SDL_PIXELFORMAT_NV12) || (texture->format == SDL_PIXELFORMAT_NV21);

    /* Reuse the descriptor for the half-resolution chroma texture. */
    if (yuv) {
        mtltexdesc.pixelFormat = MTLPixelFormatR8Unorm;
        mtltexdesc.width = (texture->w + 1) / 2;
        mtltexdesc.height = (texture->h + 1) / 2;
        mtltexdesc.textureType = MTLTextureType2DArray;
        mtltexdesc.arrayLength = 2;
    } else if (nv12) {
        mtltexdesc.pixelFormat = MTLPixelFormatRG8Unorm;
        mtltexdesc.width = (texture->w + 1) / 2;
        mtltexdesc.height = (texture->h + 1) / 2;
    }

    if (yuv || nv12) {
        mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
        if (mtltexture_uv == nil) {
#if !__has_feature(objc_arc)
            [mtltexture release];
#endif
            return SDL_SetError("Texture allocation failed");
        }
    }

    METAL_TextureData *texturedata = [[METAL_TextureData alloc] init];
    if (texture->scaleMode == SDL_ScaleModeNearest) {
        texturedata.mtlsampler = data.mtlsamplernearest;
    } else {
        texturedata.mtlsampler = data.mtlsamplerlinear;
    }
    texturedata.mtltexture = mtltexture;
    texturedata.mtltexture_uv = mtltexture_uv;

    texturedata.yuv = yuv;
    texturedata.nv12 = nv12;

    if (yuv) {
        texturedata.fragmentFunction = SDL_METAL_FRAGMENT_YUV;
    } else if (texture->format == SDL_PIXELFORMAT_NV12) {
        texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV12;
    } else if (texture->format == SDL_PIXELFORMAT_NV21) {
        texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV21;
    } else {
        texturedata.fragmentFunction = SDL_METAL_FRAGMENT_COPY;
    }

    /* Pick the YUV->RGB constants block matching the conversion mode SDL
     * selects for this resolution. */
    if (yuv || nv12) {
        size_t offset = 0;
        SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionModeForResolution(texture->w, texture->h);
        switch (mode) {
            case SDL_YUV_CONVERSION_JPEG: offset = CONSTANTS_OFFSET_DECODE_JPEG; break;
            case SDL_YUV_CONVERSION_BT601: offset = CONSTANTS_OFFSET_DECODE_BT601; break;
            case SDL_YUV_CONVERSION_BT709: offset = CONSTANTS_OFFSET_DECODE_BT709; break;
            default: offset = 0; break;
        }
        texturedata.conversionBufferOffset = offset;
    }

    texture->driverdata = (void*)CFBridgingRetain(texturedata);

#if !__has_feature(objc_arc)
    [texturedata release];
    [mtltexture release];
    [mtltexture_uv release];
#endif

    return 0;
}}
/* Upload `pixels` (row stride `pitch`) into region `rect` of the texture.
 * For planar and semi-planar YUV formats the chroma data is expected to
 * follow the luma plane in the same buffer, with half the luma pitch per
 * chroma row. Returns 0.
 *
 * NOTE(review): the mipmapLevel/slice/withBytes/bytesPerImage arguments, the
 * return statement and the @autoreleasepool wrapper were lost in extraction
 * and restored from the Metal replaceRegion selectors -- confirm. */
static int
METAL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                 const SDL_Rect * rect, const void *pixels, int pitch)
{ @autoreleasepool {
    METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;

    /* !!! FIXME: replaceRegion does not do any synchronization, so it might
     * !!! FIXME: stomp on a previous frame's data that's currently being read
     * !!! FIXME: by the GPU. */
    [texturedata.mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h)
                              mipmapLevel:0
                                withBytes:pixels
                              bytesPerRow:pitch];

    if (texturedata.yuv) {
        /* YV12 stores V before U, so its planes map to the opposite slices. */
        int Uslice = texture->format == SDL_PIXELFORMAT_YV12 ? 1 : 0;
        int Vslice = texture->format == SDL_PIXELFORMAT_YV12 ? 0 : 1;

        /* Skip to the correct offset into the next texture */
        pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
        [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
                                     mipmapLevel:0
                                           slice:Uslice
                                       withBytes:pixels
                                     bytesPerRow:(pitch + 1) / 2
                                   bytesPerImage:0];

        /* Skip to the correct offset into the next texture */
        pixels = (const void*)((const Uint8*)pixels + ((rect->h + 1) / 2) * ((pitch + 1)/2));
        [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
                                     mipmapLevel:0
                                           slice:Vslice
                                       withBytes:pixels
                                     bytesPerRow:(pitch + 1) / 2
                                   bytesPerImage:0];
    }

    if (texturedata.nv12) {
        /* Skip to the correct offset into the next texture */
        pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
        [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
                                     mipmapLevel:0
                                           slice:0
                                       withBytes:pixels
                                     bytesPerRow:2 * ((pitch + 1) / 2)
                                   bytesPerImage:0];
    }

    return 0;
}}
/* Upload separate Y, U and V planes into region `rect` of a planar YUV
 * texture. U always goes to chroma slice 0 and V to slice 1. An empty
 * rectangle is a no-op. Returns 0.
 *
 * NOTE(review): the per-plane mipmapLevel/slice/withBytes/bytesPerRow
 * arguments, the return statements and the @autoreleasepool wrapper were
 * lost in extraction and restored from the parameter list -- confirm. */
static int
METAL_UpdateTextureYUV(SDL_Renderer * renderer, SDL_Texture * texture,
                    const SDL_Rect * rect,
                    const Uint8 *Yplane, int Ypitch,
                    const Uint8 *Uplane, int Upitch,
                    const Uint8 *Vplane, int Vpitch)
{ @autoreleasepool {
    METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
    const int Uslice = 0;
    const int Vslice = 1;

    /* Bail out if we're supposed to update an empty rectangle */
    if (rect->w <= 0 || rect->h <= 0) {
        return 0;
    }

    [texturedata.mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h)
                              mipmapLevel:0
                                withBytes:Yplane
                              bytesPerRow:Ypitch];

    [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
                                 mipmapLevel:0
                                       slice:Uslice
                                   withBytes:Uplane
                                 bytesPerRow:Upitch
                               bytesPerImage:0];

    [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
                                 mipmapLevel:0
                                       slice:Vslice
                                   withBytes:Vplane
                                 bytesPerRow:Vpitch
                               bytesPerImage:0];

    return 0;
}}
/* Texture locking is not implemented for the Metal backend; always reports
 * SDL_Unsupported(). */
static int
METAL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
               const SDL_Rect * rect, void **pixels, int *pitch)
{
    return SDL_Unsupported();   // !!! FIXME: write me
}
/* Counterpart of METAL_LockTexture; currently a no-op placeholder. */
static void
METAL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
    // !!! FIXME: write me
}
/* Switch render targets. Any render pass in progress is ended and its
 * command buffer committed; the next pass is opened lazily by
 * METAL_ActivateRenderCommandEncoder. Returns 0.
 * NOTE(review): the return statement and @autoreleasepool wrapper were lost
 * in extraction and restored -- confirm. */
static int
METAL_SetRenderTarget(SDL_Renderer * renderer, SDL_Texture * texture)
{ @autoreleasepool {
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;

    if (data.mtlcmdencoder) {
        /* End encoding for the previous render target so we can set up a new
         * render pass for this one. */
        [data.mtlcmdencoder endEncoding];
        [data.mtlcmdbuffer commit];

        data.mtlcmdencoder = nil;
        data.mtlcmdbuffer = nil;
    }

    /* We don't begin a new render pass right away - we delay it until an actual
     * draw or clear happens. That way we can use hardware clears when possible,
     * which are only available when beginning a new render pass. */

    return 0;
}}
// normalize a value from 0.0f to len into 0.0f to 1.0f.
// NOTE(review): the body was lost in extraction; restored as a plain
// division, which is what the comment above describes -- confirm.
static inline float
normtex(const float _val, const float len)
{
    return _val / len;
}
/* Queue a viewport change: reserve CONSTANT_ALIGN-aligned space in the
 * renderer's vertex data and store a 4x4 orthographic projection for the
 * viewport there. The matrix is left all-zero for a zero-sized viewport.
 * Returns 0, or -1 if the space cannot be allocated.
 * NOTE(review): the allocation check, the `w && h` guard and the return
 * statement were lost in extraction and restored -- confirm. */
static int
METAL_QueueSetViewport(SDL_Renderer * renderer, SDL_RenderCommand *cmd)
{
    float projection[4][4];    /* Prepare an orthographic projection */
    const int w = cmd->data.viewport.rect.w;
    const int h = cmd->data.viewport.rect.h;
    const size_t matrixlen = sizeof (projection);
    float *matrix = (float *) SDL_AllocateRenderVertices(renderer, matrixlen, CONSTANT_ALIGN, &cmd->data.viewport.first);
    if (!matrix) {
        return -1;
    }

    SDL_memset(projection, '\0', matrixlen);
    if (w && h) {  /* avoid dividing by zero */
        projection[0][0] = 2.0f / w;
        projection[1][1] = -2.0f / h;
        projection[3][0] = -1.0f;
        projection[3][1] = 1.0f;
        projection[3][3] = 1.0f;
    }
    SDL_memcpy(matrix, projection, matrixlen);

    return 0;
}
/* Queue a draw-colour change: store the colour as four floats in 0..1
 * (r, g, b, a) in CONSTANT_ALIGN-aligned vertex data. Returns 0, or -1 if
 * the space cannot be allocated.
 * NOTE(review): the allocation check and return statement were lost in
 * extraction and restored -- confirm. */
static int
METAL_QueueSetDrawColor(SDL_Renderer *renderer, SDL_RenderCommand *cmd)
{
    const size_t vertlen = sizeof (float) * 4;
    float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, CONSTANT_ALIGN, &cmd->data.color.first);
    if (!verts) {
        return -1;
    }
    *(verts++) = ((float)cmd->data.color.r) / 255.0f;
    *(verts++) = ((float)cmd->data.color.g) / 255.0f;
    *(verts++) = ((float)cmd->data.color.b) / 255.0f;
    *(verts++) = ((float)cmd->data.color.a) / 255.0f;
    return 0;
}
/* Queue `count` points: copy them verbatim (two floats each) into the
 * renderer's vertex data. Returns 0, or -1 if the space cannot be allocated.
 * NOTE(review): the allocation check and return statement were lost in
 * extraction and restored -- confirm. */
static int
METAL_QueueDrawPoints(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FPoint * points, int count)
{
    const size_t vertlen = (sizeof (float) * 2) * count;
    float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
    if (!verts) {
        return -1;
    }
    cmd->data.draw.count = count;
    SDL_memcpy(verts, points, vertlen);
    return 0;
}
/* Queue filled rectangles: emit four corner vertices (eight floats) per
 * non-empty rect. Empty rects are skipped and subtracted from the draw
 * count; if none remain the command becomes a no-op. Returns 0, or -1 if the
 * vertex space cannot be allocated.
 * NOTE(review): the allocation check, `else` and return statement were lost
 * in extraction and restored -- confirm. */
static int
METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FRect * rects, int count)
{
    // !!! FIXME: use an index buffer
    const size_t vertlen = (sizeof (float) * 8) * count;
    float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
    if (!verts) {
        return -1;
    }

    cmd->data.draw.count = count;

    for (int i = 0; i < count; i++, rects++) {
        if ((rects->w <= 0.0f) || (rects->h <= 0.0f)) {
            cmd->data.draw.count--;
        } else {
            /* bottom-left, top-left, bottom-right, top-right */
            *(verts++) = rects->x;
            *(verts++) = rects->y + rects->h;
            *(verts++) = rects->x;
            *(verts++) = rects->y;
            *(verts++) = rects->x + rects->w;
            *(verts++) = rects->y + rects->h;
            *(verts++) = rects->x + rects->w;
            *(verts++) = rects->y;
        }
    }

    if (cmd->data.draw.count == 0) {
        cmd->command = SDL_RENDERCMD_NO_OP;  // nothing to do, just skip this one later.
    }

    return 0;
}
/* Queue a textured quad: eight floats of destination corner positions
 * followed by eight floats of texture coordinates normalised against the
 * Metal texture's size. Returns 0, or -1 if the vertex space cannot be
 * allocated.
 * NOTE(review): the allocation check and return statement were lost in
 * extraction and restored -- confirm. */
static int
METAL_QueueCopy(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
             const SDL_Rect * srcrect, const SDL_FRect * dstrect)
{
    METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
    const float texw = (float) texturedata.mtltexture.width;
    const float texh = (float) texturedata.mtltexture.height;
    // !!! FIXME: use an index buffer
    const size_t vertlen = (sizeof (float) * 16);
    float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
    if (!verts) {
        return -1;
    }

    cmd->data.draw.count = 1;

    /* positions: bottom-left, top-left, bottom-right, top-right */
    *(verts++) = dstrect->x;
    *(verts++) = dstrect->y + dstrect->h;
    *(verts++) = dstrect->x;
    *(verts++) = dstrect->y;
    *(verts++) = dstrect->x + dstrect->w;
    *(verts++) = dstrect->y + dstrect->h;
    *(verts++) = dstrect->x + dstrect->w;
    *(verts++) = dstrect->y;

    /* texture coordinates, in the same corner order */
    *(verts++) = normtex(srcrect->x, texw);
    *(verts++) = normtex(srcrect->y + srcrect->h, texh);
    *(verts++) = normtex(srcrect->x, texw);
    *(verts++) = normtex(srcrect->y, texh);
    *(verts++) = normtex(srcrect->x + srcrect->w, texw);
    *(verts++) = normtex(srcrect->y + srcrect->h, texh);
    *(verts++) = normtex(srcrect->x + srcrect->w, texw);
    *(verts++) = normtex(srcrect->y, texh);

    return 0;
}
/* Queue a rotated and/or flipped textured quad.
 *
 * Two allocations are made in the renderer's vertex data: a
 * CONSTANT_ALIGN-aligned 4x4 transform (rotation by `angle` degrees about
 * `center`, then translation to the destination), whose offset is stored in
 * cmd->data.draw.count, and a packed block of corner positions relative to
 * `center` followed by texture coordinates, whose offset is stored in
 * cmd->data.draw.first. Returns 0, or -1 if either allocation fails.
 *
 * NOTE(review): the `verts` declaration, allocation checks, rotation matrix
 * entries, flip swaps, texture-coordinate writes and return statement were
 * lost in extraction. They are restored here to mirror METAL_QueueCopy's
 * corner order and a standard 2D rotation -- confirm against upstream. */
static int
METAL_QueueCopyEx(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
               const SDL_Rect * srcquad, const SDL_FRect * dstrect,
               const double angle, const SDL_FPoint *center, const SDL_RendererFlip flip)
{
    METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
    const float texw = (float) texturedata.mtltexture.width;
    const float texh = (float) texturedata.mtltexture.height;
    const float rads = (float)(M_PI * (float) angle / 180.0f);
    const float c = cosf(rads), s = sinf(rads);
    float minu, maxu, minv, maxv;
    const size_t vertlen = (sizeof (float) * 32);
    float *verts;

    // cheat and store this offset in (count) because it needs to be aligned in ways other fields don't and we aren't using count otherwise.
    verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, CONSTANT_ALIGN, &cmd->data.draw.count);
    if (!verts) {
        return -1;
    }

    // transform matrix
    SDL_memset(verts, '\0', sizeof (*verts) * 16);
    verts[10] = verts[15] = 1.0f;
    // rotation
    verts[0] = c;
    verts[1] = s;
    verts[4] = -s;
    verts[5] = c;

    // translation
    verts[12] = dstrect->x + center->x;
    verts[13] = dstrect->y + center->y;

    // rest of the vertices don't need the aggressive alignment. Pack them in.
    verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
    if (!verts) {
        return -1;
    }

    minu = normtex(srcquad->x, texw);
    maxu = normtex(srcquad->x + srcquad->w, texw);
    minv = normtex(srcquad->y, texh);
    maxv = normtex(srcquad->y + srcquad->h, texh);

    /* Flipping swaps the texture-coordinate extents on the relevant axis. */
    if (flip & SDL_FLIP_HORIZONTAL) {
        float tmp = maxu;
        maxu = minu;
        minu = tmp;
    }

    if (flip & SDL_FLIP_VERTICAL) {
        float tmp = maxv;
        maxv = minv;
        minv = tmp;
    }

    // vertices
    *(verts++) = -center->x;
    *(verts++) = dstrect->h - center->y;
    *(verts++) = -center->x;
    *(verts++) = -center->y;
    *(verts++) = dstrect->w - center->x;
    *(verts++) = dstrect->h - center->y;
    *(verts++) = dstrect->w - center->x;
    *(verts++) = -center->y;

    // texcoords
    *(verts++) = minu;
    *(verts++) = maxv;
    *(verts++) = minu;
    *(verts++) = minv;
    *(verts++) = maxu;
    *(verts++) = maxv;
    *(verts++) = maxu;
    *(verts++) = minv;

    return 0;
}
/* Encoder state tracked while a command queue is replayed, so redundant
 * Metal state changes can be skipped.
 * NOTE(review): the struct header and the cliprect, viewport and
 * color_offset members (all read by SetDrawState) were lost in extraction
 * and restored -- confirm. */
typedef struct
{
    #if __has_feature(objc_arc)
    __unsafe_unretained id<MTLRenderPipelineState> pipeline;
    #else
    id<MTLRenderPipelineState> pipeline;
    #endif
    size_t constants_offset;
    SDL_Texture *texture;
    SDL_bool cliprect_dirty;
    SDL_bool cliprect_enabled;
    SDL_Rect cliprect;
    SDL_bool viewport_dirty;
    SDL_Rect viewport;
    size_t projection_offset;
    SDL_bool color_dirty;
    size_t color_offset;
} METAL_DrawStateCache;
/* Bring the render encoder up to date for one draw command.
 *
 * Opens a render pass if none is active, then applies only the state that
 * `statecache` marks dirty or that differs from what was last set: viewport
 * and projection, scissor rect, draw colour, pipeline (selected by `shader`
 * and the command's blend mode) and the constants block at
 * `constants_offset`. The position vertex buffer is always rebound at the
 * command's `first` offset.
 *
 * NOTE(review): `viewport.zfar` and the closing braces were lost in
 * extraction and restored -- confirm. */
static void
SetDrawState(SDL_Renderer *renderer, const SDL_RenderCommand *cmd, const SDL_MetalFragmentFunction shader,
             const size_t constants_offset, id<MTLBuffer> mtlbufvertex, METAL_DrawStateCache *statecache)
{
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    const SDL_BlendMode blend = cmd->data.draw.blend;
    size_t first = cmd->data.draw.first;
    id<MTLRenderPipelineState> newpipeline;

    METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL);

    if (statecache->viewport_dirty) {
        MTLViewport viewport;
        viewport.originX = statecache->viewport.x;
        viewport.originY = statecache->viewport.y;
        viewport.width = statecache->viewport.w;
        viewport.height = statecache->viewport.h;
        viewport.znear = 0.0;
        viewport.zfar = 1.0;
        [data.mtlcmdencoder setViewport:viewport];
        [data.mtlcmdencoder setVertexBuffer:mtlbufvertex offset:statecache->projection_offset atIndex:2];  // projection
        statecache->viewport_dirty = SDL_FALSE;
    }

    if (statecache->cliprect_dirty) {
        MTLScissorRect mtlrect;
        if (statecache->cliprect_enabled) {
            /* The clip rect is relative to the viewport origin. */
            const SDL_Rect *rect = &statecache->cliprect;
            mtlrect.x = statecache->viewport.x + rect->x;
            mtlrect.y = statecache->viewport.y + rect->y;
            mtlrect.width = rect->w;
            mtlrect.height = rect->h;
        } else {
            mtlrect.x = statecache->viewport.x;
            mtlrect.y = statecache->viewport.y;
            mtlrect.width = statecache->viewport.w;
            mtlrect.height = statecache->viewport.h;
        }
        /* An empty scissor rect is never sent to the encoder. */
        if (mtlrect.width > 0 && mtlrect.height > 0) {
            [data.mtlcmdencoder setScissorRect:mtlrect];
        }
        statecache->cliprect_dirty = SDL_FALSE;
    }

    if (statecache->color_dirty) {
        [data.mtlcmdencoder setFragmentBuffer:mtlbufvertex offset:statecache->color_offset atIndex:0];
        statecache->color_dirty = SDL_FALSE;
    }

    newpipeline = ChoosePipelineState(data, data.activepipelines, shader, blend);
    if (newpipeline != statecache->pipeline) {
        [data.mtlcmdencoder setRenderPipelineState:newpipeline];
        statecache->pipeline = newpipeline;
    }

    if (constants_offset != statecache->constants_offset) {
        if (constants_offset != CONSTANTS_OFFSET_INVALID) {
            [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:constants_offset atIndex:3];
        }
        statecache->constants_offset = constants_offset;
    }

    [data.mtlcmdencoder setVertexBuffer:mtlbufvertex offset:first atIndex:0];  // position
}
/* Bind the state needed for a textured draw: the common draw state (via
 * SetDrawState), the texture coordinate stream and, when the texture differs
 * from the one recorded in the state cache, the sampler, texture(s) and the
 * YUV conversion constants.
 *
 *  renderer          renderer whose driverdata holds the METAL_RenderData.
 *  cmd               draw command; supplies the texture and the vertex offset.
 *  constants_offset  forwarded unchanged to SetDrawState.
 *  mtlbufvertex      buffer holding the vertex data for this command queue.
 *  statecache        per-queue cache used to skip redundant encoder calls.
 */
static void
SetCopyState(SDL_Renderer *renderer, const SDL_RenderCommand *cmd, const size_t constants_offset,
             id<MTLBuffer> mtlbufvertex, METAL_DrawStateCache *statecache)
{
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    SDL_Texture *texture = cmd->data.draw.texture;
    METAL_TextureData *texturedata = (__bridge METAL_TextureData *) texture->driverdata;

    /* The texture coordinates sit 8 floats past the start of this command's vertex data. */
    const size_t texcoords_offset = cmd->data.draw.first + (8 * sizeof (float));

    SetDrawState(renderer, cmd, texturedata.fragmentFunction, constants_offset, mtlbufvertex, statecache);

    [data.mtlcmdencoder setVertexBuffer:mtlbufvertex offset:texcoords_offset atIndex:1];  // texcoords

    if (texture == statecache->texture) {
        return;  /* same texture as the previous draw: bindings are still valid. */
    }

    /* Only rebind the sampler if there was no previous texture or it used a different one. */
    SDL_Texture *prevtexture = statecache->texture;
    METAL_TextureData *prevtexturedata = NULL;
    if (prevtexture != NULL) {
        prevtexturedata = (__bridge METAL_TextureData *) prevtexture->driverdata;
    }
    if (prevtexturedata == nil || prevtexturedata.mtlsampler != texturedata.mtlsampler) {
        [data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
    }

    [data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];

    /* Planar/semi-planar formats also need the chroma texture and their conversion constants. */
    if (texturedata.yuv || texturedata.nv12) {
        [data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture_uv atIndex:1];
        [data.mtlcmdencoder setFragmentBuffer:data.mtlbufconstants offset:texturedata.conversionBufferOffset atIndex:1];
    }

    statecache->texture = texture;
}
/* Execute a batch of queued render commands.
 *
 *  renderer  renderer whose driverdata holds the METAL_RenderData.
 *  cmd       head of the linked list of commands to run (may be NULL).
 *  vertices  CPU-side vertex/constant data referenced by the commands.
 *  vertsize  size of `vertices` in bytes; may be 0 when no command needs data.
 *
 * Returns 0 on success, or a negative error code (with the SDL error set) if
 * the vertex buffer could not be allocated.
 */
static int
METAL_RunCommandQueue(SDL_Renderer * renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
{ @autoreleasepool {
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    METAL_DrawStateCache statecache;
    id<MTLBuffer> mtlbufvertex = nil;

    /* Start with everything marked dirty so the first draw binds all state. */
    statecache.pipeline = nil;
    statecache.constants_offset = CONSTANTS_OFFSET_INVALID;
    statecache.texture = NULL;
    statecache.color_dirty = SDL_TRUE;
    statecache.cliprect_dirty = SDL_TRUE;
    statecache.viewport_dirty = SDL_TRUE;
    statecache.projection_offset = 0;
    statecache.color_offset = 0;

    // !!! FIXME: have a ring of pre-made MTLBuffers we cycle through? How expensive is creation?
    /* Only create the buffer when there is data to upload: a zero-length
     * MTLBuffer is not a valid request, and a queue that carries no vertex
     * data has nothing that would read from it. */
    if (vertsize > 0) {
        /* We can memcpy to a shared buffer from the CPU and read it from the GPU
         * without any extra copying. It's a bit slower on macOS to read shared
         * data from the GPU than to read managed/private data, but we avoid the
         * cost of copying the data and the code's simpler. Apple's best
         * practices guide recommends this approach for streamed vertex data.
         * TODO: this buffer is also used for constants. Is performance still
         * good for those, or should we have a managed buffer for them? */
        mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
        if (mtlbufvertex == nil) {
            /* Don't memcpy into a NULL contents pointer if the allocation failed. */
            return SDL_OutOfMemory();
        }
        #if !__has_feature(objc_arc)
        [mtlbufvertex autorelease];
        #endif
        mtlbufvertex.label = @"SDL vertex data";
        SDL_memcpy([mtlbufvertex contents], vertices, vertsize);
    }

    // If there's a command buffer here unexpectedly (did the app request one?), commit it so we can start fresh.
    [data.mtlcmdencoder endEncoding];
    [data.mtlcmdbuffer commit];
    data.mtlcmdencoder = nil;
    data.mtlcmdbuffer = nil;

    while (cmd) {
        switch (cmd->command) {
            case SDL_RENDERCMD_SETVIEWPORT: {
                /* Just record the new state; it is applied lazily by the next draw. */
                SDL_memcpy(&statecache.viewport, &cmd->data.viewport.rect, sizeof (statecache.viewport));
                statecache.projection_offset = cmd->data.viewport.first;
                statecache.viewport_dirty = SDL_TRUE;
                break;
            }

            case SDL_RENDERCMD_SETCLIPRECT: {
                SDL_memcpy(&statecache.cliprect, &cmd->data.cliprect.rect, sizeof (statecache.cliprect));
                statecache.cliprect_enabled = cmd->data.cliprect.enabled;
                statecache.cliprect_dirty = SDL_TRUE;
                break;
            }

            case SDL_RENDERCMD_SETDRAWCOLOR: {
                statecache.color_offset = cmd->data.color.first;
                statecache.color_dirty = SDL_TRUE;
                break;
            }

            case SDL_RENDERCMD_CLEAR: {
                /* If we're already encoding a command buffer, end it and start a new one:
                   starting a new encoder will let us use a hardware clear operation via
                   MTLLoadActionClear. */
                if (data.mtlcmdencoder != nil) {
                    [data.mtlcmdencoder endEncoding];

                    // !!! FIXME: have to commit, or an uncommitted but enqueued buffer will prevent the frame from finishing.
                    [data.mtlcmdbuffer commit];
                    data.mtlcmdencoder = nil;
                    data.mtlcmdbuffer = nil;
                }

                // force all this state to be reconfigured on next command buffer.
                statecache.pipeline = nil;
                statecache.constants_offset = CONSTANTS_OFFSET_INVALID;
                statecache.texture = NULL;
                statecache.color_dirty = SDL_TRUE;
                statecache.cliprect_dirty = SDL_TRUE;
                statecache.viewport_dirty = SDL_TRUE;

                const Uint8 r = cmd->data.color.r;
                const Uint8 g = cmd->data.color.g;
                const Uint8 b = cmd->data.color.b;
                const Uint8 a = cmd->data.color.a;
                MTLClearColor color = MTLClearColorMake(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f);

                // get new command encoder, set up with an initial clear operation.
                METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionClear, &color);
                break;
            }

            case SDL_RENDERCMD_DRAW_POINTS:
            case SDL_RENDERCMD_DRAW_LINES: {
                const size_t count = cmd->data.draw.count;
                const MTLPrimitiveType primtype = (cmd->command == SDL_RENDERCMD_DRAW_POINTS) ? MTLPrimitiveTypePoint : MTLPrimitiveTypeLineStrip;
                SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, mtlbufvertex, &statecache);
                [data.mtlcmdencoder drawPrimitives:primtype vertexStart:0 vertexCount:count];
                break;
            }

            case SDL_RENDERCMD_FILL_RECTS: {
                const size_t count = cmd->data.draw.count;
                size_t start = 0;
                SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
                /* Each rectangle is drawn as its own 4-vertex triangle strip. */
                for (size_t i = 0; i < count; i++, start += 4) {  // !!! FIXME: can we do all of these with a single draw call, using MTLPrimitiveTypeTriangle and an index buffer?
                    [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:start vertexCount:4];
                }
                break;
            }

            case SDL_RENDERCMD_COPY: {
                SetCopyState(renderer, cmd, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
                [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
                break;
            }

            case SDL_RENDERCMD_COPY_EX: {
                /* The per-draw transform lives in the vertex buffer, so no shared constant is bound. */
                SetCopyState(renderer, cmd, CONSTANTS_OFFSET_INVALID, mtlbufvertex, &statecache);
                [data.mtlcmdencoder setVertexBuffer:mtlbufvertex offset:cmd->data.draw.count atIndex:3];  // transform
                [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
                break;
            }

            case SDL_RENDERCMD_NO_OP:
                break;
        }
        cmd = cmd->next;
    }

    return 0;
}}
/* Read back a rectangle of the current render target.
 *
 *  renderer      renderer whose driverdata holds the METAL_RenderData.
 *  rect          region of the target to read.
 *  pixel_format  SDL pixel format the caller wants in `pixels`.
 *  pixels        destination buffer.
 *  pitch         destination row stride in bytes.
 *
 * Returns the result of SDL_ConvertPixels, or the SDL_OutOfMemory() error
 * code if the temporary buffer can't be allocated.
 */
static int
METAL_RenderReadPixels(SDL_Renderer * renderer, const SDL_Rect * rect,
                       Uint32 pixel_format, void * pixels, int pitch)
{ @autoreleasepool {
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL);

    [data.mtlcmdencoder endEncoding];

    id<MTLTexture> mtltexture = data.mtlpassdesc.colorAttachments[0].texture;

#ifdef __MACOSX__
    /* On macOS a managed-storage texture keeps separate CPU and GPU copies.
     * getBytes reads the CPU copy, so ask the GPU to write its results back
     * before the command buffer is committed. */
    if (mtltexture.storageMode == MTLStorageModeManaged) {
        id<MTLBlitCommandEncoder> blit = [data.mtlcmdbuffer blitCommandEncoder];
        [blit synchronizeResource:mtltexture];
        [blit endEncoding];
    }
#endif

    // Commit any current command buffer, and waitUntilCompleted, so any output is ready to be read.
    [data.mtlcmdbuffer commit];
    [data.mtlcmdbuffer waitUntilCompleted];
    data.mtlcmdencoder = nil;
    data.mtlcmdbuffer = nil;

    MTLRegion mtlregion = MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h);

    // we only do BGRA8 or RGBA8 at the moment, so 4 will do.
    const int temp_pitch = rect->w * 4;
    void *temp_pixels = SDL_malloc(temp_pitch * rect->h);
    if (!temp_pixels) {
        return SDL_OutOfMemory();
    }

    [mtltexture getBytes:temp_pixels bytesPerRow:temp_pitch fromRegion:mtlregion mipmapLevel:0];

    /* Convert from the texture's native layout to whatever the caller asked for. */
    const Uint32 temp_format = (mtltexture.pixelFormat == MTLPixelFormatBGRA8Unorm) ? SDL_PIXELFORMAT_ARGB8888 : SDL_PIXELFORMAT_ABGR8888;
    const int status = SDL_ConvertPixels(rect->w, rect->h, temp_format, temp_pixels, temp_pitch, pixel_format, pixels, pitch);
    SDL_free(temp_pixels);
    return status;
}}
/* Finish the current frame: end encoding, schedule the back buffer (if any)
 * for presentation, commit the command buffer and drop the per-frame objects. */
static void
METAL_RenderPresent(SDL_Renderer * renderer)
{ @autoreleasepool {
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;

    /* Messaging nil is a no-op, so this is safe when no encoder is active. */
    [data.mtlcmdencoder endEncoding];

    if (data.mtlbackbuffer != nil) {
        [data.mtlcmdbuffer presentDrawable:data.mtlbackbuffer];
    }

    /* Likewise a no-op when there is no command buffer. */
    [data.mtlcmdbuffer commit];

    /* The next frame starts from scratch. */
    data.mtlbackbuffer = nil;
    data.mtlcmdbuffer = nil;
    data.mtlcmdencoder = nil;
}}
/* Release the ownership reference stored in texture->driverdata and clear
 * the pointer so it can't be used again. */
static void
METAL_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{ @autoreleasepool {
    void *driverdata = texture->driverdata;

    texture->driverdata = NULL;
    CFBridgingRelease(driverdata);
}}
/* Tear down the renderer: end any active encoder, destroy the cached
 * pipelines, release the driver data and free the SDL_Renderer itself. */
static void
METAL_DestroyRenderer(SDL_Renderer * renderer)
{ @autoreleasepool {
    void *driverdata = renderer->driverdata;

    if (driverdata != NULL) {
        /* Take back the reference held by renderer->driverdata. */
        METAL_RenderData *data = CFBridgingRelease(driverdata);

        /* Messaging nil is a no-op, so no check is needed when no encoder is active. */
        [data.mtlcmdencoder endEncoding];

        DestroyAllPipelines(data.allpipelines, data.pipelinescount);
    }

    SDL_free(renderer);
}}
/* Return the renderer's Metal layer as an untyped pointer; no ownership is transferred. */
static void *
METAL_GetMetalLayer(SDL_Renderer * renderer)
{ @autoreleasepool {
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    void *layer = (__bridge void *) data.mtllayer;

    return layer;
}}
/* Return the active render command encoder as an untyped pointer (no
 * ownership is transferred), activating one with a load action first. */
static void *
METAL_GetMetalCommandEncoder(SDL_Renderer * renderer)
{ @autoreleasepool {
    METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;

    METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL);

    void *encoder = (__bridge void *) data.mtlcmdencoder;
    return encoder;
}}
/* Create a Metal renderer for `window`.
 *
 *  window  window to attach a Metal view/layer to.
 *  flags   SDL_RENDERER_* flags; only SDL_RENDERER_PRESENTVSYNC is read here.
 *
 * Returns the new renderer, or NULL if window-manager info is unavailable,
 * Metal is not available, the renderer can't be allocated, or no Metal
 * device can be obtained.
 */
static SDL_Renderer *
METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
{ @autoreleasepool {
    SDL_Renderer *renderer = NULL;
    METAL_RenderData *data = NULL;
    id<MTLDevice> mtldevice = nil;
    SDL_SysWMinfo syswm;

    SDL_VERSION(&syswm.version);
    if (!SDL_GetWindowWMInfo(window, &syswm)) {
        return NULL;
    }

    if (IsMetalAvailable(&syswm) == -1) {
        return NULL;
    }

    renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
    if (!renderer) {
        SDL_OutOfMemory();
        return NULL;
    }

    // !!! FIXME: MTLCopyAllDevices() can find other GPUs on macOS...
    mtldevice = MTLCreateSystemDefaultDevice();

    if (mtldevice == nil) {
        SDL_free(renderer);
        SDL_SetError("Failed to obtain Metal device");
        return NULL;
    }

    // !!! FIXME: error checking on all of this.
    data = [[METAL_RenderData alloc] init];

    /* renderer->driverdata holds its own reference; METAL_DestroyRenderer gives it back. */
    renderer->driverdata = (void*)CFBridgingRetain(data);
    renderer->window = window;

#ifdef __MACOSX__
    NSView *view = Cocoa_Mtl_AddMetalView(window);
    CAMetalLayer *layer = (CAMetalLayer *)[view layer];

    layer.device = mtldevice;

    //layer.colorspace = nil;

#else
    UIView *view = UIKit_Mtl_AddMetalView(window);
    CAMetalLayer *layer = (CAMetalLayer *)[view layer];
#endif

    // Necessary for RenderReadPixels.
    layer.framebufferOnly = NO;

    data.mtldevice = layer.device;
    data.mtllayer = layer;
    id<MTLCommandQueue> mtlcmdqueue = [data.mtldevice newCommandQueue];
    data.mtlcmdqueue = mtlcmdqueue;
    data.mtlcmdqueue.label = @"SDL Metal Renderer";
    data.mtlpassdesc = [MTLRenderPassDescriptor renderPassDescriptor];

    NSError *err = nil;

    // The compiled .metallib is embedded in a static array in a header file
    // but the original shader source code is in SDL_shaders_metal.metal.
    dispatch_data_t mtllibdata = dispatch_data_create(sdl_metallib, sdl_metallib_len, dispatch_get_global_queue(0, 0), ^{});
    id<MTLLibrary> mtllibrary = [data.mtldevice newLibraryWithData:mtllibdata error:&err];
    data.mtllibrary = mtllibrary;
    SDL_assert(err == nil);
#if !__has_feature(objc_arc)
    dispatch_release(mtllibdata);
#endif
    data.mtllibrary.label = @"SDL Metal renderer shader library";

    /* Do some shader pipeline state loading up-front rather than on demand. */
    data.pipelinescount = 0;
    data.allpipelines = NULL;
    ChooseShaderPipelines(data, MTLPixelFormatBGRA8Unorm);

    /* One descriptor is reused to build both the nearest and the linear sampler. */
    MTLSamplerDescriptor *samplerdesc = [[MTLSamplerDescriptor alloc] init];

    samplerdesc.minFilter = MTLSamplerMinMagFilterNearest;
    samplerdesc.magFilter = MTLSamplerMinMagFilterNearest;
    id<MTLSamplerState> mtlsamplernearest = [data.mtldevice newSamplerStateWithDescriptor:samplerdesc];
    data.mtlsamplernearest = mtlsamplernearest;

    samplerdesc.minFilter = MTLSamplerMinMagFilterLinear;
    samplerdesc.magFilter = MTLSamplerMinMagFilterLinear;
    id<MTLSamplerState> mtlsamplerlinear = [data.mtldevice newSamplerStateWithDescriptor:samplerdesc];
    data.mtlsamplerlinear = mtlsamplerlinear;

    /* Note: matrices are column major. */
    float identitytransform[16] = {
        1.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 1.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 1.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f,
    };

    float halfpixeltransform[16] = {
        1.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 1.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 1.0f, 0.0f,
        0.5f, 0.5f, 0.0f, 1.0f,
    };

    /* Metal pads float3s to 16 bytes. */
    float decodetransformJPEG[4*4] = {
        0.0, -0.501960814, -0.501960814, 0.0, /* offset */
        1.0000,  0.0000,  1.4020, 0.0,        /* Rcoeff */
        1.0000, -0.3441, -0.7141, 0.0,        /* Gcoeff */
        1.0000,  1.7720,  0.0000, 0.0,        /* Bcoeff */
    };

    float decodetransformBT601[4*4] = {
        -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
        1.1644,  0.0000,  1.5960, 0.0,                  /* Rcoeff */
        1.1644, -0.3918, -0.8130, 0.0,                  /* Gcoeff */
        1.1644,  2.0172,  0.0000, 0.0,                  /* Bcoeff */
    };

    /* Limited-range BT.709. These must not be the JPEG values: BT.709 uses a
     * 16/255 luma offset and its own chroma coefficients. */
    float decodetransformBT709[4*4] = {
        -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
        1.1644,  0.0000,  1.7927, 0.0,                  /* Rcoeff */
        1.1644, -0.2132, -0.5329, 0.0,                  /* Gcoeff */
        1.1644,  2.1124,  0.0000, 0.0,                  /* Bcoeff */
    };

    /* The constants are written into a CPU-visible staging buffer, then
     * blitted into a private buffer that the draw calls read from. */
    id<MTLBuffer> mtlbufconstantstaging = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModeShared];
    #if !__has_feature(objc_arc)
    [mtlbufconstantstaging autorelease];
    #endif
    mtlbufconstantstaging.label = @"SDL constant staging data";

    id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
    data.mtlbufconstants = mtlbufconstants;
    data.mtlbufconstants.label = @"SDL constant data";

    char *constantdata = [mtlbufconstantstaging contents];
    SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
    SDL_memcpy(constantdata + CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, halfpixeltransform, sizeof(halfpixeltransform));
    SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_JPEG, decodetransformJPEG, sizeof(decodetransformJPEG));
    SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
    SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));

    id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
    id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];

    [blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:data.mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];

    [blitcmd endEncoding];
    [cmdbuffer commit];

    // !!! FIXME: force more clears here so all the drawables are sane to start, and our static buffers are definitely flushed.

    renderer->WindowEvent = METAL_WindowEvent;
    renderer->GetOutputSize = METAL_GetOutputSize;
    renderer->SupportsBlendMode = METAL_SupportsBlendMode;
    renderer->CreateTexture = METAL_CreateTexture;
    renderer->UpdateTexture = METAL_UpdateTexture;
    renderer->UpdateTextureYUV = METAL_UpdateTextureYUV;
    renderer->LockTexture = METAL_LockTexture;
    renderer->UnlockTexture = METAL_UnlockTexture;
    renderer->SetRenderTarget = METAL_SetRenderTarget;
    renderer->QueueSetViewport = METAL_QueueSetViewport;
    renderer->QueueSetDrawColor = METAL_QueueSetDrawColor;
    renderer->QueueDrawPoints = METAL_QueueDrawPoints;
    renderer->QueueDrawLines = METAL_QueueDrawPoints;  // lines and points queue the same way.
    renderer->QueueFillRects = METAL_QueueFillRects;
    renderer->QueueCopy = METAL_QueueCopy;
    renderer->QueueCopyEx = METAL_QueueCopyEx;
    renderer->RunCommandQueue = METAL_RunCommandQueue;
    renderer->RenderReadPixels = METAL_RenderReadPixels;
    renderer->RenderPresent = METAL_RenderPresent;
    renderer->DestroyTexture = METAL_DestroyTexture;
    renderer->DestroyRenderer = METAL_DestroyRenderer;
    renderer->GetMetalLayer = METAL_GetMetalLayer;
    renderer->GetMetalCommandEncoder = METAL_GetMetalCommandEncoder;

    renderer->info = METAL_RenderDriver.info;
    renderer->info.flags = (SDL_RENDERER_ACCELERATED | SDL_RENDERER_TARGETTEXTURE);

    renderer->always_batch = SDL_TRUE;

    /* Where display sync can be toggled on the layer, honor the caller's
     * request; otherwise report vsync as always on. */
#if defined(__MACOSX__) && defined(MAC_OS_X_VERSION_10_13)
    if (@available(macOS 10.13, *)) {
        data.mtllayer.displaySyncEnabled = (flags & SDL_RENDERER_PRESENTVSYNC) != 0;
    } else
#endif
    {
        renderer->info.flags |= SDL_RENDERER_PRESENTVSYNC;
    }

    /* https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
    int maxtexsize = 4096;
#if defined(__MACOSX__)
    maxtexsize = 16384;
#elif defined(__TVOS__)
    maxtexsize = 8192;
#ifdef __TVOS_11_0
    if (@available(tvOS 11.0, *)) {
        if ([mtldevice supportsFeatureSet:MTLFeatureSet_tvOS_GPUFamily2_v1]) {
            maxtexsize = 16384;
        }
    }
#endif
#else
#ifdef __IPHONE_11_0
    if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
        maxtexsize = 16384;
    } else
#endif
#ifdef __IPHONE_10_0
    if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
        maxtexsize = 16384;
    } else
#endif
    if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v2] || [mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v2]) {
        maxtexsize = 8192;
    } else {
        maxtexsize = 4096;
    }
#endif

    renderer->info.max_texture_width = maxtexsize;
    renderer->info.max_texture_height = maxtexsize;

    /* Without ARC, drop the local references now that `data` (and the
     * reference stored in renderer->driverdata) keep these objects. */
#if !__has_feature(objc_arc)
    [mtlcmdqueue release];
    [mtllibrary release];
    [samplerdesc release];
    [mtlsamplernearest release];
    [mtlsamplerlinear release];
    [mtlbufconstants release];
    [view release];
    [data release];
    [mtldevice release];
#endif

    return renderer;
}}
/* Driver entry for the Metal renderer: the factory function plus the
 * capabilities advertised before a renderer is created. */
SDL_RenderDriver METAL_RenderDriver = {
    .CreateRenderer = METAL_CreateRenderer,
    .info = {
        .name = "metal",
        .flags = (SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_TARGETTEXTURE),
        .num_texture_formats = 6,
        .texture_formats = {
            SDL_PIXELFORMAT_ARGB8888,
            SDL_PIXELFORMAT_ABGR8888,
            SDL_PIXELFORMAT_YV12,
            SDL_PIXELFORMAT_IYUV,
            SDL_PIXELFORMAT_NV12,
            SDL_PIXELFORMAT_NV21
        },
        /* Left at 0 here; METAL_CreateRenderer sets the real limits per device. */
        .max_texture_width = 0,
        .max_texture_height = 0,
    }
};
1541 #endif /* SDL_VIDEO_RENDER_METAL && !SDL_RENDER_DISABLED */
1543 /* vi: set ts=4 sw=4 expandtab: */