src/render/metal/SDL_render_metal.m
author Sam Lantinga
Mon, 19 Oct 2020 17:29:16 -0700
changeset 14174 71e97db0ba5d
parent 14095 3093f8936e5b
permissions -rw-r--r--
Removed debug log message
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../../SDL_internal.h"
    22 
    23 #if SDL_VIDEO_RENDER_METAL && !SDL_RENDER_DISABLED
    24 
    25 #include "SDL_hints.h"
    26 #include "SDL_assert.h"
    27 #include "SDL_syswm.h"
    28 #include "SDL_metal.h"
    29 #include "../SDL_sysrender.h"
    30 
    31 #include <Availability.h>
    32 #import <Metal/Metal.h>
    33 #import <QuartzCore/CAMetalLayer.h>
    34 
    35 #ifdef __MACOSX__
    36 #import <AppKit/NSView.h>
    37 #endif
    38 
    39 /* Regenerate these with build-metal-shaders.sh */
    40 #ifdef __MACOSX__
    41 #include "SDL_shaders_metal_osx.h"
    42 #elif defined(__TVOS__)
    43 #include "SDL_shaders_metal_tvos.h"
    44 #else
    45 #include "SDL_shaders_metal_ios.h"
    46 #endif
    47 
    48 /* Apple Metal renderer implementation */
    49 
    50 /* Used to re-create the window with Metal capability */
    51 extern int SDL_RecreateWindow(SDL_Window * window, Uint32 flags);
    52 
    53 /* macOS requires constants in a buffer to have a 256 byte alignment. */
    54 /* Use native type alignments from https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf */
    55 #ifdef __MACOSX__
    56 #define CONSTANT_ALIGN(x) (256)
    57 #else
    58 #define CONSTANT_ALIGN(x) (x < 4 ? 4 : x)
    59 #endif
    60 
    61 #define DEVICE_ALIGN(x) (x < 4 ? 4 : x)
    62 
    63 #define ALIGN_CONSTANTS(align, size) ((size + CONSTANT_ALIGN(align) - 1) & (~(CONSTANT_ALIGN(align) - 1)))
    64 
    65 static const size_t CONSTANTS_OFFSET_INVALID = 0xFFFFFFFF;
    66 static const size_t CONSTANTS_OFFSET_IDENTITY = 0;
    67 static const size_t CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM = ALIGN_CONSTANTS(16, CONSTANTS_OFFSET_IDENTITY + sizeof(float) * 16);
    68 static const size_t CONSTANTS_OFFSET_DECODE_JPEG = ALIGN_CONSTANTS(16, CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16);
    69 static const size_t CONSTANTS_OFFSET_DECODE_BT601 = ALIGN_CONSTANTS(16, CONSTANTS_OFFSET_DECODE_JPEG + sizeof(float) * 4 * 4);
    70 static const size_t CONSTANTS_OFFSET_DECODE_BT709 = ALIGN_CONSTANTS(16, CONSTANTS_OFFSET_DECODE_BT601 + sizeof(float) * 4 * 4);
    71 static const size_t CONSTANTS_LENGTH = CONSTANTS_OFFSET_DECODE_BT709 + sizeof(float) * 4 * 4;
    72 
    73 typedef enum SDL_MetalVertexFunction
    74 {
    75     SDL_METAL_VERTEX_SOLID,
    76     SDL_METAL_VERTEX_COPY,
    77 } SDL_MetalVertexFunction;
    78 
    79 typedef enum SDL_MetalFragmentFunction
    80 {
    81     SDL_METAL_FRAGMENT_SOLID = 0,
    82     SDL_METAL_FRAGMENT_COPY,
    83     SDL_METAL_FRAGMENT_YUV,
    84     SDL_METAL_FRAGMENT_NV12,
    85     SDL_METAL_FRAGMENT_NV21,
    86     SDL_METAL_FRAGMENT_COUNT,
    87 } SDL_MetalFragmentFunction;
    88 
/* One compiled pipeline state together with the blend mode it was built for. */
typedef struct METAL_PipelineState
{
    SDL_BlendMode blendMode;
    /* An id<MTLRenderPipelineState> stored via CFBridgingRetain(); released
     * with CFBridgingRelease() in DestroyPipelineCache(). */
    void *pipe;
} METAL_PipelineState;

/* All pipeline states built so far for one vertex/fragment shader pair and
 * one render target pixel format. */
typedef struct METAL_PipelineCache
{
    METAL_PipelineState *states;  /* SDL_realloc'd array, grown one entry at a time */
    int count;                    /* number of valid entries in `states` */
    SDL_MetalVertexFunction vertexFunction;
    SDL_MetalFragmentFunction fragmentFunction;
    MTLPixelFormat renderTargetFormat;
    const char *label;            /* prefix of each pipeline's debug label; not copied */
} METAL_PipelineCache;

/* Each shader combination used by drawing functions has a separate pipeline
 * cache, and we have a separate list of caches for each render target pixel
 * format. This is more efficient than iterating over a global cache to find
 * the pipeline based on the specified shader combination and RT pixel format,
 * since we know what the RT pixel format is when we set the render target, and
 * we know what the shader combination is inside each drawing function's code. */
typedef struct METAL_ShaderPipelines
{
    MTLPixelFormat renderTargetFormat;
    /* Indexed by SDL_MetalFragmentFunction. */
    METAL_PipelineCache caches[SDL_METAL_FRAGMENT_COUNT];
} METAL_ShaderPipelines;
   116 
/* Per-renderer state; an instance is stored (bridged) in
 * SDL_Renderer::driverdata. */
@interface METAL_RenderData : NSObject
    @property (nonatomic, retain) id<MTLDevice> mtldevice;
    @property (nonatomic, retain) id<MTLCommandQueue> mtlcmdqueue;
    /* Command buffer currently being recorded; nil when none is open. */
    @property (nonatomic, retain) id<MTLCommandBuffer> mtlcmdbuffer;
    /* Encoder of the open render pass; nil means the next draw must start one. */
    @property (nonatomic, retain) id<MTLRenderCommandEncoder> mtlcmdencoder;
    @property (nonatomic, retain) id<MTLLibrary> mtllibrary;
    /* Drawable obtained from mtllayer; nil until one is requested. */
    @property (nonatomic, retain) id<CAMetalDrawable> mtlbackbuffer;
    @property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
    @property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
    @property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
    @property (nonatomic, retain) id<MTLBuffer> mtlbufquadindices;
    @property (nonatomic, assign) SDL_MetalView mtlview;
    @property (nonatomic, retain) CAMetalLayer *mtllayer;
    /* Reused for every render pass; its first color attachment is rewritten each time. */
    @property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
    /* Points into allpipelines; selected for the current render target format. */
    @property (nonatomic, assign) METAL_ShaderPipelines *activepipelines;
    /* SDL_realloc'd array of pipelinescount entries, one per render target format. */
    @property (nonatomic, assign) METAL_ShaderPipelines *allpipelines;
    @property (nonatomic, assign) int pipelinescount;
@end

@implementation METAL_RenderData
/* Manual release of the retained properties; only compiled without ARC.
 * NOTE(review): mtlview and allpipelines are not released here - presumably
 * torn down by the renderer's destroy path; confirm. */
#if !__has_feature(objc_arc)
- (void)dealloc
{
    [_mtldevice release];
    [_mtlcmdqueue release];
    [_mtlcmdbuffer release];
    [_mtlcmdencoder release];
    [_mtllibrary release];
    [_mtlbackbuffer release];
    [_mtlsamplernearest release];
    [_mtlsamplerlinear release];
    [_mtlbufconstants release];
    [_mtlbufquadindices release];
    [_mtllayer release];
    [_mtlpassdesc release];
    [super dealloc];
}
#endif
@end
   156 
/* Per-texture state; an instance is stored (bridged) in
 * SDL_Texture::driverdata. */
@interface METAL_TextureData : NSObject
    /* Main texture (the only plane for RGB formats). */
    @property (nonatomic, retain) id<MTLTexture> mtltexture;
    /* Half-resolution chroma texture for YUV/NV12/NV21 formats; nil otherwise. */
    @property (nonatomic, retain) id<MTLTexture> mtltexture_uv;
    /* Nearest or linear sampler, chosen from the texture's scale mode. */
    @property (nonatomic, retain) id<MTLSamplerState> mtlsampler;
    @property (nonatomic, assign) SDL_MetalFragmentFunction fragmentFunction;
    @property (nonatomic, assign) BOOL yuv;   /* IYUV or YV12 */
    @property (nonatomic, assign) BOOL nv12;  /* NV12 or NV21 */
    /* One of the CONSTANTS_OFFSET_DECODE_* values (or 0) for YUV formats. */
    @property (nonatomic, assign) size_t conversionBufferOffset;
    /* While NO, a non-private texture may be filled directly with replaceRegion. */
    @property (nonatomic, assign) BOOL hasdata;

    @property (nonatomic, retain) id<MTLBuffer> lockedbuffer;
    @property (nonatomic, assign) SDL_Rect lockedrect;
@end

@implementation METAL_TextureData
/* Manual release of the retained properties; only compiled without ARC. */
#if !__has_feature(objc_arc)
- (void)dealloc
{
    [_mtltexture release];
    [_mtltexture_uv release];
    [_mtlsampler release];
    [_lockedbuffer release];
    [super dealloc];
}
#endif
@end
   183 
   184 static int
   185 IsMetalAvailable(const SDL_SysWMinfo *syswm)
   186 {
   187     if (syswm->subsystem != SDL_SYSWM_COCOA && syswm->subsystem != SDL_SYSWM_UIKIT) {
   188         return SDL_SetError("Metal render target only supports Cocoa and UIKit video targets at the moment.");
   189     }
   190 
   191     // this checks a weak symbol.
   192 #if (defined(__MACOSX__) && (MAC_OS_X_VERSION_MIN_REQUIRED < 101100))
   193     if (MTLCreateSystemDefaultDevice == NULL) {  // probably on 10.10 or lower.
   194         return SDL_SetError("Metal framework not available on this system");
   195     }
   196 #endif
   197 
   198     return 0;
   199 }
   200 
   201 static const MTLBlendOperation invalidBlendOperation = (MTLBlendOperation)0xFFFFFFFF;
   202 static const MTLBlendFactor invalidBlendFactor = (MTLBlendFactor)0xFFFFFFFF;
   203 
   204 static MTLBlendOperation
   205 GetBlendOperation(SDL_BlendOperation operation)
   206 {
   207     switch (operation) {
   208         case SDL_BLENDOPERATION_ADD: return MTLBlendOperationAdd;
   209         case SDL_BLENDOPERATION_SUBTRACT: return MTLBlendOperationSubtract;
   210         case SDL_BLENDOPERATION_REV_SUBTRACT: return MTLBlendOperationReverseSubtract;
   211         case SDL_BLENDOPERATION_MINIMUM: return MTLBlendOperationMin;
   212         case SDL_BLENDOPERATION_MAXIMUM: return MTLBlendOperationMax;
   213         default: return invalidBlendOperation;
   214     }
   215 }
   216 
   217 static MTLBlendFactor
   218 GetBlendFactor(SDL_BlendFactor factor)
   219 {
   220     switch (factor) {
   221         case SDL_BLENDFACTOR_ZERO: return MTLBlendFactorZero;
   222         case SDL_BLENDFACTOR_ONE: return MTLBlendFactorOne;
   223         case SDL_BLENDFACTOR_SRC_COLOR: return MTLBlendFactorSourceColor;
   224         case SDL_BLENDFACTOR_ONE_MINUS_SRC_COLOR: return MTLBlendFactorOneMinusSourceColor;
   225         case SDL_BLENDFACTOR_SRC_ALPHA: return MTLBlendFactorSourceAlpha;
   226         case SDL_BLENDFACTOR_ONE_MINUS_SRC_ALPHA: return MTLBlendFactorOneMinusSourceAlpha;
   227         case SDL_BLENDFACTOR_DST_COLOR: return MTLBlendFactorDestinationColor;
   228         case SDL_BLENDFACTOR_ONE_MINUS_DST_COLOR: return MTLBlendFactorOneMinusDestinationColor;
   229         case SDL_BLENDFACTOR_DST_ALPHA: return MTLBlendFactorDestinationAlpha;
   230         case SDL_BLENDFACTOR_ONE_MINUS_DST_ALPHA: return MTLBlendFactorOneMinusDestinationAlpha;
   231         default: return invalidBlendFactor;
   232     }
   233 }
   234 
   235 static NSString *
   236 GetVertexFunctionName(SDL_MetalVertexFunction function)
   237 {
   238     switch (function) {
   239         case SDL_METAL_VERTEX_SOLID: return @"SDL_Solid_vertex";
   240         case SDL_METAL_VERTEX_COPY: return @"SDL_Copy_vertex";
   241         default: return nil;
   242     }
   243 }
   244 
   245 static NSString *
   246 GetFragmentFunctionName(SDL_MetalFragmentFunction function)
   247 {
   248     switch (function) {
   249         case SDL_METAL_FRAGMENT_SOLID: return @"SDL_Solid_fragment";
   250         case SDL_METAL_FRAGMENT_COPY: return @"SDL_Copy_fragment";
   251         case SDL_METAL_FRAGMENT_YUV: return @"SDL_YUV_fragment";
   252         case SDL_METAL_FRAGMENT_NV12: return @"SDL_NV12_fragment";
   253         case SDL_METAL_FRAGMENT_NV21: return @"SDL_NV21_fragment";
   254         default: return nil;
   255     }
   256 }
   257 
   258 static id<MTLRenderPipelineState>
   259 MakePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache,
   260                   NSString *blendlabel, SDL_BlendMode blendmode)
   261 {
   262     id<MTLFunction> mtlvertfn = [data.mtllibrary newFunctionWithName:GetVertexFunctionName(cache->vertexFunction)];
   263     id<MTLFunction> mtlfragfn = [data.mtllibrary newFunctionWithName:GetFragmentFunctionName(cache->fragmentFunction)];
   264     SDL_assert(mtlvertfn != nil);
   265     SDL_assert(mtlfragfn != nil);
   266 
   267     MTLRenderPipelineDescriptor *mtlpipedesc = [[MTLRenderPipelineDescriptor alloc] init];
   268     mtlpipedesc.vertexFunction = mtlvertfn;
   269     mtlpipedesc.fragmentFunction = mtlfragfn;
   270 
   271     MTLVertexDescriptor *vertdesc = [MTLVertexDescriptor vertexDescriptor];
   272 
   273     switch (cache->vertexFunction) {
   274         case SDL_METAL_VERTEX_SOLID:
   275             /* position (float2) */
   276             vertdesc.layouts[0].stride = sizeof(float) * 2;
   277             vertdesc.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex;
   278 
   279             vertdesc.attributes[0].format = MTLVertexFormatFloat2;
   280             vertdesc.attributes[0].offset = 0;
   281             vertdesc.attributes[0].bufferIndex = 0;
   282             break;
   283         case SDL_METAL_VERTEX_COPY:
   284             /* position (float2), texcoord (float2) */
   285             vertdesc.layouts[0].stride = sizeof(float) * 4;
   286             vertdesc.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex;
   287 
   288             vertdesc.attributes[0].format = MTLVertexFormatFloat2;
   289             vertdesc.attributes[0].offset = 0;
   290             vertdesc.attributes[0].bufferIndex = 0;
   291 
   292             vertdesc.attributes[1].format = MTLVertexFormatFloat2;
   293             vertdesc.attributes[1].offset = sizeof(float) * 2;
   294             vertdesc.attributes[1].bufferIndex = 0;
   295             break;
   296     }
   297 
   298     mtlpipedesc.vertexDescriptor = vertdesc;
   299 
   300     MTLRenderPipelineColorAttachmentDescriptor *rtdesc = mtlpipedesc.colorAttachments[0];
   301     rtdesc.pixelFormat = cache->renderTargetFormat;
   302 
   303     if (blendmode != SDL_BLENDMODE_NONE) {
   304         rtdesc.blendingEnabled = YES;
   305         rtdesc.sourceRGBBlendFactor = GetBlendFactor(SDL_GetBlendModeSrcColorFactor(blendmode));
   306         rtdesc.destinationRGBBlendFactor = GetBlendFactor(SDL_GetBlendModeDstColorFactor(blendmode));
   307         rtdesc.rgbBlendOperation = GetBlendOperation(SDL_GetBlendModeColorOperation(blendmode));
   308         rtdesc.sourceAlphaBlendFactor = GetBlendFactor(SDL_GetBlendModeSrcAlphaFactor(blendmode));
   309         rtdesc.destinationAlphaBlendFactor = GetBlendFactor(SDL_GetBlendModeDstAlphaFactor(blendmode));
   310         rtdesc.alphaBlendOperation = GetBlendOperation(SDL_GetBlendModeAlphaOperation(blendmode));
   311     } else {
   312         rtdesc.blendingEnabled = NO;
   313     }
   314 
   315     mtlpipedesc.label = [@(cache->label) stringByAppendingString:blendlabel];
   316 
   317     NSError *err = nil;
   318     id<MTLRenderPipelineState> state = [data.mtldevice newRenderPipelineStateWithDescriptor:mtlpipedesc error:&err];
   319     SDL_assert(err == nil);
   320 
   321     METAL_PipelineState pipeline;
   322     pipeline.blendMode = blendmode;
   323     pipeline.pipe = (void *)CFBridgingRetain(state);
   324 
   325     METAL_PipelineState *states = SDL_realloc(cache->states, (cache->count + 1) * sizeof(pipeline));
   326 
   327 #if !__has_feature(objc_arc)
   328     [mtlpipedesc release];  // !!! FIXME: can these be reused for each creation, or does the pipeline obtain it?
   329     [mtlvertfn release];
   330     [mtlfragfn release];
   331     [state release];
   332 #endif
   333 
   334     if (states) {
   335         states[cache->count++] = pipeline;
   336         cache->states = states;
   337         return (__bridge id<MTLRenderPipelineState>)pipeline.pipe;
   338     } else {
   339         CFBridgingRelease(pipeline.pipe);
   340         SDL_OutOfMemory();
   341         return NULL;
   342     }
   343 }
   344 
   345 static void
   346 MakePipelineCache(METAL_RenderData *data, METAL_PipelineCache *cache, const char *label,
   347                   MTLPixelFormat rtformat, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn)
   348 {
   349     SDL_zerop(cache);
   350 
   351     cache->vertexFunction = vertfn;
   352     cache->fragmentFunction = fragfn;
   353     cache->renderTargetFormat = rtformat;
   354     cache->label = label;
   355 
   356     /* Create pipeline states for the default blend modes. Custom blend modes
   357      * will be added to the cache on-demand. */
   358     MakePipelineState(data, cache, @" (blend=none)", SDL_BLENDMODE_NONE);
   359     MakePipelineState(data, cache, @" (blend=blend)", SDL_BLENDMODE_BLEND);
   360     MakePipelineState(data, cache, @" (blend=add)", SDL_BLENDMODE_ADD);
   361     MakePipelineState(data, cache, @" (blend=mod)", SDL_BLENDMODE_MOD);
   362     MakePipelineState(data, cache, @" (blend=mul)", SDL_BLENDMODE_MUL);
   363 }
   364 
   365 static void
   366 DestroyPipelineCache(METAL_PipelineCache *cache)
   367 {
   368     if (cache != NULL) {
   369         for (int i = 0; i < cache->count; i++) {
   370             CFBridgingRelease(cache->states[i].pipe);
   371         }
   372 
   373         SDL_free(cache->states);
   374     }
   375 }
   376 
   377 void
   378 MakeShaderPipelines(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, MTLPixelFormat rtformat)
   379 {
   380     SDL_zerop(pipelines);
   381 
   382     pipelines->renderTargetFormat = rtformat;
   383 
   384     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_SOLID], "SDL primitives pipeline", rtformat, SDL_METAL_VERTEX_SOLID, SDL_METAL_FRAGMENT_SOLID);
   385     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_COPY], "SDL copy pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_COPY);
   386     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_YUV], "SDL YUV pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_YUV);
   387     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_NV12], "SDL NV12 pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_NV12);
   388     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_NV21], "SDL NV21 pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_NV21);
   389 }
   390 
   391 static METAL_ShaderPipelines *
   392 ChooseShaderPipelines(METAL_RenderData *data, MTLPixelFormat rtformat)
   393 {
   394     METAL_ShaderPipelines *allpipelines = data.allpipelines;
   395     int count = data.pipelinescount;
   396 
   397     for (int i = 0; i < count; i++) {
   398         if (allpipelines[i].renderTargetFormat == rtformat) {
   399             return &allpipelines[i];
   400         }
   401     }
   402 
   403     allpipelines = SDL_realloc(allpipelines, (count + 1) * sizeof(METAL_ShaderPipelines));
   404 
   405     if (allpipelines == NULL) {
   406         SDL_OutOfMemory();
   407         return NULL;
   408     }
   409 
   410     MakeShaderPipelines(data, &allpipelines[count], rtformat);
   411 
   412     data.allpipelines = allpipelines;
   413     data.pipelinescount = count + 1;
   414 
   415     return &data.allpipelines[count];
   416 }
   417 
   418 static void
   419 DestroyAllPipelines(METAL_ShaderPipelines *allpipelines, int count)
   420 {
   421     if (allpipelines != NULL) {
   422         for (int i = 0; i < count; i++) {
   423             for (int cache = 0; cache < SDL_METAL_FRAGMENT_COUNT; cache++) {
   424                 DestroyPipelineCache(&allpipelines[i].caches[cache]);
   425             }
   426         }
   427 
   428         SDL_free(allpipelines);
   429     }
   430 }
   431 
   432 static inline id<MTLRenderPipelineState>
   433 ChoosePipelineState(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, SDL_MetalFragmentFunction fragfn, SDL_BlendMode blendmode)
   434 {
   435     METAL_PipelineCache *cache = &pipelines->caches[fragfn];
   436 
   437     for (int i = 0; i < cache->count; i++) {
   438         if (cache->states[i].blendMode == blendmode) {
   439             return (__bridge id<MTLRenderPipelineState>)cache->states[i].pipe;
   440         }
   441     }
   442 
   443     return MakePipelineState(data, cache, [NSString stringWithFormat:@" (blend=custom 0x%x)", blendmode], blendmode);
   444 }
   445 
   446 static void
   447 METAL_ActivateRenderCommandEncoder(SDL_Renderer * renderer, MTLLoadAction load, MTLClearColor *clear_color, id<MTLBuffer> vertex_buffer)
   448 {
   449     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
   450 
   451     /* Our SetRenderTarget just signals that the next render operation should
   452      * set up a new render pass. This is where that work happens. */
   453     if (data.mtlcmdencoder == nil) {
   454         id<MTLTexture> mtltexture = nil;
   455 
   456         if (renderer->target != NULL) {
   457             METAL_TextureData *texdata = (__bridge METAL_TextureData *)renderer->target->driverdata;
   458             mtltexture = texdata.mtltexture;
   459         } else {
   460             if (data.mtlbackbuffer == nil) {
   461                 /* The backbuffer's contents aren't guaranteed to persist after
   462                  * presenting, so we can leave it undefined when loading it. */
   463                 data.mtlbackbuffer = [data.mtllayer nextDrawable];
   464                 if (load == MTLLoadActionLoad) {
   465                     load = MTLLoadActionDontCare;
   466                 }
   467             }
   468             mtltexture = data.mtlbackbuffer.texture;
   469         }
   470 
   471         SDL_assert(mtltexture);
   472 
   473         if (load == MTLLoadActionClear) {
   474             SDL_assert(clear_color != NULL);
   475             data.mtlpassdesc.colorAttachments[0].clearColor = *clear_color;
   476         }
   477 
   478         data.mtlpassdesc.colorAttachments[0].loadAction = load;
   479         data.mtlpassdesc.colorAttachments[0].texture = mtltexture;
   480 
   481         data.mtlcmdbuffer = [data.mtlcmdqueue commandBuffer];
   482         data.mtlcmdencoder = [data.mtlcmdbuffer renderCommandEncoderWithDescriptor:data.mtlpassdesc];
   483 
   484         if (data.mtlbackbuffer != nil && mtltexture == data.mtlbackbuffer.texture) {
   485             data.mtlcmdencoder.label = @"SDL metal renderer backbuffer";
   486         } else {
   487             data.mtlcmdencoder.label = @"SDL metal renderer render target";
   488         }
   489 
   490         /* Set up buffer bindings for positions, texcoords, and color once here,
   491          * the offsets are adjusted in the code that uses them. */
   492         if (vertex_buffer != nil) {
   493             [data.mtlcmdencoder setVertexBuffer:vertex_buffer offset:0 atIndex:0];
   494             [data.mtlcmdencoder setFragmentBuffer:vertex_buffer offset:0 atIndex:0];
   495         }
   496 
   497         data.activepipelines = ChooseShaderPipelines(data, mtltexture.pixelFormat);
   498 
   499         // make sure this has a definite place in the queue. This way it will
   500         //  execute reliably whether the app tries to make its own command buffers
   501         //  or whatever. This means we can _always_ batch rendering commands!
   502         [data.mtlcmdbuffer enqueue];
   503     }
   504 }
   505 
   506 static void
   507 METAL_WindowEvent(SDL_Renderer * renderer, const SDL_WindowEvent *event)
   508 {
   509     if (event->event == SDL_WINDOWEVENT_SHOWN ||
   510         event->event == SDL_WINDOWEVENT_HIDDEN) {
   511         // !!! FIXME: write me
   512     }
   513 }
   514 
   515 static int
   516 METAL_GetOutputSize(SDL_Renderer * renderer, int *w, int *h)
   517 { @autoreleasepool {
   518     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
   519     if (w) {
   520         *w = (int)data.mtllayer.drawableSize.width;
   521     }
   522     if (h) {
   523         *h = (int)data.mtllayer.drawableSize.height;
   524     }
   525     return 0;
   526 }}
   527 
   528 static SDL_bool
   529 METAL_SupportsBlendMode(SDL_Renderer * renderer, SDL_BlendMode blendMode)
   530 {
   531     SDL_BlendFactor srcColorFactor = SDL_GetBlendModeSrcColorFactor(blendMode);
   532     SDL_BlendFactor srcAlphaFactor = SDL_GetBlendModeSrcAlphaFactor(blendMode);
   533     SDL_BlendOperation colorOperation = SDL_GetBlendModeColorOperation(blendMode);
   534     SDL_BlendFactor dstColorFactor = SDL_GetBlendModeDstColorFactor(blendMode);
   535     SDL_BlendFactor dstAlphaFactor = SDL_GetBlendModeDstAlphaFactor(blendMode);
   536     SDL_BlendOperation alphaOperation = SDL_GetBlendModeAlphaOperation(blendMode);
   537 
   538     if (GetBlendFactor(srcColorFactor) == invalidBlendFactor ||
   539         GetBlendFactor(srcAlphaFactor) == invalidBlendFactor ||
   540         GetBlendOperation(colorOperation) == invalidBlendOperation ||
   541         GetBlendFactor(dstColorFactor) == invalidBlendFactor ||
   542         GetBlendFactor(dstAlphaFactor) == invalidBlendFactor ||
   543         GetBlendOperation(alphaOperation) == invalidBlendOperation) {
   544         return SDL_FALSE;
   545     }
   546     return SDL_TRUE;
   547 }
   548 
   549 static int
   550 METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
   551 { @autoreleasepool {
   552     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
   553     MTLPixelFormat pixfmt;
   554 
   555     switch (texture->format) {
   556         case SDL_PIXELFORMAT_ABGR8888:
   557             pixfmt = MTLPixelFormatRGBA8Unorm;
   558             break;
   559         case SDL_PIXELFORMAT_ARGB8888:
   560             pixfmt = MTLPixelFormatBGRA8Unorm;
   561             break;
   562         case SDL_PIXELFORMAT_IYUV:
   563         case SDL_PIXELFORMAT_YV12:
   564         case SDL_PIXELFORMAT_NV12:
   565         case SDL_PIXELFORMAT_NV21:
   566             pixfmt = MTLPixelFormatR8Unorm;
   567             break;
   568         default:
   569             return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format));
   570     }
   571 
   572     MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pixfmt
   573                                             width:(NSUInteger)texture->w height:(NSUInteger)texture->h mipmapped:NO];
   574 
   575     /* Not available in iOS 8. */
   576     if ([mtltexdesc respondsToSelector:@selector(usage)]) {
   577         if (texture->access == SDL_TEXTUREACCESS_TARGET) {
   578             mtltexdesc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget;
   579         } else {
   580             mtltexdesc.usage = MTLTextureUsageShaderRead;
   581         }
   582     }
   583     
   584     id<MTLTexture> mtltexture = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
   585     if (mtltexture == nil) {
   586         return SDL_SetError("Texture allocation failed");
   587     }
   588 
   589     id<MTLTexture> mtltexture_uv = nil;
   590 
   591     BOOL yuv = (texture->format == SDL_PIXELFORMAT_IYUV) || (texture->format == SDL_PIXELFORMAT_YV12);
   592     BOOL nv12 = (texture->format == SDL_PIXELFORMAT_NV12) || (texture->format == SDL_PIXELFORMAT_NV21);
   593 
   594     if (yuv) {
   595         mtltexdesc.pixelFormat = MTLPixelFormatR8Unorm;
   596         mtltexdesc.width = (texture->w + 1) / 2;
   597         mtltexdesc.height = (texture->h + 1) / 2;
   598         mtltexdesc.textureType = MTLTextureType2DArray;
   599         mtltexdesc.arrayLength = 2;
   600     } else if (nv12) {
   601         mtltexdesc.pixelFormat = MTLPixelFormatRG8Unorm;
   602         mtltexdesc.width = (texture->w + 1) / 2;
   603         mtltexdesc.height = (texture->h + 1) / 2;
   604     }
   605 
   606     if (yuv || nv12) {
   607         mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
   608         if (mtltexture_uv == nil) {
   609 #if !__has_feature(objc_arc)
   610             [mtltexture release];
   611 #endif
   612             return SDL_SetError("Texture allocation failed");
   613         }
   614     }
   615 
   616     METAL_TextureData *texturedata = [[METAL_TextureData alloc] init];
   617     if (texture->scaleMode == SDL_ScaleModeNearest) {
   618         texturedata.mtlsampler = data.mtlsamplernearest;
   619     } else {
   620         texturedata.mtlsampler = data.mtlsamplerlinear;
   621     }
   622     texturedata.mtltexture = mtltexture;
   623     texturedata.mtltexture_uv = mtltexture_uv;
   624 
   625     texturedata.yuv = yuv;
   626     texturedata.nv12 = nv12;
   627 
   628     if (yuv) {
   629         texturedata.fragmentFunction = SDL_METAL_FRAGMENT_YUV;
   630     } else if (texture->format == SDL_PIXELFORMAT_NV12) {
   631         texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV12;
   632     } else if (texture->format == SDL_PIXELFORMAT_NV21) {
   633         texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV21;
   634     } else {
   635         texturedata.fragmentFunction = SDL_METAL_FRAGMENT_COPY;
   636     }
   637 
   638     if (yuv || nv12) {
   639         size_t offset = 0;
   640         SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionModeForResolution(texture->w, texture->h);
   641         switch (mode) {
   642             case SDL_YUV_CONVERSION_JPEG: offset = CONSTANTS_OFFSET_DECODE_JPEG; break;
   643             case SDL_YUV_CONVERSION_BT601: offset = CONSTANTS_OFFSET_DECODE_BT601; break;
   644             case SDL_YUV_CONVERSION_BT709: offset = CONSTANTS_OFFSET_DECODE_BT709; break;
   645             default: offset = 0; break;
   646         }
   647         texturedata.conversionBufferOffset = offset;
   648     }
   649 
   650     texture->driverdata = (void*)CFBridgingRetain(texturedata);
   651 
   652 #if !__has_feature(objc_arc)
   653     [texturedata release];
   654     [mtltexture release];
   655     [mtltexture_uv release];
   656 #endif
   657 
   658     return 0;
   659 }}
   660 
   661 static void
   662 METAL_UploadTextureData(id<MTLTexture> texture, SDL_Rect rect, int slice,
   663                         const void * pixels, int pitch)
   664 {
   665     [texture replaceRegion:MTLRegionMake2D(rect.x, rect.y, rect.w, rect.h)
   666                mipmapLevel:0
   667                      slice:slice
   668                  withBytes:pixels
   669                bytesPerRow:pitch
   670              bytesPerImage:0];
   671 }
   672 
   673 static MTLStorageMode
   674 METAL_GetStorageMode(id<MTLResource> resource)
   675 {
   676     /* iOS 8 does not have this method. */
   677     if ([resource respondsToSelector:@selector(storageMode)]) {
   678         return resource.storageMode;
   679     }
   680     return MTLStorageModeShared;
   681 }
   682 
   /* Upload `pixels` (row stride `pitch` bytes) into region `rect` of slice
    * `slice` of `texture`. Returns 0 on success, or the result of
    * SDL_OutOfMemory() when a staging resource cannot be created.
    *
    * A texture that has not received data yet and is not in private storage is
    * written directly. Every other case goes through a temporary staging
    * texture that is blitted into place, which ends the current render encoder
    * and commits the current command buffer. */
   static int
   METAL_UpdateTextureInternal(SDL_Renderer * renderer, METAL_TextureData *texturedata,
                               id<MTLTexture> texture, SDL_Rect rect, int slice,
                               const void * pixels, int pitch)
   {
       METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
       MTLTextureDescriptor *stagingdesc = nil;
       id<MTLTexture> staging = nil;
       id<MTLBlitCommandEncoder> blit = nil;
       SDL_Rect stagingrect;

       /* Fast path: first upload into a managed or shared texture can use
        * replaceRegion on the destination itself. */
       if (!texturedata.hasdata && METAL_GetStorageMode(texture) != MTLStorageModePrivate) {
           METAL_UploadTextureData(texture, rect, slice, pixels, pitch);
           return 0;
       }

       /* The staging texture is exactly as large as the updated region. */
       stagingrect.x = 0;
       stagingrect.y = 0;
       stagingrect.w = rect.w;
       stagingrect.h = rect.h;

       stagingdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:texture.pixelFormat
                                                                        width:rect.w
                                                                       height:rect.h
                                                                    mipmapped:NO];
       if (stagingdesc == nil) {
           return SDL_OutOfMemory();
       }

       /* TODO: We could have a pool of textures or a MTLHeap we allocate from,
        * and release a staging texture back to the pool in the command buffer's
        * completion handler. */
       staging = [data.mtldevice newTextureWithDescriptor:stagingdesc];
       if (staging == nil) {
           return SDL_OutOfMemory();
       }

   #if !__has_feature(objc_arc)
       [staging autorelease];
   #endif

       METAL_UploadTextureData(staging, stagingrect, 0, pixels, pitch);

       /* A blit encoder cannot be opened while a render encoder is active. */
       if (data.mtlcmdencoder != nil) {
           [data.mtlcmdencoder endEncoding];
           data.mtlcmdencoder = nil;
       }

       if (data.mtlcmdbuffer == nil) {
           data.mtlcmdbuffer = [data.mtlcmdqueue commandBuffer];
       }

       blit = [data.mtlcmdbuffer blitCommandEncoder];

       [blit copyFromTexture:staging
                 sourceSlice:0
                 sourceLevel:0
                sourceOrigin:MTLOriginMake(0, 0, 0)
                  sourceSize:MTLSizeMake(rect.w, rect.h, 1)
                   toTexture:texture
            destinationSlice:slice
            destinationLevel:0
           destinationOrigin:MTLOriginMake(rect.x, rect.y, 0)];

       [blit endEncoding];

       /* TODO: This isn't very efficient for the YUV formats, which call
        * UpdateTextureInternal multiple times in a row. */
       [data.mtlcmdbuffer commit];
       data.mtlcmdbuffer = nil;

       return 0;
   }
   753 
   /* Update region `rect` of a texture from one contiguous pixel buffer.
    * For planar formats the chroma data is expected to follow the main plane in
    * the same buffer. Returns 0 on success and -1 if any plane upload fails. */
   static int
   METAL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                       const SDL_Rect * rect, const void *pixels, int pitch)
   { @autoreleasepool {
       METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
       const Uint8 *src = (const Uint8 *) pixels;

       /* Main (or only) plane. */
       if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture, *rect, 0, src, pitch) < 0) {
           return -1;
       }

       if (texturedata.yuv) {
           /* YV12 stores V before U, so the slices are swapped for it. */
           const int isYV12 = (texture->format == SDL_PIXELFORMAT_YV12);
           const int Uslice = isYV12 ? 1 : 0;
           const int Vslice = isYV12 ? 0 : 1;
           const int UVpitch = (pitch + 1) / 2;
           SDL_Rect UVrect;

           UVrect.x = rect->x / 2;
           UVrect.y = rect->y / 2;
           UVrect.w = (rect->w + 1) / 2;
           UVrect.h = (rect->h + 1) / 2;

           /* First chroma plane starts right after the main plane. */
           src += rect->h * pitch;
           if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, Uslice, src, UVpitch) < 0) {
               return -1;
           }

           /* Second chroma plane follows the first one. */
           src += UVrect.h * UVpitch;
           if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, Vslice, src, UVpitch) < 0) {
               return -1;
           }
       }

       if (texturedata.nv12) {
           const int UVpitch = 2 * ((pitch + 1) / 2);
           SDL_Rect UVrect;

           UVrect.x = rect->x / 2;
           UVrect.y = rect->y / 2;
           UVrect.w = (rect->w + 1) / 2;
           UVrect.h = (rect->h + 1) / 2;

           /* The interleaved chroma plane starts right after the main plane. */
           src += rect->h * pitch;
           if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, 0, src, UVpitch) < 0) {
               return -1;
           }
       }

       texturedata.hasdata = YES;

       return 0;
   }}
   798 
   /* Update region `rect` of a planar YUV texture from three separate planes,
    * each with its own row stride. The U plane goes to slice 0 and the V plane
    * to slice 1 of the chroma texture. Returns 0 on success (including for an
    * empty rectangle) and -1 if any plane upload fails. */
   static int
   METAL_UpdateTextureYUV(SDL_Renderer * renderer, SDL_Texture * texture,
                       const SDL_Rect * rect,
                       const Uint8 *Yplane, int Ypitch,
                       const Uint8 *Uplane, int Upitch,
                       const Uint8 *Vplane, int Vpitch)
   { @autoreleasepool {
       METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
       const int Uslice = 0;
       const int Vslice = 1;
       /* Chroma planes cover half the area, rounded up. */
       SDL_Rect UVrect = {rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2};

       /* Bail out if we're supposed to update an empty rectangle */
       if (rect->w <= 0 || rect->h <= 0) {
           return 0;
       }

       /* All three uploads use the same "negative means failure" test. */
       if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture, *rect, 0, Yplane, Ypitch) < 0) {
           return -1;
       }
       if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, Uslice, Uplane, Upitch) < 0) {
           return -1;
       }
       if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, Vslice, Vplane, Vpitch) < 0) {
           return -1;
       }

       texturedata.hasdata = YES;

       return 0;
   }}
   830 
   /* Lock region `rect` of a texture for writing.
    *
    * Allocates a shared-storage buffer sized for the region, stores it together
    * with the rectangle on the texture data, and hands its CPU address back in
    * `*pixels` with the row stride in `*pitch`. The data is copied into the
    * texture by METAL_UnlockTexture.
    *
    * Returns 0 on success, the result of SDL_SetError() for a rectangle with a
    * non-positive dimension, or the result of SDL_OutOfMemory() when the buffer
    * cannot be created. */
   static int
   METAL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                  const SDL_Rect * rect, void **pixels, int *pitch)
   { @autoreleasepool {
       METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
       METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
       int buffersize = 0;
       id<MTLBuffer> lockedbuffer = nil;

       if (rect->w <= 0 || rect->h <= 0) {
           return SDL_SetError("Invalid rectangle dimensions for LockTexture.");
       }

       *pitch = SDL_BYTESPERPIXEL(texture->format) * rect->w;

       /* Main plane. */
       buffersize = (*pitch) * rect->h;

       if (texturedata.yuv || texturedata.nv12) {
           /* Chroma data follows the main plane: (h + 1) / 2 rows, each taking
            * 2 * ((pitch + 1) / 2) bytes in total (two half-width planes, or one
            * interleaved plane). The inner parentheses matter: without them
            * the expression evaluates to pitch + 1. */
           const int chromarowbytes = 2 * ((*pitch + 1) / 2);
           buffersize += chromarowbytes * ((rect->h + 1) / 2);
       }

       lockedbuffer = [data.mtldevice newBufferWithLength:buffersize options:MTLResourceStorageModeShared];
       if (lockedbuffer == nil) {
           return SDL_OutOfMemory();
       }

       texturedata.lockedrect = *rect;
       texturedata.lockedbuffer = lockedbuffer;
       *pixels = [lockedbuffer contents];

       /* METAL_TextureData.lockedbuffer retains. */
   #if !__has_feature(objc_arc)
       [lockedbuffer release];
   #endif

       return 0;
   }}
   868 
   /* Finish a lock started by METAL_LockTexture: blit the contents of the locked
    * buffer into the texture (and its chroma texture for YUV/NV12 formats),
    * commit the command buffer, and drop the locked buffer. Does nothing when
    * the texture has no locked buffer. */
   static void
   METAL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
   { @autoreleasepool {
       METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
       METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
       const SDL_Rect locked = texturedata.lockedrect;
       const int mainpitch = SDL_BYTESPERPIXEL(texture->format) * locked.w;
       const SDL_Rect chroma = {locked.x / 2, locked.y / 2, (locked.w + 1) / 2, (locked.h + 1) / 2};
       id<MTLBlitCommandEncoder> blit = nil;

       if (texturedata.lockedbuffer == nil) {
           return;
       }

       /* A blit encoder cannot be opened while a render encoder is active. */
       if (data.mtlcmdencoder != nil) {
           [data.mtlcmdencoder endEncoding];
           data.mtlcmdencoder = nil;
       }

       if (data.mtlcmdbuffer == nil) {
           data.mtlcmdbuffer = [data.mtlcmdqueue commandBuffer];
       }

       blit = [data.mtlcmdbuffer blitCommandEncoder];

       /* Main plane sits at the start of the buffer. */
       [blit copyFromBuffer:texturedata.lockedbuffer
               sourceOffset:0
          sourceBytesPerRow:mainpitch
        sourceBytesPerImage:0
                 sourceSize:MTLSizeMake(locked.w, locked.h, 1)
                  toTexture:texturedata.mtltexture
           destinationSlice:0
           destinationLevel:0
          destinationOrigin:MTLOriginMake(locked.x, locked.y, 0)];

       if (texturedata.yuv) {
           /* YV12 stores V before U, so the slices are swapped for it. */
           const int isYV12 = (texture->format == SDL_PIXELFORMAT_YV12);
           const int Uslice = isYV12 ? 1 : 0;
           const int Vslice = isYV12 ? 0 : 1;
           const int planepitch = (mainpitch + 1) / 2;

           /* First chroma plane follows the main plane. */
           [blit copyFromBuffer:texturedata.lockedbuffer
                   sourceOffset:locked.h * mainpitch
              sourceBytesPerRow:planepitch
            sourceBytesPerImage:planepitch * chroma.h
                     sourceSize:MTLSizeMake(chroma.w, chroma.h, 1)
                      toTexture:texturedata.mtltexture_uv
               destinationSlice:Uslice
               destinationLevel:0
              destinationOrigin:MTLOriginMake(chroma.x, chroma.y, 0)];

           /* Second chroma plane follows the first one. */
           [blit copyFromBuffer:texturedata.lockedbuffer
                   sourceOffset:(locked.h * mainpitch) + chroma.h * planepitch
              sourceBytesPerRow:planepitch
            sourceBytesPerImage:planepitch * chroma.h
                     sourceSize:MTLSizeMake(chroma.w, chroma.h, 1)
                      toTexture:texturedata.mtltexture_uv
               destinationSlice:Vslice
               destinationLevel:0
              destinationOrigin:MTLOriginMake(chroma.x, chroma.y, 0)];
       }

       if (texturedata.nv12) {
           const int interleavedpitch = 2 * ((mainpitch + 1) / 2);

           /* Interleaved chroma plane follows the main plane. */
           [blit copyFromBuffer:texturedata.lockedbuffer
                   sourceOffset:locked.h * mainpitch
              sourceBytesPerRow:interleavedpitch
            sourceBytesPerImage:0
                     sourceSize:MTLSizeMake(chroma.w, chroma.h, 1)
                      toTexture:texturedata.mtltexture_uv
               destinationSlice:0
               destinationLevel:0
              destinationOrigin:MTLOriginMake(chroma.x, chroma.y, 0)];
       }

       [blit endEncoding];

       [data.mtlcmdbuffer commit];
       data.mtlcmdbuffer = nil;

       texturedata.lockedbuffer = nil; /* Retained property, so it calls release. */
       texturedata.hasdata = YES;
   }}
   951 
   /* Select the sampler a texture is drawn with: the renderer's nearest sampler
    * for SDL_ScaleModeNearest, its linear sampler for every other mode. */
   static void
   METAL_SetTextureScaleMode(SDL_Renderer * renderer, SDL_Texture * texture, SDL_ScaleMode scaleMode)
   { @autoreleasepool {
       METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
       METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
       const BOOL nearest = (scaleMode == SDL_ScaleModeNearest);

       texturedata.mtlsampler = nearest ? data.mtlsamplernearest : data.mtlsamplerlinear;
   }}
   964 
   /* Switch render targets. Any render pass still being encoded for the previous
    * target is ended and its command buffer committed. Always returns 0. */
   static int
   METAL_SetRenderTarget(SDL_Renderer * renderer, SDL_Texture * texture)
   { @autoreleasepool {
       METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;

       /* We don't begin a new render pass right away - we delay it until an actual
        * draw or clear happens. That way we can use hardware clears when possible,
        * which are only available when beginning a new render pass. */
       if (!data.mtlcmdencoder) {
           return 0;
       }

       /* End encoding for the previous render target so we can set up a new
        * render pass for this one. */
       [data.mtlcmdencoder endEncoding];
       [data.mtlcmdbuffer commit];

       data.mtlcmdencoder = nil;
       data.mtlcmdbuffer = nil;

       return 0;
   }}
   985 
   986 
   /* Normalize a value in the range 0.0f..len into the range 0.0f..1.0f. */
   static inline float
   normtex(const float _val, const float len)
   {
       const float normalized = _val / len;
       return normalized;
   }
   993 
   /* Queue a viewport change: reserve 16 floats in the vertex stream and fill
    * them with an orthographic projection for the viewport's size. The matrix
    * is left all-zero when the viewport has zero width or height.
    * Returns 0 on success, -1 if the vertex space cannot be allocated. */
   static int
   METAL_QueueSetViewport(SDL_Renderer * renderer, SDL_RenderCommand *cmd)
   {
       const size_t matrixlen = sizeof (float) * 4 * 4;
       const int vieww = cmd->data.viewport.rect.w;
       const int viewh = cmd->data.viewport.rect.h;
       float *m = (float *) SDL_AllocateRenderVertices(renderer, matrixlen, CONSTANT_ALIGN(16), &cmd->data.viewport.first);

       if (m == NULL) {
           return -1;
       }

       /* The 4x4 matrix is stored as 16 consecutive floats; element [i][j] is
        * at index i * 4 + j. */
       SDL_memset(m, '\0', matrixlen);
       if (vieww && viewh) {
           m[0] = 2.0f / vieww;     /* [0][0] */
           m[5] = -2.0f / viewh;    /* [1][1] */
           m[12] = -1.0f;           /* [3][0] */
           m[13] = 1.0f;            /* [3][1] */
           m[15] = 1.0f;            /* [3][3] */
       }

       return 0;
   }
  1018 
  /* Queue a draw-color change: store the command's 8-bit RGBA color in the
   * vertex stream as four floats scaled to 0..1.
   * Returns 0 on success, -1 if the vertex space cannot be allocated. */
  static int
  METAL_QueueSetDrawColor(SDL_Renderer *renderer, SDL_RenderCommand *cmd)
  {
      const size_t colorlen = sizeof (float) * 4;
      float *rgba = (float *) SDL_AllocateRenderVertices(renderer, colorlen, DEVICE_ALIGN(16), &cmd->data.color.first);

      if (rgba == NULL) {
          return -1;
      }

      rgba[0] = ((float)cmd->data.color.r) / 255.0f;
      rgba[1] = ((float)cmd->data.color.g) / 255.0f;
      rgba[2] = ((float)cmd->data.color.b) / 255.0f;
      rgba[3] = ((float)cmd->data.color.a) / 255.0f;

      return 0;
  }
  1033 
  /* Queue `count` points: copy them unchanged (two floats each) into the vertex
   * stream and record the count on the command.
   * Returns 0 on success, -1 if the vertex space cannot be allocated. */
  static int
  METAL_QueueDrawPoints(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FPoint * points, int count)
  {
      const size_t bytes = (sizeof (float) * 2) * count;
      float *dst = (float *) SDL_AllocateRenderVertices(renderer, bytes, DEVICE_ALIGN(8), &cmd->data.draw.first);

      if (dst == NULL) {
          return -1;
      }

      SDL_memcpy(dst, points, bytes);
      cmd->data.draw.count = count;

      return 0;
  }
  1046 
  /* Queue filled rectangles: emit four corner positions per rectangle into the
   * vertex stream. Rectangles with a non-positive width or height are dropped
   * and not counted; if none remain the command becomes a no-op.
   * Returns 0 on success, -1 if the vertex space cannot be allocated. */
  static int
  METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FRect * rects, int count)
  {
      const size_t bytes = (sizeof (float) * 8) * count;
      float *out = (float *) SDL_AllocateRenderVertices(renderer, bytes, DEVICE_ALIGN(8), &cmd->data.draw.first);
      int idx;

      if (out == NULL) {
          return -1;
      }

      cmd->data.draw.count = count;

      /* Quads in the following vertex order (matches the quad index buffer):
       * 1---3
       * | \ |
       * 0---2
       */
      for (idx = 0; idx < count; idx++) {
          const SDL_FRect *r = &rects[idx];

          if ((r->w <= 0.0f) || (r->h <= 0.0f)) {
              cmd->data.draw.count--;
              continue;
          }

          *(out++) = r->x;            /* vertex 0 */
          *(out++) = r->y + r->h;
          *(out++) = r->x;            /* vertex 1 */
          *(out++) = r->y;
          *(out++) = r->x + r->w;     /* vertex 2 */
          *(out++) = r->y + r->h;
          *(out++) = r->x + r->w;     /* vertex 3 */
          *(out++) = r->y;
      }

      if (cmd->data.draw.count == 0) {
          cmd->command = SDL_RENDERCMD_NO_OP;  /* nothing to do, just skip this one later. */
      }

      return 0;
  }
  1084 
  /* Queue a textured quad: emit four vertices of interleaved position (from
   * `dstrect`) and normalized texture coordinate (from `srcrect` divided by the
   * texture size). Returns 0 on success, -1 if the vertex space cannot be
   * allocated. */
  static int
  METAL_QueueCopy(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
                  const SDL_Rect * srcrect, const SDL_FRect * dstrect)
  {
      const float texw = (float) texture->w;
      const float texh = (float) texture->h;
      /* !!! FIXME: use an index buffer */
      const size_t bytes = (sizeof (float) * 16);
      float *v = (float *) SDL_AllocateRenderVertices(renderer, bytes, DEVICE_ALIGN(8), &cmd->data.draw.first);

      if (v == NULL) {
          return -1;
      }

      cmd->data.draw.count = 1;

      {
          const float minx = dstrect->x;
          const float miny = dstrect->y;
          const float maxx = dstrect->x + dstrect->w;
          const float maxy = dstrect->y + dstrect->h;
          const float minu = normtex(srcrect->x, texw);
          const float minv = normtex(srcrect->y, texh);
          const float maxu = normtex(srcrect->x + srcrect->w, texw);
          const float maxv = normtex(srcrect->y + srcrect->h, texh);

          /* Interleaved positions and texture coordinates */
          v[0] = minx;   v[1] = maxy;   v[2] = minu;   v[3] = maxv;
          v[4] = minx;   v[5] = miny;   v[6] = minu;   v[7] = minv;
          v[8] = maxx;   v[9] = maxy;   v[10] = maxu;  v[11] = maxv;
          v[12] = maxx;  v[13] = miny;  v[14] = maxu;  v[15] = minv;
      }

      return 0;
  }
  1123 
  /* Queue a rotated and/or flipped textured quad.
   *
   * Two allocations are made in the vertex stream: a 4x4 transform (rotation by
   * `angle` degrees plus a translation to dstrect origin + center), whose
   * offset is stored in cmd->data.draw.count, and four vertices of interleaved
   * center-relative position and texture coordinate, whose offset is stored in
   * cmd->data.draw.first. Flipping swaps the texture coordinate extremes.
   * Returns 0 on success, -1 if either allocation fails. */
  static int
  METAL_QueueCopyEx(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
                    const SDL_Rect * srcquad, const SDL_FRect * dstrect,
                    const double angle, const SDL_FPoint *center, const SDL_RendererFlip flip)
  {
      const float texw = (float) texture->w;
      const float texh = (float) texture->h;
      const float rads = (float)(M_PI * (float) angle / 180.0f);
      const float c = cosf(rads), s = sinf(rads);
      const size_t vertlen = (sizeof (float) * 32);
      float *xform;
      float *quad;

      /* cheat and store this offset in (count) because it needs to be aligned in ways other fields don't and we aren't using count otherwise. */
      xform = (float *) SDL_AllocateRenderVertices(renderer, vertlen, CONSTANT_ALIGN(16), &cmd->data.draw.count);
      if (xform == NULL) {
          return -1;
      }

      /* transform matrix: start from zero, then fill in the non-zero terms. */
      SDL_memset(xform, '\0', sizeof (*xform) * 16);
      xform[0] = c;                           /* rotation */
      xform[1] = s;
      xform[4] = -s;
      xform[5] = c;
      xform[10] = 1.0f;
      xform[12] = dstrect->x + center->x;     /* translation */
      xform[13] = dstrect->y + center->y;
      xform[15] = 1.0f;

      /* rest of the vertices don't need the aggressive alignment. Pack them in. */
      quad = (float *) SDL_AllocateRenderVertices(renderer, vertlen, DEVICE_ALIGN(8), &cmd->data.draw.first);
      if (quad == NULL) {
          return -1;
      }

      {
          const float u0 = normtex(srcquad->x, texw);
          const float u1 = normtex(srcquad->x + srcquad->w, texw);
          const float v0 = normtex(srcquad->y, texh);
          const float v1 = normtex(srcquad->y + srcquad->h, texh);
          const int fliph = (flip & SDL_FLIP_HORIZONTAL) != 0;
          const int flipv = (flip & SDL_FLIP_VERTICAL) != 0;
          const float minu = fliph ? u1 : u0;
          const float maxu = fliph ? u0 : u1;
          const float minv = flipv ? v1 : v0;
          const float maxv = flipv ? v0 : v1;
          const float left = -center->x;
          const float top = -center->y;
          const float right = dstrect->w - center->x;
          const float bottom = dstrect->h - center->y;

          /* Interleaved positions and texture coordinates */
          quad[0] = left;    quad[1] = bottom;   quad[2] = minu;    quad[3] = maxv;
          quad[4] = left;    quad[5] = top;      quad[6] = minu;    quad[7] = minv;
          quad[8] = right;   quad[9] = bottom;   quad[10] = maxu;   quad[11] = maxv;
          quad[12] = right;  quad[13] = top;     quad[14] = maxu;   quad[15] = minv;
      }

      return 0;
  }
  1201 
  1202 
  /* State tracked while running one command queue, so encoder state is only
   * re-sent when it changes. */
  typedef struct
  {
      /* Object references are not retained by the cache under ARC. */
  #if __has_feature(objc_arc)
      __unsafe_unretained
  #endif
      id<MTLRenderPipelineState> pipeline;    /* last pipeline state set on the encoder */
  #if __has_feature(objc_arc)
      __unsafe_unretained
  #endif
      id<MTLBuffer> vertex_buffer;            /* vertex buffer for this queue run */

      size_t constants_offset;                /* last constants offset bound at vertex index 3 */
      SDL_Texture *texture;                   /* last texture bound for copies */

      SDL_bool cliprect_dirty;                /* scissor must be re-sent */
      SDL_bool cliprect_enabled;
      SDL_Rect cliprect;

      SDL_bool viewport_dirty;                /* viewport and projection must be re-sent */
      SDL_Rect viewport;
      size_t projection_offset;               /* offset of the projection matrix in the vertex buffer */

      SDL_bool color_dirty;                   /* draw color offset must be re-sent */
      size_t color_offset;                    /* offset of the draw color in the fragment buffer */
  } METAL_DrawStateCache;
  1223 
  /* Bring the render command encoder up to date for one draw command.
   *
   * Activates the encoder, then re-sends only what the cache marks as stale:
   * viewport and projection, scissor rectangle, draw color offset, pipeline
   * state (chosen from `shader` and the command's blend mode) and the
   * constants bound at vertex index 3. Finally points vertex index 0 at the
   * command's own vertex data. */
  static void
  SetDrawState(SDL_Renderer *renderer, const SDL_RenderCommand *cmd, const SDL_MetalFragmentFunction shader,
               const size_t constants_offset, id<MTLBuffer> mtlbufvertex, METAL_DrawStateCache *statecache)
  {
      METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
      const SDL_BlendMode blendmode = cmd->data.draw.blend;
      id<MTLRenderPipelineState> pipeline;

      METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL, statecache->vertex_buffer);

      if (statecache->viewport_dirty) {
          const SDL_Rect *vp = &statecache->viewport;
          MTLViewport mtlviewport;

          mtlviewport.originX = vp->x;
          mtlviewport.originY = vp->y;
          mtlviewport.width = vp->w;
          mtlviewport.height = vp->h;
          mtlviewport.znear = 0.0;
          mtlviewport.zfar = 1.0;
          [data.mtlcmdencoder setViewport:mtlviewport];
          [data.mtlcmdencoder setVertexBuffer:mtlbufvertex offset:statecache->projection_offset atIndex:2];  /* projection */
          statecache->viewport_dirty = SDL_FALSE;
      }

      if (statecache->cliprect_dirty) {
          const SDL_Rect *vp = &statecache->viewport;
          const SDL_Rect *clip = &statecache->cliprect;
          MTLScissorRect scissor;

          /* The clip rectangle is relative to the viewport; with clipping
           * disabled the scissor is the viewport itself. */
          if (statecache->cliprect_enabled) {
              scissor.x = vp->x + clip->x;
              scissor.y = vp->y + clip->y;
              scissor.width = clip->w;
              scissor.height = clip->h;
          } else {
              scissor.x = vp->x;
              scissor.y = vp->y;
              scissor.width = vp->w;
              scissor.height = vp->h;
          }

          /* An empty scissor rectangle is never sent to the encoder. */
          if (scissor.width > 0 && scissor.height > 0) {
              [data.mtlcmdencoder setScissorRect:scissor];
          }
          statecache->cliprect_dirty = SDL_FALSE;
      }

      if (statecache->color_dirty) {
          [data.mtlcmdencoder setFragmentBufferOffset:statecache->color_offset atIndex:0];
          statecache->color_dirty = SDL_FALSE;
      }

      pipeline = ChoosePipelineState(data, data.activepipelines, shader, blendmode);
      if (statecache->pipeline != pipeline) {
          [data.mtlcmdencoder setRenderPipelineState:pipeline];
          statecache->pipeline = pipeline;
      }

      if (statecache->constants_offset != constants_offset) {
          /* The invalid offset is remembered but never bound. */
          if (constants_offset != CONSTANTS_OFFSET_INVALID) {
              [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:constants_offset atIndex:3];
          }
          statecache->constants_offset = constants_offset;
      }

      [data.mtlcmdencoder setVertexBufferOffset:cmd->data.draw.first atIndex:0]; /* position/texcoords */
  }
  1288 
  /* Bring the encoder up to date for a textured draw: apply the common draw
   * state with the texture's fragment function, then bind the texture (and its
   * sampler, chroma texture and conversion constants where needed) if it
   * differs from the one already cached. */
  static void
  SetCopyState(SDL_Renderer *renderer, const SDL_RenderCommand *cmd, const size_t constants_offset,
               id<MTLBuffer> mtlbufvertex, METAL_DrawStateCache *statecache)
  {
      METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
      SDL_Texture *texture = cmd->data.draw.texture;
      METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
      METAL_TextureData *previous = NULL;

      SetDrawState(renderer, cmd, texturedata.fragmentFunction, constants_offset, mtlbufvertex, statecache);

      /* Same texture as the last draw: everything is already bound. */
      if (statecache->texture == texture) {
          return;
      }

      if (statecache->texture) {
          previous = (__bridge METAL_TextureData *) statecache->texture->driverdata;
      }

      /* Only re-send the sampler when there was no previous texture or it
       * used a different one. */
      if (!previous || (texturedata.mtlsampler != previous.mtlsampler)) {
          [data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
      }

      [data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];
      if (texturedata.yuv || texturedata.nv12) {
          [data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture_uv atIndex:1];
          [data.mtlcmdencoder setFragmentBuffer:data.mtlbufconstants offset:texturedata.conversionBufferOffset atIndex:1];
      }

      statecache->texture = texture;
  }
  1316 
  /* Execute a list of queued render commands.
   *
   * `vertices`/`vertsize` is the vertex stream the Queue* functions filled; it
   * is copied into a freshly created shared Metal buffer that the commands
   * index by offset. State-setting commands only update the draw state cache;
   * the encoder is touched when a clear or draw command is reached.
   *
   * Returns 0 on success, or the result of SDL_OutOfMemory() if the vertex
   * buffer cannot be created. */
  static int
  METAL_RunCommandQueue(SDL_Renderer * renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
  { @autoreleasepool {
      METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
      METAL_DrawStateCache statecache;
      SDL_zero(statecache);

      id<MTLBuffer> mtlbufvertex = nil;

      /* Start with everything marked stale so the first draw sends full state. */
      statecache.pipeline = nil;
      statecache.vertex_buffer = nil;
      statecache.constants_offset = CONSTANTS_OFFSET_INVALID;
      statecache.texture = NULL;
      statecache.color_dirty = SDL_TRUE;
      statecache.cliprect_dirty = SDL_TRUE;
      statecache.viewport_dirty = SDL_TRUE;
      statecache.projection_offset = 0;
      statecache.color_offset = 0;

      // !!! FIXME: have a ring of pre-made MTLBuffers we cycle through? How expensive is creation?
      if (vertsize > 0) {
          /* We can memcpy to a shared buffer from the CPU and read it from the GPU
           * without any extra copying. It's a bit slower on macOS to read shared
           * data from the GPU than to read managed/private data, but we avoid the
           * cost of copying the data and the code's simpler. Apple's best
           * practices guide recommends this approach for streamed vertex data.
           * TODO: this buffer is also used for constants. Is performance still
           * good for those, or should we have a managed buffer for them? */
          mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
          if (mtlbufvertex == nil) {
              /* Without this check the memcpy below would write through the
               * NULL returned by messaging nil for `contents`. */
              return SDL_OutOfMemory();
          }
          #if !__has_feature(objc_arc)
          [mtlbufvertex autorelease];
          #endif
          mtlbufvertex.label = @"SDL vertex data";
          SDL_memcpy([mtlbufvertex contents], vertices, vertsize);

          statecache.vertex_buffer = mtlbufvertex;
      }

      // If there's a command buffer here unexpectedly (app requested one?). Commit it so we can start fresh.
      [data.mtlcmdencoder endEncoding];
      [data.mtlcmdbuffer commit];
      data.mtlcmdencoder = nil;
      data.mtlcmdbuffer = nil;

      while (cmd) {
          switch (cmd->command) {
              case SDL_RENDERCMD_SETVIEWPORT: {
                  SDL_memcpy(&statecache.viewport, &cmd->data.viewport.rect, sizeof (statecache.viewport));
                  statecache.projection_offset = cmd->data.viewport.first;
                  statecache.viewport_dirty = SDL_TRUE;
                  /* The scissor is derived from the viewport, so it is stale too. */
                  statecache.cliprect_dirty = SDL_TRUE;
                  break;
              }

              case SDL_RENDERCMD_SETCLIPRECT: {
                  SDL_memcpy(&statecache.cliprect, &cmd->data.cliprect.rect, sizeof (statecache.cliprect));
                  statecache.cliprect_enabled = cmd->data.cliprect.enabled;
                  statecache.cliprect_dirty = SDL_TRUE;
                  break;
              }

              case SDL_RENDERCMD_SETDRAWCOLOR: {
                  statecache.color_offset = cmd->data.color.first;
                  statecache.color_dirty = SDL_TRUE;
                  break;
              }

              case SDL_RENDERCMD_CLEAR: {
                  /* If we're already encoding a command buffer, dump it without committing it. We'd just
                      clear all its work anyhow, and starting a new encoder will let us use a hardware clear
                      operation via MTLLoadActionClear. */
                  if (data.mtlcmdencoder != nil) {
                      [data.mtlcmdencoder endEncoding];

                      // !!! FIXME: have to commit, or an uncommitted but enqueued buffer will prevent the frame from finishing.
                      [data.mtlcmdbuffer commit];
                      data.mtlcmdencoder = nil;
                      data.mtlcmdbuffer = nil;
                  }

                  // force all this state to be reconfigured on next command buffer.
                  statecache.pipeline = nil;
                  statecache.constants_offset = CONSTANTS_OFFSET_INVALID;
                  statecache.texture = NULL;
                  statecache.color_dirty = SDL_TRUE;
                  statecache.cliprect_dirty = SDL_TRUE;
                  statecache.viewport_dirty = SDL_TRUE;

                  const Uint8 r = cmd->data.color.r;
                  const Uint8 g = cmd->data.color.g;
                  const Uint8 b = cmd->data.color.b;
                  const Uint8 a = cmd->data.color.a;
                  MTLClearColor color = MTLClearColorMake(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f);

                  // get new command encoder, set up with an initial clear operation.
                  METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionClear, &color, mtlbufvertex);
                  break;
              }

              case SDL_RENDERCMD_DRAW_POINTS:
              case SDL_RENDERCMD_DRAW_LINES: {
                  const size_t count = cmd->data.draw.count;
                  const MTLPrimitiveType primtype = (cmd->command == SDL_RENDERCMD_DRAW_POINTS) ? MTLPrimitiveTypePoint : MTLPrimitiveTypeLineStrip;
                  SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, mtlbufvertex, &statecache);
                  [data.mtlcmdencoder drawPrimitives:primtype vertexStart:0 vertexCount:count];
                  break;
              }

              case SDL_RENDERCMD_FILL_RECTS: {
                  const size_t count = cmd->data.draw.count;
                  const size_t maxcount = UINT16_MAX / 4;
                  SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
                  if (count == 1) {
                      /* A single quad is already in triangle strip order. */
                      [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
                  } else {
                      /* Our index buffer has 16 bit indices, so we can only draw
                       * 65k vertices (16k rects) at a time. */
                      for (size_t i = 0; i < count; i += maxcount) {
                          /* Set the vertex buffer offset for our current positions.
                           * The vertex buffer itself was bound in SetDrawState. */
                          [data.mtlcmdencoder setVertexBufferOffset:cmd->data.draw.first + i*sizeof(float)*8 atIndex:0];
                          [data.mtlcmdencoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
                                                         indexCount:SDL_min(maxcount, count - i) * 6
                                                          indexType:MTLIndexTypeUInt16
                                                        indexBuffer:data.mtlbufquadindices
                                                  indexBufferOffset:0];
                      }
                  }
                  break;
              }

              case SDL_RENDERCMD_COPY: {
                  SetCopyState(renderer, cmd, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
                  [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
                  break;
              }

              case SDL_RENDERCMD_COPY_EX: {
                  SetCopyState(renderer, cmd, CONSTANTS_OFFSET_INVALID, mtlbufvertex, &statecache);
                  /* For COPY_EX the draw.count field holds the offset of the per-draw transform. */
                  [data.mtlcmdencoder setVertexBuffer:mtlbufvertex offset:cmd->data.draw.count atIndex:3];  // transform
                  [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
                  break;
              }

              case SDL_RENDERCMD_NO_OP:
                  break;
          }
          cmd = cmd->next;
      }

      return 0;
  }}
  1469 
  1470 static int
  1471 METAL_RenderReadPixels(SDL_Renderer * renderer, const SDL_Rect * rect,
  1472                     Uint32 pixel_format, void * pixels, int pitch)
  1473 { @autoreleasepool {
  1474     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
  1475     METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL, nil);
  1476 
  1477     [data.mtlcmdencoder endEncoding];
  1478     id<MTLTexture> mtltexture = data.mtlpassdesc.colorAttachments[0].texture;
  1479 
  1480 #ifdef __MACOSX__
  1481     /* on macOS with managed-storage textures, we need to tell the driver to
  1482      * update the CPU-side copy of the texture data.
  1483      * NOTE: Currently all of our textures are managed on macOS. We'll need some
  1484      * extra copying for any private textures. */
  1485     if (METAL_GetStorageMode(mtltexture) == MTLStorageModeManaged) {
  1486         id<MTLBlitCommandEncoder> blit = [data.mtlcmdbuffer blitCommandEncoder];
  1487         [blit synchronizeResource:mtltexture];
  1488         [blit endEncoding];
  1489     }
  1490 #endif
  1491 
  1492     /* Commit the current command buffer and wait until it's completed, to make
  1493      * sure the GPU has finished rendering to it by the time we read it. */
  1494     [data.mtlcmdbuffer commit];
  1495     [data.mtlcmdbuffer waitUntilCompleted];
  1496     data.mtlcmdencoder = nil;
  1497     data.mtlcmdbuffer = nil;
  1498 
  1499     MTLRegion mtlregion = MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h);
  1500 
  1501     // we only do BGRA8 or RGBA8 at the moment, so 4 will do.
  1502     const int temp_pitch = rect->w * 4;
  1503     void *temp_pixels = SDL_malloc(temp_pitch * rect->h);
  1504     if (!temp_pixels) {
  1505         return SDL_OutOfMemory();
  1506     }
  1507 
  1508     [mtltexture getBytes:temp_pixels bytesPerRow:temp_pitch fromRegion:mtlregion mipmapLevel:0];
  1509 
  1510     const Uint32 temp_format = (mtltexture.pixelFormat == MTLPixelFormatBGRA8Unorm) ? SDL_PIXELFORMAT_ARGB8888 : SDL_PIXELFORMAT_ABGR8888;
  1511     const int status = SDL_ConvertPixels(rect->w, rect->h, temp_format, temp_pixels, temp_pitch, pixel_format, pixels, pitch);
  1512     SDL_free(temp_pixels);
  1513     return status;
  1514 }}
  1515 
  1516 static void
  1517 METAL_RenderPresent(SDL_Renderer * renderer)
  1518 { @autoreleasepool {
  1519     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
  1520 
  1521     // If we don't have a command buffer, we can't present, so activate to get one.
  1522     if (data.mtlcmdencoder == nil) {
  1523         // We haven't even gotten a backbuffer yet? Clear it to black. Otherwise, load the existing data.
  1524         if (data.mtlbackbuffer == nil) {
  1525             MTLClearColor color = MTLClearColorMake(0.0f, 0.0f, 0.0f, 1.0f);
  1526             METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionClear, &color, nil);
  1527         } else {
  1528             METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL, nil);
  1529         }
  1530     }
  1531 
  1532     [data.mtlcmdencoder endEncoding];
  1533     [data.mtlcmdbuffer presentDrawable:data.mtlbackbuffer];
  1534     [data.mtlcmdbuffer commit];
  1535 
  1536     data.mtlcmdencoder = nil;
  1537     data.mtlcmdbuffer = nil;
  1538     data.mtlbackbuffer = nil;
  1539 }}
  1540 
  1541 static void
  1542 METAL_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
  1543 { @autoreleasepool {
  1544     CFBridgingRelease(texture->driverdata);
  1545     texture->driverdata = NULL;
  1546 }}
  1547 
  1548 static void
  1549 METAL_DestroyRenderer(SDL_Renderer * renderer)
  1550 { @autoreleasepool {
  1551     if (renderer->driverdata) {
  1552         METAL_RenderData *data = CFBridgingRelease(renderer->driverdata);
  1553 
  1554         if (data.mtlcmdencoder != nil) {
  1555             [data.mtlcmdencoder endEncoding];
  1556         }
  1557 
  1558         DestroyAllPipelines(data.allpipelines, data.pipelinescount);
  1559 
  1560         SDL_Metal_DestroyView(data.mtlview);
  1561     }
  1562 
  1563     SDL_free(renderer);
  1564 }}
  1565 
  1566 static void *
  1567 METAL_GetMetalLayer(SDL_Renderer * renderer)
  1568 { @autoreleasepool {
  1569     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
  1570     return (__bridge void*)data.mtllayer;
  1571 }}
  1572 
  1573 static void *
  1574 METAL_GetMetalCommandEncoder(SDL_Renderer * renderer)
  1575 { @autoreleasepool {
  1576     METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL, nil);
  1577     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
  1578     return (__bridge void*)data.mtlcmdencoder;
  1579 }}
  1580 
  1581 static SDL_Renderer *
  1582 METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
  1583 { @autoreleasepool {
  1584     SDL_Renderer *renderer = NULL;
  1585     METAL_RenderData *data = NULL;
  1586     id<MTLDevice> mtldevice = nil;
  1587     SDL_MetalView view = NULL;
  1588     CAMetalLayer *layer = nil;
  1589     SDL_SysWMinfo syswm;
  1590     Uint32 window_flags;
  1591     SDL_bool changed_window = SDL_FALSE;
  1592 
  1593     SDL_VERSION(&syswm.version);
  1594     if (!SDL_GetWindowWMInfo(window, &syswm)) {
  1595         return NULL;
  1596     }
  1597 
  1598     if (IsMetalAvailable(&syswm) == -1) {
  1599         return NULL;
  1600     }
  1601 
  1602     window_flags = SDL_GetWindowFlags(window);
  1603     if (!(window_flags & SDL_WINDOW_METAL)) {
  1604         changed_window = SDL_TRUE;
  1605         if (SDL_RecreateWindow(window, (window_flags & ~SDL_WINDOW_OPENGL) | SDL_WINDOW_METAL) < 0) {
  1606             return NULL;
  1607         }
  1608     }
  1609 
  1610     renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
  1611     if (!renderer) {
  1612         SDL_OutOfMemory();
  1613         if (changed_window) {
  1614             SDL_RecreateWindow(window, window_flags);
  1615         }
  1616         return NULL;
  1617     }
  1618 
  1619     // !!! FIXME: MTLCopyAllDevices() can find other GPUs on macOS...
  1620     mtldevice = MTLCreateSystemDefaultDevice();
  1621 
  1622     if (mtldevice == nil) {
  1623         SDL_free(renderer);
  1624         SDL_SetError("Failed to obtain Metal device");
  1625         if (changed_window) {
  1626             SDL_RecreateWindow(window, window_flags);
  1627         }
  1628         return NULL;
  1629     }
  1630 
  1631     view = SDL_Metal_CreateView(window);
  1632 
  1633     if (view == NULL) {
  1634 #if !__has_feature(objc_arc)
  1635         [mtldevice release];
  1636 #endif
  1637         SDL_free(renderer);
  1638         if (changed_window) {
  1639             SDL_RecreateWindow(window, window_flags);
  1640         }
  1641         return NULL;
  1642     }
  1643 
  1644     // !!! FIXME: error checking on all of this.
  1645     data = [[METAL_RenderData alloc] init];
  1646 
  1647     if (data == nil) {
  1648 #if !__has_feature(objc_arc)
  1649         [mtldevice release];
  1650 #endif
  1651         SDL_Metal_DestroyView(view);
  1652         SDL_free(renderer);
  1653         if (changed_window) {
  1654             SDL_RecreateWindow(window, window_flags);
  1655         }
  1656         return NULL;
  1657     }
  1658 
  1659     renderer->driverdata = (void*)CFBridgingRetain(data);
  1660     renderer->window = window;
  1661 
  1662     data.mtlview = view;
  1663 
  1664 #ifdef __MACOSX__
  1665     layer = (CAMetalLayer *)[(NSView *)view layer];
  1666 #else
  1667     layer = (CAMetalLayer *)[(__bridge UIView *)view layer];
  1668 #endif
  1669 
  1670     layer.device = mtldevice;
  1671 
  1672     /* Necessary for RenderReadPixels. */
  1673     layer.framebufferOnly = NO;
  1674 
  1675     data.mtldevice = layer.device;
  1676     data.mtllayer = layer;
  1677     id<MTLCommandQueue> mtlcmdqueue = [data.mtldevice newCommandQueue];
  1678     data.mtlcmdqueue = mtlcmdqueue;
  1679     data.mtlcmdqueue.label = @"SDL Metal Renderer";
  1680     data.mtlpassdesc = [MTLRenderPassDescriptor renderPassDescriptor];
  1681 
  1682     NSError *err = nil;
  1683 
  1684     // The compiled .metallib is embedded in a static array in a header file
  1685     // but the original shader source code is in SDL_shaders_metal.metal.
  1686     dispatch_data_t mtllibdata = dispatch_data_create(sdl_metallib, sdl_metallib_len, dispatch_get_global_queue(0, 0), ^{});
  1687     id<MTLLibrary> mtllibrary = [data.mtldevice newLibraryWithData:mtllibdata error:&err];
  1688     data.mtllibrary = mtllibrary;
  1689     SDL_assert(err == nil);
  1690 #if !__has_feature(objc_arc)
  1691     dispatch_release(mtllibdata);
  1692 #endif
  1693     data.mtllibrary.label = @"SDL Metal renderer shader library";
  1694 
  1695     /* Do some shader pipeline state loading up-front rather than on demand. */
  1696     data.pipelinescount = 0;
  1697     data.allpipelines = NULL;
  1698     ChooseShaderPipelines(data, MTLPixelFormatBGRA8Unorm);
  1699 
  1700     MTLSamplerDescriptor *samplerdesc = [[MTLSamplerDescriptor alloc] init];
  1701 
  1702     samplerdesc.minFilter = MTLSamplerMinMagFilterNearest;
  1703     samplerdesc.magFilter = MTLSamplerMinMagFilterNearest;
  1704     id<MTLSamplerState> mtlsamplernearest = [data.mtldevice newSamplerStateWithDescriptor:samplerdesc];
  1705     data.mtlsamplernearest = mtlsamplernearest;
  1706 
  1707     samplerdesc.minFilter = MTLSamplerMinMagFilterLinear;
  1708     samplerdesc.magFilter = MTLSamplerMinMagFilterLinear;
  1709     id<MTLSamplerState> mtlsamplerlinear = [data.mtldevice newSamplerStateWithDescriptor:samplerdesc];
  1710     data.mtlsamplerlinear = mtlsamplerlinear;
  1711 
  1712     /* Note: matrices are column major. */
  1713     float identitytransform[16] = {
  1714         1.0f, 0.0f, 0.0f, 0.0f,
  1715         0.0f, 1.0f, 0.0f, 0.0f,
  1716         0.0f, 0.0f, 1.0f, 0.0f,
  1717         0.0f, 0.0f, 0.0f, 1.0f,
  1718     };
  1719 
  1720     float halfpixeltransform[16] = {
  1721         1.0f, 0.0f, 0.0f, 0.0f,
  1722         0.0f, 1.0f, 0.0f, 0.0f,
  1723         0.0f, 0.0f, 1.0f, 0.0f,
  1724         0.5f, 0.5f, 0.0f, 1.0f,
  1725     };
  1726 
  1727     /* Metal pads float3s to 16 bytes. */
  1728     float decodetransformJPEG[4*4] = {
  1729         0.0, -0.501960814, -0.501960814, 0.0, /* offset */
  1730         1.0000,  0.0000,  1.4020, 0.0,        /* Rcoeff */
  1731         1.0000, -0.3441, -0.7141, 0.0,        /* Gcoeff */
  1732         1.0000,  1.7720,  0.0000, 0.0,        /* Bcoeff */
  1733     };
  1734 
  1735     float decodetransformBT601[4*4] = {
  1736         -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
  1737         1.1644,  0.0000,  1.5960, 0.0,                  /* Rcoeff */
  1738         1.1644, -0.3918, -0.8130, 0.0,                  /* Gcoeff */
  1739         1.1644,  2.0172,  0.0000, 0.0,                  /* Bcoeff */
  1740     };
  1741 
  1742     float decodetransformBT709[4*4] = {
  1743         0.0, -0.501960814, -0.501960814, 0.0, /* offset */
  1744         1.0000,  0.0000,  1.4020, 0.0,        /* Rcoeff */
  1745         1.0000, -0.3441, -0.7141, 0.0,        /* Gcoeff */
  1746         1.0000,  1.7720,  0.0000, 0.0,        /* Bcoeff */
  1747     };
  1748 
  1749     id<MTLBuffer> mtlbufconstantstaging = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModeShared];
  1750     #if !__has_feature(objc_arc)
  1751     [mtlbufconstantstaging autorelease];
  1752     #endif
  1753 
  1754     char *constantdata = [mtlbufconstantstaging contents];
  1755     SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
  1756     SDL_memcpy(constantdata + CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, halfpixeltransform, sizeof(halfpixeltransform));
  1757     SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_JPEG, decodetransformJPEG, sizeof(decodetransformJPEG));
  1758     SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
  1759     SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));
  1760 
  1761     int quadcount = UINT16_MAX / 4;
  1762     size_t indicessize = sizeof(UInt16) * quadcount * 6;
  1763     id<MTLBuffer> mtlbufquadindicesstaging = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModeShared];
  1764 #if !__has_feature(objc_arc)
  1765     [mtlbufquadindicesstaging autorelease];
  1766 #endif
  1767 
  1768     /* Quads in the following vertex order (matches the FillRects vertices):
  1769      * 1---3
  1770      * | \ |
  1771      * 0---2
  1772      */
  1773     UInt16 *indexdata = [mtlbufquadindicesstaging contents];
  1774     for (int i = 0; i < quadcount; i++) {
  1775         indexdata[i * 6 + 0] = i * 4 + 0;
  1776         indexdata[i * 6 + 1] = i * 4 + 1;
  1777         indexdata[i * 6 + 2] = i * 4 + 2;
  1778 
  1779         indexdata[i * 6 + 3] = i * 4 + 2;
  1780         indexdata[i * 6 + 4] = i * 4 + 1;
  1781         indexdata[i * 6 + 5] = i * 4 + 3;
  1782     }
  1783 
  1784     id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
  1785     data.mtlbufconstants = mtlbufconstants;
  1786     data.mtlbufconstants.label = @"SDL constant data";
  1787 
  1788     id<MTLBuffer> mtlbufquadindices = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModePrivate];
  1789     data.mtlbufquadindices = mtlbufquadindices;
  1790     data.mtlbufquadindices.label = @"SDL quad index buffer";
  1791 
  1792     id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
  1793     id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];
  1794 
  1795     [blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
  1796     [blitcmd copyFromBuffer:mtlbufquadindicesstaging sourceOffset:0 toBuffer:mtlbufquadindices destinationOffset:0 size:indicessize];
  1797 
  1798     [blitcmd endEncoding];
  1799     [cmdbuffer commit];
  1800 
  1801     // !!! FIXME: force more clears here so all the drawables are sane to start, and our static buffers are definitely flushed.
  1802 
  1803     renderer->WindowEvent = METAL_WindowEvent;
  1804     renderer->GetOutputSize = METAL_GetOutputSize;
  1805     renderer->SupportsBlendMode = METAL_SupportsBlendMode;
  1806     renderer->CreateTexture = METAL_CreateTexture;
  1807     renderer->UpdateTexture = METAL_UpdateTexture;
  1808     renderer->UpdateTextureYUV = METAL_UpdateTextureYUV;
  1809     renderer->LockTexture = METAL_LockTexture;
  1810     renderer->UnlockTexture = METAL_UnlockTexture;
  1811     renderer->SetTextureScaleMode = METAL_SetTextureScaleMode;
  1812     renderer->SetRenderTarget = METAL_SetRenderTarget;
  1813     renderer->QueueSetViewport = METAL_QueueSetViewport;
  1814     renderer->QueueSetDrawColor = METAL_QueueSetDrawColor;
  1815     renderer->QueueDrawPoints = METAL_QueueDrawPoints;
  1816     renderer->QueueDrawLines = METAL_QueueDrawPoints;  // lines and points queue the same way.
  1817     renderer->QueueFillRects = METAL_QueueFillRects;
  1818     renderer->QueueCopy = METAL_QueueCopy;
  1819     renderer->QueueCopyEx = METAL_QueueCopyEx;
  1820     renderer->RunCommandQueue = METAL_RunCommandQueue;
  1821     renderer->RenderReadPixels = METAL_RenderReadPixels;
  1822     renderer->RenderPresent = METAL_RenderPresent;
  1823     renderer->DestroyTexture = METAL_DestroyTexture;
  1824     renderer->DestroyRenderer = METAL_DestroyRenderer;
  1825     renderer->GetMetalLayer = METAL_GetMetalLayer;
  1826     renderer->GetMetalCommandEncoder = METAL_GetMetalCommandEncoder;
  1827 
  1828     renderer->info = METAL_RenderDriver.info;
  1829     renderer->info.flags = (SDL_RENDERER_ACCELERATED | SDL_RENDERER_TARGETTEXTURE);
  1830 
  1831     renderer->always_batch = SDL_TRUE;
  1832 
  1833 #if defined(__MACOSX__) && defined(MAC_OS_X_VERSION_10_13)
  1834     if (@available(macOS 10.13, *)) {
  1835         data.mtllayer.displaySyncEnabled = (flags & SDL_RENDERER_PRESENTVSYNC) != 0;
  1836         if (data.mtllayer.displaySyncEnabled) {
  1837             renderer->info.flags |= SDL_RENDERER_PRESENTVSYNC;
  1838         }
  1839     } else
  1840 #endif
  1841     {
  1842         renderer->info.flags |= SDL_RENDERER_PRESENTVSYNC;
  1843     }
  1844 
  1845     /* https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
  1846     int maxtexsize = 4096;
  1847 #if defined(__MACOSX__)
  1848     maxtexsize = 16384;
  1849 #elif defined(__TVOS__)
  1850     maxtexsize = 8192;
  1851 #ifdef __TVOS_11_0
  1852     if (@available(tvOS 11.0, *)) {
  1853         if ([mtldevice supportsFeatureSet:MTLFeatureSet_tvOS_GPUFamily2_v1]) {
  1854             maxtexsize = 16384;
  1855         }
  1856     }
  1857 #endif
  1858 #else
  1859 #ifdef __IPHONE_11_0
  1860 #pragma clang diagnostic push
  1861 #pragma clang diagnostic ignored "-Wunguarded-availability-new"
  1862     if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
  1863         maxtexsize = 16384;
  1864     } else
  1865 #pragma clang diagnostic pop
  1866 #endif
  1867 #ifdef __IPHONE_10_0
  1868     if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
  1869         maxtexsize = 16384;
  1870     } else
  1871 #endif
  1872     if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v2] || [mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v2]) {
  1873         maxtexsize = 8192;
  1874     } else {
  1875         maxtexsize = 4096;
  1876     }
  1877 #endif
  1878 
  1879     renderer->info.max_texture_width = maxtexsize;
  1880     renderer->info.max_texture_height = maxtexsize;
  1881 
  1882 #if !__has_feature(objc_arc)
  1883     [mtlcmdqueue release];
  1884     [mtllibrary release];
  1885     [samplerdesc release];
  1886     [mtlsamplernearest release];
  1887     [mtlsamplerlinear release];
  1888     [mtlbufconstants release];
  1889     [mtlbufquadindices release];
  1890     [data release];
  1891     [mtldevice release];
  1892 #endif
  1893 
  1894     return renderer;
  1895 }}
  1896 
  1897 SDL_RenderDriver METAL_RenderDriver = {
  1898     METAL_CreateRenderer,
  1899     {
  1900         "metal",
  1901         (SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_TARGETTEXTURE),
  1902         6,
  1903         {
  1904             SDL_PIXELFORMAT_ARGB8888,
  1905             SDL_PIXELFORMAT_ABGR8888,
  1906             SDL_PIXELFORMAT_YV12,
  1907             SDL_PIXELFORMAT_IYUV,
  1908             SDL_PIXELFORMAT_NV12,
  1909             SDL_PIXELFORMAT_NV21
  1910         },
  1911     0, 0,
  1912     }
  1913 };
  1914 
  1915 #endif /* SDL_VIDEO_RENDER_METAL && !SDL_RENDER_DISABLED */
  1916 
  1917 /* vi: set ts=4 sw=4 expandtab: */