//-------------------------------------------------------------------------------------------------------------------------------------------------------------
//
// Copyright 2023 Apple Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//-------------------------------------------------------------------------------------------------------------------------------------------------------------


#define NS_PRIVATE_IMPLEMENTATION
#define CA_PRIVATE_IMPLEMENTATION
#define MTL_PRIVATE_IMPLEMENTATION
#include <Foundation/Foundation.hpp>
#include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp>

#define IR_RUNTIME_METALCPP
#define IR_PRIVATE_IMPLEMENTATION
#include <metal_irconverter_runtime/metal_irconverter_runtime.h>

#include <metal_irconverter/metal_irconverter.h>

#include <fstream>
#include <simd/simd.h>
#include <utility>
#include <variant>
#include <vector>

// Include RenderCore after metal-cpp and metal-irconverter to generate
// their implementations inline in this file.
#include "RenderCore.hpp"

#include "MathUtils.hpp"

// Number of elements in a statically sized array. The argument is parenthesized in
// both uses so that any array-valued expression expands correctly.
// Only meaningful for true arrays; passing a pointer silently yields a wrong count.
#define NUM_ELEMS(arr) (sizeof(arr) / sizeof((arr)[0]))

// Dimensions of the instanced cube grid and the resulting instance count.
static constexpr size_t kInstanceRows                    = 10;
static constexpr size_t kInstanceColumns                 = 10;
static constexpr size_t kInstanceDepth                   = 10;
static constexpr size_t kNumInstances                    = (kInstanceRows * kInstanceColumns * kInstanceDepth);
// Size, in texels, of the compute-generated texture.
static constexpr uint32_t kTextureWidth                  = 128;
static constexpr uint32_t kTextureHeight                 = 128;
// Capacity handed to each per-frame BumpAllocator.
static constexpr uint64_t kPerFrameBumpAllocatorCapacity = 1024; // 1 KiB

// Creates the renderer: retains the device, creates the command queue, builds all
// pipelines, fixed-function states and resources, and finally sets up the objects
// used to pace frames (a semaphore and one bump allocator per frame in flight).
//
// pDevice          - Metal device to render with; retained here, released in the destructor.
// shaderSearchPath - forwarded unchanged to the pipeline builders.
RenderCore::RenderCore(MTL::Device* pDevice, const std::string& shaderSearchPath)
    : _pDevice(pDevice->retain())
    , _angle(0.f)
    , _frame(0)
    , _animationIndex(0)
    , _aspectRatio(1.f)
    , _renderMode(RenderCore::RM_GS_TS)
    , _triangleFillMode(MTL::TriangleFillModeFill)
    , _animationFactor(1.0f)
    , _tessellationInnerValue(5.0f)
{
    _pCommandQueue = _pDevice->newCommandQueue();

    // Pipeline state objects.
    buildRenderPipelines(shaderSearchPath);
    buildComputePipelines(shaderSearchPath);

    // Fixed-function state and GPU resources.
    buildDepthStencilStates();
    buildTextures();
    buildSamplers();
    buildBuffers();

    // draw() waits on this semaphore before encoding a frame; the command buffer's
    // completion handler signals it again.
    _semaphore = dispatch_semaphore_create(kMaxFramesInFlight);

    // One transient allocator per frame slot.
    size_t slot = 0;
    while (slot < kMaxFramesInFlight)
    {
        _bufferAllocator[slot] = new BumpAllocator(pDevice, kPerFrameBumpAllocatorCapacity, MTL::ResourceStorageModeShared);
        ++slot;
    }
}

// Releases the vertex and index buffers referenced by an indexed mesh.
// The mesh struct itself is left untouched.
static void releaseMesh(IndexedMesh* pIndexedMesh)
{
    IndexedMesh& mesh = *pIndexedMesh;
    mesh.pVertices->release();
    mesh.pIndices->release();
}

// Releases the vertex buffer referenced by a non-indexed mesh.
// The mesh struct itself is left untouched.
static void releaseMesh(NonIndexedMesh* pNonIndexedMesh)
{
    NonIndexedMesh& mesh = *pNonIndexedMesh;
    mesh.pVertices->release();
}

// Tears the renderer down: waits for every frame still in flight, then releases all
// Metal objects, the per-frame allocators and the frame-pacing semaphore.
RenderCore::~RenderCore()
{
    // Drain in-flight frames. Each command buffer committed by draw() signals the
    // semaphore from its completion handler through a raw pointer to this object, so
    // the object must outlive those handlers. Acquiring every slot guarantees that
    // all handlers have already signalled.
    for (size_t i = 0; i < kMaxFramesInFlight; ++i)
    {
        dispatch_semaphore_wait(_semaphore, DISPATCH_TIME_FOREVER);
    }

    // Restore the initial count: disposing of a dispatch semaphore whose current
    // value is lower than the value it was created with is a fatal error.
    for (size_t i = 0; i < kMaxFramesInFlight; ++i)
    {
        dispatch_semaphore_signal(_semaphore);
    }

    _pTexture->release();
    _pSampler->release();
    _pDepthStencilState->release();

    // Per-frame resources.
    for (size_t i = 0; i < kMaxFramesInFlight; ++i)
    {
        _pInstanceDataBuffer[i]->release();
    }

    for (size_t i = 0; i < kMaxFramesInFlight; ++i)
    {
        _pCameraDataBuffer[i]->release();
    }

    for (size_t i = 0; i < kMaxFramesInFlight; ++i)
    {
        delete _bufferAllocator[i];
    }

    // Shared resources and meshes.
    _pScratch->release();
    releaseMesh(&_cubeMesh);
    releaseMesh(&_pretessellatedGrassMesh);
    releaseMesh(&_presessellatedNonIndexedGrassMesh);
    releaseMesh(&_tessellationPatchGrassMesh);
    releaseMesh(&_tessellationNonIndexedPatchGrassMesh);
    releaseMesh(&_quadMesh);
    _pTessellatorTables->release();

    // Pipelines, queue and device.
    _pComputeMandelbrotPipeline->release();
    _pSimpleFlatGrassPipeline->release();
    _geometryPipeline.pRenderPipelineState->release();
    _tessellationPipeline.pRenderPipelineState->release();
    _pDebugPipeline->release();
    _pInstancingPipeline->release();
    _pCommandQueue->release();
    _pDevice->release();

    dispatch_release(_semaphore);
}

// CPU-side structs that are written into GPU-visible buffers by this file.
// NOTE(review): the layouts presumably have to match the corresponding shader-side
// declarations, which are not visible here - confirm before reordering or resizing fields.
namespace shader_types
{
// Per-instance record; updateWorld() fills kNumInstances of these every frame.
struct InstanceData
{
    simd::float4x4 instanceTransform;
    // updateWorld() currently stores the same matrix here as in instanceTransform.
    simd::float4x4 instanceNormalTransform;
    simd::float4 instanceColor;
};

// Per-frame camera record; updateWorld() writes one of these every frame.
struct CameraData
{
    simd::float4x4 perspectiveTransform;
    simd::float4x4 worldTransform;
    // Set from math::discardTranslation(worldTransform) in updateWorld().
    simd::float4x3 worldNormalTransform;
};

// Parameters written by generateMandelbrotTexture() for the compute dispatch.
struct MandelbrotComputeData
{
    // Guards the assumption that the float and the two uint32_t members have the same size.
    static_assert(sizeof(float) == sizeof(uint32_t));
    float anim;          // set to _angle * 5
    uint32_t gridWidth;  // set to kTextureWidth
    uint32_t gridHeight; // set to kTextureHeight
};

}

// Creates the four render pipelines used by the draw methods. Every factory receives
// the same shader search path and device.
void RenderCore::buildRenderPipelines(const std::string& shaderSearchPath)
{
    // Bound by drawInstancedCubes().
    _pInstancingPipeline = shader_pipeline::newCubePipeline(shaderSearchPath, _pDevice);

    // Bound by drawGrass() in RM_NoGS_NoTS mode.
    _pSimpleFlatGrassPipeline = shader_pipeline::newSimpleGrassPipeline(shaderSearchPath, _pDevice);

    // Bound by drawGrass() for the geometry-emulation modes.
    _geometryPipeline = shader_pipeline::newGeometryPipeline(shaderSearchPath, _pDevice);

    // Bound by drawGrass() for the tessellation-emulation modes.
    _tessellationPipeline = shader_pipeline::newTessellationPipeline(shaderSearchPath, _pDevice);
}

void RenderCore::buildComputePipelines(const std::string& shaderSearchPath)
{
    _pComputeMandelbrotPipeline = shader_pipeline::newComputePipeline(shaderSearchPath, _pDevice);
}

void RenderCore::buildDepthStencilStates()
{
    MTL::DepthStencilDescriptor* pDsDesc = MTL::DepthStencilDescriptor::alloc()->init();
    pDsDesc->setDepthCompareFunction(MTL::CompareFunction::CompareFunctionLess);
    pDsDesc->setDepthWriteEnabled(true);

    _pDepthStencilState = _pDevice->newDepthStencilState(pDsDesc);

    pDsDesc->release();
}

// Creates the private-storage RGBA8 texture that the compute pass writes
// (generateMandelbrotTexture) and the cube pass reads (drawInstancedCubes).
void RenderCore::buildTextures()
{
    MTL::TextureDescriptor* pTextureDesc = MTL::TextureDescriptor::alloc()->init();
    pTextureDesc->setWidth(kTextureWidth);
    pTextureDesc->setHeight(kTextureHeight);
    pTextureDesc->setDepth(1);
    pTextureDesc->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
#if IR_VERSION_MAJOR < 3
    pTextureDesc->setTextureType(MTL::TextureType2DArray);
#else
    pTextureDesc->setTextureType(MTL::TextureType2D);
#endif
    pTextureDesc->setArrayLength(1);
    pTextureDesc->setStorageMode(MTL::StorageModePrivate);

    // setUsage() expects MTL::TextureUsage flags. The MTL::ResourceUsage constants
    // used previously belong to a different option set: Read/Write happen to share
    // values with ShaderRead/ShaderWrite, but ResourceUsageSample has the value of
    // TextureUsageRenderTarget, which this texture never needs.
    pTextureDesc->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);

    _pTexture = _pDevice->newTexture(pTextureDesc);
    assert(_pTexture);

    pTextureDesc->release();
}

// Creates the linear-filtering sampler that drawInstancedCubes() encodes into a
// descriptor table. Argument-buffer support is enabled on the descriptor because the
// sampler is referenced through such a table rather than bound directly.
void RenderCore::buildSamplers()
{
    // Owned locally and released explicitly below, matching the other build* methods.
    // This method runs from the constructor, where this file sets up no autorelease
    // pool, so an autoreleased descriptor could outlive its usefulness or leak.
    MTL::SamplerDescriptor* pSampDesc = MTL::SamplerDescriptor::alloc()->init();
    pSampDesc->setSupportArgumentBuffers(true);
    pSampDesc->setMagFilter(MTL::SamplerMinMagFilterLinear);
    pSampDesc->setMinFilter(MTL::SamplerMinMagFilterLinear);
    // NOTE(review): only the R and S address modes are set; T keeps its default.
    // Confirm whether S/T was intended for this 2D texture.
    pSampDesc->setRAddressMode(MTL::SamplerAddressModeRepeat);
    pSampDesc->setSAddressMode(MTL::SamplerAddressModeRepeat);

    _pSampler = _pDevice->newSamplerState(pSampDesc);
    pSampDesc->release();

    assert(_pSampler);
}

// Creates all buffer-backed resources: the tessellator tables, the meshes, the
// per-frame instance/camera buffers and the scratch heap used for descriptor tables.
void RenderCore::buildBuffers()
{
    // Tessellation tables:

    const uint64_t tessellationTablesSize = IRRuntimeTessellatorTablesSize();
    _pTessellatorTables = _pDevice->newBuffer(tessellationTablesSize, MTL::ResourceStorageModeShared);
    assert(_pTessellatorTables);
    IRRuntimeLoadTessellatorTables(_pTessellatorTables);

    // Floating instanced cubes:

    {
        _cubeMesh = mesh_utils::newCubeMesh(1.0f, _pDevice);

        // There is one buffer per frame in flight, so each buffer only has to hold a
        // single frame's data (updateWorld() writes kNumInstances instance records
        // and one camera record into the current frame's buffers). Multiplying the
        // size by kMaxFramesInFlight as well would over-allocate by that factor.
        const size_t instanceDataSize = kNumInstances * sizeof(shader_types::InstanceData);
        const size_t cameraDataSize   = sizeof(shader_types::CameraData);

        for (size_t i = 0; i < kMaxFramesInFlight; ++i)
        {
            _pInstanceDataBuffer[i] = _pDevice->newBuffer(instanceDataSize, MTL::ResourceStorageModeShared);
            assert(_pInstanceDataBuffer[i]);
        }

        for (size_t i = 0; i < kMaxFramesInFlight; ++i)
        {
            _pCameraDataBuffer[i] = _pDevice->newBuffer(cameraDataSize, MTL::ResourceStorageModeShared);
            assert(_pCameraDataBuffer[i]);
        }
    }

    // The grass mesh for geometry and tessellation:

    {
        _pretessellatedGrassMesh           = mesh_utils::newHorizontalQuad(40.0f, 800, _pDevice);
        _presessellatedNonIndexedGrassMesh = mesh_utils::newNonIndexedHorizontalQuad(40.0f, 800, _pDevice);

        _tessellationPatchGrassMesh           = mesh_utils::newHorizontalQuad(40.0f, 200, _pDevice);
        _tessellationNonIndexedPatchGrassMesh = mesh_utils::newNonIndexedHorizontalQuad(40.0f, 200, _pDevice);

        _quadMesh = mesh_utils::newHorizontalQuad(40.0f, 20, _pDevice);
    }

    // Some extra scratch memory for argument buffers:

    // The descriptor is released explicitly: this method runs from the constructor,
    // where this file sets up no autorelease pool.
    MTL::HeapDescriptor* pHeapDesc = MTL::HeapDescriptor::alloc()->init();
    pHeapDesc->setSize(4 * 1024); // 4 KiB
    pHeapDesc->setStorageMode(MTL::StorageModeShared);
    pHeapDesc->setHazardTrackingMode(MTL::HazardTrackingModeTracked);
    pHeapDesc->setType(MTL::HeapTypeAutomatic);
    _pScratch = _pDevice->newHeap(pHeapDesc);
    pHeapDesc->release();
    assert(_pScratch);
}

// Encodes a compute pass into pCommandBuffer that runs the Mandelbrot pipeline over a
// kTextureWidth x kTextureHeight grid, with _pTexture exposed through a one-entry
// descriptor table. pCommandBuffer must not be null.
void RenderCore::generateMandelbrotTexture(MTL::CommandBuffer* pCommandBuffer)
{
    assert(pCommandBuffer);

    BumpAllocator* pFrameAllocator = _bufferAllocator[_frame];

    MTL::ComputeCommandEncoder* pEncoder = pCommandBuffer->computeCommandEncoder();
    pEncoder->setComputePipelineState(_pComputeMandelbrotPipeline);

    // Bind resources (according to root signature):
    // a descriptor table with a single entry that refers to the output texture.
    MTL::Buffer* pDescriptorTable = _pScratch->newBuffer(sizeof(IRDescriptorTableEntry), MTL::ResourceStorageModeShared);

    //#-code-listing(encodeArgumentBuffers)
    auto* pTableEntry = static_cast<IRDescriptorTableEntry*>(pDescriptorTable->contents());
    IRDescriptorTableSetTexture(pTableEntry, _pTexture, 0, 0);
    //#-end-code-listing

    // Per-dispatch constants, taken from this frame's bump allocator.
    auto [pParams, paramsOffset] = pFrameAllocator->allocate<shader_types::MandelbrotComputeData>();
    pParams->anim       = _angle * 5;
    pParams->gridWidth  = kTextureWidth;
    pParams->gridHeight = kTextureHeight;

    // Top-level argument buffer: two GPU addresses, constants first, table second.
    struct TopLevelAB
    {
        uint64_t computeDataAddr;
        uint64_t uavTableAddr;
    };
    auto [pTopLevel, topLevelOffset] = pFrameAllocator->allocate<TopLevelAB>();
    pTopLevel->computeDataAddr       = pFrameAllocator->baseBuffer()->gpuAddress() + paramsOffset;
    pTopLevel->uavTableAddr          = pDescriptorTable->gpuAddress();

    // Resources reached only through GPU addresses are declared explicitly.
    pEncoder->useResource(_pTexture, MTL::ResourceUsageWrite | MTL::ResourceUsageRead);
    pEncoder->useResource(pDescriptorTable, MTL::ResourceUsageRead);
    pEncoder->setBuffer(pFrameAllocator->baseBuffer(), topLevelOffset, kIRArgumentBufferBindPoint);

    // Dispatch threads: one thread per texel, using one-dimensional threadgroups of
    // the pipeline's maximum size.
    const MTL::Size gridSize(kTextureWidth, kTextureHeight, 1);
    const NS::UInteger threadsPerGroup = _pComputeMandelbrotPipeline->maxTotalThreadsPerThreadgroup();
    const MTL::Size threadgroupSize(threadsPerGroup, 1, 1);

    pEncoder->dispatchThreads(gridSize, threadgroupSize);
    pEncoder->endEncoding();

    pDescriptorTable->release();
}

void RenderCore::updateWorld()
{
    using simd::float3;
    using simd::float4;
    using simd::float4x4;

    // Update animated objects:

    _angle += (0.002f * _animationFactor);

    MTL::Buffer* pInstanceDataBuffer = _pInstanceDataBuffer[_frame];

    const float scl                           = 0.2f;
    shader_types::InstanceData* pInstanceData = reinterpret_cast<shader_types::InstanceData*>(pInstanceDataBuffer->contents());

    float3 objectPosition = { 0.f, 0.f, -10.f };

    float4x4 rt            = math::makeTranslate(objectPosition);
    float4x4 rr1           = math::makeYRotate(-_angle);
    float4x4 rr0           = math::makeXRotate(_angle * 0.5);
    float4x4 rtInv         = math::makeTranslate({ -objectPosition.x, -objectPosition.y, -objectPosition.z });
    float4x4 fullObjectRot = rt * rr1 * rr0 * rtInv;

    size_t ix = 0;
    size_t iy = 0;
    size_t iz = 0;
    for (size_t i = 0; i < kNumInstances; ++i)
    {
        if (ix == kInstanceRows)
        {
            ix = 0;
            iy += 1;
        }
        if (iy == kInstanceRows)
        {
            iy = 0;
            iz += 1;
        }

        float4x4 scale = math::makeScale((float3) { scl, scl, scl });
        float4x4 zrot  = math::makeZRotate(_angle * sinf((float)ix));
        float4x4 yrot  = math::makeYRotate(_angle * cosf((float)iy));

        float x            = ((float)ix - (float)kInstanceRows / 2.f) * (2.f * scl) + scl;
        float y            = ((float)iy - (float)kInstanceColumns / 2.f) * (2.f * scl) + scl;
        float z            = ((float)iz - (float)kInstanceDepth / 2.f) * (2.f * scl);
        float4x4 translate = math::makeTranslate(math::add(objectPosition, { x, y, z }));

        pInstanceData[i].instanceTransform       = fullObjectRot * translate * yrot * zrot * scale;
        pInstanceData[i].instanceNormalTransform = pInstanceData[i].instanceTransform;

        float iDivNumInstances         = i / (float)kNumInstances;
        float r                        = iDivNumInstances;
        float g                        = 1.0f - r;
        float b                        = sinf(M_PI * 2.0f * iDivNumInstances);
        pInstanceData[i].instanceColor = (float4) { r, g, b, 1.0f };

        ix += 1;
    }

    // Update camera state:

    MTL::Buffer* pCameraDataBuffer        = _pCameraDataBuffer[_frame];
    shader_types::CameraData* pCameraData = reinterpret_cast<shader_types::CameraData*>(pCameraDataBuffer->contents());
    pCameraData->perspectiveTransform     = math::makePerspective(45.f * M_PI / 180.f, _aspectRatio, 0.03f, 500.0f);
    pCameraData->worldTransform           = math::makeIdentity();
    pCameraData->worldNormalTransform     = math::discardTranslation(pCameraData->worldTransform);
}

// Encodes the instanced cube draw into pEnc. All shader inputs are reached through a
// top-level argument buffer of GPU addresses, so every buffer and texture referenced
// that way is also declared to the encoder with useResources().
void RenderCore::drawInstancedCubes(MTL::RenderCommandEncoder* pEnc)
{
    // Layout of the top-level argument buffer: five GPU addresses.
    struct GlobalResources
    {
        uint64_t cameraData;
        uint64_t vertexData;
        uint64_t instanceData;
        uint64_t textureTable;
        uint64_t sampler;
    };

    BumpAllocator* pFrameAllocator = _bufferAllocator[_frame];

    auto [pTopLevelAB, topLevelABOffset] = pFrameAllocator->allocate<GlobalResources>();

    // One-entry descriptor tables for the texture and the sampler, carved out of the
    // scratch heap and autoreleased.
    MTL::Buffer* pTextureTable = _pScratch->newBuffer(sizeof(IRDescriptorTableEntry), MTL::ResourceStorageModeShared)->autorelease();
    IRDescriptorTableSetTexture(static_cast<IRDescriptorTableEntry*>(pTextureTable->contents()), _pTexture, 0, 0);

    MTL::Buffer* pSamplerTable = _pScratch->newBuffer(sizeof(IRDescriptorTableEntry), MTL::ResourceStorageModeShared)->autorelease();
    IRDescriptorTableSetSampler(static_cast<IRDescriptorTableEntry*>(pSamplerTable->contents()), _pSampler, 0);

    MTL::Buffer* pCamera    = _pCameraDataBuffer[_frame];
    MTL::Buffer* pInstances = _pInstanceDataBuffer[_frame];

    // Fill in the addresses.
    pTopLevelAB->cameraData   = pCamera->gpuAddress();
    pTopLevelAB->vertexData   = _cubeMesh.pVertices->gpuAddress();
    pTopLevelAB->instanceData = pInstances->gpuAddress();
    pTopLevelAB->textureTable = pTextureTable->gpuAddress();
    pTopLevelAB->sampler      = pSamplerTable->gpuAddress();

    MTL::Resource* perFrameResources[] = {
        pCamera,
        pInstances,
        pTextureTable,
        pSamplerTable
    };
    const MTL::RenderStages vertexAndFragment = MTL::RenderStageVertex | MTL::RenderStageFragment;

    pEnc->useResources(perFrameResources, NUM_ELEMS(perFrameResources), MTL::ResourceUsageRead, vertexAndFragment);

    // Encode the draw call:

    pEnc->setTriangleFillMode(_triangleFillMode);
    pEnc->setCullMode(MTL::CullModeBack);

    pEnc->setRenderPipelineState(_pInstancingPipeline);
    pEnc->setDepthStencilState(_pDepthStencilState);

    // The same top-level argument buffer is bound to both stages.
    MTL::Buffer* pArgumentBuffer = pFrameAllocator->baseBuffer();
    pEnc->setVertexBuffer(pArgumentBuffer, topLevelABOffset, kIRArgumentBufferBindPoint);
    pEnc->setFragmentBuffer(pFrameAllocator->baseBuffer(), topLevelABOffset, kIRArgumentBufferBindPoint);

    MTL::Resource* staticResources[] {
        _cubeMesh.pVertices,
        _pTexture,
    };

    pEnc->useResources(staticResources, NUM_ELEMS(staticResources), MTL::ResourceUsageRead, vertexAndFragment);

    pEnc->setFrontFacingWinding(_cubeMesh.winding);

    // 36 indices are drawn per instance, for kNumInstances instances.
    //#-code-listing(drawCompanion)
    IRRuntimeDrawIndexedPrimitives(pEnc,
        MTL::PrimitiveTypeTriangle,
        6 * 6,
        _cubeMesh.indexType,
        _cubeMesh.pIndices,
        0,
        kNumInstances);
    //#-end-code-listing
}

// Encodes the grass draw into pEnc according to _renderMode:
//   RM_NoGS_NoTS                      - plain indexed draw of the pre-tessellated mesh.
//   RM_GS_NoTS / RM_GS_NoTS_NonIndexed - geometry-stage emulation (indexed / non-indexed).
//   RM_GS_TS / RM_GS_TS_NonIndexed     - tessellation + geometry emulation (indexed / non-indexed).
// Transient argument data comes from the current frame's bump allocator; small
// per-draw buffers come from the scratch heap and are autoreleased.
void RenderCore::drawGrass(MTL::RenderCommandEncoder* pEnc)
{
    MTL::Buffer* pCameraDataBuffer = _pCameraDataBuffer[_frame];
    BumpAllocator* pAllocator      = _bufferAllocator[_frame];

    pEnc->setFrontFacingWinding(_pretessellatedGrassMesh.winding);

    // Encode draws:

    if (_renderMode == RenderCore::RM_NoGS_NoTS)
    {
        // Set up global camera data:
        struct CameraDataABLayout
        {
            uint64_t perspectiveTransform;
        };

        auto [pGlobalResources, globalResourcesOffset] = pAllocator->allocate<CameraDataABLayout>();
        pGlobalResources->perspectiveTransform         = pCameraDataBuffer->gpuAddress();

        // The camera buffer is only reachable through the address stored above, so
        // declare it here instead of relying on an earlier draw having done so.
        pEnc->useResource(pCameraDataBuffer, MTL::ResourceUsageRead, MTL::RenderStageVertex);

        // Encode the draw call:

        //#-code-listing(draw)
        pEnc->setRenderPipelineState(_pSimpleFlatGrassPipeline);
        pEnc->setVertexBuffer(pAllocator->baseBuffer(), globalResourcesOffset, kIRArgumentBufferBindPoint);
        pEnc->setVertexBuffer(_pretessellatedGrassMesh.pVertices, 0, 0);
        pEnc->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle,
            _pretessellatedGrassMesh.numIndices,
            _pretessellatedGrassMesh.indexType,
            _pretessellatedGrassMesh.pIndices,
            0);
        //#-end-code-listing
    }
    else
    {
        // Animation phase. NOTE: this is function-local static state, so it is shared
        // by every RenderCore instance and only advances in the emulated modes.
        static float frameAnim  = 0.f;
        MTL::Buffer* pFrameData = _pScratch->newBuffer(sizeof(float), MTL::ResourceStorageModeShared)->autorelease();
        float* pFrameAnim       = (float*)pFrameData->contents();
        frameAnim += 0.01 * _animationFactor;
        *pFrameAnim = frameAnim;

        // Two floats: a constant 1.0 followed by the inner tessellation value.
        MTL::Buffer* pTessParamsBuffer = _pScratch->newBuffer(2 * sizeof(float), MTL::ResourceStorageModeShared)->autorelease();
        float* pTessParams             = (float*)pTessParamsBuffer->contents();
        pTessParams[0]                 = 1.0f;
        pTessParams[1]                 = _tessellationInnerValue;

        // Set up global camera data and animation buffers, common to both GS and TSGS pipelines:

        struct TessellationABLayout
        {
            uint64_t cameraData;
            uint64_t frameData;
            uint64_t tessellationData;
        };

        auto [pMeshCameraData, meshCameraDataOffset] = pAllocator->allocate<TessellationABLayout>();

        pMeshCameraData->cameraData       = pCameraDataBuffer->gpuAddress();
        pMeshCameraData->frameData        = pFrameData->gpuAddress();
        pMeshCameraData->tessellationData = pTessParamsBuffer->gpuAddress();

        pEnc->setObjectBuffer(pAllocator->baseBuffer(), meshCameraDataOffset, kIRArgumentBufferBindPoint);
        pEnc->setMeshBuffer(pAllocator->baseBuffer(), meshCameraDataOffset, kIRArgumentBufferBindPoint);

        MTL::Resource* meshPipelineIndirectResources[] = {
            pCameraDataBuffer,
            pTessParamsBuffer,
            pFrameData
        };

        pEnc->useResources(meshPipelineIndirectResources, NUM_ELEMS(meshPipelineIndirectResources), MTL::ResourceUsageRead, MTL::RenderStageObject|MTL::RenderStageMesh);

        // Render according to user selection (GS or TSGS):

        if (_renderMode == RenderCore::RM_GS_NoTS || _renderMode == RenderCore::RM_GS_NoTS_NonIndexed)
        {
            const bool indexed = (_renderMode == RenderCore::RM_GS_NoTS);

            // Set the vertex buffers:
            MTL::Buffer* pVertexBuffer = indexed ? _pretessellatedGrassMesh.pVertices : _presessellatedNonIndexedGrassMesh.pVertices;
            MTL::Buffer* pIndexBuffer  = indexed ? _pretessellatedGrassMesh.pIndices : nullptr;

            // Vertex-buffer table consumed by the emulation; only slot 0 is filled.
            auto [pVertexIn, vertexInOffset] = pAllocator->allocate<IRRuntimeVertexBuffers>();

            uint32_t vertexBufferStride = sizeof(simd::float4);
            (*pVertexIn)[0].addr        = pVertexBuffer->gpuAddress();
            (*pVertexIn)[0].stride      = vertexBufferStride;

            pEnc->setObjectBuffer(pAllocator->baseBuffer(), vertexInOffset, kIRVertexBufferBindPoint);

            // Encode the draw call:

            pEnc->setRenderPipelineState(_geometryPipeline.pRenderPipelineState);
            pEnc->useResource(pVertexBuffer, MTL::ResourceUsageRead, MTL::RenderStageObject | MTL::RenderStageMesh);
            if (pIndexBuffer) pEnc->useResource(pIndexBuffer, MTL::ResourceUsageRead, MTL::RenderStageObject | MTL::RenderStageMesh);

            if (indexed)
            {
                IRRuntimeDrawIndexedPrimitivesGeometryEmulation(/* enc */ pEnc,
                                                                /* primitiveType */ IRRuntimePrimitiveTypeTriangle,
                                                                /* indexType */ _pretessellatedGrassMesh.indexType,
                                                                /* indexBuffer */ _pretessellatedGrassMesh.pIndices,
                                                                /* geometryPipelineConfig */ _geometryPipeline.pipelineConfig,
                                                                /*instanceCount*/ 1,
                                                                /* indexCountPerInstance */ _pretessellatedGrassMesh.numIndices,
                                                                /* startIndex */ 0,
                                                                /* baseVertex */ 0,
                                                                /* baseInstance */ 0);
            }
            else
            {
                assert(_renderMode == RenderCore::RM_GS_NoTS_NonIndexed);
                IRRuntimeDrawPrimitivesGeometryEmulation(/* enc */ pEnc,
                                                         /* primitiveType */ IRRuntimePrimitiveTypeTriangle,
                                                         /* geometryPipelineConfig */ _geometryPipeline.pipelineConfig,
                                                         /* instanceCount */ 1,
                                                         /* vertexCountPerInstance*/ _presessellatedNonIndexedGrassMesh.numVertices,
                                                         /* baseVertex */ 0,
                                                         /* baseInstance */ 0);
            }
        }
        else if (_renderMode == RenderCore::RM_GS_TS || _renderMode == RenderCore::RM_GS_TS_NonIndexed)
        {
            const bool indexed = (_renderMode == RenderCore::RM_GS_TS);

            // Set the vertex buffers:
            MTL::Buffer* pVertexBuffer = indexed ? _tessellationPatchGrassMesh.pVertices : _tessellationNonIndexedPatchGrassMesh.pVertices;
            MTL::Buffer* pIndexBuffer  = indexed ? _tessellationPatchGrassMesh.pIndices : nullptr;

            // Vertex-buffer table consumed by the emulation; only slot 0 is filled.
            auto [pVertexIn, vertexInOffset] = pAllocator->allocate<IRRuntimeVertexBuffers>();

            uint32_t vertexBufferStride = sizeof(simd::float4);
            (*pVertexIn)[0].addr        = pVertexBuffer->gpuAddress();
            (*pVertexIn)[0].stride      = vertexBufferStride;

            pEnc->setObjectBuffer(pAllocator->baseBuffer(), vertexInOffset, kIRVertexBufferBindPoint);

            // Encode the draw call:

            pEnc->setRenderPipelineState(_tessellationPipeline.pRenderPipelineState);
            // Declare the buffer that is actually drawn. An extra declaration used to
            // name the indexed mesh's vertices unconditionally, which was the wrong
            // buffer in non-indexed mode.
            pEnc->useResource(pVertexBuffer, MTL::ResourceUsageRead, MTL::RenderStageObject | MTL::RenderStageMesh);
            if (pIndexBuffer) pEnc->useResource(pIndexBuffer, MTL::ResourceUsageRead, MTL::RenderStageObject | MTL::RenderStageMesh);

            pEnc->setObjectBuffer(pAllocator->baseBuffer(), meshCameraDataOffset, kIRArgumentBufferHullDomainBindPoint);
            pEnc->setMeshBuffer(pAllocator->baseBuffer(), meshCameraDataOffset, kIRArgumentBufferHullDomainBindPoint);

            // Work around the validation error:
            // bind a zeroed 8-byte region to the descriptor- and sampler-heap slots.
            auto [pZero, zeroOffset] = pAllocator->allocate<uint64_t>();
            *pZero                   = 0ul;
            pEnc->setObjectBuffer(pAllocator->baseBuffer(), zeroOffset, kIRDescriptorHeapBindPoint);
            pEnc->setObjectBuffer(pAllocator->baseBuffer(), zeroOffset, kIRSamplerHeapBindPoint);
            pEnc->setMeshBuffer(pAllocator->baseBuffer(), zeroOffset, kIRDescriptorHeapBindPoint);
            pEnc->setMeshBuffer(pAllocator->baseBuffer(), zeroOffset, kIRSamplerHeapBindPoint);

            // Bind tessellator tables:
            pEnc->setObjectBuffer(_pTessellatorTables, 0, kIRRuntimeTessellatorTablesBindPoint);
            pEnc->setMeshBuffer(_pTessellatorTables, 0, kIRRuntimeTessellatorTablesBindPoint);

            if (indexed)
            {
                // Indexed draw call:
                IRRuntimeDrawIndexedPatchesTessellationEmulation(/* enc */ pEnc,
                                                                 /* primitiveTopology */ IRRuntimePrimitiveType3ControlPointPatchlist,
                                                                 /* indexType */ _tessellationPatchGrassMesh.indexType,
                                                                 /* indexBuffer */ _tessellationPatchGrassMesh.pIndices,
                                                                 /* tessellationPipelineConfig*/ _tessellationPipeline.pipelineConfig,
                                                                 /* instanceCount */ 1,
                                                                 /* indexCountPerInstance */ _tessellationPatchGrassMesh.numIndices,
                                                                 /* baseInstance */ 0,
                                                                 /* baseVertex */ 0,
                                                                 /* startIndex */ 0);
            }
            else
            {
                assert( _renderMode == RenderCore::RM_GS_TS_NonIndexed );

                // Non-indexed draw call:
                IRRuntimeDrawPatchesTessellationEmulation(/* enc */ pEnc,
                                                          /* primitiveTopology */ IRRuntimePrimitiveType3ControlPointPatchlist,
                                                          /* tessellationPipelineConfig */ _tessellationPipeline.pipelineConfig,
                                                          /* instanceCount*/ 1,
                                                          /* vertexCountPerInstance*/ _tessellationNonIndexedPatchGrassMesh.numVertices,
                                                          /* baseInstance*/ 0,
                                                          /* baseVertex */ 0);
            }
        }
    }
}

// Encodes and commits one frame: advances the frame slot, waits for a free slot,
// updates the animated data, regenerates the texture with a compute pass, then draws
// the cubes and the grass into pRenderPass and presents pDrawable.
void RenderCore::draw(MTL::RenderPassDescriptor* pRenderPass, CA::MetalDrawable* pDrawable)
{
    // Objects autoreleased while encoding this frame are collected by this pool.
    NS::AutoreleasePool* pFramePool = NS::AutoreleasePool::alloc()->init();

    _frame = (_frame + 1) % kMaxFramesInFlight;

    // Wait for the signal to start encoding the next frame.
    dispatch_semaphore_wait(_semaphore, DISPATCH_TIME_FOREVER);

    // Reset the bump allocator for this new frame.
    _bufferAllocator[_frame]->reset();

    MTL::CommandBuffer* pCommandBuffer = _pCommandQueue->commandBuffer();

    // Give the slot back once the command buffer has completed. The block captures a
    // plain pointer to this object.
    RenderCore* pSelf = this;
    pCommandBuffer->addCompletedHandler(^void(MTL::CommandBuffer* pCompletedBuffer) {
        dispatch_semaphore_signal(pSelf->_semaphore);
    });

    // Update buffer data to produce the rotating cubes.
    updateWorld();

    // Update the texture:
    generateMandelbrotTexture(pCommandBuffer);

    // Encode the render pass:
    MTL::RenderCommandEncoder* pRenderEncoder = pCommandBuffer->renderCommandEncoder(pRenderPass);

    drawInstancedCubes(pRenderEncoder);
    drawGrass(pRenderEncoder);

    pRenderEncoder->endEncoding();

    pCommandBuffer->presentDrawable(pDrawable);
    pCommandBuffer->commit();

    pFramePool->release();
}

// Records the drawable's aspect ratio (width / height), which updateWorld() feeds
// into the perspective projection.
//
// Sizes that are not strictly positive (including NaN) are ignored and the previous
// aspect ratio is kept: a zero height would otherwise produce an infinite or NaN
// ratio, and a zero width a degenerate projection.
void RenderCore::resizeDrawable(float width, float height)
{
    if (!(width > 0.f) || !(height > 0.f))
    {
        return;
    }

    _aspectRatio = width / height;
}
