//-------------------------------------------------------------------------------------------------------------------------------------------------------------
//
// Copyright 2023 Apple Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//-------------------------------------------------------------------------------------------------------------------------------------------------------------


#define NS_PRIVATE_IMPLEMENTATION
#define CA_PRIVATE_IMPLEMENTATION
#define MTL_PRIVATE_IMPLEMENTATION
#include <Foundation/Foundation.hpp>
#include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp>

#define IR_RUNTIME_METALCPP
#define IR_PRIVATE_IMPLEMENTATION
#include <metal_irconverter_runtime/metal_irconverter_runtime.h>

#include <metal_irconverter/metal_irconverter.h>

#include <fstream>
#include <simd/simd.h>
#include <utility>
#include <variant>
#include <vector>

// Include RenderCore after metal-cpp and metal-irconverter to generate
// their implementations inline in this file.
#include "RenderCore.hpp"

#include "MathUtils.hpp"

// Number of elements in a C array. `arr` must be an actual array (not a
// pointer); the argument is parenthesized so that expressions such as `*p`
// expand with the intended precedence.
#define NUM_ELEMS(arr) (sizeof(arr) / sizeof((arr)[0]))

// Dimensions of the ray traced output texture; also used as the ray dispatch
// and compute grid size.
static constexpr uint32_t kTextureWidth                  = 800 * 2;
static constexpr uint32_t kTextureHeight                 = 600 * 2;
// Capacity handed to each per-frame BumpAllocator.
static constexpr uint64_t kPerFrameBumpAllocatorCapacity = 1024; // 1 KiB

/// Creates the renderer: retains the device, builds the pipelines, shader
/// table, textures, sampler and scene buffers, then sets up the per-frame
/// synchronization and allocation state.
///
/// @param pDevice          Metal device used for all resource creation; retained here.
/// @param shaderSearchPath Search path forwarded to the shader pipeline builders.
RenderCore::RenderCore(MTL::Device* pDevice, const std::string& shaderSearchPath)
    : _pDevice(pDevice->retain())
    , _frame(0)
{
    _pCommandQueue = _pDevice->newCommandQueue();

    // Pipelines are built before the shader table because buildShaderTables()
    // asserts that the ray tracing pipeline already exists.
    buildRenderPipelines(shaderSearchPath);
    buildComputePipelines(shaderSearchPath);
    buildShaderTables();

    buildTextures();
    buildSamplers();
    buildBuffers();

    // draw() waits on this semaphore once per frame and each command buffer's
    // completion handler signals it.
    _semaphore = dispatch_semaphore_create(kMaxFramesInFlight);

    // One bump allocator per in-flight frame.
    size_t frameSlot = 0;
    while (frameSlot < kMaxFramesInFlight)
    {
        _bufferAllocator[frameSlot] = new BumpAllocator(pDevice, kPerFrameBumpAllocatorCapacity, MTL::ResourceStorageModeShared);
        ++frameSlot;
    }
}

/// Releases the vertex and index buffers referenced by a mesh.
/// The mesh struct itself is left untouched (its pointers are not cleared).
static void releaseMesh(IndexedMesh* pIndexedMesh)
{
    IndexedMesh& mesh = *pIndexedMesh;

    mesh.pVertices->release();
    mesh.pIndices->release();
}

/// Releases every Metal object and per-frame allocator owned by the renderer.
///
/// NOTE(review): nothing here waits for in-flight command buffers; presumably
/// callers idle the GPU before destroying the renderer — confirm.
RenderCore::~RenderCore()
{
    // Both textures are created in buildTextures(); the sphere texture was
    // previously never released and leaked.
    _pTriangleTexture->release();
    _pSphereTexture->release();
    _pSampler->release();

    // NOTE(review): the instance/camera data buffers and several of the meshes
    // released below are not created anywhere in this file; this assumes the
    // header initializes them to null (releasing null is a no-op) — confirm.
    for (size_t i = 0; i < kMaxFramesInFlight; ++i)
    {
        _pInstanceDataBuffer[i]->release();
    }

    for (size_t i = 0; i < kMaxFramesInFlight; ++i)
    {
        _pCameraDataBuffer[i]->release();
    }

    // The bump allocators were created with `new` in the constructor.
    for (size_t i = 0; i < kMaxFramesInFlight; ++i)
    {
        delete _bufferAllocator[i];
    }

    _pPresentPipeline->release();
    releaseAccelerationStructureWithResources(&_triangleSphereAccelStructure);

    // Ray tracing shader table and pipeline objects.
    _pTriangleSphereSBT->release();
    _triangleSphereRTPipelineContext.pIFT->release();
    _triangleSphereRTPipelineContext.pVFT->release();
    _triangleSphereRTPipelineContext.pRTPSO->release();

    _pScratch->release();
    releaseMesh(&_cubeMesh);
    releaseMesh(&_pretessellatedGrassMesh);
    releaseMesh(&_tessellationPatchGrassMesh);
    releaseMesh(&_quadMesh);
    releaseMesh(&_screenMesh);

    // The queue and device go last, after everything created from them.
    _pCommandQueue->release();
    _pDevice->release();
    dispatch_release(_semaphore);
}

/// Builds the render pipeline that presentTexture()/draw() use to put the
/// ray traced texture on screen.
///
/// @param shaderSearchPath Search path forwarded to the pipeline builder.
void RenderCore::buildRenderPipelines(const std::string& shaderSearchPath)
{
    auto* pPipeline = shader_pipeline::newPresentPipeline(shaderSearchPath, _pDevice);
    _pPresentPipeline = pPipeline;

    // Pipeline creation failure is treated as a programming/setup error.
    assert(_pPresentPipeline);
}

/// Builds the ray tracing pipeline context: the compute pipeline state plus
/// its visible and intersection function tables.
///
/// @param shaderSearchPath Search path forwarded to the pipeline builder.
void RenderCore::buildComputePipelines(const std::string& shaderSearchPath)
{
    _triangleSphereRTPipelineContext = shader_pipeline::newTriangleSphereRTPipeline(shaderSearchPath, _pDevice);

    // All three objects are required by raytrace(); check each one separately
    // so a failing assert identifies which object is missing.
    const auto& rtContext = _triangleSphereRTPipelineContext;
    assert(rtContext.pRTPSO);
    assert(rtContext.pVFT);
    assert(rtContext.pIFT);
}

// Shader table record that carries only a shader identifier.
// Used for the ray generation and miss entries of TriangleSphereSBT.
struct ShaderRecord {
    IRShaderIdentifier shaderIdentifier;
    /* no local root signature */
};

// Shader table record that carries a shader identifier followed by one float
// of local root signature data. Used for the hit group entries of
// TriangleSphereSBT; sizeof(ShaderRecordWithData) is the hit group table
// stride passed to the ray dispatch in raytrace().
struct ShaderRecordWithData {
    IRShaderIdentifier shaderIdentifier;
    float lrsData; // local root signature data
};

// CPU-side layout of the shader table buffer filled in by buildShaderTables().
// Member order matters: raytrace() derives the hit group table as the span
// from offsetof(leftTriangle) up to offsetof(missRecord), so the four hit
// group records must stay contiguous and directly precede missRecord.
struct TriangleSphereSBT
{
    ShaderRecord         rayGenRecord;  // ray generation shader
    ShaderRecordWithData leftTriangle;  // first hit group record
    ShaderRecordWithData rightTriangle;
    ShaderRecordWithData leftSphere;
    ShaderRecordWithData rightSphere;   // last hit group record
    ShaderRecord         missRecord;    // miss shader
};

/// Allocates the shader table buffer and fills in one record per shader /
/// hit group, following the TriangleSphereSBT layout.
///
/// Must be called after buildComputePipelines().
void RenderCore::buildShaderTables()
{
    assert(_triangleSphereRTPipelineContext.pRTPSO && "This function must be called after buildComputePipelines");

    // A shader table consists of shader records.
    // A shader record contains shader identifiers and local root signature data.
    // A shader identifier (IRShaderIdentifier) contains an index into the visible function table for raygen, miss, callable shaders.
    // For closest hit shaders, the shader identifier additionally references an intersection function (index into the intersection function table)

    const size_t sbtSize = sizeof(TriangleSphereSBT);
    _pTriangleSphereSBT = _pDevice->newBuffer(sbtSize, MTL::ResourceStorageModeShared);

    // The records below are written straight through contents(); fail loudly
    // (like the other build steps) instead of dereferencing a null buffer.
    assert(_pTriangleSphereSBT && "Failed to allocate the shader table buffer");

    auto* shaderRecords = (TriangleSphereSBT *)(_pTriangleSphereSBT->contents());

    // ray gen
    IRShaderIdentifierInit(&(shaderRecords->rayGenRecord.shaderIdentifier), kTriangleRayGenIndex);

    // left triangle hit group with a closest hit shader only
    IRShaderIdentifierInit(&(shaderRecords->leftTriangle.shaderIdentifier), kTriangleClosestHitIndex);
    shaderRecords->leftTriangle.lrsData = 1.0f;

    // right triangle hit group: closest hit shader plus an intersection
    // function table entry (kTriangleAnyHitIndex — presumably the triangle
    // any-hit function)
    IRShaderIdentifierInitWithCustomIntersection(&(shaderRecords->rightTriangle.shaderIdentifier), kTriangleClosestHitIndex, kTriangleAnyHitIndex);
    shaderRecords->rightTriangle.lrsData = 1.0f;

    // left sphere hit group with a custom intersection
    IRShaderIdentifierInitWithCustomIntersection(&(shaderRecords->leftSphere.shaderIdentifier), kSphereClosestHitIndex, kSphereIntersectionIndex);
    shaderRecords->leftSphere.lrsData = 0.60f;

    // right sphere hit group with a custom intersection and an anyhit shader
    IRShaderIdentifierInitWithCustomIntersection(&(shaderRecords->rightSphere.shaderIdentifier), kSphereClosestHitIndex, kSphereIntersectionAnyHitIndex);
    shaderRecords->rightSphere.lrsData = 0.95f;

    // miss record
    IRShaderIdentifierInit(&(shaderRecords->missRecord.shaderIdentifier), kSphereMissIndex);
}

/// Creates the two private-storage BGRA8 textures (kTextureWidth x
/// kTextureHeight) owned by the renderer. raytrace() writes into
/// _pSphereTexture and draw() presents it.
void RenderCore::buildTextures()
{
    NS::SharedPtr<MTL::TextureDescriptor> pTextureDesc = NS::TransferPtr(MTL::TextureDescriptor::alloc()->init());
    pTextureDesc->setWidth(kTextureWidth);
    pTextureDesc->setHeight(kTextureHeight);
    pTextureDesc->setDepth(1);
    pTextureDesc->setPixelFormat(MTL::PixelFormatBGRA8Unorm);
#if IR_VERSION_MAJOR < 3
    pTextureDesc->setTextureType(MTL::TextureType2DArray);
#else
    pTextureDesc->setTextureType(MTL::TextureType2D);
#endif
    pTextureDesc->setArrayLength(1);
    pTextureDesc->setStorageMode(MTL::StorageModePrivate);

    // Texture descriptors take MTL::TextureUsage flags, not MTL::ResourceUsage
    // flags (those are for useResource). The previous ResourceUsage* values
    // happened to alias ShaderRead | ShaderWrite | RenderTarget; the textures
    // are only written by the compute pass and sampled by the present pass,
    // so request exactly shader read + shader write.
    pTextureDesc->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);

    _pTriangleTexture = _pDevice->newTexture(pTextureDesc.get());
    _pSphereTexture   = _pDevice->newTexture(pTextureDesc.get());

    // Match the other build steps: treat creation failure as a setup error.
    assert(_pTriangleTexture);
    assert(_pSphereTexture);
}

void RenderCore::buildSamplers()
{
    MTL::SamplerDescriptor* pSampDesc = MTL::SamplerDescriptor::alloc()->init()->autorelease();
    pSampDesc->setSupportArgumentBuffers(true);
    pSampDesc->setMagFilter(MTL::SamplerMinMagFilterLinear);
    pSampDesc->setMinFilter(MTL::SamplerMinMagFilterLinear);
    pSampDesc->setRAddressMode(MTL::SamplerAddressModeRepeat);
    pSampDesc->setSAddressMode(MTL::SamplerAddressModeRepeat);
    _pSampler = _pDevice->newSamplerState(pSampDesc);
    assert(_pSampler);
}

// Builds the screen quad, the scratch heap, and the scene's acceleration
// structures: one triangle primitive structure, one bounding-box ("sphere")
// primitive structure, and an instance structure that places two instances
// of each. The result is stored in _triangleSphereAccelStructure.
//
// NOTE(review): several temporaries below are autoreleased; this function is
// called from the constructor, so it relies on the caller having an
// autorelease pool in place — confirm.
void RenderCore::buildBuffers()
{
    // NOTE(review): float2 is not used in this function.
    using simd::float2;
    using simd::float3;
    
    // Screen mesh helps present the ray traced objects on screen:
    _screenMesh = mesh_utils::newScreenQuad(_pDevice);

    // Some extra scratch memory for argument buffers:
    // (raytrace() sub-allocates its UAV descriptor table from this heap.)

    MTL::HeapDescriptor* pHeapDesc = MTL::HeapDescriptor::alloc()->init()->autorelease();
    pHeapDesc->setSize(4 * 1024); // 4 KiB
    pHeapDesc->setStorageMode(MTL::StorageModeShared);
    pHeapDesc->setHazardTrackingMode(MTL::HazardTrackingModeTracked);
    pHeapDesc->setType(MTL::HeapTypeAutomatic);
    _pScratch = _pDevice->newHeap(pHeapDesc);
    
    // Build scene data
    
    // Returns a new (owned, +1) geometry descriptor for a single triangle at
    // z = -1.5 with 16-bit indices. The vertex/index buffers are held by
    // TransferPtr locals and released when the lambda returns; presumably the
    // descriptor retains them — confirm.
    auto newTriangleDescriptor = [&pDevice=_pDevice]() -> MTL::AccelerationStructureTriangleGeometryDescriptor* {
        
        float3 vertexData[] = {
            { -1.0, -1.0, -1.5 },
            { +1.0, -1.0, -1.5 },
            {  0.0, +1.0, -1.5 }
        };
        
        uint16_t indexData[] = {
            0, 1, 2
        };
        
        auto vertexBuffer = NS::TransferPtr(pDevice->newBuffer(sizeof(vertexData), MTL::ResourceStorageModeShared));
        auto indexBuffer = NS::TransferPtr(pDevice->newBuffer(sizeof(indexData), MTL::ResourceStorageModeShared));
        
        memcpy(vertexBuffer->contents(), vertexData, sizeof(vertexData));
        memcpy(indexBuffer->contents(), indexData, sizeof(indexData));
        
        auto pGeometryDescriptor = MTL::AccelerationStructureTriangleGeometryDescriptor::alloc()->init();
        
        pGeometryDescriptor->setVertexBuffer(vertexBuffer.get());
        pGeometryDescriptor->setVertexBufferOffset(0);
        // Stride is the size of simd::float3 as stored in vertexData above.
        pGeometryDescriptor->setVertexStride(sizeof(float3));
        
        pGeometryDescriptor->setIndexBuffer(indexBuffer.get());
        pGeometryDescriptor->setIndexBufferOffset(0);
        pGeometryDescriptor->setIndexType(MTL::IndexTypeUInt16);
        
        // Three indices per triangle.
        pGeometryDescriptor->setTriangleCount(NUM_ELEMS(indexData)/3);
        pGeometryDescriptor->setIntersectionFunctionTableOffset(0);
        
        return pGeometryDescriptor;
    };
    
    // Returns a new (owned, +1) geometry descriptor holding one axis-aligned
    // bounding box spanning [-1, +1] on every axis. The same buffer-lifetime
    // caveat as for the triangle descriptor applies.
    auto newBoundingBoxDescriptor = [&pDevice=_pDevice]() -> MTL::AccelerationStructureBoundingBoxGeometryDescriptor* {
        
        MTL::AxisAlignedBoundingBox aabbData[] = {
            { MTL::PackedFloat3(-1.0, -1.0, -1.0), MTL::PackedFloat3(+1.0, +1.0, +1.0) }
        };
        
        auto vertexBuffer = NS::TransferPtr(pDevice->newBuffer(sizeof(aabbData), MTL::ResourceStorageModeShared));
        memcpy(vertexBuffer->contents(), aabbData, sizeof(aabbData));
        
        auto pGeometryDescriptor = MTL::AccelerationStructureBoundingBoxGeometryDescriptor::alloc()->init();
        
        pGeometryDescriptor->setBoundingBoxBuffer(vertexBuffer.get());
        pGeometryDescriptor->setBoundingBoxBufferOffset(0);
        pGeometryDescriptor->setBoundingBoxStride(sizeof(MTL::AxisAlignedBoundingBox));
        
        pGeometryDescriptor->setBoundingBoxCount(NUM_ELEMS(aabbData));
        pGeometryDescriptor->setIntersectionFunctionTableOffset(0);
        
        return pGeometryDescriptor;
    };
    
    // Wrap the single triangle descriptor in a one-element array. The CFArray
    // is created with kCFTypeArrayCallBacks and cast to NS::Array; both the
    // descriptor and the array are autoreleased.
    auto* pTriangleDescriptor = newTriangleDescriptor()->autorelease();
    NS::Array* pTriangleGeometries = ((NS::Array *)CFArrayCreate(CFAllocatorGetDefault(), (const void**)&pTriangleDescriptor, 1, &kCFTypeArrayCallBacks))->autorelease();
    
    // Two instances of bottom-level structure 0 (the triangle), placed at the
    // top-left and top-right of the scene via the translation column.
    std::vector<MTL::AccelerationStructureInstanceDescriptor> triangleInstances{
        MTL::AccelerationStructureInstanceDescriptor{
            .accelerationStructureIndex = 0,
            .intersectionFunctionTableOffset = 0, // this is an opaque instance, so it uses the built-in Metal triangle intersection
            .mask = 0xFF,
            .options = MTL::AccelerationStructureInstanceOptionOpaque,
            .transformationMatrix = MTL::PackedFloat4x3({ 1, 0, 0 },       /* col 0 */
                                                        { 0, 1, 0 },       /* col 1 */
                                                        { 0, 0, 1 },       /* col 2 */
                                                        { -1.5, 1.5, -3 }) /* col 3 */
        },
        MTL::AccelerationStructureInstanceDescriptor{
            .accelerationStructureIndex = 0,
            .intersectionFunctionTableOffset = kSBTTriangleIntersectionFunctionIndex, // Use indirect triangle intersection function
            .mask = 0xFF,
            .options = MTL::AccelerationStructureInstanceOptionDisableTriangleCulling,
            .transformationMatrix = MTL::PackedFloat4x3({ 1, 0, 0 },       /* col 0 */
                                                        { 0, 1, 0 },       /* col 1 */
                                                        { 0, 0, 1 },       /* col 2 */
                                                        { 1.5, 1.5, -3 })  /* col 3 */
            
        }
    };
    
    // Same wrapping as above for the bounding-box ("sphere") descriptor.
    auto* pSphereDescriptor = newBoundingBoxDescriptor()->autorelease();
    auto pSphereGeometries = ((NS::Array *)CFArrayCreate(CFAllocatorGetDefault(), (const void**)&pSphereDescriptor, 1, &kCFTypeArrayCallBacks))->autorelease();
    
    // Two instances of bottom-level structure 1 (the bounding box), placed at
    // the bottom-left and bottom-right of the scene.
    std::vector<MTL::AccelerationStructureInstanceDescriptor> sphereInstances{
        MTL::AccelerationStructureInstanceDescriptor{
            .accelerationStructureIndex = 1,
            .intersectionFunctionTableOffset = kSBTBoxIntersectionFunctionIndex, // Use indirect sphere intersection function
            .mask = 0xFF,
            .options = MTL::AccelerationStructureInstanceOptionNone,
            .transformationMatrix = MTL::PackedFloat4x3({1, 0, 0},    /* col 0 */
                                                        {0, 1, 0},    /* col 1 */
                                                        {0, 0, 1},    /* col 2 */
                                                        {-1.5, -1.5, -4.5}) /* col 3 */
        },
        MTL::AccelerationStructureInstanceDescriptor{
            .accelerationStructureIndex = 1,
            .intersectionFunctionTableOffset = kSBTBoxIntersectionFunctionIndex, // Use indirect sphere intersection function
            .mask = 0xFF,
            .options = MTL::AccelerationStructureInstanceOptionNone,
            .transformationMatrix = MTL::PackedFloat4x3({1, 0, 0},
                                                        {0, 1, 0},
                                                        {0, 0, 1},
                                                        {+1.5, -1.5, -4.5})
        }
    };
    
    // Build the two bottom-level structures; both are autoreleased here and
    // kept alive by the pIndirectResources array created further down.
    MTL::AccelerationStructure* pTrianglePrimStructure = newPrimitiveAccelerationStructure(_pDevice,
                                                                                           _pCommandQueue,
                                                                                           pTriangleGeometries)->autorelease();
    
    MTL::AccelerationStructure* pSpherePrimStructure = newPrimitiveAccelerationStructure(_pDevice,
                                                                                         _pCommandQueue,
                                                                                         pSphereGeometries)->autorelease();
    
    // Order matters: position 0 is the triangle structure and position 1 the
    // sphere structure, matching the accelerationStructureIndex values used
    // by the instance descriptors above.
    std::vector<MTL::AccelerationStructure*> bottomLevelStrucutres {
        pTrianglePrimStructure,
        pSpherePrimStructure
    };
    
    
    // Concatenate the instances: triangles first, then spheres. raytrace()
    // supplies one instance contribution per entry (four in total),
    // presumably in this same order — confirm.
    std::vector<MTL::AccelerationStructureInstanceDescriptor> triangleAndSphereInstances;
    triangleAndSphereInstances.insert(triangleAndSphereInstances.end(), triangleInstances.begin(), triangleInstances.end());
    triangleAndSphereInstances.insert(triangleAndSphereInstances.end(), sphereInstances.begin(), sphereInstances.end());
    
    // Not autoreleased: ownership moves into _triangleSphereAccelStructure.
    MTL::AccelerationStructure* pTriangleSphereInstanceStructure = newInstanceAcceleartionStructure(_pDevice,
                                                                                                    _pCommandQueue,
                                                                                                    bottomLevelStrucutres,
                                                                                                    triangleAndSphereInstances);
    
    // Array of the bottom-level structures, created with kCFTypeArrayCallBacks
    // and not autoreleased. raytrace() iterates it to make the structures
    // resident.
    NS::Array* pIndirectResources = (NS::Array *)CFArrayCreate(CFAllocatorGetDefault(),
                                                               (const void **)&(bottomLevelStrucutres[0]),
                                                               bottomLevelStrucutres.size(),
                                                               &kCFTypeArrayCallBacks);
    
    // The destructor hands this struct to
    // releaseAccelerationStructureWithResources(), which presumably releases
    // both members — confirm.
    _triangleSphereAccelStructure = AccelerationStructureWithResources{
        .pAccelStructure = pTriangleSphereInstanceStructure,
        .pIndirectResources = pIndirectResources
    };
}

/// Encodes one compute pass on `pCommandBuffer` that ray traces the
/// triangle/sphere scene into _pSphereTexture.
///
/// The pass builds a UAV descriptor table, an acceleration structure header
/// with per-instance contributions, a top-level argument buffer, and the
/// ray dispatch arguments, then dispatches one thread per output pixel.
///
/// @param pCommandBuffer Command buffer to encode into; must not be null.
void RenderCore::raytrace(MTL::CommandBuffer* pCommandBuffer)
{
    assert(pCommandBuffer);

    MTL::ComputeCommandEncoder* pComputeEncoder = pCommandBuffer->computeCommandEncoder();

    pComputeEncoder->setComputePipelineState(_triangleSphereRTPipelineContext.pRTPSO);


    // Bind resources (according to root signature):

    // One-entry descriptor table pointing at the output texture, sub-allocated
    // from the scratch heap.
    MTL::Buffer* pUAVTable = _pScratch->newBuffer(sizeof(IRDescriptorTableEntry), MTL::ResourceStorageModeShared)->autorelease();
    IRDescriptorTableSetTexture((IRDescriptorTableEntry*)pUAVTable->contents(), _pSphereTexture, 0, 0);

    // Encode acceleration structure into its header with instance contributions:

    // One contribution per instance (two triangles, two spheres).
    std::vector<uint32_t> instanceContributions{ 0, 1, 2, 3 };

    // The contributions array is stored immediately after the header in the
    // same buffer.
    NS::UInteger headerSize = sizeof(IRRaytracingAccelerationStructureGPUHeader) + sizeof(uint32_t) * instanceContributions.size();
    MTL::Buffer* pAccelStructureHdrBuffer = _pDevice->newBuffer(headerSize, MTL::ResourceStorageModeShared)->autorelease();

    IRRaytracingSetAccelerationStructure((uint8_t *)pAccelStructureHdrBuffer->contents(),
                                         _triangleSphereAccelStructure.pAccelStructure->gpuResourceID(),
                                         (uint8_t *)pAccelStructureHdrBuffer->contents() + sizeof(IRRaytracingAccelerationStructureGPUHeader),
                                         instanceContributions.data(), instanceContributions.size());

    // Point the header at the GPU address of the contributions array.
    auto pHdr = (IRRaytracingAccelerationStructureGPUHeader *)(pAccelStructureHdrBuffer->contents());
    pHdr->addressOfInstanceContributions = pAccelStructureHdrBuffer->gpuAddress() + sizeof(IRRaytracingAccelerationStructureGPUHeader);

    // Encode top-level argument buffer:

    struct TopLevelAB
    {
        uint64_t accelStructureHeaderAddr;
        uint64_t uavTableAddr;
    };

    auto [topLevelABContents, offset]   = _bufferAllocator[_frame]->allocate<TopLevelAB>();

    topLevelABContents->accelStructureHeaderAddr = pAccelStructureHdrBuffer->gpuAddress();
    topLevelABContents->uavTableAddr    = pUAVTable->gpuAddress();

    // Make resident indirect resources:
    // (everything below is reached through GPU addresses / resource IDs
    // rather than direct bindings.)

    std::vector<MTL::Resource*> indirectROResources {
        _triangleSphereAccelStructure.pAccelStructure,
        pAccelStructureHdrBuffer,
        pUAVTable,
        _pTriangleSphereSBT,
        _triangleSphereRTPipelineContext.pIFT,
        _triangleSphereRTPipelineContext.pVFT,
        _bufferAllocator[_frame]->baseBuffer()
    };

    for ( size_t i = 0; i < _triangleSphereAccelStructure.pIndirectResources->count(); ++i )
    {
        indirectROResources.push_back((MTL::Resource*)(_triangleSphereAccelStructure.pIndirectResources->object(i)));
    }

    pComputeEncoder->useResources(indirectROResources.data(), indirectROResources.size(), MTL::ResourceUsageRead);
    pComputeEncoder->useResource(_pSphereTexture, MTL::ResourceUsageWrite);

    // Prepare ray dispatch

    /*
     Shader table layout (see TriangleSphereSBT):
       ShaderRecord         rayGenRecord;
       ShaderRecordWithData leftTriangle;   <- hit group table starts here
       ShaderRecordWithData rightTriangle;
       ShaderRecordWithData leftSphere;
       ShaderRecordWithData rightSphere;
       ShaderRecord         missRecord;     <- miss table starts here
     */
    constexpr uint64_t hgSBTOffset   = offsetof(TriangleSphereSBT, leftTriangle);
    constexpr uint64_t missSBTOffset = offsetof(TriangleSphereSBT, missRecord);

    // The hit group table is addressed with a fixed stride, so the span
    // between the two offsets must hold exactly the four hit group records.
    static_assert(missSBTOffset - hgSBTOffset == 4 * sizeof(ShaderRecordWithData),
                  "Hit group records must be contiguous and precede the miss record");

    // Value-initialize so that no field is sent to the GPU uninitialized.
    IRDispatchRaysDescriptor dispatchRaysDesc{};
    dispatchRaysDesc.RayGenerationShaderRecord = {
        .StartAddress = _pTriangleSphereSBT->gpuAddress(),
        .SizeInBytes = sizeof(ShaderRecord)
    };
    dispatchRaysDesc.HitGroupTable = {
        .StartAddress = _pTriangleSphereSBT->gpuAddress() + hgSBTOffset,
        .SizeInBytes = (missSBTOffset - hgSBTOffset),       // size of the hitgroup table
        .StrideInBytes = sizeof(ShaderRecordWithData)       // stride between shader records in the hitgroup table
    };
    dispatchRaysDesc.MissShaderTable = {
        .StartAddress = _pTriangleSphereSBT->gpuAddress() + missSBTOffset,
        .SizeInBytes = sizeof(ShaderRecord),
        .StrideInBytes = sizeof(ShaderRecord)
    };
    // No callable shaders are used.
    dispatchRaysDesc.CallableShaderTable = {
        .StartAddress = 0,
        .SizeInBytes = 0,
        .StrideInBytes = 0
    };
    dispatchRaysDesc.Width = kTextureWidth;
    dispatchRaysDesc.Height = kTextureHeight;
    dispatchRaysDesc.Depth = 1;


    // Value-initialize here as well: the whole struct is copied to the GPU
    // with setBytes below, including any field not assigned explicitly.
    IRDispatchRaysArgument dispatchRaysArgs{};
    dispatchRaysArgs.DispatchRaysDesc          = dispatchRaysDesc;
    dispatchRaysArgs.GRS                       = _bufferAllocator[_frame]->baseBuffer()->gpuAddress() + offset;
    dispatchRaysArgs.ResDescHeap               = 0;
    dispatchRaysArgs.SmpDescHeap               = 0;
    dispatchRaysArgs.VisibleFunctionTable      = _triangleSphereRTPipelineContext.pVFT->gpuResourceID();
    dispatchRaysArgs.IntersectionFunctionTable = _triangleSphereRTPipelineContext.pIFT->gpuResourceID();

    pComputeEncoder->setBytes(&dispatchRaysArgs, sizeof(IRDispatchRaysArgument), kIRRayDispatchArgumentsBindPoint);

    // Dispatch threads:
    // One thread per output pixel, using one-dimensional threadgroups of the
    // pipeline's maximum size.
    NS::UInteger threadGroupSize = _triangleSphereRTPipelineContext.pRTPSO->maxTotalThreadsPerThreadgroup();
    MTL::Size threadgroupSize(threadGroupSize, 1, 1);

    MTL::Size gridSize = MTL::Size(kTextureWidth, kTextureHeight, 1);
    pComputeEncoder->dispatchThreads(gridSize, threadgroupSize);

    pComputeEncoder->endEncoding();
}

/// Encodes a draw of the screen quad that samples `pTexture` with _pSampler.
///
/// The texture and sampler descriptor entries, and the top-level argument
/// buffer that points at them, are all carved out of the current frame's
/// bump allocator.
///
/// @param pRenderEnc Render encoder to record into; the caller sets the
///                   pipeline state and ends encoding.
/// @param pTexture   Texture to present.
void RenderCore::presentTexture(MTL::RenderCommandEncoder* pRenderEnc, MTL::Texture* pTexture)
{
    // Top-level argument buffer layout: GPU addresses of the two tables.
    struct PresentTLAB
    {
        uint64_t srvTable;
        uint64_t smpTable;
    };

    auto* pFrameAllocator = _bufferAllocator[_frame];

    // Descriptor table entries: texture first, then sampler.
    auto [pTextureEntry, textureEntryOffset] = pFrameAllocator->allocate<IRDescriptorTableEntry>();
    auto [pSamplerEntry, samplerEntryOffset] = pFrameAllocator->allocate<IRDescriptorTableEntry>();

    IRDescriptorTableSetTexture(pTextureEntry, pTexture, 0, 0);
    IRDescriptorTableSetSampler(pSamplerEntry, _pSampler, 0);

    // Argument buffer holding the GPU addresses of both entries.
    auto [pArguments, argumentsOffset] = pFrameAllocator->allocate<PresentTLAB>();
    pArguments->srvTable = pFrameAllocator->baseBuffer()->gpuAddress() + textureEntryOffset;
    pArguments->smpTable = pFrameAllocator->baseBuffer()->gpuAddress() + samplerEntryOffset;

    // The texture is only referenced through the descriptor table, so it has
    // to be made resident explicitly.
    pRenderEnc->useResource(pTexture, MTL::ResourceUsageRead);
    pRenderEnc->setVertexBuffer(_screenMesh.pVertices, 0, kIRVertexBufferBindPoint);
    pRenderEnc->setFragmentBuffer(pFrameAllocator->baseBuffer(), argumentsOffset, kIRArgumentBufferBindPoint);

    pRenderEnc->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle,
                                      _screenMesh.numIndices,
                                      _screenMesh.indexType,
                                      _screenMesh.pIndices,
                                      0);
}

/// Renders one frame: ray traces the scene into _pSphereTexture, draws that
/// texture into `pRenderPass`, and presents `pDrawable`.
///
/// Blocks on the frame semaphore until a frame slot is available; the slot is
/// handed back from the command buffer's completion handler.
///
/// @param pRenderPass Render pass used for the present draw.
/// @param pDrawable   Drawable presented once the command buffer is scheduled.
void RenderCore::draw(MTL::RenderPassDescriptor* pRenderPass, CA::MetalDrawable* pDrawable)
{
    // Scope every autoreleased object created during this frame.
    NS::AutoreleasePool* pFramePool = NS::AutoreleasePool::alloc()->init();

//#define CAPTURE
#ifdef CAPTURE
    MTL::CaptureDescriptor* pCaptureDesc = MTL::CaptureDescriptor::alloc()->init()->autorelease();
    pCaptureDesc->setDestination(MTL::CaptureDestinationDeveloperTools);
    pCaptureDesc->setCaptureObject(_pDevice);

    NS::Error* pCaptureError = nullptr;
    MTL::CaptureManager* pCaptureManager = MTL::CaptureManager::sharedCaptureManager();
    if (!pCaptureManager->startCapture(pCaptureDesc, &pCaptureError))
    {
        printf("%s\n", pCaptureError->localizedDescription()->utf8String());
        __builtin_trap();
    }
#endif

    // Advance to the next frame slot.
    _frame = (_frame + 1) % kMaxFramesInFlight;

    // Wait for the signal to start encoding the next frame.
    dispatch_semaphore_wait(_semaphore, DISPATCH_TIME_FOREVER);

    // Reset the bump allocator for this new frame.
    _bufferAllocator[_frame]->reset();

    // The completion handler captures a plain pointer to this renderer and
    // signals the semaphore when the GPU has finished the frame.
    RenderCore* pSelf = this;
    MTL::CommandBuffer* pCommandBuffer = _pCommandQueue->commandBuffer();
    pCommandBuffer->addCompletedHandler(^void(MTL::CommandBuffer* pCompletedBuffer) {
        dispatch_semaphore_signal(pSelf->_semaphore);
    });

    // Ray trace triangles and spheres:
    raytrace(pCommandBuffer);

    // Present the ray traced texture by drawing it onto the drawable:
    MTL::RenderCommandEncoder* pPresentEncoder = pCommandBuffer->renderCommandEncoder(pRenderPass);
    pPresentEncoder->setRenderPipelineState(_pPresentPipeline);
    presentTexture(pPresentEncoder, _pSphereTexture);
    pPresentEncoder->endEncoding();

    pCommandBuffer->presentDrawable(pDrawable);
    pCommandBuffer->commit();

#ifdef CAPTURE
    pCaptureManager->stopCapture();
#endif

    pFramePool->release();
}

/// Hook for drawable resize events. Currently a no-op: the ray traced texture
/// keeps the fixed kTextureWidth x kTextureHeight size chosen at construction.
///
/// @param width  New drawable width (unused).
/// @param height New drawable height (unused).
void RenderCore::resizeDrawable(float width, float height)
{
    // Handle resizing events here, such as creating a new texture into which
    // to trace rays.
    (void)width;
    (void)height;
}
