From 3d206fd4c772628fe92e95d6e4526170df09900c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 25 Sep 2025 17:22:18 +0200 Subject: [PATCH 01/10] RWMC setup --- .../app_resources/hlsl/pathtracer.hlsl | 59 +++++-- .../app_resources/hlsl/render.comp.hlsl | 14 +- .../app_resources/hlsl/render_common.hlsl | 1 + .../app_resources/hlsl/reweighting.hlsl | 38 ++++ 31_HLSLPathTracer/main.cpp | 163 +++++++++++++++--- 5 files changed, 231 insertions(+), 44 deletions(-) create mode 100644 31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index f5d5206dc..e65cd950e 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -265,6 +265,29 @@ struct Unidirectional // #endif } + measure_type getSingleSampleMeasure(uint32_t sampleID, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + { + vector3_type uvw = rand3d(0u, sampleID, randGen.rng()); // TODO: take from scramblebuf? + ray_type ray = rayGen.generate(uvw); + + // bounces + bool hit = true; + bool rayAlive = true; + for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) + { + ray.intersectionT = numeric_limits::max; + ray.objectID = intersector_type::traceRay(ray, scene); + + hit = ray.objectID.id != -1; + if (hit) + rayAlive = closestHitProgram(1, sampleID, ray, scene); + } + if (!hit) + missProgram(ray); + + return ray.payload.accumulation; + } + // Li measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) { @@ -272,25 +295,7 @@ struct Unidirectional scalar_type meanLumaSq = 0.0; for (uint32_t i = 0; i < numSamples; i++) { - vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf? 
- ray_type ray = rayGen.generate(uvw); - - // bounces - bool hit = true; - bool rayAlive = true; - for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) - { - ray.intersectionT = numeric_limits::max; - ray.objectID = intersector_type::traceRay(ray, scene); - - hit = ray.objectID.id != -1; - if (hit) - rayAlive = closestHitProgram(1, i, ray, scene); - } - if (!hit) - missProgram(ray); - - measure_type accumulation = ray.payload.accumulation; + measure_type accumulation = getSingleSampleMeasure(i, depth, scene); scalar_type rcpSampleSize = 1.0 / (i + 1); Li += (accumulation - Li) * rcpSampleSize; @@ -302,6 +307,22 @@ struct Unidirectional return Li; } + void generateCascades(int32_t2 coords, uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + { + measure_type Li = (measure_type)0.0; + scalar_type meanLumaSq = 0.0; + for (uint32_t i = 0; i < numSamples; i++) + { + measure_type accumulation = getSingleSampleMeasure(i, depth, scene); + scalar_type rcpSampleSize = 1.0 / (i + 1); + Li += (accumulation - Li) * rcpSampleSize; + } + + cascade[uint3(coords.x, coords.y, 0)] = float4(Li.r, 0.0f, 0.0f, 0.0f); + cascade[uint3(coords.x, coords.y, 1)] = float4(0.0f, Li.g, 0.0f, 0.0f); + cascade[uint3(coords.x, coords.y, 2)] = float4(0.0f, 0.0f, Li.b, 0.0f); + } + NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_SAMPLES_LOG2 = 10u; diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index 81736f508..be76667f3 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -217,9 +217,17 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); - float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); - float32_t4 pixCol = float32_t4(color, 1.0); - outImage[coords] = pixCol; + bool useRWMC 
= true; // TODO: move to push constants if we keep it + if(!useRWMC) + { + float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); + float32_t4 pixCol = float32_t4(color, 1.0); + outImage[coords] = pixCol; + } + else + { + pathtracer.generateCascades(coords, pc.sampleCount, pc.depth, scene); + } #ifdef PERSISTENT_WORKGROUPS } diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index 5e5cf89da..f6b5f779f 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -19,5 +19,6 @@ struct SPushConstants [[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler; [[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D outImage; +[[vk::image_format("rgba16f")]][[vk::binding(1, 0)]] RWTexture2DArray cascade; #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl b/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl new file mode 100644 index 000000000..bca35370b --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl @@ -0,0 +1,38 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(1, 0)]] RWTexture2DArray cascade; + +using namespace nbl; +using namespace hlsl; + +NBL_CONSTEXPR uint32_t WorkgroupSize = 512; +NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; +NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; + +int32_t2 getCoordinates() +{ + uint32_t width, height; + outImage.GetDimensions(width, height); + return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); +} + +float calculateLumaRec709(float32_t4 color) +{ + return 0.2126 * color.r + 0.7152 * color.g + 0.0722 * color.b; +} + +[numthreads(WorkgroupSize, 1, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + const int32_t2 
coords = getCoordinates(); + + float r = cascade.Load(uint3(coords, 0)).r; + float g = cascade.Load(uint3(coords, 1)).g; + float b = cascade.Load(uint3(coords, 2)).b; + float32_t4 color = float32_t4(r, g, b, 1.0f); + float luma = calculateLumaRec709(color); + float32_t4 output = float32_t4(luma, luma, luma, 1.0f); + + outImage[coords] = output; +} diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 0dc5fc053..50275d311 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -60,6 +60,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, static inline std::array PTGLSLShaderPaths = { "app_resources/glsl/litBySphere.comp", "app_resources/glsl/litByTriangle.comp", "app_resources/glsl/litByRectangle.comp" }; static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", "TRIANGLE_LIGHT", "RECTANGLE_LIGHT" }; + static inline std::string ReweightingShaderPath = "app_resources/hlsl/reweighting.hlsl"; static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { @@ -256,7 +257,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return gpuDS; }; - std::array descriptorSet0Bindings = {}; + std::array descriptorSet0Bindings = {}; std::array descriptorSet3Bindings = {}; std::array presentDescriptorSetBindings; @@ -268,6 +269,15 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; + descriptorSet0Bindings[1] = { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[0] = { .binding = 0u, .type = 
nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, @@ -292,6 +302,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; + presentDescriptorSetBindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, @@ -496,6 +507,34 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, } } + // Create reweighting pipeline + { + auto pipelineLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuDescriptorSetLayout0) + ); + + if (!pipelineLayout) { + return logFail("Failed to create reweighting pipeline layout"); + } + + { + auto shader = loadAndCompileHLSLShader(ReweightingShaderPath); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = pipelineLayout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.shader.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { &params, 1 }, &m_reweightingPipeline)) + return logFail("Failed to create HLSL reweighting compute pipeline!\n"); + } + + + } + // Create graphics pipeline { auto scRes = static_cast(m_surface->getSwapchainResources()); @@ -676,7 +715,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // create views for textures { - auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { IGPUImage::SCreationParams imgInfo; imgInfo.format = colorFormat; imgInfo.type = IGPUImage::ET_2D; @@ -684,10 +723,19 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, imgInfo.extent.height = 
height; imgInfo.extent.depth = 1u; imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; imgInfo.samples = IGPUImage::ESCF_1_BIT; imgInfo.flags = static_cast(0u); - imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + if (!useCascadeCreationParameters) + { + imgInfo.arrayLayers = 1u; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + } + else + { + imgInfo.arrayLayers = CascadeSize; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT; + } auto image = m_device->createImage(std::move(imgInfo)); auto imageMemReqs = image->getMemoryReqs(); @@ -696,13 +744,12 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return image; }; - auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr + auto createHDRIImageView = [this](smart_refctd_ptr img, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { auto format = img->getCreationParameters().format; IGPUImageView::SCreationParams imgViewInfo; imgViewInfo.image = std::move(img); imgViewInfo.format = format; - imgViewInfo.viewType = IGPUImageView::ET_2D; imgViewInfo.flags = static_cast(0u); imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; imgViewInfo.subresourceRange.baseArrayLayer = 0u; @@ -710,21 +757,36 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, imgViewInfo.subresourceRange.layerCount = 1u; imgViewInfo.subresourceRange.levelCount = 1u; + if(!useCascadeCreationParameters) + imgViewInfo.viewType = IGPUImageView::ET_2D; + else + imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY; + return m_device->createImageView(std::move(imgViewInfo)); }; auto params = envMap->getCreationParameters(); auto extent = params.extent; + envMap->setObjectDebugName("Env Map"); m_envMapView = createHDRIImageView(envMap); m_envMapView->setObjectDebugName("Env Map View"); + 
scrambleMap->setObjectDebugName("Scramble Map"); m_scrambleView = createHDRIImageView(scrambleMap); m_scrambleView->setObjectDebugName("Scramble Map View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); outImg->setObjectDebugName("Output Image"); m_outImgView = createHDRIImageView(outImg); m_outImgView->setObjectDebugName("Output Image View"); + + auto cascade = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y, true); + cascade->setObjectDebugName("Cascade"); + m_cascadeView = createHDRIImageView(cascade, true); + m_cascadeView->setObjectDebugName("Cascade View"); + + // TODO: change cascade layout to general } // create sequence buffer view @@ -855,22 +917,24 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; auto sampler1 = m_device->createSampler(samplerParams1); - std::array writeDSInfos = {}; + std::array writeDSInfos = {}; writeDSInfos[0].desc = m_outImgView; writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; - writeDSInfos[1].desc = m_envMapView; + writeDSInfos[1].desc = m_cascadeView; + writeDSInfos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[2].desc = m_envMapView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; - writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[2].desc = m_sequenceBufferView; - writeDSInfos[3].desc = m_scrambleView; + writeDSInfos[2].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[2].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[3].desc = m_sequenceBufferView; + 
writeDSInfos[4].desc = m_scrambleView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; - writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[4].desc = m_outImgView; - writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[4].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[5].desc = m_outImgView; + writeDSInfos[5].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - std::array writeDescriptorSets = {}; + std::array writeDescriptorSets = {}; writeDescriptorSets[0] = { .dstSet = m_descriptorSet0.get(), .binding = 0, @@ -879,32 +943,39 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .info = &writeDSInfos[0] }; writeDescriptorSets[1] = { - .dstSet = m_descriptorSet2.get(), - .binding = 0, + .dstSet = m_descriptorSet0.get(), + .binding = 1, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[1] }; writeDescriptorSets[2] = { .dstSet = m_descriptorSet2.get(), - .binding = 1, + .binding = 0, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[2] }; writeDescriptorSets[3] = { .dstSet = m_descriptorSet2.get(), - .binding = 2, + .binding = 1, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[3] }; writeDescriptorSets[4] = { + .dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + writeDescriptorSets[5] = { .dstSet = m_presentDescriptorSet.get(), .binding = 0, .arrayElement = 0u, .count = 1u, - .info = &writeDSInfos[4] + .info = &writeDSInfos[5] }; 
m_device->updateDescriptorSets(writeDescriptorSets, {}); @@ -1140,6 +1211,51 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); } + // TODO: create it once outside of the loop? + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 6u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + + // reweighting + { + IGPUComputePipeline* pipeline; + if (usePersistentWorkGroups) + pipeline = nullptr; + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? 
m_reweightingPipeline.get() : nullptr; + + if (!pipeline) + { + m_logger->log("Reweighting pipeline is not valid", ILogger::ELL_ERROR); + std::exit(-1); + } + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + } + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) { const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { @@ -1371,6 +1487,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + smart_refctd_ptr m_reweightingPipeline; smart_refctd_ptr m_presentPipeline; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; @@ -1388,6 +1505,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, smart_refctd_ptr m_envMapView, m_scrambleView; smart_refctd_ptr m_sequenceBufferView; smart_refctd_ptr m_outImgView; + static constexpr uint32_t CascadeSize = 6u; + smart_refctd_ptr m_cascadeView; // sync smart_refctd_ptr m_semaphore; From 2107be783ddaf4da4a05fedb6715ec509e058815 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 29 Sep 2025 14:28:00 +0200 Subject: [PATCH 02/10] Implemented splatting --- .../app_resources/hlsl/pathtracer.hlsl | 58 ++++++++++++++++--- .../app_resources/hlsl/render.comp.hlsl | 7 ++- .../app_resources/hlsl/reweighting.hlsl | 21 ++++--- 3 files changed, 65 insertions(+), 21 deletions(-) diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index e65cd950e..4ed6e632a 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ 
b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -307,20 +307,60 @@ struct Unidirectional return Li; } - void generateCascades(int32_t2 coords, uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + struct RWMCCascadeSettings { - measure_type Li = (measure_type)0.0; - scalar_type meanLumaSq = 0.0; + uint32_t size; + uint32_t start; + uint32_t base; + }; + + // tmp + float calculateLumaRec709(float32_t4 color) + { + return 0.2126 * color.r + 0.7152 * color.g + 0.0722 * color.b; + } + + void generateCascade(int32_t2 coords, uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(RWMCCascadeSettings) cascadeSettings, NBL_CONST_REF_ARG(scene_type) scene) + { + float lowerScale = cascadeSettings.start; + float upperScale = lowerScale * cascadeSettings.base; + + // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp for (uint32_t i = 0; i < numSamples; i++) { - measure_type accumulation = getSingleSampleMeasure(i, depth, scene); - scalar_type rcpSampleSize = 1.0 / (i + 1); - Li += (accumulation - Li) * rcpSampleSize; + const float luma = getLuma(accumulation); + //const float luma = calculateLumaRec709(float32_t4(accumulation, 1.0f)); + + uint32_t lowerCascadeIndex = 0u; + while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) + { + lowerScale = upperScale; + upperScale *= cascadeSettings.base; + ++lowerCascadeIndex; + } + + float lowerCascadeLevelWeight; + float higherCascadeLevelWeight; + + if (luma <= lowerScale) + lowerCascadeLevelWeight = 1.0f; + else if (luma < upperScale) + lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale)); + else // Inf, NaN ... 
+ lowerCascadeLevelWeight = 0.0f; + + if (luma < upperScale) + higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); + else + higherCascadeLevelWeight = upperScale / luma; + + cascade[uint3(coords.x, coords.y, lowerCascadeIndex)] = float32_t4(accumulation * lowerCascadeLevelWeight, 1.0f); + cascade[uint3(coords.x, coords.y, lowerCascadeIndex + 1u)] = float32_t4(accumulation * higherCascadeLevelWeight, 1.0f); } - cascade[uint3(coords.x, coords.y, 0)] = float4(Li.r, 0.0f, 0.0f, 0.0f); - cascade[uint3(coords.x, coords.y, 1)] = float4(0.0f, Li.g, 0.0f, 0.0f); - cascade[uint3(coords.x, coords.y, 2)] = float4(0.0f, 0.0f, Li.b, 0.0f); + cascade[uint3(coords.x, coords.y, 0)] = float32_t4(Li.r, 0.0f, 0.0f, 0.0f); + cascade[uint3(coords.x, coords.y, 1)] = float32_t4(0.0f, Li.g, 0.0f, 0.0f); + cascade[uint3(coords.x, coords.y, 2)] = float32_t4(0.0f, 0.0f, Li.b, 0.0f); } NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index be76667f3..dd6aad625 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -226,7 +226,12 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) } else { - pathtracer.generateCascades(coords, pc.sampleCount, pc.depth, scene); + pathtracer_type::RWMCCascadeSettings cascadeSettings; + cascadeSettings.size = 6u; + cascadeSettings.start = 1u; + cascadeSettings.base = 8u; + + pathtracer.generateCascade(coords, pc.sampleCount, pc.depth, cascadeSettings, scene); } #ifdef PERSISTENT_WORKGROUPS diff --git a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl b/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl index bca35370b..ef8307363 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl @@ -17,22 +17,21 @@ int32_t2 getCoordinates() return 
int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); } -float calculateLumaRec709(float32_t4 color) -{ - return 0.2126 * color.r + 0.7152 * color.g + 0.0722 * color.b; -} - [numthreads(WorkgroupSize, 1, 1)] void main(uint32_t3 threadID : SV_DispatchThreadID) { const int32_t2 coords = getCoordinates(); - float r = cascade.Load(uint3(coords, 0)).r; - float g = cascade.Load(uint3(coords, 1)).g; - float b = cascade.Load(uint3(coords, 2)).b; - float32_t4 color = float32_t4(r, g, b, 1.0f); - float luma = calculateLumaRec709(color); - float32_t4 output = float32_t4(luma, luma, luma, 1.0f); + float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < 6; ++i) + { + float32_t4 cascadeLevel = cascade.Load(uint3(coords, i)); + accumulation += float32_t3(cascadeLevel.r, cascadeLevel.g, cascadeLevel.b); + } + + //accumulation /= 32.0f; + float32_t4 output = float32_t4(accumulation, 1.0f); outImage[coords] = output; } From 57a6a0ff011de319104a49d433b959826d1a8868 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 1 Oct 2025 17:07:12 +0200 Subject: [PATCH 03/10] Fixed splatting --- .../app_resources/hlsl/pathtracer.hlsl | 12 +- .../app_resources/hlsl/reweighting.hlsl | 26 +++- 31_HLSLPathTracer/main.cpp | 122 ++++++++++++++---- 3 files changed, 121 insertions(+), 39 deletions(-) diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index 4ed6e632a..e2ae43c63 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -292,7 +292,7 @@ struct Unidirectional measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) { measure_type Li = (measure_type)0.0; - scalar_type meanLumaSq = 0.0; + //scalar_type meanLumaSq = 0.0; for (uint32_t i = 0; i < numSamples; i++) { measure_type accumulation = getSingleSampleMeasure(i, depth, scene); @@ -328,6 +328,8 @@ 
struct Unidirectional // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp for (uint32_t i = 0; i < numSamples; i++) { + measure_type accumulation = getSingleSampleMeasure(i, depth, scene); + const float luma = getLuma(accumulation); //const float luma = calculateLumaRec709(float32_t4(accumulation, 1.0f)); @@ -354,13 +356,9 @@ struct Unidirectional else higherCascadeLevelWeight = upperScale / luma; - cascade[uint3(coords.x, coords.y, lowerCascadeIndex)] = float32_t4(accumulation * lowerCascadeLevelWeight, 1.0f); - cascade[uint3(coords.x, coords.y, lowerCascadeIndex + 1u)] = float32_t4(accumulation * higherCascadeLevelWeight, 1.0f); + cascade[uint3(coords.x, coords.y, lowerCascadeIndex)] += float32_t4(accumulation * lowerCascadeLevelWeight, 1.0f); + cascade[uint3(coords.x, coords.y, lowerCascadeIndex + 1u)] += float32_t4(accumulation * higherCascadeLevelWeight, 1.0f); } - - cascade[uint3(coords.x, coords.y, 0)] = float32_t4(Li.r, 0.0f, 0.0f, 0.0f); - cascade[uint3(coords.x, coords.y, 1)] = float32_t4(0.0f, Li.g, 0.0f, 0.0f); - cascade[uint3(coords.x, coords.y, 2)] = float32_t4(0.0f, 0.0f, Li.b, 0.0f); } NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; diff --git a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl b/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl index ef8307363..1a2ab2844 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl @@ -17,11 +17,10 @@ int32_t2 getCoordinates() return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); } -[numthreads(WorkgroupSize, 1, 1)] -void main(uint32_t3 threadID : SV_DispatchThreadID) +// this function is for testing purpose +// simply adds every cascade buffer, output shoud be nearly the same as output of default accumulator (RWMC off) +void sumCascade(in const int32_t2 coords) { - const int32_t2 coords = getCoordinates(); - float32_t3 
accumulation = float32_t3(0.0f, 0.0f, 0.0f); for (int i = 0; i < 6; ++i) @@ -30,8 +29,25 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) accumulation += float32_t3(cascadeLevel.r, cascadeLevel.g, cascadeLevel.b); } - //accumulation /= 32.0f; + accumulation /= 32.0f; + float32_t4 output = float32_t4(accumulation, 1.0f); outImage[coords] = output; } + +[numthreads(WorkgroupSize, 1, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + // TODO: remove, ideally shader should not be called at all when we don't use RWMC + bool useRWMC = true; + if (!useRWMC) + return; + + const int32_t2 coords = getCoordinates(); + sumCascade(coords); + + // zero out cascade + for (int i = 0; i < 6; ++i) + cascade[uint3(coords.x, coords.y, i)] = float32_t4(0.0f, 0.0f, 0.0f, 0.0f); +} diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 50275d311..9e3da1a88 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -754,13 +754,18 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; imgViewInfo.subresourceRange.baseArrayLayer = 0u; imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; imgViewInfo.subresourceRange.levelCount = 1u; - if(!useCascadeCreationParameters) + if (!useCascadeCreationParameters) + { + imgViewInfo.subresourceRange.layerCount = 1u; imgViewInfo.viewType = IGPUImageView::ET_2D; + } else + { + imgViewInfo.subresourceRange.layerCount = CascadeSize; imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY; + } return m_device->createImageView(std::move(imgViewInfo)); }; @@ -1117,6 +1122,9 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return m_device->updateDescriptorSets(writes, {}); } + // TODO: DON'T DO THAT! 
tansition layout once at the initialization stage + bool cascadeLayoutTransitioned = false; + inline void workLoopBody() override { // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. @@ -1191,6 +1199,36 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); } + // TODO: remove! we want to transition cascade layout only once right after its creation + if (!cascadeLayoutTransitioned) + { + cascadeLayoutTransitioned = true; + + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 6u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + // cube envmap handle { IGPUComputePipeline* pipeline; @@ -1211,31 +1249,33 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); } - // TODO: create it once outside of the loop? 
- const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_cascadeView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 6u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + // m_cascadeView synchronization - wait for previous compute shader to write into the cascade + // TODO: create this and every other barrier once outside of the loop? + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 6u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } // reweighting { @@ -1283,6 +1323,34 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); } + // m_cascadeView synchronization - wait for previous compute shader to zero-out the cascade + // TODO: create this and every other barrier once outside of the 
loop? + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 6u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + // TODO: tone mapping and stuff } From 389248c2ec4760dac958f3c0f552634ed1a222df Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 1 Oct 2025 22:40:07 +0200 Subject: [PATCH 04/10] Implemented reweighting --- .../app_resources/hlsl/pathtracer.hlsl | 16 +- .../app_resources/hlsl/render.comp.hlsl | 1 + .../app_resources/hlsl/render_common.hlsl | 3 + .../app_resources/hlsl/reweighting.hlsl | 166 +++++++++++++++++- 31_HLSLPathTracer/main.cpp | 40 ++++- 5 files changed, 210 insertions(+), 16 deletions(-) diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index e2ae43c63..226525ee9 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -314,10 +314,19 @@ struct Unidirectional uint32_t base; }; - // tmp - float calculateLumaRec709(float32_t4 color) + /** + * @brief Resets all buffers in the cascade to 0 at the given pixel coordinates. + * + * This function writes zero values to every buffer in the cascade + * for the specified 2D pixel location. + * + * @param coords Integer 2D coordinates of the pixel to reset. + * @param cascadeSize number of buffers in the cascade to clear. 
+ */ + void resetCascade(NBL_CONST_REF_ARG(int32_t2) coords, uint32_t cascadeSize) { - return 0.2126 * color.r + 0.7152 * color.g + 0.0722 * color.b; + for (int i = 0; i < 6; ++i) + cascade[uint3(coords.x, coords.y, i)] = float32_t4(0.0f, 0.0f, 0.0f, 0.0f); } void generateCascade(int32_t2 coords, uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(RWMCCascadeSettings) cascadeSettings, NBL_CONST_REF_ARG(scene_type) scene) @@ -331,7 +340,6 @@ struct Unidirectional measure_type accumulation = getSingleSampleMeasure(i, depth, scene); const float luma = getLuma(accumulation); - //const float luma = calculateLumaRec709(float32_t4(accumulation, 1.0f)); uint32_t lowerCascadeIndex = 0u; while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index dd6aad625..77af3958c 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -231,6 +231,7 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) cascadeSettings.start = 1u; cascadeSettings.base = 8u; + pathtracer.resetCascade(coords, 6u); pathtracer.generateCascade(coords, pc.sampleCount, pc.depth, cascadeSettings, scene); } diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index f6b5f779f..8c79703fd 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -6,6 +6,9 @@ struct SPushConstants float32_t4x4 invMVP; int sampleCount; int depth; + uint32_t rwmcCascadeSize; + uint32_t rwmcCascadeStart; + uint32_t rwmcCascadeBase; }; [[vk::push_constant]] SPushConstants pc; diff --git a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl b/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl index 1a2ab2844..9a431779f 100644 --- 
a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl @@ -1,5 +1,16 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +struct SPushConstants +{ + uint32_t cascadeCount; + float base; + uint32_t sampleCount; + float minReliableLuma; + float kappa; +}; + +[[vk::push_constant]] SPushConstants pc; [[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; [[vk::image_format("rgba16f")]] [[vk::binding(1, 0)]] RWTexture2DArray cascade; @@ -10,6 +21,145 @@ NBL_CONSTEXPR uint32_t WorkgroupSize = 512; NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; +struct RWMCReweightingParameters +{ + uint32_t lastCascadeIndex; + float initialEmin; // a minimum image brightness that we always consider reliable + float reciprocalBase; + float reciprocalN; + float reciprocalKappa; + float colorReliabilityFactor; + float NOverKappa; +}; + +RWMCReweightingParameters computeReweightingParameters(uint32_t cascadeCount, float base, uint32_t sampleCount, float minReliableLuma, float kappa) +{ + RWMCReweightingParameters retval; + retval.lastCascadeIndex = cascadeCount - 1u; + retval.initialEmin = minReliableLuma; + retval.reciprocalBase = 1.f / base; + const float N = float(sampleCount); + retval.reciprocalN = 1.f / N; + retval.reciprocalKappa = 1.f / kappa; + // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have + // allow up to ~ more energy in one sample to lessen bias in some cases + retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; + retval.NOverKappa = N * retval.reciprocalKappa; + + return retval; +} + +struct RWMCCascadeSample +{ + float32_t3 centerValue; + float normalizedCenterLuma; + float normalizedNeighbourhoodAverageLuma; +}; + +// TODO: figure out what values should pixels outside have, 0.0f is incorrect +float32_t3 
RWMCsampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, uint32_t cascadeIndex) +{ + const int32_t2 texelCoord = currentCoord + offset; + if (any(texelCoord < int32_t2(0, 0))) + return float32_t3(0.0f, 0.0f, 0.0f); + + float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex))); + return float32_t3(output.r, output.g, output.b); +} + +float32_t calcLuma(in float32_t3 col) +{ + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); +} + +RWMCCascadeSample RWMCSampleCascade(in int32_t2 coord, in uint cascadeIndex, in float reciprocalBaseI) +{ + float32_t3 neighbourhood[9]; + neighbourhood[0] = RWMCsampleCascadeTexel(coord, int32_t2(-1, -1), cascadeIndex); + neighbourhood[1] = RWMCsampleCascadeTexel(coord, int32_t2(0, -1), cascadeIndex); + neighbourhood[2] = RWMCsampleCascadeTexel(coord, int32_t2(1, -1), cascadeIndex); + neighbourhood[3] = RWMCsampleCascadeTexel(coord, int32_t2(-1, 0), cascadeIndex); + neighbourhood[4] = RWMCsampleCascadeTexel(coord, int32_t2(0, 0), cascadeIndex); + neighbourhood[5] = RWMCsampleCascadeTexel(coord, int32_t2(1, 0), cascadeIndex); + neighbourhood[6] = RWMCsampleCascadeTexel(coord, int32_t2(-1, 1), cascadeIndex); + neighbourhood[7] = RWMCsampleCascadeTexel(coord, int32_t2(0, 1), cascadeIndex); + neighbourhood[8] = RWMCsampleCascadeTexel(coord, int32_t2(1, 1), cascadeIndex); + + // numerical robustness + float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + + ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); + + RWMCCascadeSample retval; + retval.centerValue = neighbourhood[4]; + retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI; + retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; + return retval; +} + +float32_t3 RWMCReweight(in RWMCReweightingParameters params, 
in int32_t2 coord) +{ + float reciprocalBaseI = 1.f; + RWMCCascadeSample curr = RWMCSampleCascade(coord, 0u, reciprocalBaseI); + + float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); + float Emin = params.initialEmin; + + float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; + for (uint i = 0u; i <= params.lastCascadeIndex; i++) + { + const bool notFirstCascade = i != 0u; + const bool notLastCascade = i != params.lastCascadeIndex; + + RWMCCascadeSample next; + if (notLastCascade) + { + reciprocalBaseI *= params.reciprocalBase; + next = RWMCSampleCascade(coord, i + 1u, reciprocalBaseI); + } + + + float reliability = 1.f; + // sample counting-based reliability estimation + if (params.reciprocalKappa <= 1.f) + { + float localReliability = curr.normalizedCenterLuma; + // reliability in 3x3 pixel block (see robustness) + float globalReliability = curr.normalizedNeighbourhoodAverageLuma; + if (notFirstCascade) + { + localReliability += prevNormalizedCenterLuma; + globalReliability += prevNormalizedNeighbourhoodAverageLuma; + } + if (notLastCascade) + { + localReliability += next.normalizedCenterLuma; + globalReliability += next.normalizedNeighbourhoodAverageLuma; + } + // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) + reliability = globalReliability < params.reciprocalN ? 
globalReliability : localReliability; + { + const float accumLuma = calcLuma(accumulation); + if (accumLuma > Emin) + Emin = accumLuma; + + const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; + + reliability += colorReliability; + reliability *= params.NOverKappa; + reliability -= params.reciprocalKappa; + reliability = clamp(reliability * 0.5f, 0.f, 1.f); + } + } + accumulation += curr.centerValue * reliability; + + prevNormalizedCenterLuma = curr.normalizedCenterLuma; + prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; + curr = next; + } + + return accumulation; +} + int32_t2 getCoordinates() { uint32_t width, height; @@ -19,7 +169,7 @@ int32_t2 getCoordinates() // this function is for testing purpose // simply adds every cascade buffer, output shoud be nearly the same as output of default accumulator (RWMC off) -void sumCascade(in const int32_t2 coords) +float32_t3 sumCascade(in const int32_t2 coords) { float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); @@ -31,9 +181,7 @@ void sumCascade(in const int32_t2 coords) accumulation /= 32.0f; - float32_t4 output = float32_t4(accumulation, 1.0f); - - outImage[coords] = output; + return accumulation; } [numthreads(WorkgroupSize, 1, 1)] @@ -45,9 +193,11 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) return; const int32_t2 coords = getCoordinates(); - sumCascade(coords); + //float32_t3 color = sumCascade(coords); - // zero out cascade - for (int i = 0; i < 6; ++i) - cascade[uint3(coords.x, coords.y, i)] = float32_t4(0.0f, 0.0f, 0.0f, 0.0f); + RWMCReweightingParameters reweightingParameters = computeReweightingParameters(pc.cascadeCount, pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa); + float32_t3 color = RWMCReweight(reweightingParameters, coords); + color /= pc.sampleCount; + + outImage[coords] = float32_t4(color, 1.0f); } diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 9e3da1a88..6435be27f 100644 --- 
a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -15,10 +15,24 @@ using namespace asset; using namespace ui; using namespace video; -struct PTPushConstant { +static constexpr uint32_t CascadeSize = 6u; +struct PTPushConstant +{ matrix4SIMD invMVP; int sampleCount; int depth; + const uint32_t rwmcCascadeSize = CascadeSize; + uint32_t rwmcCascadeStart; + uint32_t rwmcCascadeBase; +}; + +struct RWMCPushConstants +{ + const uint32_t cascadeSize = CascadeSize; + float base; + uint32_t sampleCount; + float minReliableLuma; + float kappa; }; // TODO: Add a QueryPool for timestamping once its ready @@ -509,8 +523,14 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // Create reweighting pipeline { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RWMCPushConstants) + }; + auto pipelineLayout = m_device->createPipelineLayout( - {}, + { &pcRange, 1 }, core::smart_refctd_ptr(gpuDescriptorSetLayout0) ); @@ -1098,6 +1118,15 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_oracle.reportBeginFrameRecord(); m_camera.mapKeysToWASD(); + // set initial push constants contents + rwmcPushConstants.base = 8.0f; + rwmcPushConstants.sampleCount = spp; + rwmcPushConstants.minReliableLuma = 1.0f; + rwmcPushConstants.kappa = 5.0f; + + pc.rwmcCascadeStart = 1.0; + pc.rwmcCascadeBase = 8.0f; + return true; } @@ -1162,11 +1191,12 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); // disregard surface/swapchain transformation for now const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - PTPushConstant pc; viewProjectionMatrix.getInverseTransform(pc.invMVP); pc.sampleCount = spp; pc.depth = depth; + rwmcPushConstants.sampleCount = spp; + // safe to proceed // upload buffer data cmdbuf->beginDebugMarker("ComputeShaderPathtracer 
IMGUI Frame"); @@ -1293,6 +1323,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->bindComputePipeline(pipeline); cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RWMCPushConstants), &rwmcPushConstants); cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); } @@ -1573,7 +1604,6 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, smart_refctd_ptr m_envMapView, m_scrambleView; smart_refctd_ptr m_sequenceBufferView; smart_refctd_ptr m_outImgView; - static constexpr uint32_t CascadeSize = 6u; smart_refctd_ptr m_cascadeView; // sync @@ -1610,6 +1640,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, int spp = 32; int depth = 3; bool usePersistentWorkGroups = false; + RWMCPushConstants rwmcPushConstants; + PTPushConstant pc; bool m_firstFrame = true; IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; From ca8c2329c0f643851125197e77eb1b8f70ff21f4 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 8 Oct 2025 00:10:14 +0200 Subject: [PATCH 05/10] Added "enable RWMC" toggle --- .../app_resources/hlsl/pathtracer.hlsl | 31 +++++----- .../app_resources/hlsl/render.comp.hlsl | 12 ++-- .../app_resources/hlsl/render_common.hlsl | 1 + .../{reweighting.hlsl => resolve.comp.hlsl} | 9 --- 31_HLSLPathTracer/main.cpp | 61 ++++++------------- 5 files changed, 39 insertions(+), 75 deletions(-) rename 31_HLSLPathTracer/app_resources/hlsl/{reweighting.hlsl => resolve.comp.hlsl} (97%) diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index 226525ee9..66c720782 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -314,23 
+314,14 @@ struct Unidirectional uint32_t base; }; - /** - * @brief Resets all buffers in the cascade to 0 at the given pixel coordinates. - * - * This function writes zero values to every buffer in the cascade - * for the specified 2D pixel location. - * - * @param coords Integer 2D coordinates of the pixel to reset. - * @param cascadeSize number of buffers in the cascade to clear. - */ - void resetCascade(NBL_CONST_REF_ARG(int32_t2) coords, uint32_t cascadeSize) - { - for (int i = 0; i < 6; ++i) - cascade[uint3(coords.x, coords.y, i)] = float32_t4(0.0f, 0.0f, 0.0f, 0.0f); - } - void generateCascade(int32_t2 coords, uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(RWMCCascadeSettings) cascadeSettings, NBL_CONST_REF_ARG(scene_type) scene) { + // TODO: move `MaxCascadeSize` somewhere else + const static uint32_t MaxCascadeSize = 10u; + float32_t4 cascadeEntry[MaxCascadeSize]; + for (int i = 0; i < MaxCascadeSize; ++i) + cascadeEntry[i] = float32_t4(0.0f, 0.0f, 0.0f, 0.0f); + float lowerScale = cascadeSettings.start; float upperScale = lowerScale * cascadeSettings.base; @@ -364,8 +355,14 @@ struct Unidirectional else higherCascadeLevelWeight = upperScale / luma; - cascade[uint3(coords.x, coords.y, lowerCascadeIndex)] += float32_t4(accumulation * lowerCascadeLevelWeight, 1.0f); - cascade[uint3(coords.x, coords.y, lowerCascadeIndex + 1u)] += float32_t4(accumulation * higherCascadeLevelWeight, 1.0f); + // TODO: odrazu liczyc srednia + cascadeEntry[lowerCascadeIndex] += float32_t4(accumulation * lowerCascadeLevelWeight, 1.0f); + } + + for (uint32_t i = 0; i < 6; i++) + { + cascadeEntry[i] /= float(numSamples); + cascade[uint3(coords.x, coords.y, i)] = cascadeEntry[i]; } } diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index 77af3958c..ee0486865 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -217,8 +217,8 @@ 
void main(uint32_t3 threadID : SV_DispatchThreadID) pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); - bool useRWMC = true; // TODO: move to push constants if we keep it - if(!useRWMC) + bool useRWMC = bool(pc.useRWMC); + if (!useRWMC) { float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); float32_t4 pixCol = float32_t4(color, 1.0); @@ -227,11 +227,11 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) else { pathtracer_type::RWMCCascadeSettings cascadeSettings; - cascadeSettings.size = 6u; - cascadeSettings.start = 1u; - cascadeSettings.base = 8u; + cascadeSettings.size = pc.rwmcCascadeSize; + cascadeSettings.start = pc.rwmcCascadeStart; + cascadeSettings.base = pc.rwmcCascadeBase; - pathtracer.resetCascade(coords, 6u); + // TODO: template parameter should be pathtracer.generateCascade(coords, pc.sampleCount, pc.depth, cascadeSettings, scene); } diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index 8c79703fd..b54d28227 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -7,6 +7,7 @@ struct SPushConstants int sampleCount; int depth; uint32_t rwmcCascadeSize; + int useRWMC; uint32_t rwmcCascadeStart; uint32_t rwmcCascadeBase; }; diff --git a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl similarity index 97% rename from 31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl rename to 31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl index 9a431779f..49c1e306c 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/reweighting.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -117,7 +117,6 @@ float32_t3 RWMCReweight(in RWMCReweightingParameters params, in int32_t2 coord) next = RWMCSampleCascade(coord, i + 1u, reciprocalBaseI); } - float reliability = 1.f; // sample counting-based 
reliability estimation if (params.reciprocalKappa <= 1.f) @@ -179,25 +178,17 @@ float32_t3 sumCascade(in const int32_t2 coords) accumulation += float32_t3(cascadeLevel.r, cascadeLevel.g, cascadeLevel.b); } - accumulation /= 32.0f; - return accumulation; } [numthreads(WorkgroupSize, 1, 1)] void main(uint32_t3 threadID : SV_DispatchThreadID) { - // TODO: remove, ideally shader should not be called at all when we don't use RWMC - bool useRWMC = true; - if (!useRWMC) - return; - const int32_t2 coords = getCoordinates(); //float32_t3 color = sumCascade(coords); RWMCReweightingParameters reweightingParameters = computeReweightingParameters(pc.cascadeCount, pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa); float32_t3 color = RWMCReweight(reweightingParameters, coords); - color /= pc.sampleCount; outImage[coords] = float32_t4(color, 1.0f); } diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 6435be27f..fb59d71cb 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -22,6 +22,7 @@ struct PTPushConstant int sampleCount; int depth; const uint32_t rwmcCascadeSize = CascadeSize; + int useRWMC; uint32_t rwmcCascadeStart; uint32_t rwmcCascadeBase; }; @@ -74,7 +75,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, static inline std::array PTGLSLShaderPaths = { "app_resources/glsl/litBySphere.comp", "app_resources/glsl/litByTriangle.comp", "app_resources/glsl/litByRectangle.comp" }; static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", "TRIANGLE_LIGHT", "RECTANGLE_LIGHT" }; - static inline std::string ReweightingShaderPath = "app_resources/hlsl/reweighting.hlsl"; + static inline std::string ReweightingShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { @@ 
-1096,6 +1097,12 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + ImGui::Text("\nRWMC settings:"); + ImGui::Checkbox("Enable RWMC", &useRWMC); + ImGui::SliderFloat("base", &rwmcPushConstants.base, 1.0f, 32.0f); + ImGui::SliderFloat("minReliableLuma", &rwmcPushConstants.minReliableLuma, 0.1f, 32.0f); + ImGui::SliderFloat("kappa", &rwmcPushConstants.kappa, 0.1f, 32.0f); + ImGui::End(); } ); @@ -1151,9 +1158,6 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return m_device->updateDescriptorSets(writes, {}); } - // TODO: DON'T DO THAT! tansition layout once at the initialization stage - bool cascadeLayoutTransitioned = false; - inline void workLoopBody() override { // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. @@ -1192,6 +1196,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // disregard surface/swapchain transformation for now const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); viewProjectionMatrix.getInverseTransform(pc.invMVP); + pc.useRWMC = useRWMC ? 1 : 0; pc.sampleCount = spp; pc.depth = depth; @@ -1229,19 +1234,17 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); } - // TODO: remove! 
we want to transition cascade layout only once right after its creation - if (!cascadeLayoutTransitioned) + // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from cascade + if (useRWMC) { - cascadeLayoutTransitioned = true; - const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { { .barrier = { .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, .srcAccessMask = ACCESS_FLAGS::NONE, .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + .dstAccessMask = ACCESS_FLAGS::NONE } }, .image = m_cascadeView->getCreationParameters().image.get(), @@ -1250,7 +1253,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .baseMipLevel = 0u, .levelCount = 1u, .baseArrayLayer = 0u, - .layerCount = 6u + .layerCount = CascadeSize }, .oldLayout = IImage::LAYOUT::UNDEFINED, .newLayout = IImage::LAYOUT::GENERAL @@ -1298,16 +1301,15 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .baseMipLevel = 0u, .levelCount = 1u, .baseArrayLayer = 0u, - .layerCount = 6u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::GENERAL + .layerCount = CascadeSize + } } }; cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); } // reweighting + if(useRWMC) { IGPUComputePipeline* pipeline; if (usePersistentWorkGroups) @@ -1354,34 +1356,6 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); } - // m_cascadeView synchronization - wait for previous compute shader to zero-out the cascade - // TODO: create this and every other barrier once outside of the loop? 
- { - const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = m_cascadeView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 6u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); - } - // TODO: tone mapping and stuff } @@ -1640,6 +1614,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, int spp = 32; int depth = 3; bool usePersistentWorkGroups = false; + bool useRWMC = false; RWMCPushConstants rwmcPushConstants; PTPushConstant pc; From 04296d9a1a387de5562dde28676e9dede54301ce Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 16 Oct 2025 17:08:42 +0200 Subject: [PATCH 06/10] Added accumulators --- .../hlsl/RWMCCascadeAccumulator.hlsl | 100 ++++ .../app_resources/hlsl/pathtracer.hlsl | 133 ++--- .../app_resources/hlsl/render.comp.hlsl | 62 +- .../app_resources/hlsl/render_common.hlsl | 28 +- .../hlsl/render_rwmc_common.hlsl | 24 + .../app_resources/hlsl/resolve.comp.hlsl | 21 +- .../app_resources/hlsl/resolve_common.hlsl | 13 + .../hlsl/rwmc_global_settings_common.hlsl | 7 + 31_HLSLPathTracer/main.cpp | 556 +++++++++++------- 9 files changed, 590 insertions(+), 354 deletions(-) create mode 100644 31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl create mode 100644 31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl create mode 100644 31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl create mode 100644 
31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl diff --git a/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl new file mode 100644 index 000000000..8b7e0ec40 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl @@ -0,0 +1,100 @@ +#ifndef _NBL_HLSL_RWMC_INCLUDED_ +#define _NBL_HLSL_RWMC_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + +struct RWMCCascadeSettings +{ + uint32_t size; + uint32_t start; + uint32_t base; +}; + +template +struct CascadeEntry +{ + CascadeLayerType data[CascadeSize]; +}; + +template +struct RWMCCascadeAccumulator +{ + using output_storage_type = CascadeEntry; + output_storage_type accumulation; + uint32_t cascadeSampleCounter[CascadeSize]; + RWMCCascadeSettings cascadeSettings; + + void initialize() + { + for (int i = 0; i < CascadeSize; ++i) + { + accumulation.data[i] = (CascadeLayerType)0.0f; + cascadeSampleCounter[i] = 0u; + } + + // TODO: pass these values to this function + cascadeSettings.size = 6; + cascadeSettings.start = 1.0f; + cascadeSettings.base = 1.0f; + } + + typename vector_traits::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) + { + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); + } + + // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp + void addSample(uint32_t sampleIndex, float32_t3 sample) + { + float lowerScale = cascadeSettings.start; + float upperScale = lowerScale * cascadeSettings.base; + + const float luma = getLuma(sample); + + uint32_t lowerCascadeIndex = 0u; + while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) + { + lowerScale = upperScale; + upperScale *= cascadeSettings.base; + ++lowerCascadeIndex; + } + + float lowerCascadeLevelWeight; + float higherCascadeLevelWeight; + + if (luma <= 
lowerScale) + lowerCascadeLevelWeight = 1.0f; + else if (luma < upperScale) + lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale)); + else // Inf, NaN ... + lowerCascadeLevelWeight = 0.0f; + + if (luma < upperScale) + higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); + else + higherCascadeLevelWeight = upperScale / luma; + + uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; + + const uint32_t sampleCount = sampleIndex + 1u; + const float reciprocalSampleCount = 1.0f / float(sampleCount); + accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount; + accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[lowerCascadeIndex] = sampleCount; + cascadeSampleCounter[higherCascadeIndex] = sampleCount; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index 66c720782..1134c98e9 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -5,6 +5,7 @@ #include #include #include +#include #include "rand_gen.hlsl" #include "ray_gen.hlsl" @@ -40,10 +41,30 @@ struct PathTracerCreationParams BxDFCreation dielectricParams; }; -template +// TODO: maybe implement a concept to ensure that OutputTypeVec is a vector? 
+template +struct DefaultAccumulator +{ + using output_storage_type = OutputTypeVec; + output_storage_type accumulation; + + void initialize() + { + accumulation = (output_storage_type)0.0f; + } + + void addSample(uint32_t sampleIndex, float32_t3 sample) + { + using ScalarType = typename vector_traits::scalar_type; + ScalarType rcpSampleSize = 1.0 / (sampleIndex + 1); + accumulation += (sample - accumulation) * rcpSampleSize; + } +}; + +template struct Unidirectional { - using this_t = Unidirectional; + using this_t = Unidirectional; using randgen_type = RandGen; using raygen_type = RayGen; using intersector_type = Intersector; @@ -53,6 +74,7 @@ struct Unidirectional using scalar_type = typename MaterialSystem::scalar_type; using vector3_type = vector; using measure_type = typename MaterialSystem::measure_type; + using output_storage_type = typename Accumulator::output_storage_type; using sample_type = typename NextEventEstimator::sample_type; using ray_dir_info_type = typename sample_type::ray_dir_info_type; using ray_type = typename RayGen::ray_type; @@ -265,105 +287,40 @@ struct Unidirectional // #endif } - measure_type getSingleSampleMeasure(uint32_t sampleID, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) - { - vector3_type uvw = rand3d(0u, sampleID, randGen.rng()); // TODO: take from scramblebuf? 
- ray_type ray = rayGen.generate(uvw); - - // bounces - bool hit = true; - bool rayAlive = true; - for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) - { - ray.intersectionT = numeric_limits::max; - ray.objectID = intersector_type::traceRay(ray, scene); - - hit = ray.objectID.id != -1; - if (hit) - rayAlive = closestHitProgram(1, sampleID, ray, scene); - } - if (!hit) - missProgram(ray); - - return ray.payload.accumulation; - } - // Li - measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + output_storage_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) { - measure_type Li = (measure_type)0.0; + Accumulator accumulator; + accumulator.initialize(); //scalar_type meanLumaSq = 0.0; for (uint32_t i = 0; i < numSamples; i++) { - measure_type accumulation = getSingleSampleMeasure(i, depth, scene); - scalar_type rcpSampleSize = 1.0 / (i + 1); - Li += (accumulation - Li) * rcpSampleSize; + vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf? + ray_type ray = rayGen.generate(uvw); - // TODO: visualize high variance - - // TODO: russian roulette early exit? 
- } - - return Li; - } - - struct RWMCCascadeSettings - { - uint32_t size; - uint32_t start; - uint32_t base; - }; - - void generateCascade(int32_t2 coords, uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(RWMCCascadeSettings) cascadeSettings, NBL_CONST_REF_ARG(scene_type) scene) - { - // TODO: move `MaxCascadeSize` somewhere else - const static uint32_t MaxCascadeSize = 10u; - float32_t4 cascadeEntry[MaxCascadeSize]; - for (int i = 0; i < MaxCascadeSize; ++i) - cascadeEntry[i] = float32_t4(0.0f, 0.0f, 0.0f, 0.0f); - - float lowerScale = cascadeSettings.start; - float upperScale = lowerScale * cascadeSettings.base; - - // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp - for (uint32_t i = 0; i < numSamples; i++) - { - measure_type accumulation = getSingleSampleMeasure(i, depth, scene); - - const float luma = getLuma(accumulation); - - uint32_t lowerCascadeIndex = 0u; - while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) + // bounces + bool hit = true; + bool rayAlive = true; + for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) { - lowerScale = upperScale; - upperScale *= cascadeSettings.base; - ++lowerCascadeIndex; - } + ray.intersectionT = numeric_limits::max; + ray.objectID = intersector_type::traceRay(ray, scene); - float lowerCascadeLevelWeight; - float higherCascadeLevelWeight; + hit = ray.objectID.id != -1; + if (hit) + rayAlive = closestHitProgram(1, i, ray, scene); + } + if (!hit) + missProgram(ray); - if (luma <= lowerScale) - lowerCascadeLevelWeight = 1.0f; - else if (luma < upperScale) - lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale)); - else // Inf, NaN ... 
- lowerCascadeLevelWeight = 0.0f; + accumulator.addSample(i, ray.payload.accumulation); - if (luma < upperScale) - higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); - else - higherCascadeLevelWeight = upperScale / luma; + // TODO: visualize high variance - // TODO: odrazu liczyc srednia - cascadeEntry[lowerCascadeIndex] += float32_t4(accumulation * lowerCascadeLevelWeight, 1.0f); + // TODO: russian roulette early exit? } - for (uint32_t i = 0; i < 6; i++) - { - cascadeEntry[i] /= float(numSamples); - cascade[uint3(coords.x, coords.y, i)] = cascadeEntry[i]; - } + return accumulator.accumulation; } NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index ee0486865..7f40b4b77 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -36,6 +36,32 @@ #define BXDF_COUNT 7 #include "render_common.hlsl" +#include "rwmc_global_settings_common.hlsl" + +#ifdef RWMC_ENABLED +#include "RWMCCascadeAccumulator.hlsl" +#include "render_rwmc_common.hlsl" +#endif + +#ifdef RWMC_ENABLED +[[vk::push_constant]] RenderRWMCPushConstants pc; +#else +[[vk::push_constant]] RenderPushConstants pc; +#endif + +[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] Texture2D envMap; // unused +[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] SamplerState envSampler; + +[[vk::binding(1, 2)]] Buffer sampleSequence; + +[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D scramblebuf; // unused +[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler; + +#ifdef RWMC_ENABLED +[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray cascade; +#endif +[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; + #include "pathtracer.hlsl" using namespace nbl; @@ -96,7 +122,15 @@ using raygen_type = ext::RayGen::Basic; using 
intersector_type = ext::Intersector::Comprehensive; using material_system_type = ext::MaterialSystem::System; using nee_type = ext::NextEventEstimator::Estimator; -using pathtracer_type = ext::PathTracer::Unidirectional; + +#ifdef RWMC_ENABLED +// TODO: get cascade size from a shared include file +using accumulator_type = rwmc::RWMCCascadeAccumulator; +#else +using accumulator_type = ext::PathTracer::DefaultAccumulator; +#endif + +using pathtracer_type = ext::PathTracer::Unidirectional; static const ext::Shape spheres[SPHERE_COUNT] = { ext::Shape::create(float3(0.0, -100.5, -1.0), 100.0, 0u, light_type::INVALID_ID), @@ -129,7 +163,7 @@ static const ext::Shape rectangles[1]; #endif static const light_type lights[LIGHT_COUNT] = { - light_type::create(spectral_t(30.0,25.0,15.0), + light_type::create(LightEminence, #ifdef SPHERE_LIGHT 8u, #else @@ -217,23 +251,19 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); - bool useRWMC = bool(pc.useRWMC); - if (!useRWMC) +#ifdef RWMC_ENABLED + accumulator_type::output_storage_type cascadeEntry = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); + for (uint32_t i = 0; i < CascadeSize; ++i) { - float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); - float32_t4 pixCol = float32_t4(color, 1.0); - outImage[coords] = pixCol; + float32_t4 cascadeLayerEntry = float32_t4(cascadeEntry.data[i], 1.0f); + cascade[uint3(coords.x, coords.y, i)] = cascadeLayerEntry; } - else - { - pathtracer_type::RWMCCascadeSettings cascadeSettings; - cascadeSettings.size = pc.rwmcCascadeSize; - cascadeSettings.start = pc.rwmcCascadeStart; - cascadeSettings.base = pc.rwmcCascadeBase; +#else + float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); + outImage[coords] = float32_t4(color, 1.0); +#endif - // TODO: template parameter should be - pathtracer.generateCascade(coords, pc.sampleCount, pc.depth, cascadeSettings, scene); - } + #ifdef 
PERSISTENT_WORKGROUPS } diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index b54d28227..6c3f63ee4 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -1,28 +1,22 @@ #ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ #define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" -struct SPushConstants +#ifndef __HLSL_VERSION +#include "matrix4SIMD.h" +#endif + +struct RenderPushConstants { +#ifdef __HLSL_VERSION float32_t4x4 invMVP; +#else + nbl::core::matrix4SIMD invMVP; +#endif int sampleCount; int depth; - uint32_t rwmcCascadeSize; - int useRWMC; - uint32_t rwmcCascadeStart; - uint32_t rwmcCascadeBase; }; -[[vk::push_constant]] SPushConstants pc; - -[[vk::combinedImageSampler]][[vk::binding(0, 2)]] Texture2D envMap; // unused -[[vk::combinedImageSampler]][[vk::binding(0, 2)]] SamplerState envSampler; - -[[vk::binding(1, 2)]] Buffer sampleSequence; - -[[vk::combinedImageSampler]][[vk::binding(2, 2)]] Texture2D scramblebuf; // unused -[[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler; - -[[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D outImage; -[[vk::image_format("rgba16f")]][[vk::binding(1, 0)]] RWTexture2DArray cascade; +NBL_CONSTEXPR nbl::hlsl::float32_t3 LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f); #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl new file mode 100644 index 000000000..a4994c80c --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -0,0 +1,24 @@ +#ifndef _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +#ifndef __HLSL_VERSION +#include "matrix4SIMD.h" +#endif + 
+struct RenderRWMCPushConstants +{ +#ifdef __HLSL_VERSION + float32_t4x4 invMVP; +#else + nbl::core::matrix4SIMD invMVP; +#endif + int sampleCount; + int depth; + float start; + float base; + float minReliableLuma; + float kappa; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl index 49c1e306c..b66edeb87 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -1,18 +1,11 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include +#include "resolve_common.hlsl" +#include "rwmc_global_settings_common.hlsl" -struct SPushConstants -{ - uint32_t cascadeCount; - float base; - uint32_t sampleCount; - float minReliableLuma; - float kappa; -}; - -[[vk::push_constant]] SPushConstants pc; +[[vk::push_constant]] ResolvePushConstants pc; [[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; -[[vk::image_format("rgba16f")]] [[vk::binding(1, 0)]] RWTexture2DArray cascade; +[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray cascade; using namespace nbl; using namespace hlsl; @@ -32,10 +25,10 @@ struct RWMCReweightingParameters float NOverKappa; }; -RWMCReweightingParameters computeReweightingParameters(uint32_t cascadeCount, float base, uint32_t sampleCount, float minReliableLuma, float kappa) +RWMCReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa) { RWMCReweightingParameters retval; - retval.lastCascadeIndex = cascadeCount - 1u; + retval.lastCascadeIndex = CascadeSize - 1u; retval.initialEmin = minReliableLuma; retval.reciprocalBase = 1.f / base; const float N = float(sampleCount); @@ -187,7 +180,7 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) const int32_t2 coords = getCoordinates(); //float32_t3 color = sumCascade(coords); - RWMCReweightingParameters reweightingParameters = 
computeReweightingParameters(pc.cascadeCount, pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa); + RWMCReweightingParameters reweightingParameters = computeReweightingParameters(pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa); float32_t3 color = RWMCReweight(reweightingParameters, coords); outImage[coords] = float32_t4(color, 1.0f); diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl new file mode 100644 index 000000000..5937c42e2 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl @@ -0,0 +1,13 @@ +#ifndef _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +struct ResolvePushConstants +{ + uint32_t sampleCount; + float base; + float minReliableLuma; + float kappa; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl new file mode 100644 index 000000000..6a5d1b9d4 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl @@ -0,0 +1,7 @@ +#ifndef _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +NBL_CONSTEXPR uint32_t CascadeSize = 6u; + +#endif diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index fb59d71cb..1cfffcb73 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -6,6 +6,11 @@ #include "nbl/asset/interchange/IImageAssetHandlerBase.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/surface_transform.h" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "app_resources/hlsl/render_common.hlsl" +#include "app_resources/hlsl/render_rwmc_common.hlsl" +#include 
"app_resources/hlsl/resolve_common.hlsl" +#include "app_resources/hlsl/rwmc_global_settings_common.hlsl" using namespace nbl; using namespace core; @@ -15,27 +20,6 @@ using namespace asset; using namespace ui; using namespace video; -static constexpr uint32_t CascadeSize = 6u; -struct PTPushConstant -{ - matrix4SIMD invMVP; - int sampleCount; - int depth; - const uint32_t rwmcCascadeSize = CascadeSize; - int useRWMC; - uint32_t rwmcCascadeStart; - uint32_t rwmcCascadeBase; -}; - -struct RWMCPushConstants -{ - const uint32_t cascadeSize = CascadeSize; - float base; - uint32_t sampleCount; - float minReliableLuma; - float kappa; -}; - // TODO: Add a QueryPool for timestamping once its ready // TODO: Do buffer creation using assConv class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication @@ -272,7 +256,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return gpuDS; }; - std::array descriptorSet0Bindings = {}; + std::array descriptorSet0Bindings = {}; + std::array descriptorSet1Bindings = {}; std::array descriptorSet3Bindings = {}; std::array presentDescriptorSetBindings; @@ -284,8 +269,9 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; - descriptorSet0Bindings[1] = { - .binding = 1u, + + descriptorSet1Bindings[0] = { + .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, @@ -328,16 +314,20 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); + auto cpuDescriptorSetLayout1 = make_smart_refctd_ptr(descriptorSet1Bindings); auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); auto 
gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); + auto gpuDescriptorSetLayout1 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout1); auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); + auto cpuDescriptorSet1 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout1)); auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); + m_descriptorSet1 = convertDSCPU2GPU(cpuDescriptorSet1); m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); smart_refctd_ptr presentDSPool; @@ -397,7 +387,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return shader; }; - auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false) -> smart_refctd_ptr + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false, bool rwmc = false) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.workingDirectory = localInputCWD; @@ -428,11 +418,16 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, options.preprocessorOptions.logger = m_logger.get(); options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - const IShaderCompiler::SMacroDefinition defines[2] = { {defineMacro, ""}, { "PERSISTENT_WORKGROUPS", "1" } }; - if (!defineMacro.empty() && persistentWorkGroups) - options.preprocessorOptions.extraDefines = { defines, defines + 2 }; - else if (!defineMacro.empty() && !persistentWorkGroups) - options.preprocessorOptions.extraDefines = { defines, defines + 1 }; + core::vector defines; + defines.reserve(3); + if 
(!defineMacro.empty()) + defines.push_back({ defineMacro, "" }); + if(persistentWorkGroups) + defines.push_back({ "PERSISTENT_WORKGROUPS", "1" }); + if(rwmc) + defines.push_back({ "RWMC_ENABLED", "" }); + + options.preprocessorOptions.extraDefines = defines; source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); @@ -448,11 +443,12 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // Create compute pipelines { - for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) + { const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, - .size = sizeof(PTPushConstant) + .size = sizeof(RenderPushConstants) }; auto ptPipelineLayout = m_device->createPipelineLayout( { &pcRange, 1 }, @@ -461,9 +457,24 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, core::smart_refctd_ptr(gpuDescriptorSetLayout2), nullptr ); - if (!ptPipelineLayout) { + if (!ptPipelineLayout) return logFail("Failed to create Pathtracing pipeline layout"); - } + + const nbl::asset::SPushConstantRange rwmcPcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderRWMCPushConstants) + }; + auto rwmcPtPipelineLayout = m_device->createPipelineLayout( + { &rwmcPcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + core::smart_refctd_ptr(gpuDescriptorSetLayout1), + core::smart_refctd_ptr(gpuDescriptorSetLayout2), + nullptr + ); + if (!rwmcPtPipelineLayout) + return logFail("Failed to create RWMC Pathtracing pipeline layout"); + { auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index]); @@ -492,6 +503,21 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return logFail("Failed to create HLSL compute pipeline!\n"); } + // rwmc pipelines + { + auto ptShader = 
loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = rwmcPtPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.shader.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC compute pipeline!\n"); + } + // persistent wg pipelines { auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index], true); @@ -527,12 +553,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, - .size = sizeof(RWMCPushConstants) + .size = sizeof(ResolvePushConstants) }; auto pipelineLayout = m_device->createPipelineLayout( { &pcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout0) + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + core::smart_refctd_ptr(gpuDescriptorSetLayout1) ); if (!pipelineLayout) { @@ -969,8 +996,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .info = &writeDSInfos[0] }; writeDescriptorSets[1] = { - .dstSet = m_descriptorSet0.get(), - .binding = 1, + .dstSet = m_descriptorSet1.get(), + .binding = 0, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[1] @@ -1099,9 +1126,10 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, ImGui::Text("\nRWMC settings:"); ImGui::Checkbox("Enable RWMC", &useRWMC); - ImGui::SliderFloat("base", &rwmcPushConstants.base, 1.0f, 32.0f); - ImGui::SliderFloat("minReliableLuma", &rwmcPushConstants.minReliableLuma, 0.1f, 32.0f); - ImGui::SliderFloat("kappa", &rwmcPushConstants.kappa, 0.1f, 32.0f); + ImGui::SliderFloat("start", &rwmcCascadeStart, 1.0f, 
32.0f); + ImGui::SliderFloat("base", &rwmcCascadeBase, 1.0f, 32.0f); + ImGui::SliderFloat("minReliableLuma", &rwmcMinReliableLuma, 0.1f, 32.0f); + ImGui::SliderFloat("kappa", &rwmcKappa, 0.1f, 1024.0f); ImGui::End(); } @@ -1125,14 +1153,12 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_oracle.reportBeginFrameRecord(); m_camera.mapKeysToWASD(); - // set initial push constants contents - rwmcPushConstants.base = 8.0f; - rwmcPushConstants.sampleCount = spp; - rwmcPushConstants.minReliableLuma = 1.0f; - rwmcPushConstants.kappa = 5.0f; - - pc.rwmcCascadeStart = 1.0; - pc.rwmcCascadeBase = 8.0f; + // set initial rwmc settings + + rwmcCascadeStart = hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], LightEminence); + rwmcCascadeBase = 8.0f; + rwmcMinReliableLuma = 1.0f; + rwmcKappa = 5.0f; return true; } @@ -1190,174 +1216,12 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, if (!keepRunning()) return; - // render whole scene to offline frame buffer & submit - { - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - // disregard surface/swapchain transformation for now - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - viewProjectionMatrix.getInverseTransform(pc.invMVP); - pc.useRWMC = useRWMC ? 
1 : 0; - pc.sampleCount = spp; - pc.depth = depth; - - rwmcPushConstants.sampleCount = spp; - - // safe to proceed - // upload buffer data - cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + if (useRWMC) + beginCommandBufferAndDispatchPathracerPipelineUseRWMC(cmdbuf); + else + beginCommandBufferAndDispatchPathracerPipeline(cmdbuf); - // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } - - // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from cascade - if (useRWMC) - { - const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::NONE - } - }, - .image = m_cascadeView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = CascadeSize - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL - } - }; 
- cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); - } - - // cube envmap handle - { - IGPUComputePipeline* pipeline; - if (usePersistentWorkGroups) - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); - else - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); - if (usePersistentWorkGroups) - { - uint32_t dispatchSize = m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize); - cmdbuf->dispatch(dispatchSize, 1u, 1u); - } - else - cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); - } - - // m_cascadeView synchronization - wait for previous compute shader to write into the cascade - // TODO: create this and every other barrier once outside of the loop? 
- { - const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_cascadeView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = CascadeSize - } - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); - } - - // reweighting - if(useRWMC) - { - IGPUComputePipeline* pipeline; - if (usePersistentWorkGroups) - pipeline = nullptr; - else - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_reweightingPipeline.get() : nullptr; - - if (!pipeline) - { - m_logger->log("Reweighting pipeline is not valid", ILogger::ELL_ERROR); - std::exit(-1); - } - - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RWMCPushConstants), &rwmcPushConstants); - cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); - } - - // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - 
.baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } - - // TODO: tone mapping and stuff - } + // TODO: tone mapping and stuff asset::SViewport viewport; { @@ -1549,6 +1413,254 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_ui.manager->update(params); } + + private: + void beginCommandBufferAndDispatchPathracerPipeline(IGPUCommandBuffer* cmdbuf) + { + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + viewProjectionMatrix.getInverseTransform(pc.invMVP); + pc.sampleCount = spp; + pc.depth = depth; + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // cube envmap handle + { + IGPUComputePipeline* pipeline; + if (usePersistentWorkGroups) + pipeline = renderMode == 
E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RenderPushConstants), &pc); + if (usePersistentWorkGroups) + { + uint32_t dispatchSize = m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize); + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + else + cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + + } + + void beginCommandBufferAndDispatchPathracerPipelineUseRWMC(IGPUCommandBuffer* cmdbuf) + { + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + 
m_logger->log("Only HLSL render mode is supported.", ILogger::ELL_ERROR); + std::exit(-1); + } + + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + viewProjectionMatrix.getInverseTransform(rwmcPushConstants.invMVP); + + rwmcPushConstants.start = rwmcCascadeStart; + rwmcPushConstants.depth = depth; + rwmcPushConstants.sampleCount = resolvePushConstants.sampleCount = spp; + rwmcPushConstants.base = resolvePushConstants.base = rwmcCascadeBase; + rwmcPushConstants.minReliableLuma = resolvePushConstants.minReliableLuma = rwmcMinReliableLuma; + rwmcPushConstants.kappa = resolvePushConstants.kappa = rwmcKappa; + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from cascade + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = 
PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::NONE + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeSize + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + { + auto pipeline = m_PTHLSLPipelinesRWMC[PTPipeline].get(); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RenderRWMCPushConstants), &rwmcPushConstants); + + // TODO: persistend work groups + + cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + } + + // m_cascadeView synchronization - wait for previous compute shader to write into the cascade + // TODO: create this and every other barrier once outside of the loop? 
+ { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeSize + } + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + // reweighting + { + cmdbuf->bindComputePipeline(m_reweightingPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_reweightingPipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_reweightingPipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); + cmdbuf->pushConstants(m_reweightingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + 
cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + } private: smart_refctd_ptr m_window; @@ -1560,12 +1672,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; smart_refctd_ptr m_reweightingPipeline; smart_refctd_ptr m_presentPipeline; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; + smart_refctd_ptr m_descriptorSet0, m_descriptorSet1, m_descriptorSet2, m_presentDescriptorSet; core::smart_refctd_ptr m_guiDescriptorSetPool; @@ -1613,10 +1726,15 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, int renderMode = E_RENDER_MODE::ERM_HLSL; int spp = 32; int depth = 3; + float rwmcCascadeStart; + float rwmcCascadeBase; + float rwmcMinReliableLuma; + float rwmcKappa; bool usePersistentWorkGroups = false; bool useRWMC = false; - RWMCPushConstants rwmcPushConstants; - PTPushConstant pc; + RenderRWMCPushConstants rwmcPushConstants; + RenderPushConstants pc; + ResolvePushConstants resolvePushConstants; bool m_firstFrame = true; IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; From 6168e14475e8296c69c4c2010deb2efc24f61e08 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 16 Oct 2025 21:02:54 +0200 Subject: [PATCH 07/10] Fixed cascade settings initialization --- .../app_resources/hlsl/RWMCCascadeAccumulator.hlsl | 9 +++++---- 31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl | 9 ++++++--- .../app_resources/hlsl/render.comp.hlsl | 12 ++++++++---- .../app_resources/hlsl/render_rwmc_common.hlsl | 1 - 
31_HLSLPathTracer/main.cpp | 2 +- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl index 8b7e0ec40..bc0e77a45 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl @@ -28,11 +28,12 @@ template struct RWMCCascadeAccumulator { using output_storage_type = CascadeEntry; + using initialization_data = RWMCCascadeSettings; output_storage_type accumulation; uint32_t cascadeSampleCounter[CascadeSize]; RWMCCascadeSettings cascadeSettings; - void initialize() + void initialize(in RWMCCascadeSettings settings) { for (int i = 0; i < CascadeSize; ++i) { @@ -41,9 +42,9 @@ struct RWMCCascadeAccumulator } // TODO: pass these values to this function - cascadeSettings.size = 6; - cascadeSettings.start = 1.0f; - cascadeSettings.base = 1.0f; + cascadeSettings.size = settings.size; + cascadeSettings.start = settings.start; + cascadeSettings.base = settings.base; } typename vector_traits::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index 1134c98e9..175d1e8bf 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -45,10 +45,13 @@ struct PathTracerCreationParams template struct DefaultAccumulator { + struct DefaultAccumulatorInitializationSettings {}; + using output_storage_type = OutputTypeVec; + using initialization_data = DefaultAccumulatorInitializationSettings; output_storage_type accumulation; - void initialize() + void initialize(in initialization_data initializationData) { accumulation = (output_storage_type)0.0f; } @@ -288,10 +291,10 @@ struct Unidirectional } // Li - output_storage_type getMeasure(uint32_t numSamples, uint32_t depth, 
NBL_CONST_REF_ARG(scene_type) scene) + output_storage_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(typename Accumulator::initialization_data) accumulatorInitData) { Accumulator accumulator; - accumulator.initialize(); + accumulator.initialize(accumulatorInitData); //scalar_type meanLumaSq = 0.0; for (uint32_t i = 0; i < numSamples; i++) { diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index 7f40b4b77..a9939ce52 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -124,8 +124,7 @@ using material_system_type = ext::MaterialSystem::System; #ifdef RWMC_ENABLED -// TODO: get cascade size from a shared include file -using accumulator_type = rwmc::RWMCCascadeAccumulator; +using accumulator_type = rwmc::RWMCCascadeAccumulator; #else using accumulator_type = ext::PathTracer::DefaultAccumulator; #endif @@ -252,14 +251,19 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); #ifdef RWMC_ENABLED - accumulator_type::output_storage_type cascadeEntry = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); + accumulator_type::initialization_data accumulatorInitData; + accumulatorInitData.size = CascadeSize; + accumulatorInitData.start = pc.start; + accumulatorInitData.base = pc.base; + accumulator_type::output_storage_type cascadeEntry = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData); for (uint32_t i = 0; i < CascadeSize; ++i) { float32_t4 cascadeLayerEntry = float32_t4(cascadeEntry.data[i], 1.0f); cascade[uint3(coords.x, coords.y, i)] = cascadeLayerEntry; } #else - float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); + accumulator_type::initialization_data accumulatorInitData; + float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, 
accumulatorInitData); outImage[coords] = float32_t4(color, 1.0); #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl index a4994c80c..7a0674869 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -17,7 +17,6 @@ struct RenderRWMCPushConstants int depth; float start; float base; - float minReliableLuma; float kappa; }; diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 1cfffcb73..b8cc24a6a 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -1523,7 +1523,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, rwmcPushConstants.depth = depth; rwmcPushConstants.sampleCount = resolvePushConstants.sampleCount = spp; rwmcPushConstants.base = resolvePushConstants.base = rwmcCascadeBase; - rwmcPushConstants.minReliableLuma = resolvePushConstants.minReliableLuma = rwmcMinReliableLuma; + resolvePushConstants.minReliableLuma = rwmcMinReliableLuma; rwmcPushConstants.kappa = resolvePushConstants.kappa = rwmcKappa; // safe to proceed From 8ecc60ff8af7b68564d769e04ec06d2e87cbf8e2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 18 Oct 2025 19:11:06 +0200 Subject: [PATCH 08/10] Refactor --- .../hlsl/RWMCCascadeAccumulator.hlsl | 101 ---------- .../app_resources/hlsl/pathtracer.hlsl | 4 +- .../app_resources/hlsl/render.comp.hlsl | 4 +- .../app_resources/hlsl/resolve.comp.hlsl | 179 ++---------------- 31_HLSLPathTracer/main.cpp | 157 ++++++++------- 5 files changed, 101 insertions(+), 344 deletions(-) delete mode 100644 31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl diff --git a/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl deleted file mode 100644 index bc0e77a45..000000000 --- 
a/31_HLSLPathTracer/app_resources/hlsl/RWMCCascadeAccumulator.hlsl +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef _NBL_HLSL_RWMC_INCLUDED_ -#define _NBL_HLSL_RWMC_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include -#include - -namespace nbl -{ -namespace hlsl -{ -namespace rwmc -{ - -struct RWMCCascadeSettings -{ - uint32_t size; - uint32_t start; - uint32_t base; -}; - -template -struct CascadeEntry -{ - CascadeLayerType data[CascadeSize]; -}; - -template -struct RWMCCascadeAccumulator -{ - using output_storage_type = CascadeEntry; - using initialization_data = RWMCCascadeSettings; - output_storage_type accumulation; - uint32_t cascadeSampleCounter[CascadeSize]; - RWMCCascadeSettings cascadeSettings; - - void initialize(in RWMCCascadeSettings settings) - { - for (int i = 0; i < CascadeSize; ++i) - { - accumulation.data[i] = (CascadeLayerType)0.0f; - cascadeSampleCounter[i] = 0u; - } - - // TODO: pass these values to this function - cascadeSettings.size = settings.size; - cascadeSettings.start = settings.start; - cascadeSettings.base = settings.base; - } - - typename vector_traits::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) - { - return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); - } - - // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp - void addSample(uint32_t sampleIndex, float32_t3 sample) - { - float lowerScale = cascadeSettings.start; - float upperScale = lowerScale * cascadeSettings.base; - - const float luma = getLuma(sample); - - uint32_t lowerCascadeIndex = 0u; - while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) - { - lowerScale = upperScale; - upperScale *= cascadeSettings.base; - ++lowerCascadeIndex; - } - - float lowerCascadeLevelWeight; - float higherCascadeLevelWeight; - - if (luma <= lowerScale) - lowerCascadeLevelWeight = 1.0f; - else if (luma < upperScale) - lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / 
upperScale) / (1.0f - lowerScale / upperScale)); - else // Inf, NaN ... - lowerCascadeLevelWeight = 0.0f; - - if (luma < upperScale) - higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); - else - higherCascadeLevelWeight = upperScale / luma; - - uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; - - const uint32_t sampleCount = sampleIndex + 1u; - const float reciprocalSampleCount = 1.0f / float(sampleCount); - accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount; - accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount; - cascadeSampleCounter[lowerCascadeIndex] = sampleCount; - cascadeSampleCounter[higherCascadeIndex] = sampleCount; - } -}; - -} -} -} - -#endif \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index 175d1e8bf..9fe4ff5e7 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -6,6 +6,7 @@ #include #include #include +#include #include "rand_gen.hlsl" #include "ray_gen.hlsl" @@ -41,8 +42,7 @@ struct PathTracerCreationParams BxDFCreation dielectricParams; }; -// TODO: maybe implement a concept to ensure that OutputTypeVec is a vector? 
-template +template) struct DefaultAccumulator { struct DefaultAccumulatorInitializationSettings {}; diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index a9939ce52..c17c18dfd 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -39,7 +39,7 @@ #include "rwmc_global_settings_common.hlsl" #ifdef RWMC_ENABLED -#include "RWMCCascadeAccumulator.hlsl" +#include #include "render_rwmc_common.hlsl" #endif @@ -124,7 +124,7 @@ using material_system_type = ext::MaterialSystem::System; #ifdef RWMC_ENABLED -using accumulator_type = rwmc::RWMCCascadeAccumulator; +using accumulator_type = rwmc::CascadeAccumulator; #else using accumulator_type = ext::PathTracer::DefaultAccumulator; #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl index b66edeb87..5e12a1ddc 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -1,7 +1,9 @@ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include +#include #include "resolve_common.hlsl" #include "rwmc_global_settings_common.hlsl" +#ifdef PERSISTENT_WORKGROUPS +#include "nbl/builtin/hlsl/math/morton.hlsl" +#endif [[vk::push_constant]] ResolvePushConstants pc; [[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; @@ -14,144 +16,6 @@ NBL_CONSTEXPR uint32_t WorkgroupSize = 512; NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; -struct RWMCReweightingParameters -{ - uint32_t lastCascadeIndex; - float initialEmin; // a minimum image brightness that we always consider reliable - float reciprocalBase; - float reciprocalN; - float reciprocalKappa; - float colorReliabilityFactor; - float NOverKappa; -}; - -RWMCReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, 
float minReliableLuma, float kappa) -{ - RWMCReweightingParameters retval; - retval.lastCascadeIndex = CascadeSize - 1u; - retval.initialEmin = minReliableLuma; - retval.reciprocalBase = 1.f / base; - const float N = float(sampleCount); - retval.reciprocalN = 1.f / N; - retval.reciprocalKappa = 1.f / kappa; - // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have - // allow up to ~ more energy in one sample to lessen bias in some cases - retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; - retval.NOverKappa = N * retval.reciprocalKappa; - - return retval; -} - -struct RWMCCascadeSample -{ - float32_t3 centerValue; - float normalizedCenterLuma; - float normalizedNeighbourhoodAverageLuma; -}; - -// TODO: figure out what values should pixels outside have, 0.0f is incorrect -float32_t3 RWMCsampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, uint32_t cascadeIndex) -{ - const int32_t2 texelCoord = currentCoord + offset; - if (any(texelCoord < int32_t2(0, 0))) - return float32_t3(0.0f, 0.0f, 0.0f); - - float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex))); - return float32_t3(output.r, output.g, output.b); -} - -float32_t calcLuma(in float32_t3 col) -{ - return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); -} - -RWMCCascadeSample RWMCSampleCascade(in int32_t2 coord, in uint cascadeIndex, in float reciprocalBaseI) -{ - float32_t3 neighbourhood[9]; - neighbourhood[0] = RWMCsampleCascadeTexel(coord, int32_t2(-1, -1), cascadeIndex); - neighbourhood[1] = RWMCsampleCascadeTexel(coord, int32_t2(0, -1), cascadeIndex); - neighbourhood[2] = RWMCsampleCascadeTexel(coord, int32_t2(1, -1), cascadeIndex); - neighbourhood[3] = RWMCsampleCascadeTexel(coord, int32_t2(-1, 0), cascadeIndex); - neighbourhood[4] = RWMCsampleCascadeTexel(coord, int32_t2(0, 0), cascadeIndex); - neighbourhood[5] = 
RWMCsampleCascadeTexel(coord, int32_t2(1, 0), cascadeIndex); - neighbourhood[6] = RWMCsampleCascadeTexel(coord, int32_t2(-1, 1), cascadeIndex); - neighbourhood[7] = RWMCsampleCascadeTexel(coord, int32_t2(0, 1), cascadeIndex); - neighbourhood[8] = RWMCsampleCascadeTexel(coord, int32_t2(1, 1), cascadeIndex); - - // numerical robustness - float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + - ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); - - RWMCCascadeSample retval; - retval.centerValue = neighbourhood[4]; - retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI; - retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; - return retval; -} - -float32_t3 RWMCReweight(in RWMCReweightingParameters params, in int32_t2 coord) -{ - float reciprocalBaseI = 1.f; - RWMCCascadeSample curr = RWMCSampleCascade(coord, 0u, reciprocalBaseI); - - float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); - float Emin = params.initialEmin; - - float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; - for (uint i = 0u; i <= params.lastCascadeIndex; i++) - { - const bool notFirstCascade = i != 0u; - const bool notLastCascade = i != params.lastCascadeIndex; - - RWMCCascadeSample next; - if (notLastCascade) - { - reciprocalBaseI *= params.reciprocalBase; - next = RWMCSampleCascade(coord, i + 1u, reciprocalBaseI); - } - - float reliability = 1.f; - // sample counting-based reliability estimation - if (params.reciprocalKappa <= 1.f) - { - float localReliability = curr.normalizedCenterLuma; - // reliability in 3x3 pixel block (see robustness) - float globalReliability = curr.normalizedNeighbourhoodAverageLuma; - if (notFirstCascade) - { - localReliability += prevNormalizedCenterLuma; - globalReliability += 
prevNormalizedNeighbourhoodAverageLuma; - } - if (notLastCascade) - { - localReliability += next.normalizedCenterLuma; - globalReliability += next.normalizedNeighbourhoodAverageLuma; - } - // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) - reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; - { - const float accumLuma = calcLuma(accumulation); - if (accumLuma > Emin) - Emin = accumLuma; - - const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; - - reliability += colorReliability; - reliability *= params.NOverKappa; - reliability -= params.reciprocalKappa; - reliability = clamp(reliability * 0.5f, 0.f, 1.f); - } - } - accumulation += curr.centerValue * reliability; - - prevNormalizedCenterLuma = curr.normalizedCenterLuma; - prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; - curr = next; - } - - return accumulation; -} - int32_t2 getCoordinates() { uint32_t width, height; @@ -159,29 +23,26 @@ int32_t2 getCoordinates() return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); } -// this function is for testing purpose -// simply adds every cascade buffer, output shoud be nearly the same as output of default accumulator (RWMC off) -float32_t3 sumCascade(in const int32_t2 coords) -{ - float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); - - for (int i = 0; i < 6; ++i) - { - float32_t4 cascadeLevel = cascade.Load(uint3(coords, i)); - accumulation += float32_t3(cascadeLevel.r, cascadeLevel.g, cascadeLevel.b); - } - - return accumulation; -} - [numthreads(WorkgroupSize, 1, 1)] void main(uint32_t3 threadID : SV_DispatchThreadID) { +#ifdef PERSISTENT_WORKGROUPS + uint32_t virtualThreadIndex; + [loop] + for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920 * 1080; 
virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) // not sure why 1280*720 doesn't cover draw surface + { + virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x; + const int32_t2 coords = (int32_t2)math::Morton::decode2d(virtualThreadIndex); +#else const int32_t2 coords = getCoordinates(); - //float32_t3 color = sumCascade(coords); +#endif - RWMCReweightingParameters reweightingParameters = computeReweightingParameters(pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa); - float32_t3 color = RWMCReweight(reweightingParameters, coords); + rwmc::ReweightingParameters reweightingParameters = rwmc::computeReweightingParameters(pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa, CascadeSize); + float32_t3 color = rwmc::reweight(reweightingParameters, cascade, coords); - outImage[coords] = float32_t4(color, 1.0f); + outImage[coords] = float32_t4(color, 1.0f); + +#ifdef PERSISTENT_WORKGROUPS + } +#endif } diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index b8cc24a6a..895b74db1 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -59,7 +59,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, static inline std::array PTGLSLShaderPaths = { "app_resources/glsl/litBySphere.comp", "app_resources/glsl/litByTriangle.comp", "app_resources/glsl/litByRectangle.comp" }; static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", "TRIANGLE_LIGHT", "RECTANGLE_LIGHT" }; - static inline std::string ReweightingShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; + static inline std::string ResolveShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { @@ -441,6 +441,19 @@ class HLSLComputePathtracer final : public 
examples::SimpleWindowedApplication, return shader; }; + auto getComputePipelineCreationParams = [](IGPUShader* shader, IGPUPipelineLayout* pipelineLayout) -> IGPUComputePipeline::SCreationParams + { + IGPUComputePipeline::SCreationParams params = {}; + params.layout = pipelineLayout; + params.shader.shader = shader; + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.shader.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + + return params; + }; + // Create compute pipelines { for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) @@ -475,80 +488,56 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, if (!rwmcPtPipelineLayout) return logFail("Failed to create RWMC Pathtracing pipeline layout"); - { auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index]); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPipelines.data() + index)) return logFail("Failed to create GLSL compute pipeline!\n"); } { auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, 
m_PTHLSLPipelines.data() + index)) return logFail("Failed to create HLSL compute pipeline!\n"); } - // rwmc pipelines - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = rwmcPtPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) - return logFail("Failed to create HLSL RWMC compute pipeline!\n"); - } - // persistent wg pipelines { auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index], true); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPersistentWGPipelines.data() + index)) return logFail("Failed to create GLSL PersistentWG compute pipeline!\n"); } { auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + 
index)) return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); } + + // rwmc pipelines + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); + } } } - // Create reweighting pipeline + // Create resolve pipelines { const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, @@ -563,23 +552,23 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, ); if (!pipelineLayout) { - return logFail("Failed to create reweighting pipeline layout"); + return logFail("Failed to create resolve pipeline layout"); } { - auto shader = loadAndCompileHLSLShader(ReweightingShaderPath); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = pipelineLayout.get(); - params.shader.shader = shader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_reweightingPipeline)) - return logFail("Failed to create HLSL reweighting compute pipeline!\n"); + auto shader = loadAndCompileHLSLShader(ResolveShaderPath); + auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + + if 
(!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePipeline)) + return logFail("Failed to create HLSL resolve compute pipeline!\n"); } + { + auto shader = loadAndCompileHLSLShader(ResolveShaderPath, "", true); + auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePersistentWGPipeline)) + return logFail("Failed to create HLSL resolve compute pipeline!\n"); + } } @@ -1205,7 +1194,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, } const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - m_api->startCapture(); + //m_api->startCapture(); // CPU events update(); @@ -1315,7 +1304,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); m_surface->present(m_currentImageAcquire.imageIndex, rendered); } - m_api->endCapture(); + //m_api->endCapture(); } inline bool keepRunning() override @@ -1467,13 +1456,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RenderPushConstants), &pc); - if (usePersistentWorkGroups) - { - uint32_t dispatchSize = m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize); - cmdbuf->dispatch(dispatchSize, 1u, 1u); - } - else - cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + + // TODO: shouldn't it be computed only at initialization stage and on window resize? + const uint32_t dispatchSize = usePersistentWorkGroups ? 
+ m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize) : + 1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize; + + cmdbuf->dispatch(dispatchSize, 1u, 1u); } // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) @@ -1585,8 +1574,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); } + // TODO: shouldn't it be computed only at initialization stage and on window resize? + const uint32_t dispatchSize = usePersistentWorkGroups ? + m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize) : + 1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize; + { - auto pipeline = m_PTHLSLPipelinesRWMC[PTPipeline].get(); + IGPUComputePipeline* pipeline = usePersistentWorkGroups ? 
m_PTHLSLPersistentWGPipelinesRWMC[PTPipeline].get() : m_PTHLSLPipelinesRWMC[PTPipeline].get(); cmdbuf->bindComputePipeline(pipeline); cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); @@ -1594,9 +1588,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RenderRWMCPushConstants), &rwmcPushConstants); - // TODO: persistend work groups - - cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + cmdbuf->dispatch(dispatchSize, 1u, 1u); } // m_cascadeView synchronization - wait for previous compute shader to write into the cascade @@ -1627,11 +1619,14 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // reweighting { - cmdbuf->bindComputePipeline(m_reweightingPipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_reweightingPipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_reweightingPipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); - cmdbuf->pushConstants(m_reweightingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); - cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + IGPUComputePipeline* pipeline = usePersistentWorkGroups ? 
m_resolvePersistentWGPipeline.get() : m_resolvePipeline.get(); + + cmdbuf->bindComputePipeline(m_resolvePipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_resolvePipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_resolvePipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); + cmdbuf->pushConstants(m_resolvePipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + + cmdbuf->dispatch(dispatchSize, 1u, 1u); } // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) @@ -1673,7 +1668,9 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; - smart_refctd_ptr m_reweightingPipeline; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelinesRWMC; + smart_refctd_ptr m_resolvePipeline; + smart_refctd_ptr m_resolvePersistentWGPipeline; smart_refctd_ptr m_presentPipeline; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; From bbc8ab80fecf44abb9b03f4fa147918fee7c310f Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 20 Oct 2025 14:16:41 +0200 Subject: [PATCH 09/10] Fixed rwmc persistent workgroups --- 31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl | 2 +- 31_HLSLPathTracer/main.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl index 5e12a1ddc..e4aa95923 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -29,7 +29,7 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) #ifdef PERSISTENT_WORKGROUPS uint32_t 
virtualThreadIndex; [loop] - for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920 * 1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) // not sure why 1280*720 doesn't cover draw surface + for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920 * 1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) { virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x; const int32_t2 coords = (int32_t2)math::Morton::decode2d(virtualThreadIndex); diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 895b74db1..cf9e58764 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -1621,10 +1621,10 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, { IGPUComputePipeline* pipeline = usePersistentWorkGroups ? m_resolvePersistentWGPipeline.get() : m_resolvePipeline.get(); - cmdbuf->bindComputePipeline(m_resolvePipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_resolvePipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_resolvePipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); - cmdbuf->pushConstants(m_resolvePipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); cmdbuf->dispatch(dispatchSize, 1u, 1u); } From 3ffe973e0d098bb67028c3d0c26fcc25b1ff3d7d Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 10 Nov 2025 20:49:44 +0100 Subject: [PATCH 10/10] Refactor --- 
.../app_resources/hlsl/pathtracer.hlsl | 63 +-- .../app_resources/hlsl/present.frag.hlsl | 4 +- .../app_resources/hlsl/render.comp.hlsl | 72 +-- .../app_resources/hlsl/render_common.hlsl | 3 +- .../hlsl/render_rwmc_common.hlsl | 14 +- .../app_resources/hlsl/resolve.comp.hlsl | 43 +- .../app_resources/hlsl/resolve_common.hlsl | 8 +- .../hlsl/rwmc_global_settings_common.hlsl | 2 +- 31_HLSLPathTracer/main.cpp | 501 ++++++++---------- 9 files changed, 318 insertions(+), 392 deletions(-) diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index 9fe4ff5e7..5b55dcf4c 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -45,21 +45,22 @@ struct PathTracerCreationParams template) struct DefaultAccumulator { - struct DefaultAccumulatorInitializationSettings {}; - using output_storage_type = OutputTypeVec; - using initialization_data = DefaultAccumulatorInitializationSettings; + using this_t = DefaultAccumulator; output_storage_type accumulation; - void initialize(in initialization_data initializationData) + static this_t create() { - accumulation = (output_storage_type)0.0f; + this_t retval; + retval.accumulation = promote(0.0f); + + return retval; } - void addSample(uint32_t sampleIndex, float32_t3 sample) + void addSample(uint32_t sampleCount, float32_t3 sample) { using ScalarType = typename vector_traits::scalar_type; - ScalarType rcpSampleSize = 1.0 / (sampleIndex + 1); + ScalarType rcpSampleSize = 1.0 / (sampleCount); accumulation += (sample - accumulation) * rcpSampleSize; } }; @@ -77,7 +78,7 @@ struct Unidirectional using scalar_type = typename MaterialSystem::scalar_type; using vector3_type = vector; using measure_type = typename MaterialSystem::measure_type; - using output_storage_type = typename Accumulator::output_storage_type; + using output_storage_type = typename Accumulator::output_storage_type; // ? 
using sample_type = typename NextEventEstimator::sample_type; using ray_dir_info_type = typename sample_type::ray_dir_info_type; using ray_type = typename RayGen::ray_type; @@ -291,39 +292,33 @@ struct Unidirectional } // Li - output_storage_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(typename Accumulator::initialization_data) accumulatorInitData) + void sampleMeasure(uint32_t sampleIndex, uint32_t maxDepth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(Accumulator) accumulator) { - Accumulator accumulator; - accumulator.initialize(accumulatorInitData); //scalar_type meanLumaSq = 0.0; - for (uint32_t i = 0; i < numSamples; i++) - { - vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf? - ray_type ray = rayGen.generate(uvw); + vector3_type uvw = rand3d(0u, sampleIndex, randGen.rng()); // TODO: take from scramblebuf? + ray_type ray = rayGen.generate(uvw); - // bounces - bool hit = true; - bool rayAlive = true; - for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) - { - ray.intersectionT = numeric_limits::max; - ray.objectID = intersector_type::traceRay(ray, scene); - - hit = ray.objectID.id != -1; - if (hit) - rayAlive = closestHitProgram(1, i, ray, scene); - } - if (!hit) - missProgram(ray); + // bounces + bool hit = true; + bool rayAlive = true; + for (int d = 1; (d <= maxDepth) && hit && rayAlive; d += 2) + { + ray.intersectionT = numeric_limits::max; + ray.objectID = intersector_type::traceRay(ray, scene); - accumulator.addSample(i, ray.payload.accumulation); + hit = ray.objectID.id != -1; + if (hit) + rayAlive = closestHitProgram(1, sampleIndex, ray, scene); + } + if (!hit) + missProgram(ray); - // TODO: visualize high variance + const uint32_t sampleCount = sampleIndex + 1; + accumulator.addSample(sampleCount, ray.payload.accumulation); - // TODO: russian roulette early exit? 
- } + // TODO: visualize high variance - return accumulator.accumulation; + // TODO: russian roulette early exit? } NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; diff --git a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl index 22695657c..d556a7162 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl @@ -10,10 +10,10 @@ using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; // binding 0 set 0 -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2DArray texture; [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; [[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { - return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); + return float32_t4(texture.Sample(samplerState, float3(vxAttr.uv, 0)).rgb, 1.0f); } \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index c17c18dfd..9c642a1a3 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -35,12 +35,12 @@ #define LIGHT_COUNT 1 #define BXDF_COUNT 7 -#include "render_common.hlsl" -#include "rwmc_global_settings_common.hlsl" +#include +#include #ifdef RWMC_ENABLED #include -#include "render_rwmc_common.hlsl" +#include #endif #ifdef RWMC_ENABLED @@ -57,10 +57,8 @@ [[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D scramblebuf; // unused [[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler; -#ifdef RWMC_ENABLED -[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray cascade; -#endif -[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; +[[vk::image_format("rgba16f")]] 
[[vk::binding(0)]] RWTexture2DArray outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(1)]] RWTexture2DArray cascade; #include "pathtracer.hlsl" @@ -85,15 +83,15 @@ NBL_CONSTEXPR ext::PTPolygonMethod POLYGON_METHOD = ext::PPM_SOLID_ANGLE; int32_t2 getCoordinates() { - uint32_t width, height; - outImage.GetDimensions(width, height); + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); } float32_t2 getTexCoords() { - uint32_t width, height; - outImage.GetDimensions(width, height); + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); int32_t2 iCoords = getCoordinates(); return float32_t2(float(iCoords.x) / width, 1.0 - float(iCoords.y) / height); } @@ -124,7 +122,7 @@ using material_system_type = ext::MaterialSystem::System; #ifdef RWMC_ENABLED -using accumulator_type = rwmc::CascadeAccumulator; +using accumulator_type = rwmc::CascadeAccumulator; #else using accumulator_type = ext::PathTracer::DefaultAccumulator; #endif @@ -187,11 +185,22 @@ static const ext::Scene scene = ext::Scene> MAX_DEPTH_LOG2) > 0 || ((pc.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0) + if (((renderPushConstants.depth - 1) >> MAX_DEPTH_LOG2) > 0 || ((renderPushConstants.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0) { float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0); - outImage[coords] = pixelCol; + outImage[uint3(coords.x, coords.y, 0)] = pixelCol; #ifdef PERSISTENT_WORKGROUPS continue; #else @@ -236,13 +245,13 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) float4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); { - float4 tmp = mul(pc.invMVP, NDC); + float4 tmp = mul(renderPushConstants.invMVP, NDC); ptCreateParams.camPos = tmp.xyz / tmp.w; NDC.z = 1.0; } ptCreateParams.NDC = NDC; - ptCreateParams.invMVP = pc.invMVP; + ptCreateParams.invMVP = renderPushConstants.invMVP; 
ptCreateParams.diffuseParams = bxdfs[0].params; ptCreateParams.conductorParams = bxdfs[3].params; @@ -251,20 +260,19 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); #ifdef RWMC_ENABLED - accumulator_type::initialization_data accumulatorInitData; - accumulatorInitData.size = CascadeSize; - accumulatorInitData.start = pc.start; - accumulatorInitData.base = pc.base; - accumulator_type::output_storage_type cascadeEntry = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData); - for (uint32_t i = 0; i < CascadeSize; ++i) - { - float32_t4 cascadeLayerEntry = float32_t4(cascadeEntry.data[i], 1.0f); - cascade[uint3(coords.x, coords.y, i)] = cascadeLayerEntry; - } + accumulator_type accumulator = accumulator_type::create(pc.splattingParameters); +#else + accumulator_type accumulator = accumulator_type::create(); +#endif + // path tracing loop + for(int i = 0; i < renderPushConstants.sampleCount; ++i) + pathtracer.sampleMeasure(i, renderPushConstants.depth, scene, accumulator); + +#ifdef RWMC_ENABLED + for (uint32_t i = 0; i < CascadeCount; ++i) + cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); #else - accumulator_type::initialization_data accumulatorInitData; - float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData); - outImage[coords] = float32_t4(color, 1.0); + outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index 6c3f63ee4..3096e08ed 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -11,12 +11,13 @@ struct RenderPushConstants #ifdef __HLSL_VERSION float32_t4x4 invMVP; #else - nbl::core::matrix4SIMD invMVP; + nbl::hlsl::float32_t4x4 invMVP; 
#endif int sampleCount; int depth; }; NBL_CONSTEXPR nbl::hlsl::float32_t3 LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f); +NBL_CONSTEXPR uint32_t RenderWorkgroupSize = 512u; #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl index 7a0674869..9d0175c56 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -1,6 +1,8 @@ #ifndef _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ #define _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" +#include "render_common.hlsl" #ifndef __HLSL_VERSION #include "matrix4SIMD.h" @@ -8,16 +10,8 @@ struct RenderRWMCPushConstants { -#ifdef __HLSL_VERSION - float32_t4x4 invMVP; -#else - nbl::core::matrix4SIMD invMVP; -#endif - int sampleCount; - int depth; - float start; - float base; - float kappa; + RenderPushConstants renderPushConstants; + nbl::hlsl::rwmc::SplattingParameters splattingParameters; }; #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl index e4aa95923..e512b9110 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -1,4 +1,4 @@ -#include +#include #include "resolve_common.hlsl" #include "rwmc_global_settings_common.hlsl" #ifdef PERSISTENT_WORKGROUPS @@ -6,43 +6,28 @@ #endif [[vk::push_constant]] ResolvePushConstants pc; -[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; -[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray cascade; +[[vk::image_format("rgba16f")]] [[vk::binding(0)]] RWTexture2DArray outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(1)]] RWTexture2DArray cascade; using namespace nbl; using namespace hlsl; 
-NBL_CONSTEXPR uint32_t WorkgroupSize = 512; -NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; -NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; - -int32_t2 getCoordinates() +int32_t2 getImageExtents() { - uint32_t width, height; - outImage.GetDimensions(width, height); - return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); + return int32_t2(width, height); } -[numthreads(WorkgroupSize, 1, 1)] +[numthreads(ResolveWorkgroupSizeX, ResolveWorkgroupSizeY, 1)] void main(uint32_t3 threadID : SV_DispatchThreadID) { -#ifdef PERSISTENT_WORKGROUPS - uint32_t virtualThreadIndex; - [loop] - for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920 * 1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) - { - virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x; - const int32_t2 coords = (int32_t2)math::Morton::decode2d(virtualThreadIndex); -#else - const int32_t2 coords = getCoordinates(); -#endif + const int32_t2 coords = int32_t2(threadID.x, threadID.y); + const int32_t2 imageExtents = getImageExtents(); + if (coords.x >= imageExtents.x || coords.y >= imageExtents.y) + return; - rwmc::ReweightingParameters reweightingParameters = rwmc::computeReweightingParameters(pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa, CascadeSize); - float32_t3 color = rwmc::reweight(reweightingParameters, cascade, coords); + float32_t3 color = rwmc::reweight(pc.resolveParameters, cascade, coords); - outImage[coords] = float32_t4(color, 1.0f); - -#ifdef PERSISTENT_WORKGROUPS - } -#endif + outImage[uint3(coords.x, coords.y, 0)] = float32_t4(color, 1.0f); } diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl index 5937c42e2..a3ad72364 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl 
+++ b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl @@ -1,13 +1,15 @@ #ifndef _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ #define _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl" struct ResolvePushConstants { uint32_t sampleCount; - float base; - float minReliableLuma; - float kappa; + nbl::hlsl::rwmc::ResolveParameters resolveParameters; }; +NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeX = 32u; +NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeY = 16u; + #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl index 6a5d1b9d4..8adf0a5e1 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl @@ -2,6 +2,6 @@ #define _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t CascadeSize = 6u; +NBL_CONSTEXPR uint32_t CascadeCount = 6u; #endif diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index cf9e58764..1780d67c3 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -256,8 +256,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return gpuDS; }; - std::array descriptorSet0Bindings = {}; - std::array descriptorSet1Bindings = {}; + std::array descriptorSet0Bindings = {}; std::array descriptorSet3Bindings = {}; std::array presentDescriptorSetBindings; @@ -270,8 +269,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .immutableSamplers = nullptr }; - descriptorSet1Bindings[0] = { - .binding = 0u, + descriptorSet0Bindings[1] = { + .binding = 1u, .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags 
= IShader::E_SHADER_STAGE::ESS_COMPUTE, @@ -314,20 +313,16 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); - auto cpuDescriptorSetLayout1 = make_smart_refctd_ptr(descriptorSet1Bindings); auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); - auto gpuDescriptorSetLayout1 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout1); auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); - auto cpuDescriptorSet1 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout1)); auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); - m_descriptorSet1 = convertDSCPU2GPU(cpuDescriptorSet1); m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); smart_refctd_ptr presentDSPool; @@ -481,7 +476,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, auto rwmcPtPipelineLayout = m_device->createPipelineLayout( { &rwmcPcRange, 1 }, core::smart_refctd_ptr(gpuDescriptorSetLayout0), - core::smart_refctd_ptr(gpuDescriptorSetLayout1), + nullptr, core::smart_refctd_ptr(gpuDescriptorSetLayout2), nullptr ); @@ -547,8 +542,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, auto pipelineLayout = m_device->createPipelineLayout( { &pcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout0), - core::smart_refctd_ptr(gpuDescriptorSetLayout1) + core::smart_refctd_ptr(gpuDescriptorSetLayout0) ); if (!pipelineLayout) { @@ -562,14 +556,6 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, if 
(!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePipeline)) return logFail("Failed to create HLSL resolve compute pipeline!\n"); } - { - auto shader = loadAndCompileHLSLShader(ResolveShaderPath, "", true); - auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePersistentWGPipeline)) - return logFail("Failed to create HLSL resolve compute pipeline!\n"); - } - } // Create graphics pipeline @@ -770,7 +756,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, } else { - imgInfo.arrayLayers = CascadeSize; + imgInfo.arrayLayers = CascadeCount; imgInfo.usage = asset::IImage::EUF_STORAGE_BIT; } @@ -781,7 +767,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return image; }; - auto createHDRIImageView = [this](smart_refctd_ptr img, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr + auto createHDRIImageView = [this](smart_refctd_ptr img, const uint32_t imageArraySize = 1u, const IGPUImageView::E_TYPE imageViewType = IGPUImageView::ET_2D) -> smart_refctd_ptr { auto format = img->getCreationParameters().format; IGPUImageView::SCreationParams imgViewInfo; @@ -792,17 +778,9 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, imgViewInfo.subresourceRange.baseArrayLayer = 0u; imgViewInfo.subresourceRange.baseMipLevel = 0u; imgViewInfo.subresourceRange.levelCount = 1u; + imgViewInfo.viewType = imageViewType; - if (!useCascadeCreationParameters) - { - imgViewInfo.subresourceRange.layerCount = 1u; - imgViewInfo.viewType = IGPUImageView::ET_2D; - } - else - { - imgViewInfo.subresourceRange.layerCount = CascadeSize; - imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY; - } + imgViewInfo.subresourceRange.layerCount = imageArraySize; return m_device->createImageView(std::move(imgViewInfo)); }; @@ -820,12 +798,12 @@ class HLSLComputePathtracer final : 
public examples::SimpleWindowedApplication, auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); outImg->setObjectDebugName("Output Image"); - m_outImgView = createHDRIImageView(outImg); + m_outImgView = createHDRIImageView(outImg, 1, IGPUImageView::ET_2D_ARRAY); m_outImgView->setObjectDebugName("Output Image View"); auto cascade = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y, true); cascade->setObjectDebugName("Cascade"); - m_cascadeView = createHDRIImageView(cascade, true); + m_cascadeView = createHDRIImageView(cascade, CascadeCount, IGPUImageView::ET_2D_ARRAY); m_cascadeView->setObjectDebugName("Cascade View"); // TODO: change cascade layout to general @@ -985,8 +963,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .info = &writeDSInfos[0] }; writeDescriptorSets[1] = { - .dstSet = m_descriptorSet1.get(), - .binding = 0, + .dstSet = m_descriptorSet0.get(), + .binding = 1, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[1] @@ -1115,8 +1093,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, ImGui::Text("\nRWMC settings:"); ImGui::Checkbox("Enable RWMC", &useRWMC); - ImGui::SliderFloat("start", &rwmcCascadeStart, 1.0f, 32.0f); - ImGui::SliderFloat("base", &rwmcCascadeBase, 1.0f, 32.0f); + ImGui::SliderFloat("start", &rwmcPushConstants.splattingParameters.start, 1.0f, 32.0f); + ImGui::SliderFloat("base", &rwmcPushConstants.splattingParameters.base, 1.0f, 32.0f); ImGui::SliderFloat("minReliableLuma", &rwmcMinReliableLuma, 0.1f, 32.0f); ImGui::SliderFloat("kappa", &rwmcKappa, 0.1f, 1024.0f); @@ -1144,8 +1122,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // set initial rwmc settings - rwmcCascadeStart = hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], LightEminence); - rwmcCascadeBase = 8.0f; + rwmcPushConstants.splattingParameters.start = 
hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], LightEminence); + rwmcPushConstants.splattingParameters.base = 8.0f; rwmcMinReliableLuma = 1.0f; rwmcKappa = 5.0f; @@ -1205,10 +1183,175 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, if (!keepRunning()) return; - if (useRWMC) - beginCommandBufferAndDispatchPathracerPipelineUseRWMC(cmdbuf); - else - beginCommandBufferAndDispatchPathracerPipeline(cmdbuf); + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("Only HLSL render mode is supported.", ILogger::ELL_ERROR); + std::exit(-1); + } + + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + updatePathtracerPushConstants(); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from the cascade + if(useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + 
.srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::NONE + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeCount + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + { + // TODO: shouldn't it be computed only at initialization stage and on window resize? + const uint32_t dispatchSize = usePersistentWorkGroups ? + m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize) : + 1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize; + + IGPUComputePipeline* pipeline = pickPTPipeline(); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + + const uint32_t pushConstantsSize = useRWMC ? sizeof(RenderRWMCPushConstants) : sizeof(RenderPushConstants); + const void* pushConstantsPtr = useRWMC ? reinterpret_cast(&rwmcPushConstants) : reinterpret_cast(&pc); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, pushConstantsSize, pushConstantsPtr); + + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + + // m_cascadeView synchronization - wait for previous compute shader to write into the cascade + // TODO: create this and every other barrier once outside of the loop? 
+ if(useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeCount + } + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + // resolve + if(useRWMC) + { + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("RWMC is only supported with HLSL.", ILogger::ELL_ERROR); + std::exit(-1); + } + + // TODO: shouldn't it be computed only at initialization stage and on window resize? + // Round up division + const uint32_t2 dispatchSize = uint32_t2( + (m_window->getWidth() + ResolveWorkgroupSizeX - 1) / ResolveWorkgroupSizeX, + (m_window->getHeight() + ResolveWorkgroupSizeY - 1) / ResolveWorkgroupSizeY + ); + + IGPUComputePipeline* pipeline = m_resolvePipeline.get(); + + resolvePushConstants.resolveParameters = rwmc::computeResolveParameters(rwmcPushConstants.splattingParameters.base, spp, rwmcMinReliableLuma, rwmcKappa, CascadeCount); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = 
PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } // TODO: tone mapping and stuff @@ -1404,257 +1547,58 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, } private: - void beginCommandBufferAndDispatchPathracerPipeline(IGPUCommandBuffer* cmdbuf) + void updatePathtracerPushConstants() { - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); // disregard surface/swapchain transformation for now const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - viewProjectionMatrix.getInverseTransform(pc.invMVP); - pc.sampleCount = spp; - pc.depth = depth; + // TODO: rewrite the `Camera` class so it uses hlsl::float32_t4x4 instead of core::matrix4SIMD + core::matrix4SIMD invMVP; + viewProjectionMatrix.getInverseTransform(invMVP); + hlsl::float32_t4x4* pcMVPMatrix; - // safe to proceed - // upload buffer data - cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - 
.image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } - - // cube envmap handle + if (useRWMC) { - IGPUComputePipeline* pipeline; - if (usePersistentWorkGroups) - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); - else - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RenderPushConstants), &pc); - - // TODO: shouldn't it be computed only at initialization stage and on window resize? - const uint32_t dispatchSize = usePersistentWorkGroups ? 
- m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize) : - 1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize; - - cmdbuf->dispatch(dispatchSize, 1u, 1u); + pcMVPMatrix = &rwmcPushConstants.renderPushConstants.invMVP; + rwmcPushConstants.renderPushConstants.depth = depth; + rwmcPushConstants.renderPushConstants.sampleCount = resolvePushConstants.sampleCount = spp; } - - // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + else { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + pcMVPMatrix = &pc.invMVP; + pc.sampleCount = spp; + pc.depth = depth; } - + *pcMVPMatrix = hlsl::float32_t4x4( + invMVP.rows[0].x, invMVP.rows[0].y, invMVP.rows[0].z, invMVP.rows[0].w, + invMVP.rows[1].x, invMVP.rows[1].y, invMVP.rows[1].z, invMVP.rows[1].w, + invMVP.rows[2].x, invMVP.rows[2].y, invMVP.rows[2].z, invMVP.rows[2].w, + invMVP.rows[3].x, invMVP.rows[3].y, invMVP.rows[3].z, invMVP.rows[3].w + ); } - void beginCommandBufferAndDispatchPathracerPipelineUseRWMC(IGPUCommandBuffer* cmdbuf) + IGPUComputePipeline* pickPTPipeline() { - if (renderMode != E_RENDER_MODE::ERM_HLSL) - { - m_logger->log("Only HLSL render mode is supported.", ILogger::ELL_ERROR); - 
std::exit(-1); - } - - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - // disregard surface/swapchain transformation for now - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - viewProjectionMatrix.getInverseTransform(rwmcPushConstants.invMVP); - - rwmcPushConstants.start = rwmcCascadeStart; - rwmcPushConstants.depth = depth; - rwmcPushConstants.sampleCount = resolvePushConstants.sampleCount = spp; - rwmcPushConstants.base = resolvePushConstants.base = rwmcCascadeBase; - resolvePushConstants.minReliableLuma = rwmcMinReliableLuma; - rwmcPushConstants.kappa = resolvePushConstants.kappa = rwmcKappa; - - // safe to proceed - // upload buffer data - cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } - - // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from cascade - { - const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = 
PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::NONE - } - }, - .image = m_cascadeView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = CascadeSize - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); - } - - // TODO: shouldn't it be computed only at initialization stage and on window resize? - const uint32_t dispatchSize = usePersistentWorkGroups ? - m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize) : - 1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize; - + IGPUComputePipeline* pipeline; + if (useRWMC) { - IGPUComputePipeline* pipeline = usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[PTPipeline].get() : m_PTHLSLPipelinesRWMC[PTPipeline].get(); - - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RenderRWMCPushConstants), &rwmcPushConstants); + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("RWMC is only supported with HLSL.", ILogger::ELL_ERROR); + std::exit(-1); + } - cmdbuf->dispatch(dispatchSize, 1u, 1u); + pipeline = usePersistentWorkGroups ? 
m_PTHLSLPersistentWGPipelinesRWMC[PTPipeline].get() : m_PTHLSLPipelinesRWMC[PTPipeline].get(); } - - // m_cascadeView synchronization - wait for previous compute shader to write into the cascade - // TODO: create this and every other barrier once outside of the loop? - { - const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_cascadeView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = CascadeSize - } - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); - } - - // reweighting + else { - IGPUComputePipeline* pipeline = usePersistentWorkGroups ? m_resolvePersistentWGPipeline.get() : m_resolvePipeline.get(); - - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); - - cmdbuf->dispatch(dispatchSize, 1u, 1u); + if (usePersistentWorkGroups) + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? 
m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); } - // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } + return pipeline; } private: @@ -1670,12 +1614,11 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelinesRWMC; smart_refctd_ptr m_resolvePipeline; - smart_refctd_ptr m_resolvePersistentWGPipeline; smart_refctd_ptr m_presentPipeline; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - smart_refctd_ptr m_descriptorSet0, m_descriptorSet1, m_descriptorSet2, m_presentDescriptorSet; + smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; core::smart_refctd_ptr m_guiDescriptorSetPool; @@ -1723,8 +1666,6 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, int renderMode = E_RENDER_MODE::ERM_HLSL; int spp = 32; int depth = 3; - float rwmcCascadeStart; - float rwmcCascadeBase; float rwmcMinReliableLuma; float rwmcKappa; bool usePersistentWorkGroups = false;