diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index f5d5206dc..5b55dcf4c 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include "rand_gen.hlsl" #include "ray_gen.hlsl" @@ -40,10 +42,33 @@ struct PathTracerCreationParams BxDFCreation dielectricParams; }; -template +template) +struct DefaultAccumulator +{ + using output_storage_type = OutputTypeVec; + using this_t = DefaultAccumulator; + output_storage_type accumulation; + + static this_t create() + { + this_t retval; + retval.accumulation = promote(0.0f); + + return retval; + } + + void addSample(uint32_t sampleCount, float32_t3 sample) + { + using ScalarType = typename vector_traits::scalar_type; + ScalarType rcpSampleSize = 1.0 / (sampleCount); + accumulation += (sample - accumulation) * rcpSampleSize; + } +}; + +template struct Unidirectional { - using this_t = Unidirectional; + using this_t = Unidirectional; using randgen_type = RandGen; using raygen_type = RayGen; using intersector_type = Intersector; @@ -53,6 +78,7 @@ struct Unidirectional using scalar_type = typename MaterialSystem::scalar_type; using vector3_type = vector; using measure_type = typename MaterialSystem::measure_type; + using output_storage_type = typename Accumulator::output_storage_type; // ? using sample_type = typename NextEventEstimator::sample_type; using ray_dir_info_type = typename sample_type::ray_dir_info_type; using ray_type = typename RayGen::ray_type; @@ -266,40 +292,33 @@ struct Unidirectional } // Li - measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + void sampleMeasure(uint32_t sampleIndex, uint32_t maxDepth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(Accumulator) accumulator) { - measure_type Li = (measure_type)0.0; - scalar_type meanLumaSq = 0.0; - for (uint32_t i = 0; i < numSamples; i++) + //scalar_type meanLumaSq = 0.0; + vector3_type uvw = rand3d(0u, sampleIndex, randGen.rng()); // TODO: take from scramblebuf? + ray_type ray = rayGen.generate(uvw); + + // bounces + bool hit = true; + bool rayAlive = true; + for (int d = 1; (d <= maxDepth) && hit && rayAlive; d += 2) { - vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf? - ray_type ray = rayGen.generate(uvw); - - // bounces - bool hit = true; - bool rayAlive = true; - for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) - { - ray.intersectionT = numeric_limits::max; - ray.objectID = intersector_type::traceRay(ray, scene); + ray.intersectionT = numeric_limits::max; + ray.objectID = intersector_type::traceRay(ray, scene); - hit = ray.objectID.id != -1; - if (hit) - rayAlive = closestHitProgram(1, i, ray, scene); - } - if (!hit) - missProgram(ray); - - measure_type accumulation = ray.payload.accumulation; - scalar_type rcpSampleSize = 1.0 / (i + 1); - Li += (accumulation - Li) * rcpSampleSize; + hit = ray.objectID.id != -1; + if (hit) + rayAlive = closestHitProgram(1, sampleIndex, ray, scene); + } + if (!hit) + missProgram(ray); - // TODO: visualize high variance + const uint32_t sampleCount = sampleIndex + 1; + accumulator.addSample(sampleCount, ray.payload.accumulation); - // TODO: russian roulette early exit? - } + // TODO: visualize high variance - return Li; + // TODO: russian roulette early exit? } NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; diff --git a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl index 22695657c..d556a7162 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl @@ -10,10 +10,10 @@ using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; // binding 0 set 0 -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2DArray texture; [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; [[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { - return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); + return float32_t4(texture.Sample(samplerState, float3(vxAttr.uv, 0)).rgb, 1.0f); } \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index 81736f508..9c642a1a3 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -35,7 +35,31 @@ #define LIGHT_COUNT 1 #define BXDF_COUNT 7 -#include "render_common.hlsl" +#include +#include + +#ifdef RWMC_ENABLED +#include +#include +#endif + +#ifdef RWMC_ENABLED +[[vk::push_constant]] RenderRWMCPushConstants pc; +#else +[[vk::push_constant]] RenderPushConstants pc; +#endif + +[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] Texture2D envMap; // unused +[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] SamplerState envSampler; + +[[vk::binding(1, 2)]] Buffer sampleSequence; + +[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D scramblebuf; // unused +[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler; + +[[vk::image_format("rgba16f")]] [[vk::binding(0)]] RWTexture2DArray outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(1)]] RWTexture2DArray cascade; + #include "pathtracer.hlsl" using namespace nbl; @@ -59,15 +83,15 @@ NBL_CONSTEXPR ext::PTPolygonMethod POLYGON_METHOD = ext::PPM_SOLID_ANGLE; int32_t2 getCoordinates() { - uint32_t width, height; - outImage.GetDimensions(width, height); + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); } float32_t2 getTexCoords() { - uint32_t width, height; - outImage.GetDimensions(width, height); + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); int32_t2 iCoords = getCoordinates(); return float32_t2(float(iCoords.x) / width, 1.0 - float(iCoords.y) / height); } @@ -96,7 +120,14 @@ using raygen_type = ext::RayGen::Basic; using intersector_type = ext::Intersector::Comprehensive; using material_system_type = ext::MaterialSystem::System; using nee_type = ext::NextEventEstimator::Estimator; -using pathtracer_type = ext::PathTracer::Unidirectional; + +#ifdef RWMC_ENABLED +using accumulator_type = rwmc::CascadeAccumulator; +#else +using accumulator_type = ext::PathTracer::DefaultAccumulator; +#endif + +using pathtracer_type = ext::PathTracer::Unidirectional; static const ext::Shape spheres[SPHERE_COUNT] = { ext::Shape::create(float3(0.0, -100.5, -1.0), 100.0, 0u, light_type::INVALID_ID), @@ -129,7 +160,7 @@ static const ext::Shape rectangles[1]; #endif static const light_type lights[LIGHT_COUNT] = { - light_type::create(spectral_t(30.0,25.0,15.0), + light_type::create(LightEminence, #ifdef SPHERE_LIGHT 8u, #else @@ -154,11 +185,22 @@ static const ext::Scene scene = ext::Scene> MAX_DEPTH_LOG2) > 0 || ((pc.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0) + if (((renderPushConstants.depth - 1) >> MAX_DEPTH_LOG2) > 0 || ((renderPushConstants.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0) { float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0); - outImage[coords] = pixelCol; + outImage[uint3(coords.x, coords.y, 0)] = pixelCol; #ifdef PERSISTENT_WORKGROUPS continue; #else @@ -203,13 +245,13 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) float4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); { - float4 tmp = mul(pc.invMVP, NDC); + float4 tmp = mul(renderPushConstants.invMVP, NDC); ptCreateParams.camPos = tmp.xyz / tmp.w; NDC.z = 1.0; } ptCreateParams.NDC = NDC; - ptCreateParams.invMVP = pc.invMVP; + ptCreateParams.invMVP = renderPushConstants.invMVP; ptCreateParams.diffuseParams = bxdfs[0].params; ptCreateParams.conductorParams = bxdfs[3].params; @@ -217,9 +259,23 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); - float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); - float32_t4 pixCol = float32_t4(color, 1.0); - outImage[coords] = pixCol; +#ifdef RWMC_ENABLED + accumulator_type accumulator = accumulator_type::create(pc.splattingParameters); +#else + accumulator_type accumulator = accumulator_type::create(); +#endif + // path tracing loop + for(int i = 0; i < renderPushConstants.sampleCount; ++i) + pathtracer.sampleMeasure(i, renderPushConstants.depth, scene, accumulator); + +#ifdef RWMC_ENABLED + for (uint32_t i = 0; i < CascadeCount; ++i) + cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); +#else + outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); +#endif + + #ifdef PERSISTENT_WORKGROUPS } diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index 5e5cf89da..3096e08ed 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -1,23 +1,23 @@ #ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ #define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" -struct SPushConstants +#ifndef __HLSL_VERSION +#include "matrix4SIMD.h" +#endif + +struct RenderPushConstants { +#ifdef __HLSL_VERSION float32_t4x4 invMVP; +#else + nbl::hlsl::float32_t4x4 invMVP; +#endif int sampleCount; int depth; }; -[[vk::push_constant]] SPushConstants pc; - -[[vk::combinedImageSampler]][[vk::binding(0, 2)]] Texture2D envMap; // unused -[[vk::combinedImageSampler]][[vk::binding(0, 2)]] SamplerState envSampler; - -[[vk::binding(1, 2)]] Buffer sampleSequence; - -[[vk::combinedImageSampler]][[vk::binding(2, 2)]] Texture2D scramblebuf; // unused -[[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler; - -[[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D outImage; +NBL_CONSTEXPR nbl::hlsl::float32_t3 LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f); +NBL_CONSTEXPR uint32_t RenderWorkgroupSize = 512u; #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl new file mode 100644 index 000000000..9d0175c56 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -0,0 +1,17 @@ +#ifndef _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" +#include "render_common.hlsl" + +#ifndef __HLSL_VERSION +#include "matrix4SIMD.h" +#endif + +struct RenderRWMCPushConstants +{ + RenderPushConstants renderPushConstants; + nbl::hlsl::rwmc::SplattingParameters splattingParameters; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl new file mode 100644 index 000000000..e512b9110 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -0,0 +1,33 @@ +#include +#include "resolve_common.hlsl" +#include "rwmc_global_settings_common.hlsl" +#ifdef PERSISTENT_WORKGROUPS +#include "nbl/builtin/hlsl/math/morton.hlsl" +#endif + +[[vk::push_constant]] ResolvePushConstants pc; +[[vk::image_format("rgba16f")]] [[vk::binding(0)]] RWTexture2DArray outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(1)]] RWTexture2DArray cascade; + +using namespace nbl; +using namespace hlsl; + +int32_t2 getImageExtents() +{ + uint32_t width, height, imageArraySize; + outImage.GetDimensions(width, height, imageArraySize); + return int32_t2(width, height); +} + +[numthreads(ResolveWorkgroupSizeX, ResolveWorkgroupSizeY, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + const int32_t2 coords = int32_t2(threadID.x, threadID.y); + const int32_t2 imageExtents = getImageExtents(); + if (coords.x >= imageExtents.x || coords.y >= imageExtents.y) + return; + + float32_t3 color = rwmc::reweight(pc.resolveParameters, cascade, coords); + + outImage[uint3(coords.x, coords.y, 0)] = float32_t4(color, 1.0f); +} diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl new file mode 100644 index 000000000..a3ad72364 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl @@ -0,0 +1,15 @@ +#ifndef _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl" + +struct ResolvePushConstants +{ + uint32_t sampleCount; + nbl::hlsl::rwmc::ResolveParameters resolveParameters; +}; + +NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeX = 32u; +NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeY = 16u; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl new file mode 100644 index 000000000..8adf0a5e1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl @@ -0,0 +1,7 @@ +#ifndef _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +NBL_CONSTEXPR uint32_t CascadeCount = 6u; + +#endif diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 0dc5fc053..1780d67c3 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -6,6 +6,11 @@ #include "nbl/asset/interchange/IImageAssetHandlerBase.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/surface_transform.h" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "app_resources/hlsl/render_common.hlsl" +#include "app_resources/hlsl/render_rwmc_common.hlsl" +#include "app_resources/hlsl/resolve_common.hlsl" +#include "app_resources/hlsl/rwmc_global_settings_common.hlsl" using namespace nbl; using namespace core; @@ -15,12 +20,6 @@ using namespace asset; using namespace ui; using namespace video; -struct PTPushConstant { - matrix4SIMD invMVP; - int sampleCount; - int depth; -}; - // TODO: Add a QueryPool for timestamping once its ready // TODO: Do buffer creation using assConv class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication @@ -60,6 +59,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, static inline std::array PTGLSLShaderPaths = { "app_resources/glsl/litBySphere.comp", "app_resources/glsl/litByTriangle.comp", "app_resources/glsl/litByRectangle.comp" }; static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", "TRIANGLE_LIGHT", "RECTANGLE_LIGHT" }; + static inline std::string ResolveShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { @@ -256,7 +256,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return gpuDS; }; - std::array descriptorSet0Bindings = {}; + std::array descriptorSet0Bindings = {}; std::array descriptorSet3Bindings = {}; std::array presentDescriptorSetBindings; @@ -268,6 +268,16 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; + + descriptorSet0Bindings[1] = { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, @@ -292,6 +302,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; + presentDescriptorSetBindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, @@ -371,7 +382,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return shader; }; - auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false) -> smart_refctd_ptr + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false, bool rwmc = false) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.workingDirectory = localInputCWD; @@ -402,11 +413,16 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, options.preprocessorOptions.logger = m_logger.get(); options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - const IShaderCompiler::SMacroDefinition defines[2] = { {defineMacro, ""}, { "PERSISTENT_WORKGROUPS", "1" } }; - if (!defineMacro.empty() && persistentWorkGroups) - options.preprocessorOptions.extraDefines = { defines, defines + 2 }; - else if (!defineMacro.empty() && !persistentWorkGroups) - options.preprocessorOptions.extraDefines = { defines, defines + 1 }; + core::vector defines; + defines.reserve(3); + if (!defineMacro.empty()) + defines.push_back({ defineMacro, "" }); + if(persistentWorkGroups) + defines.push_back({ "PERSISTENT_WORKGROUPS", "1" }); + if(rwmc) + defines.push_back({ "RWMC_ENABLED", "" }); + + options.preprocessorOptions.extraDefines = defines; source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); @@ -420,13 +436,27 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return shader; }; + auto getComputePipelineCreationParams = [](IGPUShader* shader, IGPUPipelineLayout* pipelineLayout) -> IGPUComputePipeline::SCreationParams + { + IGPUComputePipeline::SCreationParams params = {}; + params.layout = pipelineLayout; + params.shader.shader = shader; + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.shader.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + + return params; + }; + // Create compute pipelines { - for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) + { const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, - .size = sizeof(PTPushConstant) + .size = sizeof(RenderPushConstants) }; auto ptPipelineLayout = m_device->createPipelineLayout( { &pcRange, 1 }, @@ -435,33 +465,35 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, core::smart_refctd_ptr(gpuDescriptorSetLayout2), nullptr ); - if (!ptPipelineLayout) { + if (!ptPipelineLayout) return logFail("Failed to create Pathtracing pipeline layout"); - } + + const nbl::asset::SPushConstantRange rwmcPcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderRWMCPushConstants) + }; + auto rwmcPtPipelineLayout = m_device->createPipelineLayout( + { &rwmcPcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + nullptr, + core::smart_refctd_ptr(gpuDescriptorSetLayout2), + nullptr + ); + if (!rwmcPtPipelineLayout) + return logFail("Failed to create RWMC Pathtracing pipeline layout"); { auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index]); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPipelines.data() + index)) return logFail("Failed to create GLSL compute pipeline!\n"); } { auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelines.data() + index)) return logFail("Failed to create HLSL compute pipeline!\n"); } @@ -469,30 +501,60 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // persistent wg pipelines { auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index], true); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPersistentWGPipelines.data() + index)) return logFail("Failed to create GLSL PersistentWG compute pipeline!\n"); } { auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); } + + // rwmc pipelines + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); + } + } + } + + // Create resolve pipelines + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(ResolvePushConstants) + }; + + auto pipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0) + ); + + if (!pipelineLayout) { + return logFail("Failed to create resolve pipeline layout"); + } + + { + auto shader = loadAndCompileHLSLShader(ResolveShaderPath); + auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePipeline)) + return logFail("Failed to create HLSL resolve compute pipeline!\n"); } } @@ -676,7 +738,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // create views for textures { - auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { IGPUImage::SCreationParams imgInfo; imgInfo.format = colorFormat; imgInfo.type = IGPUImage::ET_2D; @@ -684,10 +746,19 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, imgInfo.extent.height = height; imgInfo.extent.depth = 1u; imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; imgInfo.samples = IGPUImage::ESCF_1_BIT; imgInfo.flags = static_cast(0u); - imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + if (!useCascadeCreationParameters) + { + imgInfo.arrayLayers = 1u; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + } + else + { + imgInfo.arrayLayers = CascadeCount; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT; + } auto image = m_device->createImage(std::move(imgInfo)); auto imageMemReqs = image->getMemoryReqs(); @@ -696,35 +767,46 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return image; }; - auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr + auto createHDRIImageView = [this](smart_refctd_ptr img, const uint32_t imageArraySize = 1u, const IGPUImageView::E_TYPE imageViewType = IGPUImageView::ET_2D) -> smart_refctd_ptr { auto format = img->getCreationParameters().format; IGPUImageView::SCreationParams imgViewInfo; imgViewInfo.image = std::move(img); imgViewInfo.format = format; - imgViewInfo.viewType = IGPUImageView::ET_2D; imgViewInfo.flags = static_cast(0u); imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; imgViewInfo.subresourceRange.baseArrayLayer = 0u; imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; imgViewInfo.subresourceRange.levelCount = 1u; + imgViewInfo.viewType = imageViewType; + + imgViewInfo.subresourceRange.layerCount = imageArraySize; return m_device->createImageView(std::move(imgViewInfo)); }; auto params = envMap->getCreationParameters(); auto extent = params.extent; + envMap->setObjectDebugName("Env Map"); m_envMapView = createHDRIImageView(envMap); m_envMapView->setObjectDebugName("Env Map View"); + scrambleMap->setObjectDebugName("Scramble Map"); m_scrambleView = createHDRIImageView(scrambleMap); m_scrambleView->setObjectDebugName("Scramble Map View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); outImg->setObjectDebugName("Output Image"); - m_outImgView = createHDRIImageView(outImg); + m_outImgView = createHDRIImageView(outImg, 1, IGPUImageView::ET_2D_ARRAY); m_outImgView->setObjectDebugName("Output Image View"); + + auto cascade = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y, true); + cascade->setObjectDebugName("Cascade"); + m_cascadeView = createHDRIImageView(cascade, CascadeCount, IGPUImageView::ET_2D_ARRAY); + m_cascadeView->setObjectDebugName("Cascade View"); + + // TODO: change cascade layout to general } // create sequence buffer view @@ -855,22 +937,24 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; auto sampler1 = m_device->createSampler(samplerParams1); - std::array writeDSInfos = {}; + std::array writeDSInfos = {}; writeDSInfos[0].desc = m_outImgView; writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; - writeDSInfos[1].desc = m_envMapView; + writeDSInfos[1].desc = m_cascadeView; + writeDSInfos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[2].desc = m_envMapView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; - writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[2].desc = m_sequenceBufferView; - writeDSInfos[3].desc = m_scrambleView; + writeDSInfos[2].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[2].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[3].desc = m_sequenceBufferView; + writeDSInfos[4].desc = m_scrambleView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; - writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[4].desc = m_outImgView; - writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[4].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[5].desc = m_outImgView; + writeDSInfos[5].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - std::array writeDescriptorSets = {}; + std::array writeDescriptorSets = {}; writeDescriptorSets[0] = { .dstSet = m_descriptorSet0.get(), .binding = 0, @@ -879,32 +963,39 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .info = &writeDSInfos[0] }; writeDescriptorSets[1] = { - .dstSet = m_descriptorSet2.get(), - .binding = 0, + .dstSet = m_descriptorSet0.get(), + .binding = 1, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[1] }; writeDescriptorSets[2] = { .dstSet = m_descriptorSet2.get(), - .binding = 1, + .binding = 0, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[2] }; writeDescriptorSets[3] = { .dstSet = m_descriptorSet2.get(), - .binding = 2, + .binding = 1, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[3] }; writeDescriptorSets[4] = { + .dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + writeDescriptorSets[5] = { .dstSet = m_presentDescriptorSet.get(), .binding = 0, .arrayElement = 0u, .count = 1u, - .info = &writeDSInfos[4] + .info = &writeDSInfos[5] }; m_device->updateDescriptorSets(writeDescriptorSets, {}); @@ -1000,6 +1091,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + ImGui::Text("\nRWMC settings:"); + ImGui::Checkbox("Enable RWMC", &useRWMC); + ImGui::SliderFloat("start", &rwmcPushConstants.splattingParameters.start, 1.0f, 32.0f); + ImGui::SliderFloat("base", &rwmcPushConstants.splattingParameters.base, 1.0f, 32.0f); + ImGui::SliderFloat("minReliableLuma", &rwmcMinReliableLuma, 0.1f, 32.0f); + ImGui::SliderFloat("kappa", &rwmcKappa, 0.1f, 1024.0f); + ImGui::End(); } ); @@ -1022,6 +1120,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_oracle.reportBeginFrameRecord(); m_camera.mapKeysToWASD(); + // set initial rwmc settings + + rwmcPushConstants.splattingParameters.start = hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], LightEminence); + rwmcPushConstants.splattingParameters.base = 8.0f; + rwmcMinReliableLuma = 1.0f; + rwmcKappa = 5.0f; + return true; } @@ -1067,7 +1172,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, } const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - m_api->startCapture(); + //m_api->startCapture(); // CPU events update(); @@ -1078,98 +1183,178 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, if (!keepRunning()) return; - // render whole scene to offline frame buffer & submit + if (renderMode != E_RENDER_MODE::ERM_HLSL) { - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - // disregard surface/swapchain transformation for now - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - PTPushConstant pc; - viewProjectionMatrix.getInverseTransform(pc.invMVP); - pc.sampleCount = spp; - pc.depth = depth; + m_logger->log("Only HLSL render mode is supported.", ILogger::ELL_ERROR); + std::exit(-1); + } - // safe to proceed - // upload buffer data - cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + updatePathtracerPushConstants(); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from the cascade + if(useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { { .barrier = { .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::NONE, .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + .dstAccessMask = ACCESS_FLAGS::NONE } }, - .image = m_outImgView->getCreationParameters().image.get(), + .image = m_cascadeView->getCreationParameters().image.get(), .subresourceRange = { .aspectMask = IImage::EAF_COLOR_BIT, .baseMipLevel = 0u, .levelCount = 1u, .baseArrayLayer = 0u, - .layerCount = 1u + .layerCount = CascadeCount }, .oldLayout = IImage::LAYOUT::UNDEFINED, .newLayout = IImage::LAYOUT::GENERAL } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } - // cube envmap handle - { - IGPUComputePipeline* pipeline; - if (usePersistentWorkGroups) - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); - else - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); - if (usePersistentWorkGroups) - { - uint32_t dispatchSize = m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize); - cmdbuf->dispatch(dispatchSize, 1u, 1u); - } - else - cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); - } + { + // TODO: shouldn't it be computed only at initialization stage and on window resize? + const uint32_t dispatchSize = usePersistentWorkGroups ? + m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize) : + 1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize; - // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + IGPUComputePipeline* pipeline = pickPTPipeline(); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + + const uint32_t pushConstantsSize = useRWMC ? sizeof(RenderRWMCPushConstants) : sizeof(RenderPushConstants); + const void* pushConstantsPtr = useRWMC ? reinterpret_cast(&rwmcPushConstants) : reinterpret_cast(&pc); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, pushConstantsSize, pushConstantsPtr); + + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + + // m_cascadeView synchronization - wait for previous compute shader to write into the cascade + // TODO: create this and every other barrier once outside of the loop? + if(useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { { .barrier = { .dep = { .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS } }, - .image = m_outImgView->getCreationParameters().image.get(), + .image = m_cascadeView->getCreationParameters().image.get(), .subresourceRange = { .aspectMask = IImage::EAF_COLOR_BIT, .baseMipLevel = 0u, .levelCount = 1u, .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + .layerCount = CascadeCount + } } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + // resolve + if(useRWMC) + { + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("RWMC is only supported with HLSL.", ILogger::ELL_ERROR); + std::exit(-1); } - // TODO: tone mapping and stuff + // TODO: shouldn't it be computed only at initialization stage and on window resize? + // Round up division + const uint32_t2 dispatchSize = uint32_t2( + (m_window->getWidth() + ResolveWorkgroupSizeX - 1) / ResolveWorkgroupSizeX, + (m_window->getHeight() + ResolveWorkgroupSizeY - 1) / ResolveWorkgroupSizeY + ); + + IGPUComputePipeline* pipeline = m_resolvePipeline.get(); + + resolvePushConstants.resolveParameters = rwmc::computeResolveParameters(rwmcPushConstants.splattingParameters.base, spp, rwmcMinReliableLuma, rwmcKappa, CascadeCount); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); } + // TODO: tone mapping and stuff + asset::SViewport viewport; { viewport.minDepth = 1.f; @@ -1262,7 +1447,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); m_surface->present(m_currentImageAcquire.imageIndex, rendered); } - m_api->endCapture(); + //m_api->endCapture(); } inline bool keepRunning() override @@ -1360,6 +1545,61 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_ui.manager->update(params); } + + private: + void updatePathtracerPushConstants() + { + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + // TODO: rewrite the `Camera` class so it uses hlsl::float32_t4x4 instead of core::matrix4SIMD + core::matrix4SIMD invMVP; + viewProjectionMatrix.getInverseTransform(invMVP); + hlsl::float32_t4x4* pcMVPMatrix; + + if (useRWMC) + { + pcMVPMatrix = &rwmcPushConstants.renderPushConstants.invMVP; + rwmcPushConstants.renderPushConstants.depth = depth; + rwmcPushConstants.renderPushConstants.sampleCount = resolvePushConstants.sampleCount = spp; + } + else + { + pcMVPMatrix = &pc.invMVP; + pc.sampleCount = spp; + pc.depth = depth; + } + + *pcMVPMatrix = hlsl::float32_t4x4( + invMVP.rows[0].x, invMVP.rows[0].y, invMVP.rows[0].z, invMVP.rows[0].w, + invMVP.rows[1].x, invMVP.rows[1].y, invMVP.rows[1].z, invMVP.rows[1].w, + invMVP.rows[2].x, invMVP.rows[2].y, invMVP.rows[2].z, invMVP.rows[2].w, + invMVP.rows[3].x, invMVP.rows[3].y, invMVP.rows[3].z, invMVP.rows[3].w + ); + } + + IGPUComputePipeline* pickPTPipeline() + { + IGPUComputePipeline* pipeline; + if (useRWMC) + { + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("RWMC is only supported with HLSL.", ILogger::ELL_ERROR); + std::exit(-1); + } + + pipeline = usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[PTPipeline].get() : m_PTHLSLPipelinesRWMC[PTPipeline].get(); + } + else + { + if (usePersistentWorkGroups) + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); + } + + return pipeline; + } private: smart_refctd_ptr m_window; @@ -1371,6 +1611,9 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelinesRWMC; + smart_refctd_ptr m_resolvePipeline; smart_refctd_ptr m_presentPipeline; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; @@ -1388,6 +1631,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, smart_refctd_ptr m_envMapView, m_scrambleView; smart_refctd_ptr m_sequenceBufferView; smart_refctd_ptr m_outImgView; + smart_refctd_ptr m_cascadeView; // sync smart_refctd_ptr m_semaphore; @@ -1422,7 +1666,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, int renderMode = E_RENDER_MODE::ERM_HLSL; int spp = 32; int depth = 3; + float rwmcMinReliableLuma; + float rwmcKappa; bool usePersistentWorkGroups = false; + bool useRWMC = false; + RenderRWMCPushConstants rwmcPushConstants; + RenderPushConstants pc; + ResolvePushConstants resolvePushConstants; bool m_firstFrame = true; IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} };