From 19ba15d93ba69745c83d5427e0323f0e3bd54f9f Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 25 Sep 2025 17:23:03 +0200 Subject: [PATCH 1/4] Updated examples --- examples_tests | 2 +- src/nbl/video/CSurfaceVulkan.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 3400a2a498..3d206fd4c7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 3400a2a498b6b3738d63aff66dd0363a4a9f8c67 +Subproject commit 3d206fd4c772628fe92e95d6e4526170df09900c diff --git a/src/nbl/video/CSurfaceVulkan.cpp b/src/nbl/video/CSurfaceVulkan.cpp index 7d135b04cc..7feda9fcd3 100644 --- a/src/nbl/video/CSurfaceVulkan.cpp +++ b/src/nbl/video/CSurfaceVulkan.cpp @@ -60,7 +60,7 @@ bool ISurfaceVulkan::isSupportedForPhysicalDevice(const IPhysicalDevice* physica core::bitflag ISurfaceVulkan::getAvailablePresentModesForPhysicalDevice(const IPhysicalDevice* physicalDevice) const { - constexpr uint32_t MAX_PRESENT_MODE_COUNT = 4u; + constexpr uint32_t MAX_PRESENT_MODE_COUNT = 5u; core::bitflag result = ISurface::EPM_UNKNOWN; From 7ec3f394c645d4ce952a728d75bf001283e26a3c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 18 Oct 2025 19:12:31 +0200 Subject: [PATCH 2/4] Created RWMC files --- examples_tests | 2 +- .../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 100 +++++++++++ include/nbl/builtin/hlsl/rwmc/rwmc.hlsl | 160 ++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 3 + 4 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl create mode 100644 include/nbl/builtin/hlsl/rwmc/rwmc.hlsl diff --git a/examples_tests b/examples_tests index 3d206fd4c7..8ecc60ff8a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 3d206fd4c772628fe92e95d6e4526170df09900c +Subproject commit 8ecc60ff8af7b68564d769e04ec06d2e87cbf8e2 diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl new file mode 100644 index 0000000000..6678a66942 --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -0,0 +1,100 @@ +#ifndef _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_ +#define _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + +struct CascadeSettings +{ + uint32_t size; + uint32_t start; + uint32_t base; +}; + +template +struct CascadeEntry +{ + CascadeLayerType data[CascadeSize]; +}; + +template +struct CascadeAccumulator +{ + using output_storage_type = CascadeEntry; + using initialization_data = CascadeSettings; + output_storage_type accumulation; + uint32_t cascadeSampleCounter[CascadeSize]; + CascadeSettings cascadeSettings; + + void initialize(in CascadeSettings settings) + { + for (int i = 0; i < CascadeSize; ++i) + { + accumulation.data[i] = (CascadeLayerType)0.0f; + cascadeSampleCounter[i] = 0u; + } + + cascadeSettings.size = settings.size; + cascadeSettings.start = settings.start; + cascadeSettings.base = settings.base; + } + + typename vector_traits::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) + { + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); + } + + // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp + void addSample(uint32_t sampleIndex, float32_t3 sample) + { + float lowerScale = cascadeSettings.start; + float upperScale = lowerScale * cascadeSettings.base; + + const float luma = getLuma(sample); + + uint32_t lowerCascadeIndex = 0u; + while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) + { + lowerScale = upperScale; + upperScale *= cascadeSettings.base; + ++lowerCascadeIndex; + } + + float lowerCascadeLevelWeight; + float higherCascadeLevelWeight; + + if (luma <= lowerScale) + lowerCascadeLevelWeight = 1.0f; + else if (luma < upperScale) + lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale)); + else // Inf, NaN ... + lowerCascadeLevelWeight = 0.0f; + + if (luma < upperScale) + higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); + else + higherCascadeLevelWeight = upperScale / luma; + + uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; + + const uint32_t sampleCount = sampleIndex + 1u; + const float reciprocalSampleCount = 1.0f / float(sampleCount); + accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount; + accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[lowerCascadeIndex] = sampleCount; + cascadeSampleCounter[higherCascadeIndex] = sampleCount; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl new file mode 100644 index 0000000000..d7b151af86 --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl @@ -0,0 +1,160 @@ +#ifndef _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ +namespace impl +{ + +struct CascadeSample +{ + float32_t3 centerValue; + float normalizedCenterLuma; + float normalizedNeighbourhoodAverageLuma; +}; + +// TODO: figure out what values should pixels outside have, 0.0f is incorrect +float32_t3 sampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, in RWTexture2DArray cascade, uint32_t cascadeIndex) +{ + const int32_t2 texelCoord = currentCoord + offset; + if (any(texelCoord < int32_t2(0, 0))) + return float32_t3(0.0f, 0.0f, 0.0f); + + float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex))); + return float32_t3(output.r, output.g, output.b); +} + +float32_t calcLuma(in float32_t3 col) +{ + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); +} + +CascadeSample SampleCascade(in int32_t2 coord, in RWTexture2DArray cascade, in uint cascadeIndex, in float reciprocalBaseI) +{ + float32_t3 neighbourhood[9]; + neighbourhood[0] = sampleCascadeTexel(coord, int32_t2(-1, -1), cascade, cascadeIndex); + neighbourhood[1] = sampleCascadeTexel(coord, int32_t2(0, -1), cascade, cascadeIndex); + neighbourhood[2] = sampleCascadeTexel(coord, int32_t2(1, -1), cascade, cascadeIndex); + neighbourhood[3] = sampleCascadeTexel(coord, int32_t2(-1, 0), cascade, cascadeIndex); + neighbourhood[4] = sampleCascadeTexel(coord, int32_t2(0, 0), cascade, cascadeIndex); + neighbourhood[5] = sampleCascadeTexel(coord, int32_t2(1, 0), cascade, cascadeIndex); + neighbourhood[6] = sampleCascadeTexel(coord, int32_t2(-1, 1), cascade, cascadeIndex); + neighbourhood[7] = sampleCascadeTexel(coord, int32_t2(0, 1), cascade, cascadeIndex); + neighbourhood[8] = sampleCascadeTexel(coord, int32_t2(1, 1), cascade, cascadeIndex); + + // numerical robustness + float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + + ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); + + CascadeSample retval; + retval.centerValue = neighbourhood[4]; + retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI; + retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; + return retval; +} + +} // namespace impl + +struct ReweightingParameters +{ + uint32_t lastCascadeIndex; + float initialEmin; // a minimum image brightness that we always consider reliable + float reciprocalBase; + float reciprocalN; + float reciprocalKappa; + float colorReliabilityFactor; + float NOverKappa; +}; + +ReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) +{ + ReweightingParameters retval; + retval.lastCascadeIndex = cascadeSize - 1u; + retval.initialEmin = minReliableLuma; + retval.reciprocalBase = 1.f / base; + const float N = float(sampleCount); + retval.reciprocalN = 1.f / N; + retval.reciprocalKappa = 1.f / kappa; + // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have + // allow up to ~ more energy in one sample to lessen bias in some cases + retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; + retval.NOverKappa = N * retval.reciprocalKappa; + + return retval; +} + +float32_t3 reweight(in ReweightingParameters params, in RWTexture2DArray cascade, in int32_t2 coord) +{ + float reciprocalBaseI = 1.f; + impl::CascadeSample curr = impl::SampleCascade(coord, cascade, 0u, reciprocalBaseI); + + float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); + float Emin = params.initialEmin; + + float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; + for (uint i = 0u; i <= params.lastCascadeIndex; i++) + { + const bool notFirstCascade = i != 0u; + const bool notLastCascade = i != params.lastCascadeIndex; + + impl::CascadeSample next; + if (notLastCascade) + { + reciprocalBaseI *= params.reciprocalBase; + next = impl::SampleCascade(coord, cascade, i + 1u, reciprocalBaseI); + } + + float reliability = 1.f; + // sample counting-based reliability estimation + if (params.reciprocalKappa <= 1.f) + { + float localReliability = curr.normalizedCenterLuma; + // reliability in 3x3 pixel block (see robustness) + float globalReliability = curr.normalizedNeighbourhoodAverageLuma; + if (notFirstCascade) + { + localReliability += prevNormalizedCenterLuma; + globalReliability += prevNormalizedNeighbourhoodAverageLuma; + } + if (notLastCascade) + { + localReliability += next.normalizedCenterLuma; + globalReliability += next.normalizedNeighbourhoodAverageLuma; + } + // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) + reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; + { + const float accumLuma = impl::calcLuma(accumulation); + if (accumLuma > Emin) + Emin = accumLuma; + + const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; + + reliability += colorReliability; + reliability *= params.NOverKappa; + reliability -= params.reciprocalKappa; + reliability = clamp(reliability * 0.5f, 0.f, 1.f); + } + } + accumulation += curr.centerValue * reliability; + + prevNormalizedCenterLuma = curr.normalizedCenterLuma; + prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; + curr = next; + } + + return accumulation; +} + +} +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 2f4d11baf3..e0bd6921cc 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -389,5 +389,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") +#rwmc +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/rwmc.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/CascadeAccumulator.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 4dcf56a8cd3bc2152d0f4b5b11c04bb03c7fb891 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 20 Oct 2025 14:20:51 +0200 Subject: [PATCH 3/4] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 8ecc60ff8a..bbc8ab80fe 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8ecc60ff8af7b68564d769e04ec06d2e87cbf8e2 +Subproject commit bbc8ab80fecf44abb9b03f4fa147918fee7c310f From 2a7db14c902097142b01af7587462b0975fe6094 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 13 Nov 2025 21:58:26 +0100 Subject: [PATCH 4/4] Refactored resolve.hlsl --- examples_tests | 2 +- .../concepts/accessors/loadable_image.hlsl | 15 +- .../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 126 ++++++------ .../builtin/hlsl/rwmc/ResolveParameters.hlsl | 45 +++++ .../hlsl/rwmc/SplattingParameters.hlsl | 23 +++ include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 189 ++++++++++++++++++ include/nbl/builtin/hlsl/rwmc/rwmc.hlsl | 160 --------------- src/nbl/builtin/CMakeLists.txt | 4 +- 8 files changed, 334 insertions(+), 230 deletions(-) create mode 100644 include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl create mode 100644 include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl create mode 100644 include/nbl/builtin/hlsl/rwmc/resolve.hlsl delete mode 100644 include/nbl/builtin/hlsl/rwmc/rwmc.hlsl diff --git a/examples_tests b/examples_tests index bbc8ab80fe..badb4a615f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit bbc8ab80fecf44abb9b03f4fa147918fee7c310f +Subproject commit badb4a615f3d379cb494ad2b4bb2d12bad6ff9a9 diff --git a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl index c272eeb1ab..8c7251214d 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl @@ -16,8 +16,15 @@ namespace concepts { namespace accessors { + +// concept `LoadableImage` translates to smth like this: +//template +//concept LoadableImage = requires(U a, vector uv, uint16_t layer) { +// ::nbl::hlsl::is_same_v().template get(uv,layer)), vector>; +//}; + // declare concept -#define NBL_CONCEPT_NAME StorableImage +#define NBL_CONCEPT_NAME LoadableImage #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) #define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) // not the greatest syntax but works @@ -26,12 +33,12 @@ namespace accessors #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) // start concept NBL_CONCEPT_BEGIN(3) -// need to be defined AFTER the cocnept begins +// need to be defined AFTER the concept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer)) , ::nbl::hlsl::is_same_v, vector)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer)), ::nbl::hlsl::is_same_v, vector)) ); #undef layer #undef uv @@ -39,7 +46,7 @@ NBL_CONCEPT_END( #include // declare concept -#define NBL_CONCEPT_NAME MipmappedStorableImage +#define NBL_CONCEPT_NAME MipmappedLoadableImage #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) #define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) // not the greatest syntax but works diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 6678a66942..77cfb3c283 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -1,8 +1,10 @@ #ifndef _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_ #define _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include #include #include +#include namespace nbl { @@ -11,86 +13,82 @@ namespace hlsl namespace rwmc { -struct CascadeSettings +template) +struct CascadeAccumulator { - uint32_t size; - uint32_t start; - uint32_t base; -}; + struct CascadeEntry + { + uint32_t cascadeSampleCounter[CascadeCount]; + CascadeLayerType data[CascadeCount]; -template -struct CascadeEntry -{ - CascadeLayerType data[CascadeSize]; -}; + void addSampleIntoCascadeEntry(CascadeLayerType _sample, uint32_t lowerCascadeIndex, float lowerCascadeLevelWeight, float higherCascadeLevelWeight, uint32_t sampleCount) + { + const float reciprocalSampleCount = 1.0f / float(sampleCount); + + uint32_t lowerCascadeSampleCount = cascadeSampleCounter[lowerCascadeIndex]; + data[lowerCascadeIndex] += (_sample * lowerCascadeLevelWeight - (sampleCount - lowerCascadeSampleCount) * data[lowerCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[lowerCascadeIndex] = sampleCount; + + uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; + if (higherCascadeIndex < CascadeCount) + { + uint32_t higherCascadeSampleCount = cascadeSampleCounter[higherCascadeIndex]; + data[higherCascadeIndex] += (_sample * higherCascadeLevelWeight - (sampleCount - higherCascadeSampleCount) * data[higherCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[higherCascadeIndex] = sampleCount; + } + } + }; -template -struct CascadeAccumulator -{ - using output_storage_type = CascadeEntry; - using initialization_data = CascadeSettings; + using cascade_layer_scalar_type = typename vector_traits::scalar_type; + using this_t = CascadeAccumulator; + using output_storage_type = CascadeEntry; + using initialization_data = SplattingParameters; output_storage_type accumulation; - uint32_t cascadeSampleCounter[CascadeSize]; - CascadeSettings cascadeSettings; + + SplattingParameters splattingParameters; - void initialize(in CascadeSettings settings) + static this_t create(NBL_CONST_REF_ARG(SplattingParameters) settings) { - for (int i = 0; i < CascadeSize; ++i) + this_t retval; + for (int i = 0; i < CascadeCount; ++i) { - accumulation.data[i] = (CascadeLayerType)0.0f; - cascadeSampleCounter[i] = 0u; + retval.accumulation.data[i] = promote(0.0f); + retval.accumulation.cascadeSampleCounter[i] = 0u; } + retval.splattingParameters = settings; - cascadeSettings.size = settings.size; - cascadeSettings.start = settings.start; - cascadeSettings.base = settings.base; + return retval; } - - typename vector_traits::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) + + cascade_layer_scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) { return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); } // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp - void addSample(uint32_t sampleIndex, float32_t3 sample) + void addSample(uint32_t sampleCount, CascadeLayerType _sample) { - float lowerScale = cascadeSettings.start; - float upperScale = lowerScale * cascadeSettings.base; - - const float luma = getLuma(sample); - - uint32_t lowerCascadeIndex = 0u; - while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) - { - lowerScale = upperScale; - upperScale *= cascadeSettings.base; - ++lowerCascadeIndex; - } - - float lowerCascadeLevelWeight; - float higherCascadeLevelWeight; - - if (luma <= lowerScale) - lowerCascadeLevelWeight = 1.0f; - else if (luma < upperScale) - lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale)); - else // Inf, NaN ... - lowerCascadeLevelWeight = 0.0f; - - if (luma < upperScale) - higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); - else - higherCascadeLevelWeight = upperScale / luma; - - uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; - - const uint32_t sampleCount = sampleIndex + 1u; - const float reciprocalSampleCount = 1.0f / float(sampleCount); - accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount; - accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount; - cascadeSampleCounter[lowerCascadeIndex] = sampleCount; - cascadeSampleCounter[higherCascadeIndex] = sampleCount; + const cascade_layer_scalar_type log2Start = splattingParameters.log2Start; + const cascade_layer_scalar_type log2Base = splattingParameters.log2Base; + const cascade_layer_scalar_type luma = getLuma(_sample); + const cascade_layer_scalar_type log2Luma = log2(luma); + const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base; + const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1); + // c<=0 -> 0, c>=Count-1 -> Count-1 + uint32_t lowerCascadeIndex = floor(cascade); + // 0 whenever clamped or `cascade` is integer (when `clampedCascade` is integer) + cascade_layer_scalar_type higherCascadeWeight = clampedCascade - floor(clampedCascade); + // never 0 thanks to magic of `1-fract(x)` + cascade_layer_scalar_type lowerCascadeWeight = cascade_layer_scalar_type(1) - higherCascadeWeight; + + // handle super bright sample case + if (cascade > CascadeCount - 1) + lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); + + accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } + + }; } diff --git a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl new file mode 100644 index 0000000000..7509eac493 --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl @@ -0,0 +1,45 @@ +#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + +struct ResolveParameters +{ + uint32_t lastCascadeIndex; + float initialEmin; // a minimum image brightness that we always consider reliable + float reciprocalBase; + float reciprocalN; + float reciprocalKappa; + float colorReliabilityFactor; + float NOverKappa; +}; + +ResolveParameters computeResolveParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) +{ + ResolveParameters retval; + retval.lastCascadeIndex = cascadeSize - 1u; + retval.initialEmin = minReliableLuma; + retval.reciprocalBase = 1.f / base; + const float N = float(sampleCount); + retval.reciprocalN = 1.f / N; + retval.reciprocalKappa = 1.f / kappa; + // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have + // allow up to ~ more energy in one sample to lessen bias in some cases + retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; + retval.NOverKappa = N * retval.reciprocalKappa; + + return retval; +} + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl new file mode 100644 index 0000000000..e74dd0e5bd --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -0,0 +1,23 @@ +#ifndef _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + +struct SplattingParameters +{ + float log2Start; + float log2Base; +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl new file mode 100644 index 0000000000..cb8d3b27d1 --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -0,0 +1,189 @@ +#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + // declare concept +#define NBL_CONCEPT_NAME ResolveAccessorBase +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(VectorScalarType)(Dims) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (a,T) +#define NBL_CONCEPT_PARAM_1 (scalar,VectorScalarType) +#define NBL_CONCEPT_PARAM_2 (vec,vector) +// start concept + NBL_CONCEPT_BEGIN(2) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define scalar NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define vec NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR)((a.calcLuma(vec)))) +); +#undef a +#undef vec +#include + +/* ResolveAccessor is required to: +* - satisfy `LoadableImage` concept requirements +* - implement function called `calcLuma` which calculates luma from a pixel value +*/ + +template +NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase && concepts::accessors::LoadableImage; + +template +struct ResolveAccessorAdaptor +{ + using output_scalar_type = OutputScalar; + using output_type = vector; + NBL_CONSTEXPR int32_t image_dimension = 2; + + RWTexture2DArray cascade; + + float32_t calcLuma(in float32_t3 col) + { + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); + } + + template + output_type get(vector uv, uint16_t layer) + { + uint32_t imgWidth, imgHeight, layers; + cascade.GetDimensions(imgWidth, imgHeight, layers); + int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight); + + if (any(uv < int16_t2(0, 0)) || any(uv > cascadeImageDimension)) + return vector(0, 0, 0, 0); + + return cascade.Load(int32_t3(uv, int32_t(layer))); + } +}; + +template //NBL_PRIMARY_REQUIRES(ResolveAccessor) +struct Resolver +{ + using output_type = OutputColorType; + + struct CascadeSample + { + float32_t3 centerValue; + float normalizedCenterLuma; + float normalizedNeighbourhoodAverageLuma; + }; + + static Resolver create(NBL_REF_ARG(ResolveParameters) resolveParameters) + { + Resolver retval; + retval.params = resolveParameters; + + return retval; + } + + output_type operator()(NBL_REF_ARG(CascadeAccessor) acc, const int16_t2 coord) + { + float reciprocalBaseI = 1.f; + CascadeSample curr = __sampleCascade(acc, coord, 0u, reciprocalBaseI); + + float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); + float Emin = params.initialEmin; + + float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; + for (int16_t i = 0u; i <= params.lastCascadeIndex; i++) + { + const bool notFirstCascade = i != 0; + const bool notLastCascade = i != params.lastCascadeIndex; + + CascadeSample next; + if (notLastCascade) + { + reciprocalBaseI *= params.reciprocalBase; + next = __sampleCascade(acc, coord, int16_t(i + 1), reciprocalBaseI); + } + + float reliability = 1.f; + // sample counting-based reliability estimation + if (params.reciprocalKappa <= 1.f) + { + float localReliability = curr.normalizedCenterLuma; + // reliability in 3x3 pixel block (see robustness) + float globalReliability = curr.normalizedNeighbourhoodAverageLuma; + if (notFirstCascade) + { + localReliability += prevNormalizedCenterLuma; + globalReliability += prevNormalizedNeighbourhoodAverageLuma; + } + if (notLastCascade) + { + localReliability += next.normalizedCenterLuma; + globalReliability += next.normalizedNeighbourhoodAverageLuma; + } + // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) + reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; + { + const float accumLuma = acc.calcLuma(accumulation); + if (accumLuma > Emin) + Emin = accumLuma; + + const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; + + reliability += colorReliability; + reliability *= params.NOverKappa; + reliability -= params.reciprocalKappa; + reliability = clamp(reliability * 0.5f, 0.f, 1.f); + } + } + accumulation += curr.centerValue * reliability; + + prevNormalizedCenterLuma = curr.normalizedCenterLuma; + prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; + curr = next; + } + + return accumulation; + } + + ResolveParameters params; + + // pseudo private stuff: + + CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, float reciprocalBaseI) + { + CascadeAccessor::output_type tmp; + output_type neighbourhood[9]; + neighbourhood[0] = acc.template get(coord + int16_t2(-1, -1), cascadeIndex); + neighbourhood[1] = acc.template get(coord + int16_t2(0, -1), cascadeIndex); + neighbourhood[2] = acc.template get(coord + int16_t2(1, -1), cascadeIndex); + neighbourhood[3] = acc.template get(coord + int16_t2(-1, 0), cascadeIndex); + neighbourhood[4] = acc.template get(coord + int16_t2(0, 0), cascadeIndex); + neighbourhood[5] = acc.template get(coord + int16_t2(1, 0), cascadeIndex); + neighbourhood[6] = acc.template get(coord + int16_t2(-1, 1), cascadeIndex); + neighbourhood[7] = acc.template get(coord + int16_t2(0, 1), cascadeIndex); + neighbourhood[8] = acc.template get(coord + int16_t2(1, 1), cascadeIndex); + + // numerical robustness + float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + + ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); + + CascadeSample retval; + retval.centerValue = neighbourhood[4]; + retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = acc.calcLuma(neighbourhood[4]) * reciprocalBaseI; + retval.normalizedNeighbourhoodAverageLuma = (acc.calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; + return retval; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl deleted file mode 100644 index d7b151af86..0000000000 --- a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl +++ /dev/null @@ -1,160 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include - -namespace nbl -{ -namespace hlsl -{ -namespace rwmc -{ -namespace impl -{ - -struct CascadeSample -{ - float32_t3 centerValue; - float normalizedCenterLuma; - float normalizedNeighbourhoodAverageLuma; -}; - -// TODO: figure out what values should pixels outside have, 0.0f is incorrect -float32_t3 sampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, in RWTexture2DArray cascade, uint32_t cascadeIndex) -{ - const int32_t2 texelCoord = currentCoord + offset; - if (any(texelCoord < int32_t2(0, 0))) - return float32_t3(0.0f, 0.0f, 0.0f); - - float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex))); - return float32_t3(output.r, output.g, output.b); -} - -float32_t calcLuma(in float32_t3 col) -{ - return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); -} - -CascadeSample SampleCascade(in int32_t2 coord, in RWTexture2DArray cascade, in uint cascadeIndex, in float reciprocalBaseI) -{ - float32_t3 neighbourhood[9]; - neighbourhood[0] = sampleCascadeTexel(coord, int32_t2(-1, -1), cascade, cascadeIndex); - neighbourhood[1] = sampleCascadeTexel(coord, int32_t2(0, -1), cascade, cascadeIndex); - neighbourhood[2] = sampleCascadeTexel(coord, int32_t2(1, -1), cascade, cascadeIndex); - neighbourhood[3] = sampleCascadeTexel(coord, int32_t2(-1, 0), cascade, cascadeIndex); - neighbourhood[4] = sampleCascadeTexel(coord, int32_t2(0, 0), cascade, cascadeIndex); - neighbourhood[5] = sampleCascadeTexel(coord, int32_t2(1, 0), cascade, cascadeIndex); - neighbourhood[6] = sampleCascadeTexel(coord, int32_t2(-1, 1), cascade, cascadeIndex); - neighbourhood[7] = sampleCascadeTexel(coord, int32_t2(0, 1), cascade, cascadeIndex); - neighbourhood[8] = sampleCascadeTexel(coord, int32_t2(1, 1), cascade, cascadeIndex); - - // numerical robustness - float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + - ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); - - CascadeSample retval; - retval.centerValue = neighbourhood[4]; - retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI; - retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; - return retval; -} - -} // namespace impl - -struct ReweightingParameters -{ - uint32_t lastCascadeIndex; - float initialEmin; // a minimum image brightness that we always consider reliable - float reciprocalBase; - float reciprocalN; - float reciprocalKappa; - float colorReliabilityFactor; - float NOverKappa; -}; - -ReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) -{ - ReweightingParameters retval; - retval.lastCascadeIndex = cascadeSize - 1u; - retval.initialEmin = minReliableLuma; - retval.reciprocalBase = 1.f / base; - const float N = float(sampleCount); - retval.reciprocalN = 1.f / N; - retval.reciprocalKappa = 1.f / kappa; - // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have - // allow up to ~ more energy in one sample to lessen bias in some cases - retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; - retval.NOverKappa = N * retval.reciprocalKappa; - - return retval; -} - -float32_t3 reweight(in ReweightingParameters params, in RWTexture2DArray cascade, in int32_t2 coord) -{ - float reciprocalBaseI = 1.f; - impl::CascadeSample curr = impl::SampleCascade(coord, cascade, 0u, reciprocalBaseI); - - float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); - float Emin = params.initialEmin; - - float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; - for (uint i = 0u; i <= params.lastCascadeIndex; i++) - { - const bool notFirstCascade = i != 0u; - const bool notLastCascade = i != params.lastCascadeIndex; - - impl::CascadeSample next; - if (notLastCascade) - { - reciprocalBaseI *= params.reciprocalBase; - next = impl::SampleCascade(coord, cascade, i + 1u, reciprocalBaseI); - } - - float reliability = 1.f; - // sample counting-based reliability estimation - if (params.reciprocalKappa <= 1.f) - { - float localReliability = curr.normalizedCenterLuma; - // reliability in 3x3 pixel block (see robustness) - float globalReliability = curr.normalizedNeighbourhoodAverageLuma; - if (notFirstCascade) - { - localReliability += prevNormalizedCenterLuma; - globalReliability += prevNormalizedNeighbourhoodAverageLuma; - } - if (notLastCascade) - { - localReliability += next.normalizedCenterLuma; - globalReliability += next.normalizedNeighbourhoodAverageLuma; - } - // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) - reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; - { - const float accumLuma = impl::calcLuma(accumulation); - if (accumLuma > Emin) - Emin = accumLuma; - - const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; - - reliability += colorReliability; - reliability *= params.NOverKappa; - reliability -= params.reciprocalKappa; - reliability = clamp(reliability * 0.5f, 0.f, 1.f); - } - } - accumulation += curr.centerValue * reliability; - - prevNormalizedCenterLuma = curr.normalizedCenterLuma; - prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; - curr = next; - } - - return accumulation; -} - -} -} -} - -#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e0bd6921cc..c4f13ab2a1 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -390,7 +390,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") #rwmc -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/rwmc.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/Resolve.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/CascadeAccumulator.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/SplattingParameters.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/ResolveParameters.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")