From 02d6d0f3e543aaad7d4e52ecf753a826879b8904 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 4 Feb 2025 14:16:42 +0700
Subject: [PATCH 01/39] initial example

---
 examples_tests                                |  2 +-
 .../hlsl/sampling/box_muller_transform.hlsl   | 27 +++++++++++++++++++
 src/nbl/builtin/CMakeLists.txt                |  1 +
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
diff --git a/examples_tests b/examples_tests
index d7f7a87fa0..b171724bb0 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit d7f7a87fa08a56a16cd1bcc7d4d9fd48fc8c278c
+Subproject commit b171724bb0db3bf6f144d6eb077e95ddea806cbd
diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
new file mode 100644
index 0000000000..efa8d66e2b
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
@@ -0,0 +1,27 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_BOX_MULLER_TRANSFORM_INCLUDED_
+#define _NBL_BUILTIN_HLSL_BOX_MULLER_TRANSFORM_INCLUDED_
+
+#include "nbl/builtin/hlsl/math/functions.hlsl"
+#include "nbl/builtin/hlsl/numbers.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+
+template<typename T> // Box-Muller transform: warps the uniform pair `xi` into a 2D gaussian sample scaled by `stddev`
+vector<T,2> boxMullerTransform(vector<T,2> xi, T stddev)
+{
+    T sinPhi, cosPhi; // sine/cosine of the angle 2*pi*xi.y - pi, with pi taken in T's precision instead of being pinned to float
+    nbl::hlsl::sincos<T>(2.0 * numbers::pi<T> * xi.y - numbers::pi<T>, sinPhi, cosPhi);
+    return vector<T,2>(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev; // radius is sqrt(-2 ln xi.x): xi.x == 0 yields infinity
+}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 35f22d6ba1..abab705d13 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -313,6 +313,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl")
 #sampling
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform.hlsl")
 #

From 129b50e530f2a3d5b673df8ddaab004b9404fd80 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 6 Feb 2025 16:03:31 +0700
Subject: [PATCH 02/39] use bxdf creation params struct

---
 examples_tests                                |  2 +-
 include/nbl/builtin/hlsl/bxdf/common.hlsl     | 15 +++++++++
 include/nbl/builtin/hlsl/bxdf/reflection.hlsl | 26 ++++++++++++++++
 .../nbl/builtin/hlsl/bxdf/transmission.hlsl   | 31 +++++++++++++++++++
 4 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index b171724bb0..5a5fbfe55a 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit b171724bb0db3bf6f144d6eb077e95ddea806cbd
+Subproject commit 5a5fbfe55aa4cf062c562f19507ba30de085b7a6
diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl
index e518f0dcba..050366c6dc 100644
--- a/include/nbl/builtin/hlsl/bxdf/common.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl
@@ -844,6 +844,7 @@ enum BxDFClampMode : uint16_t
     BCM_ABS
 };
 
+// unified param struct for calls to BxDF::eval, BxDF::pdf, BxDF::quotient_and_pdf
 template<typename Scalar NBL_PRIMARY_REQUIRES(is_scalar_v<Scalar>)
 struct SBxDFParams
 {
@@ -976,6 +977,20 @@ struct SBxDFParams
     Scalar uNdotV;
 };
 
+// unified param struct for calls to BxDF::create
+template<typename Scalar, typename Spectrum NBL_PRIMARY_REQUIRES(is_scalar_v<Scalar>)
+struct SBxDFCreationParams
+{
+    bool is_aniso; // the Beckmann/GGX create overloads read `Axy` when this is true and `A` when it is false
+    Scalar A; // forwarded by the Oren-Nayar overload and by the `!is_aniso` Beckmann/GGX paths
+    vector<Scalar, 2> Axy; // `.x` and `.y` are forwarded as separate arguments on the `is_aniso` paths
+    Spectrum ior0; // forwarded together with `ior1` by the Beckmann/GGX overloads in reflection.hlsl
+    Spectrum ior1;
+    Scalar eta; // forwarded by the smooth, Beckmann and GGX dielectric overloads in transmission.hlsl
+    Spectrum eta2; // forwarded by the `true` specialization of SSmoothDielectricBxDF
+    Spectrum luminosityContributionHint; // forwarded by the `true` specialization of SSmoothDielectricBxDF
+};
+
 // fresnel stuff
 namespace impl
 {
diff --git a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
index bf534e2f8d..b074bcaddb 100644
--- a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
@@ -59,6 +59,11 @@ struct SLambertianBxDF
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params) // unified-params overload: no field of `params` is read here
+    {
+        return create();
+    }
+
     scalar_type __eval_pi_factored_out(scalar_type maxNdotL)
     {
         return maxNdotL;
@@ -117,6 +122,11 @@ struct SOrenNayarBxDF
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params) // unified-params overload: only `params.A` is forwarded
+    {
+        return create(params.A);
+    }
+
     scalar_type __rec_pi_factored_out_wo_clamps(scalar_type VdotL, scalar_type maxNdotL, scalar_type maxNdotV)
     {
         scalar_type A2 = A * 0.5;
@@ -342,6 +352,14 @@ struct SBeckmannBxDF
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        if (!params.is_aniso) // single shared `A` parameter
+            return create(params.A, params.ior0, params.ior1);
+        else // separate `Axy.x` / `Axy.y` parameters
+            return create(params.Axy.x, params.Axy.y, params.ior0, params.ior1);
+    }
+
     scalar_type __eval_DG_wo_clamps(params_t params)
     {
         if (params.is_aniso)
@@ -570,6 +588,14 @@ struct SGGXBxDF
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        if (!params.is_aniso) // single shared `A` parameter
+            return create(params.A, params.ior0, params.ior1);
+        else // separate `Axy.x` / `Axy.y` parameters
+            return create(params.Axy.x, params.Axy.y, params.ior0, params.ior1);
+    }
+
     scalar_type __eval_DG_wo_clamps(params_t params)
     {
         if (params.is_aniso)
diff --git a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
index 17682a7384..18d80e93aa 100644
--- a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
@@ -63,6 +63,11 @@ struct SLambertianBxDF
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params) // unified-params overload: no field of `params` is read here
+    {
+        return create();
+    }
+
     scalar_type __eval_pi_factored_out(scalar_type absNdotL)
     {
         return absNdotL;
@@ -124,6 +129,11 @@ struct SSmoothDielectricBxDF
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params) // unified-params overload: only `params.eta` is forwarded
+    {
+        return create(params.eta);
+    }
+
     spectral_type eval(params_t params)
     {
         return (spectral_type)0;
@@ -205,6 +215,11 @@ struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, true>
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params) // unified-params overload: forwards `eta2` and `luminosityContributionHint`
+    {
+        return create(params.eta2, params.luminosityContributionHint);
+    }
+
     spectral_type eval(params_t params)
     {
         return (spectral_type)0;
@@ -299,6 +314,14 @@ struct SBeckmannDielectricBxDF
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        if (!params.is_aniso) // single shared `A` parameter
+            return create(params.eta, params.A);
+        else // separate `Axy.x` / `Axy.y` parameters
+            return create(params.eta, params.Axy.x, params.Axy.y);
+    }
+
     spectral_type eval(params_t params)
     {
         scalar_type orientedEta, dummy;
@@ -466,6 +489,14 @@ struct SGGXDielectricBxDF
         return retval;
     }
 
+    static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        if (!params.is_aniso) // single shared `A` parameter
+            return create(params.eta, params.A);
+        else // separate `Axy.x` / `Axy.y` parameters
+            return create(params.eta, params.Axy.x, params.Axy.y);
+    }
+
     spectral_type eval(params_t params)
     {
         scalar_type orientedEta, dummy;

From 74261dc22e7f375e12abf4de232e13ea5ebcbee5 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 7 Feb 2025 14:37:25 +0700
Subject: [PATCH 03/39] triangle and rectangle shapes

---
 examples_tests                                |  2 +-
 .../nbl/builtin/hlsl/shapes/rectangle.hlsl    | 51 ++++++++++
 include/nbl/builtin/hlsl/shapes/triangle.hlsl | 99 +++++++++++++++++++
 src/nbl/builtin/CMakeLists.txt                |  2 +
 4 files changed, 153 insertions(+), 1 deletion(-)
 create mode 100644 include/nbl/builtin/hlsl/shapes/rectangle.hlsl
 create mode 100644 include/nbl/builtin/hlsl/shapes/triangle.hlsl

diff --git a/examples_tests b/examples_tests
index 5a5fbfe55a..85e67ad0c4 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 5a5fbfe55aa4cf062c562f19507ba30de085b7a6
+Subproject commit 85e67ad0c4012d7d8d2014489327036d89b0bf57
diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
new file mode 100644
index 0000000000..854a326aaf
--- /dev/null
+++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
@@ -0,0 +1,51 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SHAPES_RECTANGLE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SHAPES_RECTANGLE_INCLUDED_
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/numbers.hlsl>
+#include <nbl/builtin/hlsl/math/functions.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace shapes
+{
+
+template<typename T> // rectangle seen from an observer, kept as its origin offset transformed by the basis handed to `create`
+struct SphericalRectangle
+{
+    using scalar_type = T;
+    using vector3_type = vector<T, 3>;
+    using vector4_type = vector<T, 4>;
+    using matrix3x3_type = matrix<T, 3, 3>;
+
+    static SphericalRectangle<T> create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) tangent, NBL_CONST_REF_ARG(vector3_type) bitangent, NBL_CONST_REF_ARG(vector3_type) normal)
+    {
+        SphericalRectangle<T> retval; // basis arguments are spelled out: a parameter called `T` hid the template parameter
+        retval.r0 = nbl::hlsl::mul(nbl::hlsl::transpose<matrix3x3_type>(matrix3x3_type(tangent, bitangent, normal)), rectangleOrigin - observer);
+        return retval;
+    }
+
+    scalar_type solidAngleOfRectangle(NBL_CONST_REF_ARG(vector3_type) r0, NBL_CONST_REF_ARG(vector<scalar_type, 2>) rectangleExtents)
+    {
+        const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x);
+        const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z);
+        const vector4_type cosGamma = vector4_type(
+            -n_z[0] * n_z[1], -n_z[1] * n_z[2],
+            -n_z[2] * n_z[3], -n_z[3] * n_z[0]);
+        return math::getSumofArccosABCD(cosGamma[0], cosGamma[1], cosGamma[2], cosGamma[3]) - 2 * numbers::pi<scalar_type>;
+    }
+
+    vector3_type r0; // written by `create`: (rectangleOrigin - observer) multiplied by the transposed basis matrix
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
new file mode 100644
index 0000000000..f7ce67a1c9
--- /dev/null
+++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
@@ -0,0 +1,99 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/limits.hlsl>
+#include <nbl/builtin/hlsl/math/functions.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace shapes
+{
+
+template<typename T> // triangle as seen from `origin`: stores the three normalized directions towards its vertices
+struct SphericalTriangle
+{
+    using scalar_type = T;
+    using vector3_type = vector<T, 3>;
+
+    static SphericalTriangle<T> create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, NBL_CONST_REF_ARG(vector3_type) origin)
+    {
+        SphericalTriangle<T> retval;
+        retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin);
+        retval.vertex1 = nbl::hlsl::normalize(vertex1 - origin);
+        retval.vertex2 = nbl::hlsl::normalize(vertex2 - origin);
+        return retval;
+    }
+
+    bool pyramidAngles(NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides) // returns true when any cosecant reached numeric_limits max
+    {
+        cos_sides = vector3_type(nbl::hlsl::dot(vertex1, vertex2), nbl::hlsl::dot(vertex2, vertex0), nbl::hlsl::dot(vertex0, vertex1));
+        csc_sides = 1.0 / nbl::hlsl::sqrt((vector3_type)(1.f) - cos_sides * cos_sides);
+        return nbl::hlsl::any(csc_sides >= (vector3_type)(numeric_limits<scalar_type>::max));
+    }
+
+    scalar_type solidAngleOfTriangle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices, NBL_REF_ARG(scalar_type) cos_a, NBL_REF_ARG(scalar_type) cos_c, NBL_REF_ARG(scalar_type) csc_b, NBL_REF_ARG(scalar_type) csc_c)
+    {
+        vector3_type cos_sides,csc_sides;
+        if (pyramidAngles(cos_sides, csc_sides)) // early out: none of the output arguments has been written at this point
+            return 0.f;
+
+        // these variables might eventually get optimized out
+        cos_a = cos_sides[0];
+        cos_c = cos_sides[2];
+        csc_b = csc_sides[1];
+        csc_c = csc_sides[2];
+
+        // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI
+        cos_vertices = clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?)
+        sin_vertices = sqrt((vector3_type)1.f - cos_vertices * cos_vertices);
+
+        return math::getArccosSumofABC_minus_PI(cos_vertices[0], cos_vertices[1], cos_vertices[2], sin_vertices[0], sin_vertices[1], sin_vertices[2]);
+    }
+
+    scalar_type solidAngleOfTriangle() // convenience overload that discards every intermediate output
+    {
+        vector3_type dummy0,dummy1;
+        scalar_type dummy2,dummy3,dummy4,dummy5;
+        return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5);
+    }
+
+    scalar_type projectedSolidAngleOfTriangle(NBL_CONST_REF_ARG(vector3_type) receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices)
+    {
+        if (pyramidAngles(cos_sides, csc_sides))
+            return 0.f;
+
+        vector3_type awayFromEdgePlane0 = nbl::hlsl::cross(vertex1, vertex2) * csc_sides[0];
+        vector3_type awayFromEdgePlane1 = nbl::hlsl::cross(vertex2, vertex0) * csc_sides[1];
+        vector3_type awayFromEdgePlane2 = nbl::hlsl::cross(vertex0, vertex1) * csc_sides[2];
+
+        // useless here but could be useful somewhere else
+        cos_vertices[0] = nbl::hlsl::dot(awayFromEdgePlane1, awayFromEdgePlane2);
+        cos_vertices[1] = nbl::hlsl::dot(awayFromEdgePlane2, awayFromEdgePlane0);
+        cos_vertices[2] = nbl::hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1);
+        // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works
+        cos_vertices = nbl::hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f);
+
+        // |dot(edge-plane vector, receiverNormal)| for each edge, written out instead of going through an undeclared matrix
+        const vector3_type externalProducts = nbl::hlsl::abs(vector3_type(nbl::hlsl::dot(awayFromEdgePlane0, receiverNormal), nbl::hlsl::dot(awayFromEdgePlane1, receiverNormal), nbl::hlsl::dot(awayFromEdgePlane2, receiverNormal)));
+
+        const vector3_type pyramidAngles = acos(cos_sides);
+        return nbl::hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi<float>);
+    }
+
+    vector3_type vertex0;
+    vector3_type vertex1;
+    vector3_type vertex2;
+};
+
+}
+}
+}
+
+#endif
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index abab705d13..a79f9ac31a 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -311,6 +311,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/circle.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/ellipse.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/rectangle.hlsl")
 #sampling
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl")

From ab3ae20171ed7ea0d78338114eb420e9f179b459 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Mon, 10 Feb 2025 16:58:22 +0700
Subject: [PATCH 04/39] more sampling methods

---
 examples_tests                                |   2 +-
 .../nbl/builtin/hlsl/sampling/bilinear.hlsl   |  61 ++++++++
 include/nbl/builtin/hlsl/sampling/linear.hlsl |  45 ++++++
 .../projected_spherical_triangle.hlsl         |  94 +++++++++++++
 .../hlsl/sampling/spherical_triangle.hlsl     | 132 ++++++++++++++++++
 src/nbl/builtin/CMakeLists.txt                |   4 +
 6 files changed, 337 insertions(+), 1 deletion(-)
 create mode 100644 include/nbl/builtin/hlsl/sampling/bilinear.hlsl
 create mode 100644 include/nbl/builtin/hlsl/sampling/linear.hlsl
 create mode 100644 include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
 create mode 100644 include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl

diff --git a/examples_tests b/examples_tests
index 85e67ad0c4..2c500b1e06 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 85e67ad0c4012d7d8d2014489327036d89b0bf57
+Subproject commit 2c500b1e06e3e83b2a427bf0aa1ef27878467e0b
diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl
new file mode 100644
index 0000000000..1d5f9a91e8
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl
@@ -0,0 +1,61 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_BILINEAR_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_BILINEAR_INCLUDED_
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/limits.hlsl>
+#include <nbl/builtin/hlsl/sampling/linear.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+template<typename T> // warps a 2D sample using four coefficients, delegating each axis to a `Linear` sampler
+struct Bilinear
+{
+    using scalar_type = T;
+    using vector2_type = vector<T, 2>;
+    using vector3_type = vector<T, 3>;
+    using vector4_type = vector<T, 4>;
+
+    static Bilinear<T> create(NBL_CONST_REF_ARG(vector4_type) bilinearCoeffs)
+    {
+        Bilinear<T> retval;
+        retval.bilinearCoeffs = bilinearCoeffs;
+        return retval;
+    }
+
+    vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u)
+    {
+        const vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]);
+        Linear<scalar_type> lineary = Linear<scalar_type>::create(twiceAreasUnderXCurve);
+        vector2_type warped; // `u` is a const reference, so the warped coordinates are built in a local instead
+        warped.y = lineary.generate(u.y);
+        const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], warped.y), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], warped.y));
+        Linear<scalar_type> linearx = Linear<scalar_type>::create(ySliceEndPoints);
+        warped.x = linearx.generate(u.x);
+
+        rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], warped.x));
+
+        return warped;
+    }
+
+    scalar_type pdf(NBL_CONST_REF_ARG(vector2_type) u) // 4 * bilinear interpolation of the coefficients at `u`, divided by their sum
+    {
+        return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]);
+    }
+
+    vector4_type bilinearCoeffs;
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl
new file mode 100644
index 0000000000..8b9b3fb058
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl
@@ -0,0 +1,45 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_LINEAR_INCLUDED_ // must differ from bilinear.hlsl's guard, which includes this file
+#define _NBL_BUILTIN_HLSL_SAMPLING_LINEAR_INCLUDED_
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/limits.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+template<typename T> // warps a scalar `u` using two end-point coefficients (NOTE(review): looks like inverse-CDF sampling of a linear density - confirm)
+struct Linear
+{
+    using scalar_type = T;
+    using vector2_type = vector<T, 2>;
+
+    static Linear<T> create(NBL_CONST_REF_ARG(vector2_type) linearCoeffs)
+    {
+        Linear<T> sampler;
+        sampler.linearCoeffs = linearCoeffs;
+        return sampler;
+    }
+
+    scalar_type generate(scalar_type u)
+    {
+        const scalar_type invCoeffDelta = 1.0 / (linearCoeffs[0] - linearCoeffs[1]);
+        const bool passThrough = !(nbl::hlsl::abs(invCoeffDelta) < numeric_limits<scalar_type>::max); // reciprocal not below max, e.g. equal coefficients
+        return passThrough ? u : (linearCoeffs[0] - nbl::hlsl::sqrt(nbl::hlsl::mix(linearCoeffs[0] * linearCoeffs[0], linearCoeffs[1] * linearCoeffs[1], u))) * invCoeffDelta;
+    }
+
+    vector2_type linearCoeffs;
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
new file mode 100644
index 0000000000..5832e9aab2
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
@@ -0,0 +1,94 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_PROJECTED_SPHERICAL_TRIANGLE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_PROJECTED_SPHERICAL_TRIANGLE_INCLUDED_
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/limits.hlsl>
+#include <nbl/builtin/hlsl/math/functions.hlsl>
+#include <nbl/builtin/hlsl/sampling/bilinear.hlsl>
+#include <nbl/builtin/hlsl/sampling/spherical_triangle.hlsl>
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+template<typename T> // spherical-triangle sampler that pre-warps the sample through a bilinear patch built from the receiver normal
+struct ProjectedSphericalTriangle
+{
+    using scalar_type = T;
+    using vector2_type = vector<T, 2>;
+    using vector3_type = vector<T, 3>;
+    using vector4_type = vector<T, 4>;
+
+    static ProjectedSphericalTriangle<T> create(NBL_CONST_REF_ARG(shapes::SphericalTriangle<T>) tri)
+    {
+        ProjectedSphericalTriangle<T> retval;
+        retval.tri = tri;
+        return retval;
+    }
+
+    vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF)
+    {
+        const scalar_type minimumProjSolidAngle = 0.0;
+
+        matrix<T, 3, 3> m = matrix<T, 3, 3>(tri.vertex0, tri.vertex1, tri.vertex2);
+        const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), (vector3_type)minimumProjSolidAngle);
+
+        return bxdfPdfAtVertex.yyxz;
+    }
+
+    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u)
+    {
+        // pre-warp according to proj solid angle approximation
+        vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF);
+        Bilinear<scalar_type> bilinear = Bilinear<scalar_type>::create(patch);
+        const vector2_type warped = bilinear.generate(rcpPdf, u); // `u` is a const reference and cannot be reassigned
+
+        // now warp the points onto a spherical triangle; `generate` lives on the sampling wrapper, not on the shape
+        sampling::SphericalTriangle<T> sphtri = sampling::SphericalTriangle<T>::create(tri);
+        const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, warped);
+        rcpPdf *= solidAngle;
+        return L;
+    }
+
+    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u)
+    {
+        scalar_type cos_a, cos_c, csc_b, csc_c;
+        vector3_type cos_vertices, sin_vertices;
+        const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c);
+        return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u);
+    }
+
+    scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L)
+    {
+        sampling::SphericalTriangle<T> sphtri = sampling::SphericalTriangle<T>::create(tri); // `generateInverse` lives on the sampling wrapper
+        scalar_type triPdf;
+        const vector2_type u = sphtri.generateInverse(triPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L);
+        vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF);
+        Bilinear<scalar_type> bilinear = Bilinear<scalar_type>::create(patch);
+        return triPdf * bilinear.pdf(u);
+    }
+
+    scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L)
+    {
+        sampling::SphericalTriangle<T> sphtri = sampling::SphericalTriangle<T>::create(tri); // `generateInverse` lives on the sampling wrapper
+        scalar_type triPdf;
+        const vector2_type u = sphtri.generateInverse(triPdf, L);
+        vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF);
+        Bilinear<scalar_type> bilinear = Bilinear<scalar_type>::create(patch);
+        return triPdf * bilinear.pdf(u);
+    }
+
+    shapes::SphericalTriangle<T> tri;
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
new file mode 100644
index 0000000000..9501cdc3d1
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
@@ -0,0 +1,132 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_SPHERICAL_TRIANGLE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_SPHERICAL_TRIANGLE_INCLUDED_
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/limits.hlsl>
+#include <nbl/builtin/hlsl/math/functions.hlsl>
+#include <nbl/builtin/hlsl/shapes/triangle.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+template<typename T> // generates directions inside a `shapes::SphericalTriangle` from a 2D sample, and maps directions back
+struct SphericalTriangle
+{
+    using scalar_type = T;
+    using vector2_type = vector<T, 2>;
+    using vector3_type = vector<T, 3>;
+
+    static SphericalTriangle<T> create(NBL_CONST_REF_ARG(shapes::SphericalTriangle<T>) tri)
+    {
+        SphericalTriangle<T> retval;
+        retval.tri = tri;
+        return retval;
+    }
+
+    vector3_type slerp_delta(NBL_CONST_REF_ARG(vector3_type) start, NBL_CONST_REF_ARG(vector3_type) preScaledWaypoint, scalar_type cosAngleFromStart)
+    {
+        vector3_type planeNormal = nbl::hlsl::cross(start,preScaledWaypoint);
+
+        cosAngleFromStart *= 0.5;
+        const scalar_type sinAngle = nbl::hlsl::sqrt(0.5 - cosAngleFromStart);
+        const scalar_type cosAngle = nbl::hlsl::sqrt(0.5 + cosAngleFromStart);
+
+        planeNormal *= sinAngle;
+        const vector3_type precompPart = nbl::hlsl::cross(planeNormal, start) * 2.0;
+
+        return precompPart * cosAngle + nbl::hlsl::cross(planeNormal, precompPart);
+    }
+
+    // WARNING: can and will return NAN if one or three of the triangle edges are near zero length
+    vector3_type generate(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector2_type) u)
+    {
+        scalar_type negSinSubSolidAngle,negCosSubSolidAngle;
+        math::sincos(solidAngle * u.x - numbers::pi<scalar_type>, negSinSubSolidAngle, negCosSubSolidAngle);
+
+        const scalar_type p = negCosSubSolidAngle * sin_vertices[0] - negSinSubSolidAngle * cos_vertices[0];
+        const scalar_type q = -negSinSubSolidAngle * sin_vertices[0] - negCosSubSolidAngle * cos_vertices[0];
+
+        // TODO: we could optimize everything up and including to the first slerp, because precision here is just godawful
+        scalar_type u_ = q - cos_vertices[0];
+        scalar_type v_ = p + sin_vertices[0] * cos_c;
+
+        // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors
+        vector3_type C_s = tri.vertex0;
+        if (csc_b < numeric_limits<scalar_type>::max)
+        {
+            const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]);
+            if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f)
+                C_s += slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC);
+        }
+
+        vector3_type retval = tri.vertex1;
+        const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri.vertex1);
+        const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s);
+        if (csc_b_s < numeric_limits<scalar_type>::max)
+        {
+            const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f);
+            if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f)
+                retval += slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s);
+        }
+        return retval;
+    }
+
+    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) // `rcpPdf` receives the triangle's solid angle
+    {
+        scalar_type cos_a, cos_c, csc_b, csc_c;
+        vector3_type cos_vertices, sin_vertices;
+
+        rcpPdf = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c);
+
+        return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u);
+    }
+
+    vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) L)
+    {
+        pdf = 1.0 / solidAngle;
+
+        const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri.vertex1);
+        const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s);
+        const scalar_type cos_b_ = nbl::hlsl::dot(L, tri.vertex0);
+
+        const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * cos_c) * csc_a_ * csc_c;
+        const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_);
+
+        const scalar_type cosC_ = sin_vertices[0] * sinB_* cos_c - cos_vertices[0] * cosB_;
+        const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_);
+
+        const scalar_type subTriSolidAngleRatio = math::getArccosSumofABC_minus_PI(cos_vertices[0], cosB_, cosC_, sin_vertices[0], sinB_, sinC_) * pdf;
+        const scalar_type u = subTriSolidAngleRatio > numeric_limits<scalar_type>::min ? subTriSolidAngleRatio : 0.0;
+
+        const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_);
+        const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < asfloat(0x3f7fffff) ? cosBC_s : cos_c));
+
+        return vector2_type(u,v);
+    }
+
+    vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(vector3_type) L) // computes the triangle quantities itself, then defers
+    {
+        scalar_type cos_a, cos_c, csc_b, csc_c;
+        vector3_type cos_vertices, sin_vertices;
+
+        const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c);
+
+        return generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L);
+    }
+
+    shapes::SphericalTriangle<T> tri;
+};
+
+}
+}
+}
+
+#endif
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index a79f9ac31a..5cc0108874 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -314,9 +314,13 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/rectangle.hlsl")
 #sampling
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/bilinear.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_triangle.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/projected_spherical_triangle.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform.hlsl")
 #
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl")

From 355cfecdcd508151435f01b49c7f9a1e02975a23 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 11 Feb 2025 14:03:51 +0700
Subject: [PATCH 05/39] spherical rectangle

---
 examples_tests                                |  2 +-
 .../hlsl/sampling/spherical_rectangle.hlsl    | 86 +++++++++++++++++++
 .../nbl/builtin/hlsl/shapes/rectangle.hlsl    | 15 +++-
 include/nbl/builtin/hlsl/shapes/triangle.hlsl |  2 +-
 src/nbl/builtin/CMakeLists.txt                |  1 +
 5 files changed, 102 insertions(+), 4 deletions(-)
 create mode 100644 include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl

diff --git a/examples_tests b/examples_tests
index 2c500b1e06..e6a99165c1 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 2c500b1e06e3e83b2a427bf0aa1ef27878467e0b
+Subproject commit e6a99165c1b153977192f9722381fc24f566c9ca
diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
new file mode 100644
index 0000000000..83224bfabd
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
@@ -0,0 +1,86 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_SPHERICAL_RECTANGLE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_SPHERICAL_RECTANGLE_INCLUDED_
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/limits.hlsl>
+#include <nbl/builtin/hlsl/math/functions.hlsl>
+#include <nbl/builtin/hlsl/shapes/triangle.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+template<typename T>
+struct SphericalRectangle
+{
+    using scalar_type = T;
+    using vector2_type = vector<T, 2>;
+    using vector3_type = vector<T, 3>;
+    using vector4_type = vector<T, 4>;
+    // Builds a sampler that stores a copy of the given shapes::SphericalRectangle.
+    static SphericalRectangle<T> create(NBL_CONST_REF_ARG(shapes::SphericalRectangle<T>) rect)
+    {
+        SphericalRectangle<T> retval;
+        retval.rect = rect;
+        return retval;
+    }
+    // Maps uv to a point expressed as a fraction of rectangleExtents and writes the rectangle's solid angle to S.
+    vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S)
+    {
+        const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); // same n_z / cosGamma construction as shapes::SphericalRectangle::solidAngleOfRectangle
+        const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt(vector4_type(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z);
+        const vector4_type cosGamma = vector4_type(
+            -n_z[0] * n_z[1],
+            -n_z[1] * n_z[2],
+            -n_z[2] * n_z[3],
+            -n_z[3] * n_z[0]
+        );
+
+        scalar_type p = math::getSumofArccosAB(cosGamma[0], cosGamma[1]); // presumably acos(a)+acos(b); helper is defined elsewhere
+        scalar_type q = math::getSumofArccosAB(cosGamma[2], cosGamma[3]);
+
+        const scalar_type k = 2 * numbers::pi<scalar_type> - q;
+        const scalar_type b0 = n_z[0];
+        const scalar_type b1 = n_z[2];
+        S = p + q - 2 * numbers::pi<scalar_type>; // output: sum of the four angles minus 2*pi
+
+        const scalar_type CLAMP_EPS = 1e-5f;
+
+        // flip z axis if rect.r0.z > 0 (this writes back into the member, so the stored rect is modified by the call)
+        const uint32_t zFlipMask = (asuint(rect.r0.z) ^ 0x80000000u) & 0x80000000u; // sign-bit mask, set only when the sign bit of r0.z is clear; NOTE(review): bit tricks assume scalar_type is a 32-bit float
+        rect.r0.z = asfloat(asuint(rect.r0.z) ^ zFlipMask);
+        vector3_type r1 = rect.r0 + vector3_type(rectangleExtents.x, rectangleExtents.y, 0); // opposite corner of the rectangle
+
+        const scalar_type au = uv.x * S + k;
+        const scalar_type fu = (nbl::hlsl::cos(au) * b0 - b1) / nbl::hlsl::sin(au);
+        const scalar_type cu_2 = nbl::hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1]
+        const scalar_type cu = asfloat(asuint(1.0 / nbl::hlsl::sqrt(cu_2)) ^ (asuint(fu) & 0x80000000u)); // magnitude 1/sqrt(cu_2) carrying the sign bit of fu
+
+        scalar_type xu = -(cu * rect.r0.z) * 1.0 / nbl::hlsl::sqrt(1 - cu * cu);
+        xu = nbl::hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs
+        const scalar_type d_2 = xu * xu + rect.r0.z * rect.r0.z;
+        const scalar_type d = nbl::hlsl::sqrt(d_2);
+
+        const scalar_type h0 = rect.r0.y / nbl::hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y);
+        const scalar_type h1 = r1.y / nbl::hlsl::sqrt(d_2 + r1.y * r1.y);
+        const scalar_type hv = h0 + uv.y * (h1 - h0), hv2 = hv * hv; // linear blend between h0 and h1 driven by uv.y
+        const scalar_type yv = (hv2 < 1 - CLAMP_EPS) ? (hv * d) / nbl::hlsl::sqrt(1 - hv2) : r1.y; // falls back to r1.y when hv2 is within CLAMP_EPS of 1, avoiding a near-zero denominator
+
+        return vector2_type((xu - rect.r0.x) / rectangleExtents.x, (yv - rect.r0.y) / rectangleExtents.y); // offsets from r0 divided by the extents
+    }
+
+    shapes::SphericalRectangle<T> rect; // NOTE(review): this type lives in shapes/rectangle.hlsl, but this file includes shapes/triangle.hlsl — confirm it is reachable
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
index 854a326aaf..a61f23cafa 100644
--- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
@@ -24,13 +24,22 @@ struct SphericalRectangle
     using vector4_type = vector<T, 4>;
     using matrix3x3_type = matrix<T, 3, 3>;
 
+    static SphericalRectangle<T> create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis)
+    {
+        SphericalRectangle<T> retval;
+        retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer);
+        return retval;
+    }
+
     static SphericalRectangle<T> create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N)
     {
+        SphericalRectangle<T> retval;
         matrix3x3_type TBN = nbl::hlsl::transpose<matrix3x3_type>(matrix3x3_type(T, B, isotropic_type::N));
-        return nbl::hlsl::mul(TBN, rectangleOrigin - observer);
+        retval.r0 = nbl::hlsl::mul(TBN, rectangleOrigin - observer);
+        return retval;
     }
 
-    scalar_type solidAngleOfRectangle(NBL_CONST_REF_ARG(vector3_type) r0, NBL_CONST_REF_ARG(vector<scalar_type, 2>) rectangleExtents)
+    scalar_type solidAngleOfRectangle(NBL_CONST_REF_ARG(vector<scalar_type, 2>) rectangleExtents)
     {
         const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x);
         const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z);
@@ -42,6 +51,8 @@ struct SphericalRectangle
         );
         return math::getSumofArccosABCD(cosGamma[0], cosGamma[1], cosGamma[2], cosGamma[3]) - 2 * numbers::pi<float>;
     }
+
+    vector3_type r0;
 }
 
 }
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
index f7ce67a1c9..59ba508596 100644
--- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
@@ -49,7 +49,7 @@ struct SphericalTriangle
         cos_c = cos_sides[2];
         csc_b = csc_sides[1];
         csc_c = csc_sides[2];
-        
+
         // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI
         cos_vertices = clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
         sin_vertices = sqrt((vector3_type)1.f - cos_vertices * cos_vertices);
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 5cc0108874..82d081138a 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -321,6 +321,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_triangle.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/projected_spherical_triangle.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_rectangle.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform.hlsl")
 #
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl")

From 11180f45db44a5599bbaa08f342686cce315713e Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Mon, 17 Feb 2025 16:58:42 +0700
Subject: [PATCH 06/39] fix aniso cache bug

---
 examples_tests                            | 2 +-
 include/nbl/builtin/hlsl/bxdf/common.hlsl | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/examples_tests b/examples_tests
index 83d8a92584..159d1533e8 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 83d8a92584c28d9ad31c9853f6abb59922bc2249
+Subproject commit 159d1533e8d82e3c5e82165e8b79ea67c0f23111
diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl
index 050366c6dc..71bde312fe 100644
--- a/include/nbl/builtin/hlsl/bxdf/common.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl
@@ -610,6 +610,7 @@ struct SAnisotropicMicrofacetCache : SIsotropicMicrofacetCache<U>
 
     using ray_dir_info_type = ray_dir_info::SBasic<scalar_type>;
     using anisotropic_type = surface_interactions::SAnisotropic<ray_dir_info_type>;
+    using isocache_type = SIsotropicMicrofacetCache<U>;
     using sample_type = SLightSample<ray_dir_info_type>;
 
     // always valid by construction
@@ -671,7 +672,9 @@ struct SAnisotropicMicrofacetCache : SIsotropicMicrofacetCache<U>
         const scalar_type orientedEta, const scalar_type rcpOrientedEta, NBL_REF_ARG(vector3_type) H
     )
     {
-        const bool valid = this_t::compute(retval,transmitted,V,L,N,NdotL,VdotL,orientedEta,rcpOrientedEta,H);
+        isocache_type iso = (isocache_type)retval;
+        const bool valid = isocache_type::compute(iso,transmitted,V,L,N,NdotL,VdotL,orientedEta,rcpOrientedEta,H);
+        retval = (this_t)iso;
         if (valid)
         {
             retval.TdotH = nbl::hlsl::dot<vector3_type>(T,H);
@@ -687,8 +690,10 @@ struct SAnisotropicMicrofacetCache : SIsotropicMicrofacetCache<U>
         const scalar_type eta
     )
     {
+        isocache_type iso = (isocache_type)retval;
         vector3_type H;
-        const bool valid = this_t::compute(retval,interaction,_sample,eta,H);
+        const bool valid = isocache_type::compute(iso,interaction,_sample,eta,H);
+        retval = (this_t)iso;
         if (valid)
         {
             retval.TdotH = nbl::hlsl::dot<vector3_type>(interaction.T,H);

From 451605197b486b8da5970d10274f63825c407256 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 18 Feb 2025 15:24:42 +0700
Subject: [PATCH 07/39] init func to modify bxdf params directly

---
 examples_tests                                |  2 +-
 include/nbl/builtin/hlsl/bxdf/common.hlsl     |  9 +++--
 include/nbl/builtin/hlsl/bxdf/reflection.hlsl | 34 +++++++++++++++---
 .../nbl/builtin/hlsl/bxdf/transmission.hlsl   | 36 ++++++++++++++++---
 4 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/examples_tests b/examples_tests
index 159d1533e8..a7350db7d7 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 159d1533e8d82e3c5e82165e8b79ea67c0f23111
+Subproject commit a7350db7d7e422fa5086982b3327103c06cfbe44
diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl
index 71bde312fe..f4ae69aafc 100644
--- a/include/nbl/builtin/hlsl/bxdf/common.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl
@@ -987,11 +987,10 @@ template<typename Scalar, typename Spectrum NBL_PRIMARY_REQUIRES(is_scalar_v<Sca
 struct SBxDFCreationParams
 {
     bool is_aniso;
-    Scalar A;
-    vector<Scalar, 2> Axy;
-    Spectrum ior0;
-    Spectrum ior1;
-    Scalar eta;
+    vector<Scalar, 2> A;    // roughness
+    Spectrum ior0;          // source ior
+    Spectrum ior1;          // destination ior
+    Scalar eta;             // in most cases, eta will be calculated from ior0 and ior1; see monochromeEta in pathtracer.hlsl
     Spectrum eta2;
     Spectrum luminosityContributionHint;
 };
diff --git a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
index b074bcaddb..76d00c268c 100644
--- a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
@@ -64,6 +64,11 @@ struct SLambertianBxDF
         return create();
     }
 
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        // intentionally empty: the create(params) just above forwards to an argument-less create(), so no field is taken from `params`
+    }
+
     scalar_type __eval_pi_factored_out(scalar_type maxNdotL)
     {
         return maxNdotL;
@@ -124,7 +129,12 @@ struct SOrenNayarBxDF
 
     static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
     {
-        return create(params.A);
+        return create(params.A.x);
+    }
+
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        A = params.A.x; // scalar roughness: only the x component of params.A is used, the same value create(params) passes on
     }
 
     scalar_type __rec_pi_factored_out_wo_clamps(scalar_type VdotL, scalar_type maxNdotL, scalar_type maxNdotV)
@@ -355,9 +365,16 @@ struct SBeckmannBxDF
     static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
     {
         if (params.is_aniso)
-            return create(params.Axy.x, params.Axy.y, params.ior0, params.ior1);
+            return create(params.A.x, params.A.y, params.ior0, params.ior1);
         else
-            return create(params.A, params.ior0, params.ior1);
+            return create(params.A.x, params.ior0, params.ior1);
+    }
+
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        A = params.A; // NOTE(review): copies both roughness components without checking params.is_aniso, whereas create(params) passes only A.x when !is_aniso — confirm A.y is valid for isotropic params
+        ior0 = params.ior0; // overwrites the stored ior0 with params.ior0
+        ior1 = params.ior1; // overwrites the stored ior1 with params.ior1
     }
 
     scalar_type __eval_DG_wo_clamps(params_t params)
@@ -591,9 +608,16 @@ struct SGGXBxDF
     static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
     {
         if (params.is_aniso)
-            return create(params.Axy.x, params.Axy.y, params.ior0, params.ior1);
+            return create(params.A.x, params.A.y, params.ior0, params.ior1);
         else
-            return create(params.A, params.ior0, params.ior1);
+            return create(params.A.x, params.ior0, params.ior1);
+    }
+
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        A = params.A; // NOTE(review): copies both roughness components without checking params.is_aniso, whereas create(params) passes only A.x when !is_aniso — confirm A.y is valid for isotropic params
+        ior0 = params.ior0; // overwrites the stored ior0 with params.ior0
+        ior1 = params.ior1; // overwrites the stored ior1 with params.ior1
     }
 
     scalar_type __eval_DG_wo_clamps(params_t params)
diff --git a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
index 18d80e93aa..99e7e5f6a8 100644
--- a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
@@ -68,6 +68,11 @@ struct SLambertianBxDF
         return create();
     }
 
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        // intentionally empty: the create(params) just above forwards to an argument-less create(), so no field is taken from `params`
+    }
+
     scalar_type __eval_pi_factored_out(scalar_type absNdotL)
     {
         return absNdotL;
@@ -134,6 +139,11 @@ struct SSmoothDielectricBxDF
         return create(params.eta);
     }
 
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        eta = params.eta; // the same field create(params) forwards
+    }
+
     spectral_type eval(params_t params)
     {
         return (spectral_type)0;
@@ -220,6 +230,12 @@ struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, true>
         return create(params.eta2, params.luminosityContributionHint);
     }
 
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        eta2 = params.eta2; // same two fields create(params) forwards for this specialization
+        luminosityContributionHint = params.luminosityContributionHint;
+    }
+
     spectral_type eval(params_t params)
     {
         return (spectral_type)0;
@@ -317,9 +333,15 @@ struct SBeckmannDielectricBxDF
     static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
     {
         if (params.is_aniso)
-            return create(params.eta, params.Axy.x, params.Axy.y);
+            return create(params.eta, params.A.x, params.A.y);
         else
-            return create(params.eta, params.A);
+            return create(params.eta, params.A.x);
+    }
+
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        A = params.A; // NOTE(review): copies both roughness components without checking params.is_aniso, whereas create(params) passes only A.x when !is_aniso — confirm A.y is valid for isotropic params
+        eta = params.eta; // overwrites the stored eta with params.eta
     }
 
     spectral_type eval(params_t params)
@@ -492,9 +514,15 @@ struct SGGXDielectricBxDF
     static this_t create(SBxDFCreationParams<scalar_type, spectral_type> params)
     {
         if (params.is_aniso)
-            return create(params.eta, params.Axy.x, params.Axy.y);
+            return create(params.eta, params.A.x, params.A.y);
         else
-            return create(params.eta, params.A);
+            return create(params.eta, params.A.x);
+    }
+
+    void init(SBxDFCreationParams<scalar_type, spectral_type> params)
+    {
+        A = params.A; // NOTE(review): copies both roughness components without checking params.is_aniso, whereas create(params) passes only A.x when !is_aniso — confirm A.y is valid for isotropic params
+        eta = params.eta; // overwrites the stored eta with params.eta
     }
 
     spectral_type eval(params_t params)

From d9a00c9998e9ec33a1cdcbf132bcbf5c1dfc7652 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 20 Feb 2025 16:55:35 +0700
Subject: [PATCH 08/39] bug fixes

---
 examples_tests                                |  2 +-
 .../nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl    |  2 +-
 include/nbl/builtin/hlsl/bxdf/common.hlsl     |  2 +-
 include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl |  4 ++--
 include/nbl/builtin/hlsl/bxdf/reflection.hlsl |  8 +++----
 .../nbl/builtin/hlsl/bxdf/transmission.hlsl   |  9 +++++---
 .../hlsl/sampling/box_muller_transform.hlsl   |  2 +-
 .../hlsl/sampling/concentric_mapping.hlsl     |  2 +-
 .../projected_spherical_triangle.hlsl         | 12 +++++-----
 .../hlsl/sampling/spherical_triangle.hlsl     |  2 +-
 .../nbl/builtin/hlsl/shapes/rectangle.hlsl    | 22 +++++++++----------
 include/nbl/builtin/hlsl/shapes/triangle.hlsl |  4 ++--
 12 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/examples_tests b/examples_tests
index a7350db7d7..2f77555ce4 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit a7350db7d7e422fa5086982b3327103c06cfbe44
+Subproject commit 2f77555ce484c2f8ecb390e68fc3f4c830b23ef7
diff --git a/include/nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl b/include/nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl
index e63cf0113e..78b23830bc 100644
--- a/include/nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl
@@ -101,4 +101,4 @@ struct bxdf_traits<bxdf::transmission::SGGXDielectricBxDF<L, I, A, S> >
 }
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl
index f4ae69aafc..6acdbab74b 100644
--- a/include/nbl/builtin/hlsl/bxdf/common.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl
@@ -874,7 +874,7 @@ struct SBxDFParams
         return retval;
     }
 
-    template<class LightSample, class Aniso NBL_FUNC_REQUIRES(Sample<LightSample> && surface_interactions::Anisotropic<Iso>)
+    template<class LightSample, class Aniso NBL_FUNC_REQUIRES(Sample<LightSample> && surface_interactions::Anisotropic<Aniso>)
     static SBxDFParams<Scalar> create(LightSample _sample, Aniso interaction, BxDFClampMode clamp = BCM_NONE)
     {
         this_t retval;
diff --git a/include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl b/include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl
index 61aa10399c..5a6f6cdf26 100644
--- a/include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl
@@ -236,12 +236,12 @@ struct GGX
 
     scalar_type G1_wo_numerator(scalar_type NdotX, scalar_type NdotX2, scalar_type a2, scalar_type one_minus_a2)
     {
-        return 1.0 / (NdotX + ggx_devsh_part<T>(NdotX2,a2,one_minus_a2));
+        return 1.0 / (NdotX + devsh_part(NdotX2,a2,one_minus_a2));
     }
 
     scalar_type G1_wo_numerator(scalar_type NdotX, scalar_type TdotX2, scalar_type BdotX2, scalar_type NdotX2, scalar_type ax2, scalar_type ay2)
     {
-        return 1.0 / (NdotX + ggx_devsh_part<T>(TdotX2, BdotX2, NdotX2, ax2, ay2));
+        return 1.0 / (NdotX + devsh_part(TdotX2, BdotX2, NdotX2, ax2, ay2));
     }
 
     scalar_type G1_wo_numerator(scalar_type NdotX, scalar_type devsh_part)
diff --git a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
index 76d00c268c..8f4da11b05 100644
--- a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
@@ -232,7 +232,7 @@ struct SBlinnPhongBxDF
             ndf::SAnisotropicParams<scalar_type> ndfparams = ndf::SAnisotropicParams<scalar_type>::create(params.NdotH, 1.0 / (1.0 - params.NdotH2), params.TdotH2, params.BdotH2, n.x, n.y);
             ndf::BlinnPhong<scalar_type> blinn_phong;
             scalar_type DG = blinn_phong(ndfparams);
-            if (any<vector<bool, 2>>(a2 > (vector2_type)numeric_limits<scalar_type>::min))
+            if (any<vector<bool, 2> >(a2 > (vector2_type)numeric_limits<scalar_type>::min))
             {
                 smith::SAnisotropicParams<scalar_type> smithparams = smith::SAnisotropicParams<scalar_type>::create(a2.x, a2.y, params.TdotV2, params.BdotV2, params.NdotV2, params.TdotL2, params.BdotL2, params.NdotL2, 0);
                 smith::Beckmann<scalar_type> beckmann;
@@ -245,7 +245,7 @@ struct SBlinnPhongBxDF
             ndf::SIsotropicParams<scalar_type> ndfparams = ndf::SIsotropicParams<scalar_type>::create(n, params.NdotH, params.NdotH2);
             ndf::BlinnPhong<scalar_type> blinn_phong;
             scalar_type NG = blinn_phong(ndfparams);
-            if (any<vector<bool, 2>>(a2 > (vector2_type)numeric_limits<scalar_type>::min))
+            if (any<vector<bool, 2> >(a2 > (vector2_type)numeric_limits<scalar_type>::min))
             {
                 smith::SIsotropicParams<scalar_type> smithparams = smith::SIsotropicParams<scalar_type>::create(a2.x, params.NdotV2, params.NdotL2, 0);
                 smith::Beckmann<scalar_type> beckmann;
@@ -386,7 +386,7 @@ struct SBeckmannBxDF
             ndf::SAnisotropicParams<scalar_type> ndfparams = ndf::SAnisotropicParams<scalar_type>::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2);
             ndf::Beckmann<scalar_type> beckmann_ndf;
             scalar_type NG = beckmann_ndf(ndfparams);
-            if (any<vector<bool, 2>>(A > (vector2_type)numeric_limits<scalar_type>::min))
+            if (any<vector<bool, 2> >(A > (vector2_type)numeric_limits<scalar_type>::min))
             {
                 smith::SAnisotropicParams<scalar_type> smithparams = smith::SAnisotropicParams<scalar_type>::create(ax2, ay2, params.TdotV2, params.BdotV2, params.NdotV2, params.TdotL2, params.BdotL2, params.NdotL2, 0);
                 smith::Beckmann<scalar_type> beckmann_smith;
@@ -629,7 +629,7 @@ struct SGGXBxDF
             ndf::SAnisotropicParams<scalar_type> ndfparams = ndf::SAnisotropicParams<scalar_type>::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2);
             ndf::GGX<scalar_type> ggx_ndf;
             scalar_type NG = ggx_ndf(ndfparams);
-            if (any<vector<bool, 2>>(A > (vector2_type)numeric_limits<scalar_type>::min))
+            if (any<vector<bool, 2> >(A > (vector2_type)numeric_limits<scalar_type>::min))
             {
                 smith::SAnisotropicParams<scalar_type> smithparams = smith::SAnisotropicParams<scalar_type>::create(ax2, ay2, params.NdotV, params.TdotV2, params.BdotV2, params.NdotV2, params.NdotL, params.TdotL2, params.BdotL2, params.NdotL2);
                 smith::GGX<scalar_type> ggx_smith;
diff --git a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
index 99e7e5f6a8..1d6bf8ead6 100644
--- a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
@@ -110,8 +110,11 @@ struct SLambertianBxDF
 
 
 // microfacet bxdfs
-template<class LightSample, class IsoCache, class AnisoCache, class Spectrum, bool thin = false NBL_FUNC_REQUIRES(Sample<LightSample> && IsotropicMicrofacetCache<IsoCache> && AnisotropicMicrofacetCache<AnisoCache>)
-struct SSmoothDielectricBxDF
+template<class LightSample, class IsoCache, class AnisoCache, class Spectrum, bool thin> // NBL_FUNC_REQUIRES(Sample<LightSample> && IsotropicMicrofacetCache<IsoCache> && AnisotropicMicrofacetCache<AnisoCache>) // dxc won't let me put this in
+struct SSmoothDielectricBxDF;
+
+template<class LightSample, class IsoCache, class AnisoCache, class Spectrum>
+struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, false>
 {
     using this_t = SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, false>;
     using scalar_type = typename LightSample::scalar_type;
@@ -200,7 +203,7 @@ struct SSmoothDielectricBxDF
     scalar_type eta;
 };
 
-template<class LightSample, class IsoCache, class AnisoCache, class Spectrum NBL_FUNC_REQUIRES(Sample<LightSample> && IsotropicMicrofacetCache<IsoCache> && AnisotropicMicrofacetCache<AnisoCache>)
+template<class LightSample, class IsoCache, class AnisoCache, class Spectrum>
 struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, true>
 {
     using this_t = SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, true>;
diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
index efa8d66e2b..57a18589fd 100644
--- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
@@ -24,4 +24,4 @@ vector<T,2> boxMullerTransform(vector<T,2> xi, T stddev)
 }
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
index 2b06581740..60865e7c8e 100644
--- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
@@ -39,4 +39,4 @@ vector<T,2> concentricMapping(vector<T,2> _u)
 }
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
index 5832e9aab2..945ca053b8 100644
--- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
@@ -25,14 +25,14 @@ struct ProjectedSphericalTriangle
     using vector3_type = vector<T, 3>;
     using vector4_type = vector<T, 4>;
 
-    static ProjectedSphericalTriangle<T> create(NBL_CONST_REG_ARG(shapes::SphericalTriangle<T>) tri)
+    static ProjectedSphericalTriangle<T> create(NBL_CONST_REF_ARG(shapes::SphericalTriangle<T>) tri)
     {
         ProjectedSphericalTriangle<T> retval;
         retval.tri = tri;
         return retval;
     }
 
-    vector4_type computeBilinearPatch(NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool isBSDF)
+    vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF)
     {
         const scalar_type minimumProjSolidAngle = 0.0;
     
@@ -42,7 +42,7 @@ struct ProjectedSphericalTriangle
         return bxdfPdfAtVertex.yyxz;
     }
 
-    vector3_type generate(NBL_REG_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REG_ARG(vector3_type) cos_vertices, NBL_CONST_REG_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REG_ARG(vector2_type) u)
+    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u)
     {
         // pre-warp according to proj solid angle approximation
         vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF);
@@ -56,7 +56,7 @@ struct ProjectedSphericalTriangle
         return L;
     }
 
-    vector3_type generate(NBL_REG_ARG(scalar_type) rcpPdf, NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REG_ARG(vector2_type) u)
+    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u)
     {
         scalar_type cos_a, cos_c, csc_b, csc_c;
         vector3_type cos_vertices, sin_vertices;
@@ -64,7 +64,7 @@ struct ProjectedSphericalTriangle
         return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u);
     }
 
-    scalar_type pdf(scalar_type solidAngle, NBL_CONST_REG_ARG(vector3_type) cos_vertices, NBL_CONST_REG_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REG_ARG(vector3_type) L)
+    scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L)
     {
         scalar_type pdf;
         const vector2_type u = tri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L);
@@ -74,7 +74,7 @@ struct ProjectedSphericalTriangle
         return pdf * bilinear.pdf(u);
     }
 
-    scalar_type pdf(NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REG_ARG(vector3_type) L)
+    scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L)
     {
         scalar_type pdf;
         const vector2_type u = tri.generateInverse(pdf, L);
diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
index 9501cdc3d1..1d4fda454d 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
@@ -24,7 +24,7 @@ struct SphericalTriangle
     using vector2_type = vector<T, 2>;
     using vector3_type = vector<T, 3>;
 
-    static SphericalTriangle<T> create(NBL_CONST_REG_ARG(shapes::SphericalTriangle<T>) tri)
+    static SphericalTriangle<T> create(NBL_CONST_REF_ARG(shapes::SphericalTriangle<T>) tri)
     {
         SphericalTriangle<T> retval;
         retval.tri = tri;
diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
index a61f23cafa..47d3927f31 100644
--- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
@@ -16,25 +16,25 @@ namespace hlsl
 namespace shapes
 {
 
-template<typename T>
+template<typename Scalar>
 struct SphericalRectangle
 {
-    using scalar_type = T;
-    using vector3_type = vector<T, 3>;
-    using vector4_type = vector<T, 4>;
-    using matrix3x3_type = matrix<T, 3, 3>;
+    using scalar_type = Scalar;
+    using vector3_type = vector<Scalar, 3>;
+    using vector4_type = vector<Scalar, 4>;
+    using matrix3x3_type = matrix<Scalar, 3, 3>;
 
-    static SphericalRectangle<T> create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis)
+    static SphericalRectangle<scalar_type> create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis)
     {
-        SphericalRectangle<T> retval;
+        SphericalRectangle<scalar_type> retval;
         retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer);
         return retval;
     }
 
-    static SphericalRectangle<T> create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N)
+    static SphericalRectangle<Scalar> create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N)
     {
-        SphericalRectangle<T> retval;
-        matrix3x3_type TBN = nbl::hlsl::transpose<matrix3x3_type>(matrix3x3_type(T, B, isotropic_type::N));
+        SphericalRectangle<scalar_type> retval;
+        matrix3x3_type TBN = nbl::hlsl::transpose<matrix3x3_type>(matrix3x3_type(T, B, N));
         retval.r0 = nbl::hlsl::mul(TBN, rectangleOrigin - observer);
         return retval;
     }
@@ -53,7 +53,7 @@ struct SphericalRectangle
     }
 
     vector3_type r0;
-}
+};
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
index 59ba508596..d904ed7246 100644
--- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
@@ -80,8 +80,8 @@ struct SphericalTriangle
         // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works
         cos_vertices = nbl::hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f);
 
-        matrix<scalar_type, 3, 3> mat = 
-        const vector3_type externalProducts = nbl::hlsl::abs(nbl::hlsl::transpose(awayFromEdgePlane) * receiverNormal);
+        matrix<scalar_type, 3, 3> awayFromEdgePlane = matrix<scalar_type, 3, 3>(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2);
+        const vector3_type externalProducts = nbl::hlsl::abs(/* transposed already */awayFromEdgePlane * receiverNormal);
 
         const vector3_type pyramidAngles = acos(cos_sides);
         return nbl::hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi<float>);

From ffe9029c78af7b2d1bf4d77cbf900d632960edd9 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 21 Feb 2025 14:16:59 +0700
Subject: [PATCH 09/39] fix sampling bugs #2

---
 examples_tests                                                | 2 +-
 include/nbl/builtin/hlsl/sampling/bilinear.hlsl               | 2 +-
 include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl   | 2 +-
 include/nbl/builtin/hlsl/sampling/linear.hlsl                 | 4 ++--
 .../builtin/hlsl/sampling/projected_spherical_triangle.hlsl   | 1 +
 include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl    | 2 +-
 6 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/examples_tests b/examples_tests
index 2f77555ce4..99aed4777c 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 2f77555ce484c2f8ecb390e68fc3f4c830b23ef7
+Subproject commit 99aed4777c208c5acc4e66bb7ea8dc48f814c8d0
diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl
index 1d5f9a91e8..3542e2dfef 100644
--- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl
@@ -31,7 +31,7 @@ struct Bilinear
         return retval;
     }
 
-    vector2_type generate(NBL_REG_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u)
+    vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u)
     {
         const vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]);
         Linear<scalar_type> lineary = Linear<scalar_type>::create(twiceAreasUnderXCurve);
diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
index 57a18589fd..dcac2279be 100644
--- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
@@ -17,7 +17,7 @@ template<typename T>
 vector<T,2> boxMullerTransform(vector<T,2> xi, T stddev)
 {
     T sinPhi, cosPhi;
-    nbl::hlsl::sincos<T>(2.0 * numbers::pi<float> * xi.y - numbers::pi<float>, sinPhi, cosPhi);
+    math::sincos<T>(2.0 * numbers::pi<float> * xi.y - numbers::pi<float>, sinPhi, cosPhi);
     return vector<T,2>(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev;
 }
 
diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl
index 8b9b3fb058..12d445eefe 100644
--- a/include/nbl/builtin/hlsl/sampling/linear.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl
@@ -2,8 +2,8 @@
 // This file is part of the "Nabla Engine".
 // For conditions of distribution and use, see copyright notice in nabla.h
 
-#ifndef _NBL_BUILTIN_HLSL_SAMPLING_BILINEAR_INCLUDED_
-#define _NBL_BUILTIN_HLSL_SAMPLING_BILINEAR_INCLUDED_
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_LINEAR_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_LINEAR_INCLUDED_
 
 #include <nbl/builtin/hlsl/cpp_compat.hlsl>
 #include <nbl/builtin/hlsl/limits.hlsl>
diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
index 945ca053b8..cfc96dc9cb 100644
--- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
@@ -8,6 +8,7 @@
 #include <nbl/builtin/hlsl/cpp_compat.hlsl>
 #include <nbl/builtin/hlsl/limits.hlsl>
 #include <nbl/builtin/hlsl/math/functions.hlsl>
+#include <nbl/builtin/hlsl/sampling/bilinear.hlsl>
 #include <nbl/builtin/hlsl/sampling/spherical_triangle.hlsl>
 
 namespace nbl
diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
index 83224bfabd..c42bf8e464 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
@@ -25,7 +25,7 @@ struct SphericalRectangle
     using vector3_type = vector<T, 3>;
     using vector4_type = vector<T, 4>;
 
-    static SphericalRectangle<T> create(NBL_CONST_REG_ARG(shapes::SphericalRectangle<T>) rect)
+    static SphericalRectangle<T> create(NBL_CONST_REF_ARG(shapes::SphericalRectangle<T>) rect)
     {
         SphericalRectangle<T> retval;
         retval.rect = rect;

From b2073128e066d3c068362c9b505a2fe14e1f43fb Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 21 Feb 2025 16:57:49 +0700
Subject: [PATCH 10/39] fix rank type trait for matrix/vector

---
 examples_tests                            |  2 +-
 include/nbl/builtin/hlsl/type_traits.hlsl | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/examples_tests b/examples_tests
index a1a8ec03bc..a802a97943 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit a1a8ec03bcc90c0f46ffba6d7c50e05a633834b4
+Subproject commit a802a97943bd9e17187a306f8058c21d2774678b
diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl
index 3443a9d9b7..17d0ed827e 100644
--- a/include/nbl/builtin/hlsl/type_traits.hlsl
+++ b/include/nbl/builtin/hlsl/type_traits.hlsl
@@ -612,7 +612,19 @@ NBL_CONSTEXPR bool is_matrix_v = is_matrix<T>::value;
 
 #ifdef __HLSL_VERSION
 template<class T>
-struct rank : integral_constant<uint64_t, is_matrix<T>::value ? 2 : (is_vector<T>::value ? 1 : 0)> { };
+struct rank : integral_constant<uint64_t,
+    conditional_value<
+        is_matrix_v<T>,
+        uint64_t,
+        2ull,
+        conditional_value<
+            is_vector_v<T>,
+            uint64_t,
+            1ull,
+            0ull
+        >::value
+    >::value
+> { };
 
 template<class T, uint64_t N>
 struct rank<T[N]> : integral_constant<uint64_t, 1 + rank<T>::value> { };

From 69a257d878df518902acd56b3be52e8a28c35337 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 25 Feb 2025 16:57:01 +0700
Subject: [PATCH 11/39] temporary fix for dxc bug issue 7154

---
 examples_tests                            |  2 +-
 include/nbl/builtin/hlsl/bxdf/common.hlsl | 38 +++++++++++------------
 include/nbl/builtin/hlsl/limits.hlsl      |  4 +--
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/examples_tests b/examples_tests
index 3827fd3c33..f97757bffc 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 3827fd3c330eaac4c11478330cfb0f4a362f99c6
+Subproject commit f97757bffcc28ad208a10dfb485214b8d9e1fdd1
diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl
index 6acdbab74b..6e5174f73c 100644
--- a/include/nbl/builtin/hlsl/bxdf/common.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl
@@ -312,8 +312,8 @@ NBL_CONCEPT_END(
     ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::createFromTangentSpace(pV,rdirinfo,frame)), ::nbl::hlsl::is_same_v, T))
     ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::create(rdirinfo,pVdotL,pV)), ::nbl::hlsl::is_same_v, T))
     ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::create(rdirinfo,pVdotL,pV,pV,pV)), ::nbl::hlsl::is_same_v, T))
-    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::template create<typename T::ray_dir_info_type>(pV,iso)), ::nbl::hlsl::is_same_v, T))
-    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::template create<typename T::ray_dir_info_type>(pV,aniso)), ::nbl::hlsl::is_same_v, T))
+    //((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::template create<typename T::ray_dir_info_type>(pV,iso)), ::nbl::hlsl::is_same_v, T)) // NOTE: temporarily commented out due to dxc bug https://github.com/microsoft/DirectXShaderCompiler/issues/7154
+    //((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::template create<typename T::ray_dir_info_type>(pV,aniso)), ::nbl::hlsl::is_same_v, T))
     ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getTangentSpaceL()), ::nbl::hlsl::is_same_v, typename T::vector3_type))
 ) && surface_interactions::Anisotropic<typename T::anisotropic_type> && surface_interactions::Isotropic<typename T::isotropic_type> &&
     ray_dir_info::Basic<typename T::ray_dir_info_type>;
@@ -380,21 +380,21 @@ struct SLightSample
         
         return retval;
     }
-    // overloads for surface_interactions
-    template<class ObserverRayDirInfo>
-    static this_t create(NBL_CONST_REF_ARG(vector3_type) L, NBL_CONST_REF_ARG(surface_interactions::SIsotropic<ObserverRayDirInfo>) interaction)
-    {
-        const vector3_type V = interaction.V.getDirection();
-        const scalar_type VdotL = nbl::hlsl::dot<vector3_type>(V,L);
-        return create(L, VdotL, interaction.N);
-    }
-    template<class ObserverRayDirInfo>
-    static this_t create(NBL_CONST_REF_ARG(vector3_type) L, NBL_CONST_REF_ARG(surface_interactions::SAnisotropic<ObserverRayDirInfo>) interaction)
-    {
-        const vector3_type V = interaction.V.getDirection();
-        const scalar_type VdotL = nbl::hlsl::dot<vector3_type>(V,L);
-        return create(L,VdotL,interaction.T,interaction.B,interaction.N);
-    }
+    // overloads for surface_interactions, NOTE: temporarily commented out due to dxc bug https://github.com/microsoft/DirectXShaderCompiler/issues/7154
+    // template<class ObserverRayDirInfo>
+    // static this_t create(NBL_CONST_REF_ARG(vector3_type) L, NBL_CONST_REF_ARG(surface_interactions::SIsotropic<ObserverRayDirInfo>) interaction)
+    // {
+    //     const vector3_type V = interaction.V.getDirection();
+    //     const scalar_type VdotL = nbl::hlsl::dot<vector3_type>(V,L);
+    //     return create(L, VdotL, interaction.N);
+    // }
+    // template<class ObserverRayDirInfo>
+    // static this_t create(NBL_CONST_REF_ARG(vector3_type) L, NBL_CONST_REF_ARG(surface_interactions::SAnisotropic<ObserverRayDirInfo>) interaction)
+    // {
+    //     const vector3_type V = interaction.V.getDirection();
+    //     const scalar_type VdotL = nbl::hlsl::dot<vector3_type>(V,L);
+    //     return create(L,VdotL,interaction.T,interaction.B,interaction.N);
+    // }
     //
     vector3_type getTangentSpaceL() NBL_CONST_MEMBER_FUNC
     {
@@ -875,7 +875,7 @@ struct SBxDFParams
     }
 
     template<class LightSample, class Aniso NBL_FUNC_REQUIRES(Sample<LightSample> && surface_interactions::Anisotropic<Aniso>)
-    static SBxDFParams<Scalar> create(LightSample _sample, Aniso interaction, BxDFClampMode clamp = BCM_NONE)
+    static this_t create(LightSample _sample, Aniso interaction, BxDFClampMode clamp = BCM_NONE)
     {
         this_t retval;
         retval.NdotV = clamp == BCM_ABS ? abs<Scalar>(interaction.NdotV) : 
@@ -922,7 +922,7 @@ struct SBxDFParams
     }
 
     template<class LightSample, class Aniso, class Cache NBL_FUNC_REQUIRES(Sample<LightSample> && surface_interactions::Anisotropic<Aniso> && AnisotropicMicrofacetCache<Cache>)
-    static SBxDFParams<Scalar> create(LightSample _sample, Aniso interaction, Cache cache, BxDFClampMode clamp = BCM_NONE)
+    static this_t create(LightSample _sample, Aniso interaction, Cache cache, BxDFClampMode clamp = BCM_NONE)
     {
         this_t retval;
         retval.NdotH = cache.NdotH;
diff --git a/include/nbl/builtin/hlsl/limits.hlsl b/include/nbl/builtin/hlsl/limits.hlsl
index 146957dc3e..5fe682c9e3 100644
--- a/include/nbl/builtin/hlsl/limits.hlsl
+++ b/include/nbl/builtin/hlsl/limits.hlsl
@@ -129,7 +129,7 @@ struct num_base : type_identity<T>
     NBL_CONSTEXPR_STATIC_INLINE int32_t float_max_decimal_exponent = 4*S16 + 30*S32 + 232*S64;
     
     NBL_CONSTEXPR_STATIC_INLINE int32_t float_exponent_bits = 8 * size - 1 - (float_digits-1);
-    NBL_CONSTEXPR_STATIC_INLINE int32_t float_max_exponent = 1 << (float_exponent_bits-1);
+    NBL_CONSTEXPR_STATIC_INLINE int32_t float_max_exponent = int32_t(1) << (float_exponent_bits-1);
     NBL_CONSTEXPR_STATIC_INLINE int32_t float_min_exponent = 3 - float_max_exponent;
     NBL_CONSTEXPR_STATIC_INLINE bool is_bool = is_same<T, bool>::value;
 
@@ -146,7 +146,7 @@ struct num_base : type_identity<T>
 
     // (TODO) think about what this means for HLSL
     // identifies floating-point types that can represent the special value "quiet not-a-number" (NaN)
-    NBL_CONSTEXPR_STATIC_INLINE bool has_quiet_NaN = !is_integer; 
+    NBL_CONSTEXPR_STATIC_INLINE bool has_quiet_NaN = !is_integer;
     // 	identifies floating-point types that can represent the special value "signaling not-a-number" (NaN)
     NBL_CONSTEXPR_STATIC_INLINE bool has_signaling_NaN = !is_integer;
     // 	identifies the denormalization style used by the floating-point type

From 866e6d7f15ce25e0573341a5d8631354f5b081a0 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 26 Feb 2025 16:55:23 +0700
Subject: [PATCH 12/39] some bug fixes again

---
 examples_tests                                |  2 +-
 include/nbl/builtin/hlsl/bxdf/reflection.hlsl | 20 ++++++------
 .../nbl/builtin/hlsl/bxdf/transmission.hlsl   | 32 +++++++++----------
 .../nbl/builtin/hlsl/sampling/bilinear.hlsl   |  3 +-
 .../hlsl/sampling/concentric_mapping.hlsl     |  2 +-
 .../projected_spherical_triangle.hlsl         | 12 ++++---
 .../hlsl/sampling/spherical_rectangle.hlsl    |  2 +-
 .../nbl/builtin/hlsl/shapes/rectangle.hlsl    |  2 +-
 include/nbl/builtin/hlsl/shapes/triangle.hlsl |  2 +-
 9 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/examples_tests b/examples_tests
index f97757bffc..8e759f24d5 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit f97757bffcc28ad208a10dfb485214b8d9e1fdd1
+Subproject commit 8e759f24d5b386291660f50af1c04efbff3eff08
diff --git a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
index 8f4da11b05..4609216144 100644
--- a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl
@@ -18,13 +18,13 @@ namespace reflection
 {
 
 // still need these?
-template<class LightSample, class Iso, class Aniso, class RayDirInfo, typename Scalar 
+template<class LightSample, class Iso, class Aniso, class RayDirInfo, typename Scalar
     NBL_FUNC_REQUIRES(Sample<LightSample> && surface_interactions::Isotropic<Iso> && surface_interactions::Anisotropic<Aniso> && ray_dir_info::Basic<RayDirInfo> && is_scalar_v<Scalar>)
 LightSample cos_generate(NBL_CONST_REF_ARG(Iso) interaction)
 {
     return LightSample(interaction.V.reflect(interaction.N,interaction.NdotV),interaction.NdotV,interaction.N);
 }
-template<class LightSample, class Iso, class Aniso, class RayDirInfo, typename Scalar 
+template<class LightSample, class Iso, class Aniso, class RayDirInfo, typename Scalar
     NBL_FUNC_REQUIRES(Sample<LightSample> && surface_interactions::Isotropic<Iso> && surface_interactions::Anisotropic<Aniso> && ray_dir_info::Basic<RayDirInfo> && is_scalar_v<Scalar>)
 LightSample cos_generate(NBL_CONST_REF_ARG(Aniso) interaction)
 {
@@ -441,14 +441,14 @@ struct SBeckmannBxDF
             scalar_type sinTheta = sqrt<scalar_type>(1.0 - cosTheta * cosTheta);
             scalar_type tanTheta = sinTheta / cosTheta;
             scalar_type cotTheta = 1.0 / tanTheta;
-            
+
             scalar_type a = -1.0;
             scalar_type c = erf<scalar_type>(cosTheta);
             scalar_type sample_x = max<scalar_type>(u.x, 1.0e-6);
             scalar_type theta = acos<scalar_type>(cosTheta);
             scalar_type fit = 1.0 + theta * (-0.876 + theta * (0.4265 - 0.0594*theta));
             scalar_type b = c - (1.0 + c) * pow<scalar_type>(1.0-sample_x, fit);
-            
+
             scalar_type normalization = 1.0 / (1.0 + c + numbers::inv_sqrtpi<scalar_type> * tanTheta * exp<scalar_type>(-cosTheta*cosTheta));
 
             const int ITER_THRESHOLD = 10;
@@ -475,7 +475,7 @@ struct SBeckmannBxDF
             slope.x = erfInv<scalar_type>(b);
             slope.y = erfInv<scalar_type>(2.0 * max<scalar_type>(u.y, 1.0e-6) - 1.0);
         }
-        
+
         scalar_type sinTheta = sqrt<scalar_type>(1.0 - V.z*V.z);
         scalar_type cosPhi = sinTheta==0.0 ? 1.0 : clamp<scalar_type>(V.x/sinTheta, -1.0, 1.0);
         scalar_type sinPhi = sinTheta==0.0 ? 0.0 : clamp<scalar_type>(V.y/sinTheta, -1.0, 1.0);
@@ -494,7 +494,7 @@ struct SBeckmannBxDF
     {
         const vector3_type localV = interaction.getTangentSpaceV();
         const vector3_type H = __generate(localV, u);
-        
+
         cache = anisocache_type::create(localV, H);
         ray_dir_info_type localL;
         localL.direction = math::reflect<scalar_type>(localV, H, cache.VdotH);
@@ -558,7 +558,7 @@ struct SBeckmannBxDF
             const spectral_type reflectance = fresnelConductor<spectral_type>(ior0, ior1, params.VdotH);
             quo = reflectance * G2_over_G1;
         }
-        
+
         return quotient_pdf_type::create(quo, _pdf);
     }
 
@@ -679,7 +679,7 @@ struct SGGXBxDF
         scalar_type t2 = r * sin<scalar_type>(phi);
         scalar_type s = 0.5 * (1.0 + V.z);
         t2 = (1.0 - s)*sqrt<scalar_type>(1.0 - t1*t1) + s*t2;
-        
+
         //reprojection onto hemisphere
         //TODO try it wothout the max(), not sure if -t1*t1-t2*t2>-1.0
         vector3_type H = t1*T1 + t2*T2 + sqrt<scalar_type>(max<scalar_type>(0.0, 1.0-t1*t1-t2*t2))*V;
@@ -691,7 +691,7 @@ struct SGGXBxDF
     {
         const vector3_type localV = interaction.getTangentSpaceV();
         const vector3_type H = __generate(localV, u);
-        
+
         cache = anisocache_type::create(localV, H);
         ray_dir_info_type localL;
         localL.direction = math::reflect<scalar_type>(localV, H, cache.VdotH);
@@ -753,7 +753,7 @@ struct SGGXBxDF
             const spectral_type reflectance = fresnelConductor<spectral_type>(ior0, ior1, params.VdotH);
             quo = reflectance * G2_over_G1;
         }
-        
+
         return quotient_pdf_type::create(quo, _pdf);
     }
 
diff --git a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
index 1d6bf8ead6..38de552f00 100644
--- a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl
@@ -17,13 +17,13 @@ namespace bxdf
 namespace transmission
 {
 
-template<class LightSample, class Iso, class Aniso, class RayDirInfo, typename Scalar 
+template<class LightSample, class Iso, class Aniso, class RayDirInfo, typename Scalar
         NBL_FUNC_REQUIRES(Sample<LightSample> && surface_interactions::Isotropic<Iso> && surface_interactions::Anisotropic<Aniso> && ray_dir_info::Basic<RayDirInfo> && is_scalar_v<Scalar>)
 LightSample cos_generate(NBL_CONST_REF_ARG(Iso) interaction)
 {
     return LightSample(interaction.V.transmit(),-1.f,interaction.N);
 }
-template<class LightSample, class Iso, class Aniso, class RayDirInfo, typename Scalar 
+template<class LightSample, class Iso, class Aniso, class RayDirInfo, typename Scalar
     NBL_FUNC_REQUIRES(Sample<LightSample> && surface_interactions::Isotropic<Iso> && surface_interactions::Anisotropic<Aniso> && ray_dir_info::Basic<RayDirInfo> && is_scalar_v<Scalar>)
 LightSample cos_generate(NBL_CONST_REF_ARG(Aniso) interaction)
 {
@@ -169,7 +169,7 @@ struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, false>
         scalar_type orientedEta, rcpOrientedEta;
         const bool backside = math::getOrientedEtas<scalar_type>(orientedEta, rcpOrientedEta, interaction.NdotV, eta);
         bool dummy;
-        return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV, 
+        return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV,
             interaction.NdotV, interaction.NdotV*interaction.NdotV, u, rcpOrientedEta, orientedEta*orientedEta, rcpOrientedEta*rcpOrientedEta, dummy);
     }
 
@@ -178,7 +178,7 @@ struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, false>
         scalar_type orientedEta, rcpOrientedEta;
         const bool backside = math::getOrientedEtas<scalar_type>(orientedEta, rcpOrientedEta, interaction.NdotV, eta);
         bool dummy;
-        return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV, 
+        return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV,
             nbl::hlsl::abs<scalar_type>(interaction.NdotV), interaction.NdotV*interaction.NdotV, u, rcpOrientedEta, orientedEta*orientedEta, rcpOrientedEta*rcpOrientedEta, dummy);
     }
 
@@ -191,7 +191,7 @@ struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, false>
     quotient_pdf_type quotient_and_pdf(params_t params)
     {
         const bool transmitted = isTransmissionPath(params.uNdotV, params.uNdotL);
-        
+
         scalar_type dummy, rcpOrientedEta;
         const bool backside = math::getOrientedEtas<scalar_type>(dummy, rcpOrientedEta, params.NdotV, eta);
 
@@ -245,7 +245,7 @@ struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, true>
     }
 
     // usually `luminosityContributionHint` would be the Rec.709 luma coefficients (the Y row of the RGB to CIE XYZ matrix)
-    // its basically a set of weights that determine 
+    // its basically a set of weights that determine
     // assert(1.0==luminosityContributionHint.r+luminosityContributionHint.g+luminosityContributionHint.b);
     // `remainderMetadata` is a variable which the generator function returns byproducts of sample generation that would otherwise have to be redundantly calculated `quotient_and_pdf`
     sample_type __generate_wo_clamps(vector3_type V, vector3_type T, vector3_type B, vector3_type N, scalar_type NdotV, scalar_type absNdotV, NBL_REF_ARG(vector3_type) u, spectral_type eta2, spectral_type luminosityContributionHint, NBL_REF_ARG(spectral_type) remainderMetadata)
@@ -259,7 +259,7 @@ struct SSmoothDielectricBxDF<LightSample, IsoCache, AnisoCache, Spectrum, true>
         scalar_type rcpChoiceProb;
         const bool transmitted = math::partitionRandVariable(reflectionProb, u.z, rcpChoiceProb);
         remainderMetadata = (transmitted ? ((spectral_type)(1.0) - reflectance) : reflectance) * rcpChoiceProb;
-        
+
         ray_dir_info_type L;
         L.direction = (transmitted ? (vector3_type)(0.0) : N * 2.0f * NdotV) - V;
         return sample_type::create(L, nbl::hlsl::dot<vector3_type>(V, L.direction), T, B, N);
@@ -352,7 +352,7 @@ struct SBeckmannDielectricBxDF
         scalar_type orientedEta, dummy;
         const bool backside = math::getOrientedEtas<scalar_type>(orientedEta, dummy, params.VdotH, eta);
         const scalar_type orientedEta2 = orientedEta * orientedEta;
-        
+
         const scalar_type VdotHLdotH = params.VdotH * params.LdotH;
         const bool transmitted = VdotHLdotH < 0.0;
 
@@ -373,10 +373,10 @@ struct SBeckmannDielectricBxDF
     {
         const scalar_type localVdotH = nbl::hlsl::dot<vector3_type>(localV,H);
         const scalar_type reflectance = fresnelDielectric_common<scalar_type>(orientedEta2,nbl::hlsl::abs<scalar_type>(localVdotH));
-        
+
         scalar_type rcpChoiceProb;
         bool transmitted = math::partitionRandVariable(reflectance, u.z, rcpChoiceProb);
-        
+
         cache = anisocache_type::create(localV, H);
 
         const scalar_type VdotH = cache.VdotH;
@@ -419,7 +419,7 @@ struct SBeckmannDielectricBxDF
         const bool transmitted = VdotHLdotH < 0.0;
 
         const scalar_type reflectance = fresnelDielectric_common<scalar_type>(orientedEta2, nbl::hlsl::abs<scalar_type>(params.VdotH));
-        
+
         scalar_type ndf, lambda;
         if (params.is_aniso)
         {
@@ -443,7 +443,7 @@ struct SBeckmannDielectricBxDF
             smith::Beckmann<scalar_type> beckmann_smith;
             lambda = beckmann_smith.Lambda(params.NdotV2, a2);
         }
-    
+
         return smith::VNDF_pdf_wo_clamps<smith::Beckmann<scalar_type> >(ndf,lambda,params.NdotV,transmitted,params.VdotH,params.LdotH,VdotHLdotH,orientedEta,reflectance,onePlusLambda_V);
     }
 
@@ -533,7 +533,7 @@ struct SGGXDielectricBxDF
         scalar_type orientedEta, dummy;
         const bool backside = math::getOrientedEtas<scalar_type>(orientedEta, dummy, params.VdotH, eta);
         const scalar_type orientedEta2 = orientedEta * orientedEta;
-        
+
         const scalar_type VdotHLdotH = params.VdotH * params.LdotH;
         const bool transmitted = VdotHLdotH < 0.0;
 
@@ -560,10 +560,10 @@ struct SGGXDielectricBxDF
     {
         const scalar_type localVdotH = nbl::hlsl::dot<vector3_type>(localV,H);
         const scalar_type reflectance = fresnelDielectric_common<scalar_type>(orientedEta2,nbl::hlsl::abs<scalar_type>(localVdotH));
-        
+
         scalar_type rcpChoiceProb;
         bool transmitted = math::partitionRandVariable(reflectance, u.z, rcpChoiceProb);
-        
+
         cache = anisocache_type::create(localV, H);
 
         const scalar_type VdotH = cache.VdotH;
@@ -640,7 +640,7 @@ struct SGGXDielectricBxDF
     {
         const scalar_type ax2 = A.x*A.x;
         const scalar_type ay2 = A.y*A.y;
-        
+
         scalar_type _pdf = pdf(params);
 
         smith::GGX<scalar_type> ggx_smith;
diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl
index 3542e2dfef..42a923f650 100644
--- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl
@@ -31,8 +31,9 @@ struct Bilinear
         return retval;
     }
 
-    vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u)
+    vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) _u)
     {
+        vector2_type u = _u;
         const vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]);
         Linear<scalar_type> lineary = Linear<scalar_type>::create(twiceAreasUnderXCurve);
         u.y = lineary.generate(u.y);
diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
index 60865e7c8e..dfc7dd6bcb 100644
--- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
@@ -16,7 +16,7 @@ vector<T,2> concentricMapping(vector<T,2> _u)
     vector<T,2> u = 2.0f * _u - 1.0f;
     
     vector<T,2> p;
-    if (u == (vector<T,2>)(0.0))
+    if (nbl::hlsl::all<vector<T,2> >(u == (vector<T,2>)(0.0)))
         p = (vector<T,2>)(0.0);
     else
     {
diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
index cfc96dc9cb..f2f29ed12b 100644
--- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl
@@ -36,22 +36,23 @@ struct ProjectedSphericalTriangle
     vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF)
     {
         const scalar_type minimumProjSolidAngle = 0.0;
-    
+
         matrix<T, 3, 3> m = matrix<T, 3, 3>(tri.vertex0, tri.vertex1, tri.vertex2);
         const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), (vector3_type)minimumProjSolidAngle);
 
         return bxdfPdfAtVertex.yyxz;
     }
 
-    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u)
+    vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) _u)
     {
+        vector2_type u = _u; // working copy of the caller's sample; re-assigned by the bilinear pre-warp below
         // pre-warp according to proj solid angle approximation
         vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF);
         Bilinear<scalar_type> bilinear = Bilinear<scalar_type>::create(patch);
         u = bilinear.generate(rcpPdf, u);
 
         // now warp the points onto a spherical triangle
-        const vector3_type L = tri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u);
+        const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u);
         rcpPdf *= solidAngle;
 
         return L;
@@ -68,7 +69,7 @@ struct ProjectedSphericalTriangle
     scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L)
     {
         scalar_type pdf;
-        const vector2_type u = tri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L);
+        const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L);
 
         vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF);
         Bilinear<scalar_type> bilinear = Bilinear<scalar_type>::create(patch);
@@ -78,7 +79,7 @@ struct ProjectedSphericalTriangle
     scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L)
     {
         scalar_type pdf;
-        const vector2_type u = tri.generateInverse(pdf, L);
+        const vector2_type u = sphtri.generateInverse(pdf, L);
 
         vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF);
         Bilinear<scalar_type> bilinear = Bilinear<scalar_type>::create(patch);
@@ -86,6 +87,7 @@ struct ProjectedSphericalTriangle
     }
 
     shapes::SphericalTriangle<T> tri;
+    sampling::SphericalTriangle<T> sphtri;
 };
 
 }
diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
index c42bf8e464..cca3f21dd9 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
@@ -35,7 +35,7 @@ struct SphericalRectangle
     vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S)
     {
         const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x);
-        const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt(vector4_type(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z);
+        const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z);
         const vector4_type cosGamma = vector4_type(
             -n_z[0] * n_z[1],
             -n_z[1] * n_z[2],
diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
index 47d3927f31..f1a1e37575 100644
--- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
@@ -43,7 +43,7 @@ struct SphericalRectangle
     {
         const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x);
         const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z);
-        const vector4_type cosGamma = vec4(
+        const vector4_type cosGamma = vector4_type(
             -n_z[0] * n_z[1],
             -n_z[1] * n_z[2],
             -n_z[2] * n_z[3],
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
index d904ed7246..67fdfa0476 100644
--- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
@@ -81,7 +81,7 @@ struct SphericalTriangle
         cos_vertices = nbl::hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f);
 
         matrix<scalar_type, 3, 3> awayFromEdgePlane = matrix<scalar_type, 3, 3>(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2);
-        const vector3_type externalProducts = nbl::hlsl::abs(/* transposed already */awayFromEdgePlane * receiverNormal);
+        const vector3_type externalProducts = nbl::hlsl::abs(nbl::hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal));
 
         const vector3_type pyramidAngles = acos(cos_sides);
         return nbl::hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi<float>);

From 85e955f3ceb7bc7a74647c3c06eee283603028aa Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 27 Feb 2025 10:38:37 +0700
Subject: [PATCH 13/39] fix wrong template usage

---
 include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
index dfc7dd6bcb..437f9fe963 100644
--- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl
@@ -16,7 +16,7 @@ vector<T,2> concentricMapping(vector<T,2> _u)
     vector<T,2> u = 2.0f * _u - 1.0f;
     
     vector<T,2> p;
-    if (nbl::hlsl::all<vector<T,2> >(u == (vector<T,2>)(0.0)))
+    if (nbl::hlsl::all<vector<bool,2> >(u == (vector<T,2>)(0.0)))
         p = (vector<T,2>)(0.0);
     else
     {

From 1c773d9bb78b3124ff0ba727c15b0818794f0502 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 4 Mar 2025 17:03:09 +0700
Subject: [PATCH 14/39] fix typo

---
 include/nbl/builtin/hlsl/bxdf/common.hlsl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl
index 66592d6319..1ddff69d72 100644
--- a/include/nbl/builtin/hlsl/bxdf/common.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl
@@ -694,7 +694,6 @@ struct SAnisotropicMicrofacetCache
         const scalar_type eta
     )
     {
-        isocache_type iso = (isocache_type)retval;
         vector3_type H;
         const bool valid = isocache_type::compute(retval.iso_cache,interaction,_sample,eta,H);
         if (valid)

From e8d2ed8cc0ebf0e76ee560aabecf8fe44eb3fbd4 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 14 Mar 2025 17:02:10 +0700
Subject: [PATCH 15/39] fixed some func usage to nbl ver

---
 .../hlsl/sampling/spherical_triangle.hlsl     |  2 +-
 include/nbl/builtin/hlsl/shapes/triangle.hlsl | 35 +++++++++++--------
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
index 1d4fda454d..7828fc14ea 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
@@ -107,7 +107,7 @@ struct SphericalTriangle
         const scalar_type u = subTriSolidAngleRatio > numeric_limits<scalar_type>::min ? subTriSolidAngleRatio : 0.0;
 
         const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_);
-        const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < asfloat(0x3f7fffff) ? cosBC_s : cos_c));
+        const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast<float>(0x3f7fffff) ? cosBC_s : cos_c));
 
         return vector2_type(u,v);
     }
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
index 67fdfa0476..d3f5a90215 100644
--- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
@@ -5,6 +5,7 @@
 #ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_
 #define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_
 
+#include <nbl/builtin/hlsl/tgmath.hlsl>
 #include <nbl/builtin/hlsl/cpp_compat.hlsl>
 #include <nbl/builtin/hlsl/limits.hlsl>
 #include <nbl/builtin/hlsl/math/functions.hlsl>
@@ -33,9 +34,13 @@ struct SphericalTriangle
 
     bool pyramidAngles(NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides)
     {
-        cos_sides = vector3_type(nbl::hlsl::dot(vertex1, vertex2), nbl::hlsl::dot(vertex2, vertex0), nbl::hlsl::dot(vertex0, vertex1));
-        csc_sides = 1.0 / nbl::hlsl::sqrt((vector3_type)(1.f) - cos_sides * cos_sides);
-        return nbl::hlsl::any(csc_sides >= (vector3_type)(numeric_limits<scalar_type>::max));
+        cos_sides = vector3_type(hlsl::dot(vertex1, vertex2), hlsl::dot(vertex2, vertex0), hlsl::dot(vertex0, vertex1));
+        csc_sides = (vector3_type)(1.f) - cos_sides * cos_sides;
+        csc_sides.x = hlsl::rsqrt<scalar_type>(csc_sides.x);
+        csc_sides.y = hlsl::rsqrt<scalar_type>(csc_sides.y);
+        csc_sides.z = hlsl::rsqrt<scalar_type>(csc_sides.z);
+
+        return hlsl::any<vector<bool, 3> >(csc_sides >= (vector3_type)(numeric_limits<scalar_type>::max));
     }
 
     scalar_type solidAngleOfTriangle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices, NBL_REF_ARG(scalar_type) cos_a, NBL_REF_ARG(scalar_type) cos_c, NBL_REF_ARG(scalar_type) csc_b, NBL_REF_ARG(scalar_type) csc_c)
@@ -51,8 +56,8 @@ struct SphericalTriangle
         csc_c = csc_sides[2];
 
         // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI
-        cos_vertices = clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
-        sin_vertices = sqrt((vector3_type)1.f - cos_vertices * cos_vertices);
+        cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
+        sin_vertices = hlsl::sqrt((vector3_type)1.f - cos_vertices * cos_vertices);
 
         return math::getArccosSumofABC_minus_PI(cos_vertices[0], cos_vertices[1], cos_vertices[2], sin_vertices[0], sin_vertices[1], sin_vertices[2]);
     }
@@ -69,22 +74,22 @@ struct SphericalTriangle
         if (pyramidAngles(cos_sides, csc_sides))
             return 0.f;
 
-        vector3_type awayFromEdgePlane0 = nbl::hlsl::cross(vertex1, vertex2) * csc_sides[0];
-        vector3_type awayFromEdgePlane1 = nbl::hlsl::cross(vertex2, vertex0) * csc_sides[1];
-        vector3_type awayFromEdgePlane2 = nbl::hlsl::cross(vertex0, vertex1) * csc_sides[2];
+        vector3_type awayFromEdgePlane0 = hlsl::cross<vector3_type>(vertex1, vertex2) * csc_sides[0];
+        vector3_type awayFromEdgePlane1 = hlsl::cross<vector3_type>(vertex2, vertex0) * csc_sides[1];
+        vector3_type awayFromEdgePlane2 = hlsl::cross<vector3_type>(vertex0, vertex1) * csc_sides[2];
 
         // useless here but could be useful somewhere else
-        cos_vertices[0] = nbl::hlsl::dot(awayFromEdgePlane1, awayFromEdgePlane2);
-        cos_vertices[1] = nbl::hlsl::dot(awayFromEdgePlane2, awayFromEdgePlane0);
-        cos_vertices[2] = nbl::hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1);
+        cos_vertices[0] = hlsl::dot<vector3_type>(awayFromEdgePlane1, awayFromEdgePlane2);
+        cos_vertices[1] = hlsl::dot<vector3_type>(awayFromEdgePlane2, awayFromEdgePlane0);
+        cos_vertices[2] = hlsl::dot<vector3_type>(awayFromEdgePlane0, awayFromEdgePlane1);
         // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works
-        cos_vertices = nbl::hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f);
+        cos_vertices = hlsl::clamp<vector3_type>((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f);
 
         matrix<scalar_type, 3, 3> awayFromEdgePlane = matrix<scalar_type, 3, 3>(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2);
-        const vector3_type externalProducts = nbl::hlsl::abs(nbl::hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal));
+        const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal));
 
-        const vector3_type pyramidAngles = acos(cos_sides);
-        return nbl::hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi<float>);
+        const vector3_type pyramidAngles = acos<scalar_type>(cos_sides);
+        return hlsl::dot<vector3_type>(pyramidAngles, externalProducts) / (2.f * numbers::pi<float>);
     }
 
     vector3_type vertex0;

From c47f4469cbf529b1f2b370817c50416aa194ead3 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Mon, 17 Mar 2025 13:57:25 +0700
Subject: [PATCH 16/39] specify template args

---
 .../hlsl/sampling/spherical_rectangle.hlsl    | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
index cca3f21dd9..663cd5e3d1 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
@@ -35,7 +35,7 @@ struct SphericalRectangle
     vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S)
     {
         const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x);
-        const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z);
+        const vector4_type n_z = denorm_n_z / hlsl::sqrt<vector4_type>((vector4_type)(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z);
         const vector4_type cosGamma = vector4_type(
             -n_z[0] * n_z[1],
             -n_z[1] * n_z[2],
@@ -54,24 +54,24 @@ struct SphericalRectangle
         const scalar_type CLAMP_EPS = 1e-5f;
 
         // flip z axsis if rect.r0.z > 0
-        const uint32_t zFlipMask = (asuint(rect.r0.z) ^ 0x80000000u) & 0x80000000u;
-        rect.r0.z = asfloat(asuint(rect.r0.z) ^ zFlipMask);
+        const uint32_t zFlipMask = (bit_cast<uint32_t>(rect.r0.z) ^ 0x80000000u) & 0x80000000u;
+        rect.r0.z = bit_cast<float32_t>(bit_cast<uint32_t>(rect.r0.z) ^ zFlipMask);
         vector3_type r1 = rect.r0 + vector3_type(rectangleExtents.x, rectangleExtents.y, 0);
 
         const scalar_type au = uv.x * S + k;
-        const scalar_type fu = (nbl::hlsl::cos(au) * b0 - b1) / nbl::hlsl::sin(au);
-        const scalar_type cu_2 = nbl::hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1]
-        const scalar_type cu = asfloat(asuint(1.0 / nbl::hlsl::sqrt(cu_2)) ^ (asuint(fu) & 0x80000000u));
+        const scalar_type fu = (hlsl::cos<scalar_type>(au) * b0 - b1) / hlsl::sin<scalar_type>(au);
+        const scalar_type cu_2 = hlsl::max<scalar_type>(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1]
+        const scalar_type cu = bit_cast<float32_t>(bit_cast<uint32_t>(scalar_type(1.0) / hlsl::sqrt<scalar_type>(cu_2)) ^ (bit_cast<uint32_t>(fu) & 0x80000000u)); // reciprocal kept in scalar_type so the bit_cast source is not a promoted double on the C++ side
 
-        scalar_type xu = -(cu * rect.r0.z) * 1.0 / nbl::hlsl::sqrt(1 - cu * cu);
-        xu = nbl::hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs
+        scalar_type xu = -(cu * rect.r0.z) * 1.0 / hlsl::sqrt<scalar_type>(1 - cu * cu);
+        xu = hlsl::clamp<scalar_type>(xu, rect.r0.x, r1.x); // avoid Infs
         const scalar_type d_2 = xu * xu + rect.r0.z * rect.r0.z;
-        const scalar_type d = nbl::hlsl::sqrt(d_2);
+        const scalar_type d = hlsl::sqrt<scalar_type>(d_2);
 
-        const scalar_type h0 = rect.r0.y / nbl::hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y);
-        const scalar_type h1 = r1.y / nbl::hlsl::sqrt(d_2 + r1.y * r1.y);
+        const scalar_type h0 = rect.r0.y / hlsl::sqrt<scalar_type>(d_2 + rect.r0.y * rect.r0.y);
+        const scalar_type h1 = r1.y / hlsl::sqrt<scalar_type>(d_2 + r1.y * r1.y);
         const scalar_type hv = h0 + uv.y * (h1 - h0), hv2 = hv * hv;
-        const scalar_type yv = (hv2 < 1 - CLAMP_EPS) ? (hv * d) / nbl::hlsl::sqrt(1 - hv2) : r1.y;
+        const scalar_type yv = (hv2 < 1 - CLAMP_EPS) ? (hv * d) / hlsl::sqrt<scalar_type>(1 - hv2) : r1.y;
 
         return vector2_type((xu - rect.r0.x) / rectangleExtents.x, (yv - rect.r0.y) / rectangleExtents.y);
     }

From 1137b6b767142ef44e2bad80032e0053515a0051 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Mon, 17 Mar 2025 14:31:39 +0700
Subject: [PATCH 17/39] update to latest example

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index a2c84c2513..b5194ef176 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit a2c84c2513510f42cd245a015c6ba7ddd0d6eeaa
+Subproject commit b5194ef1768587aef72be4fdc044b5b881160609

From 7e8dd8143795536d2c82e01d1d24db5bcf2a4aeb Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 19 Mar 2025 16:16:31 +0700
Subject: [PATCH 18/39] latest example

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index a2c84c2513..ca8f2ec8fa 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit a2c84c2513510f42cd245a015c6ba7ddd0d6eeaa
+Subproject commit ca8f2ec8fa84a2bd1bfeb4348263f82d14026bca

From b1546383f2f44e4bcb84456aee990a278672e74b Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 21 Mar 2025 16:49:40 +0700
Subject: [PATCH 19/39] fix use of static const in func

---
 examples_tests                               | 2 +-
 include/nbl/builtin/hlsl/math/functions.hlsl | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/examples_tests b/examples_tests
index ca8f2ec8fa..e95f09d5d2 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit ca8f2ec8fa84a2bd1bfeb4348263f82d14026bca
+Subproject commit e95f09d5d20181c4107064cec08bddc689a7f399
diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl
index f7a84005e8..f47efff877 100644
--- a/include/nbl/builtin/hlsl/math/functions.hlsl
+++ b/include/nbl/builtin/hlsl/math/functions.hlsl
@@ -122,11 +122,7 @@ void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T)
 
 bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb)
 {
-#ifdef __HLSL_VERSION
-    NBL_CONSTEXPR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u);
-#else
-    NBL_CONSTEXPR float32_t NEXT_ULP_AFTER_UNITY = bit_cast<float32_t>(0x3f800001u);
-#endif
+    const float32_t NEXT_ULP_AFTER_UNITY = bit_cast<float32_t>(0x3f800001u);
     const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY;
 
     // This is all 100% correct taking into account the above NEXT_ULP_AFTER_UNITY

From 7892563055f9bc95c14b9704bd4b24c5000eda96 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Mon, 24 Mar 2025 14:43:43 +0700
Subject: [PATCH 20/39] added more morton order stuff

---
 examples_tests                             |  2 +-
 include/nbl/builtin/glsl/utils/morton.glsl | 17 ++++++
 include/nbl/builtin/hlsl/math/morton.hlsl  | 68 ++++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl

diff --git a/examples_tests b/examples_tests
index e95f09d5d2..3cdfb4baf2 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit e95f09d5d20181c4107064cec08bddc689a7f399
+Subproject commit 3cdfb4baf2df319643620a8189c277dec20cb163
diff --git a/include/nbl/builtin/glsl/utils/morton.glsl b/include/nbl/builtin/glsl/utils/morton.glsl
index de3be8b9c7..fd07a9cad8 100644
--- a/include/nbl/builtin/glsl/utils/morton.glsl
+++ b/include/nbl/builtin/glsl/utils/morton.glsl
@@ -22,6 +22,18 @@ uint nbl_glsl_morton_decode2d8bComponent(in uint x)
     return x;
 }
 
+uint nbl_glsl_morton_decode2d32bComponent(in uint x) 
+{
+    x &= 0x55555555u;
+    x = (x ^ (x >>  1u)) & 0x33333333u;
+    x = (x ^ (x >>  2u)) & 0x0f0f0f0fu;
+    x = (x ^ (x >>  4u)) & 0x00ff00ffu;
+    x = (x ^ (x >>  8u)) & 0x0000ffffu;
+    x = (x ^ (x >>  16u));
+    return x;
+}
+
+
 uvec2 nbl_glsl_morton_decode2d4b(in uint x)
 {
     return uvec2(nbl_glsl_morton_decode2d4bComponent(x), nbl_glsl_morton_decode2d4bComponent(x >> 1u));
@@ -32,4 +44,9 @@ uvec2 nbl_glsl_morton_decode2d8b(in uint x)
     return uvec2(nbl_glsl_morton_decode2d8bComponent(x), nbl_glsl_morton_decode2d8bComponent(x >> 1u));
 }
 
+uvec2 nbl_glsl_morton_decode2d32b(in uint x)
+{
+    return uvec2(nbl_glsl_morton_decode2d32bComponent(x), nbl_glsl_morton_decode2d32bComponent(x >> 1u));
+}
+
 #endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl
new file mode 100644
index 0000000000..4a6cb5dfd3
--- /dev/null
+++ b/include/nbl/builtin/hlsl/math/morton.hlsl
@@ -0,0 +1,68 @@
+// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace math
+{
+
+namespace impl
+{
+
+template<typename T, uint32_t bitDepth>
+struct MortonComponent;
+
+template<typename T>
+struct MortonComponent<T, 8u>
+{
+    static T decode2d(T x)
+    {
+        x &= 0x55555555u;
+        x = (x ^ (x >>  1u)) & 0x33333333u;
+        x = (x ^ (x >>  2u)) & 0x0f0f0f0fu;
+        x = (x ^ (x >>  4u)) & 0x00ff00ffu;
+        return x;
+    }
+};
+
+template<typename T>
+struct MortonComponent<T, 32u>
+{
+    static T decode2d(T x)
+    {
+        x &= 0x55555555u;
+        x = (x ^ (x >>  1u)) & 0x33333333u;
+        x = (x ^ (x >>  2u)) & 0x0f0f0f0fu;
+        x = (x ^ (x >>  4u)) & 0x00ff00ffu;
+        x = (x ^ (x >>  8u)) & 0x0000ffffu;
+        x = (x ^ (x >>  16u));
+        return x;
+    }
+};
+
+}
+
+template<typename T, uint32_t bitDepth=sizeof(T)*8u>
+struct Morton
+{
+    using vector2_type = vector<T, 2>;
+    using component_type = impl::MortonComponent<T, bitDepth>;
+
+    static vector2_type decode2d(T x)
+    {
+        return vector2_type(component_type::decode2d(x), component_type::decode2d(x >> 1u));
+    }
+};
+
+}
+}
+}
+
+#endif

From b21b789991cb544c9d95ce4b28dc22e28c68778b Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 28 Mar 2025 14:40:47 +0700
Subject: [PATCH 21/39] latest example

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index 3cdfb4baf2..1535561525 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 3cdfb4baf2df319643620a8189c277dec20cb163
+Subproject commit 1535561525c1df59d227969692ae7405b507962b

From 26adf951ee2e83ca23eeb8700543bdf37f8261cd Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 27 Jun 2025 10:19:00 +0700
Subject: [PATCH 22/39] latest example

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index a6de5908a2..52c1aa54cf 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit a6de5908a269d0f6853e0c1e94dec8fcdbe6540e
+Subproject commit 52c1aa54cf859b63a8ff6df648f743003e5e13fe

From 36910c676e1bc7a26802a5b8d7f84227b208a803 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Mon, 30 Jun 2025 09:58:25 +0700
Subject: [PATCH 23/39] latest example fixes

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index 52c1aa54cf..8b31859520 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 52c1aa54cf859b63a8ff6df648f743003e5e13fe
+Subproject commit 8b31859520069831b246d13270b43b97aea83141

From 19ba15d93ba69745c83d5427e0323f0e3bd54f9f Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Thu, 25 Sep 2025 17:23:03 +0200
Subject: [PATCH 24/39] Updated examples

---
 examples_tests                   | 2 +-
 src/nbl/video/CSurfaceVulkan.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples_tests b/examples_tests
index 3400a2a498..3d206fd4c7 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 3400a2a498b6b3738d63aff66dd0363a4a9f8c67
+Subproject commit 3d206fd4c772628fe92e95d6e4526170df09900c
diff --git a/src/nbl/video/CSurfaceVulkan.cpp b/src/nbl/video/CSurfaceVulkan.cpp
index 7d135b04cc..7feda9fcd3 100644
--- a/src/nbl/video/CSurfaceVulkan.cpp
+++ b/src/nbl/video/CSurfaceVulkan.cpp
@@ -60,7 +60,7 @@ bool ISurfaceVulkan::isSupportedForPhysicalDevice(const IPhysicalDevice* physica
 
 	core::bitflag<ISurface::E_PRESENT_MODE> ISurfaceVulkan::getAvailablePresentModesForPhysicalDevice(const IPhysicalDevice* physicalDevice) const
 	{
-		constexpr uint32_t MAX_PRESENT_MODE_COUNT = 4u;
+		constexpr uint32_t MAX_PRESENT_MODE_COUNT = 5u;
 
 		core::bitflag<ISurface::E_PRESENT_MODE> result = ISurface::EPM_UNKNOWN;
 

From 7ec3f394c645d4ce952a728d75bf001283e26a3c Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Sat, 18 Oct 2025 19:12:31 +0200
Subject: [PATCH 25/39] Created RWMC files

---
 examples_tests                                |   2 +-
 .../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 100 +++++++++++
 include/nbl/builtin/hlsl/rwmc/rwmc.hlsl       | 160 ++++++++++++++++++
 src/nbl/builtin/CMakeLists.txt                |   3 +
 4 files changed, 264 insertions(+), 1 deletion(-)
 create mode 100644 include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl
 create mode 100644 include/nbl/builtin/hlsl/rwmc/rwmc.hlsl

diff --git a/examples_tests b/examples_tests
index 3d206fd4c7..8ecc60ff8a 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 3d206fd4c772628fe92e95d6e4526170df09900c
+Subproject commit 8ecc60ff8af7b68564d769e04ec06d2e87cbf8e2
diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl
new file mode 100644
index 0000000000..6678a66942
--- /dev/null
+++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl
@@ -0,0 +1,100 @@
+#ifndef _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_
+#define _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include <nbl/builtin/hlsl/vector_utils/vector_traits.hlsl>
+#include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace rwmc
+{
+
+struct CascadeSettings
+{
+    uint32_t size;
+    uint32_t start;
+    uint32_t base;
+};
+
+template<typename CascadeLayerType, uint32_t CascadeSize>
+struct CascadeEntry
+{
+    CascadeLayerType data[CascadeSize];
+};
+
+template<typename CascadeLayerType, uint32_t CascadeSize>
+struct CascadeAccumulator
+{
+    using output_storage_type = CascadeEntry<CascadeLayerType, CascadeSize>;
+    using initialization_data = CascadeSettings;
+    output_storage_type accumulation;
+    uint32_t cascadeSampleCounter[CascadeSize];
+    CascadeSettings cascadeSettings;
+
+    void initialize(in CascadeSettings settings)
+    {
+        for (int i = 0; i < CascadeSize; ++i)
+        {
+            accumulation.data[i] = (CascadeLayerType)0.0f;
+            cascadeSampleCounter[i] = 0u;
+        }
+
+        cascadeSettings.size = settings.size;
+        cascadeSettings.start = settings.start;
+        cascadeSettings.base = settings.base;
+    }
+
+    typename vector_traits<CascadeLayerType>::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col)
+    {
+        return hlsl::dot<CascadeLayerType>(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col);
+    }
+
+    // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp
+    void addSample(uint32_t sampleIndex, float32_t3 sample)
+    {
+        float lowerScale = cascadeSettings.start;
+        float upperScale = lowerScale * cascadeSettings.base;
+
+        const float luma = getLuma(sample);
+
+        uint32_t lowerCascadeIndex = 0u;
+        while (!(luma < upperScale) && lowerCascadeIndex + 2u < cascadeSettings.size) // `+ 2u` on the left avoids unsigned wrap-around of `size - 2` when size < 2
+        {
+            lowerScale = upperScale;
+            upperScale *= cascadeSettings.base;
+            ++lowerCascadeIndex;
+        }
+
+        float lowerCascadeLevelWeight;
+        float higherCascadeLevelWeight;
+
+        if (luma <= lowerScale)
+            lowerCascadeLevelWeight = 1.0f;
+        else if (luma < upperScale)
+            lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale));
+        else // Inf, NaN ...
+            lowerCascadeLevelWeight = 0.0f;
+
+        if (luma < upperScale)
+            higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight);
+        else
+            higherCascadeLevelWeight = upperScale / luma;
+
+        uint32_t higherCascadeIndex = lowerCascadeIndex + 1u;
+
+        const uint32_t sampleCount = sampleIndex + 1u;
+        const float reciprocalSampleCount = 1.0f / float(sampleCount);
+        accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount;
+        accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount;
+        cascadeSampleCounter[lowerCascadeIndex] = sampleCount;
+        cascadeSampleCounter[higherCascadeIndex] = sampleCount;
+    }
+};
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl
new file mode 100644
index 0000000000..d7b151af86
--- /dev/null
+++ b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl
@@ -0,0 +1,160 @@
+#ifndef _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace rwmc
+{
+namespace impl
+{
+
+struct CascadeSample // result of impl::SampleCascade for one coordinate in one cascade layer
+{
+	float32_t3 centerValue; // RGB of the texel at the queried coordinate
+	float normalizedCenterLuma; // calcLuma(centerValue) multiplied by the `reciprocalBaseI` given to SampleCascade
+	float normalizedNeighbourhoodAverageLuma; // same scaling, averaged over the 3x3 block (8 neighbours + center, divided by 9)
+};
+
+// Fetches the RGB of texel `currentCoord + offset` from array layer `cascadeIndex` of `cascade`; a negative coordinate returns black without loading.
+// TODO: figure out what values should pixels outside have, 0.0f is incorrect (NOTE(review): only the lower image bound is tested here)
+float32_t3 sampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, in RWTexture2DArray<float32_t4> cascade, uint32_t cascadeIndex)
+{
+	const int32_t2 neighbourCoord = currentCoord + offset;
+	if (neighbourCoord.x < 0 || neighbourCoord.y < 0)
+		return float32_t3(0.0f, 0.0f, 0.0f);
+	const float32_t4 texel = cascade.Load(int32_t3(neighbourCoord, int32_t(cascadeIndex)));
+	return float32_t3(texel.r, texel.g, texel.b);
+}
+
+float32_t calcLuma(in float32_t3 col) // weighted sum of the RGB channels of `col`, used as its luma
+{
+	return hlsl::dot<float32_t3>(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); // NOTE(review): weights are index [1] of the *transposed* scRGB->XYZ matrix — confirm the matrix layout makes these the Y (luminance) weights
+}
+
+// Reads the 3x3 texel block around `coord` in layer `cascadeIndex` and returns the center colour plus the center and block-average luma, both scaled by `reciprocalBaseI`.
+CascadeSample SampleCascade(in int32_t2 coord, in RWTexture2DArray<float32_t4> cascade, in uint cascadeIndex, in float reciprocalBaseI)
+{
+	float32_t3 neighbourhood[9]; // row-major 3x3 block, index 4 is the center texel
+	neighbourhood[0] = sampleCascadeTexel(coord, int32_t2(-1, -1), cascade, cascadeIndex);
+	neighbourhood[1] = sampleCascadeTexel(coord, int32_t2(0, -1), cascade, cascadeIndex);
+	neighbourhood[2] = sampleCascadeTexel(coord, int32_t2(1, -1), cascade, cascadeIndex);
+	neighbourhood[3] = sampleCascadeTexel(coord, int32_t2(-1, 0), cascade, cascadeIndex);
+	neighbourhood[4] = sampleCascadeTexel(coord, int32_t2(0, 0), cascade, cascadeIndex);
+	neighbourhood[5] = sampleCascadeTexel(coord, int32_t2(1, 0), cascade, cascadeIndex);
+	neighbourhood[6] = sampleCascadeTexel(coord, int32_t2(-1, 1), cascade, cascadeIndex);
+	neighbourhood[7] = sampleCascadeTexel(coord, int32_t2(0, 1), cascade, cascadeIndex);
+	neighbourhood[8] = sampleCascadeTexel(coord, int32_t2(1, 1), cascade, cascadeIndex);
+	// numerical robustness: the 8 non-center texels are summed pairwise (tree order), the center is excluded here
+	float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) +
+		((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8]));
+
+	CascadeSample retval;
+	retval.centerValue = neighbourhood[4];
+	retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI; // chained assignment: the average is seeded with the center luma
+	retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; // (scaled luma of the 8-neighbour sum + center) / 9
+	return retval;
+}
+
+} // namespace impl
+
+struct ReweightingParameters // constants consumed by `reweight`, filled in by `computeReweightingParameters`
+{
+	uint32_t lastCascadeIndex; // cascadeSize - 1
+	float initialEmin; // a minimum image brightness that we always consider reliable
+	float reciprocalBase; // 1 / base
+	float reciprocalN; // 1 / sampleCount
+	float reciprocalKappa; // 1 / kappa
+	float colorReliabilityFactor; // base + (1 - base) * reciprocalKappa
+	float NOverKappa; // sampleCount * reciprocalKappa
+};
+
+// Precomputes the constants `reweight` needs from the cascade base, the sample count, the minimum reliable luma, kappa and the number of cascades.
+ReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize)
+{
+	const float sampleCountF = float(sampleCount);
+	const float rcpKappa = 1.f / kappa;
+	ReweightingParameters params;
+	params.lastCascadeIndex = cascadeSize - 1u;
+	params.initialEmin = minReliableLuma;
+	params.reciprocalBase = 1.f / base;
+	params.reciprocalN = 1.f / sampleCountF;
+	params.reciprocalKappa = rcpKappa;
+	// when exact expected value estimation is not required (kappa!=1.f) a bit more variance relative to the existing image brightness is acceptable: allow up to ~<cascadeBase> more energy in one sample to lessen bias in some cases
+	params.colorReliabilityFactor = base + (1.f - base) * rcpKappa;
+	params.NOverKappa = sampleCountF * rcpKappa;
+	return params;
+}
+
+// Combines every cascade layer at `coord` into a single colour: each layer's center texel is added with a weight that is 1 when
+// `params.reciprocalKappa > 1`, otherwise a value clamped to [0,1] derived from the lumas of the previous, current and next cascade.
+float32_t3 reweight(in ReweightingParameters params, in RWTexture2DArray<float32_t4> cascade, in int32_t2 coord)
+{
+	float reciprocalBaseI = 1.f; // running power of `params.reciprocalBase`, advanced right before each `next` cascade is sampled
+	impl::CascadeSample curr = impl::SampleCascade(coord, cascade, 0u, reciprocalBaseI);
+
+	float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f);
+	float Emin = params.initialEmin;
+
+	float prevNormalizedCenterLuma = 0.f, prevNormalizedNeighbourhoodAverageLuma = 0.f; // only read when `notFirstCascade`; zero-initialised so no indeterminate value exists
+	for (uint i = 0u; i <= params.lastCascadeIndex; i++)
+	{
+		const bool notFirstCascade = i != 0u;
+		const bool notLastCascade = i != params.lastCascadeIndex;
+
+		impl::CascadeSample next; // assigned and read only when `notLastCascade`
+		if (notLastCascade)
+		{
+			reciprocalBaseI *= params.reciprocalBase;
+			next = impl::SampleCascade(coord, cascade, i + 1u, reciprocalBaseI);
+		}
+
+		float reliability = 1.f;
+		// sample counting-based reliability estimation
+		if (params.reciprocalKappa <= 1.f)
+		{
+			float localReliability = curr.normalizedCenterLuma;
+			// reliability in 3x3 pixel block (see robustness)
+			float globalReliability = curr.normalizedNeighbourhoodAverageLuma;
+			if (notFirstCascade)
+			{
+				localReliability += prevNormalizedCenterLuma;
+				globalReliability += prevNormalizedNeighbourhoodAverageLuma;
+			}
+			if (notLastCascade)
+			{
+				localReliability += next.normalizedCenterLuma;
+				globalReliability += next.normalizedNeighbourhoodAverageLuma;
+			}
+			// check if above minimum sampling threshold (avg 9 sample occurrences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: ternary op is in reverse)
+			reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability;
+			{
+				const float accumLuma = impl::calcLuma(accumulation);
+				if (accumLuma > Emin)
+					Emin = accumLuma; // Emin only ever grows: it tracks the brightest accumulated luma so far
+				const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor;
+				reliability += colorReliability;
+				reliability *= params.NOverKappa;
+				reliability -= params.reciprocalKappa;
+				reliability = clamp(reliability * 0.5f, 0.f, 1.f);
+			}
+		}
+		accumulation += curr.centerValue * reliability;
+		prevNormalizedCenterLuma = curr.normalizedCenterLuma;
+		prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma;
+		if (notLastCascade) // on the last iteration `next` was never assigned, so do not copy it
+			curr = next;
+	}
+
+	return accumulation;
+}
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 2f4d11baf3..e0bd6921cc 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -389,5 +389,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl")
 #blur
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl")
+#rwmc
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/rwmc.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/CascadeAccumulator.hlsl")
 
 ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")

From 4dcf56a8cd3bc2152d0f4b5b11c04bb03c7fb891 Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Mon, 20 Oct 2025 14:20:51 +0200
Subject: [PATCH 26/39] Updated examples

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index 8ecc60ff8a..bbc8ab80fe 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 8ecc60ff8af7b68564d769e04ec06d2e87cbf8e2
+Subproject commit bbc8ab80fecf44abb9b03f4fa147918fee7c310f

From 3ae69fa8cd4bc41fe4d675c398362cea5ed7a90a Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 12 Nov 2025 11:00:26 +0700
Subject: [PATCH 27/39] updated example

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index 2b4db21239..9c60e9905c 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 2b4db2123918f380cc0a35f6889315a02f84ea73
+Subproject commit 9c60e9905cd29393b1b5479276ab64e643714cb9

From 4cfed1bbf62884f0b17d9685d9664f719692367e Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 13 Nov 2025 10:14:28 +0700
Subject: [PATCH 28/39] use new angle adder

---
 include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl | 5 ++++-
 include/nbl/builtin/hlsl/shapes/triangle.hlsl             | 8 ++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
index 7828fc14ea..fd3a616e8d 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl
@@ -103,7 +103,10 @@ struct SphericalTriangle
         const scalar_type cosC_ = sin_vertices[0] * sinB_* cos_c - cos_vertices[0] * cosB_;
         const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_);
 
-        const scalar_type subTriSolidAngleRatio = math::getArccosSumofABC_minus_PI(cos_vertices[0], cosB_, cosC_, sin_vertices[0], sinB_, sinC_) * pdf;
+        math::sincos_accumulator<scalar_type> angle_adder = math::sincos_accumulator<scalar_type>::create(cos_vertices[0], sin_vertices[0]);
+        angle_adder.addAngle(cosB_, sinB_);
+        angle_adder.addAngle(cosC_, sinC_);
+        const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi<scalar_type>) * pdf;
         const scalar_type u = subTriSolidAngleRatio > numeric_limits<scalar_type>::min ? subTriSolidAngleRatio : 0.0;
 
         const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_);
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
index 4c2d895c28..a6896fbc8a 100644
--- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
@@ -9,6 +9,7 @@
 #include <nbl/builtin/hlsl/cpp_compat.hlsl>
 #include <nbl/builtin/hlsl/limits.hlsl>
 #include <nbl/builtin/hlsl/math/functions.hlsl>
+#include <nbl/builtin/hlsl/math/angle_adding.hlsl>
 #include <nbl/builtin/hlsl/numbers.hlsl>
 
 namespace nbl
@@ -60,7 +61,10 @@ struct SphericalTriangle
         cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
         sin_vertices = hlsl::sqrt((vector3_type)1.f - cos_vertices * cos_vertices);
 
-        return math::getArccosSumofABC_minus_PI(cos_vertices[0], cos_vertices[1], cos_vertices[2], sin_vertices[0], sin_vertices[1], sin_vertices[2]);
+        math::sincos_accumulator<scalar_type> angle_adder = math::sincos_accumulator<scalar_type>::create(cos_vertices[0], sin_vertices[0]);
+        angle_adder.addAngle(cos_vertices[1], sin_vertices[1]);
+        angle_adder.addAngle(cos_vertices[2], sin_vertices[2]);
+        return angle_adder.getSumofArccos() - numbers::pi<scalar_type>;
     }
 
     scalar_type solidAngleOfTriangle()
@@ -90,7 +94,7 @@ struct SphericalTriangle
         const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal));
 
         const vector3_type pyramidAngles = acos<scalar_type>(cos_sides);
-        return hlsl::dot<vector3_type>(pyramidAngles, externalProducts) / (2.f * numbers::pi<float>);
+        return hlsl::dot<vector3_type>(pyramidAngles, externalProducts) / (2.f * numbers::pi<scalar_type>);
     }
 
     vector3_type vertex0;

From dc8b5bca79570126f6a7ec9e140624095f187b6a Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 13 Nov 2025 15:22:16 +0700
Subject: [PATCH 29/39] initialize invalid ndf return to inf, not 0

---
 include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl | 4 ++--
 include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl
index 1406bc8d4f..c719bbfd4e 100644
--- a/include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl
@@ -339,8 +339,8 @@ struct Beckmann
         if (isInfinity)
         {
             quant_type dmq;
-            dmq.microfacetMeasure = scalar_type(0.0);
-            dmq.projectedLightMeasure = scalar_type(0.0);
+            dmq.microfacetMeasure = bit_cast<scalar_type>(numeric_limits<scalar_type>::infinity);
+            dmq.projectedLightMeasure = bit_cast<scalar_type>(numeric_limits<scalar_type>::infinity);
             return dmq;
         }
         scalar_type dg1 = D / (scalar_type(1.0) + query.getLambdaV());
diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl
index 0e2e9d1291..4ad4bb341e 100644
--- a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl
@@ -279,8 +279,8 @@ struct GGX
         quant_type dmq;
         if (isInfinity)
         {
-            dmq.microfacetMeasure = scalar_type(0.0);
-            dmq.projectedLightMeasure = scalar_type(0.0);
+            dmq.microfacetMeasure = bit_cast<scalar_type>(numeric_limits<scalar_type>::infinity);
+            dmq.projectedLightMeasure = bit_cast<scalar_type>(numeric_limits<scalar_type>::infinity);
             return dmq;
         }
 
@@ -337,8 +337,8 @@ struct GGX
         if (isInfinity)
         {
             quant_type dmq;
-            dmq.microfacetMeasure = scalar_type(0.0);
-            dmq.projectedLightMeasure = scalar_type(0.0);
+            dmq.microfacetMeasure = bit_cast<scalar_type>(numeric_limits<scalar_type>::infinity);
+            dmq.projectedLightMeasure = bit_cast<scalar_type>(numeric_limits<scalar_type>::infinity);
             return dmq;
         }
         dg *= correlated_wo_numerator<LS, Interaction, MicrofacetCache>(query, _sample, interaction, cache);

From 434234c9ab3bc8422a8149c7ae302d2c74d3c825 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 13 Nov 2025 15:22:59 +0700
Subject: [PATCH 30/39] derive trait from cook torrance base

---
 .../hlsl/bxdf/base/cook_torrance_base.hlsl    | 24 ++++++++++++++-----
 .../hlsl/bxdf/reflection/beckmann.hlsl        | 18 +-------------
 .../nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl | 18 +-------------
 .../hlsl/bxdf/reflection/iridescent.hlsl      |  9 +------
 .../hlsl/bxdf/transmission/beckmann.hlsl      | 18 +-------------
 .../builtin/hlsl/bxdf/transmission/ggx.hlsl   | 18 +-------------
 .../hlsl/bxdf/transmission/iridescent.hlsl    |  9 +------
 7 files changed, 24 insertions(+), 90 deletions(-)

diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
index a185dc8d98..306198f827 100644
--- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
@@ -133,7 +133,7 @@ struct SCookTorrance
     static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH)
     {
         spectral_type throughputWeights = interaction.getLuminosityContributionHint();
-        return hlsl::dot<spectral_type>(impl::__implicit_promote<spectral_type, typename fresnel_type::vector_type>::__call(orientedFresnel(clampedVdotH)), throughputWeights);
+        return hlsl::dot<spectral_type>(orientedFresnel(clampedVdotH), throughputWeights);
     }
     template<class Interaction=conditional_t<IsAnisotropic,anisotropic_interaction_type,isotropic_interaction_type>,
             typename C=bool_constant<fresnel_type::ReturnsMonochrome> NBL_FUNC_REQUIRES(C::value && fresnel_type::ReturnsMonochrome)
@@ -142,7 +142,7 @@ struct SCookTorrance
         return orientedFresnel(clampedVdotH)[0];
     }
 
-    bool __dotIsUnity(const vector3_type a, const vector3_type b, const scalar_type value)
+    bool __dotIsValue(const vector3_type a, const vector3_type b, const scalar_type value)
     {
         const scalar_type ab = hlsl::dot(a, b);
         return hlsl::max(ab, value / ab) <= scalar_type(value + 1e-3);
@@ -209,11 +209,11 @@ struct SCookTorrance
         ray_dir_info_type V = interaction.getV();
         const matrix3x3_type fromTangent = interaction.getFromTangentSpace();
         // tangent frame orthonormality
-        assert(__dotIsUnity(fromTangent[0],fromTangent[1],0.0));
-        assert(__dotIsUnity(fromTangent[1],fromTangent[2],0.0));
-        assert(__dotIsUnity(fromTangent[2],fromTangent[0],0.0));
+        assert(__dotIsValue(fromTangent[0],fromTangent[1],0.0));
+        assert(__dotIsValue(fromTangent[1],fromTangent[2],0.0));
+        assert(__dotIsValue(fromTangent[2],fromTangent[0],0.0));
         // NDF sampling produced a unit length direction
-        assert(__dotIsUnity(localH,localH,1.0));
+        assert(__dotIsValue(localH,localH,1.0));
         const vector3_type H = hlsl::mul(interaction.getFromTangentSpace(), localH);
         Refract<scalar_type> r = Refract<scalar_type>::create(V.getDirection(), H);
 
@@ -409,6 +409,18 @@ struct SCookTorrance
     fresnel_type fresnel;   // always front-facing
 };
 
+
+template<class Config, class N, class F>
+struct traits<SCookTorrance<Config,N,F> > // single specialization covering every SCookTorrance instantiation; all flags derive from __type::IsBSDF
+{
+   using __type = SCookTorrance<Config,N,F>;
+
+    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = conditional_value<__type::IsBSDF, BxDFType, BxDFType::BT_BSDF, BxDFType::BT_BRDF>::value;
+    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
+    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = !__type::IsBSDF; // NOTE(review): the per-alias transmission (BT_BSDF) specializations removed by this patch hard-coded `true`; BSDFs now get `false` — confirm this change is intended
+    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = !__type::IsBSDF; // NOTE(review): same behaviour change as clampNdotV for BSDFs
+};
+
 }
 }
 }
diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl
index f37d0d9fd8..cb7743e02d 100644
--- a/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl
@@ -27,23 +27,7 @@ using SBeckmannAnisotropic = SCookTorrance<Config, ndf::Beckmann<typename Config
 
 }
 
-template<typename C>
-struct traits<bxdf::reflection::SBeckmannIsotropic<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
-
-template<typename C>
-struct traits<bxdf::reflection::SBeckmannAnisotropic<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
+// inherit trait from cook torrance base
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl
index 049480afab..0f49d0be43 100644
--- a/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl
@@ -27,23 +27,7 @@ using SGGXAnisotropic = SCookTorrance<Config, ndf::GGX<typename Config::scalar_t
 
 }
 
-template<typename C>
-struct traits<bxdf::reflection::SGGXIsotropic<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
-
-template<typename C>
-struct traits<bxdf::reflection::SGGXAnisotropic<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
+// inherit trait from cook torrance base
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl
index 07762d1298..e30c3efdab 100644
--- a/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl
@@ -20,14 +20,7 @@ using SIridescent = SCookTorrance<Config, ndf::GGX<typename Config::scalar_type,
 
 }
 
-template<typename C>
-struct traits<bxdf::reflection::SIridescent<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
+// inherit trait from cook torrance base
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl
index fa315b40ea..8c61692c5c 100644
--- a/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl
@@ -27,23 +27,7 @@ using SBeckmannDielectricAnisotropic = SCookTorrance<Config, ndf::Beckmann<typen
 
 }
 
-template<typename C>
-struct traits<bxdf::transmission::SBeckmannDielectricIsotropic<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
-
-template<typename C>
-struct traits<bxdf::transmission::SBeckmannDielectricAnisotropic<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
+// inherit trait from cook torrance base
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl
index 51f096532b..cdd4483c7f 100644
--- a/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl
@@ -27,23 +27,7 @@ using SGGXDielectricAnisotropic = SCookTorrance<Config, ndf::GGX<typename Config
 
 }
 
-template<typename C>
-struct traits<bxdf::transmission::SGGXDielectricIsotropic<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
-
-template<typename C>
-struct traits<bxdf::transmission::SGGXDielectricAnisotropic<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
+// inherit trait from cook torrance base
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl
index 2e7aa0e56e..8e06fc34e7 100644
--- a/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl
@@ -20,14 +20,7 @@ using SIridescent = SCookTorrance<Config, ndf::GGX<typename Config::scalar_type,
 
 }
 
-template<typename C>
-struct traits<bxdf::transmission::SIridescent<C> >
-{
-    NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF;
-    NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true;
-    NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true;
-};
+// inherit trait from cook torrance base
 
 }
 }

From ffeb6335793fa2d20d1e70eec7f47bd7b8133bfb Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 13 Nov 2025 15:23:29 +0700
Subject: [PATCH 31/39] fix angle adder

---
 include/nbl/builtin/hlsl/math/angle_adding.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/nbl/builtin/hlsl/math/angle_adding.hlsl b/include/nbl/builtin/hlsl/math/angle_adding.hlsl
index 27d4f2a465..5ab661facb 100644
--- a/include/nbl/builtin/hlsl/math/angle_adding.hlsl
+++ b/include/nbl/builtin/hlsl/math/angle_adding.hlsl
@@ -44,7 +44,7 @@ struct sincos_accumulator
         const T cosB = runningSum.real();
         const T sinB = runningSum.imag();
         // TODO: prove if we infer overflow from sign of `d` instead
-        const bool overflow = abs<T>(min<T>(a, cosB)) > max<T>(a, cosB);
+        const bool overflow = abs<T>(min<T>(cosA, cosB)) > max<T>(cosA, cosB);
         const T c = cosA * cosB - sinA * sinB;
         const T d = sinA * cosB + cosA * sinB;
 

From d75246d77adb2d8f2d065e50a6c5509e1383e818 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 13 Nov 2025 16:06:16 +0700
Subject: [PATCH 32/39] optimizations to iridescent fresnel

---
 include/nbl/builtin/hlsl/bxdf/fresnel.hlsl    | 157 +++++++++++-------
 .../hlsl/bxdf/reflection/iridescent.hlsl      |   2 +-
 .../hlsl/bxdf/transmission/iridescent.hlsl    |   2 +-
 3 files changed, 99 insertions(+), 62 deletions(-)

diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl
index 56ea88080c..e6bb3f98c2 100644
--- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl
@@ -497,7 +497,7 @@ struct Dielectric
 };
 
 // adapted from https://belcour.github.io/blog/research/publication/2017/05/01/brdf-thin-film.html
-template<typename T, bool SupportsTransmission NBL_STRUCT_CONSTRAINABLE>
+template<typename T, bool SupportsTransmission, typename Colorspace = colorspace::scRGB NBL_STRUCT_CONSTRAINABLE>
 struct Iridescent;
 
 namespace impl
@@ -543,47 +543,50 @@ struct iridescent_helper
         return xyz / scalar_type(1.0685e-7);
     }
 
-    template<typename Params>
-    static T __call(NBL_CONST_REF_ARG(Params) params, const scalar_type clampedCosTheta)
+    template<typename Colorspace>
+    static T __call(const vector_type _D, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta)
     {
-        const vector_type wavelengths = vector_type(colorspace::scRGB::wavelength_R, colorspace::scRGB::wavelength_G, colorspace::scRGB::wavelength_B);
+        const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B);
 
-        const vector_type eta12 = params.getEta12();
-        const vector_type eta23 = params.getEta23();
-        const vector_type etak23 = params.getEtak23();
         const scalar_type cosTheta_1 = clampedCosTheta;
-        vector_type cosTheta_2;
-
         vector_type R12p, R23p, R12s, R23s;
-        const vector_type scale = scalar_type(1.0)/eta12;
-        const vector_type cosTheta2_2 = hlsl::promote<vector_type>(1.0) - hlsl::promote<vector_type>(1.0-cosTheta_1*cosTheta_1) * scale * scale;
-
-        cosTheta_2 = hlsl::sqrt(hlsl::max(cosTheta2_2, hlsl::promote<vector_type>(0.0)));
-        Dielectric<vector_type>::__polarized(eta12, hlsl::promote<vector_type>(cosTheta_1), R12p, R12s);
+        vector_type cosTheta_2;
+        vector<bool,vector_traits<vector_type>::Dimension> notTIR;
+        {
+            const vector_type scale = scalar_type(1.0)/eta12;
+            const vector_type cosTheta2_2 = hlsl::promote<vector_type>(1.0) - hlsl::promote<vector_type>(scalar_type(1.0)-cosTheta_1*cosTheta_1) * scale * scale;
+            notTIR = cosTheta2_2 > hlsl::promote<vector_type>(0.0);
+            cosTheta_2 = hlsl::sqrt(hlsl::max(cosTheta2_2, hlsl::promote<vector_type>(0.0)));
+        }
 
-        // Reflected part by the base
-        // if kappa==0, base material is dielectric
-        NBL_IF_CONSTEXPR(SupportsTransmission)
-            Dielectric<vector_type>::__polarized(eta23 * eta23, cosTheta_2, R23p, R23s);
-        else
+        if (hlsl::any(notTIR))
         {
-            vector_type etaLen2 = eta23 * eta23 + etak23 * etak23;
-            Conductor<vector_type>::__polarized(eta23, etaLen2, cosTheta_2, R23p, R23s);
+            Dielectric<vector_type>::__polarized(eta12, hlsl::promote<vector_type>(cosTheta_1), R12p, R12s);
+
+            // Reflected part by the base
+            // if kappa==0, base material is dielectric
+            NBL_IF_CONSTEXPR(SupportsTransmission)
+                Dielectric<vector_type>::__polarized(eta23 * eta23, cosTheta_2, R23p, R23s);
+            else
+            {
+                vector_type etaLen2 = eta23 * eta23 + etak23 * etak23;
+                Conductor<vector_type>::__polarized(eta23, etaLen2, cosTheta_2, R23p, R23s);
+            }
         }
 
         // Check for total internal reflection
-        R12s = hlsl::mix(R12s, hlsl::promote<vector_type>(1.0), cosTheta2_2 <= hlsl::promote<vector_type>(0.0));
-        R12p = hlsl::mix(R12p, hlsl::promote<vector_type>(1.0), cosTheta2_2 <= hlsl::promote<vector_type>(0.0));
-
-        R23s = hlsl::mix(R23s, hlsl::promote<vector_type>(0.0), cosTheta2_2 <= hlsl::promote<vector_type>(0.0));
-        R23p = hlsl::mix(R23p, hlsl::promote<vector_type>(0.0), cosTheta2_2 <= hlsl::promote<vector_type>(0.0));
+        const vector_type notTIRFactor = vector_type(notTIR); // 0 when TIR, 1 otherwise
+        R12s = R12s * notTIRFactor;
+        R12p = R12p * notTIRFactor;
+        R23s = R23s * notTIRFactor;
+        R23p = R23p * notTIRFactor;
 
         // Compute the transmission coefficients
         vector_type T121p = hlsl::promote<vector_type>(1.0) - R12p;
         vector_type T121s = hlsl::promote<vector_type>(1.0) - R12s;
 
         // Optical Path Difference
-        const vector_type D = hlsl::promote<vector_type>(2.0 * params.getDinc()) * params.getThinFilmIor() * cosTheta_2;
+        const vector_type D = _D * cosTheta_2;
         const vector_type Dphi = hlsl::promote<vector_type>(2.0 * numbers::pi<scalar_type>) * D / wavelengths;
 
         vector_type phi21p, phi21s, phi23p, phi23s, r123s, r123p, Rs;
@@ -634,82 +637,115 @@ struct iridescent_helper
     }
 };
 
-template<typename T, bool SupportsTransmission NBL_PRIMARY_REQUIRES(concepts::FloatingPointLikeVectorial<T>)    
+template<typename T NBL_PRIMARY_REQUIRES(concepts::FloatingPointLikeVectorial<T>)    
 struct iridescent_base
 {
     using scalar_type = typename vector_traits<T>::scalar_type;
     using vector_type = T;
 
-    scalar_type getDinc() NBL_CONST_MEMBER_FUNC { return Dinc; }
-    vector_type getThinFilmIor() NBL_CONST_MEMBER_FUNC { return thinFilmIor; }
+    vector_type getD() NBL_CONST_MEMBER_FUNC { return D; }
     vector_type getEta12() NBL_CONST_MEMBER_FUNC { return eta12; }
     vector_type getEta23() NBL_CONST_MEMBER_FUNC { return eta23; }
-    vector_type getEtak23() NBL_CONST_MEMBER_FUNC
-    {
-        NBL_IF_CONSTEXPR(SupportsTransmission)
-            return hlsl::promote<vector_type>(0.0);
-        else
-            return etak23;
-    }
 
-    scalar_type Dinc;       // thickness of thin film in nanometers, rec. 100-25000nm
-    vector_type thinFilmIor;
+    vector_type D;
     vector_type eta12;      // outside (usually air 1.0) -> thin-film IOR
     vector_type eta23;      // thin-film -> base material IOR
-    vector_type etak23;     // thin-film -> complex component, k==0 makes dielectric
 };
 }
 
-template<typename T>
+template<typename T, typename Colorspace>
 NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial<T>)
-struct Iridescent<T, false NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<T>) >
+struct Iridescent<T, false, Colorspace NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<T>) > : impl::iridescent_base<T>
 {
-    using this_t = Iridescent<T,false>;
+    using this_t = Iridescent<T,false,Colorspace>;
     using scalar_type = typename vector_traits<T>::scalar_type;
     using vector_type = T;  // assert dim==3?
     using eta_type = vector_type;
-    using base_type = impl::iridescent_base<T, false>;
+    using base_type = impl::iridescent_base<T>;
 
     NBL_CONSTEXPR_STATIC_INLINE bool ReturnsMonochrome = vector_traits<vector_type>::Dimension == 1;
 
+    struct SCreationParams
+    {
+        scalar_type Dinc;   // thickness of thin film in nanometers, rec. 100-25000nm
+        vector_type ior1;   // outside (usually air 1.0)
+        vector_type ior2;   // thin-film ior
+        vector_type ior3;   // base mat ior
+        vector_type iork3;
+    };
+    using creation_params_type = SCreationParams;
+
+    static this_t create(NBL_CONST_REF_ARG(creation_params_type) params)
+    {
+        this_t retval;
+        retval.D = hlsl::promote<vector_type>(2.0 * params.Dinc) * params.ior2;
+        retval.eta12 = params.ior2/params.ior1;
+        retval.eta23 = params.ior3/params.ior2;
+        retval.etak23 = params.iork3/params.ior2;
+        return retval;
+    }
+
     T operator()(const scalar_type clampedCosTheta) NBL_CONST_MEMBER_FUNC
     {
-        return impl::iridescent_helper<T,false>::template __call<base_type>(__base, clampedCosTheta);
+        return impl::iridescent_helper<T,false>::template __call<Colorspace>(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta);
     }
 
     OrientedEtaRcps<eta_type> getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC
     {
         OrientedEtaRcps<eta_type> rcpEta;
-        rcpEta.value = hlsl::promote<eta_type>(1.0) / __base.eta23;
+        rcpEta.value = hlsl::promote<eta_type>(1.0) / base_type::eta23;
         rcpEta.value2 = rcpEta.value * rcpEta.value;
         return rcpEta;
     }
 
-    base_type __base;
+    vector_type getEtak23() NBL_CONST_MEMBER_FUNC
+    {
+        return etak23;
+    }
+
+    vector_type etak23;     // thin-film -> complex component
 };
 
-template<typename T>
+template<typename T, typename Colorspace>
 NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial<T>)
-struct Iridescent<T, true NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<T>) >
+struct Iridescent<T, true, Colorspace NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<T>) > : impl::iridescent_base<T>
 {
-    using this_t = Iridescent<T,true>;
+    using this_t = Iridescent<T,true,Colorspace>;
     using scalar_type = typename vector_traits<T>::scalar_type;
     using vector_type = T;  // assert dim==3?
     using eta_type = vector<scalar_type, 1>;
-    using base_type = impl::iridescent_base<T, true>;
+    using base_type = impl::iridescent_base<T>;
 
     NBL_CONSTEXPR_STATIC_INLINE bool ReturnsMonochrome = vector_traits<vector_type>::Dimension == 1;
 
+    struct SCreationParams
+    {
+        scalar_type Dinc;   // thickness of thin film in nanometers, rec. 100-25000nm
+        vector_type ior1;   // outside (usually air 1.0)
+        vector_type ior2;   // thin-film ior
+        vector_type ior3;   // base mat ior
+    };
+    using creation_params_type = SCreationParams;
+
+    static this_t create(NBL_CONST_REF_ARG(creation_params_type) params)
+    {
+        this_t retval;
+        retval.D = hlsl::promote<vector_type>(2.0 * params.Dinc) * params.ior2;
+        retval.eta12 = params.ior2/params.ior1;
+        retval.eta23 = params.ior3/params.ior2;
+        return retval;
+    }
+
     T operator()(const scalar_type clampedCosTheta) NBL_CONST_MEMBER_FUNC
     {
-        return impl::iridescent_helper<T,true>::template __call<base_type>(__base, clampedCosTheta);
+        return impl::iridescent_helper<T,true>::template __call<Colorspace>(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta);
     }
 
-    scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return __base.eta23[0]; }
+    scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta23[0]; }
     OrientedEtaRcps<eta_type> getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC
     {
         OrientedEtaRcps<eta_type> rcpEta;
-        rcpEta.value = hlsl::promote<eta_type>(1.0) / __base.eta23[0];
+        rcpEta.value = hlsl::promote<eta_type>(1.0) / base_type::eta23[0];
         rcpEta.value2 = rcpEta.value * rcpEta.value;
         return rcpEta;
     }
@@ -718,15 +754,16 @@ struct Iridescent<T, true NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectori
     {
         const bool flip = NdotI < scalar_type(0.0);
         this_t orientedFresnel;
-        orientedFresnel.__base.Dinc = __base.Dinc;
-        orientedFresnel.__base.thinFilmIor = __base.thinFilmIor;
-        orientedFresnel.__base.eta12 = hlsl::mix(__base.eta12, hlsl::promote<vector_type>(1.0)/__base.eta12, flip);
-        orientedFresnel.__base.eta23 = hlsl::mix(__base.eta23, hlsl::promote<vector_type>(1.0)/__base.eta23, flip);
-        orientedFresnel.__base.etak23 = hlsl::promote<vector_type>(0.0);
+        orientedFresnel.D = base_type::D;
+        orientedFresnel.eta12 = hlsl::mix(base_type::eta12, hlsl::promote<vector_type>(1.0)/base_type::eta12, flip);
+        orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote<vector_type>(1.0)/base_type::eta23, flip);
         return orientedFresnel;
     }
 
-    base_type __base;
+    vector_type getEtak23() NBL_CONST_MEMBER_FUNC
+    {
+        return hlsl::promote<vector_type>(0.0);
+    }
 };
 
 
diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl
index e30c3efdab..a6120233bb 100644
--- a/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl
@@ -16,7 +16,7 @@ namespace reflection
 {
 
 template<class Config>
-using SIridescent = SCookTorrance<Config, ndf::GGX<typename Config::scalar_type, false, ndf::MTT_REFLECT>, fresnel::Iridescent<typename Config::spectral_type, false> >;
+using SIridescent = SCookTorrance<Config, ndf::GGX<typename Config::scalar_type, false, ndf::MTT_REFLECT>, fresnel::Iridescent<typename Config::spectral_type, false, colorspace::scRGB> >;
 
 }
 
diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl
index 8e06fc34e7..05b1753aca 100644
--- a/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl
@@ -16,7 +16,7 @@ namespace transmission
 {
 
 template<class Config>
-using SIridescent = SCookTorrance<Config, ndf::GGX<typename Config::scalar_type, false, ndf::MTT_REFLECT_REFRACT>, fresnel::Iridescent<typename Config::spectral_type, true> >;
+using SIridescent = SCookTorrance<Config, ndf::GGX<typename Config::scalar_type, false, ndf::MTT_REFLECT_REFRACT>, fresnel::Iridescent<typename Config::spectral_type, true, colorspace::scRGB> >;
 
 }
 

From 494c6255847a97cad810a3a3178b9ee8fb1aff6f Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 13 Nov 2025 16:25:26 +0700
Subject: [PATCH 33/39] avoid repeat fresnel calc

---
 .../hlsl/bxdf/base/cook_torrance_base.hlsl      | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
index 306198f827..3789dded2e 100644
--- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
@@ -130,14 +130,15 @@ struct SCookTorrance
 
     template<class Interaction=conditional_t<IsAnisotropic,anisotropic_interaction_type,isotropic_interaction_type>,
             typename C=bool_constant<!fresnel_type::ReturnsMonochrome> NBL_FUNC_REQUIRES(C::value && !fresnel_type::ReturnsMonochrome)
-    static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH)
+    static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, NBL_REF_ARG(spectral_type) outFresnelVal)
     {
         spectral_type throughputWeights = interaction.getLuminosityContributionHint();
-        return hlsl::dot<spectral_type>(orientedFresnel(clampedVdotH), throughputWeights);
+        outFresnelVal = orientedFresnel(clampedVdotH);
+        return hlsl::dot<spectral_type>(outFresnelVal, throughputWeights);
     }
     template<class Interaction=conditional_t<IsAnisotropic,anisotropic_interaction_type,isotropic_interaction_type>,
             typename C=bool_constant<fresnel_type::ReturnsMonochrome> NBL_FUNC_REQUIRES(C::value && fresnel_type::ReturnsMonochrome)
-    static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH)
+    static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, NBL_REF_ARG(spectral_type) outFresnelVal)
     {
         return orientedFresnel(clampedVdotH)[0];
     }
@@ -294,7 +295,8 @@ struct SCookTorrance
             assert(NdotV*VdotH >= scalar_type(0.0));
         }
 
-        const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(VdotH));
+        spectral_type dummy;
+        const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(VdotH), dummy);
 
         scalar_type rcpChoiceProb;
         scalar_type z = u.z;
@@ -337,7 +339,8 @@ struct SCookTorrance
 
         NBL_IF_CONSTEXPR(IsBSDF)
         {
-            const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()));    
+            spectral_type dummy;
+            const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), dummy);    
             return hlsl::mix(reflectance, scalar_type(1.0) - reflectance, cache.isTransmission()) * DG1.projectedLightMeasure;
         }
         else
@@ -389,8 +392,8 @@ struct SCookTorrance
                 quo = hlsl::promote<spectral_type>(G2_over_G1);
             else
             {
-                const scalar_type scaled_reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()));
-                spectral_type reflectance = impl::__implicit_promote<spectral_type, typename fresnel_type::vector_type>::__call(_f(hlsl::abs(cache.getVdotH())));
+                spectral_type reflectance;
+                const scalar_type scaled_reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), reflectance);
                 quo = hlsl::mix(reflectance / scaled_reflectance,
                         (hlsl::promote<spectral_type>(1.0) - reflectance) / (scalar_type(1.0) - scaled_reflectance), cache.isTransmission()) * G2_over_G1;
             }

From ab25c17ffd5101e7926c4b2a2a4beb3549ad588f Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 13 Nov 2025 16:37:47 +0700
Subject: [PATCH 34/39] mix reflectance w/ transmission in getScaledReflectance

---
 .../hlsl/bxdf/base/cook_torrance_base.hlsl    | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
index 3789dded2e..e88d5fccb7 100644
--- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl
@@ -130,17 +130,19 @@ struct SCookTorrance
 
     template<class Interaction=conditional_t<IsAnisotropic,anisotropic_interaction_type,isotropic_interaction_type>,
             typename C=bool_constant<!fresnel_type::ReturnsMonochrome> NBL_FUNC_REQUIRES(C::value && !fresnel_type::ReturnsMonochrome)
-    static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, NBL_REF_ARG(spectral_type) outFresnelVal)
+    static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, bool transmitted, NBL_REF_ARG(spectral_type) outFresnelVal)
     {
         spectral_type throughputWeights = interaction.getLuminosityContributionHint();
-        outFresnelVal = orientedFresnel(clampedVdotH);
+        spectral_type reflectance = orientedFresnel(clampedVdotH);
+        outFresnelVal = hlsl::mix(reflectance, hlsl::promote<spectral_type>(1.0)-reflectance, transmitted);
         return hlsl::dot<spectral_type>(outFresnelVal, throughputWeights);
     }
     template<class Interaction=conditional_t<IsAnisotropic,anisotropic_interaction_type,isotropic_interaction_type>,
             typename C=bool_constant<fresnel_type::ReturnsMonochrome> NBL_FUNC_REQUIRES(C::value && fresnel_type::ReturnsMonochrome)
-    static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, NBL_REF_ARG(spectral_type) outFresnelVal)
+    static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, bool transmitted, NBL_REF_ARG(spectral_type) outFresnelVal)
     {
-        return orientedFresnel(clampedVdotH)[0];
+        scalar_type reflectance = orientedFresnel(clampedVdotH)[0];
+        return hlsl::mix(reflectance, scalar_type(1.0)-reflectance, transmitted);
     }
 
     bool __dotIsValue(const vector3_type a, const vector3_type b, const scalar_type value)
@@ -296,7 +298,7 @@ struct SCookTorrance
         }
 
         spectral_type dummy;
-        const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(VdotH), dummy);
+        const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(VdotH), false, dummy);
 
         scalar_type rcpChoiceProb;
         scalar_type z = u.z;
@@ -340,8 +342,8 @@ struct SCookTorrance
         NBL_IF_CONSTEXPR(IsBSDF)
         {
             spectral_type dummy;
-            const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), dummy);    
-            return hlsl::mix(reflectance, scalar_type(1.0) - reflectance, cache.isTransmission()) * DG1.projectedLightMeasure;
+            const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), cache.isTransmission(), dummy);    
+            return reflectance * DG1.projectedLightMeasure;
         }
         else
         {
@@ -393,9 +395,8 @@ struct SCookTorrance
             else
             {
                 spectral_type reflectance;
-                const scalar_type scaled_reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), reflectance);
-                quo = hlsl::mix(reflectance / scaled_reflectance,
-                        (hlsl::promote<spectral_type>(1.0) - reflectance) / (scalar_type(1.0) - scaled_reflectance), cache.isTransmission()) * G2_over_G1;
+                const scalar_type scaled_reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), cache.isTransmission(), reflectance);
+                quo = reflectance / scaled_reflectance * G2_over_G1;
             }
         }
         else

From 2a7db14c902097142b01af7587462b0975fe6094 Mon Sep 17 00:00:00 2001
From: Przemog1 <minikers21@gmail.com>
Date: Thu, 13 Nov 2025 21:58:26 +0100
Subject: [PATCH 35/39] Refactored resolve.hlsl

---
 examples_tests                                |   2 +-
 .../concepts/accessors/loadable_image.hlsl    |  15 +-
 .../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 126 ++++++------
 .../builtin/hlsl/rwmc/ResolveParameters.hlsl  |  45 +++++
 .../hlsl/rwmc/SplattingParameters.hlsl        |  23 +++
 include/nbl/builtin/hlsl/rwmc/resolve.hlsl    | 189 ++++++++++++++++++
 include/nbl/builtin/hlsl/rwmc/rwmc.hlsl       | 160 ---------------
 src/nbl/builtin/CMakeLists.txt                |   4 +-
 8 files changed, 334 insertions(+), 230 deletions(-)
 create mode 100644 include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl
 create mode 100644 include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl
 create mode 100644 include/nbl/builtin/hlsl/rwmc/resolve.hlsl
 delete mode 100644 include/nbl/builtin/hlsl/rwmc/rwmc.hlsl

diff --git a/examples_tests b/examples_tests
index bbc8ab80fe..badb4a615f 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit bbc8ab80fecf44abb9b03f4fa147918fee7c310f
+Subproject commit badb4a615f3d379cb494ad2b4bb2d12bad6ff9a9
diff --git a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl
index c272eeb1ab..8c7251214d 100644
--- a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl
+++ b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl
@@ -16,8 +16,15 @@ namespace concepts
 {
 namespace accessors
 {
+
+// concept `LoadableImage` translates to something like this:
+//template<typename U, typename T, int32_t Dims>
+//concept LoadableImage = requires(U a, vector<uint16_t, Dims> uv, uint16_t layer) {
+//    ::nbl::hlsl::is_same_v<decltype(declval<U>().template get<T,Dims>(uv,layer)), vector<T,4>>;
+//};
+
 // declare concept
-#define NBL_CONCEPT_NAME StorableImage
+#define NBL_CONCEPT_NAME LoadableImage
 #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)
 #define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims)
 // not the greatest syntax but works
@@ -26,12 +33,12 @@ namespace accessors
 #define NBL_CONCEPT_PARAM_2 (layer,uint16_t)
 // start concept
 NBL_CONCEPT_BEGIN(3)
-// need to be defined AFTER the cocnept begins
+// need to be defined AFTER the concept begins
 #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
 NBL_CONCEPT_END(
-    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get<T,Dims>(uv,layer)) , ::nbl::hlsl::is_same_v, vector<T,4>))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get<T,Dims>(uv,layer)), ::nbl::hlsl::is_same_v, vector<T,4>))
 );
 #undef layer
 #undef uv
@@ -39,7 +46,7 @@ NBL_CONCEPT_END(
 #include <nbl/builtin/hlsl/concepts/__end.hlsl>
 
 // declare concept
-#define NBL_CONCEPT_NAME MipmappedStorableImage
+#define NBL_CONCEPT_NAME MipmappedLoadableImage
 #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)
 #define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims)
 // not the greatest syntax but works
diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl
index 6678a66942..77cfb3c283 100644
--- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl
+++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl
@@ -1,8 +1,10 @@
 #ifndef _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_
 #define _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_
-#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/cpp_compat/promote.hlsl>
 #include <nbl/builtin/hlsl/vector_utils/vector_traits.hlsl>
 #include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl>
+#include <nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl>
 
 namespace nbl
 {
@@ -11,86 +13,82 @@ namespace hlsl
 namespace rwmc
 {
 
-struct CascadeSettings
+template<typename CascadeLayerType, uint32_t CascadeCount NBL_PRIMARY_REQUIRES(concepts::Vector<CascadeLayerType>)
+struct CascadeAccumulator
 {
-    uint32_t size;
-    uint32_t start;
-    uint32_t base;
-};
+    struct CascadeEntry
+    {
+        uint32_t cascadeSampleCounter[CascadeCount];
+        CascadeLayerType data[CascadeCount];
 
-template<typename CascadeLayerType, uint32_t CascadeSize>
-struct CascadeEntry
-{
-    CascadeLayerType data[CascadeSize];
-};
+        void addSampleIntoCascadeEntry(CascadeLayerType _sample, uint32_t lowerCascadeIndex, float lowerCascadeLevelWeight, float higherCascadeLevelWeight, uint32_t sampleCount)
+        {
+            const float reciprocalSampleCount = 1.0f / float(sampleCount);
+
+            uint32_t lowerCascadeSampleCount = cascadeSampleCounter[lowerCascadeIndex];
+            data[lowerCascadeIndex] += (_sample * lowerCascadeLevelWeight - (sampleCount - lowerCascadeSampleCount) * data[lowerCascadeIndex]) * reciprocalSampleCount;
+            cascadeSampleCounter[lowerCascadeIndex] = sampleCount;
+
+            uint32_t higherCascadeIndex = lowerCascadeIndex + 1u;
+            if (higherCascadeIndex < CascadeCount)
+            {
+                uint32_t higherCascadeSampleCount = cascadeSampleCounter[higherCascadeIndex];
+                data[higherCascadeIndex] += (_sample * higherCascadeLevelWeight - (sampleCount - higherCascadeSampleCount) * data[higherCascadeIndex]) * reciprocalSampleCount;
+                cascadeSampleCounter[higherCascadeIndex] = sampleCount;
+            }
+        }
+    };
 
-template<typename CascadeLayerType, uint32_t CascadeSize>
-struct CascadeAccumulator
-{
-    using output_storage_type = CascadeEntry<CascadeLayerType, CascadeSize>;
-    using initialization_data = CascadeSettings;
+    using cascade_layer_scalar_type = typename vector_traits<CascadeLayerType>::scalar_type;
+    using this_t = CascadeAccumulator<CascadeLayerType, CascadeCount>;
+    using output_storage_type = CascadeEntry;
+    using initialization_data = SplattingParameters;
     output_storage_type accumulation;
-    uint32_t cascadeSampleCounter[CascadeSize];
-    CascadeSettings cascadeSettings;
+    
+    SplattingParameters splattingParameters;
 
-    void initialize(in CascadeSettings settings)
+    static this_t create(NBL_CONST_REF_ARG(SplattingParameters) settings)
     {
-        for (int i = 0; i < CascadeSize; ++i)
+        this_t retval;
+        for (int i = 0; i < CascadeCount; ++i)
         {
-            accumulation.data[i] = (CascadeLayerType)0.0f;
-            cascadeSampleCounter[i] = 0u;
+            retval.accumulation.data[i] = promote<CascadeLayerType, float32_t>(0.0f);
+            retval.accumulation.cascadeSampleCounter[i] = 0u;
         }
+        retval.splattingParameters = settings;
 
-        cascadeSettings.size = settings.size;
-        cascadeSettings.start = settings.start;
-        cascadeSettings.base = settings.base;
+        return retval;
     }
-
-    typename vector_traits<CascadeLayerType>::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col)
+    
+    cascade_layer_scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col)
     {
         return hlsl::dot<CascadeLayerType>(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col);
     }
 
     // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp
-    void addSample(uint32_t sampleIndex, float32_t3 sample)
+    void addSample(uint32_t sampleCount, CascadeLayerType _sample)
     {
-        float lowerScale = cascadeSettings.start;
-        float upperScale = lowerScale * cascadeSettings.base;
-
-        const float luma = getLuma(sample);
-
-        uint32_t lowerCascadeIndex = 0u;
-        while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2)
-        {
-            lowerScale = upperScale;
-            upperScale *= cascadeSettings.base;
-            ++lowerCascadeIndex;
-        }
-
-        float lowerCascadeLevelWeight;
-        float higherCascadeLevelWeight;
-
-        if (luma <= lowerScale)
-            lowerCascadeLevelWeight = 1.0f;
-        else if (luma < upperScale)
-            lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale));
-        else // Inf, NaN ...
-            lowerCascadeLevelWeight = 0.0f;
-
-        if (luma < upperScale)
-            higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight);
-        else
-            higherCascadeLevelWeight = upperScale / luma;
-
-        uint32_t higherCascadeIndex = lowerCascadeIndex + 1u;
-
-        const uint32_t sampleCount = sampleIndex + 1u;
-        const float reciprocalSampleCount = 1.0f / float(sampleCount);
-        accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount;
-        accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount;
-        cascadeSampleCounter[lowerCascadeIndex] = sampleCount;
-        cascadeSampleCounter[higherCascadeIndex] = sampleCount;
+        const cascade_layer_scalar_type log2Start = splattingParameters.log2Start;
+        const cascade_layer_scalar_type log2Base = splattingParameters.log2Base;
+        const cascade_layer_scalar_type luma = getLuma(_sample);
+        const cascade_layer_scalar_type log2Luma = log2<cascade_layer_scalar_type>(luma);
+        const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base;
+        const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1);
+        // index from the CLAMPED cascade so it always lies in [0,CascadeCount-1]: c<=0 -> 0, c>=Count-1 -> Count-1
+        uint32_t lowerCascadeIndex = floor<cascade_layer_scalar_type>(clampedCascade);
+        // 0 whenever clamped or `cascade` is integer (when `clampedCascade` is integer)
+        cascade_layer_scalar_type higherCascadeWeight = clampedCascade - floor<cascade_layer_scalar_type>(clampedCascade);
+        // never 0 thanks to magic of `1-fract(x)`
+        cascade_layer_scalar_type lowerCascadeWeight = cascade_layer_scalar_type(1) - higherCascadeWeight;
+
+        // handle super bright sample case
+        if (cascade > CascadeCount - 1)
+            lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma);
+
+        accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount);
     }
+
+    
 };
 
 }
diff --git a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl
new file mode 100644
index 0000000000..7509eac493
--- /dev/null
+++ b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl
@@ -0,0 +1,45 @@
+#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace rwmc
+{
+
+struct ResolveParameters
+{
+	uint32_t lastCascadeIndex;
+	float initialEmin; // a minimum image brightness that we always consider reliable
+	float reciprocalBase;
+	float reciprocalN;
+	float reciprocalKappa;
+	float colorReliabilityFactor;
+	float NOverKappa;
+};
+
+ResolveParameters computeResolveParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize)
+{
+	ResolveParameters retval;
+	retval.lastCascadeIndex = cascadeSize - 1u;
+	retval.initialEmin = minReliableLuma;
+	retval.reciprocalBase = 1.f / base;
+	const float N = float(sampleCount);
+	retval.reciprocalN = 1.f / N;
+	retval.reciprocalKappa = 1.f / kappa;
+	// if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have
+	// allow up to ~<cascadeBase> more energy in one sample to lessen bias in some cases
+	retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa;
+	retval.NOverKappa = N * retval.reciprocalKappa;
+
+	return retval;
+}
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl
new file mode 100644
index 0000000000..e74dd0e5bd
--- /dev/null
+++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl
@@ -0,0 +1,23 @@
+#ifndef _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace rwmc
+{
+
+struct SplattingParameters
+{
+    float log2Start;
+    float log2Base;
+};
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl
new file mode 100644
index 0000000000..cb8d3b27d1
--- /dev/null
+++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl
@@ -0,0 +1,189 @@
+#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl>
+#include <nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl>
+#include <nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace rwmc
+{
+		// declare concept: `a.calcLuma(vec)` must be a valid expression for an `a` of type T and a `vec` of type vector<VectorScalarType, Dims>
+#define NBL_CONCEPT_NAME ResolveAccessorBase
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(VectorScalarType)(Dims)
+// not the greatest syntax but works
+#define NBL_CONCEPT_PARAM_0 (a,T)
+#define NBL_CONCEPT_PARAM_1 (scalar,VectorScalarType)
+#define NBL_CONCEPT_PARAM_2 (vec,vector<VectorScalarType, Dims>)
+	NBL_CONCEPT_BEGIN(3) // start concept; the count has to cover all three NBL_CONCEPT_PARAM_* declared above, `vec` (PARAM_2) is the one the requirement uses
+// need to be defined AFTER the concept begins
+#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define scalar NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define vec NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+	((NBL_CONCEPT_REQ_EXPR)((a.calcLuma(vec))))
+);
+#undef a
+#undef scalar
+#undef vec
+#include <nbl/builtin/hlsl/concepts/__end.hlsl>
+
+/* A type `T` satisfies `ResolveAccessor<T, VectorScalarType, Dims>` when it is required to:
+*	- satisfy the `concepts::accessors::LoadableImage<T, VectorScalarType, Dims>` concept requirements
+*	- satisfy `ResolveAccessorBase`, i.e. implement a function called `calcLuma` which calculates luma from a pixel value
+*/
+
+template<typename T, typename VectorScalarType, int32_t Dims>
+NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase<T, VectorScalarType, Dims> && concepts::accessors::LoadableImage<T, VectorScalarType, Dims>;
+
+template<typename OutputScalar>
+struct ResolveAccessorAdaptor // accessor over the cascade image array: bounds-checked texel loads plus luma computation
+{
+	using output_scalar_type = OutputScalar;
+	using output_type = vector<OutputScalar, 4>;
+	NBL_CONSTEXPR int32_t image_dimension = 2;
+
+	RWTexture2DArray<float32_t4> cascade; // `get` addresses it as (uv, layer)
+
+	float32_t calcLuma(in float32_t3 col) // luma of `col`: dot product with element [1] of the transposed scRGBtoXYZ matrix
+	{
+		return hlsl::dot<float32_t3>(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col);
+	}
+
+	template<typename OutputScalarType, int32_t Dimension>
+	output_type get(vector<uint16_t, 2> uv, uint16_t layer) // returns the texel at `uv` in array layer `layer`, or all zeros when `uv` lies outside the image
+	{
+		uint32_t imgWidth, imgHeight, layers;
+		cascade.GetDimensions(imgWidth, imgHeight, layers);
+		int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight);
+
+		if (any(uv < int16_t2(0, 0)) || any(uv >= cascadeImageDimension)) // valid texels are [0, dimension - 1], so `uv == dimension` is already out of bounds
+			return vector<OutputScalar, 4>(0, 0, 0, 0);
+
+		return cascade.Load(int32_t3(uv, int32_t(layer)));
+	}
+};
+
+template<typename CascadeAccessor, typename OutputColorType> //NBL_PRIMARY_REQUIRES(ResolveAccessor<CascadeAccessor, typename CascadeAccessor::output_scalar_type, CascadeAccessor::image_dimension>)
+struct Resolver // combines all cascades at one pixel into a single colour, scaling each cascade's centre value by an estimated reliability
+{
+	using output_type = OutputColorType;
+
+	struct CascadeSample
+	{
+		float32_t3 centerValue; // centre texel of the 3x3 neighbourhood
+		float normalizedCenterLuma; // luma of the centre texel * reciprocalBaseI
+		float normalizedNeighbourhoodAverageLuma; // (luma of the summed 8 neighbours * reciprocalBaseI + normalizedCenterLuma) / 9
+	};
+
+	static Resolver create(NBL_REF_ARG(ResolveParameters) resolveParameters) // factory: returns a Resolver holding a copy of the given parameters
+	{
+		Resolver retval;
+		retval.params = resolveParameters;
+
+		return retval;
+	}
+
+	output_type operator()(NBL_REF_ARG(CascadeAccessor) acc, const int16_t2 coord) // resolves the pixel at `coord`: sum over cascades of centerValue * reliability
+	{
+		float reciprocalBaseI = 1.f; // running power of params.reciprocalBase; advanced right before each `next` cascade is sampled
+		CascadeSample curr = __sampleCascade(acc, coord, 0u, reciprocalBaseI);
+
+		float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f);
+		float Emin = params.initialEmin;
+
+		float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; // only read when notFirstCascade, i.e. after being assigned at the end of the previous iteration
+		for (int16_t i = 0u; i <= params.lastCascadeIndex; i++) // NOTE(review): signed 16-bit counter compared against the uint32_t lastCascadeIndex
+		{
+			const bool notFirstCascade = i != 0;
+			const bool notLastCascade = i != params.lastCascadeIndex;
+
+			CascadeSample next; // left unassigned on the last cascade
+			if (notLastCascade)
+			{
+				reciprocalBaseI *= params.reciprocalBase;
+				next = __sampleCascade(acc, coord, int16_t(i + 1), reciprocalBaseI);
+			}
+
+			float reliability = 1.f;
+			// sample counting-based reliability estimation
+			if (params.reciprocalKappa <= 1.f) // for positive kappa this means kappa >= 1; otherwise every cascade keeps reliability 1
+			{
+				float localReliability = curr.normalizedCenterLuma;
+				// reliability in 3x3 pixel block (see robustness)
+				float globalReliability = curr.normalizedNeighbourhoodAverageLuma;
+				if (notFirstCascade)
+				{
+					localReliability += prevNormalizedCenterLuma;
+					globalReliability += prevNormalizedNeighbourhoodAverageLuma;
+				}
+				if (notLastCascade)
+				{
+					localReliability += next.normalizedCenterLuma;
+					globalReliability += next.normalizedNeighbourhoodAverageLuma;
+				}
+				// check if above minimum sampling threshold (avg 9 sample occurrences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: ternary op is in reverse)
+				reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability;
+				{
+					const float accumLuma = acc.calcLuma(accumulation);
+					if (accumLuma > Emin) // Emin only ever grows: it tracks the larger of initialEmin and the luma accumulated so far
+						Emin = accumLuma;
+
+					const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor;
+
+					reliability += colorReliability;
+					reliability *= params.NOverKappa;
+					reliability -= params.reciprocalKappa;
+					reliability = clamp(reliability * 0.5f, 0.f, 1.f); // final weight is kept within [0, 1]
+				}
+			}
+			accumulation += curr.centerValue * reliability;
+
+			prevNormalizedCenterLuma = curr.normalizedCenterLuma;
+			prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma;
+			curr = next; // on the last iteration `next` was never assigned, but `curr` is not read again
+		}
+
+		return accumulation;
+	}
+
+	ResolveParameters params;
+
+	// pseudo private stuff: __sampleCascade loads the 3x3 neighbourhood around `coord` from one cascade and returns its centre value and normalized lumas
+
+	CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, float reciprocalBaseI)
+	{
+		CascadeAccessor::output_type tmp;
+		output_type neighbourhood[9];
+		neighbourhood[0] = acc.template get<float, 2>(coord + int16_t2(-1, -1), cascadeIndex);
+		neighbourhood[1] = acc.template get<float, 2>(coord + int16_t2(0, -1), cascadeIndex);
+		neighbourhood[2] = acc.template get<float, 2>(coord + int16_t2(1, -1), cascadeIndex);
+		neighbourhood[3] = acc.template get<float, 2>(coord + int16_t2(-1, 0), cascadeIndex);
+		neighbourhood[4] = acc.template get<float, 2>(coord + int16_t2(0, 0), cascadeIndex); // centre texel
+		neighbourhood[5] = acc.template get<float, 2>(coord + int16_t2(1, 0), cascadeIndex);
+		neighbourhood[6] = acc.template get<float, 2>(coord + int16_t2(-1, 1), cascadeIndex);
+		neighbourhood[7] = acc.template get<float, 2>(coord + int16_t2(0, 1), cascadeIndex);
+		neighbourhood[8] = acc.template get<float, 2>(coord + int16_t2(1, 1), cascadeIndex);
+
+		// numerical robustness
+		float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) +
+			((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); // pairwise sum of the 8 neighbours, centre (index 4) excluded
+
+		CascadeSample retval;
+		retval.centerValue = neighbourhood[4];
+		retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = acc.calcLuma(neighbourhood[4]) * reciprocalBaseI; // the average temporarily holds the centre luma, it is folded into the mean on the next line
+		retval.normalizedNeighbourhoodAverageLuma = (acc.calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f;
+		return retval;
+	}
+};
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl
deleted file mode 100644
index d7b151af86..0000000000
--- a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl
+++ /dev/null
@@ -1,160 +0,0 @@
-#ifndef _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_
-#define _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_
-
-#include "nbl/builtin/hlsl/cpp_compat.hlsl"
-#include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl>
-
-namespace nbl
-{
-namespace hlsl
-{
-namespace rwmc
-{
-namespace impl
-{
-
-struct CascadeSample
-{
-	float32_t3 centerValue;
-	float normalizedCenterLuma;
-	float normalizedNeighbourhoodAverageLuma;
-};
-
-// TODO: figure out what values should pixels outside have, 0.0f is incorrect
-float32_t3 sampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, in RWTexture2DArray<float32_t4> cascade, uint32_t cascadeIndex)
-{
-	const int32_t2 texelCoord = currentCoord + offset;
-	if (any(texelCoord < int32_t2(0, 0)))
-		return float32_t3(0.0f, 0.0f, 0.0f);
-
-	float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex)));
-	return float32_t3(output.r, output.g, output.b);
-}
-
-float32_t calcLuma(in float32_t3 col)
-{
-	return hlsl::dot<float32_t3>(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col);
-}
-
-CascadeSample SampleCascade(in int32_t2 coord, in RWTexture2DArray<float32_t4> cascade, in uint cascadeIndex, in float reciprocalBaseI)
-{
-	float32_t3 neighbourhood[9];
-	neighbourhood[0] = sampleCascadeTexel(coord, int32_t2(-1, -1), cascade, cascadeIndex);
-	neighbourhood[1] = sampleCascadeTexel(coord, int32_t2(0, -1), cascade, cascadeIndex);
-	neighbourhood[2] = sampleCascadeTexel(coord, int32_t2(1, -1), cascade, cascadeIndex);
-	neighbourhood[3] = sampleCascadeTexel(coord, int32_t2(-1, 0), cascade, cascadeIndex);
-	neighbourhood[4] = sampleCascadeTexel(coord, int32_t2(0, 0), cascade, cascadeIndex);
-	neighbourhood[5] = sampleCascadeTexel(coord, int32_t2(1, 0), cascade, cascadeIndex);
-	neighbourhood[6] = sampleCascadeTexel(coord, int32_t2(-1, 1), cascade, cascadeIndex);
-	neighbourhood[7] = sampleCascadeTexel(coord, int32_t2(0, 1), cascade, cascadeIndex);
-	neighbourhood[8] = sampleCascadeTexel(coord, int32_t2(1, 1), cascade, cascadeIndex);
-
-	// numerical robustness
-	float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) +
-		((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8]));
-
-	CascadeSample retval;
-	retval.centerValue = neighbourhood[4];
-	retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI;
-	retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f;
-	return retval;
-}
-
-} // namespace impl
-
-struct ReweightingParameters
-{
-	uint32_t lastCascadeIndex;
-	float initialEmin; // a minimum image brightness that we always consider reliable
-	float reciprocalBase;
-	float reciprocalN;
-	float reciprocalKappa;
-	float colorReliabilityFactor;
-	float NOverKappa;
-};
-
-ReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize)
-{
-	ReweightingParameters retval;
-	retval.lastCascadeIndex = cascadeSize - 1u;
-	retval.initialEmin = minReliableLuma;
-	retval.reciprocalBase = 1.f / base;
-	const float N = float(sampleCount);
-	retval.reciprocalN = 1.f / N;
-	retval.reciprocalKappa = 1.f / kappa;
-	// if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have
-	// allow up to ~<cascadeBase> more energy in one sample to lessen bias in some cases
-	retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa;
-	retval.NOverKappa = N * retval.reciprocalKappa;
-
-	return retval;
-}
-
-float32_t3 reweight(in ReweightingParameters params, in RWTexture2DArray<float32_t4> cascade, in int32_t2 coord)
-{
-	float reciprocalBaseI = 1.f;
-	impl::CascadeSample curr = impl::SampleCascade(coord, cascade, 0u, reciprocalBaseI);
-
-	float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f);
-	float Emin = params.initialEmin;
-
-	float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma;
-	for (uint i = 0u; i <= params.lastCascadeIndex; i++)
-	{
-		const bool notFirstCascade = i != 0u;
-		const bool notLastCascade = i != params.lastCascadeIndex;
-
-		impl::CascadeSample next;
-		if (notLastCascade)
-		{
-			reciprocalBaseI *= params.reciprocalBase;
-			next = impl::SampleCascade(coord, cascade, i + 1u, reciprocalBaseI);
-		}
-
-		float reliability = 1.f;
-		// sample counting-based reliability estimation
-		if (params.reciprocalKappa <= 1.f)
-		{
-			float localReliability = curr.normalizedCenterLuma;
-			// reliability in 3x3 pixel block (see robustness)
-			float globalReliability = curr.normalizedNeighbourhoodAverageLuma;
-			if (notFirstCascade)
-			{
-				localReliability += prevNormalizedCenterLuma;
-				globalReliability += prevNormalizedNeighbourhoodAverageLuma;
-			}
-			if (notLastCascade)
-			{
-				localReliability += next.normalizedCenterLuma;
-				globalReliability += next.normalizedNeighbourhoodAverageLuma;
-			}
-			// check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse)
-			reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability;
-			{
-				const float accumLuma = impl::calcLuma(accumulation);
-				if (accumLuma > Emin)
-					Emin = accumLuma;
-
-				const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor;
-
-				reliability += colorReliability;
-				reliability *= params.NOverKappa;
-				reliability -= params.reciprocalKappa;
-				reliability = clamp(reliability * 0.5f, 0.f, 1.f);
-			}
-		}
-		accumulation += curr.centerValue * reliability;
-
-		prevNormalizedCenterLuma = curr.normalizedCenterLuma;
-		prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma;
-		curr = next;
-	}
-
-	return accumulation;
-}
-
-}
-}
-}
-
-#endif
\ No newline at end of file
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index e0bd6921cc..c4f13ab2a1 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -390,7 +390,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl")
 #rwmc
-LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/rwmc.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/resolve.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/CascadeAccumulator.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/SplattingParameters.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/ResolveParameters.hlsl")
 
 ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")

From 1acf7d63a46c370a70553368a195279cac208fb5 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 14 Nov 2025 14:49:25 +0700
Subject: [PATCH 36/39] some minor fixes to fresnel orientedEta usage

---
 include/nbl/builtin/hlsl/bxdf/common.hlsl  | 6 +++---
 include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl
index ebad0a925c..c114222c7c 100644
--- a/include/nbl/builtin/hlsl/bxdf/common.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl
@@ -615,7 +615,7 @@ struct SIsotropicMicrofacetCache
 
         // not coming from the medium (reflected) OR
         // exiting at the macro scale AND ( (not L outside the cone of possible directions given IoR with constraint VdotH*LdotH<0.0) OR (microfacet not facing toward the macrosurface, i.e. non heightfield profile of microsurface) )
-        const bool valid = ComputeMicrofacetNormal<scalar_type>::isValidMicrofacet(transmitted, VdotL, retval.absNdotH, computeMicrofacetNormal.orientedEta);
+        const bool valid = ComputeMicrofacetNormal<scalar_type>::isValidMicrofacet(transmitted, VdotL, retval.absNdotH, fresnel::OrientedEtas<monochrome_type>::create(1.0, computeMicrofacetNormal.orientedEta));
         if (valid)
         {
             retval.VdotH = hlsl::dot<vector3_type>(computeMicrofacetNormal.V,H);
@@ -638,7 +638,7 @@ struct SIsotropicMicrofacetCache
         const bool transmitted = ComputeMicrofacetNormal<scalar_type>::isTransmissionPath(NdotV,NdotL);
 
         ComputeMicrofacetNormal<scalar_type> computeMicrofacetNormal = ComputeMicrofacetNormal<scalar_type>::create(V,L,N,1.0);
-        computeMicrofacetNormal.orientedEta = orientedEtas;
+        computeMicrofacetNormal.orientedEta = orientedEtas.value[0];
         
         return create(transmitted, computeMicrofacetNormal, VdotL, N, H);
     }
@@ -664,7 +664,7 @@ struct SIsotropicMicrofacetCache
         const bool transmitted = ComputeMicrofacetNormal<scalar_type>::isTransmissionPath(interaction.getNdotV(),_sample.getNdotL());
 
         ComputeMicrofacetNormal<scalar_type> computeMicrofacetNormal = ComputeMicrofacetNormal<scalar_type>::create(V,L,N,1.0);
-        computeMicrofacetNormal.orientedEta = orientedEtas;
+        computeMicrofacetNormal.orientedEta = orientedEtas.value[0];
         
         return create(transmitted, computeMicrofacetNormal, hlsl::dot<vector3_type>(V, L), N, H);
     }
diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl
index e6bb3f98c2..f7655e9978 100644
--- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl
+++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl
@@ -141,7 +141,7 @@ struct ComputeMicrofacetNormal
     vector_type unnormalized(const bool _refract)
     {
         assert(hlsl::dot(V, L) <= -hlsl::min(orientedEta, scalar_type(1.0) / orientedEta));
-        const scalar_type etaFactor = hlsl::mix(scalar_type(1.0), orientedEta.value, _refract);
+        const scalar_type etaFactor = hlsl::mix(scalar_type(1.0), orientedEta, _refract);
         vector_type tmpH = V + L * etaFactor;
         tmpH = ieee754::flipSign<vector_type>(tmpH, _refract && orientedEta > scalar_type(1.0));
         return tmpH;

From 72429cb4ff829fe631d5ec52d46c67ea329af1a8 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 14 Nov 2025 14:49:59 +0700
Subject: [PATCH 37/39] refactor usage of angle adding

---
 .../nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl    | 8 ++++++--
 include/nbl/builtin/hlsl/shapes/rectangle.hlsl            | 7 ++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
index 663cd5e3d1..127a7194b2 100644
--- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl
@@ -43,8 +43,12 @@ struct SphericalRectangle
             -n_z[3] * n_z[0]
         );
 
-        scalar_type p = math::getSumofArccosAB(cosGamma[0], cosGamma[1]);
-        scalar_type q = math::getSumofArccosAB(cosGamma[2], cosGamma[3]);
+        math::sincos_accumulator<scalar_type> angle_adder = math::sincos_accumulator<scalar_type>::create(cosGamma[0]);
+        angle_adder.addCosine(cosGamma[1]);
+        scalar_type p = angle_adder.getSumofArccos();
+        angle_adder = math::sincos_accumulator<scalar_type>::create(cosGamma[2]);
+        angle_adder.addCosine(cosGamma[3]);
+        scalar_type q = angle_adder.getSumofArccos();
 
         const scalar_type k = 2 * numbers::pi<scalar_type> - q;
         const scalar_type b0 = n_z[0];
diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
index f1a1e37575..434918cc09 100644
--- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
+++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
@@ -8,6 +8,7 @@
 #include <nbl/builtin/hlsl/cpp_compat.hlsl>
 #include <nbl/builtin/hlsl/numbers.hlsl>
 #include <nbl/builtin/hlsl/math/functions.hlsl>
+#include <nbl/builtin/hlsl/math/angle_adding.hlsl>
 
 namespace nbl
 {
@@ -49,7 +50,11 @@ struct SphericalRectangle
             -n_z[2] * n_z[3],
             -n_z[3] * n_z[0]
         );
-        return math::getSumofArccosABCD(cosGamma[0], cosGamma[1], cosGamma[2], cosGamma[3]) - 2 * numbers::pi<float>;
+        math::sincos_accumulator<scalar_type> angle_adder = math::sincos_accumulator<scalar_type>::create(cosGamma[0]); // replaces getSumofArccosABCD: feed the four cosines into the accumulator one by one
+        angle_adder.addCosine(cosGamma[1]);
+        angle_adder.addCosine(cosGamma[2]);
+        angle_adder.addCosine(cosGamma[3]);
+        return angle_adder.getSumofArccos() - scalar_type(2.0) * numbers::pi<scalar_type>; // keep the whole expression in scalar_type instead of mixing in a float pi
     }
 
     vector3_type r0;

From 381bb50e3e56a14fe538a0fe0731b87aa2754eb2 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 14 Nov 2025 14:50:42 +0700
Subject: [PATCH 38/39] temp? fix for mix_helper on floats

---
 include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
index cd89ce45d1..0c595bb0e2 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
+++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
@@ -256,8 +256,8 @@ struct mix_helper<T, T NBL_PARTIAL_REQ_BOT(spirv::FMixIsCallable<T>) >
 };
 
 template<typename T, typename U>
-NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable<T,U>)
-struct mix_helper<T, U NBL_PARTIAL_REQ_BOT(spirv::SelectIsCallable<T,U>) >
+NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable<T,U> && concepts::Boolean<U>)
+struct mix_helper<T, U NBL_PARTIAL_REQ_BOT(spirv::SelectIsCallable<T,U> && concepts::Boolean<U>) >
 {
 	using return_t = conditional_t<is_vector_v<T>, vector<typename vector_traits<T>::scalar_type, vector_traits<T>::Dimension>, T>;
 	// for a component of a that is false, the corresponding component of x is returned

From a40a025384342de5467d6f35112a915a84984d6f Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 14 Nov 2025 17:06:17 +0700
Subject: [PATCH 39/39] added missing typename qualifier

---
 include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl
index cb8d3b27d1..6484ef38b7 100644
--- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl
+++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl
@@ -158,7 +158,7 @@ struct Resolver
 
 	CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, float reciprocalBaseI)
 	{
-		CascadeAccessor::output_type tmp;
+		typename CascadeAccessor::output_type tmp;
 		output_type neighbourhood[9];
 		neighbourhood[0] = acc.template get<float, 2>(coord + int16_t2(-1, -1), cascadeIndex);
 		neighbourhood[1] = acc.template get<float, 2>(coord + int16_t2(0, -1), cascadeIndex);