From efa37a7600ccd712039b0c23a0fc71b19ace148c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 18 Sep 2025 13:31:56 +0700 Subject: [PATCH 01/52] Remove wage and store weighted normal instead --- include/nbl/asset/utils/CPolygonGeometryManipulator.h | 4 ++-- src/nbl/asset/utils/CSmoothNormalGenerator.cpp | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index b9ee660309..247df17f03 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -22,9 +22,9 @@ class NBL_API2 CPolygonGeometryManipulator { uint32_t index; //offset of the vertex into index buffer uint32_t hash; // - float wage; //angle wage of the vertex + hlsl::float32_t3 weightedNormal; + hlsl::float32_t3 parentTriangleFaceNormal; hlsl::float32_t3 position; //position of the vertex in 3D space - hlsl::float32_t3 parentTriangleFaceNormal; // }; using VxCmpFunction = std::function; diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 73ac79b5ec..f9e414b143 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -219,9 +219,9 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as //set data for m_vertices const auto angleWages = getAngleWeight(v1, v2, v3); - vertices.add({ i, 0, angleWages.x, v1, faceNormal}); - vertices.add({ i + 1, 0, angleWages.y, v2, faceNormal}); - vertices.add({ i + 2, 0, angleWages.z, v3, faceNormal}); + vertices.add({ i, 0, faceNormal * angleWages.x, faceNormal, v1}); + vertices.add({ i + 1, 0, faceNormal * angleWages.y, faceNormal,v2}); + vertices.add({ i + 2, 0, faceNormal * angleWages.z, faceNormal, v3}); } vertices.validate(); @@ -260,7 +260,7 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne for (core::vector::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++) { std::array neighboringCells = vertexHashMap.getNeighboringCellHashes(*processedVertex); - hlsl::float32_t3 normal = processedVertex->parentTriangleFaceNormal * processedVertex->wage; + hlsl::float32_t3 normal = processedVertex->weightedNormal; //iterate among all neighboring cells for (int i = 0; i < 8; i++) @@ -273,7 +273,7 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne vxcmp(*processedVertex, *bounds.begin, polygon)) { //TODO: better mean calculation algorithm - normal += bounds.begin->parentTriangleFaceNormal * bounds.begin->wage; + normal += bounds.begin->weightedNormal; } } } @@ -373,7 +373,6 @@ core::smart_refctd_ptr CSmoothNormalGenerator::weldVertices for (core::vector::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++) { std::array neighboringCells = vertices.getNeighboringCellHashes(*processedVertex); - hlsl::float32_t3 normal = processedVertex->parentTriangleFaceNormal * processedVertex->wage; auto& groupIndex = groupIndexes[processedVertex->index]; From cc4a219905dc40b7c8b5b70c3d1c34593f969b7a Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 4 Oct 2025 10:30:20 +0700 Subject: [PATCH 02/52] Move getAngleWeight to hlsl/shapes/triangle.hlsl --- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 43 +++++++++++++++++++ .../asset/utils/CSmoothNormalGenerator.cpp | 41 ++++-------------- src/nbl/builtin/CMakeLists.txt | 1 + 3 files changed, 52 insertions(+), 33 deletions(-) create mode 100644 include/nbl/builtin/hlsl/shapes/triangle.hlsl diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl new file mode 100644 index 0000000000..2b24a6b525 --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ + +namespace util +{ + template + vector GetAngleWeight(const vector& e1, const vector& e2, const vector& e3) + { + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = dot(e1, e1); + const float_t asqrt = sqrt(a); + const float_t b = dot(e2, e2); + const float_t bsqrt = sqrt(b); + const float_t c = dot(e3, e3); + const float_t csqrt = sqrt(c); + + // use them to find the angle at each vertex + return vector( + acosf((b + c - a) / (2.f * bsqrt * csqrt)), + acosf((-b + c + a) / (2.f * asqrt * csqrt)), + acosf((b - c + a) / (2.f * bsqrt * asqrt))); + } +} + +} +} +} + +#endif diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index f9e414b143..35ec4856d2 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -5,6 +5,7 @@ #include "CSmoothNormalGenerator.h" #include "nbl/core/declarations.h" +#include "nbl/builtin/hlsl/shapes/triangle.hlsl" #include #include @@ -71,40 +72,14 @@ static bool compareVertexPosition(const hlsl::float32_t3& a, const hlsl::float32 return (difference.x <= epsilon && difference.y <= epsilon && difference.z <= epsilon); } -static hlsl::float32_t3 getAngleWeight( - const hlsl::float32_t3& v1, - const hlsl::float32_t3& v2, - const hlsl::float32_t3& v3) -{ - auto distancesquared = [](const hlsl::float32_t3& v1, const hlsl::float32_t3& v2) - { - const auto diff = v1 - v2; - return hlsl::dot(diff, diff); - }; - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float a = distancesquared(v2, v3); - const float asqrt = sqrt(a); - const float b = distancesquared(v1,v3); - const float bsqrt = sqrt(b); - const float c = distancesquared(v1,v2); - const float csqrt = sqrt(c); - - // use them to find the angle at each vertex - return hlsl::float32_t3( - acosf((b + c - a) / (2.f * bsqrt * csqrt)), - acosf((-b + c + a) / (2.f * asqrt * csqrt)), - acosf((b - c + a) / (2.f * bsqrt * asqrt))); -} - core::smart_refctd_ptr CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, bool enableWelding, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp) { - VertexHashMap vertexArray = setupData(polygon, epsilon); - const auto smoothPolygon = processConnectedVertices(polygon, vertexArray, epsilon,vxcmp); + VertexHashMap vertexHashMap = setupData(polygon, epsilon); + const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, epsilon,vxcmp); if (enableWelding) { - return weldVertices(smoothPolygon.get(), vertexArray, epsilon); + return weldVertices(smoothPolygon.get(), vertexHashMap, epsilon); } return smoothPolygon; } @@ -217,11 +192,11 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto faceNormal = normalize(cross(v2 - v1, v3 - v1)); //set data for m_vertices - const auto angleWages = getAngleWeight(v1, v2, v3); + const auto angleWages = hlsl::shapes::util::GetAngleWeight(v2 - v3, v1 - v3, v1 - v2); - vertices.add({ i, 0, faceNormal * angleWages.x, faceNormal, v1}); - vertices.add({ i + 1, 0, faceNormal * angleWages.y, faceNormal,v2}); - vertices.add({ i + 2, 0, faceNormal * angleWages.z, faceNormal, v3}); + vertices.add({ i, 0, faceNormal * angleWages.x, v1}); + vertices.add({ i + 1, 0, faceNormal * angleWages.y,v2}); + vertices.add({ i + 2, 0, faceNormal * angleWages.z, v3}); } vertices.validate(); diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 3b9fe1c39a..96fb60d535 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -317,6 +317,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/circle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/ellipse.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") # LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl") # From c018762d8ea9d5b7723f76e800ef6d45e5e860db Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 4 Oct 2025 10:32:43 +0700 Subject: [PATCH 03/52] Refactor erase to resize --- src/nbl/asset/utils/CSmoothNormalGenerator.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 35ec4856d2..8a1dd957d8 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -152,10 +152,10 @@ void CSmoothNormalGenerator::VertexHashMap::validate() // TODO: maybe use counting sort (or big radix) and use the histogram directly for the m_buckets auto finalSortedOutput = core::radix_sort(m_vertices.data(),m_vertices.data()+oldSize,oldSize,KeyAccessor()); // TODO: optimize out the erase - if (finalSortedOutput!=m_vertices.data()) - m_vertices.erase(m_vertices.begin(),m_vertices.begin()+oldSize); + if (finalSortedOutput != m_vertices.data()) + m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize); else - m_vertices.erase(m_vertices.begin()+oldSize,m_vertices.end()); + m_vertices.resize(oldSize); // TODO: are `m_buckets` even begin USED!? uint16_t prevHash = m_vertices[0].hash; @@ -179,9 +179,10 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as { const size_t idxCount = polygon->getPrimitiveCount() * 3; - VertexHashMap vertices(idxCount, std::min(16u * 1024u, core::roundUpToPoT(idxCount * 1.0f / 32.0f)), epsilon == 0.0f ? 0.00001f : epsilon * 2.f); + const auto cellCount = std::max(core::roundUpToPoT((idxCount + 31) >> 5), 4); + VertexHashMap vertices(idxCount, std::min(16u * 1024u, cellCount), epsilon == 0.0f ? 0.00001f : epsilon * 2.f); - for (uint32_t i = 0; i < idxCount; i += 3) + for (uint64_t i = 0; i < idxCount; i += 3) { //calculate face normal of parent triangle hlsl::float32_t3 v1, v2, v3; @@ -317,7 +318,7 @@ core::smart_refctd_ptr CSmoothNormalGenerator::weldVertices { struct Group { - uint32_t vertex_reference_index; // index to referenced vertex in the original polygon + uint64_t vertex_reference_index; // index to referenced vertex in the original polygon }; core::vector groups; groups.reserve(vertices.getVertexCount()); From 42ccf489dccf0fc9c86ad72ae123e1e2b1e00730 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 4 Oct 2025 12:01:28 +0700 Subject: [PATCH 04/52] Remove parentTriangleFaceNormal from SSNGVertexData --- include/nbl/asset/utils/CPolygonGeometryManipulator.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 247df17f03..591b6ebfb4 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -20,10 +20,9 @@ class NBL_API2 CPolygonGeometryManipulator //vertex data needed for CSmoothNormalGenerator struct SSNGVertexData { - uint32_t index; //offset of the vertex into index buffer + uint64_t index; //offset of the vertex into index buffer uint32_t hash; // hlsl::float32_t3 weightedNormal; - hlsl::float32_t3 parentTriangleFaceNormal; hlsl::float32_t3 position; //position of the vertex in 3D space }; @@ -247,7 +246,7 @@ class NBL_API2 CPolygonGeometryManipulator VxCmpFunction vxcmp = [](const CPolygonGeometryManipulator::SSNGVertexData& v0, const CPolygonGeometryManipulator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) { static constexpr float cosOf45Deg = 0.70710678118f; - return dot(v0.parentTriangleFaceNormal,v1.parentTriangleFaceNormal) > cosOf45Deg; + return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > cosOf45Deg; }); #if 0 // TODO: REDO From 05ee8eddd57dd1943b5c63de82638828a50b5eda Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 4 Oct 2025 13:04:25 +0700 Subject: [PATCH 05/52] Refactor getNeighboringCells --- .../asset/utils/CSmoothNormalGenerator.cpp | 56 ++++++++++--------- src/nbl/asset/utils/CSmoothNormalGenerator.h | 3 +- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 8a1dd957d8..b49e7115d6 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -235,11 +235,12 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne for (core::vector::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++) { - std::array neighboringCells = vertexHashMap.getNeighboringCellHashes(*processedVertex); + std::array neighboringCells; + const auto cellCount = vertexHashMap.getNeighboringCellHashes(neighboringCells.data(), *processedVertex); hlsl::float32_t3 normal = processedVertex->weightedNormal; //iterate among all neighboring cells - for (int i = 0; i < 8; i++) + for (int i = 0; i < cellCount; i++) { VertexHashMap::BucketBounds bounds = vertexHashMap.getBucketBoundsByHash(neighboringCells[i]); for (; bounds.begin != bounds.end; bounds.begin++) @@ -263,55 +264,55 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne return outPolygon; } -std::array CSmoothNormalGenerator::VertexHashMap::getNeighboringCellHashes(const CPolygonGeometryManipulator::SSNGVertexData & vertex) +uint8_t CSmoothNormalGenerator::VertexHashMap::getNeighboringCellHashes(uint32_t* outNeighbours, const CPolygonGeometryManipulator::SSNGVertexData& vertex) { - std::array neighbourhood; - - hlsl::float32_t3 cellFloatCoord = vertex.position / m_cellSize - hlsl::float32_t3(0.5f); + hlsl::float32_t3 cellFloatCoord = floor(vertex.position / m_cellSize - hlsl::float32_t3(0.5f)); hlsl::uint32_t3 neighbor = hlsl::uint32_t3(static_cast(cellFloatCoord.x), static_cast(cellFloatCoord.y), static_cast(cellFloatCoord.z)); + uint8_t neighbourCount = 0; + //left bottom near - neighbourhood[0] = hash(neighbor); + outNeighbours[neighbourCount] = hash(neighbor); + neighbourCount++; + + auto addUniqueNeighbour = [&neighbourCount, outNeighbours](uint32_t hashVal) + { + if (std::find(outNeighbours, outNeighbours + neighbourCount, hashVal) != outNeighbours + neighbourCount) + { + outNeighbours[neighbourCount] = hashVal; + neighbourCount++; + } + }; //right bottom near neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); - neighbourhood[1] = hash(neighbor); + addUniqueNeighbour(hash(neighbor)); //right bottom far neighbor = neighbor + hlsl::uint32_t3(0, 0, 1); - neighbourhood[2] = hash(neighbor); + addUniqueNeighbour(hash(neighbor)); //left bottom far neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); - neighbourhood[3] = hash(neighbor); + addUniqueNeighbour(hash(neighbor)); //left top far neighbor = neighbor + hlsl::uint32_t3(0, 1, 0); - neighbourhood[4] = hash(neighbor); + addUniqueNeighbour(hash(neighbor)); //right top far neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); - neighbourhood[5] = hash(neighbor); + addUniqueNeighbour(hash(neighbor)); //righ top near neighbor = neighbor - hlsl::uint32_t3(0, 0, 1); - neighbourhood[6] = hash(neighbor); + addUniqueNeighbour(hash(neighbor)); //left top near neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); - neighbourhood[7] = hash(neighbor); + addUniqueNeighbour(hash(neighbor)); - //erase duplicated hashes - for (int i = 0; i < 8; i++) - { - uint32_t currHash = neighbourhood[i]; - for (int j = i + 1; j < 8; j++) - { - if (neighbourhood[j] == currHash) - neighbourhood[j] = invalidHash; - } - } - return neighbourhood; + return neighbourCount; } core::smart_refctd_ptr CSmoothNormalGenerator::weldVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon) @@ -348,12 +349,13 @@ core::smart_refctd_ptr CSmoothNormalGenerator::weldVertices for (core::vector::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++) { - std::array neighboringCells = vertices.getNeighboringCellHashes(*processedVertex); + std::array neighboringCells; + const auto cellCount = vertices.getNeighboringCellHashes(neighboringCells.data(), *processedVertex); auto& groupIndex = groupIndexes[processedVertex->index]; //iterate among all neighboring cells - for (int i = 0; i < 8; i++) + for (int i = 0; i < cellCount; i++) { VertexHashMap::BucketBounds bounds = vertices.getBucketBoundsByHash(neighboringCells[i]); for (auto neighbourVertex_it = bounds.begin; neighbourVertex_it != bounds.end; neighbourVertex_it++) diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index c7d648d2d7..b7b19c4729 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -39,8 +39,7 @@ class CSmoothNormalGenerator inline uint32_t getVertexCount() const { return m_vertices.size(); } - // - std::array getNeighboringCellHashes(const CPolygonGeometryManipulator::SSNGVertexData& vertex); + uint8_t getNeighboringCellHashes(uint32_t* outNeighbours, const CPolygonGeometryManipulator::SSNGVertexData& vertex); inline uint32_t getBucketCount() { return m_buckets.size(); } inline BucketBounds getBucketBoundsById(uint32_t index) const { return { m_buckets[index], m_buckets[index + 1] }; } From f4b8ac6b703d7d8ec227f5c50b34d0124686e230 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 4 Oct 2025 13:32:05 +0700 Subject: [PATCH 06/52] Slight improvement for readability --- src/nbl/asset/utils/CSmoothNormalGenerator.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index b7b19c4729..09dd0aee71 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -22,10 +22,11 @@ class CSmoothNormalGenerator class VertexHashMap { public: + using collection_t = core::vector; struct BucketBounds { - core::vector::iterator begin; - core::vector::iterator end; + collection_t::iterator begin; + collection_t::iterator end; }; public: @@ -52,8 +53,8 @@ class CSmoothNormalGenerator static constexpr uint32_t primeNumber3 = 83492791; //holds iterators pointing to beginning of each bucket, last iterator points to m_vertices.end() - core::vector::iterator> m_buckets; - core::vector m_vertices; + core::vector m_buckets; + collection_t m_vertices; const uint32_t m_hashTableMaxSize; const float m_cellSize; From 0dd02e8f5d9ca727361c866ac2557f1ba3588f53 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 6 Oct 2025 21:15:26 +0700 Subject: [PATCH 07/52] Refactor Radix Sorter and use histogram as skip list --- examples_tests | 2 +- include/nbl/core/algorithm/radix_sort.h | 27 ++- .../asset/utils/CSmoothNormalGenerator.cpp | 174 ++++++++---------- src/nbl/asset/utils/CSmoothNormalGenerator.h | 35 +++- 4 files changed, 123 insertions(+), 115 deletions(-) diff --git a/examples_tests b/examples_tests index 03c3726b51..eeea1ebe20 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 03c3726b51fe1c5b5f9f53527d6f2a0d08dc64d5 +Subproject commit eeea1ebe20f661d1cd0dffc2fb7864d99c2218de diff --git a/include/nbl/core/algorithm/radix_sort.h b/include/nbl/core/algorithm/radix_sort.h index 749a8b8309..f0d87a60e0 100644 --- a/include/nbl/core/algorithm/radix_sort.h +++ b/include/nbl/core/algorithm/radix_sort.h @@ -38,10 +38,10 @@ constexpr int8_t find_msb(const T& a_variable) { static_assert(std::is_unsigned::value, "Variable must be unsigned"); - constexpr uint8_t number_of_bits = std::numeric_limits::digits; + constexpr int8_t number_of_bits = std::numeric_limits::digits; const std::bitset variable_bitset{a_variable}; - for (uint8_t msb = number_of_bits - 1; msb >= 0; msb--) + for (int8_t msb = number_of_bits - 1; msb >= 0; msb--) { if (variable_bitset[msb] == 1) return msb; @@ -49,12 +49,15 @@ constexpr int8_t find_msb(const T& a_variable) return -1; } + +} + template -struct RadixSorter +struct LSBSorter { _NBL_STATIC_INLINE_CONSTEXPR uint16_t histogram_bytesize = 8192u; _NBL_STATIC_INLINE_CONSTEXPR size_t histogram_size = size_t(histogram_bytesize)/sizeof(histogram_t); - _NBL_STATIC_INLINE_CONSTEXPR uint8_t radix_bits = find_msb(histogram_size); + _NBL_STATIC_INLINE_CONSTEXPR uint8_t radix_bits = impl::find_msb(histogram_size); _NBL_STATIC_INLINE_CONSTEXPR size_t last_pass = (key_bit_count-1ull)/size_t(radix_bits); _NBL_STATIC_INLINE_CONSTEXPR uint16_t radix_mask = (1u<(input,output,rangeSize,comp); } + + std::pair getHashBound(size_t key) const + { + constexpr histogram_t shift = static_cast(radix_bits * last_pass); + const auto histogramIx = (key >> shift) & radix_mask; + return { histogram[histogramIx], histogram[histogramIx + 1] }; + } + private: template inline RandomIt pass(RandomIt input, RandomIt output, const histogram_t rangeSize, const KeyAccessor& comp) @@ -91,19 +102,17 @@ struct RadixSorter alignas(sizeof(histogram_t)) histogram_t histogram[histogram_size]; }; -} - template inline RandomIt radix_sort(RandomIt input, RandomIt scratch, const size_t rangeSize, const KeyAccessor& comp) { assert(std::abs(std::distance(input,scratch))>=rangeSize); if (rangeSize(0x1ull<<16ull)) - return impl::RadixSorter()(input,scratch,static_cast(rangeSize),comp); + return LSBSorter()(input,scratch,static_cast(rangeSize),comp); if (rangeSize(0x1ull<<32ull)) - return impl::RadixSorter()(input,scratch,static_cast(rangeSize),comp); + return LSBSorter()(input,scratch,static_cast(rangeSize),comp); else - return impl::RadixSorter()(input,scratch,rangeSize,comp); + return LSBSorter()(input,scratch,rangeSize,comp); } //! Because Radix Sort needs O(2n) space and a number of passes dependant on the key length, the final sorted range can be either in `input` or `scratch` diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index b49e7115d6..69ca08867f 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -84,14 +84,14 @@ core::smart_refctd_ptr CSmoothNormalGenerator::calculateNor return smoothPolygon; } -CSmoothNormalGenerator::VertexHashMap::VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) - :m_hashTableMaxSize(_hashTableMaxSize), - m_cellSize(_cellSize) +CSmoothNormalGenerator::VertexHashMap::VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) : + m_sorter(createSorter(_vertexCount)), + m_hashTableMaxSize(_hashTableMaxSize), + m_cellSize(_cellSize) { - assert((core::isPoT(m_hashTableMaxSize))); + assert((core::isPoT(m_hashTableMaxSize))); - m_vertices.reserve(_vertexCount); - m_buckets.reserve(_hashTableMaxSize + 1); + m_vertices.reserve(_vertexCount); } uint32_t CSmoothNormalGenerator::VertexHashMap::hash(const CPolygonGeometryManipulator::SSNGVertexData & vertex) const @@ -121,8 +121,14 @@ CSmoothNormalGenerator::VertexHashMap::BucketBounds CSmoothNormalGenerator::Vert if (hash == invalidHash) return { m_vertices.end(), m_vertices.end() }; - core::vector::iterator begin = std::lower_bound(m_vertices.begin(), m_vertices.end(), hash); - core::vector::iterator end = std::upper_bound(m_vertices.begin(), m_vertices.end(), hash); + const auto skipListBound = std::visit([&](auto& sorter) + { + auto hashBound = sorter.getHashBound(hash); + return std::pair(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second); + }, m_sorter); + + auto begin = std::lower_bound(skipListBound.first, skipListBound.second, hash); + auto end = std::upper_bound(skipListBound.first, skipListBound.second, hash); //bucket missing if (begin == m_vertices.end()) @@ -135,22 +141,12 @@ CSmoothNormalGenerator::VertexHashMap::BucketBounds CSmoothNormalGenerator::Vert return { begin, end }; } -struct KeyAccessor -{ - _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; - - template - inline decltype(radix_mask) operator()(const CPolygonGeometryManipulator::SSNGVertexData& item) const - { - return static_cast(item.hash>>static_cast(bit_offset))&radix_mask; - } -}; void CSmoothNormalGenerator::VertexHashMap::validate() { const auto oldSize = m_vertices.size(); m_vertices.resize(oldSize*2u); // TODO: maybe use counting sort (or big radix) and use the histogram directly for the m_buckets - auto finalSortedOutput = core::radix_sort(m_vertices.data(),m_vertices.data()+oldSize,oldSize,KeyAccessor()); + auto finalSortedOutput = std::visit( [&](auto& sorter) { return sorter(m_vertices.data(), m_vertices.data() + oldSize, oldSize, KeyAccessor()); },m_sorter ); // TODO: optimize out the erase if (finalSortedOutput != m_vertices.data()) m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize); @@ -160,19 +156,6 @@ void CSmoothNormalGenerator::VertexHashMap::validate() // TODO: are `m_buckets` even begin USED!? uint16_t prevHash = m_vertices[0].hash; core::vector::iterator prevBegin = m_vertices.begin(); - m_buckets.push_back(prevBegin); - - while (true) - { - core::vector::iterator next = std::upper_bound(prevBegin, m_vertices.end(), prevHash); - m_buckets.push_back(next); - - if (next == m_vertices.end()) - break; - - prevBegin = next; - prevHash = next->hash; - } } CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const asset::ICPUPolygonGeometry* polygon, float epsilon) @@ -229,35 +212,31 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne auto* normalPtr = reinterpret_cast(outPolygon->getNormalPtr()); auto normalStride = outPolygon->getNormalView().composed.stride; - for (uint32_t cell = 0; cell < vertexHashMap.getBucketCount() - 1; cell++) - { - VertexHashMap::BucketBounds processedBucket = vertexHashMap.getBucketBoundsById(cell); - for (core::vector::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++) - { - std::array neighboringCells; - const auto cellCount = vertexHashMap.getNeighboringCellHashes(neighboringCells.data(), *processedVertex); - hlsl::float32_t3 normal = processedVertex->weightedNormal; + for (auto processedVertex = vertexHashMap.vertices().begin(); processedVertex != vertexHashMap.vertices().end(); processedVertex++) + { + std::array neighboringCells; + const auto cellCount = vertexHashMap.getNeighboringCellHashes(neighboringCells.data(), *processedVertex); + hlsl::float32_t3 normal = processedVertex->weightedNormal; - //iterate among all neighboring cells - for (int i = 0; i < cellCount; i++) - { - VertexHashMap::BucketBounds bounds = vertexHashMap.getBucketBoundsByHash(neighboringCells[i]); - for (; bounds.begin != bounds.end; bounds.begin++) - { - if (processedVertex != bounds.begin) - if (compareVertexPosition(processedVertex->position, bounds.begin->position, epsilon) && - vxcmp(*processedVertex, *bounds.begin, polygon)) - { - //TODO: better mean calculation algorithm - normal += bounds.begin->weightedNormal; - } - } - } - normal = normalize(normal); - memcpy(normalPtr + (normalStride * processedVertex->index), &normal, sizeof(normal)); - } - } + //iterate among all neighboring cells + for (uint8_t i = 0; i < cellCount; i++) + { + VertexHashMap::BucketBounds bounds = vertexHashMap.getBucketBoundsByHash(neighboringCells[i]); + for (; bounds.begin != bounds.end; bounds.begin++) + { + if (processedVertex != bounds.begin) + if (compareVertexPosition(processedVertex->position, bounds.begin->position, epsilon) && + vxcmp(*processedVertex, *bounds.begin, polygon)) + { + //TODO: better mean calculation algorithm + normal += bounds.begin->weightedNormal; + } + } + } + normal = normalize(normal); + memcpy(normalPtr + (normalStride * processedVertex->index), &normal, sizeof(normal)); + } CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); @@ -343,49 +322,44 @@ core::smart_refctd_ptr CSmoothNormalGenerator::weldVertices return true; }; - for (uint32_t cell = 0; cell < vertices.getBucketCount() - 1; cell++) - { - VertexHashMap::BucketBounds processedBucket = vertices.getBucketBoundsById(cell); - - for (core::vector::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++) - { - std::array neighboringCells; - const auto cellCount = vertices.getNeighboringCellHashes(neighboringCells.data(), *processedVertex); + for (auto processedVertex = vertices.vertices().begin(); processedVertex != vertices.vertices().end(); processedVertex++) + { + std::array neighboringCells; + const auto cellCount = vertices.getNeighboringCellHashes(neighboringCells.data(), *processedVertex); - auto& groupIndex = groupIndexes[processedVertex->index]; + auto& groupIndex = groupIndexes[processedVertex->index]; - //iterate among all neighboring cells - for (int i = 0; i < cellCount; i++) - { - VertexHashMap::BucketBounds bounds = vertices.getBucketBoundsByHash(neighboringCells[i]); - for (auto neighbourVertex_it = bounds.begin; neighbourVertex_it != bounds.end; neighbourVertex_it++) - { - const auto neighbourGroupIndex = groupIndexes[neighbourVertex_it->index]; - - hlsl::float32_t3 normal1, normal2; - polygon->getNormalView().decodeElement(processedVertex->index, normal1); - polygon->getNormalView().decodeElement(neighbourVertex_it->index, normal2); - - hlsl::float32_t3 position1, position2; - polygon->getPositionView().decodeElement(processedVertex->index, position1); - polygon->getPositionView().decodeElement(neighbourVertex_it->index, position2); - - // find the first group that this vertex can join - if (processedVertex != neighbourVertex_it && neighbourGroupIndex && canJoinVertices(processedVertex->index, neighbourVertex_it->index)) - { - groupIndex = neighbourGroupIndex; - break; - } - } - } - if (!groupIndex) - { - // create new group if no group nearby that is compatible with this vertex - groupIndex = groups.size(); - groups.push_back({ processedVertex->index}); - } - } - } + //iterate among all neighboring cells + for (int i = 0; i < cellCount; i++) + { + VertexHashMap::BucketBounds bounds = vertices.getBucketBoundsByHash(neighboringCells[i]); + for (auto neighbourVertex_it = bounds.begin; neighbourVertex_it != bounds.end; neighbourVertex_it++) + { + const auto neighbourGroupIndex = groupIndexes[neighbourVertex_it->index]; + + hlsl::float32_t3 normal1, normal2; + polygon->getNormalView().decodeElement(processedVertex->index, normal1); + polygon->getNormalView().decodeElement(neighbourVertex_it->index, normal2); + + hlsl::float32_t3 position1, position2; + polygon->getPositionView().decodeElement(processedVertex->index, position1); + polygon->getPositionView().decodeElement(neighbourVertex_it->index, position2); + + // find the first group that this vertex can join + if (processedVertex != neighbourVertex_it && neighbourGroupIndex && canJoinVertices(processedVertex->index, neighbourVertex_it->index)) + { + groupIndex = neighbourGroupIndex; + break; + } + } + } + if (!groupIndex) + { + // create new group if no group nearby that is compatible with this vertex + groupIndex = groups.size(); + groups.push_back({ processedVertex->index}); + } + } auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); outPolygon->setIndexing(IPolygonGeometryBase::TriangleList()); diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 09dd0aee71..fbfea665d4 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -29,6 +29,7 @@ class CSmoothNormalGenerator collection_t::iterator end; }; + public: VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize); @@ -40,20 +41,44 @@ class CSmoothNormalGenerator inline uint32_t getVertexCount() const { return m_vertices.size(); } - uint8_t getNeighboringCellHashes(uint32_t* outNeighbours, const CPolygonGeometryManipulator::SSNGVertexData& vertex); + uint8_t getNeighboringCellHashes(uint32_t* outNeighbours, const CPolygonGeometryManipulator::SSNGVertexData& vertex); - inline uint32_t getBucketCount() { return m_buckets.size(); } - inline BucketBounds getBucketBoundsById(uint32_t index) const { return { m_buckets[index], m_buckets[index + 1] }; } BucketBounds getBucketBoundsByHash(uint32_t hash); + const collection_t& vertices() const { return m_vertices; } + private: + struct KeyAccessor + { + _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; + + template + inline decltype(radix_mask) operator()(const CPolygonGeometryManipulator::SSNGVertexData& item) const + { + return static_cast(item.hash>>static_cast(bit_offset))&radix_mask; + } + }; + static constexpr uint32_t invalidHash = 0xFFFFFFFF; static constexpr uint32_t primeNumber1 = 73856093; static constexpr uint32_t primeNumber2 = 19349663; static constexpr uint32_t primeNumber3 = 83492791; - //holds iterators pointing to beginning of each bucket, last iterator points to m_vertices.end() - core::vector m_buckets; + using sorter_t = std::variant< + core::LSBSorter, + core::LSBSorter, + core::LSBSorter>; + sorter_t m_sorter; + + static sorter_t createSorter(size_t vertexCount) + { + if (vertexCount < (0x1ull << 16ull)) + return core::LSBSorter(); + if (vertexCount< (0x1ull << 32ull)) + return core::LSBSorter(); + return core::LSBSorter(); + } + collection_t m_vertices; const uint32_t m_hashTableMaxSize; const float m_cellSize; From ce99287fb0d3489864ceacf68cf963140af216bf Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 7 Oct 2025 13:46:07 +0700 Subject: [PATCH 08/52] Move VertexHashMap into its own file --- .../asset/utils/CPolygonGeometryManipulator.h | 11 +- include/nbl/asset/utils/CVertexHashMap.h | 87 +++++++++++ include/nbl/asset/utils/CVertexWelder.h | 19 +++ src/nbl/CMakeLists.txt | 1 + .../asset/utils/CSmoothNormalGenerator.cpp | 123 --------------- src/nbl/asset/utils/CSmoothNormalGenerator.h | 70 +-------- src/nbl/asset/utils/CVertexHashMap.cpp | 141 ++++++++++++++++++ 7 files changed, 252 insertions(+), 200 deletions(-) create mode 100644 include/nbl/asset/utils/CVertexHashMap.h create mode 100644 include/nbl/asset/utils/CVertexWelder.h create mode 100644 src/nbl/asset/utils/CVertexHashMap.cpp diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 591b6ebfb4..db7125ed1c 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -9,6 +9,7 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" +#include "nbl/asset/utils/CVertexHashMap.h" namespace nbl::asset { @@ -17,14 +18,8 @@ namespace nbl::asset class NBL_API2 CPolygonGeometryManipulator { public: - //vertex data needed for CSmoothNormalGenerator - struct SSNGVertexData - { - uint64_t index; //offset of the vertex into index buffer - uint32_t hash; // - hlsl::float32_t3 weightedNormal; - hlsl::float32_t3 position; //position of the vertex in 3D space - }; + + using SSNGVertexData = CVertexHashMap::VertexData; using VxCmpFunction = std::function; diff --git a/include/nbl/asset/utils/CVertexHashMap.h b/include/nbl/asset/utils/CVertexHashMap.h new file mode 100644 index 0000000000..f13847451c --- /dev/null +++ b/include/nbl/asset/utils/CVertexHashMap.h @@ -0,0 +1,87 @@ +#ifndef _NBL_ASSET_C_VERTEX_HASH_MAP_H_INCLUDED_ +#define _NBL_ASSET_C_VERTEX_HASH_MAP_H_INCLUDED_ + +#include "nbl/core/declarations.h" + +namespace nbl::asset +{ + +class CVertexHashMap +{ +public: + + struct VertexData + { + uint64_t index; //offset of the vertex into index buffer + uint32_t hash; // + hlsl::float32_t3 weightedNormal; + hlsl::float32_t3 position; //position of the vertex in 3D space + }; + + using collection_t = core::vector; + struct BucketBounds + { + collection_t::iterator begin; + collection_t::iterator end; + }; + + + CVertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize); + + //inserts vertex into hash table + void add(VertexData&& vertex); + + //sorts hashtable and sets iterators at beginnings of bucktes + void validate(); + + inline uint32_t getVertexCount() const { return m_vertices.size(); } + + uint8_t getNeighboringCellHashes(uint32_t* outNeighbours, const VertexData& vertex); + + BucketBounds getBucketBoundsByHash(uint32_t hash); + + const collection_t& vertices() const { return m_vertices; } + +private: + struct KeyAccessor + { + _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; + + template + inline decltype(radix_mask) operator()(const VertexData& item) const + { + return static_cast(item.hash >> static_cast(bit_offset)) & radix_mask; + } + }; + + static constexpr uint32_t invalidHash = 0xFFFFFFFF; + static constexpr uint32_t primeNumber1 = 73856093; + static constexpr uint32_t primeNumber2 = 19349663; + static constexpr uint32_t primeNumber3 = 83492791; + + using sorter_t = std::variant< + core::LSBSorter, + core::LSBSorter, + core::LSBSorter>; + sorter_t m_sorter; + + static sorter_t createSorter(size_t vertexCount) + { + if (vertexCount < (0x1ull << 16ull)) + return core::LSBSorter(); + if (vertexCount < (0x1ull << 32ull)) + return core::LSBSorter(); + return core::LSBSorter(); + } + + collection_t m_vertices; + const uint32_t m_hashTableMaxSize; + const float m_cellSize; + + uint32_t hash(const VertexData& vertex) const; + uint32_t hash(const hlsl::uint32_t3& position) const; + +}; + +} +#endif \ No newline at end of file diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h new file mode 100644 index 0000000000..d69ea76c18 --- /dev/null +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_C_POLYGON_VERTEX_WELDER_H_INCLUDED_ +#define _NBL_ASSET_C_POLYGON_VERTEX_WELDER_H_INCLUDED_ + +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" + +namespace nbl::asset { + +class CVertexWelder { + + template + static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& vertices, float epsilon); +}; + +} + +#endif diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 935beffe2c..7117eb910d 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -184,6 +184,7 @@ set(NBL_ASSET_SOURCES asset/utils/CPolygonGeometryManipulator.cpp asset/utils/COverdrawPolygonGeometryOptimizer.cpp asset/utils/CSmoothNormalGenerator.cpp + asset/utils/CVertexHashMap.cpp # Mesh loaders asset/interchange/COBJMeshFileLoader.cpp diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 69ca08867f..ef61613bbc 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -84,79 +84,6 @@ core::smart_refctd_ptr CSmoothNormalGenerator::calculateNor return smoothPolygon; } -CSmoothNormalGenerator::VertexHashMap::VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) : - m_sorter(createSorter(_vertexCount)), - m_hashTableMaxSize(_hashTableMaxSize), - m_cellSize(_cellSize) -{ - assert((core::isPoT(m_hashTableMaxSize))); - - m_vertices.reserve(_vertexCount); -} - -uint32_t CSmoothNormalGenerator::VertexHashMap::hash(const CPolygonGeometryManipulator::SSNGVertexData & vertex) const -{ - const hlsl::float32_t3 position = vertex.position / m_cellSize; - - return ((static_cast(position.x) * primeNumber1) ^ - (static_cast(position.y) * primeNumber2) ^ - (static_cast(position.z) * primeNumber3))& (m_hashTableMaxSize - 1); -} - -uint32_t CSmoothNormalGenerator::VertexHashMap::hash(const hlsl::uint32_t3& position) const -{ - return ((position.x * primeNumber1) ^ - (position.y * primeNumber2) ^ - (position.z * primeNumber3))& (m_hashTableMaxSize - 1); -} - -void CSmoothNormalGenerator::VertexHashMap::add(CPolygonGeometryManipulator::SSNGVertexData && vertex) -{ - vertex.hash = hash(vertex); - m_vertices.push_back(vertex); -} - -CSmoothNormalGenerator::VertexHashMap::BucketBounds CSmoothNormalGenerator::VertexHashMap::getBucketBoundsByHash(uint32_t hash) -{ - if (hash == invalidHash) - return { m_vertices.end(), m_vertices.end() }; - - const auto skipListBound = std::visit([&](auto& sorter) - { - auto hashBound = sorter.getHashBound(hash); - return std::pair(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second); - }, m_sorter); - - auto begin = std::lower_bound(skipListBound.first, skipListBound.second, hash); - auto end = std::upper_bound(skipListBound.first, skipListBound.second, hash); - - //bucket missing - if (begin == m_vertices.end()) - return { m_vertices.end(), m_vertices.end() }; - - //bucket missing - if (begin->hash != hash) - return { m_vertices.end(), m_vertices.end() }; - - return { begin, end }; -} - -void CSmoothNormalGenerator::VertexHashMap::validate() -{ - const auto oldSize = m_vertices.size(); - m_vertices.resize(oldSize*2u); - // TODO: maybe use counting sort (or big radix) and use the histogram directly for the m_buckets - auto finalSortedOutput = std::visit( [&](auto& sorter) { return sorter(m_vertices.data(), m_vertices.data() + oldSize, oldSize, KeyAccessor()); },m_sorter ); - // TODO: optimize out the erase - if (finalSortedOutput != m_vertices.data()) - m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize); - else - m_vertices.resize(oldSize); - - // TODO: are `m_buckets` even begin USED!? - uint16_t prevHash = m_vertices[0].hash; - core::vector::iterator prevBegin = m_vertices.begin(); -} CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const asset::ICPUPolygonGeometry* polygon, float epsilon) { @@ -243,56 +170,6 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne return outPolygon; } -uint8_t CSmoothNormalGenerator::VertexHashMap::getNeighboringCellHashes(uint32_t* outNeighbours, const CPolygonGeometryManipulator::SSNGVertexData& vertex) -{ - hlsl::float32_t3 cellFloatCoord = floor(vertex.position / m_cellSize - hlsl::float32_t3(0.5f)); - hlsl::uint32_t3 neighbor = hlsl::uint32_t3(static_cast(cellFloatCoord.x), static_cast(cellFloatCoord.y), static_cast(cellFloatCoord.z)); - - uint8_t neighbourCount = 0; - - //left bottom near - outNeighbours[neighbourCount] = hash(neighbor); - neighbourCount++; - - auto addUniqueNeighbour = [&neighbourCount, outNeighbours](uint32_t hashVal) - { - if (std::find(outNeighbours, outNeighbours + neighbourCount, hashVal) != outNeighbours + neighbourCount) - { - outNeighbours[neighbourCount] = hashVal; - neighbourCount++; - } - }; - - //right bottom near - neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); - addUniqueNeighbour(hash(neighbor)); - - //right bottom far - neighbor = neighbor + hlsl::uint32_t3(0, 0, 1); - addUniqueNeighbour(hash(neighbor)); - - //left bottom far - neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); - addUniqueNeighbour(hash(neighbor)); - - //left top far - neighbor = neighbor + hlsl::uint32_t3(0, 1, 0); - addUniqueNeighbour(hash(neighbor)); - - //right top far - neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); - addUniqueNeighbour(hash(neighbor)); - - //righ top near - neighbor = neighbor - hlsl::uint32_t3(0, 0, 1); - addUniqueNeighbour(hash(neighbor)); - - //left top near - neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); - addUniqueNeighbour(hash(neighbor)); - - return neighbourCount; -} core::smart_refctd_ptr CSmoothNormalGenerator::weldVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon) { diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index fbfea665d4..476d962f95 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -19,76 +19,8 @@ class CSmoothNormalGenerator static core::smart_refctd_ptr calculateNormals(const ICPUPolygonGeometry* polygon, bool enableWelding, float epsilon, CPolygonGeometryManipulator::VxCmpFunction function); private: - class VertexHashMap - { - public: - using collection_t = core::vector; - struct BucketBounds - { - collection_t::iterator begin; - collection_t::iterator end; - }; + using VertexHashMap = CVertexHashMap; - - public: - VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize); - - //inserts vertex into hash table - void add(CPolygonGeometryManipulator::SSNGVertexData&& vertex); - - //sorts hashtable and sets iterators at beginnings of bucktes - void validate(); - - inline uint32_t getVertexCount() const { return m_vertices.size(); } - - uint8_t getNeighboringCellHashes(uint32_t* outNeighbours, const CPolygonGeometryManipulator::SSNGVertexData& vertex); - - BucketBounds getBucketBoundsByHash(uint32_t hash); - - const collection_t& vertices() const { return m_vertices; } - - private: - struct KeyAccessor - { - _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; - - template - inline decltype(radix_mask) operator()(const CPolygonGeometryManipulator::SSNGVertexData& item) const - { - return static_cast(item.hash>>static_cast(bit_offset))&radix_mask; - } - }; - - static constexpr uint32_t invalidHash = 0xFFFFFFFF; - static constexpr uint32_t primeNumber1 = 73856093; - static constexpr uint32_t primeNumber2 = 19349663; - static constexpr uint32_t primeNumber3 = 83492791; - - using sorter_t = std::variant< - core::LSBSorter, - core::LSBSorter, - core::LSBSorter>; - sorter_t m_sorter; - - static sorter_t createSorter(size_t vertexCount) - { - if (vertexCount < (0x1ull << 16ull)) - return core::LSBSorter(); - if (vertexCount< (0x1ull << 32ull)) - return core::LSBSorter(); - return core::LSBSorter(); - } - - collection_t m_vertices; - const uint32_t m_hashTableMaxSize; - const float m_cellSize; - - uint32_t hash(const CPolygonGeometryManipulator::SSNGVertexData& vertex) const; - uint32_t hash(const hlsl::uint32_t3& position) const; - - }; - - private: static VertexHashMap setupData(const ICPUPolygonGeometry* polygon, float epsilon); static core::smart_refctd_ptr processConnectedVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp); static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon); diff --git a/src/nbl/asset/utils/CVertexHashMap.cpp b/src/nbl/asset/utils/CVertexHashMap.cpp new file mode 100644 index 0000000000..72208c2625 --- /dev/null +++ b/src/nbl/asset/utils/CVertexHashMap.cpp @@ -0,0 +1,141 @@ +#include "nbl/asset/utils/CVertexHashMap.h" + +namespace nbl::asset { + +CVertexHashMap::CVertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) : + m_sorter(createSorter(_vertexCount)), + m_hashTableMaxSize(_hashTableMaxSize), + m_cellSize(_cellSize) +{ + assert((core::isPoT(m_hashTableMaxSize))); + + m_vertices.reserve(_vertexCount); +} + +uint32_t CVertexHashMap::hash(const VertexData& vertex) const +{ + const hlsl::float32_t3 position = vertex.position / m_cellSize; + + return ((static_cast(position.x) * primeNumber1) ^ + (static_cast(position.y) * primeNumber2) ^ + (static_cast(position.z) * primeNumber3))& (m_hashTableMaxSize - 1); +} + +uint32_t CVertexHashMap::hash(const hlsl::uint32_t3& position) const +{ + return ((position.x * primeNumber1) ^ + (position.y * primeNumber2) ^ + (position.z * primeNumber3))& (m_hashTableMaxSize - 1); +} + +void CVertexHashMap::add(VertexData&& vertex) +{ + vertex.hash = hash(vertex); + m_vertices.push_back(vertex); +} + +CVertexHashMap::BucketBounds CVertexHashMap::getBucketBoundsByHash(uint32_t hash) +{ + if (hash == invalidHash) + return { m_vertices.end(), m_vertices.end() }; + + const auto skipListBound = std::visit([&](auto& sorter) + { + auto hashBound = sorter.getHashBound(hash); + return std::pair(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second); + }, m_sorter); + + auto begin = std::lower_bound( + skipListBound.first, + skipListBound.second, + hash, + [](const VertexData& vertex, uint32_t hash) + { + return vertex.hash < hash; + }); + + auto end = std::upper_bound( + skipListBound.first, + skipListBound.second, + hash, + [](uint32_t hash, const VertexData& vertex) + { + return hash < vertex.hash; + }); + + //bucket missing + if (begin == m_vertices.end()) + return { m_vertices.end(), m_vertices.end() }; + + //bucket missing + if (begin->hash != hash) + return { m_vertices.end(), m_vertices.end() }; + + return { begin, end }; +} + +void CVertexHashMap::validate() +{ + const auto oldSize = m_vertices.size(); + m_vertices.resize(oldSize*2u); + // TODO: maybe use counting sort (or big radix) and use the histogram directly for the m_buckets + auto finalSortedOutput = std::visit( [&](auto& sorter) { return sorter(m_vertices.data(), m_vertices.data() + oldSize, oldSize, KeyAccessor()); },m_sorter ); + // TODO: optimize out the erase + if (finalSortedOutput != m_vertices.data()) + m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize); + else + m_vertices.resize(oldSize); +} + +uint8_t CVertexHashMap::getNeighboringCellHashes(uint32_t* outNeighbours, const VertexData& vertex) +{ + hlsl::float32_t3 cellFloatCoord = floor(vertex.position / m_cellSize - hlsl::float32_t3(0.5f)); + hlsl::uint32_t3 neighbor = hlsl::uint32_t3(static_cast(cellFloatCoord.x), static_cast(cellFloatCoord.y), static_cast(cellFloatCoord.z)); + + uint8_t neighbourCount = 0; + + //left bottom near + outNeighbours[neighbourCount] = hash(neighbor); + neighbourCount++; + + auto addUniqueNeighbour = [&neighbourCount, outNeighbours](uint32_t hashVal) + { + if (std::find(outNeighbours, outNeighbours + neighbourCount, hashVal) != outNeighbours + neighbourCount) + { + outNeighbours[neighbourCount] = hashVal; + neighbourCount++; + } + }; + + //right bottom near + neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); + addUniqueNeighbour(hash(neighbor)); + + //right bottom far + neighbor = neighbor + hlsl::uint32_t3(0, 0, 1); + addUniqueNeighbour(hash(neighbor)); + + //left bottom far + neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); + addUniqueNeighbour(hash(neighbor)); + + //left top far + neighbor = neighbor + hlsl::uint32_t3(0, 1, 0); + addUniqueNeighbour(hash(neighbor)); + + //right top far + neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); + addUniqueNeighbour(hash(neighbor)); + + //righ top near + neighbor = neighbor - hlsl::uint32_t3(0, 0, 1); + addUniqueNeighbour(hash(neighbor)); + + //left top near + neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); + addUniqueNeighbour(hash(neighbor)); + + return neighbourCount; +} + +} \ No newline at end of file From 28f73f59a3263817b88798efbdcf41bf5e677e71 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 18 Oct 2025 14:24:26 +0700 Subject: [PATCH 09/52] Refactor smooth normal generator to use VertexHashGrid and separate weldVertices into CVertexWelder --- .../asset/utils/CPolygonGeometryManipulator.h | 27 +- include/nbl/asset/utils/CVertexHashMap.h | 87 ----- include/nbl/asset/utils/CVertexWelder.h | 118 ++++++- include/nbl/core/algorithm/radix_sort.h | 37 +- src/nbl/CMakeLists.txt | 1 - src/nbl/asset/utils/CGeometryCreator.cpp | 1 - .../utils/CPolygonGeometryManipulator.cpp | 73 +++- .../asset/utils/CSmoothNormalGenerator.cpp | 326 ++---------------- src/nbl/asset/utils/CSmoothNormalGenerator.h | 11 +- src/nbl/asset/utils/CVertexHashMap.cpp | 141 -------- 10 files changed, 288 insertions(+), 534 deletions(-) delete mode 100644 include/nbl/asset/utils/CVertexHashMap.h delete mode 100644 src/nbl/asset/utils/CVertexHashMap.cpp diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index db7125ed1c..b1182fd983 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -9,7 +9,7 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" -#include "nbl/asset/utils/CVertexHashMap.h" +#include "nbl/asset/utils/CVertexHashGrid.h" namespace nbl::asset { @@ -19,7 +19,30 @@ class NBL_API2 CPolygonGeometryManipulator { public: - using SSNGVertexData = CVertexHashMap::VertexData; + struct SSNGVertexData + { + uint64_t index; //offset of the vertex into index buffer + uint32_t hash; + hlsl::float32_t3 weightedNormal; + // TODO(kevinyu): Should we separate this from SSNGVertexData, and store it in its own vector in VertexHashGrid? Similar like how hashmap work. Or keep it intrusive? + hlsl::float32_t3 position; //position of the vertex in 3D space + + hlsl::float32_t3 getPosition() const + { + return position; + } + + void setHash(uint32_t hash) + { + this->hash = hash; + } + + uint32_t getHash() const + { + return hash; + }; + + }; using VxCmpFunction = std::function; diff --git a/include/nbl/asset/utils/CVertexHashMap.h b/include/nbl/asset/utils/CVertexHashMap.h deleted file mode 100644 index f13847451c..0000000000 --- a/include/nbl/asset/utils/CVertexHashMap.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef _NBL_ASSET_C_VERTEX_HASH_MAP_H_INCLUDED_ -#define _NBL_ASSET_C_VERTEX_HASH_MAP_H_INCLUDED_ - -#include "nbl/core/declarations.h" - -namespace nbl::asset -{ - -class CVertexHashMap -{ -public: - - struct VertexData - { - uint64_t index; //offset of the vertex into index buffer - uint32_t hash; // - hlsl::float32_t3 weightedNormal; - hlsl::float32_t3 position; //position of the vertex in 3D space - }; - - using collection_t = core::vector; - struct BucketBounds - { - collection_t::iterator begin; - collection_t::iterator end; - }; - - - CVertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize); - - //inserts vertex into hash table - void add(VertexData&& vertex); - - //sorts hashtable and sets iterators at beginnings of bucktes - void validate(); - - inline uint32_t getVertexCount() const { return m_vertices.size(); } - - uint8_t getNeighboringCellHashes(uint32_t* outNeighbours, const VertexData& vertex); - - BucketBounds getBucketBoundsByHash(uint32_t hash); - - const collection_t& vertices() const { return m_vertices; } - -private: - struct KeyAccessor - { - _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; - - template - inline decltype(radix_mask) operator()(const VertexData& item) const - { - return static_cast(item.hash >> static_cast(bit_offset)) & radix_mask; - } - }; - - static constexpr uint32_t invalidHash = 0xFFFFFFFF; - static constexpr uint32_t primeNumber1 = 73856093; - static constexpr uint32_t primeNumber2 = 19349663; - static constexpr uint32_t primeNumber3 = 83492791; - - using sorter_t = std::variant< - core::LSBSorter, - core::LSBSorter, - core::LSBSorter>; - sorter_t m_sorter; - - static sorter_t createSorter(size_t vertexCount) - { - if (vertexCount < (0x1ull << 16ull)) - return core::LSBSorter(); - if (vertexCount < (0x1ull << 32ull)) - return core::LSBSorter(); - return core::LSBSorter(); - } - - collection_t m_vertices; - const uint32_t m_hashTableMaxSize; - const float m_cellSize; - - uint32_t hash(const VertexData& vertex) const; - uint32_t hash(const hlsl::uint32_t3& position) const; - -}; - -} -#endif \ No newline at end of file diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index d69ea76c18..4b5a06a540 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -10,8 +10,124 @@ namespace nbl::asset { class CVertexWelder { +public: + using WeldPredicateFn = std::function; + template - static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& vertices, float epsilon); + static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, WeldPredicateFn shouldWeldFn) { + auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); + outPolygon->setIndexing(IPolygonGeometryBase::TriangleList()); + + core::vector vertexIndexToAsIndex(as.getVertexCount()); + + for (uint64_t vertexData_i = 0u; vertexData_i < as.getVertexCount(); vertexData_i++) + { + const auto& vertexData = as.vertices()[vertexData_i]; + vertexIndexToAsIndex[vertexData.index] = vertexData.index; + } + + static constexpr auto INVALID_INDEX = std::numeric_limits::max(); + core::vector remappedVertexIndexes(as.getVertexCount()); + std::fill(remappedVertexIndexes.begin(), remappedVertexIndexes.end(), INVALID_INDEX); + + uint64_t maxRemappedIndex = 0; + // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together + for (uint64_t index = 0; index < as.getVertexCount(); index++) + { + const auto asIndex = vertexIndexToAsIndex[index]; + const auto& vertexData = as.vertices()[asIndex]; + auto& remappedVertexIndex = remappedVertexIndexes[index]; + as.iterateBroadphaseCandidates(vertexData, [&, polygon, index](const typename AccelStructureT::vertex_data_t& neighbor) { + const auto neighborRemappedIndex = remappedVertexIndexes[neighbor.index]; + if (shouldWeldFn(polygon, index, neighbor.index) && neighborRemappedIndex != INVALID_INDEX) { + remappedVertexIndex = neighborRemappedIndex; + return false; + } + return true; + }); + if (remappedVertexIndex != INVALID_INDEX) { + remappedVertexIndex = vertexData.index; + maxRemappedIndex = vertexData.index; + } + } + + // TODO(kevinyu): Handle when indexBuffer is not exist + + const auto& indexView = outPolygon->getIndexView(); + if (indexView) + { + auto remappedIndexView = [&] + { + const auto bytesize = indexView.src.size; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + + auto retval = indexView; + retval.src.buffer = std::move(indices); + if (retval.composed.rangeFormat == IGeometryBase::EAABBFormat::U16) + retval.composed.encodedDataRange.u16.maxVx[0] = maxRemappedIndex; + else if (retval.composed.rangeFormat == IGeometryBase::EAABBFormat::U32) + retval.composed.encodedDataRange.u32.maxVx[0] = maxRemappedIndex; + + return retval; + }(); + + + auto remappedIndexes = [&]() { + auto* indexPtr = reinterpret_cast(remappedIndexView.getPointer()); + for (uint64_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) + { + hlsl::vector index; + indexView.decodeElement>(index_i, index); + IndexT remappedIndex = remappedVertexIndexes[index.x]; + indexPtr[index_i] = remappedIndex; + } + }; + + if (indexView.composed.rangeFormat == IGeometryBase::EAABBFormat::U16) { + remappedIndexes.template operator()(); + } + else if (indexView.composed.rangeFormat == IGeometryBase::EAABBFormat::U32) { + remappedIndexes.template operator()(); + } + + outPolygon->setIndexView(std::move(remappedIndexView)); + } else + { + const uint32_t indexSize = (outPolygon->getPositionView().getElementCount() - 1 < std::numeric_limits::max()) ? sizeof(uint16_t) : sizeof(uint32_t); + auto remappedIndexBuffer = ICPUBuffer::create({indexSize * outPolygon->getVertexReferenceCount(), IBuffer::EUF_INDEX_BUFFER_BIT}); + auto remappedIndexView = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = indexSize, + }, + .src = { + .offset = 0, + .size = remappedIndexBuffer->getSize(), + .buffer = std::move(remappedIndexBuffer) + } + }; + + auto fillRemappedIndex = [&](){ + auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexBuffer->getPointer()); + for (uint64_t index = 0; index < outPolygon->getPositionView().getElementCount(); index++) + { + remappedIndexBufferPtr[index] = remappedVertexIndexes[index]; + } + }; + + if (indexView.composed.rangeFormat == IGeometryBase::EAABBFormat::U16) { + fillRemappedIndex.template operator()(); + } + else if (indexView.composed.rangeFormat == IGeometryBase::EAABBFormat::U32) { + fillRemappedIndex.template operator()(); + } + + outPolygon->setIndexView(std::move(remappedIndexView)); + + } + + CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); + return outPolygon; + } }; } diff --git a/include/nbl/core/algorithm/radix_sort.h b/include/nbl/core/algorithm/radix_sort.h index f0d87a60e0..057598963a 100644 --- a/include/nbl/core/algorithm/radix_sort.h +++ b/include/nbl/core/algorithm/radix_sort.h @@ -71,7 +71,8 @@ struct LSBSorter { constexpr histogram_t shift = static_cast(radix_bits * last_pass); const auto histogramIx = (key >> shift) & radix_mask; - return { histogram[histogramIx], histogram[histogramIx + 1] }; + const auto boundBegin = histogramIx == 0 ? 0 : histogram[histogramIx - 1]; + return { boundBegin, histogram[histogramIx] }; } private: @@ -82,21 +83,41 @@ struct LSBSorter std::fill_n(histogram,histogram_size,static_cast(0u)); // count constexpr histogram_t shift = static_cast(radix_bits*pass_ix); - for (histogram_t i=0u; i(input[i])]; // prefix sum - std::inclusive_scan(histogram,histogram+histogram_size,histogram); + std::inclusive_scan(histogram, histogram + histogram_size, histogram); // scatter - for (histogram_t i=rangeSize; i!=0u;) - { - i--; - output[--histogram[comp.template operator()(input[i])]] = input[i]; - } if constexpr (pass_ix != last_pass) + { + + for (histogram_t i = rangeSize; i != 0u;) + { + i--; + const auto& val = input[i]; + const auto& histogramIx = comp.template operator()(val); + output[--histogram[histogramIx]] = val; + } + return pass(output,input,rangeSize,comp); + } else + { + // need to preserve histogram value for the skip list, so we copy to temporary histogramArray and use that + std::array tmpHistogram; + std::copy(histogram, histogram + histogram_size, tmpHistogram.data()); + + for (histogram_t i = rangeSize; i != 0u;) + { + i--; + const auto& val = input[i]; + const auto& histogramIx = comp.template operator()(val); + output[--tmpHistogram[histogramIx]] = val; + } + return output; + } } alignas(sizeof(histogram_t)) histogram_t histogram[histogram_size]; diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 7117eb910d..935beffe2c 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -184,7 +184,6 @@ set(NBL_ASSET_SOURCES asset/utils/CPolygonGeometryManipulator.cpp asset/utils/COverdrawPolygonGeometryOptimizer.cpp asset/utils/CSmoothNormalGenerator.cpp - asset/utils/CVertexHashMap.cpp # Mesh loaders asset/interchange/COBJMeshFileLoader.cpp diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 00bc425ef5..f36ee38125 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -83,7 +83,6 @@ template requires(std::is_same_v || std::is_same_v) static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t maxIndex) { - const auto bytesize = sizeof(IndexT) * indexCount; auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 03b07fc22f..f3c8761b3c 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -9,6 +9,7 @@ #include #include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/asset/utils/CVertexWelder.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" @@ -131,6 +132,53 @@ core::smart_refctd_ptr CPolygonGeometryManipulator::createU return outGeometry; } + +namespace +{ + static bool isAttributeEqual(const ICPUPolygonGeometry::SDataView& view, uint64_t index1, uint64_t index2, float epsilon) + { + if (!view) return true; + const auto channelCount = getFormatChannelCount(view.composed.format); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + { + hlsl::uint64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + if (val1[channel_i] != val2[channel_i]) return false; + break; + } + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + hlsl::int64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + if (val1[channel_i] != val2[channel_i]) return false; + break; + } + default: + { + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + { + const auto diff = abs(val1[channel_i] - val2[channel_i]); + if (diff > epsilon) return false; + } + break; + } + } + return true; + } +} + + core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, VxCmpFunction vxcmp) { if (inPolygon == nullptr) @@ -146,8 +194,31 @@ core::smart_refctd_ptr CPolygonGeometryManipulator::createS return nullptr; } - return CSmoothNormalGenerator::calculateNormals(inPolygon, enableWelding, epsilon, vxcmp); + auto canJoinVertices = [epsilon](const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2)-> bool +{ + if (!isAttributeEqual(polygon->getPositionView(), index1, index2, epsilon)) + return false; + if (!isAttributeEqual(polygon->getNormalView(), index1, index2, epsilon)) + return false; + for (const auto& jointWeightView : polygon->getJointWeightViews()) + { + if (!isAttributeEqual(jointWeightView.indices, index1, index2, epsilon)) return false; + if (!isAttributeEqual(jointWeightView.weights, index1, index2, epsilon)) return false; + } + for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) + if (!isAttributeEqual(auxAttributeView, index1, index2, epsilon)) return false; + + return true; + }; + + auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp); + if (enableWelding) + { + return CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, canJoinVertices); + } + return result.geom; } + } // end namespace nbl::asset diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index ef61613bbc..a6067adddd 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -8,7 +8,6 @@ #include "nbl/builtin/hlsl/shapes/triangle.hlsl" #include -#include namespace nbl { @@ -24,64 +23,17 @@ static bool operator<(const CPolygonGeometryManipulator::SSNGVertexData& lhs, ui return lhs.hash < rhs; } -static bool isAttributeEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) -{ - if (!view) return true; - const auto channelCount = getFormatChannelCount(view.composed.format); - switch (view.composed.rangeFormat) - { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - { - hlsl::uint64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - if (val1[channel_i] != val2[channel_i]) return false; - break; - } - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - hlsl::int64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - if (val1[channel_i] != val2[channel_i]) return false; - break; - } - default: - { - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - { - const auto diff = abs(val1[channel_i] - val2[channel_i]); - if (diff > epsilon) return false; - } - break; - } - } - return true; -} - static bool compareVertexPosition(const hlsl::float32_t3& a, const hlsl::float32_t3& b, float epsilon) { const hlsl::float32_t3 difference = abs(b - a); return (difference.x <= epsilon && difference.y <= epsilon && difference.z <= epsilon); } -core::smart_refctd_ptr CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, bool enableWelding, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp) +CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp) { VertexHashMap vertexHashMap = setupData(polygon, epsilon); const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, epsilon,vxcmp); - - if (enableWelding) - { - return weldVertices(smoothPolygon.get(), vertexHashMap, epsilon); - } - return smoothPolygon; + return { vertexHashMap, smoothPolygon }; } @@ -117,256 +69,52 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp) { - auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); - static constexpr auto NormalFormat = EF_R32G32B32_SFLOAT; - const auto normalFormatBytesize = asset::getTexelOrBlockBytesize(NormalFormat); - auto normalBuf = ICPUBuffer::create({ normalFormatBytesize * outPolygon->getPositionView().getElementCount()}); - auto normalView = polygon->getNormalView(); - - hlsl::shapes::AABB<4,hlsl::float32_t> aabb; - aabb.maxVx = hlsl::float32_t4(1, 1, 1, 0.f); - aabb.minVx = -aabb.maxVx; - outPolygon->setNormalView({ - .composed = { - .encodedDataRange = {.f32 = aabb}, - .stride = sizeof(hlsl::float32_t3), - .format = NormalFormat, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = { .offset = 0, .size = normalBuf->getSize(), .buffer = std::move(normalBuf) }, - }); + auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); + static constexpr auto NormalFormat = EF_R32G32B32_SFLOAT; + const auto normalFormatBytesize = asset::getTexelOrBlockBytesize(NormalFormat); + auto normalBuf = ICPUBuffer::create({ normalFormatBytesize * outPolygon->getPositionView().getElementCount()}); + auto normalView = polygon->getNormalView(); + + hlsl::shapes::AABB<4,hlsl::float32_t> aabb; + aabb.maxVx = hlsl::float32_t4(1, 1, 1, 0.f); + aabb.minVx = -aabb.maxVx; + outPolygon->setNormalView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = sizeof(hlsl::float32_t3), + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { .offset = 0, .size = normalBuf->getSize(), .buffer = std::move(normalBuf) }, + }); auto* normalPtr = reinterpret_cast(outPolygon->getNormalPtr()); auto normalStride = outPolygon->getNormalView().composed.stride; - for (auto processedVertex = vertexHashMap.vertices().begin(); processedVertex != vertexHashMap.vertices().end(); processedVertex++) - { - std::array neighboringCells; - const auto cellCount = vertexHashMap.getNeighboringCellHashes(neighboringCells.data(), *processedVertex); - hlsl::float32_t3 normal = processedVertex->weightedNormal; + for (auto& processedVertex : vertexHashMap.vertices()) + { + auto normal = processedVertex.weightedNormal; - //iterate among all neighboring cells - for (uint8_t i = 0; i < cellCount; i++) - { - VertexHashMap::BucketBounds bounds = vertexHashMap.getBucketBoundsByHash(neighboringCells[i]); - for (; bounds.begin != bounds.end; bounds.begin++) - { - if (processedVertex != bounds.begin) - if (compareVertexPosition(processedVertex->position, bounds.begin->position, epsilon) && - vxcmp(*processedVertex, *bounds.begin, polygon)) - { - //TODO: better mean calculation algorithm - normal += bounds.begin->weightedNormal; - } - } - } - normal = normalize(normal); - memcpy(normalPtr + (normalStride * processedVertex->index), &normal, sizeof(normal)); - } + vertexHashMap.iterateBroadphaseCandidates(processedVertex, [&](const VertexHashMap::vertex_data_t& candidate) + { + if (compareVertexPosition(processedVertex.position, candidate.position, epsilon) && + vxcmp(processedVertex, candidate, polygon)) + { + //TODO: better mean calculation algorithm + normal += candidate.weightedNormal; + } + return true; + }); + + normal = normalize(normal); + memcpy(normalPtr + (normalStride * processedVertex.index), &normal, sizeof(normal)); + } CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); return outPolygon; } - -core::smart_refctd_ptr CSmoothNormalGenerator::weldVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon) -{ - struct Group - { - uint64_t vertex_reference_index; // index to referenced vertex in the original polygon - }; - core::vector groups; - groups.reserve(vertices.getVertexCount()); - - core::vector> groupIndexes(vertices.getVertexCount()); - - auto canJoinVertices = [&](uint32_t index1, uint32_t index2)-> bool - { - if (!isAttributeEqual(polygon->getPositionView(), index1, index2, epsilon)) - return false; - if (!isAttributeEqual(polygon->getNormalView(), index1, index2, epsilon)) - return false; - for (const auto& jointWeightView : polygon->getJointWeightViews()) - { - if (!isAttributeEqual(jointWeightView.indices, index1, index2, epsilon)) return false; - if (!isAttributeEqual(jointWeightView.weights, index1, index2, epsilon)) return false; - } - for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) - if (!isAttributeEqual(auxAttributeView, index1, index2, epsilon)) return false; - - return true; - }; - - for (auto processedVertex = vertices.vertices().begin(); processedVertex != vertices.vertices().end(); processedVertex++) - { - std::array neighboringCells; - const auto cellCount = vertices.getNeighboringCellHashes(neighboringCells.data(), *processedVertex); - - auto& groupIndex = groupIndexes[processedVertex->index]; - - //iterate among all neighboring cells - for (int i = 0; i < cellCount; i++) - { - VertexHashMap::BucketBounds bounds = vertices.getBucketBoundsByHash(neighboringCells[i]); - for (auto neighbourVertex_it = bounds.begin; neighbourVertex_it != bounds.end; neighbourVertex_it++) - { - const auto neighbourGroupIndex = groupIndexes[neighbourVertex_it->index]; - - hlsl::float32_t3 normal1, normal2; - polygon->getNormalView().decodeElement(processedVertex->index, normal1); - polygon->getNormalView().decodeElement(neighbourVertex_it->index, normal2); - - hlsl::float32_t3 position1, position2; - polygon->getPositionView().decodeElement(processedVertex->index, position1); - polygon->getPositionView().decodeElement(neighbourVertex_it->index, position2); - - // find the first group that this vertex can join - if (processedVertex != neighbourVertex_it && neighbourGroupIndex && canJoinVertices(processedVertex->index, neighbourVertex_it->index)) - { - groupIndex = neighbourGroupIndex; - break; - } - } - } - if (!groupIndex) - { - // create new group if no group nearby that is compatible with this vertex - groupIndex = groups.size(); - groups.push_back({ processedVertex->index}); - } - } - - auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); - outPolygon->setIndexing(IPolygonGeometryBase::TriangleList()); - - const uint32_t indexSize = (groups.size() < std::numeric_limits::max()) ? sizeof(uint16_t) : sizeof(uint32_t); - auto indexBuffer = ICPUBuffer::create({indexSize * groupIndexes.size(), IBuffer::EUF_INDEX_BUFFER_BIT}); - auto indexBufferPtr = reinterpret_cast(indexBuffer->getPointer()); - auto indexView = ICPUPolygonGeometry::SDataView{ - .composed = { - .stride = indexSize, - }, - .src = { - .offset = 0, - .size = indexBuffer->getSize(), - .buffer = std::move(indexBuffer) - } - }; - if (indexSize == 2) - { - indexView.composed.encodedDataRange.u16.minVx[0] = 0; - indexView.composed.encodedDataRange.u16.maxVx[0] = groups.size() - 1; - indexView.composed.format = EF_R16_UINT; - indexView.composed.rangeFormat = IGeometryBase::EAABBFormat::U16; - } else if (indexSize == 4) - { - indexView.composed.encodedDataRange.u32.minVx[0] = 0; - indexView.composed.encodedDataRange.u32.maxVx[0] = groups.size() - 1; - indexView.composed.format = EF_R32_UINT; - indexView.composed.rangeFormat = IGeometryBase::EAABBFormat::U32; - } - - for (auto index_i = 0u; index_i < groupIndexes.size(); index_i++) - { - if (indexSize == 2) - { - uint16_t index = *groupIndexes[index_i]; - memcpy(indexBufferPtr + indexSize * index_i, &index, sizeof(index)); - } else if (indexSize == 4) - { - uint32_t index = *groupIndexes[index_i]; - memcpy(indexBufferPtr + indexSize * index_i, &index, sizeof(index)); - } - } - outPolygon->setIndexView(std::move(indexView)); - - - using position_t = hlsl::float32_t3; - constexpr auto PositionAttrSize = sizeof(position_t); - auto positionBuffer = ICPUBuffer::create({ PositionAttrSize * groups.size(), IBuffer::EUF_NONE }); - auto outPositions = reinterpret_cast(positionBuffer->getPointer()); - const auto inPositions = reinterpret_cast(polygon->getPositionView().getPointer()); - outPolygon->setPositionView({ - .composed = polygon->getPositionView().composed, - .src = {.offset = 0, .size = positionBuffer->getSize(), .buffer = std::move(positionBuffer)} - }); - - using normal_t = hlsl::float32_t3; - constexpr auto NormalAttrSize = sizeof(normal_t); - auto normalBuffer = ICPUBuffer::create({ NormalAttrSize * groups.size(), IBuffer::EUF_NONE }); - auto outNormals = reinterpret_cast(normalBuffer->getPointer()); - const auto inNormals = reinterpret_cast(polygon->getNormalView().getPointer()); - outPolygon->setNormalView({ - .composed = polygon->getNormalView().composed, - .src = {.offset = 0, .size = normalBuffer->getSize(), .buffer = std::move(normalBuffer)} - }); - - auto createOutView = [&](const ICPUPolygonGeometry::SDataView& view) - { - auto buffer = ICPUBuffer::create({ view.composed.stride * groups.size(), view.src.buffer->getUsageFlags() }); - return ICPUPolygonGeometry::SDataView{ - .composed = view.composed, - .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} - }; - }; - - const auto& inJointWeightViews = polygon->getJointWeightViews(); - auto* outJointWeightViews = outPolygon->getJointWeightViews(); - outJointWeightViews->resize(inJointWeightViews.size()); - for (auto jointWeightView_i = 0u; jointWeightView_i < inJointWeightViews.size(); jointWeightView_i++) - { - const auto& inJointWeightView = inJointWeightViews[jointWeightView_i]; - outJointWeightViews->operator[](jointWeightView_i).indices = createOutView(inJointWeightView.indices); - outJointWeightViews->operator[](jointWeightView_i).weights = createOutView(inJointWeightView.weights); - } - - const auto& inAuxAttributeViews = polygon->getAuxAttributeViews(); - auto* outAuxAttributeViews = outPolygon->getAuxAttributeViews(); - outAuxAttributeViews->resize(inAuxAttributeViews.size()); - for (auto auxAttributeView_i = 0u; auxAttributeView_i < inAuxAttributeViews.size(); auxAttributeView_i++) - { - const auto& inAuxAttributeView = inAuxAttributeViews[auxAttributeView_i]; - outAuxAttributeViews->operator[](auxAttributeView_i) = createOutView(inAuxAttributeView); - } - - for (auto group_i = 0u; group_i < groups.size(); group_i++) - { - const auto srcIndex = groups[group_i].vertex_reference_index; - outPositions[group_i] = inPositions[srcIndex]; - outNormals[group_i] = inPositions[srcIndex]; - - for (uint64_t jointView_i = 0u; jointView_i < polygon->getJointWeightViews().size(); jointView_i++) - { - auto& inView = polygon->getJointWeightViews()[jointView_i]; - auto& outView = outPolygon->getJointWeightViews()->operator[](jointView_i); - - const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); - const auto jointIndexSize = inView.indices.composed.stride; - std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); - memcpy(outJointIndices + group_i * jointIndexSize, inJointIndices + srcIndex * jointIndexSize, jointIndexSize); - - const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); - const auto jointWeightSize = inView.weights.composed.stride; - std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); - memcpy(outWeights + group_i * jointWeightSize, inWeights + srcIndex * jointWeightSize, jointWeightSize); - } - - for (auto auxView_i = 0u; auxView_i < polygon->getAuxAttributeViews().size(); auxView_i++) - { - auto& inView = polygon->getAuxAttributeViews()[auxView_i]; - auto& outView = outPolygon->getAuxAttributeViews()->operator[](auxView_i); - const auto attrSize = inView.composed.stride; - const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); - std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); - memcpy(outAuxs + group_i * attrSize, inAuxs + srcIndex * attrSize, attrSize); - } - } - - CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); - return outPolygon; - -} } } \ No newline at end of file diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 476d962f95..90c72e45ee 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -16,14 +16,19 @@ class CSmoothNormalGenerator CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; - static core::smart_refctd_ptr calculateNormals(const ICPUPolygonGeometry* polygon, bool enableWelding, float epsilon, CPolygonGeometryManipulator::VxCmpFunction function); + using VertexHashMap = CVertexHashGrid; + + struct Result + { + VertexHashMap vertexHashGrid; + core::smart_refctd_ptr geom; + }; + static Result calculateNormals(const ICPUPolygonGeometry* polygon, float epsilon, CPolygonGeometryManipulator::VxCmpFunction function); private: - using VertexHashMap = CVertexHashMap; static VertexHashMap setupData(const ICPUPolygonGeometry* polygon, float epsilon); static core::smart_refctd_ptr processConnectedVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp); - static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon); }; } diff --git a/src/nbl/asset/utils/CVertexHashMap.cpp b/src/nbl/asset/utils/CVertexHashMap.cpp deleted file mode 100644 index 72208c2625..0000000000 --- a/src/nbl/asset/utils/CVertexHashMap.cpp +++ /dev/null @@ -1,141 +0,0 @@ -#include "nbl/asset/utils/CVertexHashMap.h" - -namespace nbl::asset { - -CVertexHashMap::CVertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) : - m_sorter(createSorter(_vertexCount)), - m_hashTableMaxSize(_hashTableMaxSize), - m_cellSize(_cellSize) -{ - assert((core::isPoT(m_hashTableMaxSize))); - - m_vertices.reserve(_vertexCount); -} - -uint32_t CVertexHashMap::hash(const VertexData& vertex) const -{ - const hlsl::float32_t3 position = vertex.position / m_cellSize; - - return ((static_cast(position.x) * primeNumber1) ^ - (static_cast(position.y) * primeNumber2) ^ - (static_cast(position.z) * primeNumber3))& (m_hashTableMaxSize - 1); -} - -uint32_t CVertexHashMap::hash(const hlsl::uint32_t3& position) const -{ - return ((position.x * primeNumber1) ^ - (position.y * primeNumber2) ^ - (position.z * primeNumber3))& (m_hashTableMaxSize - 1); -} - -void CVertexHashMap::add(VertexData&& vertex) -{ - vertex.hash = hash(vertex); - m_vertices.push_back(vertex); -} - -CVertexHashMap::BucketBounds CVertexHashMap::getBucketBoundsByHash(uint32_t hash) -{ - if (hash == invalidHash) - return { m_vertices.end(), m_vertices.end() }; - - const auto skipListBound = std::visit([&](auto& sorter) - { - auto hashBound = sorter.getHashBound(hash); - return std::pair(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second); - }, m_sorter); - - auto begin = std::lower_bound( - skipListBound.first, - skipListBound.second, - hash, - [](const VertexData& vertex, uint32_t hash) - { - return vertex.hash < hash; - }); - - auto end = std::upper_bound( - skipListBound.first, - skipListBound.second, - hash, - [](uint32_t hash, const VertexData& vertex) - { - return hash < vertex.hash; - }); - - //bucket missing - if (begin == m_vertices.end()) - return { m_vertices.end(), m_vertices.end() }; - - //bucket missing - if (begin->hash != hash) - return { m_vertices.end(), m_vertices.end() }; - - return { begin, end }; -} - -void CVertexHashMap::validate() -{ - const auto oldSize = m_vertices.size(); - m_vertices.resize(oldSize*2u); - // TODO: maybe use counting sort (or big radix) and use the histogram directly for the m_buckets - auto finalSortedOutput = std::visit( [&](auto& sorter) { return sorter(m_vertices.data(), m_vertices.data() + oldSize, oldSize, KeyAccessor()); },m_sorter ); - // TODO: optimize out the erase - if (finalSortedOutput != m_vertices.data()) - m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize); - else - m_vertices.resize(oldSize); -} - -uint8_t CVertexHashMap::getNeighboringCellHashes(uint32_t* outNeighbours, const VertexData& vertex) -{ - hlsl::float32_t3 cellFloatCoord = floor(vertex.position / m_cellSize - hlsl::float32_t3(0.5f)); - hlsl::uint32_t3 neighbor = hlsl::uint32_t3(static_cast(cellFloatCoord.x), static_cast(cellFloatCoord.y), static_cast(cellFloatCoord.z)); - - uint8_t neighbourCount = 0; - - //left bottom near - outNeighbours[neighbourCount] = hash(neighbor); - neighbourCount++; - - auto addUniqueNeighbour = [&neighbourCount, outNeighbours](uint32_t hashVal) - { - if (std::find(outNeighbours, outNeighbours + neighbourCount, hashVal) != outNeighbours + neighbourCount) - { - outNeighbours[neighbourCount] = hashVal; - neighbourCount++; - } - }; - - //right bottom near - neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); - addUniqueNeighbour(hash(neighbor)); - - //right bottom far - neighbor = neighbor + hlsl::uint32_t3(0, 0, 1); - addUniqueNeighbour(hash(neighbor)); - - //left bottom far - neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); - addUniqueNeighbour(hash(neighbor)); - - //left top far - neighbor = neighbor + hlsl::uint32_t3(0, 1, 0); - addUniqueNeighbour(hash(neighbor)); - - //right top far - neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); - addUniqueNeighbour(hash(neighbor)); - - //righ top near - neighbor = neighbor - hlsl::uint32_t3(0, 0, 1); - addUniqueNeighbour(hash(neighbor)); - - //left top near - neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); - addUniqueNeighbour(hash(neighbor)); - - return neighbourCount; -} - -} \ No newline at end of file From 5acb40da363979978c04a8a367ac986b8de0f6f4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 18 Oct 2025 14:46:16 +0700 Subject: [PATCH 10/52] Fix normal comparison to use dot instead of value by value comparison --- include/nbl/asset/utils/CVertexHashGrid.h | 212 ++++++++++++++++++ include/nbl/asset/utils/CVertexWelder.h | 2 - .../utils/CPolygonGeometryManipulator.cpp | 47 +++- 3 files changed, 252 insertions(+), 9 deletions(-) create mode 100644 include/nbl/asset/utils/CVertexHashGrid.h diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h new file mode 100644 index 0000000000..b978b9f576 --- /dev/null +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -0,0 +1,212 @@ +#ifndef _NBL_ASSET_C_VERTEX_HASH_MAP_H_INCLUDED_ +#define _NBL_ASSET_C_VERTEX_HASH_MAP_H_INCLUDED_ + +#include "nbl/core/declarations.h" + +namespace nbl::asset +{ + +template +concept HashGridVertexData = requires(T obj, T const cobj, uint32_t hash) { + { cobj.getHash() } -> std::same_as; + { obj.setHash(hash) } -> std::same_as; + { cobj.getPosition() } -> std::same_as; +}; + +template +class CVertexHashGrid +{ +public: + + using vertex_data_t = VertexData; + using collection_t = core::vector; + struct BucketBounds + { + collection_t::const_iterator begin; + collection_t::const_iterator end; + }; + + CVertexHashGrid(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) : + m_sorter(createSorter(_vertexCount)), + m_hashTableMaxSize(_hashTableMaxSize), + m_cellSize(_cellSize) + { + assert((core::isPoT(m_hashTableMaxSize))); + + m_vertices.reserve(_vertexCount); + } + + //inserts vertex into hash table + void add(VertexData&& vertex) + { + vertex.setHash(hash(vertex)); + m_vertices.push_back(vertex); + } + + void validate() + { + const auto oldSize = m_vertices.size(); + m_vertices.resize(oldSize*2u); + auto finalSortedOutput = std::visit( [&](auto& sorter) { return sorter(m_vertices.data(), m_vertices.data() + oldSize, oldSize, KeyAccessor()); },m_sorter ); + + if (finalSortedOutput != m_vertices.data()) + m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize); + else + m_vertices.resize(oldSize); + } + + const collection_t& vertices() const { return m_vertices; } + + collection_t& vertices(){ return m_vertices; } + + inline uint32_t getVertexCount() const { return m_vertices.size(); } + + template + void iterateBroadphaseCandidates(const VertexData& vertex, Fn fn) const + { + std::array neighboringCells; + const auto cellCount = getNeighboringCellHashes(neighboringCells.data(), vertex); + + //iterate among all neighboring cells + for (uint8_t i = 0; i < cellCount; i++) + { + const auto& neighborCell = neighboringCells[i]; + BucketBounds bounds = getBucketBoundsByHash(neighborCell); + for (; bounds.begin != bounds.end; bounds.begin++) + { + const vertex_data_t& neighborVertex = *bounds.begin; + if (&vertex != &neighborVertex) + if (!fn(neighborVertex)) break; + } + } + + }; + +private: + struct KeyAccessor + { + _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; + + template + inline decltype(radix_mask) operator()(const VertexData& item) const + { + return static_cast(item.getHash() >> static_cast(bit_offset)) & radix_mask; + } + }; + + static constexpr uint32_t primeNumber1 = 73856093; + static constexpr uint32_t primeNumber2 = 19349663; + static constexpr uint32_t primeNumber3 = 83492791; + + static constexpr uint32_t invalidHash = 0xFFFFFFFF; + + using sorter_t = std::variant< + core::LSBSorter, + core::LSBSorter, + core::LSBSorter>; + sorter_t m_sorter; + + static sorter_t createSorter(size_t vertexCount) + { + if (vertexCount < (0x1ull << 16ull)) + return core::LSBSorter(); + if (vertexCount < (0x1ull << 32ull)) + return core::LSBSorter(); + return core::LSBSorter(); + } + + collection_t m_vertices; + const uint32_t m_hashTableMaxSize; + const float m_cellSize; + + uint32_t hash(const VertexData& vertex) const + { + const hlsl::float32_t3 position = floor(vertex.getPosition() / m_cellSize); + + return ((static_cast(position.x) * primeNumber1) ^ + (static_cast(position.y) * primeNumber2) ^ + (static_cast(position.z) * primeNumber3))& (m_hashTableMaxSize - 1); + } + + uint32_t hash(const hlsl::uint32_t3& position) const + { + return ((position.x * primeNumber1) ^ + (position.y * primeNumber2) ^ + (position.z * primeNumber3))& (m_hashTableMaxSize - 1); + } + + uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, const VertexData& vertex) const + { + hlsl::float32_t3 cellfloatcoord = floor(vertex.getPosition() / m_cellSize - hlsl::float32_t3(0.5)); + hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); + + uint8_t neighborCount = 0; + + outNeighbors[neighborCount] = hash(baseCoord); + neighborCount++; + + auto addUniqueNeighbor = [&neighborCount, outNeighbors](uint32_t hashval) + { + if (std::find(outNeighbors, outNeighbors + neighborCount, hashval) == outNeighbors + neighborCount) + { + outNeighbors[neighborCount] = hashval; + neighborCount++; + } + }; + + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 0, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 1))); + + return neighborCount; + } + + BucketBounds getBucketBoundsByHash(uint32_t hash) const + { + if (hash == invalidHash) + return { m_vertices.end(), m_vertices.end() }; + + const auto skipListBound = std::visit([&](auto& sorter) + { + auto hashBound = sorter.getHashBound(hash); + return std::pair(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second); + }, m_sorter); + + auto begin = std::lower_bound( + skipListBound.first, + skipListBound.second, + hash, + [](const VertexData& vertex, uint32_t hash) + { + return vertex.hash < hash; + }); + + auto end = std::upper_bound( + skipListBound.first, + skipListBound.second, + hash, + [](uint32_t hash, const VertexData& vertex) + { + return hash < vertex.hash; + }); + + const auto beginIx = begin - m_vertices.begin(); + const auto endIx = end - m_vertices.begin(); + //bucket missing + if (begin == end) + return { m_vertices.end(), m_vertices.end() }; + + //bucket missing + if (begin->hash != hash) + return { m_vertices.end(), m_vertices.end() }; + + return { begin, end }; + } +}; + +} +#endif \ No newline at end of file diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 4b5a06a540..54f407fdbf 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -51,8 +51,6 @@ class CVertexWelder { } } - // TODO(kevinyu): Handle when indexBuffer is not exist - const auto& indexView = outPolygon->getIndexView(); if (indexView) { diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index f3c8761b3c..859d1224a3 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -135,7 +135,7 @@ core::smart_refctd_ptr CPolygonGeometryManipulator::createU namespace { - static bool isAttributeEqual(const ICPUPolygonGeometry::SDataView& view, uint64_t index1, uint64_t index2, float epsilon) + bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint64_t index1, uint64_t index2, float epsilon) { if (!view) return true; const auto channelCount = getFormatChannelCount(view.composed.format); @@ -175,7 +175,40 @@ namespace } } return true; - } + } + + bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint64_t index1, uint64_t index2, float epsilon) +{ + if (!view) return true; + const auto channelCount = getFormatChannelCount(view.composed.format); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + { + hlsl::uint64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + hlsl::int64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + default: + { + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + } + return true; +} } @@ -196,17 +229,17 @@ core::smart_refctd_ptr CPolygonGeometryManipulator::createS auto canJoinVertices = [epsilon](const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2)-> bool { - if (!isAttributeEqual(polygon->getPositionView(), index1, index2, epsilon)) + if (!isAttributeValEqual(polygon->getPositionView(), index1, index2, epsilon)) return false; - if (!isAttributeEqual(polygon->getNormalView(), index1, index2, epsilon)) + if (!isAttributeDirEqual(polygon->getNormalView(), index1, index2, epsilon)) return false; for (const auto& jointWeightView : polygon->getJointWeightViews()) { - if (!isAttributeEqual(jointWeightView.indices, index1, index2, epsilon)) return false; - if (!isAttributeEqual(jointWeightView.weights, index1, index2, epsilon)) return false; + if (!isAttributeValEqual(jointWeightView.indices, index1, index2, epsilon)) return false; + if (!isAttributeValEqual(jointWeightView.weights, index1, index2, epsilon)) return false; } for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) - if (!isAttributeEqual(auxAttributeView, index1, index2, epsilon)) return false; + if (!isAttributeValEqual(auxAttributeView, index1, index2, epsilon)) return false; return true; }; From 7424828e6c2aac28a60e04fcd910d01a7bdbb237 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 26 Oct 2025 14:49:02 +0700 Subject: [PATCH 11/52] Fix triangle.hlsl --- include/nbl/asset/utils/CVertexHashGrid.h | 4 ++-- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 24 ++++++++++--------- .../asset/utils/CSmoothNormalGenerator.cpp | 2 +- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index b978b9f576..ecd807054d 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -182,7 +182,7 @@ class CVertexHashGrid hash, [](const VertexData& vertex, uint32_t hash) { - return vertex.hash < hash; + return vertex.getHash() < hash; }); auto end = std::upper_bound( @@ -191,7 +191,7 @@ class CVertexHashGrid hash, [](uint32_t hash, const VertexData& vertex) { - return hash < vertex.hash; + return hash < vertex.getHash(); }); const auto beginIx = begin - m_vertices.begin(); diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index 2b24a6b525..4286e0a411 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -6,6 +6,8 @@ #define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ #include +#include +#include namespace nbl { @@ -17,22 +19,22 @@ namespace shapes namespace util { template - vector GetAngleWeight(const vector& e1, const vector& e2, const vector& e3) + vector compInternalAngle(NBL_CONST_REF_ARG(vector) e1, NBL_CONST_REF_ARG(vector) e2, NBL_CONST_REF_ARG(vector) e3) { // Calculate this triangle's weight for each of its three m_vertices // start by calculating the lengths of its sides - const float_t a = dot(e1, e1); - const float_t asqrt = sqrt(a); - const float_t b = dot(e2, e2); - const float_t bsqrt = sqrt(b); - const float_t c = dot(e3, e3); - const float_t csqrt = sqrt(c); + const float_t a = hlsl::dot(e1, e1); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e2, e2); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e3, e3); + const float_t csqrt = hlsl::sqrt(c); + const float_t angle1 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle2 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle3 = hlsl::numbers::pi - (angle1 + angle2); // use them to find the angle at each vertex - return vector( - acosf((b + c - a) / (2.f * bsqrt * csqrt)), - acosf((-b + c + a) / (2.f * asqrt * csqrt)), - acosf((b - c + a) / (2.f * bsqrt * asqrt))); + return vector(angle1, angle2, angle3); } } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index a04478ff59..30fdc3b40f 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -55,7 +55,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto faceNormal = normalize(cross(v2 - v1, v3 - v1)); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::GetAngleWeight(v2 - v3, v1 - v3, v1 - v2); + const auto angleWages = hlsl::shapes::util::compInternalAngle(v2 - v3, v1 - v3, v1 - v2); vertices.add({ i, 0, faceNormal * angleWages.x, v1}); vertices.add({ i + 1, 0, faceNormal * angleWages.y,v2}); From d078b2c932c4f347303b45a29062b284052c8bb5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 26 Oct 2025 16:51:20 +0700 Subject: [PATCH 12/52] Rename LSBSorter to RadixLSBSorter --- include/nbl/asset/utils/CVertexHashGrid.h | 12 ++++++------ include/nbl/core/algorithm/radix_sort.h | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index ecd807054d..0c642ae2a8 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -101,18 +101,18 @@ class CVertexHashGrid static constexpr uint32_t invalidHash = 0xFFFFFFFF; using sorter_t = std::variant< - core::LSBSorter, - core::LSBSorter, - core::LSBSorter>; + core::RadixLsbSorter, + core::RadixLsbSorter, + core::RadixLsbSorter>; sorter_t m_sorter; static sorter_t createSorter(size_t vertexCount) { if (vertexCount < (0x1ull << 16ull)) - return core::LSBSorter(); + return core::RadixLsbSorter(); if (vertexCount < (0x1ull << 32ull)) - return core::LSBSorter(); - return core::LSBSorter(); + return core::RadixLsbSorter(); + return core::RadixLsbSorter(); } collection_t m_vertices; diff --git a/include/nbl/core/algorithm/radix_sort.h b/include/nbl/core/algorithm/radix_sort.h index 057598963a..d7092590f7 100644 --- a/include/nbl/core/algorithm/radix_sort.h +++ b/include/nbl/core/algorithm/radix_sort.h @@ -53,7 +53,7 @@ constexpr int8_t find_msb(const T& a_variable) } template -struct LSBSorter +struct RadixLsbSorter { _NBL_STATIC_INLINE_CONSTEXPR uint16_t histogram_bytesize = 8192u; _NBL_STATIC_INLINE_CONSTEXPR size_t histogram_size = size_t(histogram_bytesize)/sizeof(histogram_t); @@ -129,11 +129,11 @@ inline RandomIt radix_sort(RandomIt input, RandomIt scratch, const size_t rangeS assert(std::abs(std::distance(input,scratch))>=rangeSize); if (rangeSize(0x1ull<<16ull)) - return LSBSorter()(input,scratch,static_cast(rangeSize),comp); + return RadixLsbSorter()(input,scratch,static_cast(rangeSize),comp); if (rangeSize(0x1ull<<32ull)) - return LSBSorter()(input,scratch,static_cast(rangeSize),comp); + return RadixLsbSorter()(input,scratch,static_cast(rangeSize),comp); else - return LSBSorter()(input,scratch,rangeSize,comp); + return RadixLsbSorter()(input,scratch,rangeSize,comp); } //! Because Radix Sort needs O(2n) space and a number of passes dependant on the key length, the final sorted range can be either in `input` or `scratch` From e72e0efbc8394b99fbd7623ee068b75594c92698 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 26 Oct 2025 16:51:35 +0700 Subject: [PATCH 13/52] Add comment for future task --- include/nbl/asset/utils/CPolygonGeometryManipulator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index b1182fd983..6f87c3549b 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -22,9 +22,9 @@ class NBL_API2 CPolygonGeometryManipulator struct SSNGVertexData { uint64_t index; //offset of the vertex into index buffer + // TODO: check whether separating hash and position into its own vector or even rehash the position everytime we need will result in VertexHashGrid become faster. uint32_t hash; hlsl::float32_t3 weightedNormal; - // TODO(kevinyu): Should we separate this from SSNGVertexData, and store it in its own vector in VertexHashGrid? Similar like how hashmap work. Or keep it intrusive? hlsl::float32_t3 position; //position of the vertex in 3D space hlsl::float32_t3 getPosition() const From e91aba1350d6402fd9259070d754b6e7f6d2670f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 27 Oct 2025 15:22:56 +0700 Subject: [PATCH 14/52] Refactor CVertexHashGrid --- include/nbl/asset/utils/CVertexHashGrid.h | 60 +++++++++---------- include/nbl/asset/utils/CVertexWelder.h | 2 +- .../asset/utils/CSmoothNormalGenerator.cpp | 4 +- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 0c642ae2a8..8b8711d183 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -13,6 +13,13 @@ concept HashGridVertexData = requires(T obj, T const cobj, uint32_t hash) { { cobj.getPosition() } -> std::same_as; }; +template +concept HashGridIteratorFn = HashGridVertexData && requires(Fn && fn, T const cobj) +{ + // return whether hash grid should continue the iteration + { std::invoke(std::forward(fn), cobj) } -> std::same_as; +}; + template class CVertexHashGrid { @@ -26,43 +33,40 @@ class CVertexHashGrid collection_t::const_iterator end; }; - CVertexHashGrid(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) : - m_sorter(createSorter(_vertexCount)), - m_hashTableMaxSize(_hashTableMaxSize), - m_cellSize(_cellSize) + CVertexHashGrid(size_t cellSize, uint32_t hashTableMaxSizeLog2, float vertexCountReserve) : + m_cellSize(cellSize), + m_hashTableMaxSize(1llu << hashTableMaxSizeLog2), + m_sorter(createSorter(vertexCountReserve)) { - assert((core::isPoT(m_hashTableMaxSize))); - - m_vertices.reserve(_vertexCount); + m_vertices.reserve(vertexCountReserve); } //inserts vertex into hash table void add(VertexData&& vertex) { vertex.setHash(hash(vertex)); - m_vertices.push_back(vertex); + m_vertices.push_back(std::move(vertex)); } - void validate() + void bake() { - const auto oldSize = m_vertices.size(); - m_vertices.resize(oldSize*2u); - auto finalSortedOutput = std::visit( [&](auto& sorter) { return sorter(m_vertices.data(), m_vertices.data() + oldSize, oldSize, KeyAccessor()); },m_sorter ); + auto scratchBuffer = collection_t(m_vertices.size()); + + auto finalSortedOutput = std::visit( [&](auto& sorter) + { + return sorter(m_vertices.data(), scratchBuffer.data(), m_vertices.size(), KeyAccessor()); + }, m_sorter ); if (finalSortedOutput != m_vertices.data()) - m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize); - else - m_vertices.resize(oldSize); + m_vertices = std::move(scratchBuffer); } const collection_t& vertices() const { return m_vertices; } - collection_t& vertices(){ return m_vertices; } - inline uint32_t getVertexCount() const { return m_vertices.size(); } - template - void iterateBroadphaseCandidates(const VertexData& vertex, Fn fn) const + template Fn> + void forEachBroadphaseNeighborCandidates(const VertexData& vertex, Fn&& fn) const { std::array neighboringCells; const auto cellCount = getNeighboringCellHashes(neighboringCells.data(), vertex); @@ -76,7 +80,7 @@ class CVertexHashGrid { const vertex_data_t& neighborVertex = *bounds.begin; if (&vertex != &neighborVertex) - if (!fn(neighborVertex)) break; + if (!std::invoke(std::forward(fn), neighborVertex)) break; } } @@ -85,7 +89,7 @@ class CVertexHashGrid private: struct KeyAccessor { - _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; + constexpr static size_t key_bit_count = 32ull; template inline decltype(radix_mask) operator()(const VertexData& item) const @@ -98,8 +102,6 @@ class CVertexHashGrid static constexpr uint32_t primeNumber2 = 19349663; static constexpr uint32_t primeNumber3 = 83492791; - static constexpr uint32_t invalidHash = 0xFFFFFFFF; - using sorter_t = std::variant< core::RadixLsbSorter, core::RadixLsbSorter, @@ -122,10 +124,8 @@ class CVertexHashGrid uint32_t hash(const VertexData& vertex) const { const hlsl::float32_t3 position = floor(vertex.getPosition() / m_cellSize); - - return ((static_cast(position.x) * primeNumber1) ^ - (static_cast(position.y) * primeNumber2) ^ - (static_cast(position.z) * primeNumber3))& (m_hashTableMaxSize - 1); + const auto position_uint32 = hlsl::uint32_t3(position.x, position.y, position.z); + return hash(position_uint32); } uint32_t hash(const hlsl::uint32_t3& position) const @@ -137,6 +137,7 @@ class CVertexHashGrid uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, const VertexData& vertex) const { + // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. hlsl::float32_t3 cellfloatcoord = floor(vertex.getPosition() / m_cellSize - hlsl::float32_t3(0.5)); hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); @@ -167,12 +168,9 @@ class CVertexHashGrid BucketBounds getBucketBoundsByHash(uint32_t hash) const { - if (hash == invalidHash) - return { m_vertices.end(), m_vertices.end() }; - const auto skipListBound = std::visit([&](auto& sorter) { - auto hashBound = sorter.getHashBound(hash); + auto hashBound = sorter.getMostSignificantRadixBound(hash); return std::pair(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second); }, m_sorter); diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 54f407fdbf..0fa728037c 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -37,7 +37,7 @@ class CVertexWelder { const auto asIndex = vertexIndexToAsIndex[index]; const auto& vertexData = as.vertices()[asIndex]; auto& remappedVertexIndex = remappedVertexIndexes[index]; - as.iterateBroadphaseCandidates(vertexData, [&, polygon, index](const typename AccelStructureT::vertex_data_t& neighbor) { + as.forEachBroadphaseNeighborCandidates(vertexData, [&, polygon, index](const typename AccelStructureT::vertex_data_t& neighbor) { const auto neighborRemappedIndex = remappedVertexIndexes[neighbor.index]; if (shouldWeldFn(polygon, index, neighbor.index) && neighborRemappedIndex != INVALID_INDEX) { remappedVertexIndex = neighborRemappedIndex; diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 30fdc3b40f..4b8ae10226 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -62,7 +62,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as vertices.add({ i + 2, 0, faceNormal * angleWages.z, v3}); } - vertices.validate(); + vertices.bake(); return vertices; } @@ -96,7 +96,7 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne { auto normal = processedVertex.weightedNormal; - vertexHashMap.iterateBroadphaseCandidates(processedVertex, [&](const VertexHashMap::vertex_data_t& candidate) + vertexHashMap.forEachBroadphaseNeighborCandidates(processedVertex, [&](const VertexHashMap::vertex_data_t& candidate) { if (compareVertexPosition(processedVertex.position, candidate.position, epsilon) && vxcmp(processedVertex, candidate, polygon)) From 509b359e2d0eb92a9ac4879cde9aa2a84028149e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 27 Oct 2025 15:23:08 +0700 Subject: [PATCH 15/52] Make radix sort more efficient --- include/nbl/core/algorithm/radix_sort.h | 53 +++++++++---------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/include/nbl/core/algorithm/radix_sort.h b/include/nbl/core/algorithm/radix_sort.h index d7092590f7..67ef0d3c31 100644 --- a/include/nbl/core/algorithm/radix_sort.h +++ b/include/nbl/core/algorithm/radix_sort.h @@ -67,12 +67,12 @@ struct RadixLsbSorter return pass(input,output,rangeSize,comp); } - std::pair getHashBound(size_t key) const + std::pair getMostSignificantRadixBound(size_t key) const { constexpr histogram_t shift = static_cast(radix_bits * last_pass); const auto histogramIx = (key >> shift) & radix_mask; - const auto boundBegin = histogramIx == 0 ? 0 : histogram[histogramIx - 1]; - return { boundBegin, histogram[histogramIx] }; + const auto boundBegin = histogramIx == 0 ? 0 : m_histogram[histogramIx - 1]; + return { boundBegin, m_histogram[histogramIx] }; } private: @@ -80,47 +80,30 @@ struct RadixLsbSorter inline RandomIt pass(RandomIt input, RandomIt output, const histogram_t rangeSize, const KeyAccessor& comp) { // clear - std::fill_n(histogram,histogram_size,static_cast(0u)); + std::fill_n(m_histogram,histogram_size,static_cast(0u)); + // count constexpr histogram_t shift = static_cast(radix_bits*pass_ix); for (histogram_t i = 0u; i < rangeSize; i++) - ++histogram[comp.template operator()(input[i])]; + ++m_histogram[comp.template operator()(input[i])]; + // prefix sum - std::inclusive_scan(histogram, histogram + histogram_size, histogram); - // scatter + std::exclusive_scan(m_histogram, m_histogram + histogram_size, m_histogram, 0); - if constexpr (pass_ix != last_pass) + // scatter. After scatter m_histogram now become a skiplist + for (histogram_t i = 0; i < rangeSize; i++) { - - for (histogram_t i = rangeSize; i != 0u;) - { - i--; - const auto& val = input[i]; - const auto& histogramIx = comp.template operator()(val); - output[--histogram[histogramIx]] = val; - } - - return pass(output,input,rangeSize,comp); - } - else - { - // need to preserve histogram value for the skip list, so we copy to temporary histogramArray and use that - std::array tmpHistogram; - std::copy(histogram, histogram + histogram_size, tmpHistogram.data()); - - for (histogram_t i = rangeSize; i != 0u;) - { - i--; - const auto& val = input[i]; - const auto& histogramIx = comp.template operator()(val); - output[--tmpHistogram[histogramIx]] = val; - } - - return output; + const auto& val = input[i]; + const auto& histogramIx = comp.template operator()(val); + output[m_histogram[histogramIx]++] = val; } + + if constexpr (pass_ix != last_pass) + return pass(output,input,rangeSize,comp); + return output; } - alignas(sizeof(histogram_t)) histogram_t histogram[histogram_size]; + alignas(sizeof(histogram_t)) histogram_t m_histogram[histogram_size]; }; template From a30ef6ffe9b49e1242d2ffa0a1f574110efbb7bb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 27 Oct 2025 18:58:32 +0700 Subject: [PATCH 16/52] Change type of index in SSNGVertexData --- .../nbl/asset/utils/CPolygonGeometryManipulator.h | 2 +- include/nbl/asset/utils/CVertexWelder.h | 12 ++++++------ src/nbl/asset/utils/CSmoothNormalGenerator.cpp | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 6f87c3549b..a56da56da5 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -21,7 +21,7 @@ class NBL_API2 CPolygonGeometryManipulator struct SSNGVertexData { - uint64_t index; //offset of the vertex into index buffer + uint32_t index; //offset of the vertex into index buffer // TODO: check whether separating hash and position into its own vector or even rehash the position everytime we need will result in VertexHashGrid become faster. uint32_t hash; hlsl::float32_t3 weightedNormal; diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 0fa728037c..d8ff8e2bda 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -18,21 +18,21 @@ class CVertexWelder { auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); outPolygon->setIndexing(IPolygonGeometryBase::TriangleList()); - core::vector vertexIndexToAsIndex(as.getVertexCount()); + core::vector vertexIndexToAsIndex(as.getVertexCount()); - for (uint64_t vertexData_i = 0u; vertexData_i < as.getVertexCount(); vertexData_i++) + for (uint32_t vertexData_i = 0u; vertexData_i < as.getVertexCount(); vertexData_i++) { const auto& vertexData = as.vertices()[vertexData_i]; vertexIndexToAsIndex[vertexData.index] = vertexData.index; } - static constexpr auto INVALID_INDEX = std::numeric_limits::max(); - core::vector remappedVertexIndexes(as.getVertexCount()); + static constexpr auto INVALID_INDEX = std::numeric_limits::max(); + core::vector remappedVertexIndexes(as.getVertexCount()); std::fill(remappedVertexIndexes.begin(), remappedVertexIndexes.end(), INVALID_INDEX); - uint64_t maxRemappedIndex = 0; + uint32_t maxRemappedIndex = 0; // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together - for (uint64_t index = 0; index < as.getVertexCount(); index++) + for (uint32_t index = 0; index < as.getVertexCount(); index++) { const auto asIndex = vertexIndexToAsIndex[index]; const auto& vertexData = as.vertices()[asIndex]; diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 4b8ae10226..9f4ceb5020 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -44,7 +44,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto cellCount = std::max(core::roundUpToPoT((idxCount + 31) >> 5), 4); VertexHashMap vertices(idxCount, std::min(16u * 1024u, cellCount), epsilon == 0.0f ? 0.00001f : epsilon * 2.f); - for (uint64_t i = 0; i < idxCount; i += 3) + for (uint32_t i = 0; i < idxCount; i += 3) { //calculate face normal of parent triangle hlsl::float32_t3 v1, v2, v3; From c580d729217569d66e98da8d360962553d9963c3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 27 Oct 2025 18:58:47 +0700 Subject: [PATCH 17/52] Move CanJoinVertices to CVertexWelder --- include/nbl/asset/utils/CVertexWelder.h | 105 +++++++++++++++++- .../utils/CPolygonGeometryManipulator.cpp | 99 +---------------- 2 files changed, 105 insertions(+), 99 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index d8ff8e2bda..192b51cf14 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -11,7 +11,110 @@ namespace nbl::asset { class CVertexWelder { public: - using WeldPredicateFn = std::function; + using WeldPredicateFn = std::function; + + class DefaultWeldPredicate + { + private: + static bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) + { + if (!view) return true; + const auto channelCount = getFormatChannelCount(view.composed.format); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + { + hlsl::uint64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + if (val1[channel_i] != val2[channel_i]) return false; + break; + } + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + hlsl::int64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + if (val1[channel_i] != val2[channel_i]) return false; + break; + } + default: + { + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + { + const auto diff = abs(val1[channel_i] - val2[channel_i]); + if (diff > epsilon) return false; + } + break; + } + } + return true; + } + + static bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) + { + if (!view) return true; + const auto channelCount = getFormatChannelCount(view.composed.format); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + { + hlsl::uint64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + hlsl::int64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + default: + { + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + } + return true; + } + + float m_epsilon; + + public: + + DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} + + bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) + { + if (!isAttributeValEqual(polygon->getPositionView(), index1, index2, m_epsilon)) + return false; + if (!isAttributeDirEqual(polygon->getNormalView(), index1, index2, m_epsilon)) + return false; + for (const auto& jointWeightView : polygon->getJointWeightViews()) + { + if (!isAttributeValEqual(jointWeightView.indices, index1, index2, m_epsilon)) return false; + if (!isAttributeValEqual(jointWeightView.weights, index1, index2, m_epsilon)) return false; + } + for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) + if (!isAttributeValEqual(auxAttributeView, index1, index2, m_epsilon)) return false; + + return true; + } + + }; template static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, WeldPredicateFn shouldWeldFn) { diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index f3b043246d..7e70ce55a9 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -136,86 +136,6 @@ core::smart_refctd_ptr CPolygonGeometryManipulator::createU return outGeometry; } - -namespace -{ - bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint64_t index1, uint64_t index2, float epsilon) - { - if (!view) return true; - const auto channelCount = getFormatChannelCount(view.composed.format); - switch (view.composed.rangeFormat) - { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - { - hlsl::uint64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - if (val1[channel_i] != val2[channel_i]) return false; - break; - } - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - hlsl::int64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - if (val1[channel_i] != val2[channel_i]) return false; - break; - } - default: - { - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - { - const auto diff = abs(val1[channel_i] - val2[channel_i]); - if (diff > epsilon) return false; - } - break; - } - } - return true; - } - - bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint64_t index1, uint64_t index2, float epsilon) -{ - if (!view) return true; - const auto channelCount = getFormatChannelCount(view.composed.format); - switch (view.composed.rangeFormat) - { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - { - hlsl::uint64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - return (1.0 - hlsl::dot(val1, val2)) < epsilon; - } - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - hlsl::int64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - return (1.0 - hlsl::dot(val1, val2)) < epsilon; - } - default: - { - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - return (1.0 - hlsl::dot(val1, val2)) < epsilon; - } - } - return true; -} -} - - core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, VxCmpFunction vxcmp) { if (!inPolygon) @@ -231,27 +151,10 @@ core::smart_refctd_ptr CPolygonGeometryManipulator::createS return nullptr; } - auto canJoinVertices = [epsilon](const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2)-> bool -{ - if (!isAttributeValEqual(polygon->getPositionView(), index1, index2, epsilon)) - return false; - if (!isAttributeDirEqual(polygon->getNormalView(), index1, index2, epsilon)) - return false; - for (const auto& jointWeightView : polygon->getJointWeightViews()) - { - if (!isAttributeValEqual(jointWeightView.indices, index1, index2, epsilon)) return false; - if (!isAttributeValEqual(jointWeightView.weights, index1, index2, epsilon)) return false; - } - for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) - if (!isAttributeValEqual(auxAttributeView, index1, index2, epsilon)) return false; - - return true; - }; - auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp); if (enableWelding) { - return CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, canJoinVertices); + return CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, CVertexWelder::DefaultWeldPredicate(epsilon)); } return result.geom; } From 5af4a9b3778758e9012ca6645b55a2175bac1714 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 28 Oct 2025 11:24:02 +0700 Subject: [PATCH 18/52] Move SSNGVertexData and VxCmpFunction to CSmoothNormalGenerator --- .../asset/utils/CPolygonGeometryManipulator.h | 31 ++-------------- .../utils/CPolygonGeometryManipulator.cpp | 2 +- .../asset/utils/CSmoothNormalGenerator.cpp | 8 ++--- src/nbl/asset/utils/CSmoothNormalGenerator.h | 35 ++++++++++++++++--- 4 files changed, 39 insertions(+), 37 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index a56da56da5..25802ced46 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -9,6 +9,7 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" +#include "nbl/asset/utils/CSmoothNormalGenerator.h" #include "nbl/asset/utils/CVertexHashGrid.h" namespace nbl::asset @@ -19,33 +20,6 @@ class NBL_API2 CPolygonGeometryManipulator { public: - struct SSNGVertexData - { - uint32_t index; //offset of the vertex into index buffer - // TODO: check whether separating hash and position into its own vector or even rehash the position everytime we need will result in VertexHashGrid become faster. - uint32_t hash; - hlsl::float32_t3 weightedNormal; - hlsl::float32_t3 position; //position of the vertex in 3D space - - hlsl::float32_t3 getPosition() const - { - return position; - } - - void setHash(uint32_t hash) - { - this->hash = hash; - } - - uint32_t getHash() const - { - return hash; - }; - - }; - - using VxCmpFunction = std::function; - static inline void recomputeContentHashes(ICPUPolygonGeometry* geo) { if (!geo) @@ -260,8 +234,9 @@ class NBL_API2 CPolygonGeometryManipulator static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); + using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, - VxCmpFunction vxcmp = [](const CPolygonGeometryManipulator::SSNGVertexData& v0, const CPolygonGeometryManipulator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) + SSNGVxCmpFunction vxcmp = [](const CSmoothNormalGenerator::SSNGVertexData& v0, const CSmoothNormalGenerator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) { static constexpr float cosOf45Deg = 0.70710678118f; return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > cosOf45Deg; diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 7e70ce55a9..760e563312 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -136,7 +136,7 @@ core::smart_refctd_ptr CPolygonGeometryManipulator::createU return outGeometry; } -core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, VxCmpFunction vxcmp) +core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, SSNGVxCmpFunction vxcmp) { if (!inPolygon) { diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 9f4ceb5020..5c35a8ef68 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -13,12 +13,12 @@ namespace nbl { namespace asset { -static bool operator<(uint32_t lhs, const CPolygonGeometryManipulator::SSNGVertexData& rhs) +static bool operator<(uint32_t lhs, const CSmoothNormalGenerator::SSNGVertexData& rhs) { return lhs < rhs.hash; } -static bool operator<(const CPolygonGeometryManipulator::SSNGVertexData& lhs, uint32_t rhs) +static bool operator<(const CSmoothNormalGenerator::SSNGVertexData& lhs, uint32_t rhs) { return lhs.hash < rhs; } @@ -29,7 +29,7 @@ static bool compareVertexPosition(const hlsl::float32_t3& a, const hlsl::float32 return (difference.x <= epsilon && difference.y <= epsilon && difference.z <= epsilon); } -CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp) +CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction vxcmp) { VertexHashMap vertexHashMap = setupData(polygon, epsilon); const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, epsilon,vxcmp); @@ -67,7 +67,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as return vertices; } -core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp) +core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, float epsilon, VxCmpFunction vxcmp) { auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); static constexpr auto NormalFormat = EF_R32G32B32_SFLOAT; diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 90c72e45ee..9d112061d3 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -4,8 +4,8 @@ #ifndef _NBL_ASSET_C_SMOOTH_NORMAL_GENERATOR_H_INCLUDED_ #define _NBL_ASSET_C_SMOOTH_NORMAL_GENERATOR_H_INCLUDED_ +#include "nbl/asset/utils/CVertexHashGrid.h" -#include "nbl/asset/utils/CPolygonGeometryManipulator.h" namespace nbl::asset { @@ -16,19 +16,46 @@ class CSmoothNormalGenerator CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; - using VertexHashMap = CVertexHashGrid; + struct SSNGVertexData + { + uint32_t index; //offset of the vertex into index buffer + // TODO: check whether separating hash and position into its own vector or even rehash the position everytime we need will result in VertexHashGrid become faster. + uint32_t hash; + hlsl::float32_t3 weightedNormal; + hlsl::float32_t3 position; //position of the vertex in 3D space + + hlsl::float32_t3 getPosition() const + { + return position; + } + + void setHash(uint32_t hash) + { + this->hash = hash; + } + + uint32_t getHash() const + { + return hash; + }; + + }; + + using VxCmpFunction = std::function; + + using VertexHashMap = CVertexHashGrid; struct Result { VertexHashMap vertexHashGrid; core::smart_refctd_ptr geom; }; - static Result calculateNormals(const ICPUPolygonGeometry* polygon, float epsilon, CPolygonGeometryManipulator::VxCmpFunction function); + static Result calculateNormals(const ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction function); private: static VertexHashMap setupData(const ICPUPolygonGeometry* polygon, float epsilon); - static core::smart_refctd_ptr processConnectedVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp); + static core::smart_refctd_ptr processConnectedVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon, VxCmpFunction vxcmp); }; } From 39e288cd9848e672cec4e7337a1ee84b53887316 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 28 Oct 2025 11:24:08 +0700 Subject: [PATCH 19/52] Add comment --- include/nbl/asset/utils/CVertexWelder.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 192b51cf14..02dbfbc660 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -44,6 +44,7 @@ class CVertexWelder { } default: { + // TODO: Handle 16,32,64 bit float vectors once the pixel encode/decode functions get reimplemented in HLSL and decodeElement can actually benefit from that. hlsl::float64_t4 val1, val2; view.decodeElement(index1, val1); view.decodeElement(index2, val2); From c49d8d11508568eb6d8e5a9f25fedff1df442105 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 2 Nov 2025 07:35:43 +0700 Subject: [PATCH 20/52] Add overload for CVertexHashGrid::forEachBroadphaseNeighborCandidates that accept position as argument --- include/nbl/asset/utils/CVertexHashGrid.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 8b8711d183..63a3a930a8 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -83,7 +83,25 @@ class CVertexHashGrid if (!std::invoke(std::forward(fn), neighborVertex)) break; } } - + }; + + template Fn> + inline void forEachBroadphaseNeighborCandidates(const hlsl::float32_t3& position, Fn&& fn) const + { + std::array neighboringCells; + const auto cellCount = getNeighboringCellHashes(neighboringCells.data(), position); + + //iterate among all neighboring cells + for (uint8_t i = 0; i < cellCount; i++) + { + const auto& neighborCell = neighboringCells[i]; + BucketBounds bounds = getBucketBoundsByHash(neighborCell); + for (; bounds.begin != bounds.end; bounds.begin++) + { + const vertex_data_t& neighborVertex = *bounds.begin; + if (!std::invoke(std::forward(fn), neighborVertex)) break; + } + } }; private: From c688f7b3a0265248f54d81bbb49867c5afda4d95 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 2 Nov 2025 07:35:56 +0700 Subject: [PATCH 21/52] Add inline specifier to a bunch of method --- include/nbl/asset/utils/CVertexHashGrid.h | 84 +++++++++++------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 63a3a930a8..0aceeab80a 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -33,7 +33,7 @@ class CVertexHashGrid collection_t::const_iterator end; }; - CVertexHashGrid(size_t cellSize, uint32_t hashTableMaxSizeLog2, float vertexCountReserve) : + inline CVertexHashGrid(float cellSize, uint32_t hashTableMaxSizeLog2, size_t vertexCountReserve = 8192) : m_cellSize(cellSize), m_hashTableMaxSize(1llu << hashTableMaxSizeLog2), m_sorter(createSorter(vertexCountReserve)) @@ -42,13 +42,13 @@ class CVertexHashGrid } //inserts vertex into hash table - void add(VertexData&& vertex) + inline void add(VertexData&& vertex) { vertex.setHash(hash(vertex)); m_vertices.push_back(std::move(vertex)); } - void bake() + inline void bake() { auto scratchBuffer = collection_t(m_vertices.size()); @@ -61,15 +61,15 @@ class CVertexHashGrid m_vertices = std::move(scratchBuffer); } - const collection_t& vertices() const { return m_vertices; } + inline const collection_t& vertices() const { return m_vertices; } inline uint32_t getVertexCount() const { return m_vertices.size(); } template Fn> - void forEachBroadphaseNeighborCandidates(const VertexData& vertex, Fn&& fn) const + inline void forEachBroadphaseNeighborCandidates(const VertexData& vertex, Fn&& fn) const { std::array neighboringCells; - const auto cellCount = getNeighboringCellHashes(neighboringCells.data(), vertex); + const auto cellCount = getNeighboringCellHashes(neighboringCells.data(), vertex.getPosition()); //iterate among all neighboring cells for (uint8_t i = 0; i < cellCount; i++) @@ -107,7 +107,7 @@ class CVertexHashGrid private: struct KeyAccessor { - constexpr static size_t key_bit_count = 32ull; + constexpr static inline size_t key_bit_count = 32ull; template inline decltype(radix_mask) operator()(const VertexData& item) const @@ -126,7 +126,7 @@ class CVertexHashGrid core::RadixLsbSorter>; sorter_t m_sorter; - static sorter_t createSorter(size_t vertexCount) + inline static sorter_t createSorter(size_t vertexCount) { if (vertexCount < (0x1ull << 16ull)) return core::RadixLsbSorter(); @@ -139,52 +139,52 @@ class CVertexHashGrid const uint32_t m_hashTableMaxSize; const float m_cellSize; - uint32_t hash(const VertexData& vertex) const + inline uint32_t hash(const VertexData& vertex) const { const hlsl::float32_t3 position = floor(vertex.getPosition() / m_cellSize); const auto position_uint32 = hlsl::uint32_t3(position.x, position.y, position.z); return hash(position_uint32); } - uint32_t hash(const hlsl::uint32_t3& position) const + inline uint32_t hash(const hlsl::uint32_t3& position) const { return ((position.x * primeNumber1) ^ (position.y * primeNumber2) ^ (position.z * primeNumber3))& (m_hashTableMaxSize - 1); } - uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, const VertexData& vertex) const - { - // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. - hlsl::float32_t3 cellfloatcoord = floor(vertex.getPosition() / m_cellSize - hlsl::float32_t3(0.5)); - hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); - - uint8_t neighborCount = 0; - - outNeighbors[neighborCount] = hash(baseCoord); - neighborCount++; - - auto addUniqueNeighbor = [&neighborCount, outNeighbors](uint32_t hashval) - { - if (std::find(outNeighbors, outNeighbors + neighborCount, hashval) == outNeighbors + neighborCount) - { - outNeighbors[neighborCount] = hashval; - neighborCount++; - } - }; - - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 0, 1))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 0))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 0))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 0))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 1))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 1))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 1))); - - return neighborCount; - } - - BucketBounds getBucketBoundsByHash(uint32_t hash) const + inline uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, hlsl::float32_t3 position) const + { + // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. + hlsl::float32_t3 cellfloatcoord = floor(position / m_cellSize - hlsl::float32_t3(0.5)); + hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); + + uint8_t neighborCount = 0; + + outNeighbors[neighborCount] = hash(baseCoord); + neighborCount++; + + auto addUniqueNeighbor = [&neighborCount, outNeighbors](uint32_t hashval) + { + if (std::find(outNeighbors, outNeighbors + neighborCount, hashval) == outNeighbors + neighborCount) + { + outNeighbors[neighborCount] = hashval; + neighborCount++; + } + }; + + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 0, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 1))); + + return neighborCount; + } + + inline BucketBounds getBucketBoundsByHash(uint32_t hash) const { const auto skipListBound = std::visit([&](auto& sorter) { From c521f955538100f8f0dab051daa2400450f7751e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 2 Nov 2025 07:36:59 +0700 Subject: [PATCH 22/52] use 0 base indexing for edge and vertex in compInternalAngle --- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 17 ++++++++-------- .../asset/utils/CSmoothNormalGenerator.cpp | 20 +++++++++---------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index 4286e0a411..45f678325c 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -18,23 +18,24 @@ namespace shapes namespace util { + // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. template - vector compInternalAngle(NBL_CONST_REF_ARG(vector) e1, NBL_CONST_REF_ARG(vector) e2, NBL_CONST_REF_ARG(vector) e3) + vector compInternalAngle(NBL_CONST_REF_ARG(vector) e0, NBL_CONST_REF_ARG(vector) e1, NBL_CONST_REF_ARG(vector) e2) { // Calculate this triangle's weight for each of its three m_vertices // start by calculating the lengths of its sides - const float_t a = hlsl::dot(e1, e1); + const float_t a = hlsl::dot(e0, e0); const float_t asqrt = hlsl::sqrt(a); - const float_t b = hlsl::dot(e2, e2); + const float_t b = hlsl::dot(e1, e1); const float_t bsqrt = hlsl::sqrt(b); - const float_t c = hlsl::dot(e3, e3); + const float_t c = hlsl::dot(e2, e2); const float_t csqrt = hlsl::sqrt(c); - const float_t angle1 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); - const float_t angle2 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle3 = hlsl::numbers::pi - (angle1 + angle2); + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle1 + angle2); // use them to find the angle at each vertex - return vector(angle1, angle2, angle3); + return vector(angle0, angle1, angle2); } } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 5c35a8ef68..ec9f7357f3 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -42,24 +42,24 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const size_t idxCount = polygon->getPrimitiveCount() * 3; const auto cellCount = std::max(core::roundUpToPoT((idxCount + 31) >> 5), 4); - VertexHashMap vertices(idxCount, std::min(16u * 1024u, cellCount), epsilon == 0.0f ? 0.00001f : epsilon * 2.f); + VertexHashMap vertices(idxCount, std::min(16u * 1024u, cellCount), epsilon); for (uint32_t i = 0; i < idxCount; i += 3) { //calculate face normal of parent triangle - hlsl::float32_t3 v1, v2, v3; - polygon->getPositionView().decodeElement(i, v1); - polygon->getPositionView().decodeElement(i + 1, v2); - polygon->getPositionView().decodeElement(i + 2, v3); + hlsl::float32_t3 v0, v1, v2; + polygon->getPositionView().decodeElement(i, v0); + polygon->getPositionView().decodeElement(i + 1, v1); + polygon->getPositionView().decodeElement(i + 2, v2); - const auto faceNormal = normalize(cross(v2 - v1, v3 - v1)); + const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::compInternalAngle(v2 - v3, v1 - v3, v1 - v2); + const auto angleWages = hlsl::shapes::util::compInternalAngle(v2 - v1, v0 - v2, v1 - v2); - vertices.add({ i, 0, faceNormal * angleWages.x, v1}); - vertices.add({ i + 1, 0, faceNormal * angleWages.y,v2}); - vertices.add({ i + 2, 0, faceNormal * angleWages.z, v3}); + vertices.add({ i, 0, faceNormal * angleWages.x, v0}); + vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); + vertices.add({ i + 2, 0, faceNormal * angleWages.z, v2}); } vertices.bake(); From a7aae4d546f7b2ab3a5d03260c9522f558807c61 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 2 Nov 2025 07:37:25 +0700 Subject: [PATCH 23/52] Iterate backward when gatherin histogram frequency for better cache line utilization --- include/nbl/core/algorithm/radix_sort.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/nbl/core/algorithm/radix_sort.h b/include/nbl/core/algorithm/radix_sort.h index 67ef0d3c31..5b88c92936 100644 --- a/include/nbl/core/algorithm/radix_sort.h +++ b/include/nbl/core/algorithm/radix_sort.h @@ -91,8 +91,9 @@ struct RadixLsbSorter std::exclusive_scan(m_histogram, m_histogram + histogram_size, m_histogram, 0); // scatter. After scatter m_histogram now become a skiplist - for (histogram_t i = 0; i < rangeSize; i++) + for (histogram_t i = rangeSize; i != 0;) { + i--; const auto& val = input[i]; const auto& histogramIx = comp.template operator()(val); output[m_histogram[histogramIx]++] = val; From 4a5c49062e55ffb08eaee49123105c7d89d40ea5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 2 Nov 2025 07:38:17 +0700 Subject: [PATCH 24/52] Refactor CVertexWelder to use abstract class for WeldPredicate instead of std::function --- include/nbl/asset/utils/CVertexWelder.h | 380 ++++++++++++------------ 1 file changed, 198 insertions(+), 182 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 02dbfbc660..a537bbebb0 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -9,227 +9,243 @@ namespace nbl::asset { class CVertexWelder { - -public: - using WeldPredicateFn = std::function; - - class DefaultWeldPredicate - { - private: - static bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) - { - if (!view) return true; - const auto channelCount = getFormatChannelCount(view.composed.format); - switch (view.composed.rangeFormat) + + public: + + class WeldPredicate + { + public: + virtual bool init(const ICPUPolygonGeometry* geom) = 0; + virtual bool operator()(const ICPUPolygonGeometry* geom, uint32_t idx1, uint32_t idx2) const = 0; + virtual ~WeldPredicate() {}; + }; + + class DefaultWeldPredicate : public WeldPredicate + { + private: + + static bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2) { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - { - hlsl::uint64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - if (val1[channel_i] != val2[channel_i]) return false; - break; - } - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: + const auto byteSize = getTexelOrBlockBytesize(view.composed.format); + const auto* basePtr = reinterpret_cast(view.getPointer()); + const auto stride = view.composed.stride; + return (memcmp(basePtr + (index1 * stride), basePtr + (index2 * stride), byteSize) == 0); + } + + static bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) + { + // TODO: Handle 16,32,64 bit float vectors once the pixel encode/decode functions get reimplemented in HLSL and decodeElement can actually benefit from that. + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) { - hlsl::int64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - if (val1[channel_i] != val2[channel_i]) return false; - break; + const auto diff = abs(val1[channel_i] - val2[channel_i]); + if (diff > epsilon) return false; } - default: + return true; + } + + static bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) + { + if (!view) return true; + const auto channelCount = getFormatChannelCount(view.composed.format); + // TODO: use memcmp to compare for integral equality + const auto byteSize = getTexelOrBlockBytesize(view.composed.format); + switch (view.composed.rangeFormat) { - // TODO: Handle 16,32,64 bit float vectors once the pixel encode/decode functions get reimplemented in HLSL and decodeElement can actually benefit from that. - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: { - const auto diff = abs(val1[channel_i] - val2[channel_i]); - if (diff > epsilon) return false; + return isIntegralElementEqual(view, index1, index2); + } + default: + { + return isRealElementEqual(view, index1, index2, channelCount, epsilon); } - break; } + return true; } - return true; - } - static bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) - { - if (!view) return true; - const auto channelCount = getFormatChannelCount(view.composed.format); - switch (view.composed.rangeFormat) + static bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: + if (!view) return true; + const auto channelCount = getFormatChannelCount(view.composed.format); + switch (view.composed.rangeFormat) { - hlsl::uint64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - return (1.0 - hlsl::dot(val1, val2)) < epsilon; - } - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - hlsl::int64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - return (1.0 - hlsl::dot(val1, val2)) < epsilon; - } - default: - { - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - return (1.0 - hlsl::dot(val1, val2)) < epsilon; + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + return isIntegralElementEqual(view, index1, index2); + } + default: + { + if (channelCount != 3) + return isRealElementEqual(view, index1, index2, channelCount, epsilon); + + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } } } - return true; - } - float m_epsilon; + float m_epsilon; - public: + public: - DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} + DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} - bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) - { - if (!isAttributeValEqual(polygon->getPositionView(), index1, index2, m_epsilon)) - return false; - if (!isAttributeDirEqual(polygon->getNormalView(), index1, index2, m_epsilon)) - return false; - for (const auto& jointWeightView : polygon->getJointWeightViews()) + bool init(const ICPUPolygonGeometry* polygon) override { - if (!isAttributeValEqual(jointWeightView.indices, index1, index2, m_epsilon)) return false; - if (!isAttributeValEqual(jointWeightView.weights, index1, index2, m_epsilon)) return false; + return polygon->valid(); } - for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) - if (!isAttributeValEqual(auxAttributeView, index1, index2, m_epsilon)) return false; - return true; - } - - }; + bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override + { + if (!isAttributeValEqual(polygon->getPositionView(), index1, index2, m_epsilon)) + return false; + if (!isAttributeDirEqual(polygon->getNormalView(), index1, index2, m_epsilon)) + return false; + for (const auto& jointWeightView : polygon->getJointWeightViews()) + { + if (!isAttributeValEqual(jointWeightView.indices, index1, index2, m_epsilon)) return false; + if (!isAttributeValEqual(jointWeightView.weights, index1, index2, m_epsilon)) return false; + } + for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) + if (!isAttributeValEqual(auxAttributeView, index1, index2, m_epsilon)) return false; - template - static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, WeldPredicateFn shouldWeldFn) { - auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); - outPolygon->setIndexing(IPolygonGeometryBase::TriangleList()); + return true; + } - core::vector vertexIndexToAsIndex(as.getVertexCount()); + ~DefaultWeldPredicate() override {} + + }; - for (uint32_t vertexData_i = 0u; vertexData_i < as.getVertexCount(); vertexData_i++) - { - const auto& vertexData = as.vertices()[vertexData_i]; - vertexIndexToAsIndex[vertexData.index] = vertexData.index; - } + template + static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, const WeldPredicate& shouldWeldFn) { + auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); - static constexpr auto INVALID_INDEX = std::numeric_limits::max(); - core::vector remappedVertexIndexes(as.getVertexCount()); - std::fill(remappedVertexIndexes.begin(), remappedVertexIndexes.end(), INVALID_INDEX); + const auto& positionView = polygon->getPositionView(); + const auto vertexCount = positionView.getElementCount(); - uint32_t maxRemappedIndex = 0; - // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together - for (uint32_t index = 0; index < as.getVertexCount(); index++) - { - const auto asIndex = vertexIndexToAsIndex[index]; - const auto& vertexData = as.vertices()[asIndex]; - auto& remappedVertexIndex = remappedVertexIndexes[index]; - as.forEachBroadphaseNeighborCandidates(vertexData, [&, polygon, index](const typename AccelStructureT::vertex_data_t& neighbor) { - const auto neighborRemappedIndex = remappedVertexIndexes[neighbor.index]; - if (shouldWeldFn(polygon, index, neighbor.index) && neighborRemappedIndex != INVALID_INDEX) { - remappedVertexIndex = neighborRemappedIndex; - return false; - } - return true; - }); - if (remappedVertexIndex != INVALID_INDEX) { - remappedVertexIndex = vertexData.index; - maxRemappedIndex = vertexData.index; - } - } + static constexpr auto INVALID_INDEX = std::numeric_limits::max(); + core::vector remappedVertexIndexes(vertexCount); + std::fill(remappedVertexIndexes.begin(), remappedVertexIndexes.end(), INVALID_INDEX); - const auto& indexView = outPolygon->getIndexView(); - if (indexView) - { - auto remappedIndexView = [&] + uint32_t maxRemappedIndex = 0; + // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together + for (uint32_t index = 0; index < vertexCount; index++) { - const auto bytesize = indexView.src.size; - auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); - - auto retval = indexView; - retval.src.buffer = std::move(indices); - if (retval.composed.rangeFormat == IGeometryBase::EAABBFormat::U16) - retval.composed.encodedDataRange.u16.maxVx[0] = maxRemappedIndex; - else if (retval.composed.rangeFormat == IGeometryBase::EAABBFormat::U32) - retval.composed.encodedDataRange.u32.maxVx[0] = maxRemappedIndex; - - return retval; - }(); + hlsl::float32_t3 position; + positionView.decodeElement(index, position); + auto remappedVertexIndex = INVALID_INDEX; + bool foundVertex = false; + as.forEachBroadphaseNeighborCandidates(position, [&](const typename AccelStructureT::vertex_data_t& candidate) { + const auto neighborRemappedIndex = remappedVertexIndexes[candidate.index]; + if (index == candidate.index) { + foundVertex = true; + } + else if (neighborRemappedIndex != INVALID_INDEX && shouldWeldFn(polygon, index, candidate.index)) { + remappedVertexIndex = neighborRemappedIndex; + } + return !(foundVertex && remappedVertexIndex != INVALID_INDEX); + }); + if (foundVertex) + { + if (remappedVertexIndex == INVALID_INDEX) { + remappedVertexIndex = index; + maxRemappedIndex = index; + } + } + remappedVertexIndexes[index] = remappedVertexIndex; + } + const auto& indexView = outPolygon->getIndexView(); + const auto remappedRangeFormat = (maxRemappedIndex - 1) < std::numeric_limits::max() ? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32; + + auto createRemappedIndexView = [&](size_t indexCount) { + const uint32_t indexSize = remappedRangeFormat == IGeometryBase::EAABBFormat::U16 ? sizeof(uint16_t) : sizeof(uint32_t); + auto remappedIndexBuffer = ICPUBuffer::create({indexSize * indexCount, IBuffer::EUF_INDEX_BUFFER_BIT}); + auto remappedIndexView = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = indexSize, + .rangeFormat = remappedRangeFormat + }, + .src = { + .offset = 0, + .size = remappedIndexBuffer->getSize(), + .buffer = std::move(remappedIndexBuffer) + } + }; - auto remappedIndexes = [&]() { - auto* indexPtr = reinterpret_cast(remappedIndexView.getPointer()); - for (uint64_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { - hlsl::vector index; - indexView.decodeElement>(index_i, index); - IndexT remappedIndex = remappedVertexIndexes[index.x]; - indexPtr[index_i] = remappedIndex; + hlsl::shapes::AABB<4, uint16_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxRemappedIndex; + remappedIndexView.composed.encodedDataRange.u16 = aabb; + remappedIndexView.composed.format = EF_R16_UINT; } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + hlsl::shapes::AABB<4, uint32_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxRemappedIndex; + remappedIndexView.composed.encodedDataRange.u32 = aabb; + remappedIndexView.composed.format = EF_R32_UINT; + } + + return remappedIndexView; }; - if (indexView.composed.rangeFormat == IGeometryBase::EAABBFormat::U16) { - remappedIndexes.template operator()(); - } - else if (indexView.composed.rangeFormat == IGeometryBase::EAABBFormat::U32) { - remappedIndexes.template operator()(); - } - outPolygon->setIndexView(std::move(remappedIndexView)); - } else - { - const uint32_t indexSize = (outPolygon->getPositionView().getElementCount() - 1 < std::numeric_limits::max()) ? sizeof(uint16_t) : sizeof(uint32_t); - auto remappedIndexBuffer = ICPUBuffer::create({indexSize * outPolygon->getVertexReferenceCount(), IBuffer::EUF_INDEX_BUFFER_BIT}); - auto remappedIndexView = ICPUPolygonGeometry::SDataView{ - .composed = { - .stride = indexSize, - }, - .src = { - .offset = 0, - .size = remappedIndexBuffer->getSize(), - .buffer = std::move(remappedIndexBuffer) - } - }; + if (indexView) + { + auto remappedIndexView = createRemappedIndexView(polygon->getIndexCount()); + auto remappedIndexes = [&]() { + auto* remappedIndexPtr = reinterpret_cast(remappedIndexView.getPointer()); + for (uint32_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) + { + hlsl::vector index; + indexView.decodeElement>(index_i, index); + IndexT remappedIndex = remappedVertexIndexes[index.x]; + remappedIndexPtr[index_i] = remappedIndex; + } + }; - auto fillRemappedIndex = [&](){ - auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexBuffer->getPointer()); - for (uint64_t index = 0; index < outPolygon->getPositionView().getElementCount(); index++) - { - remappedIndexBufferPtr[index] = remappedVertexIndexes[index]; + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { + remappedIndexes.template operator()(); + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + remappedIndexes.template operator()(); } - }; - if (indexView.composed.rangeFormat == IGeometryBase::EAABBFormat::U16) { - fillRemappedIndex.template operator()(); - } - else if (indexView.composed.rangeFormat == IGeometryBase::EAABBFormat::U32) { - fillRemappedIndex.template operator()(); + outPolygon->setIndexView(std::move(remappedIndexView)); + } else + { + auto remappedIndexView = createRemappedIndexView(remappedVertexIndexes.size()); + + auto fillRemappedIndex = [&](){ + auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexView.getPointer()); + std::copy_n(remappedVertexIndexes.data(), remappedVertexIndexes.size(), remappedIndexBufferPtr); + }; + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { + fillRemappedIndex.template operator()(); + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + fillRemappedIndex.template operator()(); + } + outPolygon->setIndexView(std::move(remappedIndexView)); } - - outPolygon->setIndexView(std::move(remappedIndexView)); - - } - CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); - return outPolygon; - } + CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); + return outPolygon; + } }; } From 82f9820009057de0f0d5b526b15cdc69f66f372d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 14:33:24 +0700 Subject: [PATCH 25/52] Add concept for Vertex Welder AccelerationStructure --- include/nbl/asset/utils/CVertexWelder.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index a537bbebb0..16f336c010 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -8,6 +8,14 @@ namespace nbl::asset { +template +concept VertexWelderAccelerationStructure = requires(T const cobj, hlsl::float32_t3 position, std::function fn) +{ + typename T::vertex_data_t; + { std::same_as }; + { cobj.forEachBroadphaseNeighborCandidates(position, fn) } -> std::same_as; +}; + class CVertexWelder { public: @@ -127,7 +135,7 @@ class CVertexWelder { }; - template + template static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, const WeldPredicate& shouldWeldFn) { auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); From 401f1bbdf0421479e4ef222b795f4e9b9b274e22 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 14:33:50 +0700 Subject: [PATCH 26/52] Fix virtual destructor for WeldPredicate --- include/nbl/asset/utils/CVertexWelder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 16f336c010..ac348cccd0 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -25,7 +25,7 @@ class CVertexWelder { public: virtual bool init(const ICPUPolygonGeometry* geom) = 0; virtual bool operator()(const ICPUPolygonGeometry* geom, uint32_t idx1, uint32_t idx2) const = 0; - virtual ~WeldPredicate() {}; + virtual ~WeldPredicate() = default; }; class DefaultWeldPredicate : public WeldPredicate @@ -131,7 +131,7 @@ class CVertexWelder { return true; } - ~DefaultWeldPredicate() override {} + ~DefaultWeldPredicate() override = default; }; From 9b007d26302e986aae948d06e3bed1d49531a16d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 14:34:05 +0700 Subject: [PATCH 27/52] Return nullptr if vertex contain INVALID_INDEX --- include/nbl/asset/utils/CVertexWelder.h | 21 +++++++++++++------ .../asset/utils/CSmoothNormalGenerator.cpp | 4 +++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index ac348cccd0..9c65b4e09e 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -215,7 +215,7 @@ class CVertexWelder { if (indexView) { auto remappedIndexView = createRemappedIndexView(polygon->getIndexCount()); - auto remappedIndexes = [&]() { + auto remappedIndexes = [&]() -> bool { auto* remappedIndexPtr = reinterpret_cast(remappedIndexView.getPointer()); for (uint32_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) { @@ -223,31 +223,40 @@ class CVertexWelder { indexView.decodeElement>(index_i, index); IndexT remappedIndex = remappedVertexIndexes[index.x]; remappedIndexPtr[index_i] = remappedIndex; + if (remappedIndex == INVALID_INDEX) return false; } + return true; }; if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { - remappedIndexes.template operator()(); + if (!remappedIndexes.template operator()()) return nullptr; } else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { - remappedIndexes.template operator()(); + if (!remappedIndexes.template operator()()) return nullptr; } outPolygon->setIndexView(std::move(remappedIndexView)); + } else { auto remappedIndexView = createRemappedIndexView(remappedVertexIndexes.size()); auto fillRemappedIndex = [&](){ auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexView.getPointer()); - std::copy_n(remappedVertexIndexes.data(), remappedVertexIndexes.size(), remappedIndexBufferPtr); + for (uint32_t index_i = 0; index_i < remappedVertexIndexes.size(); index_i++) + { + if (remappedVertexIndexes[index_i] == INVALID_INDEX) return false; + remappedIndexBufferPtr[index_i] = remappedVertexIndexes[index_i]; + } + return true; }; if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { - fillRemappedIndex.template operator()(); + if (!fillRemappedIndex.template operator()()) return nullptr; } else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { - fillRemappedIndex.template operator()(); + if (!fillRemappedIndex.template operator()()) return nullptr; } + outPolygon->setIndexView(std::move(remappedIndexView)); } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index ec9f7357f3..41fab68635 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -31,8 +31,10 @@ static bool compareVertexPosition(const hlsl::float32_t3& a, const hlsl::float32 CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction vxcmp) { + assert(polygon->getIndexingCallback()->degree() == 3); VertexHashMap vertexHashMap = setupData(polygon, epsilon); - const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, epsilon,vxcmp); + const auto patchedEpsilon = epsilon == 0.0f ? 0.00001f : epsilon * 2.f; + const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, patchedEpsilon,vxcmp); return { vertexHashMap, smoothPolygon }; } From 7d3175092e79371fc766d92f18f626cea5e4b34f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 14:35:00 +0700 Subject: [PATCH 28/52] Reindent CVertexHashGrid to use tabs --- include/nbl/asset/utils/CVertexHashGrid.h | 64 +++++++++++------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 0aceeab80a..38c8482681 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -54,7 +54,7 @@ class CVertexHashGrid auto finalSortedOutput = std::visit( [&](auto& sorter) { - return sorter(m_vertices.data(), scratchBuffer.data(), m_vertices.size(), KeyAccessor()); + return sorter(m_vertices.data(), scratchBuffer.data(), m_vertices.size(), KeyAccessor()); }, m_sorter ); if (finalSortedOutput != m_vertices.data()) @@ -99,7 +99,7 @@ class CVertexHashGrid for (; bounds.begin != bounds.end; bounds.begin++) { const vertex_data_t& neighborVertex = *bounds.begin; - if (!std::invoke(std::forward(fn), neighborVertex)) break; + if (!std::invoke(std::forward(fn), neighborVertex)) break; } } }; @@ -153,36 +153,36 @@ class CVertexHashGrid (position.z * primeNumber3))& (m_hashTableMaxSize - 1); } - inline uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, hlsl::float32_t3 position) const - { - // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. - hlsl::float32_t3 cellfloatcoord = floor(position / m_cellSize - hlsl::float32_t3(0.5)); - hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); - - uint8_t neighborCount = 0; - - outNeighbors[neighborCount] = hash(baseCoord); - neighborCount++; - - auto addUniqueNeighbor = [&neighborCount, outNeighbors](uint32_t hashval) - { - if (std::find(outNeighbors, outNeighbors + neighborCount, hashval) == outNeighbors + neighborCount) - { - outNeighbors[neighborCount] = hashval; - neighborCount++; - } - }; - - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 0, 1))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 0))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 0))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 0))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 1))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 1))); - addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 1))); - - return neighborCount; - } + inline uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, hlsl::float32_t3 position) const + { + // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. + hlsl::float32_t3 cellfloatcoord = floor(position / m_cellSize - hlsl::float32_t3(0.5)); + hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); + + uint8_t neighborCount = 0; + + outNeighbors[neighborCount] = hash(baseCoord); + neighborCount++; + + auto addUniqueNeighbor = [&neighborCount, outNeighbors](uint32_t hashval) + { + if (std::find(outNeighbors, outNeighbors + neighborCount, hashval) == outNeighbors + neighborCount) + { + outNeighbors[neighborCount] = hashval; + neighborCount++; + } + }; + + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 0, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 1))); + + return neighborCount; + } inline BucketBounds getBucketBoundsByHash(uint32_t hash) const { From 4699173cae72d7fa7f20f4d14aa9f6ab6b37c9c1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 14:48:51 +0700 Subject: [PATCH 29/52] Reindent CPolygonGeometryManipulator to use TABS --- .../asset/utils/CPolygonGeometryManipulator.h | 99 +++--- .../utils/CPolygonGeometryManipulator.cpp | 324 +++++++++--------- 2 files changed, 211 insertions(+), 212 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 25802ced46..92897fb8d1 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -10,7 +10,6 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" -#include "nbl/asset/utils/CVertexHashGrid.h" namespace nbl::asset { @@ -269,14 +268,14 @@ class NBL_API2 CPolygonGeometryManipulator }; typedef std::function VxCmpFunction; - //! Compares two attributes of floating point types in accordance with passed error metric. - /** - @param _a First attribute. - @param _b Second attribute. - @param _cpa Component count. - @param _errMetric Error metric info. - */ - static inline bool compareFloatingPointAttribute(const core::vectorSIMDf& _a, const core::vectorSIMDf& _b, size_t _cpa, const SErrorMetric& _errMetric) + //! Compares two attributes of floating point types in accordance with passed error metric. + /** + @param _a First attribute. + @param _b Second attribute. + @param _cpa Component count. + @param _errMetric Error metric info. + */ + static inline bool compareFloatingPointAttribute(const core::vectorSIMDf& _a, const core::vectorSIMDf& _b, size_t _cpa, const SErrorMetric& _errMetric) { using ErrorF_t = core::vectorSIMDf(*)(core::vectorSIMDf, core::vectorSIMDf); @@ -357,41 +356,41 @@ class NBL_API2 CPolygonGeometryManipulator } - //! Swaps the index buffer for a new index buffer with invalid triangles removed. - /** - Invalid triangle is such consisting of two or more same indices. - @param _input Input index buffer. - @param _idxType Type of indices in the index buffer. - @returns New index buffer or nullptr if input indices were of unknown type or _input was nullptr. - */ - static void filterInvalidTriangles(ICPUMeshBuffer* _input); - - //! Creates index buffer from input converting it to indices for line list primitives. Input is assumed to be indices for line strip. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - - //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle strip. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - - //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle fan. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); + //! Swaps the index buffer for a new index buffer with invalid triangles removed. + /** + Invalid triangle is such consisting of two or more same indices. + @param _input Input index buffer. + @param _idxType Type of indices in the index buffer. + @returns New index buffer or nullptr if input indices were of unknown type or _input was nullptr. + */ + static void filterInvalidTriangles(ICPUMeshBuffer* _input); + + //! Creates index buffer from input converting it to indices for line list primitives. Input is assumed to be indices for line strip. + /** + @param _input Input index buffer's data. + @param _idxCount Index count. + @param _inIndexType Type of input index buffer data (32bit or 16bit). + @param _outIndexType Type of output index buffer data (32bit or 16bit). + */ + static core::smart_refctd_ptr idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); + + //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle strip. + /** + @param _input Input index buffer's data. + @param _idxCount Index count. + @param _inIndexType Type of input index buffer data (32bit or 16bit). + @param _outIndexType Type of output index buffer data (32bit or 16bit). + */ + static core::smart_refctd_ptr idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); + + //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle fan. + /** + @param _input Input index buffer's data. + @param _idxCount Index count. + @param _inIndexType Type of input index buffer data (32bit or 16bit). + @param _outIndexType Type of output index buffer data (32bit or 16bit). + */ + static core::smart_refctd_ptr idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); //! static inline std::array getTriangleIndices(const ICPUMeshBuffer* mb, uint32_t triangleIx) @@ -627,7 +626,7 @@ class NBL_API2 CPolygonGeometryManipulator //! Creates a copy of a mesh with vertices welded /** \param mesh Input mesh - \param errMetrics Array of size EVAI_COUNT. Describes error metric for each vertex attribute (used if attribute is of floating point or normalized type). + \param errMetrics Array of size EVAI_COUNT. Describes error metric for each vertex attribute (used if attribute is of floating point or normalized type). \param tolerance The threshold for vertex comparisons. \return Mesh without redundant vertices. */ static core::smart_refctd_ptr createMeshBufferWelded(ICPUMeshBuffer *inbuffer, const SErrorMetric* errMetrics, const bool& optimIndexType = true, const bool& makeNewMesh = false); @@ -645,12 +644,12 @@ class NBL_API2 CPolygonGeometryManipulator */ static void requantizeMeshBuffer(ICPUMeshBuffer* _meshbuffer, const SErrorMetric* _errMetric); - //! Creates a 32bit index buffer for a mesh with primitive types changed to list types - /**# + //! Creates a 32bit index buffer for a mesh with primitive types changed to list types + /**# @param _newPrimitiveType - @param _begin non-const iterator to beginning of meshbuffer range - @param _end non-const iterator to ending of meshbuffer range - */ + @param _begin non-const iterator to beginning of meshbuffer range + @param _end non-const iterator to ending of meshbuffer range + */ template static inline void homogenizePrimitiveTypeAndIndices(Iterator _begin, Iterator _end, const E_PRIMITIVE_TOPOLOGY _newPrimitiveType, const E_INDEX_TYPE outIndexType = EIT_32BIT) { diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 760e563312..b355f7fb43 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -18,145 +18,145 @@ namespace nbl::asset core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo) { - const auto* indexing = inGeo->getIndexingCallback(); - if (!indexing) - return nullptr; - - const auto indexView = inGeo->getIndexView(); - const auto primCount = inGeo->getPrimitiveCount(); - const uint8_t degree = indexing->degree(); - const auto outIndexCount = primCount*degree; - if (outIndexCount(inGeo->clone(0u)); - - auto* outGeo = outGeometry.get(); - outGeo->setIndexing(IPolygonGeometryBase::NGonList(degree)); - - auto createOutView = [&](const ICPUPolygonGeometry::SDataView& inView) -> ICPUPolygonGeometry::SDataView - { - if (!inView) - return {}; - auto buffer = ICPUBuffer::create({ outIndexCount*inView.composed.stride , inView.src.buffer->getUsageFlags() }); - return { - .composed = inView.composed, - .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} - }; - }; - - const auto inIndexView = inGeo->getIndexView(); - auto outIndexView = createOutView(inIndexView); - auto indexBuffer = outIndexView.src.buffer; - const auto indexSize = inIndexView.composed.stride; - std::byte* outIndices = reinterpret_cast(outIndexView.getPointer()); - outGeo->setIndexView({}); - - const auto inVertexView = inGeo->getPositionView(); - auto outVertexView = createOutView(inVertexView); - auto vertexBuffer = outVertexView.src.buffer; - const auto vertexSize = inVertexView.composed.stride; - const std::byte* inVertices = reinterpret_cast(inVertexView.getPointer()); - std::byte* const outVertices = reinterpret_cast(vertexBuffer->getPointer()); - outGeo->setPositionView(std::move(outVertexView)); - - const auto inNormalView = inGeo->getNormalView(); - const std::byte* const inNormals = reinterpret_cast(inNormalView.getPointer()); - auto outNormalView = createOutView(inNormalView); - auto outNormalBuffer = outNormalView.src.buffer; - outGeo->setNormalView(std::move(outNormalView)); - - outGeometry->getJointWeightViews()->resize(inGeo->getJointWeightViews().size()); - for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) - { - auto& inJointWeightView = inGeo->getJointWeightViews()[jointView_i]; - auto& outJointWeightView = outGeometry->getJointWeightViews()->operator[](jointView_i); - outJointWeightView.indices = createOutView(inJointWeightView.indices); - outJointWeightView.weights = createOutView(inJointWeightView.weights); - } - - outGeometry->getAuxAttributeViews()->resize(inGeo->getAuxAttributeViews().size()); - for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) - outGeo->getAuxAttributeViews()->operator[](auxView_i) = createOutView(inGeo->getAuxAttributeViews()[auxView_i]); - - std::array indices; - for (uint64_t prim_i = 0u; prim_i < primCount; prim_i++) - { - IPolygonGeometryBase::IIndexingCallback::SContext context{ - .indexBuffer = indexView.getPointer(), - .indexSize = indexView.composed.stride, - .beginPrimitive = prim_i, - .endPrimitive = prim_i + 1, - .out = indices.data() - }; - indexing->operator()(context); - for (uint8_t primIndex_i=0; primIndex_i(outNormalBuffer->getPointer()); - const auto normalSize = inNormalView.composed.stride; - memcpy(outNormals + outIndex * normalSize, inNormals + inIndex * normalSize, normalSize); - } - - for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) - { - auto& inView = inGeo->getJointWeightViews()[jointView_i]; - auto& outView = outGeometry->getJointWeightViews()->operator[](jointView_i); - - const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); - const auto jointIndexSize = inView.indices.composed.stride; - std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); - memcpy(outJointIndices + outIndex * jointIndexSize, inJointIndices + inIndex * jointIndexSize, jointIndexSize); - - const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); - const auto jointWeightSize = inView.weights.composed.stride; - std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); - memcpy(outWeights + outIndex * jointWeightSize, outWeights + inIndex * jointWeightSize, jointWeightSize); - } - - for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) - { - auto& inView = inGeo->getAuxAttributeViews()[auxView_i]; - auto& outView = outGeometry->getAuxAttributeViews()->operator[](auxView_i); - const auto attrSize = inView.composed.stride; - const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); - std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); - memcpy(outAuxs + outIndex * attrSize, inAuxs + inIndex * attrSize, attrSize); - } - } - } - - recomputeContentHashes(outGeo); - return outGeometry; + const auto* indexing = inGeo->getIndexingCallback(); + if (!indexing) + return nullptr; + + const auto indexView = inGeo->getIndexView(); + const auto primCount = inGeo->getPrimitiveCount(); + const uint8_t degree = indexing->degree(); + const auto outIndexCount = primCount*degree; + if (outIndexCount(inGeo->clone(0u)); + + auto* outGeo = outGeometry.get(); + outGeo->setIndexing(IPolygonGeometryBase::NGonList(degree)); + + auto createOutView = [&](const ICPUPolygonGeometry::SDataView& inView) -> ICPUPolygonGeometry::SDataView + { + if (!inView) + return {}; + auto buffer = ICPUBuffer::create({ outIndexCount*inView.composed.stride , inView.src.buffer->getUsageFlags() }); + return { + .composed = inView.composed, + .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} + }; + }; + + const auto inIndexView = inGeo->getIndexView(); + auto outIndexView = createOutView(inIndexView); + auto indexBuffer = outIndexView.src.buffer; + const auto indexSize = inIndexView.composed.stride; + std::byte* outIndices = reinterpret_cast(outIndexView.getPointer()); + outGeo->setIndexView({}); + + const auto inVertexView = inGeo->getPositionView(); + auto outVertexView = createOutView(inVertexView); + auto vertexBuffer = outVertexView.src.buffer; + const auto vertexSize = inVertexView.composed.stride; + const std::byte* inVertices = reinterpret_cast(inVertexView.getPointer()); + std::byte* const outVertices = reinterpret_cast(vertexBuffer->getPointer()); + outGeo->setPositionView(std::move(outVertexView)); + + const auto inNormalView = inGeo->getNormalView(); + const std::byte* const inNormals = reinterpret_cast(inNormalView.getPointer()); + auto outNormalView = createOutView(inNormalView); + auto outNormalBuffer = outNormalView.src.buffer; + outGeo->setNormalView(std::move(outNormalView)); + + outGeometry->getJointWeightViews()->resize(inGeo->getJointWeightViews().size()); + for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) + { + auto& inJointWeightView = inGeo->getJointWeightViews()[jointView_i]; + auto& outJointWeightView = outGeometry->getJointWeightViews()->operator[](jointView_i); + outJointWeightView.indices = createOutView(inJointWeightView.indices); + outJointWeightView.weights = createOutView(inJointWeightView.weights); + } + + outGeometry->getAuxAttributeViews()->resize(inGeo->getAuxAttributeViews().size()); + for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) + outGeo->getAuxAttributeViews()->operator[](auxView_i) = createOutView(inGeo->getAuxAttributeViews()[auxView_i]); + + std::array indices; + for (uint64_t prim_i = 0u; prim_i < primCount; prim_i++) + { + IPolygonGeometryBase::IIndexingCallback::SContext context{ + .indexBuffer = indexView.getPointer(), + .indexSize = indexView.composed.stride, + .beginPrimitive = prim_i, + .endPrimitive = prim_i + 1, + .out = indices.data() + }; + indexing->operator()(context); + for (uint8_t primIndex_i=0; primIndex_i(outNormalBuffer->getPointer()); + const auto normalSize = inNormalView.composed.stride; + memcpy(outNormals + outIndex * normalSize, inNormals + inIndex * normalSize, normalSize); + } + + for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) + { + auto& inView = inGeo->getJointWeightViews()[jointView_i]; + auto& outView = outGeometry->getJointWeightViews()->operator[](jointView_i); + + const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); + const auto jointIndexSize = inView.indices.composed.stride; + std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); + memcpy(outJointIndices + outIndex * jointIndexSize, inJointIndices + inIndex * jointIndexSize, jointIndexSize); + + const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); + const auto jointWeightSize = inView.weights.composed.stride; + std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); + memcpy(outWeights + outIndex * jointWeightSize, outWeights + inIndex * jointWeightSize, jointWeightSize); + } + + for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) + { + auto& inView = inGeo->getAuxAttributeViews()[auxView_i]; + auto& outView = outGeometry->getAuxAttributeViews()->operator[](auxView_i); + const auto attrSize = inView.composed.stride; + const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); + std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); + memcpy(outAuxs + outIndex * attrSize, inAuxs + inIndex * attrSize, attrSize); + } + } + } + + recomputeContentHashes(outGeo); + return outGeometry; } core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, SSNGVxCmpFunction vxcmp) { - if (!inPolygon) - { - _NBL_DEBUG_BREAK_IF(true); - return nullptr; - } - - // Mesh need to be unwelded (TODO: why? the output only need to be unwelded, really should be checking `inPolygon->getIndexingCallback()->count()!=3`) - if (inPolygon->getIndexView() && inPolygon->getIndexingCallback()!=IPolygonGeometryBase::TriangleList()) - { - _NBL_DEBUG_BREAK_IF(true); - return nullptr; - } - - auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp); - if (enableWelding) - { - return CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, CVertexWelder::DefaultWeldPredicate(epsilon)); - } - return result.geom; + if (!inPolygon) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } + + // Mesh need to be unwelded (TODO: why? the output only need to be unwelded, really should be checking `inPolygon->getIndexingCallback()->count()!=3`) + if (inPolygon->getIndexView() && inPolygon->getIndexingCallback()!=IPolygonGeometryBase::TriangleList()) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } + + auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp); + if (enableWelding) + { + return CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, CVertexWelder::DefaultWeldPredicate(epsilon)); + } + return result.geom; } #if 0 @@ -165,28 +165,28 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp if (!_inbuffer) return nullptr; - const auto* pipeline = _inbuffer->getPipeline(); - const void* ind = _inbuffer->getIndices(); + const auto* pipeline = _inbuffer->getPipeline(); + const void* ind = _inbuffer->getIndices(); if (!pipeline || !ind) return nullptr; auto outbuffer = core::move_and_static_cast(_inbuffer->clone(1u)); - outbuffer->setAttachedDescriptorSet(core::smart_refctd_ptr(const_cast(_inbuffer->getAttachedDescriptorSet()))); - outbuffer->setSkin( - SBufferBinding(reinterpret_cast&>(_inbuffer->getInverseBindPoseBufferBinding())), - SBufferBinding(reinterpret_cast&>(_inbuffer->getJointAABBBufferBinding())), - _inbuffer->getJointCount(),_inbuffer->getMaxJointsPerVertex() - ); + outbuffer->setAttachedDescriptorSet(core::smart_refctd_ptr(const_cast(_inbuffer->getAttachedDescriptorSet()))); + outbuffer->setSkin( + SBufferBinding(reinterpret_cast&>(_inbuffer->getInverseBindPoseBufferBinding())), + SBufferBinding(reinterpret_cast&>(_inbuffer->getJointAABBBufferBinding())), + _inbuffer->getJointCount(),_inbuffer->getMaxJointsPerVertex() + ); - constexpr uint32_t MAX_ATTRIBS = asset::ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; + constexpr uint32_t MAX_ATTRIBS = asset::ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; // Find vertex count size_t vertexCount = IMeshManipulator::upperBoundVertexID(_inbuffer); core::unordered_set buffers; for (size_t i = 0; i < MAX_ATTRIBS; ++i) - if (auto* buf = _inbuffer->getAttribBoundBuffer(i).buffer.get()) - buffers.insert(buf); + if (auto* buf = _inbuffer->getAttribBoundBuffer(i).buffer.get()) + buffers.insert(buf); size_t offsets[MAX_ATTRIBS]; memset(offsets, -1, sizeof(offsets)); @@ -201,36 +201,36 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp { types[i] = _inbuffer->getAttribFormat(i); - const uint32_t typeSz = getTexelOrBlockBytesize(types[i]); - const size_t alignment = (typeSz/getFormatChannelCount(types[i]) == 8u) ? 8ull : 4ull; // if format 64bit per channel, then align to 8 + const uint32_t typeSz = getTexelOrBlockBytesize(types[i]); + const size_t alignment = (typeSz/getFormatChannelCount(types[i]) == 8u) ? 8ull : 4ull; // if format 64bit per channel, then align to 8 offsets[i] = lastOffset + lastSize; const size_t mod = offsets[i] % alignment; offsets[i] += mod; lastOffset = offsets[i]; - lastSize = typeSz; + lastSize = typeSz; } } const size_t vertexSize = lastOffset + lastSize; - constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; - auto& vtxParams = outbuffer->getPipeline()->getCachedCreationParams().vertexInput; - vtxParams = SVertexInputParams(); - vtxParams.enabledAttribFlags = _inbuffer->getPipeline()->getCachedCreationParams().vertexInput.enabledAttribFlags; - vtxParams.enabledBindingFlags = 1u << NEW_VTX_BUF_BINDING; - vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = vertexSize; - vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; + constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; + auto& vtxParams = outbuffer->getPipeline()->getCachedCreationParams().vertexInput; + vtxParams = SVertexInputParams(); + vtxParams.enabledAttribFlags = _inbuffer->getPipeline()->getCachedCreationParams().vertexInput.enabledAttribFlags; + vtxParams.enabledBindingFlags = 1u << NEW_VTX_BUF_BINDING; + vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = vertexSize; + vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; auto newVertBuffer = ICPUBuffer::create({ vertexCount*vertexSize }); - outbuffer->setVertexBufferBinding({ 0u, core::smart_refctd_ptr(newVertBuffer) }, NEW_VTX_BUF_BINDING); + outbuffer->setVertexBufferBinding({ 0u, core::smart_refctd_ptr(newVertBuffer) }, NEW_VTX_BUF_BINDING); for (size_t i = 0; i < MAX_ATTRIBS; ++i) { if (offsets[i] < 0xffffffff) { - vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; - vtxParams.attributes[i].format = types[i]; - vtxParams.attributes[i].relativeOffset = offsets[i]; + vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; + vtxParams.attributes[i].format = types[i]; + vtxParams.attributes[i].relativeOffset = offsets[i]; } } } @@ -260,7 +260,7 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp { E_FORMAT type = types[activeAttribs[j]]; - if (!isNormalizedFormat(type) && (isIntegerFormat(type) || isScaledFormat(type))) + if (!isNormalizedFormat(type) && (isIntegerFormat(type) || isScaledFormat(type))) { uint32_t dst[4]; _inbuffer->getAttribute(dst, activeAttribs[j], index); @@ -283,7 +283,7 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp ((uint16_t*)indices)[i] = remap; } - _NBL_DELETE_ARRAY(remapBuffer,vertexCount); + _NBL_DELETE_ARRAY(remapBuffer,vertexCount); _NBL_DEBUG_BREAK_IF(nextVert > vertexCount) From e65297b698963a8a2b5c0ed993636eff1c2dbdcf Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 15:21:08 +0700 Subject: [PATCH 30/52] Add diagram to explain why we choose cellSize to be twice as epsilon --- include/nbl/asset/utils/CVertexHashGrid.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 38c8482681..9eaf31bae8 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -155,6 +155,16 @@ class CVertexHashGrid inline uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, hlsl::float32_t3 position) const { + // We substract the coordinate by 0.5 since the cellSize is expected to be twice the epsilon. This is to snap the vertex into the cell that contain the most bottom left neighbor of our vertex. + // --------- --------- + // | | y| | | | + // | |x | | |y | + // --------- -> --------- + // | | | | x| | + // | | | | | | + // --------- --------- + // |2*e| + // By substracing 0.5 on the coordinate, now we only need to check the cell that is either upper or righter than us. Reducing the number of cell that we have to check if we use epsilon as the cell size and do not substract the coord by 0.5. // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. hlsl::float32_t3 cellfloatcoord = floor(position / m_cellSize - hlsl::float32_t3(0.5)); hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); From dbffea5be000575e0441d07657a4d577fe5bf80c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 15:25:17 +0700 Subject: [PATCH 31/52] Improve Diagram --- include/nbl/asset/utils/CVertexHashGrid.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 9eaf31bae8..7dd22c1403 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -156,14 +156,14 @@ class CVertexHashGrid inline uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, hlsl::float32_t3 position) const { // We substract the coordinate by 0.5 since the cellSize is expected to be twice the epsilon. This is to snap the vertex into the cell that contain the most bottom left neighbor of our vertex. - // --------- --------- - // | | y| | | | - // | |x | | |y | - // --------- -> --------- - // | | | | x| | - // | | | | | | - // --------- --------- - // |2*e| + // ------- ------- + // | | y| | | | + // | |x | | |y | + // ------- -> ------- + // | | | | x| | + // | | | | | | + // ------- ------- + // |2e|e| // By substracing 0.5 on the coordinate, now we only need to check the cell that is either upper or righter than us. Reducing the number of cell that we have to check if we use epsilon as the cell size and do not substract the coord by 0.5. // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. hlsl::float32_t3 cellfloatcoord = floor(position / m_cellSize - hlsl::float32_t3(0.5)); From ad0646e7ff000d3332a9450f7d3becdc0a1d94d9 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 15:38:24 +0700 Subject: [PATCH 32/52] Improve the explanation of VertexHashGrid --- include/nbl/asset/utils/CVertexHashGrid.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 7dd22c1403..d67dfde1e0 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -155,7 +155,7 @@ class CVertexHashGrid inline uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, hlsl::float32_t3 position) const { - // We substract the coordinate by 0.5 since the cellSize is expected to be twice the epsilon. This is to snap the vertex into the cell that contain the most bottom left neighbor of our vertex. + // We substract the coordinate by 0.5 since the cellSize is expected to be twice the epsilon. This is to snap the vertex into the cell that contain the most bottom left cell that could collide with of our vertex. // ------- ------- // | | y| | | | // | |x | | |y | @@ -164,7 +164,8 @@ class CVertexHashGrid // | | | | | | // ------- ------- // |2e|e| - // By substracing 0.5 on the coordinate, now we only need to check the cell that is either upper or righter than us. Reducing the number of cell that we have to check if we use epsilon as the cell size and do not substract the coord by 0.5. + // In the example,x is snapped into a different cell which is the most bottom left cell that could collide with x. Since we have move it into its bottom left candidate, there is no need to check to the bottom and to the left of the snapped coordinate. We only need to check the upper and to the right of the snapped cell, which include the original cell. Note that we do not need to check the upper and to the right of the original cell. The cell size is 2 * epsilon and x is located on the lower and lefter side of the cell. + // Contrary to x, y is still snapped into its original cell. It means the most bottom left cell that collide with y is its own cell. // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. hlsl::float32_t3 cellfloatcoord = floor(position / m_cellSize - hlsl::float32_t3(0.5)); hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); From 60ec97001f78a4b7688eef3683dfd3c851242d65 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 15:42:55 +0700 Subject: [PATCH 33/52] Improve comment --- include/nbl/asset/utils/CVertexHashGrid.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index d67dfde1e0..4483c07c77 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -166,6 +166,7 @@ class CVertexHashGrid // |2e|e| // In the example,x is snapped into a different cell which is the most bottom left cell that could collide with x. Since we have move it into its bottom left candidate, there is no need to check to the bottom and to the left of the snapped coordinate. We only need to check the upper and to the right of the snapped cell, which include the original cell. Note that we do not need to check the upper and to the right of the original cell. The cell size is 2 * epsilon and x is located on the lower and lefter side of the cell. // Contrary to x, y is still snapped into its original cell. It means the most bottom left cell that collide with y is its own cell. + // The above scheme is to reduce the number of cell candidates that we need to check for collision, from 9 cell to 4 cell in 2d, or from 27 cells to 8 cells in 3d. // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. hlsl::float32_t3 cellfloatcoord = floor(position / m_cellSize - hlsl::float32_t3(0.5)); hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); From 7ebd7d0d8a395c0fd2cd59fe6b47f2f62031a775 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 15:55:21 +0700 Subject: [PATCH 34/52] Fix wrong iteration order --- include/nbl/core/algorithm/radix_sort.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/nbl/core/algorithm/radix_sort.h b/include/nbl/core/algorithm/radix_sort.h index 5b88c92936..171f0819f9 100644 --- a/include/nbl/core/algorithm/radix_sort.h +++ b/include/nbl/core/algorithm/radix_sort.h @@ -84,16 +84,18 @@ struct RadixLsbSorter // count constexpr histogram_t shift = static_cast(radix_bits*pass_ix); - for (histogram_t i = 0u; i < rangeSize; i++) + for (histogram_t i = rangeSize; i != 0;) + { + i--; ++m_histogram[comp.template operator()(input[i])]; + } // prefix sum std::exclusive_scan(m_histogram, m_histogram + histogram_size, m_histogram, 0); // scatter. After scatter m_histogram now become a skiplist - for (histogram_t i = rangeSize; i != 0;) + for (histogram_t i = 0; i < rangeSize; i++) { - i--; const auto& val = input[i]; const auto& histogramIx = comp.template operator()(val); output[m_histogram[histogramIx]++] = val; From 087beb8e37e499675262fdf997604956ebd77bf3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 15:55:33 +0700 Subject: [PATCH 35/52] Fix compInternalAngle --- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index 45f678325c..4677b0e155 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -33,7 +33,7 @@ namespace util const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle2 = hlsl::numbers::pi - (angle1 + angle2); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); // use them to find the angle at each vertex return vector(angle0, angle1, angle2); } From 6d4b7946a18c89a31698b97036a94320589f7bab Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 15:56:15 +0700 Subject: [PATCH 36/52] Fix method specifier in radix_sort.h --- include/nbl/core/algorithm/radix_sort.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/nbl/core/algorithm/radix_sort.h b/include/nbl/core/algorithm/radix_sort.h index 171f0819f9..cd1a0aaa6a 100644 --- a/include/nbl/core/algorithm/radix_sort.h +++ b/include/nbl/core/algorithm/radix_sort.h @@ -55,11 +55,11 @@ constexpr int8_t find_msb(const T& a_variable) template struct RadixLsbSorter { - _NBL_STATIC_INLINE_CONSTEXPR uint16_t histogram_bytesize = 8192u; - _NBL_STATIC_INLINE_CONSTEXPR size_t histogram_size = size_t(histogram_bytesize)/sizeof(histogram_t); - _NBL_STATIC_INLINE_CONSTEXPR uint8_t radix_bits = impl::find_msb(histogram_size); - _NBL_STATIC_INLINE_CONSTEXPR size_t last_pass = (key_bit_count-1ull)/size_t(radix_bits); - _NBL_STATIC_INLINE_CONSTEXPR uint16_t radix_mask = (1u< inline RandomIt operator()(RandomIt input, RandomIt output, const histogram_t rangeSize, const KeyAccessor& comp) @@ -67,7 +67,7 @@ struct RadixLsbSorter return pass(input,output,rangeSize,comp); } - std::pair getMostSignificantRadixBound(size_t key) const + inline std::pair getMostSignificantRadixBound(size_t key) const { constexpr histogram_t shift = static_cast(radix_bits * last_pass); const auto histogramIx = (key >> shift) & radix_mask; From 0cdf7e81bb6730e5bb641a4eb63ea41eae2e6528 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 16:12:06 +0700 Subject: [PATCH 37/52] Fix variable and function specifier --- include/nbl/asset/utils/CPolygonGeometryManipulator.h | 4 ++-- include/nbl/asset/utils/CVertexHashGrid.h | 10 +++++----- include/nbl/asset/utils/CVertexWelder.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 92897fb8d1..b5e185841f 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -231,10 +231,10 @@ class NBL_API2 CPolygonGeometryManipulator EEM_COUNT }; - static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); + static inline core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; - static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, + static inline core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, SSNGVxCmpFunction vxcmp = [](const CSmoothNormalGenerator::SSNGVertexData& v0, const CSmoothNormalGenerator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) { static constexpr float cosOf45Deg = 0.70710678118f; diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 4483c07c77..2eba1a623d 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -83,7 +83,7 @@ class CVertexHashGrid if (!std::invoke(std::forward(fn), neighborVertex)) break; } } - }; + } template Fn> inline void forEachBroadphaseNeighborCandidates(const hlsl::float32_t3& position, Fn&& fn) const @@ -102,7 +102,7 @@ class CVertexHashGrid if (!std::invoke(std::forward(fn), neighborVertex)) break; } } - }; + } private: struct KeyAccessor @@ -116,9 +116,9 @@ class CVertexHashGrid } }; - static constexpr uint32_t primeNumber1 = 73856093; - static constexpr uint32_t primeNumber2 = 19349663; - static constexpr uint32_t primeNumber3 = 83492791; + static constexpr inline uint32_t primeNumber1 = 73856093; + static constexpr inline uint32_t primeNumber2 = 19349663; + static constexpr inline uint32_t primeNumber3 = 83492791; using sorter_t = std::variant< core::RadixLsbSorter, diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 9c65b4e09e..e0ec99911f 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -142,7 +142,7 @@ class CVertexWelder { const auto& positionView = polygon->getPositionView(); const auto vertexCount = positionView.getElementCount(); - static constexpr auto INVALID_INDEX = std::numeric_limits::max(); + constexpr auto INVALID_INDEX = std::numeric_limits::max(); core::vector remappedVertexIndexes(vertexCount); std::fill(remappedVertexIndexes.begin(), remappedVertexIndexes.end(), INVALID_INDEX); From 4dbcbebea0571517c1df7375ce818519055dad4a Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 16:12:23 +0700 Subject: [PATCH 38/52] Fix the patchedEpsilon --- src/nbl/asset/utils/CSmoothNormalGenerator.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 41fab68635..5f431a99ba 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -32,8 +32,9 @@ static bool compareVertexPosition(const hlsl::float32_t3& a, const hlsl::float32 CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction vxcmp) { assert(polygon->getIndexingCallback()->degree() == 3); - VertexHashMap vertexHashMap = setupData(polygon, epsilon); - const auto patchedEpsilon = epsilon == 0.0f ? 0.00001f : epsilon * 2.f; + static constexpr auto MinEpsilon = 0.00001f; + const auto patchedEpsilon = epsilon < MinEpsilon ? MinEpsilon : epsilon; + VertexHashMap vertexHashMap = setupData(polygon, patchedEpsilon); const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, patchedEpsilon,vxcmp); return { vertexHashMap, smoothPolygon }; } @@ -44,7 +45,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const size_t idxCount = polygon->getPrimitiveCount() * 3; const auto cellCount = std::max(core::roundUpToPoT((idxCount + 31) >> 5), 4); - VertexHashMap vertices(idxCount, std::min(16u * 1024u, cellCount), epsilon); + VertexHashMap vertices(idxCount, std::min(16u * 1024u, cellCount), epsilon * 2.f); for (uint32_t i = 0; i < idxCount; i += 3) { From ff4216551592d1896badcf5ff644734d7e2bd6cc Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 18:31:46 +0700 Subject: [PATCH 39/52] Remove inline specifier for function that is defined in cpp --- include/nbl/asset/utils/CPolygonGeometryManipulator.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index b5e185841f..8c75b39e0d 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -231,13 +231,13 @@ class NBL_API2 CPolygonGeometryManipulator EEM_COUNT }; - static inline core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); + static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; - static inline core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, + static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, SSNGVxCmpFunction vxcmp = [](const CSmoothNormalGenerator::SSNGVertexData& v0, const CSmoothNormalGenerator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) { - static constexpr float cosOf45Deg = 0.70710678118f; + constexpr float cosOf45Deg = 0.70710678118f; return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > cosOf45Deg; }); From 94c65b6d92f57d02235057a12b4a54c78b34edf6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 18:32:13 +0700 Subject: [PATCH 40/52] Remove forEachBroadphaseNeighborCandidates overload that takes vertex as argument --- include/nbl/asset/utils/CVertexHashGrid.h | 20 ------------------- .../asset/utils/CSmoothNormalGenerator.cpp | 4 ++-- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index 2eba1a623d..b32ed1c294 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -65,26 +65,6 @@ class CVertexHashGrid inline uint32_t getVertexCount() const { return m_vertices.size(); } - template Fn> - inline void forEachBroadphaseNeighborCandidates(const VertexData& vertex, Fn&& fn) const - { - std::array neighboringCells; - const auto cellCount = getNeighboringCellHashes(neighboringCells.data(), vertex.getPosition()); - - //iterate among all neighboring cells - for (uint8_t i = 0; i < cellCount; i++) - { - const auto& neighborCell = neighboringCells[i]; - BucketBounds bounds = getBucketBoundsByHash(neighborCell); - for (; bounds.begin != bounds.end; bounds.begin++) - { - const vertex_data_t& neighborVertex = *bounds.begin; - if (&vertex != &neighborVertex) - if (!std::invoke(std::forward(fn), neighborVertex)) break; - } - } - } - template Fn> inline void forEachBroadphaseNeighborCandidates(const hlsl::float32_t3& position, Fn&& fn) const { diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 5f431a99ba..b0e4839673 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -99,9 +99,9 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne { auto normal = processedVertex.weightedNormal; - vertexHashMap.forEachBroadphaseNeighborCandidates(processedVertex, [&](const VertexHashMap::vertex_data_t& candidate) + vertexHashMap.forEachBroadphaseNeighborCandidates(processedVertex.getPosition(), [&](const VertexHashMap::vertex_data_t& candidate) { - if (compareVertexPosition(processedVertex.position, candidate.position, epsilon) && + if (processedVertex.index != candidate.index && compareVertexPosition(processedVertex.position, candidate.position, epsilon) && vxcmp(processedVertex, candidate, polygon)) { //TODO: better mean calculation algorithm From 38ef6276c3d72c439357531969a50d13acdea5d2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 18:32:47 +0700 Subject: [PATCH 41/52] Add inline specifier for method in CVertexWelder --- include/nbl/asset/utils/CVertexWelder.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index e0ec99911f..f089f0e781 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -32,7 +32,7 @@ class CVertexWelder { { private: - static bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2) + static inline bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2) { const auto byteSize = getTexelOrBlockBytesize(view.composed.format); const auto* basePtr = reinterpret_cast(view.getPointer()); @@ -40,7 +40,7 @@ class CVertexWelder { return (memcmp(basePtr + (index1 * stride), basePtr + (index2 * stride), byteSize) == 0); } - static bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) + static inline bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) { // TODO: Handle 16,32,64 bit float vectors once the pixel encode/decode functions get reimplemented in HLSL and decodeElement can actually benefit from that. hlsl::float64_t4 val1, val2; @@ -54,7 +54,7 @@ class CVertexWelder { return true; } - static bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) + static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) { if (!view) return true; const auto channelCount = getFormatChannelCount(view.composed.format); @@ -77,7 +77,7 @@ class CVertexWelder { return true; } - static bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) + static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) { if (!view) return true; const auto channelCount = getFormatChannelCount(view.composed.format); @@ -109,12 +109,12 @@ class CVertexWelder { DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} - bool init(const ICPUPolygonGeometry* polygon) override + inline bool init(const ICPUPolygonGeometry* polygon) override { return polygon->valid(); } - bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override + inline bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override { if (!isAttributeValEqual(polygon->getPositionView(), index1, index2, m_epsilon)) return false; @@ -136,7 +136,7 @@ class CVertexWelder { }; template - static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, const WeldPredicate& shouldWeldFn) { + static inline core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, const WeldPredicate& shouldWeldFn) { auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); const auto& positionView = polygon->getPositionView(); From bfba9fa249ea1772d47893788f680390e51be653 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 18:41:29 +0700 Subject: [PATCH 42/52] Refactor class name for SSNGVertexData --- include/nbl/asset/utils/CPolygonGeometryManipulator.h | 2 +- src/nbl/asset/utils/CSmoothNormalGenerator.cpp | 4 ++-- src/nbl/asset/utils/CSmoothNormalGenerator.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 8c75b39e0d..d5ffefd3fc 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -235,7 +235,7 @@ class NBL_API2 CPolygonGeometryManipulator using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, - SSNGVxCmpFunction vxcmp = [](const CSmoothNormalGenerator::SSNGVertexData& v0, const CSmoothNormalGenerator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) + SSNGVxCmpFunction vxcmp = [](const CSmoothNormalGenerator::VertexData& v0, const CSmoothNormalGenerator::VertexData& v1, const ICPUPolygonGeometry* buffer) { constexpr float cosOf45Deg = 0.70710678118f; return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > cosOf45Deg; diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index b0e4839673..d628377092 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -13,12 +13,12 @@ namespace nbl { namespace asset { -static bool operator<(uint32_t lhs, const CSmoothNormalGenerator::SSNGVertexData& rhs) +static bool operator<(uint32_t lhs, const CSmoothNormalGenerator::VertexData& rhs) { return lhs < rhs.hash; } -static bool operator<(const CSmoothNormalGenerator::SSNGVertexData& lhs, uint32_t rhs) +static bool operator<(const CSmoothNormalGenerator::VertexData& lhs, uint32_t rhs) { return lhs.hash < rhs; } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 9d112061d3..7956c27310 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -16,7 +16,7 @@ class CSmoothNormalGenerator CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; - struct SSNGVertexData + struct VertexData { uint32_t index; //offset of the vertex into index buffer // TODO: check whether separating hash and position into its own vector or even rehash the position everytime we need will result in VertexHashGrid become faster. @@ -41,9 +41,9 @@ class CSmoothNormalGenerator }; - using VxCmpFunction = std::function; + using VxCmpFunction = std::function; - using VertexHashMap = CVertexHashGrid; + using VertexHashMap = CVertexHashGrid; struct Result { From f3d9a191dcf4fbdb56a53251c2c7a324064c49f6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 18:58:45 +0700 Subject: [PATCH 43/52] Add inline specifier for DefaultWeldPredicate constructor and destructor --- include/nbl/asset/utils/CVertexWelder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index f089f0e781..49344d6bbf 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -107,7 +107,7 @@ class CVertexWelder { public: - DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} + inline DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} inline bool init(const ICPUPolygonGeometry* polygon) override { @@ -131,7 +131,7 @@ class CVertexWelder { return true; } - ~DefaultWeldPredicate() override = default; + inline ~DefaultWeldPredicate() override = default; }; From f5d38f2e848b1c3fe47ded590d6eed072e0f3ae4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 18:59:01 +0700 Subject: [PATCH 44/52] Remove outdated TODO --- include/nbl/asset/utils/CVertexWelder.h | 1 - src/nbl/asset/utils/CSmoothNormalGenerator.h | 1 - 2 files changed, 2 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 49344d6bbf..9a996b0716 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -42,7 +42,6 @@ class CVertexWelder { static inline bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) { - // TODO: Handle 16,32,64 bit float vectors once the pixel encode/decode functions get reimplemented in HLSL and decodeElement can actually benefit from that. hlsl::float64_t4 val1, val2; view.decodeElement(index1, val1); view.decodeElement(index2, val2); diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 7956c27310..46146800b2 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -19,7 +19,6 @@ class CSmoothNormalGenerator struct VertexData { uint32_t index; //offset of the vertex into index buffer - // TODO: check whether separating hash and position into its own vector or even rehash the position everytime we need will result in VertexHashGrid become faster. uint32_t hash; hlsl::float32_t3 weightedNormal; hlsl::float32_t3 position; //position of the vertex in 3D space From cc399014458439cdad2863935a5e6ccc6b4b2dab Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 3 Nov 2025 18:59:17 +0700 Subject: [PATCH 45/52] Slight refactor for SSNGVertexData alias --- include/nbl/asset/utils/CPolygonGeometryManipulator.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index d5ffefd3fc..cc41bba7e9 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -233,9 +233,11 @@ class NBL_API2 CPolygonGeometryManipulator static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); + using SSNGVertexData = CSmoothNormalGenerator::VertexData; using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; + static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, - SSNGVxCmpFunction vxcmp = [](const CSmoothNormalGenerator::VertexData& v0, const CSmoothNormalGenerator::VertexData& v1, const ICPUPolygonGeometry* buffer) + SSNGVxCmpFunction vxcmp = [](const SSNGVertexData& v0, const SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) { constexpr float cosOf45Deg = 0.70710678118f; return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > cosOf45Deg; From 1257d4d08eff8e0e37c9206012b97f30d405fa10 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 6 Nov 2025 22:04:43 +0700 Subject: [PATCH 46/52] Add comment to implement a class template of CVertexHashGrid and CSmoothNormalGenerator that take position as template argument --- include/nbl/asset/utils/CVertexHashGrid.h | 1 + src/nbl/asset/utils/CSmoothNormalGenerator.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h index b32ed1c294..ce094fabe4 100644 --- a/include/nbl/asset/utils/CVertexHashGrid.h +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -20,6 +20,7 @@ concept HashGridIteratorFn = HashGridVertexData && requires(Fn && fn, T const { std::invoke(std::forward(fn), cobj) } -> std::same_as; }; +// TODO: implement a class template that take position type(either float32_t3 or float64_t3 as template argument template class CVertexHashGrid { diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 46146800b2..6ac4daf6c4 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -10,6 +10,7 @@ namespace nbl::asset { +// TODO: implement a class template that take position type(either float32_t3 or float64_t3 as template argument class CSmoothNormalGenerator { public: From d2503afd1607e94c64ef84857007767617050bec Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 6 Nov 2025 22:16:41 +0700 Subject: [PATCH 47/52] Add some assert in isAttributeValEqual and isAttributeDirEqual --- include/nbl/asset/utils/CVertexWelder.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 9a996b0716..cf0cc3d625 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -55,10 +55,10 @@ class CVertexWelder { static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) { - if (!view) return true; + assert(view); + assert(view.composed.isFormatted()); + assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); const auto channelCount = getFormatChannelCount(view.composed.format); - // TODO: use memcmp to compare for integral equality - const auto byteSize = getTexelOrBlockBytesize(view.composed.format); switch (view.composed.rangeFormat) { case IGeometryBase::EAABBFormat::U64: @@ -78,7 +78,9 @@ class CVertexWelder { static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) { - if (!view) return true; + assert(view); + assert(view.composed.isFormatted()); + assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); const auto channelCount = getFormatChannelCount(view.composed.format); switch (view.composed.rangeFormat) { From 1386fba08879b06dffadbd25aefa19b6c8f5eabe Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 6 Nov 2025 23:14:20 +0700 Subject: [PATCH 48/52] Cache channelCount and byte size in init --- include/nbl/asset/utils/CVertexWelder.h | 114 ++++++++++++++++++++---- 1 file changed, 95 insertions(+), 19 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index cf0cc3d625..b1594f6f3d 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -32,9 +32,14 @@ class CVertexWelder { { private: - static inline bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2) + struct SDataViewContext + { + uint32_t channelCount : 3; + uint32_t byteSize: 29; + }; + + static inline bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t byteSize) { - const auto byteSize = getTexelOrBlockBytesize(view.composed.format); const auto* basePtr = reinterpret_cast(view.getPointer()); const auto stride = view.composed.stride; return (memcmp(basePtr + (index1 * stride), basePtr + (index2 * stride), byteSize) == 0); @@ -53,12 +58,13 @@ class CVertexWelder { return true; } - static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) + static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) { + if (context.byteSize == 0) return true; + assert(view); assert(view.composed.isFormatted()); assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); - const auto channelCount = getFormatChannelCount(view.composed.format); switch (view.composed.rangeFormat) { case IGeometryBase::EAABBFormat::U64: @@ -66,22 +72,23 @@ class CVertexWelder { case IGeometryBase::EAABBFormat::S64: case IGeometryBase::EAABBFormat::S32: { - return isIntegralElementEqual(view, index1, index2); + return isIntegralElementEqual(view, index1, index2, context.byteSize); } default: { - return isRealElementEqual(view, index1, index2, channelCount, epsilon); + return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); } } return true; } - static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) + static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) { + if (context.byteSize == 0) return true; + assert(view); assert(view.composed.isFormatted()); assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); - const auto channelCount = getFormatChannelCount(view.composed.format); switch (view.composed.rangeFormat) { case IGeometryBase::EAABBFormat::U64: @@ -89,12 +96,12 @@ class CVertexWelder { case IGeometryBase::EAABBFormat::S64: case IGeometryBase::EAABBFormat::S32: { - return isIntegralElementEqual(view, index1, index2); + return isIntegralElementEqual(view, index1, index2, context.byteSize); } default: { - if (channelCount != 3) - return isRealElementEqual(view, index1, index2, channelCount, epsilon); + if (context.channelCount != 3) + return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); hlsl::float64_t4 val1, val2; view.decodeElement(index1, val1); @@ -106,28 +113,97 @@ class CVertexWelder { float m_epsilon; + SDataViewContext m_positionViewContext; + SDataViewContext m_normalViewContext; + + struct SJointViewContext + { + SDataViewContext indices; + SDataViewContext weights; + }; + core::vector m_jointViewContexts; + + core::vector m_auxAttributeViewContexts; + public: inline DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} inline bool init(const ICPUPolygonGeometry* polygon) override { - return polygon->valid(); + auto isViewFormatValid = [](const ICPUPolygonGeometry::SDataView& view) + { + return view.composed.isFormatted() && IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat; + }; + auto getViewContext = [](const ICPUPolygonGeometry::SDataView& view) -> SDataViewContext + { + if (!view) + { + return { + .channelCount = 0, + .byteSize = 0 + }; + } + return { + .channelCount = getFormatChannelCount(view.composed.format), + .byteSize = getTexelOrBlockBytesize(view.composed.format) + }; + }; + + if (!polygon->valid()) return false; + + const auto& positionView = polygon->getPositionView(); + if (IGeometryBase::getMatchingAABBFormat(positionView.composed.format) == positionView.composed.rangeFormat) return false; + m_positionViewContext = { + .channelCount = getFormatChannelCount(positionView.composed.format), + .byteSize = getTexelOrBlockBytesize(positionView.composed.format), + }; + + const auto& normalView = polygon->getNormalView(); + if (normalView && !isViewFormatValid(normalView)) return false; + m_normalViewContext = getViewContext(normalView); + + m_jointViewContexts.reserve(polygon->getJointWeightViews().size()); + for (const auto& jointWeightView : polygon->getJointWeightViews()) + { + if (jointWeightView.indices && !isViewFormatValid(jointWeightView.indices)) return false; + if (jointWeightView.weights && !isViewFormatValid(jointWeightView.weights)) return false; + m_jointViewContexts.push_back({ + .indices = getViewContext(jointWeightView.indices), + .weights = getViewContext(jointWeightView.weights), + }); + } + + m_auxAttributeViewContexts.reserve(polygon->getAuxAttributeViews().size()); + for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) + { + if (auxAttributeView && !isViewFormatValid(auxAttributeView)) return false; + m_auxAttributeViewContexts.push_back(getViewContext(auxAttributeView)); + } + } inline bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override { - if (!isAttributeValEqual(polygon->getPositionView(), index1, index2, m_epsilon)) + if (!isAttributeValEqual(polygon->getPositionView(), m_positionViewContext, index1, index2, m_epsilon)) return false; - if (!isAttributeDirEqual(polygon->getNormalView(), index1, index2, m_epsilon)) + + const auto& normalView = polygon->getNormalView(); + if (!isAttributeDirEqual(normalView, m_normalViewContext, index1, index2, m_epsilon)) return false; - for (const auto& jointWeightView : polygon->getJointWeightViews()) + + for (uint64_t joint_i = 0; joint_i < polygon->getJointWeightViews().size(); joint_i++) { - if (!isAttributeValEqual(jointWeightView.indices, index1, index2, m_epsilon)) return false; - if (!isAttributeValEqual(jointWeightView.weights, index1, index2, m_epsilon)) return false; + const auto& jointWeightView = polygon->getJointWeightViews()[joint_i]; + if (!isAttributeValEqual(jointWeightView.indices, m_jointViewContexts[joint_i].indices, index1, index2, m_epsilon)) return false; + if (!isAttributeValEqual(jointWeightView.weights, m_jointViewContexts[joint_i].weights, index1, index2, m_epsilon)) return false; + } + + const auto& auxAttrViews = polygon->getAuxAttributeViews(); + for (uint64_t aux_i = 0; aux_i < auxAttrViews.size(); aux_i++) + { + if (!isAttributeValEqual(auxAttrViews[aux_i], m_auxAttributeViewContexts[aux_i], index1, index2, m_epsilon)) return false; } - for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) - if (!isAttributeValEqual(auxAttributeView, index1, index2, m_epsilon)) return false; return true; } From ac2ed542eea3659501e06cb9142bc4965c3568cf Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 6 Nov 2025 23:37:22 +0700 Subject: [PATCH 49/52] Fix weldVertices --- include/nbl/asset/utils/CVertexWelder.h | 34 +++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index b1594f6f3d..310eb214ab 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -221,7 +221,6 @@ class CVertexWelder { constexpr auto INVALID_INDEX = std::numeric_limits::max(); core::vector remappedVertexIndexes(vertexCount); - std::fill(remappedVertexIndexes.begin(), remappedVertexIndexes.end(), INVALID_INDEX); uint32_t maxRemappedIndex = 0; // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together @@ -230,24 +229,31 @@ class CVertexWelder { hlsl::float32_t3 position; positionView.decodeElement(index, position); auto remappedVertexIndex = INVALID_INDEX; - bool foundVertex = false; as.forEachBroadphaseNeighborCandidates(position, [&](const typename AccelStructureT::vertex_data_t& candidate) { const auto neighborRemappedIndex = remappedVertexIndexes[candidate.index]; - if (index == candidate.index) { - foundVertex = true; + // make sure we can only map higher indices to lower indices to disallow loops + if (candidate.index Date: Thu, 6 Nov 2025 23:42:48 +0700 Subject: [PATCH 50/52] Small fixes in CVertexWelder --- include/nbl/asset/utils/CVertexWelder.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 310eb214ab..09feed738b 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -258,7 +258,7 @@ class CVertexWelder { } const auto& indexView = outPolygon->getIndexView(); - const auto remappedRangeFormat = (maxRemappedIndex - 1) < std::numeric_limits::max() ? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32; + const auto remappedRangeFormat = (maxRemappedIndex - 1) < static_cast(std::numeric_limits::max()) ? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32; auto createRemappedIndexView = [&](size_t indexCount) { const uint32_t indexSize = remappedRangeFormat == IGeometryBase::EAABBFormat::U16 ? sizeof(uint16_t) : sizeof(uint32_t); @@ -302,10 +302,10 @@ class CVertexWelder { auto* remappedIndexPtr = reinterpret_cast(remappedIndexView.getPointer()); for (uint32_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) { - hlsl::vector index; - indexView.decodeElement>(index_i, index); - IndexT remappedIndex = remappedVertexIndexes[index.x]; - remappedIndexPtr[index_i] = remappedIndex; + hlsl::vector index; + indexView.decodeElement>(index_i, index); + const auto remappedIndex = remappedVertexIndexes[index.x]; + remappedIndexPtr[index_i] = remappedVertexIndexes[index.x]; if (remappedIndex == INVALID_INDEX) return false; } return true; @@ -343,7 +343,7 @@ class CVertexWelder { outPolygon->setIndexView(std::move(remappedIndexView)); } - CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); + CGeometryManipulator::recomputeContentHash(outPolygon->getIndexView()); return outPolygon; } }; From 43914336f6223776816ceaaae5af14daf493b13b Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Fri, 7 Nov 2025 10:48:50 +0100 Subject: [PATCH 51/52] fix compiler warning --- include/nbl/asset/utils/CVertexWelder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 09feed738b..9faf1b33a8 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -305,7 +305,7 @@ class CVertexWelder { hlsl::vector index; indexView.decodeElement>(index_i, index); const auto remappedIndex = remappedVertexIndexes[index.x]; - remappedIndexPtr[index_i] = remappedVertexIndexes[index.x]; + remappedIndexPtr[index_i] = static_cast(remappedIndex); if (remappedIndex == INVALID_INDEX) return false; } return true; From bd102e4f6f509dbb69926288976b5306de97055c Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Fri, 7 Nov 2025 10:55:07 +0100 Subject: [PATCH 52/52] Update CSmoothNormalGenerator.cpp Resolve https://github.com/Devsh-Graphics-Programming/Nabla/pull/941#discussion_r2465956214 --- src/nbl/asset/utils/CSmoothNormalGenerator.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index d628377092..8c03ad99b9 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -99,6 +99,10 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne { auto normal = processedVertex.weightedNormal; + // We perform double the work (since `vxcmp` must be commutative but not required to be associative) intentionally, + // because without guaranteed associativity we cannot partition the vertices into disjoint sets (we're not reconstructing OBJ-like + // smooth groups with this), so we can't have all vertices in a set just copy their normal from a "master vertex". + // For an example of why that is good, think of a cone or cylinder and why its good to have non-associative smoothing predicate. vertexHashMap.forEachBroadphaseNeighborCandidates(processedVertex.getPosition(), [&](const VertexHashMap::vertex_data_t& candidate) { if (processedVertex.index != candidate.index && compareVertexPosition(processedVertex.position, candidate.position, epsilon) && @@ -120,4 +124,4 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne } } -} \ No newline at end of file +}