From 6a74ab15c39406bcf0a0ae304104ddee47b8c2ac Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 23 Jul 2025 20:38:14 -0300 Subject: [PATCH 01/29] Checkpoint 1 is close --- 62_CAD/DrawResourcesFiller.cpp | 146 +++++++++++++++--- 62_CAD/DrawResourcesFiller.h | 27 +++- 62_CAD/Images.h | 6 +- 62_CAD/main.cpp | 26 +++- 62_CAD/scripts/tiled_grid.py | 266 +++++++++++++++++++++++++++++++++ 5 files changed, 443 insertions(+), 28 deletions(-) create mode 100644 62_CAD/scripts/tiled_grid.py diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index b40f6585c..5c2242547 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -631,7 +631,7 @@ bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::spangetLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); @@ -639,12 +639,11 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. 
auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + CachedImageRecord* cachedImageRecord = imagesCache->insert(manager.georeferencedImageParams.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE IGPUImage::SCreationParams imageCreationParams = {}; - ImageType georeferenceImageType; - determineGeoreferencedImageCreationParams(imageCreationParams, georeferenceImageType, params); + determineGeoreferencedImageCreationParams(imageCreationParams, manager); // imageParams = cpuImage->getCreationParameters(); imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; @@ -671,11 +670,11 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( const auto cachedImageType = cachedImageRecord->type; // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus const auto currentParams = static_cast(imageCreationParams); - const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; + const bool needsRecreation = cachedImageType != manager.georeferencedImageParams.imageType || cachedParams != currentParams; if (needsRecreation) { // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. 
- evictCallback(imageID, *cachedImageRecord); + evictCallback(manager.georeferencedImageParams.imageID, *cachedImageRecord); // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image *cachedImageRecord = CachedImageRecord(currentFrameIndex); @@ -705,17 +704,17 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { // Attempt to create a GPU image and image view for this texture. - ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(manager.georeferencedImageParams.imageID)); if (allocResults.isValid()) { - cachedImageRecord->type = georeferenceImageType; + cachedImageRecord->type = manager.georeferencedImageParams.imageType; cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN cachedImageRecord->allocationOffset = allocResults.allocationOffset; cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; - cachedImageRecord->staticCPUImage = nullptr; + cachedImageRecord->staticCPUImage = manager.georeferencedImageParams.geoReferencedImage; } else { @@ -743,7 +742,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( } // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation - imagesCache->erase(imageID); + 
imagesCache->erase(manager.georeferencedImageParams.imageID); } } else @@ -1557,7 +1556,7 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende std::vector afterCopyImageBarriers; afterCopyImageBarriers.reserve(streamedImageCopies.size()); - // Pipeline Barriers before imageCopy + // Pipeline Barriers after imageCopy for (auto& [imageID, imageCopies] : streamedImageCopies) { auto* imageRecord = imagesCache->peek(imageID); @@ -2461,30 +2460,35 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc return ret; } -void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams) +void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, StreamedImageManager& manager) { + auto& georeferencedImageParams = manager.georeferencedImageParams; // Decide whether the image can reside fully into memory rather than get streamed. 
// TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not // TODO: add criterial that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y; if (betterToResideFullyInMem) - outImageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; + georeferencedImageParams.imageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; else - outImageType = ImageType::GEOREFERENCED_STREAMED; + georeferencedImageParams.imageType = ImageType::GEOREFERENCED_STREAMED; outImageParams.type = asset::IImage::ET_2D; outImageParams.samples = asset::IImage::ESCF_1_BIT; outImageParams.format = georeferencedImageParams.format; - if (outImageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) + if (georeferencedImageParams.imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) { outImageParams.extent = { georeferencedImageParams.imageExtents.x, georeferencedImageParams.imageExtents.y, 1u }; } else { - // TODO: Better Logic, area around the view, etc... - outImageParams.extent = { georeferencedImageParams.viewportExtents.x, georeferencedImageParams.viewportExtents.y, 1u }; + // Pad sides to multiple of tileSize. Even after rounding up, we might still need to add an extra tile to cover both sides. 
+ const auto xExtent = core::roundUp(georeferencedImageParams.viewportExtents.x, manager.TileSize) + manager.TileSize; + const auto yExtent = core::roundUp(georeferencedImageParams.viewportExtents.y, manager.TileSize) + manager.TileSize; + outImageParams.extent = { xExtent, yExtent, 1u }; + manager.maxResidentTiles.x = xExtent / manager.TileSize; + manager.maxResidentTiles.y = yExtent / manager.TileSize; } @@ -2624,4 +2628,112 @@ void DrawResourcesFiller::flushDrawObjects() drawCalls.push_back(drawCall); drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount(); } +} + +DrawResourcesFiller::StreamedImageManager::StreamedImageManager(GeoreferencedImageParams&& _georeferencedImageParams) + : georeferencedImageParams(std::move(_georeferencedImageParams)) +{ + maxImageTileIndices = georeferencedImageParams.imageExtents / uint32_t2(TileSize, TileSize); + // If it fits perfectly along any dimension, we need one less tile with this scheme + maxImageTileIndices -= uint32_t2(maxImageTileIndices.x * TileSize == georeferencedImageParams.imageExtents.x, maxImageTileIndices.y * TileSize == georeferencedImageParams.imageExtents.y); + + // R^2 can be covered with a lattice of image tiles. Real tiles (those actually covered by the image) are indexed in the range [0, maxImageTileIndices.x] x [0, maxImageTileIndices.y], + // but part of the algorithm to figure out which tiles need to be resident for a draw involves figuring out the coordinates in this lattice of each of the viewport corners. + // To that end, we devise an algorithm that maps a point in worldspace to its coordinates in this tile lattice: + // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point + // 2. Transform this displacement vector into a displacement into the coordinates spanned by the basis {dirU, dirV}. Notice that these vectors are still in world units + // 3. Map world units to tile units. 
This scaling is generally nonuniform, since it depends on the ratio of pixels to world units per coordinate. + // The name of the `offsetCoBScaleMatrix` follows by what is computed at each step + + // 1. Displacement. The following matrix calculates the offset for an input point `p` with homogenous worldspace coordinates. + // By foregoing the homogenous coordinate we can keep only the vector part, that's why it's `2x3` and not `3x3` + float64_t2 topLeftWorld = georeferencedImageParams.worldspaceOBB.topLeft; + float64_t2x3 displacementMatrix(1., 0., topLeftWorld.x, 0., 1., topLeftWorld.y); + + // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to "image worldspan" coords has a quite nice expression + float64_t2 dirU = georeferencedImageParams.worldspaceOBB.dirU; + float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * georeferencedImageParams.worldspaceOBB.aspectRatio; + float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); + float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); + float64_t2 firstRow = dirU / dirULengthSquared; + float64_t2 secondRow = dirV / dirVLengthSquared; + float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); + + // 3. Scaling. The vector obtained by doing `CoB * displacement * p` is still in world units. Given that we know how many pixels the image spans (given by + // georeferencedImageParams.imageExtents) and how many world units it spans (given by (|dirU|, |dirV|) ) we can get a factor for the `pixel/world unit` ratio. + // Then we simply multiply that factor for another factor for the `tile / pixel` ratio to get our `tile / world unit` scaling factor. + float64_t dirULength = nbl::hlsl::sqrt(dirULengthSquared); + float64_t dirVLength = nbl::hlsl::sqrt(dirVLengthSquared); + float64_t2 scaleFactors = (1. 
/ TileSize) * (float64_t2(georeferencedImageParams.imageExtents) / float64_t2(dirULength, dirVLength)); + float64_t2x2 scaleMatrix(scaleFactors.x, 0., 0., scaleFactors.y); + + // Put them all together + offsetCoBScaleMatrix = nbl::hlsl::mul(scaleMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); +} + +core::vector DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const float64_t3x3& NDCToWorld) +{ + // Using Vulkan NDC, the viewport has coordinates in the range [-1, -1] x [1,1]. First we get the world coordinates of the viewport corners, in homogenous + float64_t3 topLeftNDCH(-1., -1., 1.); + float64_t3 topRightNDCH(1., -1., 1.); + float64_t3 bottomLeftNDCH(-1., 1., 1.); + float64_t3 bottomRightNDCH(1., 1., 1.); + + float64_t3 topLeftWorldH = nbl::hlsl::mul(NDCToWorld, topLeftNDCH); + float64_t3 topRightWorldH = nbl::hlsl::mul(NDCToWorld, topRightNDCH); + float64_t3 bottomLeftWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftNDCH); + float64_t3 bottomRightWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightNDCH); + + // We can use `offsetCoBScaleMatrix` to get tile lattice coordinates for each of these points + float64_t2 topLeftTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, topLeftWorldH); + float64_t2 topRightTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, topRightWorldH); + float64_t2 bottomLeftTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, bottomLeftWorldH); + float64_t2 bottomRightTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, bottomRightWorldH); + + // Get the min and max of each lattice coordinate + float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); + float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); + float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); + + float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); + float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); + float64_t2 maxAll = 
nbl::hlsl::max(maxTop, maxBottom); + + // Floor mins and ceil maxes + int32_t2 minAllFloored = nbl::hlsl::floor(minAll); + int32_t2 maxAllCeiled = nbl::hlsl::ceil(maxAll); + + // Clamp them to reasonable tile indices + uint32_t2 minEffective = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices)); + uint32_t2 maxEffective = nbl::hlsl::clamp(maxAllCeiled, int32_t2(0, 0), int32_t2(maxImageTileIndices)); + + // Now we have the indices of the tiles we want to upload, so create the vector of `StreamedImageCopies` - 1 per tile. + core::vector retVal; + retVal.reserve((maxEffective.x - minEffective.x + 1) * (maxEffective.y - minEffective.y + 1)); + + // Assuming a 1 pixel per block format for simplicity rn + auto bytesPerPixel = getTexelOrBlockBytesize(georeferencedImageParams.format); + auto bytesPerSide = bytesPerPixel * TileSize; + + for (uint32_t tileX = minEffective.x; tileX <= maxEffective.x; tileX++) + { + for (uint32_t tileY = minEffective.y; tileY <= maxEffective.y; tileY++) + { + asset::IImage::SBufferCopy bufCopy; + bufCopy.bufferOffset = (tileY * maxImageTileIndices.x * bytesPerSide + tileX) * bytesPerSide; + bufCopy.bufferRowLength = georeferencedImageParams.imageExtents.x; + bufCopy.bufferImageHeight = 0; + bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + bufCopy.imageSubresource.mipLevel = 0u; + bufCopy.imageSubresource.baseArrayLayer = 0u; + bufCopy.imageSubresource.layerCount = 1u; + bufCopy.imageOffset = { 0u,0u,0u }; + bufCopy.imageExtent.width = TileSize; + bufCopy.imageExtent.height = TileSize; + bufCopy.imageExtent.depth = 1; + + retVal.emplace_back(georeferencedImageParams.format, georeferencedImageParams.geoReferencedImage->getBuffer(), std::move(bufCopy)); + } + } + return retVal; } \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 1a74338e7..d7d38e9f0 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -120,6 +120,27 @@ 
struct DrawResourcesFiller geometryInfo.getAlignedStorageSize(); } }; + + // @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. + struct StreamedImageManager + { + friend class DrawResourcesFiller; + constexpr static uint32_t TileSize = 128u; + + StreamedImageManager(GeoreferencedImageParams&& _georeferencedImageParams); + + core::vector generateTileUploadData(const float64_t3x3& worldToNDC); + + // This and the logic they're in will likely change later with Toroidal updating + protected: + GeoreferencedImageParams georeferencedImageParams; + uint32_t2 maxResidentTiles = {}; + private: + uint32_t2 minLoadedTileIndices = {}; + uint32_t2 maxImageTileIndices = {}; + // See constructor for info on this one + float64_t2x3 offsetCoBScaleMatrix = {}; + }; DrawResourcesFiller(); @@ -343,7 +364,7 @@ struct DrawResourcesFiller * @return true if the image was successfully cached and is ready for use; false if allocation failed. * [TODO]: should be internal protected member function. */ - bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(StreamedImageManager& manager, SIntendedSubmitInfo& intendedNextSubmit); // [TODO]: should be internal protected member function. bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); @@ -663,9 +684,9 @@ struct DrawResourcesFiller * * @param[out] outImageParams Structure to be filled with image creation parameters (format, size, etc.). * @param[out] outImageType Indicates whether the image should be fully resident or streamed. - * @param[in] georeferencedImageParams Parameters describing the full image extents, viewport extents, and format. 
+ * @param[in] manager Manager for the georeferenced image */ - void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams); + void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, StreamedImageManager& manager); /** * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter diff --git a/62_CAD/Images.h b/62_CAD/Images.h index a341eadd6..8453c93ab 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -28,6 +28,10 @@ struct GeoreferencedImageParams uint32_t2 imageExtents = {}; uint32_t2 viewportExtents = {}; asset::E_FORMAT format = {}; + ImageType imageType; + image_id imageID; + // For now it's going to be fully resident in memory, later on it's probably going to be a streamer class most likely. + core::smart_refctd_ptr geoReferencedImage; // TODO: Need to add other stuff later. }; @@ -205,7 +209,7 @@ class ImagesCache : public core::ResizableLRUCache struct StreamedImageCopy { asset::E_FORMAT srcFormat; - core::smart_refctd_ptr srcBuffer; // Make it 'std::future' later? + ICPUBuffer* srcBuffer; // Make it 'std::future' later? 
asset::IImage::SBufferCopy region; }; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 5cb4082bd..f9ebb83cb 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -61,6 +61,7 @@ enum class ExampleMode CASE_9, // DTM CASE_BUG, // Bug Repro, after fix, rename to CASE_10 and comment should be: testing fixed geometry and emulated fp64 corner cases CASE_11, // grid DTM + CASE_12, // Georeferenced streamed images CASE_COUNT }; @@ -77,7 +78,8 @@ constexpr std::array cameraExtents = 600.0, // CASE_8 600.0, // CASE_9 10.0, // CASE_BUG - 1000.0 // CASE_11 + 1000.0, // CASE_11 + 10.0 // CASE_12 }; constexpr ExampleMode mode = ExampleMode::CASE_11; @@ -3109,12 +3111,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu //printf("\n"); } - GeoreferencedImageParams geoRefParams = {}; - geoRefParams.format = asset::EF_R8G8B8A8_SRGB; - geoRefParams.imageExtents = uint32_t2 (2048, 2048); - geoRefParams.viewportExtents = (m_realFrameIx <= 5u) ? uint32_t2(1280, 720) : uint32_t2(3840, 2160); // to test trigerring resize/recreation - // drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(6996, geoRefParams, intendedNextSubmit); - LineStyleInfo lineStyle = { .color = float32_t4(1.0f, 0.1f, 0.1f, 0.9f), @@ -3698,6 +3694,22 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } #endif } + else if (mode == ExampleMode::CASE_12) + { + for (uint32_t i = 0; i < sampleImages.size(); ++i) + { + uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from + //printf(std::format("\n Image {} \n", i).c_str()); + drawResourcesFiller.ensureStaticImageAvailability({ imageID, sampleImages[i] }, intendedNextSubmit); + drawResourcesFiller.addImageObject(imageID, { .topLeft = { 0.0 + (i) * 3.0, 0.0 }, .dirU = { 3.0 , 0.0 }, .aspectRatio = 1.0 }, intendedNextSubmit); + //printf("\n"); + } + + GeoreferencedImageParams geoRefParams = {}; + geoRefParams.format = 
asset::EF_R8G8B8A8_SRGB; + geoRefParams.imageExtents = uint32_t2(2048, 2048); + // drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(6996, geoRefParams, intendedNextSubmit); + } } double getScreenToWorldRatio(const float64_t3x3& viewProjectionMatrix, uint32_t2 windowSize) diff --git a/62_CAD/scripts/tiled_grid.py b/62_CAD/scripts/tiled_grid.py new file mode 100644 index 000000000..737c3463e --- /dev/null +++ b/62_CAD/scripts/tiled_grid.py @@ -0,0 +1,266 @@ +from PIL import Image, ImageDraw, ImageFont +import numpy as np +import os +import OpenImageIO as oiio + + + +def create_single_tile(tile_size, color, x_coord, y_coord, font_path=None): + """ + Creates a single square tile image with a given color and two lines of centered text. + + Args: + tile_size (int): The sidelength of the square tile in pixels. + color (tuple): A tuple of three floats (R, G, B) representing the color (0.0-1.0). + x_coord (int): The X coordinate to display on the tile. + y_coord (int): The Y coordinate to display on the tile. + font_path (str, optional): The path to a TrueType font file (.ttf). + If None, a default PIL font will be used. + Returns: + PIL.Image.Image: The created tile image with text. 
+ """ + # Convert float color (0.0-1.0) to 8-bit integer color (0-255) + int_color = tuple(int(max(0, min(1, c)) * 255) for c in color) # Ensure color components are clamped + + img = Image.new('RGB', (tile_size, tile_size), int_color) + draw = ImageDraw.Draw(img) + + text_line1 = f"x = {x_coord}" + text_line2 = f"y = {y_coord}" + + text_fill_color = (255, 255, 255) + + # --- Dynamic Font Size Adjustment --- + # Start with a relatively large font size and shrink if needed + font_size = int(tile_size * 0.25) # Initial guess for font size + max_font_size = int(tile_size * 0.25) # Don't exceed this + + font = None + max_iterations = 100 # Prevent infinite loops in font size reduction + + for _ in range(max_iterations): + current_font_path = font_path + current_font_size = max(1, font_size) # Ensure font size is at least 1 + + try: + if current_font_path and os.path.exists(current_font_path): + font = ImageFont.truetype(current_font_path, current_font_size) + else: + # Fallback to default font (size argument might not always work perfectly) + font = ImageFont.load_default() + # For default font, try to scale if load_default(size=...) 
is supported and works + try: + scaled_font = ImageFont.load_default(size=current_font_size) + if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: # Check if usable + font = scaled_font + except Exception: + pass # Stick with original default font + + if font is None: # Last resort if no font could be loaded + font = ImageFont.load_default() + + # Measure text dimensions + bbox1 = draw.textbbox((0, 0), text_line1, font=font) + text_width1 = bbox1[2] - bbox1[0] + text_height1 = bbox1[3] - bbox1[1] + + bbox2 = draw.textbbox((0, 0), text_line2, font=font) + text_width2 = bbox2[2] - bbox2[0] + text_height2 = bbox2[3] - bbox2[1] + + # Calculate total height needed for both lines plus some padding + # Let's assume a small gap between lines (e.g., 0.1 * text_height) + line_gap = int(text_height1 * 0.2) # 20% of line height + total_text_height = text_height1 + text_height2 + line_gap + + # Check if text fits vertically and horizontally + if (total_text_height < tile_size * 0.9) and \ + (text_width1 < tile_size * 0.9) and \ + (text_width2 < tile_size * 0.9): + break # Font size is good, break out of loop + else: + font_size -= 1 # Reduce font size + if font_size <= 0: # Prevent infinite loop if text can never fit + font_size = 1 # Smallest possible font size + break + + except Exception as e: + # Handle cases where font loading or textbbox fails + print(f"Error during font sizing: {e}. 
Reducing font size and retrying.") + font_size -= 1 + if font_size <= 0: + font_size = 1 + break # Cannot make font smaller, stop + + # Final check: if font_size became 0 or less, ensure it's at least 1 + if font_size <= 0: + font_size = 1 + # Reload font with minimum size if needed + if font_path and os.path.exists(font_path): + font = ImageFont.truetype(font_path, font_size) + else: + font = ImageFont.load_default() + try: + scaled_font = ImageFont.load_default(size=font_size) + if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: + font = scaled_font + except Exception: + pass + + + # Re-measure with final font size to ensure accurate positioning + bbox1 = draw.textbbox((0, 0), text_line1, font=font) + text_width1 = bbox1[2] - bbox1[0] + text_height1 = bbox1[3] - bbox1[1] + + bbox2 = draw.textbbox((0, 0), text_line2, font=font) + text_width2 = bbox2[2] - bbox2[0] + text_height2 = bbox2[3] - bbox2[1] + + # Calculate positions for centering + # Line 1: centered horizontally, midpoint at 1/3 tile height + x1 = (tile_size - text_width1) / 2 + y1 = (tile_size / 3) - (text_height1 / 2) + + # Line 2: centered horizontally, midpoint at 2/3 tile height + x2 = (tile_size - text_width2) / 2 + y2 = (tile_size * 2 / 3) - (text_height2 / 2) + + # Draw the text + draw.text((x1, y1), text_line1, fill=text_fill_color, font=font) + draw.text((x2, y2), text_line2, fill=text_fill_color, font=font) + + return img + +def generate_interpolated_grid_image(tile_size, count, font_path=None): + """ + Generates a large image composed of 'count' x 'count' tiles, + with colors bilinearly interpolated from corners and text indicating tile index. + + Args: + tile_size (int): The sidelength of each individual square tile in pixels. + count (int): The number of tiles per side of the large grid (e.g., if count=3, + it's a 3x3 grid of tiles). + font_path (str, optional): Path to a TrueType font file for the tile text. + If None, a default PIL font will be used. 
+ + Returns: + PIL.Image.Image: The generated large grid image. + """ + if count <= 0: + raise ValueError("Count must be a positive integer.") + + total_image_size = count * tile_size + main_img = Image.new('RGB', (total_image_size, total_image_size)) + + # Corner colors (R, G, B) as floats (0.0-1.0) + corner_colors = { + "top_left": (1.0, 0.0, 0.0), # Red + "top_right": (1.0, 0.0, 1.0), # Purple + "bottom_left": (0.0, 1.0, 0.0), # Green + "bottom_right": (0.0, 0.0, 1.0) # Blue + } + + # Handle the edge case where count is 1 + if count == 1: + # If count is 1, there's only one tile, which is the top-left corner + tile_color = corner_colors["top_left"] + tile_image = create_single_tile(tile_size, tile_color, 0, 0, font_path=font_path) + main_img.paste(tile_image, (0, 0)) + return main_img + + for y_tile in range(count): + for x_tile in range(count): + # Calculate normalized coordinates (u, v) for interpolation + # We divide by (count - 1) to ensure 0 and 1 values at the edges + u = x_tile / (count - 1) + v = y_tile / (count - 1) + + # Apply the simplified bilinear interpolation formulas + r_component = 1 - v + g_component = v * (1 - u) + b_component = u + + # Clamp components to be within 0.0 and 1.0 (due to potential floating point inaccuracies) + current_color = ( + max(0.0, min(1.0, r_component)), + max(0.0, min(1.0, g_component)), + max(0.0, min(1.0, b_component)) + ) + + # Create the individual tile + tile_image = create_single_tile(tile_size, current_color, x_tile, y_tile, font_path=font_path) + + # Paste the tile onto the main image + paste_x = x_tile * tile_size + paste_y = y_tile * tile_size + main_img.paste(tile_image, (paste_x, paste_y)) + + return main_img + + + + +import argparse +parser = argparse.ArgumentParser(description="Process two optional named parameters.") +parser.add_argument('--ts', type=int, default=128, help='Tile Size') +parser.add_argument('--gs', type=int, default=128, help='Grid Size') + +# Parse the arguments +args = 
parser.parse_args() + + +# --- Configuration --- +tile_sidelength = args.ts # Size of each individual tile in pixels +grid_count = args.gs # Number of tiles per side (e.g., 15 means 15x15 grid) + +# Path to a font file (adjust this for your system) +# On Windows, you can typically use 'C:/Windows/Fonts/arial.ttf' or similar +# You might need to find a suitable font on your system. +# For testing, you can use None to let PIL use its default font. +# If a specific font path is provided and doesn't exist, it will fall back to default. +windows_font_path = "C:/Windows/Fonts/arial.ttf" # Example path for Windows +# If Arial is not found, try Times New Roman: +# windows_font_path = "C:/Windows/Fonts/times.ttf" + +font_to_use = None +if os.name == 'nt': # Check if OS is Windows + if os.path.exists(windows_font_path): + font_to_use = windows_font_path + print(f"Using font: {windows_font_path}") + else: + print(f"Warning: Windows font not found at '{windows_font_path}'. Using default PIL font.") +else: # Assume Linux/macOS for other OS types + # Common Linux/macOS font paths (adjust as needed) + linux_font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf" + mac_font_path = "/Library/Fonts/Arial.ttf" + if os.path.exists(linux_font_path): + font_to_use = linux_font_path + print(f"Using font: {linux_font_path}") + elif os.path.exists(mac_font_path): + font_to_use = mac_font_path + print(f"Using font: {mac_font_path}") + else: + print("Warning: No common Linux/macOS font found. 
Using default PIL font.") + + +# --- Generate and save the image --- +print(f"Generating a {grid_count}x{grid_count} grid of tiles, each {tile_sidelength}x{tile_sidelength} pixels.") +print(f"Total image size will be {grid_count * tile_sidelength}x{grid_count * tile_sidelength} pixels.") + +try: + final_image = generate_interpolated_grid_image(tile_sidelength, grid_count, font_path=font_to_use) + output_filename = "../../media/tiled_grid.exr" + np_img = np.array(final_image).astype(np.float32) / 255.0 # Normalize for EXR + spec = oiio.ImageSpec(final_image.width, final_image.height, 3, oiio.TypeDesc("float")) + out = oiio.ImageOutput.create(output_filename) + out.open(output_filename, spec) + out.write_image(np_img.reshape(-1)) # Flatten for OIIO’s expected input + out.close() + + print(f"Successfully created '{output_filename}'") + +except ValueError as e: + print(f"Error: {e}") +except Exception as e: + print(f"An unexpected error occurred: {e}") \ No newline at end of file From e523868e11ccf0747184413748b160854ecaa15b Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 23 Jul 2025 23:23:33 -0300 Subject: [PATCH 02/29] Off by one error fix --- 62_CAD/DrawResourcesFiller.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 5c2242547..4c259220f 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2713,14 +2713,14 @@ core::vector DrawResourcesFiller::StreamedImageManager::gener // Assuming a 1 pixel per block format for simplicity rn auto bytesPerPixel = getTexelOrBlockBytesize(georeferencedImageParams.format); - auto bytesPerSide = bytesPerPixel * TileSize; + size_t bytesPerSide = bytesPerPixel * TileSize; for (uint32_t tileX = minEffective.x; tileX <= maxEffective.x; tileX++) { for (uint32_t tileY = minEffective.y; tileY <= maxEffective.y; tileY++) { asset::IImage::SBufferCopy bufCopy; - bufCopy.bufferOffset = (tileY * maxImageTileIndices.x * 
bytesPerSide + tileX) * bytesPerSide; + bufCopy.bufferOffset = (tileY * (maxImageTileIndices.x + 1) * bytesPerSide + tileX) * bytesPerSide; bufCopy.bufferRowLength = georeferencedImageParams.imageExtents.x; bufCopy.bufferImageHeight = 0; bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; From a54f6c6b90a3d893bf6341fa3f6fcb5e2f7c7375 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 23 Jul 2025 23:26:45 -0300 Subject: [PATCH 03/29] Fix tile offsets for upload --- 62_CAD/DrawResourcesFiller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 4c259220f..1fbfe1bff 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2727,7 +2727,7 @@ core::vector DrawResourcesFiller::StreamedImageManager::gener bufCopy.imageSubresource.mipLevel = 0u; bufCopy.imageSubresource.baseArrayLayer = 0u; bufCopy.imageSubresource.layerCount = 1u; - bufCopy.imageOffset = { 0u,0u,0u }; + bufCopy.imageOffset = { tileX * TileSize, tileY * TileSize, 0u }; bufCopy.imageExtent.width = TileSize; bufCopy.imageExtent.height = TileSize; bufCopy.imageExtent.depth = 1; From c3f7d04234e149b64d3bfa8cc49664a99bc38fb8 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 28 Jul 2025 00:28:21 -0300 Subject: [PATCH 04/29] Skeleton done but currently bugged, some byte offset is wrong (related to VkBufferImageCopy used to upload tiles) --- 62_CAD/DrawResourcesFiller.cpp | 120 ++++++++++++++++++++------------- 62_CAD/DrawResourcesFiller.h | 17 ++++- 62_CAD/Images.h | 1 - 62_CAD/main.cpp | 33 +++++---- 4 files changed, 108 insertions(+), 63 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 1fbfe1bff..7ec6d2145 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -641,7 +641,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( auto evictCallback = [&](image_id imageID, const 
CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; CachedImageRecord* cachedImageRecord = imagesCache->insert(manager.georeferencedImageParams.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE + // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENCED TYPE IGPUImage::SCreationParams imageCreationParams = {}; determineGeoreferencedImageCreationParams(imageCreationParams, manager); @@ -670,7 +670,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( const auto cachedImageType = cachedImageRecord->type; // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus const auto currentParams = static_cast(imageCreationParams); - const bool needsRecreation = cachedImageType != manager.georeferencedImageParams.imageType || cachedParams != currentParams; + const bool needsRecreation = cachedImageType != manager.imageType || cachedParams != currentParams; if (needsRecreation) { // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. 
@@ -708,7 +708,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( if (allocResults.isValid()) { - cachedImageRecord->type = manager.georeferencedImageParams.imageType; + cachedImageRecord->type = manager.imageType; cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN cachedImageRecord->allocationOffset = allocResults.allocationOffset; @@ -866,7 +866,7 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin endMainObject(); } -void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit) { beginMainObject(MainObjectType::STREAMED_IMAGE); @@ -878,11 +878,21 @@ void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const Georefer return; } + // Generate upload data + auto uploadData = manager.generateTileUploadData(NDCToWorld); + + // Queue image uploads - if necessary + if (manager.imageType == ImageType::GEOREFERENCED_STREAMED) + { + for (const auto& imageCopy : uploadData.tiles) + queueGeoreferencedImageCopy_Internal(manager.georeferencedImageParams.imageID, imageCopy); + } + GeoreferencedImageInfo info = {}; - info.topLeft = params.worldspaceOBB.topLeft; - info.dirU = params.worldspaceOBB.dirU; - info.aspectRatio = params.worldspaceOBB.aspectRatio; - info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + info.topLeft = uploadData.worldspaceOBB.topLeft; + info.dirU = uploadData.worldspaceOBB.dirU; + info.aspectRatio = uploadData.worldspaceOBB.aspectRatio; + info.textureID 
= getImageIndexFromID(manager.georeferencedImageParams.imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) { // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects @@ -1369,7 +1379,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN std::vector nonResidentImageRecords; for (auto& [id, record] : imagesCache) { - if (record.staticCPUImage && record.type == ImageType::STATIC && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA) + if (record.staticCPUImage && (record.type == ImageType::STATIC || record.type == ImageType::GEOREFERENCED_FULL_RESOLUTION) && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA) nonResidentImageRecords.push_back(&record); } @@ -2469,15 +2479,15 @@ void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset:: const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y; if (betterToResideFullyInMem) - georeferencedImageParams.imageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; + manager.imageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; else - georeferencedImageParams.imageType = ImageType::GEOREFERENCED_STREAMED; + manager.imageType = ImageType::GEOREFERENCED_STREAMED; outImageParams.type = asset::IImage::ET_2D; outImageParams.samples = asset::IImage::ESCF_1_BIT; outImageParams.format = georeferencedImageParams.format; - if (georeferencedImageParams.imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) + if (manager.imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) { outImageParams.extent = { georeferencedImageParams.imageExtents.x, 
georeferencedImageParams.imageExtents.y, 1u }; } @@ -2648,7 +2658,7 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(GeoreferencedIma // 1. Displacement. The following matrix calculates the offset for an input point `p` with homogenous worldspace coordinates. // By foregoing the homogenous coordinate we can keep only the vector part, that's why it's `2x3` and not `3x3` float64_t2 topLeftWorld = georeferencedImageParams.worldspaceOBB.topLeft; - float64_t2x3 displacementMatrix(1., 0., topLeftWorld.x, 0., 1., topLeftWorld.y); + float64_t2x3 displacementMatrix(1., 0., - topLeftWorld.x, 0., 1., - topLeftWorld.y); // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to "image worldspan" coords has a quite nice expression float64_t2 dirU = georeferencedImageParams.worldspaceOBB.dirU; @@ -2669,55 +2679,68 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(GeoreferencedIma // Put them all together offsetCoBScaleMatrix = nbl::hlsl::mul(scaleMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); + + // Create a "sliding window OBB" that we use to offset tiles + fromTopLeftOBB = georeferencedImageParams.worldspaceOBB; + fromTopLeftOBB.dirU *= float32_t(TileSize * maxResidentTiles.x) / georeferencedImageParams.imageExtents.x; + // I think aspect ratio can stay the same since worldspace OBB and imageExtents should have same aspect ratio + // If the image can be stretched/sheared and not simply rotated, then the aspect ratio *might* have to change, although I think that's covered by + // the OBB's aspect ratio } -core::vector DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const float64_t3x3& NDCToWorld) +DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const float64_t3x3& NDCToWorld) { + if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) + return TileUploadData{ {}, 
georeferencedImageParams.worldspaceOBB }; + + // Following need only be done if image is actually streamed + // Using Vulkan NDC, the viewport has coordinates in the range [-1, -1] x [1,1]. First we get the world coordinates of the viewport corners, in homogenous - float64_t3 topLeftNDCH(-1., -1., 1.); - float64_t3 topRightNDCH(1., -1., 1.); - float64_t3 bottomLeftNDCH(-1., 1., 1.); - float64_t3 bottomRightNDCH(1., 1., 1.); + const float64_t3 topLeftNDCH(-1., -1., 1.); + const float64_t3 topRightNDCH(1., -1., 1.); + const float64_t3 bottomLeftNDCH(-1., 1., 1.); + const float64_t3 bottomRightNDCH(1., 1., 1.); - float64_t3 topLeftWorldH = nbl::hlsl::mul(NDCToWorld, topLeftNDCH); - float64_t3 topRightWorldH = nbl::hlsl::mul(NDCToWorld, topRightNDCH); - float64_t3 bottomLeftWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftNDCH); - float64_t3 bottomRightWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightNDCH); + const float64_t3 topLeftWorldH = nbl::hlsl::mul(NDCToWorld, topLeftNDCH); + const float64_t3 topRightWorldH = nbl::hlsl::mul(NDCToWorld, topRightNDCH); + const float64_t3 bottomLeftWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftNDCH); + const float64_t3 bottomRightWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightNDCH); // We can use `offsetCoBScaleMatrix` to get tile lattice coordinates for each of these points - float64_t2 topLeftTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, topLeftWorldH); - float64_t2 topRightTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, topRightWorldH); - float64_t2 bottomLeftTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, bottomLeftWorldH); - float64_t2 bottomRightTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, bottomRightWorldH); + const float64_t2 topLeftTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, topLeftWorldH); + const float64_t2 topRightTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, topRightWorldH); + const float64_t2 bottomLeftTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, bottomLeftWorldH); + const float64_t2 
bottomRightTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, bottomRightWorldH); // Get the min and max of each lattice coordinate - float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); - float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); - float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); + const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); + const float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); + const float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); - float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); - float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); - float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); + const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); + const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); + const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); // Floor mins and ceil maxes - int32_t2 minAllFloored = nbl::hlsl::floor(minAll); - int32_t2 maxAllCeiled = nbl::hlsl::ceil(maxAll); + const int32_t2 minAllFloored = nbl::hlsl::floor(minAll); + const int32_t2 maxAllCeiled = nbl::hlsl::ceil(maxAll); // Clamp them to reasonable tile indices - uint32_t2 minEffective = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices)); - uint32_t2 maxEffective = nbl::hlsl::clamp(maxAllCeiled, int32_t2(0, 0), int32_t2(maxImageTileIndices)); + minLoadedTileIndices = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices)); + maxLoadedTileIndices = nbl::hlsl::clamp(maxAllCeiled, int32_t2(0, 0), int32_t2(maxImageTileIndices)); // Now we have the indices of the tiles we want to upload, so create the vector of `StreamedImageCopies` - 1 per tile. 
- core::vector retVal; - retVal.reserve((maxEffective.x - minEffective.x + 1) * (maxEffective.y - minEffective.y + 1)); + core::vector tiles; + tiles.reserve((maxLoadedTileIndices.x - minLoadedTileIndices.x + 1) * (maxLoadedTileIndices.y - minLoadedTileIndices.y + 1)); - // Assuming a 1 pixel per block format for simplicity rn + // Assuming a 1 pixel per block format - otherwise math here gets a bit trickier auto bytesPerPixel = getTexelOrBlockBytesize(georeferencedImageParams.format); - size_t bytesPerSide = bytesPerPixel * TileSize; + const size_t bytesPerSide = bytesPerPixel * TileSize; - for (uint32_t tileX = minEffective.x; tileX <= maxEffective.x; tileX++) + // Dangerous code - assumes image can be perfectly covered with tiles. Otherwise will need to handle edge cases + for (uint32_t tileX = minLoadedTileIndices.x; tileX <= maxLoadedTileIndices.x; tileX++) { - for (uint32_t tileY = minEffective.y; tileY <= maxEffective.y; tileY++) + for (uint32_t tileY = minLoadedTileIndices.y; tileY <= maxLoadedTileIndices.y; tileY++) { asset::IImage::SBufferCopy bufCopy; bufCopy.bufferOffset = (tileY * (maxImageTileIndices.x + 1) * bytesPerSide + tileX) * bytesPerSide; @@ -2727,13 +2750,20 @@ core::vector DrawResourcesFiller::StreamedImageManager::gener bufCopy.imageSubresource.mipLevel = 0u; bufCopy.imageSubresource.baseArrayLayer = 0u; bufCopy.imageSubresource.layerCount = 1u; - bufCopy.imageOffset = { tileX * TileSize, tileY * TileSize, 0u }; + bufCopy.imageOffset = { (tileX - minLoadedTileIndices.x) * TileSize, (tileY - minLoadedTileIndices.y) * TileSize, 0u }; bufCopy.imageExtent.width = TileSize; bufCopy.imageExtent.height = TileSize; bufCopy.imageExtent.depth = 1; - retVal.emplace_back(georeferencedImageParams.format, georeferencedImageParams.geoReferencedImage->getBuffer(), std::move(bufCopy)); + tiles.emplace_back(georeferencedImageParams.format, georeferencedImageParams.geoReferencedImage->getBuffer(), std::move(bufCopy)); } } - return retVal; + + // Last, we need 
to figure out an obb that covers only these tiles + OrientedBoundingBox2D worldspaceOBB = fromTopLeftOBB; + const float32_t2 dirV = float32_t2(worldspaceOBB.dirU.y, -worldspaceOBB.dirU.x) * worldspaceOBB.aspectRatio; + worldspaceOBB.topLeft += worldspaceOBB.dirU * float32_t(minLoadedTileIndices.x / maxResidentTiles.x); + worldspaceOBB.topLeft += dirV * float32_t(minLoadedTileIndices.y / maxResidentTiles.y); + return TileUploadData{ std::move(tiles), worldspaceOBB }; + } \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index d7d38e9f0..d54c7d3f8 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -129,17 +129,28 @@ struct DrawResourcesFiller StreamedImageManager(GeoreferencedImageParams&& _georeferencedImageParams); - core::vector generateTileUploadData(const float64_t3x3& worldToNDC); + struct TileUploadData + { + core::vector tiles; + OrientedBoundingBox2D worldspaceOBB; + }; + + TileUploadData generateTileUploadData(const float64_t3x3& NDCToWorld); // This and the logic they're in will likely change later with Toroidal updating protected: GeoreferencedImageParams georeferencedImageParams; uint32_t2 maxResidentTiles = {}; private: + ImageType imageType; uint32_t2 minLoadedTileIndices = {}; + uint32_t2 maxLoadedTileIndices = {}; uint32_t2 maxImageTileIndices = {}; // See constructor for info on this one float64_t2x3 offsetCoBScaleMatrix = {}; + // Wordlspace OBB that covers the top left `maxResidentTiles.x x maxResidentTiles.y` tiles of the image. + // We shift this OBB by appropriate tile offsets when loading tiles + OrientedBoundingBox2D fromTopLeftOBB = {}; }; DrawResourcesFiller(); @@ -373,7 +384,7 @@ struct DrawResourcesFiller void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); // This function must be called immediately after `addStaticImage` for the same imageID. 
- void addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + void addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. @@ -620,7 +631,7 @@ struct DrawResourcesFiller bool addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx);; /// Attempts to upload a georeferenced image info considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) - bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx);; + bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx); uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 8453c93ab..517fc0e06 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -28,7 +28,6 @@ struct GeoreferencedImageParams uint32_t2 imageExtents = {}; uint32_t2 viewportExtents = {}; asset::E_FORMAT format = {}; - ImageType imageType; image_id imageID; // For now it's going to be fully resident in memory, later on it's probably going to be a streamer class most likely. 
core::smart_refctd_ptr geoReferencedImage; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index f9ebb83cb..d472577b8 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -82,7 +82,7 @@ constexpr std::array cameraExtents = 10.0 // CASE_12 }; -constexpr ExampleMode mode = ExampleMode::CASE_11; +constexpr ExampleMode mode = ExampleMode::CASE_12; class Camera2D { @@ -1270,6 +1270,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu gridDTMHeightMap = loadImage("../../media/gridDTMHeightMap.exr"); + bigTiledGrid = loadImage("../../media/tiled_grid.exr"); + // set diagonals of cells to TOP_LEFT_TO_BOTTOM_RIGHT or BOTTOM_LEFT_TO_TOP_RIGHT randomly { // assumption is that format of the grid DTM height map is *_SRGB, I don't think we need any code to ensure that @@ -3696,19 +3698,21 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else if (mode == ExampleMode::CASE_12) { - for (uint32_t i = 0; i < sampleImages.size(); ++i) - { - uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from - //printf(std::format("\n Image {} \n", i).c_str()); - drawResourcesFiller.ensureStaticImageAvailability({ imageID, sampleImages[i] }, intendedNextSubmit); - drawResourcesFiller.addImageObject(imageID, { .topLeft = { 0.0 + (i) * 3.0, 0.0 }, .dirU = { 3.0 , 0.0 }, .aspectRatio = 1.0 }, intendedNextSubmit); - //printf("\n"); - } - - GeoreferencedImageParams geoRefParams = {}; - geoRefParams.format = asset::EF_R8G8B8A8_SRGB; - geoRefParams.imageExtents = uint32_t2(2048, 2048); - // drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(6996, geoRefParams, intendedNextSubmit); + GeoreferencedImageParams tiledGridParams; + auto& tiledGridCreationParams = bigTiledGrid->getCreationParameters(); + tiledGridParams.worldspaceOBB.topLeft = { 0.0, 0.0 }; + tiledGridParams.worldspaceOBB.dirU = { 10.0, 0.0 }; + tiledGridParams.worldspaceOBB.aspectRatio = 1.0; + 
tiledGridParams.imageExtents = { tiledGridCreationParams.extent.width, tiledGridCreationParams.extent.height}; + tiledGridParams.viewportExtents = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; + tiledGridParams.format = tiledGridCreationParams.format; + tiledGridParams.imageID = 6996; + tiledGridParams.geoReferencedImage = bigTiledGrid; + + DrawResourcesFiller::StreamedImageManager tiledGridManager(std::move(tiledGridParams)); + + drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridManager, intendedNextSubmit); + drawResourcesFiller.addGeoreferencedImage(tiledGridManager, nbl::hlsl::inverse(m_Camera.constructViewProjection()), intendedNextSubmit); } } @@ -3783,6 +3787,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::vector> sampleImages; smart_refctd_ptr gridDTMHeightMap; + smart_refctd_ptr bigTiledGrid; static constexpr char FirstGeneratedCharacter = ' '; static constexpr char LastGeneratedCharacter = '~'; From 7a5e948701d8a61b08db2c018263eaa2a51d09ce Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 29 Jul 2025 14:14:26 -0300 Subject: [PATCH 05/29] Fix square bytes computation --- 62_CAD/DrawResourcesFiller.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 7ec6d2145..3c9e85246 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2743,7 +2743,7 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S for (uint32_t tileY = minLoadedTileIndices.y; tileY <= maxLoadedTileIndices.y; tileY++) { asset::IImage::SBufferCopy bufCopy; - bufCopy.bufferOffset = (tileY * (maxImageTileIndices.x + 1) * bytesPerSide + tileX) * bytesPerSide; + bufCopy.bufferOffset = (tileY * (maxImageTileIndices.x + 1) * TileSize + tileX) * bytesPerSide; bufCopy.bufferRowLength = georeferencedImageParams.imageExtents.x; bufCopy.bufferImageHeight = 0; 
bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; From 52d947deb94432c58de00bfb252f73ae7da4795e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 30 Jul 2025 03:04:40 -0300 Subject: [PATCH 06/29] Checkpoint 1! --- 62_CAD/DrawResourcesFiller.cpp | 50 ++++++++++++++++++---------------- 62_CAD/main.cpp | 6 ++-- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 3c9e85246..e129e13a1 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2494,11 +2494,19 @@ void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset:: else { // Pad sides to multiple of tileSize. Even after rounding up, we might still need to add an extra tile to cover both sides. - const auto xExtent = core::roundUp(georeferencedImageParams.viewportExtents.x, manager.TileSize) + manager.TileSize; - const auto yExtent = core::roundUp(georeferencedImageParams.viewportExtents.y, manager.TileSize) + manager.TileSize; + // I added two to be safe and to have issues at the borders. 
+ const auto xExtent = core::roundUp(georeferencedImageParams.viewportExtents.x, manager.TileSize) + 2 * manager.TileSize; + const auto yExtent = core::roundUp(georeferencedImageParams.viewportExtents.y, manager.TileSize) + 2 * manager.TileSize; outImageParams.extent = { xExtent, yExtent, 1u }; manager.maxResidentTiles.x = xExtent / manager.TileSize; manager.maxResidentTiles.y = yExtent / manager.TileSize; + // Create a "sliding window OBB" that we use to offset tiles + manager.fromTopLeftOBB.topLeft = georeferencedImageParams.worldspaceOBB.topLeft; + manager.fromTopLeftOBB.dirU = georeferencedImageParams.worldspaceOBB.dirU * float32_t(manager.TileSize * manager.maxResidentTiles.x) / float32_t(georeferencedImageParams.imageExtents.x); + manager.fromTopLeftOBB.aspectRatio = float32_t(manager.maxResidentTiles.y) / float32_t(manager.maxResidentTiles.x); + // I think aspect ratio can stay the same since worldspace OBB and imageExtents should have same aspect ratio. + // If the image can be stretched/sheared and not simply rotated, then the aspect ratio *might* have to change, although I think that's covered by + // the OBB's aspect ratio } @@ -2655,12 +2663,13 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(GeoreferencedIma // 3. Map world units to tile units. This scaling is generally nonuniform, since it depends on the ratio of pixels to world units per coordinate. // The name of the `offsetCoBScaleMatrix` follows by what is computed at each step - // 1. Displacement. The following matrix calculates the offset for an input point `p` with homogenous worldspace coordinates. + // 1. Displacement. The following matrix computes the offset for an input point `p` with homogenous worldspace coordinates. 
// By foregoing the homogenous coordinate we can keep only the vector part, that's why it's `2x3` and not `3x3` float64_t2 topLeftWorld = georeferencedImageParams.worldspaceOBB.topLeft; float64_t2x3 displacementMatrix(1., 0., - topLeftWorld.x, 0., 1., - topLeftWorld.y); - // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to "image worldspan" coords has a quite nice expression + // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression + // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) float64_t2 dirU = georeferencedImageParams.worldspaceOBB.dirU; float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * georeferencedImageParams.worldspaceOBB.aspectRatio; float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); @@ -2669,23 +2678,14 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(GeoreferencedIma float64_t2 secondRow = dirV / dirVLengthSquared; float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); - // 3. Scaling. The vector obtained by doing `CoB * displacement * p` is still in world units. Given that we know how many pixels the image spans (given by - // georeferencedImageParams.imageExtents) and how many world units it spans (given by (|dirU|, |dirV|) ) we can get a factor for the `pixel/world unit` ratio. - // Then we simply multiply that factor for another factor for the `tile / pixel` ratio to get our `tile / world unit` scaling factor. - float64_t dirULength = nbl::hlsl::sqrt(dirULengthSquared); - float64_t dirVLength = nbl::hlsl::sqrt(dirVLengthSquared); - float64_t2 scaleFactors = (1. / TileSize) * (float64_t2(georeferencedImageParams.imageExtents) / float64_t2(dirULength, dirVLength)); - float64_t2x2 scaleMatrix(scaleFactors.x, 0., 0., scaleFactors.y); + // 3. Scaling. 
The vector obtained by doing `CoB * displacement * p` are now the coordinates in the `span{dirU, dirV}`, which would be `uv` coordinates in [0,1]^2 + // (or outside this range for points not in the image). To get tile lattice coordinates, we need to scale this number by an nTiles vector which counts + // (fractionally) how many tiles fit in the image along each axis + float32_t2 nTiles = float32_t2(georeferencedImageParams.imageExtents) / float32_t2(TileSize, TileSize); + float64_t2x2 scaleMatrix(nTiles.x, 0., 0., nTiles.y); // Put them all together offsetCoBScaleMatrix = nbl::hlsl::mul(scaleMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); - - // Create a "sliding window OBB" that we use to offset tiles - fromTopLeftOBB = georeferencedImageParams.worldspaceOBB; - fromTopLeftOBB.dirU *= float32_t(TileSize * maxResidentTiles.x) / georeferencedImageParams.imageExtents.x; - // I think aspect ratio can stay the same since worldspace OBB and imageExtents should have same aspect ratio - // If the image can be stretched/sheared and not simply rotated, then the aspect ratio *might* have to change, although I think that's covered by - // the OBB's aspect ratio } DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const float64_t3x3& NDCToWorld) @@ -2721,13 +2721,13 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); - // Floor mins and ceil maxes + // Floor them to get an integer for the tiles they're in const int32_t2 minAllFloored = nbl::hlsl::floor(minAll); - const int32_t2 maxAllCeiled = nbl::hlsl::ceil(maxAll); + const int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll); // Clamp them to reasonable tile indices minLoadedTileIndices = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), 
int32_t2(maxImageTileIndices)); - maxLoadedTileIndices = nbl::hlsl::clamp(maxAllCeiled, int32_t2(0, 0), int32_t2(maxImageTileIndices)); + maxLoadedTileIndices = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), nbl::hlsl::min(int32_t2(maxImageTileIndices), int32_t2(minLoadedTileIndices + maxResidentTiles - uint32_t2(1,1)))); // Now we have the indices of the tiles we want to upload, so create the vector of `StreamedImageCopies` - 1 per tile. core::vector tiles; @@ -2759,11 +2759,13 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S } } - // Last, we need to figure out an obb that covers only these tiles + // Last, we need to figure out an obb that covers only the currently loaded tiles + // By shifting the `fromTopLeftOBB` an appropriate number of tiles in each direction, we get an obb that covers at least the uploaded tiles + // It might cover more tiles, possible some that are not even loaded into VRAM, but since those fall outside of the viewport we don't really care about them OrientedBoundingBox2D worldspaceOBB = fromTopLeftOBB; const float32_t2 dirV = float32_t2(worldspaceOBB.dirU.y, -worldspaceOBB.dirU.x) * worldspaceOBB.aspectRatio; - worldspaceOBB.topLeft += worldspaceOBB.dirU * float32_t(minLoadedTileIndices.x / maxResidentTiles.x); - worldspaceOBB.topLeft += dirV * float32_t(minLoadedTileIndices.y / maxResidentTiles.y); + worldspaceOBB.topLeft += worldspaceOBB.dirU * float32_t(minLoadedTileIndices.x) / float32_t(maxResidentTiles.x); + worldspaceOBB.topLeft += dirV * float32_t(minLoadedTileIndices.y) / float32_t(maxResidentTiles.y); return TileUploadData{ std::move(tiles), worldspaceOBB }; } \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index d472577b8..838833c2c 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1320,7 +1320,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { - 
m_Camera.mouseProcess(events); + if (m_window->hasMouseFocus()) + m_Camera.mouseProcess(events); } , m_logger.get()); keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void @@ -3701,7 +3702,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu GeoreferencedImageParams tiledGridParams; auto& tiledGridCreationParams = bigTiledGrid->getCreationParameters(); tiledGridParams.worldspaceOBB.topLeft = { 0.0, 0.0 }; - tiledGridParams.worldspaceOBB.dirU = { 10.0, 0.0 }; + tiledGridParams.worldspaceOBB.dirU = { 128.0, 0.0 }; tiledGridParams.worldspaceOBB.aspectRatio = 1.0; tiledGridParams.imageExtents = { tiledGridCreationParams.extent.width, tiledGridCreationParams.extent.height}; tiledGridParams.viewportExtents = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; @@ -3712,6 +3713,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu DrawResourcesFiller::StreamedImageManager tiledGridManager(std::move(tiledGridParams)); drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridManager, intendedNextSubmit); + drawResourcesFiller.addGeoreferencedImage(tiledGridManager, nbl::hlsl::inverse(m_Camera.constructViewProjection()), intendedNextSubmit); } } From 665559b0c94966306f5e90c31ba731e68a4d893b Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 31 Jul 2025 10:34:57 -0300 Subject: [PATCH 07/29] Save before merge --- 62_CAD/DrawResourcesFiller.cpp | 2 +- 62_CAD/main.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index e129e13a1..2bc5865bd 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -714,7 +714,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( cachedImageRecord->allocationOffset = allocResults.allocationOffset; cachedImageRecord->allocationSize = allocResults.allocationSize; 
cachedImageRecord->gpuImageView = allocResults.gpuImageView; - cachedImageRecord->staticCPUImage = manager.georeferencedImageParams.geoReferencedImage; + cachedImageRecord->staticCPUImage = nullptr; } else { diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 838833c2c..5cb84c88f 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -132,7 +132,7 @@ class Camera2D if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) { - m_bounds = m_bounds + float64_t2{ (double)ev.scrollEvent.verticalScroll * -0.1 * m_aspectRatio, (double)ev.scrollEvent.verticalScroll * -0.1}; + m_bounds = m_bounds + float64_t2{ (double)ev.scrollEvent.verticalScroll * -0.0025 * m_aspectRatio, (double)ev.scrollEvent.verticalScroll * -0.0025}; m_bounds = float64_t2{ core::max(m_aspectRatio, m_bounds.x), core::max(1.0, m_bounds.y) }; } } @@ -3702,7 +3702,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu GeoreferencedImageParams tiledGridParams; auto& tiledGridCreationParams = bigTiledGrid->getCreationParameters(); tiledGridParams.worldspaceOBB.topLeft = { 0.0, 0.0 }; - tiledGridParams.worldspaceOBB.dirU = { 128.0, 0.0 }; + tiledGridParams.worldspaceOBB.dirU = { 100.0 / nbl::hlsl::sqrt(2.0), 100.0 / nbl::hlsl::sqrt(2.0) }; tiledGridParams.worldspaceOBB.aspectRatio = 1.0; tiledGridParams.imageExtents = { tiledGridCreationParams.extent.width, tiledGridCreationParams.extent.height}; tiledGridParams.viewportExtents = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; From fc2d504c8df2ddcb829208f62b5636fdc114e0a8 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 8 Aug 2025 01:54:22 -0300 Subject: [PATCH 08/29] Bug: using uploaded uvs seems to stretch/not shrink along v direction --- 62_CAD/DrawResourcesFiller.cpp | 4 ++-- 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 2a72305c7..392f7214c 100644 --- 
a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2794,7 +2794,7 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S const float32_t2 dirV = float32_t2(worldspaceOBB.dirU.y, -worldspaceOBB.dirU.x) * worldspaceOBB.aspectRatio; worldspaceOBB.topLeft += worldspaceOBB.dirU * float32_t(minLoadedTileIndices.x) / float32_t(maxResidentTiles.x); worldspaceOBB.topLeft += dirV * float32_t(minLoadedTileIndices.y) / float32_t(maxResidentTiles.y); - + // Compute minUV, maxUV // Since right now we don't shift the obb around, minUV will always be (0,0), but this is bound to change later on (shifting obb will happen when we want to reuse tiles and not // reupload them on every frame in the next phase) @@ -2820,7 +2820,7 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S worldspaceOBB.dirU *= maxUV.x; // Scale the aspect ratio by the relative shrinkage of U,V. Remember our aspect ratio is V / U. worldspaceOBB.aspectRatio *= maxUV.y / maxUV.x; - + return TileUploadData{ std::move(tiles), worldspaceOBB, minUV, maxUV }; } \ No newline at end of file diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 23a4c69ed..299c87778 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -746,6 +746,9 @@ PSInput vtxMain(uint vertexID : SV_VertexID) const float32_t2 minUV = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float32_t2) + sizeof(float32_t) + sizeof(uint32_t), 4u); const float32_t2 maxUV = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(float32_t2) + sizeof(float32_t) + sizeof(uint32_t), 4u); + //printf("%f %f", minUV.x, minUV.y); + //printf("%f %f", maxUV.x, maxUV.y); + const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; const float32_t2 
ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); const float32_t2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); @@ -754,7 +757,8 @@ PSInput vtxMain(uint vertexID : SV_VertexID) const uint32_t2 corner = uint32_t2(vertexIdx & 0x1u, vertexIdx & 0x2u); const float32_t2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; - const float32_t2 uv = float32_t2(corner.x ? minUV.x : maxUV.x , corner.y ? minUV.y : maxUV.y); + const float32_t2 uv = corner;// float32_t2(corner.x ? maxUV.x : minUV.x, corner.y ? maxUV.y : minUV.y); + printf("%f %f", ndcCorner.x, ndcCorner.y); outV.position = float4(ndcCorner, 0.f, 1.f); outV.setImageUV(uv); From e61e389ed763f9d9b6b44b075de00baa220763a5 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 8 Aug 2025 02:13:07 -0300 Subject: [PATCH 09/29] Fixed y-axis bug --- 62_CAD/shaders/main_pipeline/vertex_shader.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 299c87778..9842e67b0 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -754,10 +754,10 @@ PSInput vtxMain(uint vertexID : SV_VertexID) const float32_t2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); const float32_t2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - const uint32_t2 corner = uint32_t2(vertexIdx & 0x1u, vertexIdx & 0x2u); + const bool2 corner = bool2(vertexIdx & 0x1u, vertexIdx & 0x2u); const float32_t2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; - const float32_t2 uv = corner;// float32_t2(corner.x ? maxUV.x : minUV.x, corner.y ? maxUV.y : minUV.y); + const float32_t2 uv = float32_t2(corner.x ? maxUV.x : minUV.x, corner.y ? 
maxUV.y : minUV.y); printf("%f %f", ndcCorner.x, ndcCorner.y); outV.position = float4(ndcCorner, 0.f, 1.f); From 258920c467906763fe5866dec1fc21ba85616ddf Mon Sep 17 00:00:00 2001 From: Fletterio Date: Sun, 10 Aug 2025 19:37:00 -0300 Subject: [PATCH 10/29] Diagonal computation --- 62_CAD/DrawResourcesFiller.cpp | 84 ++++++++++++++++++---------------- 62_CAD/DrawResourcesFiller.h | 18 ++++---- 62_CAD/Images.h | 1 - 62_CAD/main.cpp | 6 +-- 4 files changed, 57 insertions(+), 52 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 392f7214c..547feca64 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -651,7 +651,7 @@ bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::spangetLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); @@ -659,11 +659,11 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( // Try inserting or updating the image usage in the cache. // If the image is already present, updates its semaphore value. 
auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - CachedImageRecord* cachedImageRecord = imagesCache->insert(manager.georeferencedImageParams.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENCED TYPE IGPUImage::SCreationParams imageCreationParams = {}; - determineGeoreferencedImageCreationParams(imageCreationParams, manager); + ImageType imageType = determineGeoreferencedImageCreationParams(imageCreationParams, params); // imageParams = cpuImage->getCreationParameters(); imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; @@ -690,11 +690,11 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( const auto cachedImageType = cachedImageRecord->type; // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus const auto currentParams = static_cast(imageCreationParams); - const bool needsRecreation = cachedImageType != manager.imageType || cachedParams != currentParams; + const bool needsRecreation = cachedImageType != imageType || cachedParams != currentParams; if (needsRecreation) { // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. 
- evictCallback(manager.georeferencedImageParams.imageID, *cachedImageRecord); + evictCallback(imageID, *cachedImageRecord); // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image *cachedImageRecord = CachedImageRecord(currentFrameIndex); @@ -724,11 +724,11 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { // Attempt to create a GPU image and image view for this texture. - ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(manager.georeferencedImageParams.imageID)); + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); if (allocResults.isValid()) { - cachedImageRecord->type = manager.imageType; + cachedImageRecord->type = imageType; cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN cachedImageRecord->allocationOffset = allocResults.allocationOffset; @@ -762,7 +762,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( } // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation - imagesCache->erase(manager.georeferencedImageParams.imageID); + imagesCache->erase(imageID); } } else @@ -896,21 +896,28 @@ void DrawResourcesFiller::addGeoreferencedImage(StreamedImageManager& manager, c return; } + // Query imageType + auto cachedImageRecord = imagesCache->peek(manager.imageID); + + manager.maxResidentTiles.x = 
cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / manager.TileSize; + manager.maxResidentTiles.y = manager.maxResidentTiles.x; + // Create a "sliding window OBB" that we use to offset tiles + manager.fromTopLeftOBB.topLeft = manager.georeferencedImageParams.worldspaceOBB.topLeft; + manager.fromTopLeftOBB.dirU = manager.georeferencedImageParams.worldspaceOBB.dirU * float32_t(manager.TileSize * manager.maxResidentTiles.x) / float32_t(manager.georeferencedImageParams.imageExtents.x); + manager.fromTopLeftOBB.aspectRatio = float32_t(manager.maxResidentTiles.y) / float32_t(manager.maxResidentTiles.x); + // Generate upload data - auto uploadData = manager.generateTileUploadData(NDCToWorld); + auto uploadData = manager.generateTileUploadData(cachedImageRecord->type, NDCToWorld); - // Queue image uploads - if necessary - if (manager.imageType == ImageType::GEOREFERENCED_STREAMED) - { - for (const auto& imageCopy : uploadData.tiles) - queueGeoreferencedImageCopy_Internal(manager.georeferencedImageParams.imageID, imageCopy); - } + // Queue image uploads + for (const auto& imageCopy : uploadData.tiles) + queueGeoreferencedImageCopy_Internal(manager.imageID, imageCopy); GeoreferencedImageInfo info = {}; info.topLeft = uploadData.worldspaceOBB.topLeft; info.dirU = uploadData.worldspaceOBB.dirU; info.aspectRatio = uploadData.worldspaceOBB.aspectRatio; - info.textureID = getImageIndexFromID(manager.georeferencedImageParams.imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + info.textureID = getImageIndexFromID(manager.imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory info.minUV = uploadData.minUV; info.maxUV = uploadData.maxUV; if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) 
@@ -2497,48 +2504,45 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc return ret; } -void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, StreamedImageManager& manager) +ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params) { - auto& georeferencedImageParams = manager.georeferencedImageParams; // Decide whether the image can reside fully into memory rather than get streamed. // TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not // TODO: add criterial that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) - const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y; + const bool betterToResideFullyInMem = params.imageExtents.x * params.imageExtents.y <= params.viewportExtents.x * params.viewportExtents.y; + + ImageType imageType; if (betterToResideFullyInMem) - manager.imageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; + imageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; else - manager.imageType = ImageType::GEOREFERENCED_STREAMED; + imageType = ImageType::GEOREFERENCED_STREAMED; outImageParams.type = asset::IImage::ET_2D; outImageParams.samples = asset::IImage::ESCF_1_BIT; - outImageParams.format = georeferencedImageParams.format; + outImageParams.format = params.format; - if (manager.imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) + if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) { - outImageParams.extent = { georeferencedImageParams.imageExtents.x, georeferencedImageParams.imageExtents.y, 1u }; + outImageParams.extent 
= { params.imageExtents.x, params.imageExtents.y, 1u }; } else { // Enough to cover twice the viewport at mip 0 (so that when zooming out to mip 1 the whole viewport still gets covered with mip 0 tiles) // and in any rotation (taking the longest side suffices). Can be increased to avoid frequent tile eviction when moving the camera at mip close to 1 - const uint32_t longestSide = nbl::hlsl::max(georeferencedImageParams.viewportExtents.x, georeferencedImageParams.viewportExtents.y); - const uint32_t gpuImageSidelength = 2 * (core::roundUp(longestSide, manager.TileSize) + manager.TileSize); + const uint32_t diagonal = static_cast(nbl::hlsl::ceil( + nbl::hlsl::sqrt(static_cast(params.viewportExtents.x * params.viewportExtents.x + + params.viewportExtents.y * params.viewportExtents.y)) + ) + ); + const uint32_t gpuImageSidelength = 2 * core::roundUp(diagonal, StreamedImageManager::TileSize) + StreamedImageManager::PaddingTiles * StreamedImageManager::TileSize; outImageParams.extent = { gpuImageSidelength, gpuImageSidelength, 1u }; - manager.maxResidentTiles.x = gpuImageSidelength / manager.TileSize; - manager.maxResidentTiles.y = manager.maxResidentTiles.x; - // Create a "sliding window OBB" that we use to offset tiles - manager.fromTopLeftOBB.topLeft = georeferencedImageParams.worldspaceOBB.topLeft; - manager.fromTopLeftOBB.dirU = georeferencedImageParams.worldspaceOBB.dirU * float32_t(manager.TileSize * manager.maxResidentTiles.x) / float32_t(georeferencedImageParams.imageExtents.x); - manager.fromTopLeftOBB.aspectRatio = float32_t(manager.maxResidentTiles.y) / float32_t(manager.maxResidentTiles.x); - // I think aspect ratio can stay the same since worldspace OBB and imageExtents should have same aspect ratio. 
- // If the image can be stretched/sheared and not simply rotated, then the aspect ratio *might* have to change, although I think that's covered by - // the OBB's aspect ratio } - outImageParams.mipLevels = 1u; // TODO: Later do mipmapping outImageParams.arrayLayers = 1u; + + return imageType; } void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) @@ -2675,8 +2679,8 @@ void DrawResourcesFiller::flushDrawObjects() } } -DrawResourcesFiller::StreamedImageManager::StreamedImageManager(GeoreferencedImageParams&& _georeferencedImageParams) - : georeferencedImageParams(std::move(_georeferencedImageParams)) +DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams) + : imageID(_imageID), georeferencedImageParams(std::move(_georeferencedImageParams)) { maxImageTileIndices = georeferencedImageParams.imageExtents / uint32_t2(TileSize, TileSize); // If it fits perfectly along any dimension, we need one less tile with this scheme @@ -2715,10 +2719,10 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(GeoreferencedIma world2Tile = nbl::hlsl::mul(scaleMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); } -DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const float64_t3x3& NDCToWorld) +DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld) { // I think eventually it's better to just transform georeferenced images that aren't big enough into static images and forget about them - if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) + if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) //Pass imageID as parameter, down from the addGeoRef call return TileUploadData{ {}, georeferencedImageParams.worldspaceOBB }; // Following 
need only be done if image is actually streamed diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 74a503549..6d29e9637 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -18,6 +18,8 @@ static_assert(sizeof(DrawObject) == 16u); static_assert(sizeof(MainObject) == 20u); static_assert(sizeof(LineStyle) == 88u); +//TODO[Francisco]: Update briefs for geotex related functions + // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. // ! Drawing new objects (polylines, hatches, etc.) should go through this function. @@ -126,8 +128,9 @@ struct DrawResourcesFiller { friend class DrawResourcesFiller; constexpr static uint32_t TileSize = 128u; + constexpr static uint32_t PaddingTiles = 2; - StreamedImageManager(GeoreferencedImageParams&& _georeferencedImageParams); + StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams); struct TileUploadData { @@ -137,14 +140,13 @@ struct DrawResourcesFiller float32_t2 maxUV; }; - TileUploadData generateTileUploadData(const float64_t3x3& NDCToWorld); + TileUploadData generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld); - // This and the logic they're in will likely change later with Toroidal updating - protected: + image_id imageID; GeoreferencedImageParams georeferencedImageParams; - uint32_t2 maxResidentTiles = {}; + // This and the logic they're in will likely change later with Toroidal updating private: - ImageType imageType; + uint32_t2 maxResidentTiles = {}; uint32_t2 minLoadedTileIndices = {}; uint32_t2 maxLoadedTileIndices = {}; uint32_t2 maxImageTileIndices = {}; @@ -375,7 +377,7 @@ struct DrawResourcesFiller * @return true if the image was successfully cached and is ready for use; false if allocation failed. * [TODO]: should be internal protected member function. 
*/ - bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(StreamedImageManager& manager, SIntendedSubmitInfo& intendedNextSubmit); + bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); // [TODO]: should be internal protected member function. bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); @@ -695,7 +697,7 @@ struct DrawResourcesFiller * @param[out] outImageType Indicates whether the image should be fully resident or streamed. * @param[in] manager Manager for the georeferenced image */ - void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, StreamedImageManager& manager); + ImageType determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params); /** * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 517fc0e06..5329a5bda 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -28,7 +28,6 @@ struct GeoreferencedImageParams uint32_t2 imageExtents = {}; uint32_t2 viewportExtents = {}; asset::E_FORMAT format = {}; - image_id imageID; // For now it's going to be fully resident in memory, later on it's probably going to be a streamer class most likely. core::smart_refctd_ptr geoReferencedImage; // TODO: Need to add other stuff later. 
diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 540f7ec86..e420a81af 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -3687,6 +3687,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } else if (mode == ExampleMode::CASE_12) { + image_id tiledGridID = 6996; GeoreferencedImageParams tiledGridParams; auto& tiledGridCreationParams = bigTiledGrid->getCreationParameters(); // Position at topLeft viewport @@ -3703,12 +3704,11 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio tiledGridParams.imageExtents = { tiledGridCreationParams.extent.width, tiledGridCreationParams.extent.height}; tiledGridParams.viewportExtents = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; tiledGridParams.format = tiledGridCreationParams.format; - tiledGridParams.imageID = 6996; tiledGridParams.geoReferencedImage = bigTiledGrid; - DrawResourcesFiller::StreamedImageManager tiledGridManager(std::move(tiledGridParams)); + DrawResourcesFiller::StreamedImageManager tiledGridManager(tiledGridID, std::move(tiledGridParams)); - drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridManager, intendedNextSubmit); + drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridID, tiledGridManager.georeferencedImageParams, intendedNextSubmit); drawResourcesFiller.addGeoreferencedImage(tiledGridManager, inverseViewProj, intendedNextSubmit); } From 6a1b76ed0b7ed810dba7dd248c2c0821e91c6b4a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 13 Aug 2025 16:32:06 -0300 Subject: [PATCH 11/29] Some names are wrong here, but the example still works --- 62_CAD/DrawResourcesFiller.cpp | 134 ++++++++++++++++++++------------- 62_CAD/DrawResourcesFiller.h | 91 +++++++++++++++++++++- 62_CAD/Images.h | 5 +- 62_CAD/main.cpp | 60 ++++++++++----- 4 files changed, 208 insertions(+), 82 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 
547feca64..5d2fea009 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2679,8 +2679,8 @@ void DrawResourcesFiller::flushDrawObjects() } } -DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams) - : imageID(_imageID), georeferencedImageParams(std::move(_georeferencedImageParams)) +DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams, ImageLoader&& _loader) + : imageID(_imageID), georeferencedImageParams(std::move(_georeferencedImageParams)), loader(std::move(_loader)) { maxImageTileIndices = georeferencedImageParams.imageExtents / uint32_t2(TileSize, TileSize); // If it fits perfectly along any dimension, we need one less tile with this scheme @@ -2709,14 +2709,8 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageI float64_t2 secondRow = dirV / dirVLengthSquared; float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); - // 3. Scaling. The vector obtained by doing `CoB * displacement * p` are now the coordinates in the `span{dirU, dirV}`, which would be `uv` coordinates in [0,1]^2 - // (or outside this range for points not in the image). 
To get tile lattice coordinates, we need to scale this number by an nTiles vector which counts - // (fractionally) how many tiles fit in the image along each axis - float32_t2 nTiles = float32_t2(georeferencedImageParams.imageExtents) / float32_t2(TileSize, TileSize); - float64_t2x2 scaleMatrix(nTiles.x, 0., 0., nTiles.y); - // Put them all together - world2Tile = nbl::hlsl::mul(scaleMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); + world2UV = nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix); } DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld) @@ -2725,55 +2719,38 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) //Pass imageID as parameter, down from the addGeoRef call return TileUploadData{ {}, georeferencedImageParams.worldspaceOBB }; - // Following need only be done if image is actually streamed - - // Using Vulkan NDC, the viewport has coordinates in the range [-1, -1] x [1,1]. 
First we get the world coordinates of the viewport corners, in homogenous - const float64_t3 topLeftNDCH(-1., -1., 1.); - const float64_t3 topRightNDCH(1., -1., 1.); - const float64_t3 bottomLeftNDCH(-1., 1., 1.); - const float64_t3 bottomRightNDCH(1., 1., 1.); - - const float64_t3 topLeftWorldH = nbl::hlsl::mul(NDCToWorld, topLeftNDCH); - const float64_t3 topRightWorldH = nbl::hlsl::mul(NDCToWorld, topRightNDCH); - const float64_t3 bottomLeftWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftNDCH); - const float64_t3 bottomRightWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightNDCH); - - // We can use `world2Tile` to get tile lattice coordinates for each of these points - const float64_t2 topLeftTileLattice = nbl::hlsl::mul(world2Tile, topLeftWorldH); - const float64_t2 topRightTileLattice = nbl::hlsl::mul(world2Tile, topRightWorldH); - const float64_t2 bottomLeftTileLattice = nbl::hlsl::mul(world2Tile, bottomLeftWorldH); - const float64_t2 bottomRightTileLattice = nbl::hlsl::mul(world2Tile, bottomRightWorldH); - - // Get the min and max of each lattice coordinate - const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); - const float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); - const float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); - - const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); - const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); - const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); - - // Floor them to get an integer for the tiles they're in - const int32_t2 minAllFloored = nbl::hlsl::floor(minAll); - const int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll); - - // Clamp them to reasonable tile indices - minLoadedTileIndices = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices)); - maxLoadedTileIndices = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), 
nbl::hlsl::min(int32_t2(maxImageTileIndices), int32_t2(minLoadedTileIndices + maxResidentTiles - uint32_t2(1,1)))); + currentMappedRegion = computeViewportTileAlignedObb(NDCToWorld); // Now we have the indices of the tiles we want to upload, so create the vector of `StreamedImageCopies` - 1 per tile. core::vector tiles; - tiles.reserve((maxLoadedTileIndices.x - minLoadedTileIndices.x + 1) * (maxLoadedTileIndices.y - minLoadedTileIndices.y + 1)); + uint32_t nTiles = (currentMappedRegion.bottomRight.x - currentMappedRegion.topLeft.x + 1) * (currentMappedRegion.topLeft.y - currentMappedRegion.topLeft.y + 1); + tiles.reserve(nTiles); // Assuming a 1 pixel per block format - otherwise math here gets a bit trickier auto bytesPerPixel = getTexelOrBlockBytesize(georeferencedImageParams.format); const size_t bytesPerSide = bytesPerPixel * TileSize; // Dangerous code - assumes image can be perfectly covered with tiles. Otherwise will need to handle edge cases - for (uint32_t tileX = minLoadedTileIndices.x; tileX <= maxLoadedTileIndices.x; tileX++) + for (uint32_t tileX = currentMappedRegion.topLeft.x; tileX <= currentMappedRegion.bottomRight.x; tileX++) { - for (uint32_t tileY = minLoadedTileIndices.y; tileY <= maxLoadedTileIndices.y; tileY++) + for (uint32_t tileY = currentMappedRegion.topLeft.y; tileY <= currentMappedRegion.bottomRight.y; tileY++) { + // Flush the loaded sections into the buffer - they should be done uploading by now + loader.clear(); + // Reserve enough sections + loader.reserve(nTiles); + auto tile = loader.load(uint32_t2(tileX * TileSize, tileY * TileSize), uint32_t2(TileSize, TileSize)); + // Alias the buffer + asset::IBuffer::SCreationParams bufParams = { .size = tile->getBuffer()->getSize(), .usage = tile->getBuffer()->getUsageFlags() }; + ICPUBuffer::SCreationParams cpuBufParams(std::move(bufParams)); + cpuBufParams.data = tile->getBuffer()->getPointer(); + cpuBufParams.memoryResource = core::getNullMemoryResource(); + auto aliasedBuffer = 
ICPUBuffer::create(std::move(cpuBufParams), nbl::core::adopt_memory_t{}); + + // The math here is like this because of the buffer we're getting (full image in the emulated case) + // When moving to actual ECW loading, bufferOffset will be 0, bufferRowLength will be the extent.width of the loaded section, + // imageExtent will be the extent of the loaded section, and imageOffset will be the appropriate offset (we will be loading whole sections of tiles + // that can be made into a rectangle instead of tile by tile) asset::IImage::SBufferCopy bufCopy; bufCopy.bufferOffset = (tileY * (maxImageTileIndices.x + 1) * TileSize + tileX) * bytesPerSide; bufCopy.bufferRowLength = georeferencedImageParams.imageExtents.x; @@ -2782,12 +2759,12 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S bufCopy.imageSubresource.mipLevel = 0u; bufCopy.imageSubresource.baseArrayLayer = 0u; bufCopy.imageSubresource.layerCount = 1u; - bufCopy.imageOffset = { (tileX - minLoadedTileIndices.x) * TileSize, (tileY - minLoadedTileIndices.y) * TileSize, 0u }; + bufCopy.imageOffset = { (tileX - currentMappedRegion.topLeft.x) * TileSize, (tileY - currentMappedRegion.topLeft.y) * TileSize, 0u }; bufCopy.imageExtent.width = TileSize; bufCopy.imageExtent.height = TileSize; bufCopy.imageExtent.depth = 1; - tiles.emplace_back(georeferencedImageParams.format, georeferencedImageParams.geoReferencedImage->getBuffer(), std::move(bufCopy)); + tiles.emplace_back(georeferencedImageParams.format, aliasedBuffer, std::move(bufCopy)); } } @@ -2796,8 +2773,8 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S OrientedBoundingBox2D worldspaceOBB = fromTopLeftOBB; const float32_t2 dirV = float32_t2(worldspaceOBB.dirU.y, -worldspaceOBB.dirU.x) * worldspaceOBB.aspectRatio; - worldspaceOBB.topLeft += worldspaceOBB.dirU * float32_t(minLoadedTileIndices.x) / float32_t(maxResidentTiles.x); - worldspaceOBB.topLeft += dirV * float32_t(minLoadedTileIndices.y) / 
float32_t(maxResidentTiles.y); + worldspaceOBB.topLeft += worldspaceOBB.dirU * float32_t(currentMappedRegion.topLeft.x) / float32_t(maxResidentTiles.x); + worldspaceOBB.topLeft += dirV * float32_t(currentMappedRegion.topLeft.y) / float32_t(maxResidentTiles.y); // Compute minUV, maxUV // Since right now we don't shift the obb around, minUV will always be (0,0), but this is bound to change later on (shifting obb will happen when we want to reuse tiles and not @@ -2806,14 +2783,14 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S // By default we have a large-as-possible obb so maxUV is (1,1). This is bound to change in the general case when we shift obb. However, since right now we need to shrink the obb // to ensure we don't draw outside the bounds of the real image, we also need to change the maxUV; float32_t2 maxUV(1.f, 1.f); - int32_t excessTiles = minLoadedTileIndices.x + maxResidentTiles.x - 1 - maxImageTileIndices.x; + int32_t excessTiles = currentMappedRegion.topLeft.x + maxResidentTiles.x - 1 - maxImageTileIndices.x; if (excessTiles > 0) { // Shrink obb to only fit necessary tiles, compute maxUV.x which turns out to be exactly the shrink factor for dirU. maxUV.x = float32_t(maxResidentTiles.x - excessTiles) / maxResidentTiles.x; } // De the same along the other axis - excessTiles = minLoadedTileIndices.y + maxResidentTiles.y - 1 - maxImageTileIndices.y; + excessTiles = currentMappedRegion.topLeft.y + maxResidentTiles.y - 1 - maxImageTileIndices.y; if (excessTiles > 0) { // Analogously, maxUV.y is the shrink factor for dirV. 
@@ -2827,4 +2804,53 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S return TileUploadData{ std::move(tiles), worldspaceOBB, minUV, maxUV }; -} \ No newline at end of file +} + +float64_t DrawResourcesFiller::StreamedImageManager::computeViewportMipLevel(const float64_t3x3& NDCToWorld, float64_t2 viewportExtents) +{ + const auto viewportWidthVectorWorld = nbl::hlsl::mul(NDCToWorld, topRightViewportNDCH - topLeftViewportNDCH); + const auto viewportHeightVectorWorld = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDCH - topLeftViewportNDCH); + // Abuse of notation, we're passing a vector not a point, won't be affected by offset so that's ok + const auto viewportWidthImagePixelLengthVector = transformWorldCoordsToPixelCoords(viewportWidthVectorWorld); + const auto viewportHeightImagePixelLengthVector = transformWorldCoordsToPixelCoords(viewportHeightVectorWorld); + + const auto viewportWidthImagePixelLength = nbl::hlsl::length(viewportWidthImagePixelLengthVector); + const auto viewportHeightImagePixelLength = nbl::hlsl::length(viewportHeightImagePixelLengthVector); + + // Mip is decided based on max of these + const auto maxPixelLength = nbl::hlsl::max(viewportWidthImagePixelLength, viewportHeightImagePixelLength); + return maxPixelLength / nbl::hlsl::max(viewportExtents.x, viewportExtents.y); +} + +DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFiller::StreamedImageManager::computeViewportTileAlignedObb(const float64_t3x3& NDCToWorld) +{ + const float64_t3 topLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDCH); + const float64_t3 topRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, topRightViewportNDCH); + const float64_t3 bottomLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDCH); + const float64_t3 bottomRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightViewportNDCH); + + // We can use `world2Tile` to get tile lattice coordinates for each of these points + 
const float64_t2 topLeftTileLattice = transformWorldCoordsToTileCoords(topLeftViewportWorldH); + const float64_t2 topRightTileLattice = transformWorldCoordsToTileCoords(topRightViewportWorldH); + const float64_t2 bottomLeftTileLattice = transformWorldCoordsToTileCoords(bottomLeftViewportWorldH); + const float64_t2 bottomRightTileLattice = transformWorldCoordsToTileCoords(bottomRightViewportWorldH); + + // Get the min and max of each lattice coordinate + const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); + const float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); + const float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); + + const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); + const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); + const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); + + // Floor them to get an integer for the tiles they're in + const int32_t2 minAllFloored = nbl::hlsl::floor(minAll); + const int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll); + + // Clamp them to reasonable tile indices + TileLatticeAlignedObb retVal = {}; + retVal.topLeft = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices)); + retVal.bottomRight = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), nbl::hlsl::min(int32_t2(maxImageTileIndices), int32_t2(currentMappedRegion.topLeft + maxResidentTiles - uint32_t2(1, 1)))); + return retVal; +} diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 6d29e9637..3855afba6 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -123,6 +123,66 @@ struct DrawResourcesFiller } }; + // Used to load pieces of an ECW from disk - currently just emulated + struct ImageLoader + { + ImageLoader(core::smart_refctd_ptr&& _geoReferencedImage) : geoReferencedImage(std::move(_geoReferencedImage)) {} + + // Emulates the loading of a 
rectangle from the original image + core::smart_refctd_ptr load(uint32_t2 offset, uint32_t2 extent) + { + // Create a new buffer pointing to same data - this is not what the class will be doing when streaming ECW + auto imageBuffer = geoReferencedImage->getBuffer(); + asset::IBuffer::SCreationParams bufCreationParams = { .size = imageBuffer->getSize(), .usage = imageBuffer->getUsageFlags() }; + ICPUBuffer::SCreationParams cpuBufCreationParams(std::move(bufCreationParams)); + cpuBufCreationParams.data = imageBuffer->getPointer(); + cpuBufCreationParams.memoryResource = core::getNullMemoryResource(); + auto imageBufferAlias = ICPUBuffer::create(std::move(cpuBufCreationParams), core::adopt_memory_t{}); + // Now set up the image region + auto bytesPerPixel = getTexelOrBlockBytesize(geoReferencedImage->getCreationParameters().format); + + auto regions = core::make_refctd_dynamic_array>(1u); + auto& region = regions->front(); + region.bufferOffset = (offset.y * geoReferencedImage->getCreationParameters().extent.width + offset.x) * bytesPerPixel; + region.bufferRowLength = geoReferencedImage->getCreationParameters().extent.width; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = 1u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent.width = extent.x; + region.imageExtent.height = extent.y; + region.imageExtent.depth = 1; + + ICPUImage::SCreationParams loadedImageParams = geoReferencedImage->getCreationParameters(); + loadedImageParams.extent = { extent.x, extent.y, 1u }; + auto loadedImage = ICPUImage::create(std::move(loadedImageParams)); + loadedImage->setBufferAndRegions(std::move(imageBufferAlias), regions); + + loadedSections.push_back(loadedImage); + + return loadedImage; + } + + void clear() + { + loadedSections.clear(); + } + + void reserve(uint32_t sections) + { + 
loadedSections.reserve(sections); + } + + // This will be the path to the image + core::smart_refctd_ptr geoReferencedImage; + private: + // This will be actually loaded sections (each section is a rectangle of one or more tiles) of the above image. Since we alias buffers for uploads, + // we want this class to track their lifetime so they don't get deallocated before they get uploaded. + core::vector> loadedSections; + }; + // @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. struct StreamedImageManager { @@ -130,7 +190,18 @@ struct DrawResourcesFiller constexpr static uint32_t TileSize = 128u; constexpr static uint32_t PaddingTiles = 2; - StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams); + constexpr static float64_t3 topLeftViewportNDCH = float64_t3(-1.0, -1.0, 1.0); + constexpr static float64_t3 topRightViewportNDCH = float64_t3(1.0, -1.0, 1.0); + constexpr static float64_t3 bottomLeftViewportNDCH = float64_t3(-1.0, 1.0, 1.0); + constexpr static float64_t3 bottomRightViewportNDCH = float64_t3(1.0, 1.0, 1.0); + + StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams, ImageLoader&& _loader); + + struct TileLatticeAlignedObb + { + uint32_t2 topLeft; + uint32_t2 bottomRight; + }; struct TileUploadData { @@ -140,20 +211,32 @@ struct DrawResourcesFiller float32_t2 maxUV; }; + // Right now it's generating tile-by-tile. Can be improved to produce at worst 4 different rectangles to load (depending on how we need to load tiles) TileUploadData generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld); + // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image + // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial + float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoordsH) {return nbl::hlsl::mul(world2UV, worldCoordsH);} + float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoordsH) { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoordsH); } + float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoordsH) { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoordsH); } + + // Compute repeated here, can both be done together if necessary + float64_t computeViewportMipLevel(const float64_t3x3& NDCToWorld, float64_t2 viewportExtents); + TileLatticeAlignedObb computeViewportTileAlignedObb(const float64_t3x3& NDCToWorld); + image_id imageID; GeoreferencedImageParams georeferencedImageParams; // This and the logic they're in will likely change later with Toroidal updating private: + uint32_t currentResidentMipLevel = {}; uint32_t2 maxResidentTiles = {}; - uint32_t2 minLoadedTileIndices = {}; - uint32_t2 maxLoadedTileIndices = {}; uint32_t2 maxImageTileIndices = {}; - float64_t2x3 world2Tile = {}; + TileLatticeAlignedObb currentMappedRegion = {}; + float64_t2x3 world2UV = {}; // Worldspace OBB that covers the top left `maxResidentTiles.x x maxResidentTiles.y` tiles of the image. // We shift this OBB by appropriate tile offsets when loading tiles OrientedBoundingBox2D fromTopLeftOBB = {}; + ImageLoader loader; }; DrawResourcesFiller(); diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 5329a5bda..2e66f3c44 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -28,9 +28,6 @@ struct GeoreferencedImageParams uint32_t2 imageExtents = {}; uint32_t2 viewportExtents = {}; asset::E_FORMAT format = {}; - // For now it's going to be fully resident in memory, later on it's probably going to be a streamer class most likely. - core::smart_refctd_ptr geoReferencedImage; - // TODO: Need to add other stuff later. 
}; /** @@ -207,7 +204,7 @@ class ImagesCache : public core::ResizableLRUCache struct StreamedImageCopy { asset::E_FORMAT srcFormat; - ICPUBuffer* srcBuffer; // Make it 'std::future' later? + smart_refctd_ptr srcBuffer; // Make it 'std::future' later? asset::IImage::SBufferCopy region; }; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index e420a81af..62496ef64 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -49,6 +49,7 @@ static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic +static constexpr bool textCameraRotation = true; enum class ExampleMode { @@ -1497,17 +1498,9 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio projectionToNDC = m_Camera.constructViewProjection(); // TEST CAMERA ROTATION -#if 0 - // double rotation = 0.25 * PI(); - double rotation = abs(cos(m_timeElapsed * 0.0004)) * 0.25 * PI() ; - float64_t2 rotationVec = float64_t2(cos(rotation), sin(rotation)); - float64_t3x3 rotationParameter = float64_t3x3 { - rotationVec.x, rotationVec.y, 0.0, - -rotationVec.y, rotationVec.x, 0.0, - 0.0, 0.0, 1.0 - }; - projectionToNDC = nbl::hlsl::mul(projectionToNDC, rotationParameter); -#endif + if constexpr (textCameraRotation) + projectionToNDC = rotateBasedOnTime(projectionToNDC); + Globals globalData = {}; uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); globalData.pointers = { @@ -3687,30 +3680,45 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } else if (mode == ExampleMode::CASE_12) { + const static float64_t3 topLeftViewportH = float64_t3(-1.0, -1.0, 1.0); + const static float64_t3 topRightViewportH = float64_t3(1.0, -1.0, 1.0); + const static float64_t3 bottomLeftViewportH = 
float64_t3(-1.0, 1.0, 1.0); + const static float64_t3 bottomRightViewportH = float64_t3(1.0, 1.0, 1.0); + image_id tiledGridID = 6996; - GeoreferencedImageParams tiledGridParams; + static GeoreferencedImageParams tiledGridParams; auto& tiledGridCreationParams = bigTiledGrid->getCreationParameters(); // Position at topLeft viewport - auto inverseViewProj = nbl::hlsl::inverse(m_Camera.constructViewProjection()); - const float64_t3 topLeftViewportH = float64_t3(-1.0, -1.0, 1.0); + auto projectionToNDC = m_Camera.constructViewProjection(); + // TEST CAMERA ROTATION + if constexpr (textCameraRotation) + projectionToNDC = rotateBasedOnTime(projectionToNDC); + auto inverseViewProj = nbl::hlsl::inverse(projectionToNDC); + const static auto startingTopLeft = nbl::hlsl::mul(inverseViewProj, topLeftViewportH); tiledGridParams.worldspaceOBB.topLeft = startingTopLeft; - // Get screen pixel to match 2 viewport pixels (to test at mip border) by choosing appropriate dirU - const float64_t3 topRightViewportH = float64_t3(1.0, -1.0, 1.0); - const static auto startingViewportLengthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); - const static auto dirU = startingViewportLengthVector * float64_t(bigTiledGrid->getCreationParameters().extent.width) / float64_t(2 * m_window->getWidth()); + + // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU + const static float64_t startingImagePixelsPerViewportPixels = 2.0; + const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); + const static auto dirU = startingViewportWidthVector * float64_t(bigTiledGrid->getCreationParameters().extent.width) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); tiledGridParams.worldspaceOBB.dirU = dirU; tiledGridParams.worldspaceOBB.aspectRatio = 1.0; tiledGridParams.imageExtents = { tiledGridCreationParams.extent.width, 
tiledGridCreationParams.extent.height}; tiledGridParams.viewportExtents = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; tiledGridParams.format = tiledGridCreationParams.format; - tiledGridParams.geoReferencedImage = bigTiledGrid; - DrawResourcesFiller::StreamedImageManager tiledGridManager(tiledGridID, std::move(tiledGridParams)); + static auto bigTileGridPtr = bigTiledGrid; + static DrawResourcesFiller::ImageLoader loader(std::move(bigTileGridPtr)); + static DrawResourcesFiller::StreamedImageManager tiledGridManager(tiledGridID, std::move(tiledGridParams), std::move(loader)); drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridID, tiledGridManager.georeferencedImageParams, intendedNextSubmit); drawResourcesFiller.addGeoreferencedImage(tiledGridManager, inverseViewProj, intendedNextSubmit); + + // Mip level calculation + // Uncomment to print mip + //std::cout << "mip level: " << tiledGridManager.computeViewportMipLevel(inverseViewProj, float64_t2(m_window->getWidth(), m_window->getHeight())) << std::endl; } } @@ -3721,6 +3729,18 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio return hlsl::length(float64_t2(idx_0_0, idx_1_0)); } + float64_t3x3 rotateBasedOnTime(const float64_t3x3& projectionMatrix) + { + double rotation = abs(cos(m_timeElapsed * 0.0004)) * 0.25 * PI(); + float64_t2 rotationVec = float64_t2(cos(rotation), sin(rotation)); + float64_t3x3 rotationParameter = float64_t3x3{ + rotationVec.x, rotationVec.y, 0.0, + -rotationVec.y, rotationVec.x, 0.0, + 0.0, 0.0, 1.0 + }; + return nbl::hlsl::mul(projectionMatrix, rotationParameter); + } + protected: std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); clock_t::time_point start; From 0ed564e5c88ba90ca5859036f33ea5cb241c3443 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Sun, 17 Aug 2025 18:52:28 -0300 Subject: [PATCH 12/29] Tile tracking done --- 62_CAD/DrawResourcesFiller.cpp | 274 +++++++++++++++++++++++---------- 
62_CAD/DrawResourcesFiller.h | 65 ++++---- 62_CAD/main.cpp | 10 +- 3 files changed, 229 insertions(+), 120 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 5d2fea009..de919b276 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -884,7 +884,7 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin endMainObject(); } -void DrawResourcesFiller::addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, uint32_t2 viewportExtents, SIntendedSubmitInfo& intendedNextSubmit) { beginMainObject(MainObjectType::STREAMED_IMAGE); @@ -899,15 +899,11 @@ void DrawResourcesFiller::addGeoreferencedImage(StreamedImageManager& manager, c // Query imageType auto cachedImageRecord = imagesCache->peek(manager.imageID); - manager.maxResidentTiles.x = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / manager.TileSize; - manager.maxResidentTiles.y = manager.maxResidentTiles.x; - // Create a "sliding window OBB" that we use to offset tiles - manager.fromTopLeftOBB.topLeft = manager.georeferencedImageParams.worldspaceOBB.topLeft; - manager.fromTopLeftOBB.dirU = manager.georeferencedImageParams.worldspaceOBB.dirU * float32_t(manager.TileSize * manager.maxResidentTiles.x) / float32_t(manager.georeferencedImageParams.imageExtents.x); - manager.fromTopLeftOBB.aspectRatio = float32_t(manager.maxResidentTiles.y) / float32_t(manager.maxResidentTiles.x); + // This is because gpu image is square + manager.maxResidentTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / manager.TileSize; // Generate upload data - auto uploadData = manager.generateTileUploadData(cachedImageRecord->type, NDCToWorld); + auto 
uploadData = manager.generateTileUploadData(cachedImageRecord->type, NDCToWorld, manager.georeferencedImageParams.viewportExtents); // Queue image uploads for (const auto& imageCopy : uploadData.tiles) @@ -2686,13 +2682,10 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageI // If it fits perfectly along any dimension, we need one less tile with this scheme maxImageTileIndices -= uint32_t2(maxImageTileIndices.x * TileSize == georeferencedImageParams.imageExtents.x, maxImageTileIndices.y * TileSize == georeferencedImageParams.imageExtents.y); - // R^2 can be covered with a lattice of image tiles. Real tiles (those actually covered by the image) are indexed in the range [0, maxImageTileIndices.x] x [0, maxImageTileIndices.y], - // but part of the algorithm to figure out which tiles need to be resident for a draw involves figuring out the coordinates in this lattice of each of the viewport corners. - // To that end, we devise an algorithm that maps a point in worldspace to its coordinates in this tile lattice: - // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point - // 2. Transform this displacement vector into a displacement into the coordinates spanned by the basis {dirU, dirV}. Notice that these vectors are still in world units - // 3. Map world units to tile units. This scaling is generally nonuniform, since it depends on the ratio of pixels to world units per coordinate. - // The product of the above matrices is `world2Tile` after the fact that it maps a world coordinate to a coordinate in the tile lattice + // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point + // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). 
+ // The composition of these matrices there fore transforms any point in worldspace into uv coordinates in imagespace + // 1. Displacement. The following matrix computes the offset for an input point `p` with homogenous worldspace coordinates. // By foregoing the homogenous coordinate we can keep only the vector part, that's why it's `2x3` and not `3x3` @@ -2713,17 +2706,88 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageI world2UV = nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix); } -DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld) +DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, const float64_t2 viewportExtents) { // I think eventually it's better to just transform georeferenced images that aren't big enough into static images and forget about them if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) //Pass imageID as parameter, down from the addGeoRef call return TileUploadData{ {}, georeferencedImageParams.worldspaceOBB }; - currentMappedRegion = computeViewportTileAlignedObb(NDCToWorld); + // Compute the mip level and tile obb we would need to encompass the viewport + TileLatticeAlignedObb viewportTileAlignedObb = computeViewportTileAlignedObb(NDCToWorld); + + // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 + // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap + // the mapped region. 
+ const bool mipBoundaryCrossed = viewportTileAlignedObb.baseMipLevel >= currentMappedRegion.baseMipLevel + 1.0 + || viewportTileAlignedObb.baseMipLevel < currentMappedRegion.baseMipLevel; + + // If any of the corners of the obb encompassing the viewport falls outside the currently mapped region, we have to remap the mapped region + const bool tileBoundaryCrossed = nbl::hlsl::any(viewportTileAlignedObb.topLeft < currentMappedRegion.topLeft) + || nbl::hlsl::any(viewportTileAlignedObb.bottomRight > currentMappedRegion.bottomRight); + + if (mipBoundaryCrossed || tileBoundaryCrossed) + { + remapCurrentRegion(viewportTileAlignedObb); + } + + // DEBUG - Sampled mip level + { + // Get world coordinates for each corner of the mapped region + const float32_t2 oneTileDirU = georeferencedImageParams.worldspaceOBB.dirU / float32_t(maxImageTileIndices.x + 1u) * float32_t(1u << currentMappedRegion.baseMipLevel); + const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * georeferencedImageParams.worldspaceOBB.aspectRatio; + float64_t2 topLeftMappedRegionWorld = georeferencedImageParams.worldspaceOBB.topLeft; + topLeftMappedRegionWorld += oneTileDirU * float32_t(currentMappedRegion.topLeft.x) + oneTileDirV * float32_t(currentMappedRegion.topLeft.y); + const uint32_t2 mappedRegionTileLength = currentMappedRegion.bottomRight - currentMappedRegion.topLeft + uint32_t2(1, 1); + float64_t2 bottomRightMappedRegionWorld = topLeftMappedRegionWorld; + bottomRightMappedRegionWorld += oneTileDirU * float32_t(mappedRegionTileLength.x) + oneTileDirV * float32_t(mappedRegionTileLength.y); + + // With the above, get an affine transform that maps points in worldspace to their pixel coordinates in the mapped region tile space. This can be done by mapping + // `topLeftMappedRegionWorld -> (0,0)` and `bottomRightMappedRegionWorld -> mappedRegionPixelLength - 1` + const uint32_t2 mappedRegionPixelLength = TileSize * mappedRegionTileLength; + + // 1. 
Displacement + // Multiplying a (homogenous) point p by this matrix yields the displacement vector `p - topLeftMappedRegionWorld` + float64_t2x3 displacementMatrix(1., 0., -topLeftMappedRegionWorld.x, 0., 1., -topLeftMappedRegionWorld.y); + + // 2. Change of Basis. We again abuse the fact that the basis vectors are orthogonal + float64_t2 dirU = oneTileDirU * float32_t(mappedRegionTileLength.x); + float64_t2 dirV = oneTileDirV * float32_t(mappedRegionTileLength.y); + float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); + float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); + float64_t2 firstRow = dirU / dirULengthSquared; + float64_t2 secondRow = dirV / dirVLengthSquared; + float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); + + // 3. Rescaling. The above matrix yields uv coordinates in the rectangle spanned by the mapped region. To get pixel coordinates, we simply multiply each coordinate by + // how many pixels they span in the gpu image + float64_t2x2 scalingMatrix(mappedRegionTileLength.x * TileSize, 0.0, 0.0, mappedRegionTileLength.y * TileSize); + + // Put them all together + float64_t2x3 toPixelCoordsMatrix = nbl::hlsl::mul(scalingMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); + + // Map viewport points to world + const float64_t3 topLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDCH); + const float64_t3 topRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, topRightViewportNDCH); + const float64_t3 bottomLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDCH); + + // Get pixel coordinates vectors for each side + const float64_t2 viewportWidthPixelLengthVector = nbl::hlsl::mul(toPixelCoordsMatrix, topRightViewportWorldH - topLeftViewportWorldH); + const float64_t2 viewportHeightPixelLengthVector = nbl::hlsl::mul(toPixelCoordsMatrix, bottomLeftViewportWorldH - topLeftViewportWorldH); + + // Get pixel length for each of these vectors + const auto viewportWidthPixelLength = 
nbl::hlsl::length(viewportWidthPixelLengthVector); + const auto viewportHeightPixelLength = nbl::hlsl::length(viewportHeightPixelLengthVector); + + // Mip is decided based on max of these + float64_t pixelRatio = nbl::hlsl::max(viewportWidthPixelLength / georeferencedImageParams.viewportExtents.x, viewportHeightPixelLength / georeferencedImageParams.viewportExtents.y); + pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; - // Now we have the indices of the tiles we want to upload, so create the vector of `StreamedImageCopies` - 1 per tile. + std::cout << "Sampled mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; + } + + // We need to make every tile that covers the viewport resident, so we create the vector of `StreamedImageCopies`, 1 such copy per tile. core::vector tiles; - uint32_t nTiles = (currentMappedRegion.bottomRight.x - currentMappedRegion.topLeft.x + 1) * (currentMappedRegion.topLeft.y - currentMappedRegion.topLeft.y + 1); + uint32_t nTiles = (viewportTileAlignedObb.bottomRight.x - viewportTileAlignedObb.topLeft.x + 1) * (viewportTileAlignedObb.bottomRight.y - viewportTileAlignedObb.topLeft.y + 1); tiles.reserve(nTiles); // Assuming a 1 pixel per block format - otherwise math here gets a bit trickier @@ -2731,14 +2795,19 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S const size_t bytesPerSide = bytesPerPixel * TileSize; // Dangerous code - assumes image can be perfectly covered with tiles. Otherwise will need to handle edge cases - for (uint32_t tileX = currentMappedRegion.topLeft.x; tileX <= currentMappedRegion.bottomRight.x; tileX++) + // TODO: All of this code only works for mip 0. Needs to be changed next to upload mip 1. 
+ // Eventually this is all replaced by a few uploads to staging buffer + CS mip calc + for (uint32_t tileX = viewportTileAlignedObb.topLeft.x; tileX <= viewportTileAlignedObb.bottomRight.x; tileX++) { - for (uint32_t tileY = currentMappedRegion.topLeft.y; tileY <= currentMappedRegion.bottomRight.y; tileY++) + for (uint32_t tileY = viewportTileAlignedObb.topLeft.y; tileY <= viewportTileAlignedObb.bottomRight.y; tileY++) { - // Flush the loaded sections into the buffer - they should be done uploading by now - loader.clear(); - // Reserve enough sections - loader.reserve(nTiles); + // Compute tile offset relative to `currentMappedRegion.topLeft`, to get tile index into the gpu image + uint32_t2 gpuImageTileIndex = uint32_t2(tileX, tileY) - currentMappedRegion.topLeft; + + // If tile already resident, do nothing + if (currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) + continue; + auto tile = loader.load(uint32_t2(tileX * TileSize, tileY * TileSize), uint32_t2(TileSize, TileSize)); // Alias the buffer asset::IBuffer::SCreationParams bufParams = { .size = tile->getBuffer()->getSize(), .usage = tile->getBuffer()->getUsageFlags() }; @@ -2765,77 +2834,59 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S bufCopy.imageExtent.depth = 1; tiles.emplace_back(georeferencedImageParams.format, aliasedBuffer, std::move(bufCopy)); + + // Mark tile as resident + currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; } } // Last, we need to figure out an obb that covers only the currently loaded tiles // By shifting the `fromTopLeftOBB` an appropriate number of tiles in each direction, we get an obb that covers at least the uploaded tiles - OrientedBoundingBox2D worldspaceOBB = fromTopLeftOBB; - const float32_t2 dirV = float32_t2(worldspaceOBB.dirU.y, -worldspaceOBB.dirU.x) * worldspaceOBB.aspectRatio; - worldspaceOBB.topLeft += worldspaceOBB.dirU * float32_t(currentMappedRegion.topLeft.x) / 
float32_t(maxResidentTiles.x); - worldspaceOBB.topLeft += dirV * float32_t(currentMappedRegion.topLeft.y) / float32_t(maxResidentTiles.y); + OrientedBoundingBox2D viewportWorldspaceOBB = georeferencedImageParams.worldspaceOBB; + // The original image `dirU` corresponds to `maxImageTileIndices.x + 1` mip 0 tiles (provided it's exactly that length in tiles) + // Dividing dirU by `maxImageTileIndices + (1,1)` we therefore get a vector that spans exactly one mip 0 tile (in the u direction) in worldspace. + // Multiplying that by `2^mipLevel` we get a vector that spans exactly one mip `mipLevel` tile (in the u direction) + const float32_t2 oneTileDirU = georeferencedImageParams.worldspaceOBB.dirU / float32_t(maxImageTileIndices.x + 1u) * float32_t(1u << currentMappedRegion.baseMipLevel); + const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * viewportWorldspaceOBB.aspectRatio; + viewportWorldspaceOBB.topLeft += oneTileDirU * float32_t(viewportTileAlignedObb.topLeft.x); + viewportWorldspaceOBB.topLeft += oneTileDirV * float32_t(viewportTileAlignedObb.topLeft.y); + + const uint32_t2 viewportTileLength = viewportTileAlignedObb.bottomRight - viewportTileAlignedObb.topLeft + uint32_t2(1, 1); + viewportWorldspaceOBB.dirU = oneTileDirU * float32_t(viewportTileLength.x); + viewportWorldspaceOBB.aspectRatio = float32_t(viewportTileLength.y) / float32_t(viewportTileLength.x); - // Compute minUV, maxUV - // Since right now we don't shift the obb around, minUV will always be (0,0), but this is bound to change later on (shifting obb will happen when we want to reuse tiles and not - // reupload them on every frame in the next phase) - float32_t2 minUV(0.f, 0.f); - // By default we have a large-as-possible obb so maxUV is (1,1). This is bound to change in the general case when we shift obb. 
However, since right now we need to shrink the obb - // to ensure we don't draw outside the bounds of the real image, we also need to change the maxUV; - float32_t2 maxUV(1.f, 1.f); - int32_t excessTiles = currentMappedRegion.topLeft.x + maxResidentTiles.x - 1 - maxImageTileIndices.x; - if (excessTiles > 0) - { - // Shrink obb to only fit necessary tiles, compute maxUV.x which turns out to be exactly the shrink factor for dirU. - maxUV.x = float32_t(maxResidentTiles.x - excessTiles) / maxResidentTiles.x; - } - // De the same along the other axis - excessTiles = currentMappedRegion.topLeft.y + maxResidentTiles.y - 1 - maxImageTileIndices.y; - if (excessTiles > 0) - { - // Analogously, maxUV.y is the shrink factor for dirV. - maxUV.y = float32_t(maxResidentTiles.y - excessTiles) / maxResidentTiles.y; - } - // Recompute dirU and aspect ratio - // Multiply dirU by the shrink factor - worldspaceOBB.dirU *= maxUV.x; - // Scale the aspect ratio by the relative shrinkage of U,V. Remember our aspect ratio is V / U. - worldspaceOBB.aspectRatio *= maxUV.y / maxUV.x; + // UV logic currently ONLY works when the image not only fits an integer amount of tiles, but also when it's a PoT amount of them + // (this means every mip level also gets an integer amount of tiles). 
+ // When porting to n4ce, for the image to fit an integer amount of tiles (instead of rewriting the logic) we can just pad the right/bottom sides with alpha=0 pixels + // The UV logic will have to change to consider what happens to the last loaded tile (or, alternatively, we can also fill the empty tiles with alpha=0 pixels) - return TileUploadData{ std::move(tiles), worldspaceOBB, minUV, maxUV }; + // Compute minUV, maxUV -} - -float64_t DrawResourcesFiller::StreamedImageManager::computeViewportMipLevel(const float64_t3x3& NDCToWorld, float64_t2 viewportExtents) -{ - const auto viewportWidthVectorWorld = nbl::hlsl::mul(NDCToWorld, topRightViewportNDCH - topLeftViewportNDCH); - const auto viewportHeightVectorWorld = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDCH - topLeftViewportNDCH); - // Abuse of notation, we're passing a vector not a point, won't be affected by offset so that's ok - const auto viewportWidthImagePixelLengthVector = transformWorldCoordsToPixelCoords(viewportWidthVectorWorld); - const auto viewportHeightImagePixelLengthVector = transformWorldCoordsToPixelCoords(viewportHeightVectorWorld); - - const auto viewportWidthImagePixelLength = nbl::hlsl::length(viewportWidthImagePixelLengthVector); - const auto viewportHeightImagePixelLength = nbl::hlsl::length(viewportHeightImagePixelLengthVector); + const uint32_t2 mappedRegionTileLength = currentMappedRegion.bottomRight - currentMappedRegion.topLeft + uint32_t2(1, 1); + const float32_t2 uvPerTile = float32_t2(1.f, 1.f) / float32_t2(maxResidentTiles, maxResidentTiles); + float32_t2 minUV = uvPerTile * float32_t2(viewportTileAlignedObb.topLeft - currentMappedRegion.topLeft); + float32_t2 maxUV = minUV + uvPerTile * float32_t2(viewportTileLength); - // Mip is decided based on max of these - const auto maxPixelLength = nbl::hlsl::max(viewportWidthImagePixelLength, viewportHeightImagePixelLength); - return maxPixelLength / nbl::hlsl::max(viewportExtents.x, viewportExtents.y); + return TileUploadData{ 
std::move(tiles), viewportWorldspaceOBB, minUV, maxUV }; + } DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFiller::StreamedImageManager::computeViewportTileAlignedObb(const float64_t3x3& NDCToWorld) { + // First get world coordinates for each of the viewport's corners const float64_t3 topLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDCH); const float64_t3 topRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, topRightViewportNDCH); const float64_t3 bottomLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDCH); const float64_t3 bottomRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightViewportNDCH); - // We can use `world2Tile` to get tile lattice coordinates for each of these points + // Then we get mip 0 tiles coordinates for each of them, into the image const float64_t2 topLeftTileLattice = transformWorldCoordsToTileCoords(topLeftViewportWorldH); const float64_t2 topRightTileLattice = transformWorldCoordsToTileCoords(topRightViewportWorldH); const float64_t2 bottomLeftTileLattice = transformWorldCoordsToTileCoords(bottomLeftViewportWorldH); const float64_t2 bottomRightTileLattice = transformWorldCoordsToTileCoords(bottomRightViewportWorldH); - // Get the min and max of each lattice coordinate + // Get the min and max of each lattice coordinate to get a bounding rectangle const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); const float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); const float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); @@ -2844,13 +2895,80 @@ DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFi const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); - // Floor them to get an integer for the tiles they're in - const int32_t2 minAllFloored = nbl::hlsl::floor(minAll); - const int32_t2 
maxAllFloored = nbl::hlsl::floor(maxAll); + // Floor them to get an integer coordinate (index) for the tiles they fall in + int32_t2 minAllFloored = nbl::hlsl::floor(minAll); + int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll); + + // We're undoing a previous division. Could be avoided but won't restructure the code atp. + // Here we compute how many image pixels each side of the viewport spans + const auto viewportWidthImagePixelLengthVector = float64_t(TileSize) * (topRightTileLattice - topLeftTileLattice); + const auto viewportHeightImagePixelLengthVector = float64_t(TileSize) * (bottomLeftTileLattice - topLeftTileLattice); - // Clamp them to reasonable tile indices + // WARNING: This assumes pixels in the image are the same size along each axis. If the image is nonuniformly scaled or sheared, I *think* it should not matter + // (since the pixel span takes that transformation into account), BUT we have to check if we plan on allowing those + const auto viewportWidthImagePixelLength = nbl::hlsl::length(viewportWidthImagePixelLengthVector); + const auto viewportHeightImagePixelLength = nbl::hlsl::length(viewportHeightImagePixelLengthVector); + + // Mip is decided based on max of these + float64_t pixelRatio = nbl::hlsl::max(viewportWidthImagePixelLength / georeferencedImageParams.viewportExtents.x, viewportHeightImagePixelLength / georeferencedImageParams.viewportExtents.y); + pixelRatio = pixelRatio < 1.0 ? 
1.0 : pixelRatio; + + // DEBUG - Clamped at 0 for magnification + { + std::cout << "Real mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; + } + TileLatticeAlignedObb retVal = {}; - retVal.topLeft = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices)); - retVal.bottomRight = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), nbl::hlsl::min(int32_t2(maxImageTileIndices), int32_t2(currentMappedRegion.topLeft + maxResidentTiles - uint32_t2(1, 1)))); + retVal.baseMipLevel = nbl::hlsl::findMSB(uint32_t(nbl::hlsl::floor(pixelRatio))); + + // Current tiles are measured in mip 0. We want the result to measure mip `retVal.baseMipLevel` tiles. Each next mip level divides by 2. + minAllFloored >>= retVal.baseMipLevel; + maxAllFloored >>= retVal.baseMipLevel; + + // Clamp them to reasonable tile indices + retVal.topLeft = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices >> retVal.baseMipLevel)); + retVal.bottomRight = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices >> retVal.baseMipLevel)); + return retVal; } + +void DrawResourcesFiller::StreamedImageManager::remapCurrentRegion(const TileLatticeAlignedObb& viewportObb) +{ + // Zoomed out + if (viewportObb.baseMipLevel > currentMappedRegion.baseMipLevel + 1.0) + { + // TODO: Here we would move some mip 1 tiles to mip 0 image to save the work of reuploading them, reflect that in the tracked tiles + } + // Zoomed in + else if (viewportObb.baseMipLevel < currentMappedRegion.baseMipLevel) + { + // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles + } + // Tile boundary crossing + else + { + // TODO: Here we would shuffle some tiles around to save the work of reuploading them, reflect that in the tracked tiles + } + currentMappedRegion.baseMipLevel = viewportObb.baseMipLevel; + + // Some variation of this code would go into each branch above + uint32_t2 
viewportTileLength = viewportObb.bottomRight - viewportObb.topLeft + uint32_t2(1, 1); + int32_t2 nextTopLeft = int32_t2(viewportObb.topLeft) - int32_t2((uint32_t2(maxResidentTiles, maxResidentTiles) - viewportTileLength) / 2u); + int32_t2 nextBottomRight = nextTopLeft + int32_t2(maxResidentTiles, maxResidentTiles) - int32_t2(1, 1); + // Clamp to the left/up, and add the difference to the right/down + int32_t2 clampedTopLeft = nbl::hlsl::max(nextTopLeft, int32_t2(0, 0)); + nextBottomRight += clampedTopLeft - nextTopLeft; + nextTopLeft = clampedTopLeft; + // Now clamp to the right/down, and add the difference to the left/up, this time clamping it for sure + int32_t2 clampedBottomRight = nbl::hlsl::min(nextBottomRight, int32_t2(maxImageTileIndices) >> int32_t(currentMappedRegion.baseMipLevel)); + nextTopLeft = nbl::hlsl::max(nextTopLeft - nextBottomRight + clampedBottomRight, int32_t2(0, 0)); + currentMappedRegion.topLeft = nextTopLeft; + currentMappedRegion.bottomRight = clampedBottomRight; + + currentMappedRegionOccupancy.resize(maxResidentTiles); + for (auto i = 0u; i < maxResidentTiles; i++) + { + currentMappedRegionOccupancy[i].clear(); + currentMappedRegionOccupancy[i].resize(maxResidentTiles, false); + } +} diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 3855afba6..fc895046f 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -143,6 +143,7 @@ struct DrawResourcesFiller auto regions = core::make_refctd_dynamic_array>(1u); auto& region = regions->front(); + // Row-major location of the first pixel (topleft) given by `offset` region.bufferOffset = (offset.y * geoReferencedImage->getCreationParameters().extent.width + offset.x) * bytesPerPixel; region.bufferRowLength = geoReferencedImage->getCreationParameters().extent.width; region.bufferImageHeight = 0; @@ -150,6 +151,7 @@ struct DrawResourcesFiller region.imageSubresource.mipLevel = 0u; region.imageSubresource.baseArrayLayer = 0u; 
region.imageSubresource.layerCount = 1u; + // We're creating an image of exactly `extent` size and filling it entirely region.imageOffset = { 0u, 0u, 0u }; region.imageExtent.width = extent.x; region.imageExtent.height = extent.y; @@ -160,49 +162,37 @@ struct DrawResourcesFiller auto loadedImage = ICPUImage::create(std::move(loadedImageParams)); loadedImage->setBufferAndRegions(std::move(imageBufferAlias), regions); - loadedSections.push_back(loadedImage); - return loadedImage; } - void clear() - { - loadedSections.clear(); - } - - void reserve(uint32_t sections) - { - loadedSections.reserve(sections); - } - - // This will be the path to the image + // This will be the path to the image, and the loading will obviously be different core::smart_refctd_ptr geoReferencedImage; - private: - // This will be actually loaded sections (each section is a rectangle of one or more tiles) of the above image. Since we alias buffers for uploads, - // we want this class to track their lifetime so they don't get deallocated before they get uploaded. - core::vector> loadedSections; }; // @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. struct StreamedImageManager { friend class DrawResourcesFiller; + // These are mip 0 pixels per tile, also size of each physical tile into the gpu resident image constexpr static uint32_t TileSize = 128u; constexpr static uint32_t PaddingTiles = 2; + // These are vulkan standard, might be different in n4ce! 
constexpr static float64_t3 topLeftViewportNDCH = float64_t3(-1.0, -1.0, 1.0); constexpr static float64_t3 topRightViewportNDCH = float64_t3(1.0, -1.0, 1.0); constexpr static float64_t3 bottomLeftViewportNDCH = float64_t3(-1.0, 1.0, 1.0); constexpr static float64_t3 bottomRightViewportNDCH = float64_t3(1.0, 1.0, 1.0); - StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams, ImageLoader&& _loader); - + // Measured in tile coordinates in the image, and the mip level the tiles correspond to struct TileLatticeAlignedObb { uint32_t2 topLeft; uint32_t2 bottomRight; + uint32_t baseMipLevel; }; + // Holds gpu image upload info (what tiles to upload and where to upload them), an obb that encompasses the viewport and uv coords into the gpu image + // for the corners of that obb struct TileUploadData { core::vector tiles; @@ -211,31 +201,36 @@ struct DrawResourcesFiller float32_t2 maxUV; }; + StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams, ImageLoader&& _loader); + // Right now it's generating tile-by-tile. Can be improved to produce at worst 4 different rectangles to load (depending on how we need to load tiles) - TileUploadData generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld); - + TileUploadData generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, const float64_t2 viewportExtents); + + image_id imageID = {}; + GeoreferencedImageParams georeferencedImageParams = {}; + private: // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial - float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoordsH) {return nbl::hlsl::mul(world2UV, worldCoordsH);} + float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoordsH) { return nbl::hlsl::mul(world2UV, worldCoordsH); } float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoordsH) { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoordsH); } float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoordsH) { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoordsH); } - // Compute repeated here, can both be done together if necessary - float64_t computeViewportMipLevel(const float64_t3x3& NDCToWorld, float64_t2 viewportExtents); + // Returns a tile aligned obb that encompasses the whole viewport in "image-world". Tiles are measured in the mip level required to fit the viewport entirely + // withing the gpu image. TileLatticeAlignedObb computeViewportTileAlignedObb(const float64_t3x3& NDCToWorld); - image_id imageID; - GeoreferencedImageParams georeferencedImageParams; - // This and the logic they're in will likely change later with Toroidal updating - private: - uint32_t currentResidentMipLevel = {}; - uint32_t2 maxResidentTiles = {}; + // When the current mapped region is inadequate to fit the viewport, we compute a new mapped region + void remapCurrentRegion(const TileLatticeAlignedObb& viewportObb); + + // Sidelength of the gpu image, in tiles that are `TileSize` pixels wide + uint32_t maxResidentTiles = {}; + // Size of the image (minus 1), in tiles of `TileSize` sidelength uint32_t2 maxImageTileIndices = {}; - TileLatticeAlignedObb currentMappedRegion = {}; + // Set topLeft to extreme value so it gets recreated on first iteration + TileLatticeAlignedObb currentMappedRegion = { .topLeft = uint32_t2(std::numeric_limits::max(), std::numeric_limits::max())}; + std::vector> currentMappedRegionOccupancy = {}; + // Converts a 
point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) float64_t2x3 world2UV = {}; - // Worldspace OBB that covers the top left `maxResidentTiles.x x maxResidentTiles.y` tiles of the image. - // We shift this OBB by appropriate tile offsets when loading tiles - OrientedBoundingBox2D fromTopLeftOBB = {}; ImageLoader loader; }; @@ -469,7 +464,7 @@ struct DrawResourcesFiller void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); // This function must be called immediately after `addStaticImage` for the same imageID. - void addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit); + void addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, uint32_t2 viewportExtents, SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. 
diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 62496ef64..b9feb0cb8 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -49,7 +49,7 @@ static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic -static constexpr bool textCameraRotation = true; +static constexpr bool textCameraRotation = false; enum class ExampleMode { @@ -3699,7 +3699,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio tiledGridParams.worldspaceOBB.topLeft = startingTopLeft; // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU - const static float64_t startingImagePixelsPerViewportPixels = 2.0; + const static float64_t startingImagePixelsPerViewportPixels = 1.5; const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); const static auto dirU = startingViewportWidthVector * float64_t(bigTiledGrid->getCreationParameters().extent.width) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); tiledGridParams.worldspaceOBB.dirU = dirU; @@ -3714,11 +3714,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridID, tiledGridManager.georeferencedImageParams, intendedNextSubmit); - drawResourcesFiller.addGeoreferencedImage(tiledGridManager, inverseViewProj, intendedNextSubmit); - - // Mip level calculation - // Uncomment to print mip - //std::cout << "mip level: " << tiledGridManager.computeViewportMipLevel(inverseViewProj, float64_t2(m_window->getWidth(), m_window->getHeight())) << std::endl; + 
drawResourcesFiller.addGeoreferencedImage(tiledGridManager, inverseViewProj, tiledGridParams.viewportExtents, intendedNextSubmit); } } From f638ca69d58274cf2a41c025839444add65efdf7 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 19 Aug 2025 00:34:52 -0300 Subject: [PATCH 13/29] Cleaning up the code following PR review --- 62_CAD/DrawResourcesFiller.cpp | 181 +++++++-------- 62_CAD/DrawResourcesFiller.h | 191 +++++++--------- 62_CAD/Images.h | 5 + 62_CAD/main.cpp | 209 ++++++++++++++++-- 62_CAD/scripts/generate_mipmaps.py | 47 ++++ 62_CAD/scripts/tiled_grid.py | 2 +- .../shaders/main_pipeline/vertex_shader.hlsl | 2 +- 7 files changed, 420 insertions(+), 217 deletions(-) create mode 100644 62_CAD/scripts/generate_mipmaps.py diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index de919b276..6d160cb15 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -589,6 +589,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; cachedImageRecord->staticCPUImage = staticImage.cpuImage; + cachedImageRecord->georeferencedImageState = nullptr; } else { @@ -651,7 +652,7 @@ bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::spangetLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); @@ -735,6 +736,16 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; cachedImageRecord->staticCPUImage = nullptr; + cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(std::move(params)); + + // This is because gpu image is square + cachedImageRecord->georeferencedImageState->maxResidentTiles = 
cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; + + auto& maxImageTileIndices = cachedImageRecord->georeferencedImageState->maxImageTileIndices; + maxImageTileIndices = cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents / uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize); + // If it fits perfectly along any dimension, we need one less tile with this scheme + maxImageTileIndices -= uint32_t2(maxImageTileIndices.x * GeoreferencedImageTileSize == cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents.x, + maxImageTileIndices.y * GeoreferencedImageTileSize == cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents.y); } else { @@ -884,7 +895,7 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin endMainObject(); } -void DrawResourcesFiller::addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, uint32_t2 viewportExtents, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit) { beginMainObject(MainObjectType::STREAMED_IMAGE); @@ -897,23 +908,25 @@ void DrawResourcesFiller::addGeoreferencedImage(StreamedImageManager& manager, c } // Query imageType - auto cachedImageRecord = imagesCache->peek(manager.imageID); - - // This is because gpu image is square - manager.maxResidentTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / manager.TileSize; + auto cachedImageRecord = imagesCache->peek(imageID); + if (!cachedImageRecord) + { + m_logger.log("addGeoreferencedImage was not called immediately after enforceGeoreferencedImageAvailability!", nbl::system::ILogger::ELL_ERROR); + assert(false); + } // Generate upload data - auto uploadData = 
manager.generateTileUploadData(cachedImageRecord->type, NDCToWorld, manager.georeferencedImageParams.viewportExtents); + auto uploadData = generateTileUploadData(cachedImageRecord->type, NDCToWorld, cachedImageRecord->georeferencedImageState.get()); // Queue image uploads for (const auto& imageCopy : uploadData.tiles) - queueGeoreferencedImageCopy_Internal(manager.imageID, imageCopy); + queueGeoreferencedImageCopy_Internal(imageID, imageCopy); GeoreferencedImageInfo info = {}; info.topLeft = uploadData.worldspaceOBB.topLeft; info.dirU = uploadData.worldspaceOBB.dirU; info.aspectRatio = uploadData.worldspaceOBB.aspectRatio; - info.textureID = getImageIndexFromID(manager.imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory info.minUV = uploadData.minUV; info.maxUV = uploadData.maxUV; if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) @@ -2528,10 +2541,10 @@ ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::as // and in any rotation (taking the longest side suffices). 
Can be increased to avoid frequent tile eviction when moving the camera at mip close to 1 const uint32_t diagonal = static_cast(nbl::hlsl::ceil( nbl::hlsl::sqrt(static_cast(params.viewportExtents.x * params.viewportExtents.x - + params.viewportExtents.y * params.viewportExtents.y)) + + params.viewportExtents.y * params.viewportExtents.y)) ) ); - const uint32_t gpuImageSidelength = 2 * core::roundUp(diagonal, StreamedImageManager::TileSize) + StreamedImageManager::PaddingTiles * StreamedImageManager::TileSize; + const uint32_t gpuImageSidelength = 2 * core::roundUp(diagonal, GeoreferencedImageTileSize) + GeoreferencedImagePaddingTiles * GeoreferencedImageTileSize; outImageParams.extent = { gpuImageSidelength, gpuImageSidelength, 1u }; } @@ -2675,13 +2688,10 @@ void DrawResourcesFiller::flushDrawObjects() } } -DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams, ImageLoader&& _loader) - : imageID(_imageID), georeferencedImageParams(std::move(_georeferencedImageParams)), loader(std::move(_loader)) +smart_refctd_ptr GeoreferencedImageStreamingState::create(GeoreferencedImageParams&& _georeferencedImageParams) { - maxImageTileIndices = georeferencedImageParams.imageExtents / uint32_t2(TileSize, TileSize); - // If it fits perfectly along any dimension, we need one less tile with this scheme - maxImageTileIndices -= uint32_t2(maxImageTileIndices.x * TileSize == georeferencedImageParams.imageExtents.x, maxImageTileIndices.y * TileSize == georeferencedImageParams.imageExtents.y); - + smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); + retVal->georeferencedImageParams = std::move(_georeferencedImageParams); // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point // 2. 
Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). // The composition of these matrices there fore transforms any point in worldspace into uv coordinates in imagespace @@ -2689,13 +2699,13 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageI // 1. Displacement. The following matrix computes the offset for an input point `p` with homogenous worldspace coordinates. // By foregoing the homogenous coordinate we can keep only the vector part, that's why it's `2x3` and not `3x3` - float64_t2 topLeftWorld = georeferencedImageParams.worldspaceOBB.topLeft; + float64_t2 topLeftWorld = retVal->georeferencedImageParams.worldspaceOBB.topLeft; float64_t2x3 displacementMatrix(1., 0., - topLeftWorld.x, 0., 1., - topLeftWorld.y); // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) - float64_t2 dirU = georeferencedImageParams.worldspaceOBB.dirU; - float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * georeferencedImageParams.worldspaceOBB.aspectRatio; + float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; + float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * retVal->georeferencedImageParams.worldspaceOBB.aspectRatio; float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); float64_t2 firstRow = dirU / dirULengthSquared; @@ -2703,47 +2713,48 @@ DrawResourcesFiller::StreamedImageManager::StreamedImageManager(image_id _imageI float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); // Put them all together - world2UV = nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix); + retVal->world2UV = nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix); + return retVal; } 
-DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, const float64_t2 viewportExtents) +DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, GeoreferencedImageStreamingState* imageStreamingState) { // I think eventually it's better to just transform georeferenced images that aren't big enough into static images and forget about them if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) //Pass imageID as parameter, down from the addGeoRef call - return TileUploadData{ {}, georeferencedImageParams.worldspaceOBB }; + return TileUploadData{ {}, imageStreamingState->georeferencedImageParams.worldspaceOBB }; - // Compute the mip level and tile obb we would need to encompass the viewport - TileLatticeAlignedObb viewportTileAlignedObb = computeViewportTileAlignedObb(NDCToWorld); + // Compute the mip level and tile range we would need to encompass the viewport + GeoreferencedImageTileRange viewportTileRange = computeViewportTileRange(NDCToWorld, imageStreamingState); // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap // the mapped region. 
- const bool mipBoundaryCrossed = viewportTileAlignedObb.baseMipLevel >= currentMappedRegion.baseMipLevel + 1.0 - || viewportTileAlignedObb.baseMipLevel < currentMappedRegion.baseMipLevel; + const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel >= imageStreamingState->currentMappedRegion.baseMipLevel + 1.0 + || viewportTileRange.baseMipLevel < imageStreamingState->currentMappedRegion.baseMipLevel; // If any of the corners of the obb encompassing the viewport falls outside the currently mapped region, we have to remap the mapped region - const bool tileBoundaryCrossed = nbl::hlsl::any(viewportTileAlignedObb.topLeft < currentMappedRegion.topLeft) - || nbl::hlsl::any(viewportTileAlignedObb.bottomRight > currentMappedRegion.bottomRight); + const bool tileBoundaryCrossed = nbl::hlsl::any(viewportTileRange.topLeft < imageStreamingState->currentMappedRegion.topLeft) + || nbl::hlsl::any(viewportTileRange.bottomRight > imageStreamingState->currentMappedRegion.bottomRight); if (mipBoundaryCrossed || tileBoundaryCrossed) { - remapCurrentRegion(viewportTileAlignedObb); + imageStreamingState->remapCurrentRegion(viewportTileRange); } // DEBUG - Sampled mip level { // Get world coordinates for each corner of the mapped region - const float32_t2 oneTileDirU = georeferencedImageParams.worldspaceOBB.dirU / float32_t(maxImageTileIndices.x + 1u) * float32_t(1u << currentMappedRegion.baseMipLevel); - const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * georeferencedImageParams.worldspaceOBB.aspectRatio; - float64_t2 topLeftMappedRegionWorld = georeferencedImageParams.worldspaceOBB.topLeft; - topLeftMappedRegionWorld += oneTileDirU * float32_t(currentMappedRegion.topLeft.x) + oneTileDirV * float32_t(currentMappedRegion.topLeft.y); - const uint32_t2 mappedRegionTileLength = currentMappedRegion.bottomRight - currentMappedRegion.topLeft + uint32_t2(1, 1); + const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / 
float32_t(imageStreamingState->maxImageTileIndices.x + 1u) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); + const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * imageStreamingState->georeferencedImageParams.worldspaceOBB.aspectRatio; + float64_t2 topLeftMappedRegionWorld = imageStreamingState->georeferencedImageParams.worldspaceOBB.topLeft; + topLeftMappedRegionWorld += oneTileDirU * float32_t(imageStreamingState->currentMappedRegion.topLeft.x) + oneTileDirV * float32_t(imageStreamingState->currentMappedRegion.topLeft.y); + const uint32_t2 mappedRegionTileLength = imageStreamingState->currentMappedRegion.bottomRight - imageStreamingState->currentMappedRegion.topLeft + uint32_t2(1, 1); float64_t2 bottomRightMappedRegionWorld = topLeftMappedRegionWorld; bottomRightMappedRegionWorld += oneTileDirU * float32_t(mappedRegionTileLength.x) + oneTileDirV * float32_t(mappedRegionTileLength.y); // With the above, get an affine transform that maps points in worldspace to their pixel coordinates in the mapped region tile space. This can be done by mapping // `topLeftMappedRegionWorld -> (0,0)` and `bottomRightMappedRegionWorld -> mappedRegionPixelLength - 1` - const uint32_t2 mappedRegionPixelLength = TileSize * mappedRegionTileLength; + const uint32_t2 mappedRegionPixelLength = GeoreferencedImageTileSize * mappedRegionTileLength; // 1. Displacement // Multiplying a (homogenous) point p by this matrix yields the displacement vector `p - topLeftMappedRegionWorld` @@ -2760,7 +2771,7 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S // 3. Rescaling. The above matrix yields uv coordinates in the rectangle spanned by the mapped region. 
To get pixel coordinates, we simply multiply each coordinate by // how many pixels they span in the gpu image - float64_t2x2 scalingMatrix(mappedRegionTileLength.x * TileSize, 0.0, 0.0, mappedRegionTileLength.y * TileSize); + float64_t2x2 scalingMatrix(mappedRegionTileLength.x * GeoreferencedImageTileSize, 0.0, 0.0, mappedRegionTileLength.y * GeoreferencedImageTileSize); // Put them all together float64_t2x3 toPixelCoordsMatrix = nbl::hlsl::mul(scalingMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); @@ -2779,7 +2790,7 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S const auto viewportHeightPixelLength = nbl::hlsl::length(viewportHeightPixelLengthVector); // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportWidthPixelLength / georeferencedImageParams.viewportExtents.x, viewportHeightPixelLength / georeferencedImageParams.viewportExtents.y); + float64_t pixelRatio = nbl::hlsl::max(viewportWidthPixelLength / imageStreamingState->georeferencedImageParams.viewportExtents.x, viewportHeightPixelLength / imageStreamingState->georeferencedImageParams.viewportExtents.y); pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; std::cout << "Sampled mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; @@ -2787,72 +2798,61 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S // We need to make every tile that covers the viewport resident, so we create the vector of `StreamedImageCopies`, 1 such copy per tile. 
core::vector tiles; - uint32_t nTiles = (viewportTileAlignedObb.bottomRight.x - viewportTileAlignedObb.topLeft.x + 1) * (viewportTileAlignedObb.bottomRight.y - viewportTileAlignedObb.topLeft.y + 1); + uint32_t nTiles = (viewportTileRange.bottomRight.x - viewportTileRange.topLeft.x + 1) * (viewportTileRange.bottomRight.y - viewportTileRange.topLeft.y + 1); tiles.reserve(nTiles); // Assuming a 1 pixel per block format - otherwise math here gets a bit trickier - auto bytesPerPixel = getTexelOrBlockBytesize(georeferencedImageParams.format); - const size_t bytesPerSide = bytesPerPixel * TileSize; + auto bytesPerPixel = getTexelOrBlockBytesize(imageStreamingState->georeferencedImageParams.format); + const size_t bytesPerSide = bytesPerPixel * GeoreferencedImageTileSize; // Dangerous code - assumes image can be perfectly covered with tiles. Otherwise will need to handle edge cases // TODO: All of this code only works for mip 0. Needs to be changed next to upload mip 1. // Eventually this is all replaced by a few uploads to staging buffer + CS mip calc - for (uint32_t tileX = viewportTileAlignedObb.topLeft.x; tileX <= viewportTileAlignedObb.bottomRight.x; tileX++) + for (uint32_t tileX = viewportTileRange.topLeft.x; tileX <= viewportTileRange.bottomRight.x; tileX++) { - for (uint32_t tileY = viewportTileAlignedObb.topLeft.y; tileY <= viewportTileAlignedObb.bottomRight.y; tileY++) + for (uint32_t tileY = viewportTileRange.topLeft.y; tileY <= viewportTileRange.bottomRight.y; tileY++) { // Compute tile offset relative to `currentMappedRegion.topLeft`, to get tile index into the gpu image - uint32_t2 gpuImageTileIndex = uint32_t2(tileX, tileY) - currentMappedRegion.topLeft; + uint32_t2 gpuImageTileIndex = uint32_t2(tileX, tileY) - imageStreamingState->currentMappedRegion.topLeft; // If tile already resident, do nothing - if (currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) + if 
(imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) continue; - auto tile = loader.load(uint32_t2(tileX * TileSize, tileY * TileSize), uint32_t2(TileSize, TileSize)); - // Alias the buffer - asset::IBuffer::SCreationParams bufParams = { .size = tile->getBuffer()->getSize(), .usage = tile->getBuffer()->getUsageFlags() }; - ICPUBuffer::SCreationParams cpuBufParams(std::move(bufParams)); - cpuBufParams.data = tile->getBuffer()->getPointer(); - cpuBufParams.memoryResource = core::getNullMemoryResource(); - auto aliasedBuffer = ICPUBuffer::create(std::move(cpuBufParams), nbl::core::adopt_memory_t{}); - - // The math here is like this because of the buffer we're getting (full image in the emulated case) - // When moving to actual ECW loading, bufferOffset will be 0, bufferRowLength will be the extent.width of the loaded section, - // imageExtent will be the extent of the loaded section, and imageOffset will be the appropriate offset (we will be loading whole sections of tiles - // that can be made into a rectangle instead of tile by tile) + auto tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX * GeoreferencedImageTileSize, tileY * GeoreferencedImageTileSize), uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize), imageStreamingState->currentMappedRegion.baseMipLevel); + asset::IImage::SBufferCopy bufCopy; - bufCopy.bufferOffset = (tileY * (maxImageTileIndices.x + 1) * TileSize + tileX) * bytesPerSide; - bufCopy.bufferRowLength = georeferencedImageParams.imageExtents.x; + bufCopy.bufferOffset = 0; + bufCopy.bufferRowLength = GeoreferencedImageTileSize; bufCopy.bufferImageHeight = 0; bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; bufCopy.imageSubresource.mipLevel = 0u; bufCopy.imageSubresource.baseArrayLayer = 0u; bufCopy.imageSubresource.layerCount = 1u; - bufCopy.imageOffset = { (tileX - currentMappedRegion.topLeft.x) * TileSize, (tileY - 
currentMappedRegion.topLeft.y) * TileSize, 0u }; - bufCopy.imageExtent.width = TileSize; - bufCopy.imageExtent.height = TileSize; + bufCopy.imageOffset = { (tileX - imageStreamingState->currentMappedRegion.topLeft.x) * GeoreferencedImageTileSize, (tileY - imageStreamingState->currentMappedRegion.topLeft.y) * GeoreferencedImageTileSize, 0u }; + bufCopy.imageExtent.width = GeoreferencedImageTileSize; + bufCopy.imageExtent.height = GeoreferencedImageTileSize; bufCopy.imageExtent.depth = 1; - tiles.emplace_back(georeferencedImageParams.format, aliasedBuffer, std::move(bufCopy)); + tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); // Mark tile as resident - currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; + imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; } } // Last, we need to figure out an obb that covers only the currently loaded tiles - // By shifting the `fromTopLeftOBB` an appropriate number of tiles in each direction, we get an obb that covers at least the uploaded tiles - OrientedBoundingBox2D viewportWorldspaceOBB = georeferencedImageParams.worldspaceOBB; + OrientedBoundingBox2D viewportWorldspaceOBB = imageStreamingState->georeferencedImageParams.worldspaceOBB; // The original image `dirU` corresponds to `maxImageTileIndices.x + 1` mip 0 tiles (provided it's exactly that length in tiles) // Dividing dirU by `maxImageTileIndices + (1,1)` we therefore get a vector that spans exactly one mip 0 tile (in the u direction) in worldspace. 
// Multiplying that by `2^mipLevel` we get a vector that spans exactly one mip `mipLevel` tile (in the u direction) - const float32_t2 oneTileDirU = georeferencedImageParams.worldspaceOBB.dirU / float32_t(maxImageTileIndices.x + 1u) * float32_t(1u << currentMappedRegion.baseMipLevel); + const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->maxImageTileIndices.x + 1u) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * viewportWorldspaceOBB.aspectRatio; - viewportWorldspaceOBB.topLeft += oneTileDirU * float32_t(viewportTileAlignedObb.topLeft.x); - viewportWorldspaceOBB.topLeft += oneTileDirV * float32_t(viewportTileAlignedObb.topLeft.y); + viewportWorldspaceOBB.topLeft += oneTileDirU * float32_t(viewportTileRange.topLeft.x); + viewportWorldspaceOBB.topLeft += oneTileDirV * float32_t(viewportTileRange.topLeft.y); - const uint32_t2 viewportTileLength = viewportTileAlignedObb.bottomRight - viewportTileAlignedObb.topLeft + uint32_t2(1, 1); + const uint32_t2 viewportTileLength = viewportTileRange.bottomRight - viewportTileRange.topLeft + uint32_t2(1, 1); viewportWorldspaceOBB.dirU = oneTileDirU * float32_t(viewportTileLength.x); viewportWorldspaceOBB.aspectRatio = float32_t(viewportTileLength.y) / float32_t(viewportTileLength.x); @@ -2863,16 +2863,16 @@ DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::S // Compute minUV, maxUV - const uint32_t2 mappedRegionTileLength = currentMappedRegion.bottomRight - currentMappedRegion.topLeft + uint32_t2(1, 1); - const float32_t2 uvPerTile = float32_t2(1.f, 1.f) / float32_t2(maxResidentTiles, maxResidentTiles); - float32_t2 minUV = uvPerTile * float32_t2(viewportTileAlignedObb.topLeft - currentMappedRegion.topLeft); + const uint32_t2 mappedRegionTileLength = imageStreamingState->currentMappedRegion.bottomRight - 
imageStreamingState->currentMappedRegion.topLeft + uint32_t2(1, 1); + const float32_t2 uvPerTile = float32_t2(1.f, 1.f) / float32_t2(imageStreamingState->maxResidentTiles, imageStreamingState->maxResidentTiles); + float32_t2 minUV = uvPerTile * float32_t2(viewportTileRange.topLeft - imageStreamingState->currentMappedRegion.topLeft); float32_t2 maxUV = minUV + uvPerTile * float32_t2(viewportTileLength); return TileUploadData{ std::move(tiles), viewportWorldspaceOBB, minUV, maxUV }; } -DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFiller::StreamedImageManager::computeViewportTileAlignedObb(const float64_t3x3& NDCToWorld) +GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const float64_t3x3& NDCToWorld, const GeoreferencedImageStreamingState* imageStreamingState) { // First get world coordinates for each of the viewport's corners const float64_t3 topLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDCH); @@ -2881,10 +2881,10 @@ DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFi const float64_t3 bottomRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightViewportNDCH); // Then we get mip 0 tiles coordinates for each of them, into the image - const float64_t2 topLeftTileLattice = transformWorldCoordsToTileCoords(topLeftViewportWorldH); - const float64_t2 topRightTileLattice = transformWorldCoordsToTileCoords(topRightViewportWorldH); - const float64_t2 bottomLeftTileLattice = transformWorldCoordsToTileCoords(bottomLeftViewportWorldH); - const float64_t2 bottomRightTileLattice = transformWorldCoordsToTileCoords(bottomRightViewportWorldH); + const float64_t2 topLeftTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(topLeftViewportWorldH, GeoreferencedImageTileSize); + const float64_t2 topRightTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(topRightViewportWorldH, GeoreferencedImageTileSize); + const float64_t2 bottomLeftTileLattice 
= imageStreamingState->transformWorldCoordsToTileCoords(bottomLeftViewportWorldH, GeoreferencedImageTileSize); + const float64_t2 bottomRightTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(bottomRightViewportWorldH, GeoreferencedImageTileSize); // Get the min and max of each lattice coordinate to get a bounding rectangle const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); @@ -2901,8 +2901,8 @@ DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFi // We're undoing a previous division. Could be avoided but won't restructure the code atp. // Here we compute how many image pixels each side of the viewport spans - const auto viewportWidthImagePixelLengthVector = float64_t(TileSize) * (topRightTileLattice - topLeftTileLattice); - const auto viewportHeightImagePixelLengthVector = float64_t(TileSize) * (bottomLeftTileLattice - topLeftTileLattice); + const auto viewportWidthImagePixelLengthVector = float64_t(GeoreferencedImageTileSize) * (topRightTileLattice - topLeftTileLattice); + const auto viewportHeightImagePixelLengthVector = float64_t(GeoreferencedImageTileSize) * (bottomLeftTileLattice - topLeftTileLattice); // WARNING: This assumes pixels in the image are the same size along each axis. 
If the image is nonuniformly scaled or sheared, I *think* it should not matter // (since the pixel span takes that transformation into account), BUT we have to check if we plan on allowing those @@ -2910,7 +2910,8 @@ DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFi const auto viewportHeightImagePixelLength = nbl::hlsl::length(viewportHeightImagePixelLengthVector); // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportWidthImagePixelLength / georeferencedImageParams.viewportExtents.x, viewportHeightImagePixelLength / georeferencedImageParams.viewportExtents.y); + float64_t pixelRatio = nbl::hlsl::max(viewportWidthImagePixelLength / imageStreamingState->georeferencedImageParams.viewportExtents.x, + viewportHeightImagePixelLength / imageStreamingState->georeferencedImageParams.viewportExtents.y); pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; // DEBUG - Clamped at 0 for magnification @@ -2918,7 +2919,7 @@ DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFi std::cout << "Real mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; } - TileLatticeAlignedObb retVal = {}; + GeoreferencedImageTileRange retVal = {}; retVal.baseMipLevel = nbl::hlsl::findMSB(uint32_t(nbl::hlsl::floor(pixelRatio))); // Current tiles are measured in mip 0. We want the result to measure mip `retVal.baseMipLevel` tiles. Each next mip level divides by 2. 
@@ -2926,21 +2927,21 @@ DrawResourcesFiller::StreamedImageManager::TileLatticeAlignedObb DrawResourcesFi maxAllFloored >>= retVal.baseMipLevel; // Clamp them to reasonable tile indices - retVal.topLeft = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices >> retVal.baseMipLevel)); - retVal.bottomRight = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices >> retVal.baseMipLevel)); + retVal.topLeft = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(imageStreamingState->maxImageTileIndices >> retVal.baseMipLevel)); + retVal.bottomRight = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), int32_t2(imageStreamingState->maxImageTileIndices >> retVal.baseMipLevel)); return retVal; } -void DrawResourcesFiller::StreamedImageManager::remapCurrentRegion(const TileLatticeAlignedObb& viewportObb) +void GeoreferencedImageStreamingState::remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) { // Zoomed out - if (viewportObb.baseMipLevel > currentMappedRegion.baseMipLevel + 1.0) + if (viewportTileRange.baseMipLevel > currentMappedRegion.baseMipLevel + 1.0) { // TODO: Here we would move some mip 1 tiles to mip 0 image to save the work of reuploading them, reflect that in the tracked tiles } // Zoomed in - else if (viewportObb.baseMipLevel < currentMappedRegion.baseMipLevel) + else if (viewportTileRange.baseMipLevel < currentMappedRegion.baseMipLevel) { // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles } @@ -2949,11 +2950,11 @@ void DrawResourcesFiller::StreamedImageManager::remapCurrentRegion(const TileLat { // TODO: Here we would shuffle some tiles around to save the work of reuploading them, reflect that in the tracked tiles } - currentMappedRegion.baseMipLevel = viewportObb.baseMipLevel; + currentMappedRegion.baseMipLevel = viewportTileRange.baseMipLevel; // Some variation of this code would go into each branch above - 
uint32_t2 viewportTileLength = viewportObb.bottomRight - viewportObb.topLeft + uint32_t2(1, 1); - int32_t2 nextTopLeft = int32_t2(viewportObb.topLeft) - int32_t2((uint32_t2(maxResidentTiles, maxResidentTiles) - viewportTileLength) / 2u); + uint32_t2 viewportTileLength = viewportTileRange.bottomRight - viewportTileRange.topLeft + uint32_t2(1, 1); + int32_t2 nextTopLeft = int32_t2(viewportTileRange.topLeft) - int32_t2((uint32_t2(maxResidentTiles, maxResidentTiles) - viewportTileLength) / 2u); int32_t2 nextBottomRight = nextTopLeft + int32_t2(maxResidentTiles, maxResidentTiles) - int32_t2(1, 1); // Clamp to the left/up, and add the difference to the right/down int32_t2 clampedTopLeft = nbl::hlsl::max(nextTopLeft, int32_t2(0, 0)); diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index fc895046f..7a4f5962a 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -20,6 +20,45 @@ static_assert(sizeof(LineStyle) == 88u); //TODO[Francisco]: Update briefs for geotex related functions +// Measured in tile coordinates in the image that the range spans, and the mip level the tiles correspond to +struct GeoreferencedImageTileRange +{ + uint32_t2 topLeft; + uint32_t2 bottomRight; + uint32_t baseMipLevel; +}; + +// @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. +struct GeoreferencedImageStreamingState : public IReferenceCounted +{ + friend class DrawResourcesFiller; + +protected: + static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams); + + //image_id imageID = {}; + GeoreferencedImageParams georeferencedImageParams = {}; + std::vector> currentMappedRegionOccupancy = {}; + + // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image + // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial + float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoordsH) const { return nbl::hlsl::mul(world2UV, worldCoordsH); } + float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoordsH) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoordsH); } + float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoordsH, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoordsH); } + + // When the current mapped region is inadequate to fit the viewport, we compute a new mapped region + void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); + + // Sidelength of the gpu image, in tiles that are `GeoreferencedImageTileSize` pixels wide + uint32_t maxResidentTiles = {}; + // Size of the image (minus 1), in tiles of `GeoreferencedImageTileSize` sidelength + uint32_t2 maxImageTileIndices = {}; + // Set topLeft to extreme value so it gets recreated on first iteration + GeoreferencedImageTileRange currentMappedRegion = { .topLeft = uint32_t2(std::numeric_limits::max(), std::numeric_limits::max()) }; + // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) + float64_t2x3 world2UV = {}; +}; + // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. // ! Drawing new objects (polylines, hatches, etc.) should go through this function. 
@@ -123,116 +162,35 @@ struct DrawResourcesFiller } }; - // Used to load pieces of an ECW from disk - currently just emulated - struct ImageLoader + struct IGeoreferencedImageLoader : IReferenceCounted { - ImageLoader(core::smart_refctd_ptr&& _geoReferencedImage) : geoReferencedImage(std::move(_geoReferencedImage)) {} + virtual core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel) = 0; - // Emulates the loading of a rectangle from the original image - core::smart_refctd_ptr load(uint32_t2 offset, uint32_t2 extent) - { - // Create a new buffer pointing to same data - this is not what the class will be doing when streaming ECW - auto imageBuffer = geoReferencedImage->getBuffer(); - asset::IBuffer::SCreationParams bufCreationParams = { .size = imageBuffer->getSize(), .usage = imageBuffer->getUsageFlags() }; - ICPUBuffer::SCreationParams cpuBufCreationParams(std::move(bufCreationParams)); - cpuBufCreationParams.data = imageBuffer->getPointer(); - cpuBufCreationParams.memoryResource = core::getNullMemoryResource(); - auto imageBufferAlias = ICPUBuffer::create(std::move(cpuBufCreationParams), core::adopt_memory_t{}); - // Now set up the image region - auto bytesPerPixel = getTexelOrBlockBytesize(geoReferencedImage->getCreationParameters().format); - - auto regions = core::make_refctd_dynamic_array>(1u); - auto& region = regions->front(); - // Row-major location of the first pixel (topleft) given by `offset` - region.bufferOffset = (offset.y * geoReferencedImage->getCreationParameters().extent.width + offset.x) * bytesPerPixel; - region.bufferRowLength = geoReferencedImage->getCreationParameters().extent.width; - region.bufferImageHeight = 0; - region.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; - region.imageSubresource.mipLevel = 0u; - region.imageSubresource.baseArrayLayer = 0u; - region.imageSubresource.layerCount = 1u; - // We're creating an image of exactly `extent` size and filling it entirely - 
region.imageOffset = { 0u, 0u, 0u }; - region.imageExtent.width = extent.x; - region.imageExtent.height = extent.y; - region.imageExtent.depth = 1; - - ICPUImage::SCreationParams loadedImageParams = geoReferencedImage->getCreationParameters(); - loadedImageParams.extent = { extent.x, extent.y, 1u }; - auto loadedImage = ICPUImage::create(std::move(loadedImageParams)); - loadedImage->setBufferAndRegions(std::move(imageBufferAlias), regions); - - return loadedImage; - } + virtual uint32_t2 getExtents(std::filesystem::path imagePath) = 0; - // This will be the path to the image, and the loading will obviously be different - core::smart_refctd_ptr geoReferencedImage; + virtual asset::E_FORMAT getFormat(std::filesystem::path imagePath) = 0; }; - // @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. - struct StreamedImageManager + void setGeoreferencedImageLoader(core::smart_refctd_ptr&& _georeferencedImageLoader) { - friend class DrawResourcesFiller; - // These are mip 0 pixels per tile, also size of each physical tile into the gpu resident image - constexpr static uint32_t TileSize = 128u; - constexpr static uint32_t PaddingTiles = 2; - - // These are vulkan standard, might be different in n4ce! 
- constexpr static float64_t3 topLeftViewportNDCH = float64_t3(-1.0, -1.0, 1.0); - constexpr static float64_t3 topRightViewportNDCH = float64_t3(1.0, -1.0, 1.0); - constexpr static float64_t3 bottomLeftViewportNDCH = float64_t3(-1.0, 1.0, 1.0); - constexpr static float64_t3 bottomRightViewportNDCH = float64_t3(1.0, 1.0, 1.0); - - // Measured in tile coordinates in the image, and the mip level the tiles correspond to - struct TileLatticeAlignedObb - { - uint32_t2 topLeft; - uint32_t2 bottomRight; - uint32_t baseMipLevel; - }; - - // Holds gpu image upload info (what tiles to upload and where to upload them), an obb that encompasses the viewport and uv coords into the gpu image - // for the corners of that obb - struct TileUploadData - { - core::vector tiles; - OrientedBoundingBox2D worldspaceOBB; - float32_t2 minUV; - float32_t2 maxUV; - }; + georeferencedImageLoader = _georeferencedImageLoader; + } - StreamedImageManager(image_id _imageID, GeoreferencedImageParams&& _georeferencedImageParams, ImageLoader&& _loader); + uint32_t2 queryGeoreferencedImageExtents(std::filesystem::path imagePath) + { + return georeferencedImageLoader->getExtents(imagePath); + } - // Right now it's generating tile-by-tile. Can be improved to produce at worst 4 different rectangles to load (depending on how we need to load tiles) - TileUploadData generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, const float64_t2 viewportExtents); + asset::E_FORMAT queryGeoreferencedImageFormat(std::filesystem::path imagePath) + { + return georeferencedImageLoader->getFormat(imagePath); + } - image_id imageID = {}; - GeoreferencedImageParams georeferencedImageParams = {}; - private: - // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image - // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial - float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoordsH) { return nbl::hlsl::mul(world2UV, worldCoordsH); } - float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoordsH) { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoordsH); } - float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoordsH) { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoordsH); } - - // Returns a tile aligned obb that encompasses the whole viewport in "image-world". Tiles are measured in the mip level required to fit the viewport entirely - // withing the gpu image. - TileLatticeAlignedObb computeViewportTileAlignedObb(const float64_t3x3& NDCToWorld); - - // When the current mapped region is inadequate to fit the viewport, we compute a new mapped region - void remapCurrentRegion(const TileLatticeAlignedObb& viewportObb); - - // Sidelength of the gpu image, in tiles that are `TileSize` pixels wide - uint32_t maxResidentTiles = {}; - // Size of the image (minus 1), in tiles of `TileSize` sidelength - uint32_t2 maxImageTileIndices = {}; - // Set topLeft to extreme value so it gets recreated on first iteration - TileLatticeAlignedObb currentMappedRegion = { .topLeft = uint32_t2(std::numeric_limits::max(), std::numeric_limits::max())}; - std::vector> currentMappedRegionOccupancy = {}; - // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) - float64_t2x3 world2UV = {}; - ImageLoader loader; - }; + // These are vulkan standard, might be different in n4ce! 
+ constexpr static float64_t3 topLeftViewportNDCH = float64_t3(-1.0, -1.0, 1.0); + constexpr static float64_t3 topRightViewportNDCH = float64_t3(1.0, -1.0, 1.0); + constexpr static float64_t3 bottomLeftViewportNDCH = float64_t3(-1.0, 1.0, 1.0); + constexpr static float64_t3 bottomRightViewportNDCH = float64_t3(1.0, 1.0, 1.0); DrawResourcesFiller(); @@ -455,7 +413,7 @@ struct DrawResourcesFiller * @return true if the image was successfully cached and is ready for use; false if allocation failed. * [TODO]: should be internal protected member function. */ - bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, GeoreferencedImageParams&& params, SIntendedSubmitInfo& intendedNextSubmit); // [TODO]: should be internal protected member function. bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); @@ -463,8 +421,8 @@ struct DrawResourcesFiller // This function must be called immediately after `addStaticImage` for the same imageID. void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); - // This function must be called immediately after `addStaticImage` for the same imageID. - void addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, uint32_t2 viewportExtents, SIntendedSubmitInfo& intendedNextSubmit); + // This function must be called immediately after `ensureGeoreferencedImageAvailability_AllocateIfNeeded` for the same imageID. 
+ void addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. @@ -875,7 +833,6 @@ struct DrawResourcesFiller core::blake3_hash_t hash = {}; // actual hash, we will check in == operator size_t lookupHash = 0ull; // for containers expecting size_t hash - private: void computeBlake3Hash() @@ -908,7 +865,29 @@ struct DrawResourcesFiller uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); + + // These are mip 0 pixels per tile, also size of each physical tile into the gpu resident image + constexpr static uint32_t GeoreferencedImageTileSize = 128u; + constexpr static uint32_t GeoreferencedImagePaddingTiles = 2; + + // Returns a tile range that encompasses the whole viewport in "image-world". Tiles are measured in the mip level required to fit the viewport entirely + // withing the gpu image. + GeoreferencedImageTileRange computeViewportTileRange(const float64_t3x3& NDCToWorld, const GeoreferencedImageStreamingState* imageStreamingState); + + // Holds gpu image upload info (what tiles to upload and where to upload them), an obb that encompasses the viewport and uv coords into the gpu image + // for the corners of that obb + struct TileUploadData + { + core::vector tiles; + OrientedBoundingBox2D worldspaceOBB; + float32_t2 minUV; + float32_t2 maxUV; + }; + + // Right now it's generating tile-by-tile. 
Can be improved to produce at worst 4 different rectangles to load (depending on how we need to load tiles) + TileUploadData generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, GeoreferencedImageStreamingState* imageStreamingState); + // Flushes Current Draw Call and adds to drawCalls void flushDrawObjects(); @@ -974,6 +953,8 @@ struct DrawResourcesFiller std::unique_ptr imagesCache; smart_refctd_ptr suballocatedDescriptorSet; uint32_t imagesArrayBinding = 0u; + // Georef - pushed here rn for simplicity + core::smart_refctd_ptr georeferencedImageLoader; std::unordered_map> streamedImageCopies; }; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 2e66f3c44..5d81d7d78 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -28,6 +28,7 @@ struct GeoreferencedImageParams uint32_t2 imageExtents = {}; uint32_t2 viewportExtents = {}; asset::E_FORMAT format = {}; + std::filesystem::path storagePath = {}; }; /** @@ -108,6 +109,9 @@ struct ImageCleanup : public core::IReferenceCounted }; +// Forward declared so we can have a smart pointer in the cached record +struct GeoreferencedImageStreamingState; + struct CachedImageRecord { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; @@ -120,6 +124,7 @@ struct CachedImageRecord uint64_t allocationSize = 0ull; core::smart_refctd_ptr gpuImageView = nullptr; core::smart_refctd_ptr staticCPUImage = nullptr; // cached cpu image for uploading to gpuImageView when needed. 
+ core::smart_refctd_ptr georeferencedImageState = nullptr; // Used to track tile residency for georeferenced images // In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value CachedImageRecord(uint64_t currentFrameIndex) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index b9feb0cb8..759ba5692 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -49,7 +49,7 @@ static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic -static constexpr bool textCameraRotation = false; +static constexpr bool testCameraRotation = false; enum class ExampleMode { @@ -364,6 +364,173 @@ bool performImageFormatPromotionCopy(const core::smart_refctd_ptr>>(inCPUImage, outCPUImage); } +// Used by case 12 +struct ImageLoader : public DrawResourcesFiller::IGeoreferencedImageLoader +{ + core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel) override + { + // Image path ignored for this hardcoded example + const auto& image = mipLevels[mipLevel]; + const auto& imageBuffer = image->getBuffer(); + const core::rational bytesPerPixel = asset::getBytesPerPixel(image->getCreationParameters().format); + const size_t bytesPerRow = (bytesPerPixel * extent.x).getIntegerApprox(); + const size_t loadedImageBytes = bytesPerRow * extent.y; + asset::IBuffer::SCreationParams bufCreationParams = { .size = loadedImageBytes, .usage = imageBuffer->getCreationParams().usage}; + ICPUBuffer::SCreationParams cpuBufCreationParams(std::move(bufCreationParams)); + core::smart_refctd_ptr retVal = ICPUBuffer::create(std::move(cpuBufCreationParams)); + + // Copy row by row into the new buffer + uint8_t* dataPtr = 
reinterpret_cast(retVal->getPointer()); + const uint8_t* imageBufferDataPtr = reinterpret_cast(imageBuffer->getPointer()); + const size_t bytesPerImageRow = (bytesPerPixel * image->getCreationParameters().extent.width).getIntegerApprox(); + for (auto row = 0u; row < extent.y; row++) + { + const size_t imageBufferOffset = bytesPerImageRow * (offset.y + row) + (bytesPerPixel * offset.x).getIntegerApprox(); + std::memcpy(dataPtr + row * bytesPerRow, imageBufferDataPtr + imageBufferOffset, bytesPerRow); + } + return retVal; + } + + ImageLoader(asset::IAssetManager* assetMgr, system::ILogger* logger, video::IPhysicalDevice* physicalDevice) + : m_assetMgr(assetMgr), m_logger(logger), m_physicalDevice(physicalDevice) + { + auto loadImage = [&](const std::string& imagePath) -> smart_refctd_ptr + { + system::path m_loadCWD = ".."; + constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); + const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger, m_loadCWD); + auto bundle = m_assetMgr->getAsset(imagePath, loadParams); + auto contents = bundle.getContents(); + if (contents.empty()) + { + m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePath).c_str()); + return nullptr; + } + + smart_refctd_ptr cpuImgView; + const auto& asset = contents[0]; + switch (asset->getAssetType()) + { + case IAsset::ET_IMAGE: + { + auto image = smart_refctd_ptr_static_cast(asset); + auto& flags = image->getCreationParameters().flags; + // assert if asset is mutable + const_cast&>(flags) |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + const auto format = image->getCreationParameters().format; + + ICPUImageView::SCreationParams viewParams = { + .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, + .image = std::move(image), + .viewType = IImageView::E_TYPE::ET_2D, + .format = format, + .subresourceRange = { + .aspectMask 
= IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + } + }; + + cpuImgView = ICPUImageView::create(std::move(viewParams)); + } break; + + case IAsset::ET_IMAGE_VIEW: + cpuImgView = smart_refctd_ptr_static_cast(asset); + break; + default: + m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto loadedCPUImage = cpuImgView->getCreationParameters().image; + const auto loadedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + // Promoting the image to a format GPU supports. (so that updateImageViaStagingBuffer doesn't have to handle that each frame if overflow-submit needs to happen) + auto promotedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + promotedCPUImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = promotedCPUImageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(promotedCPUImageCreationParams.usage) + }; + promotedCPUImageCreationParams.format = m_physicalDevice->promoteImageFormat(request, video::IGPUImage::TILING::OPTIMAL); + } + + if (loadedCPUImageCreationParams.format != promotedCPUImageCreationParams.format) + { + smart_refctd_ptr promotedCPUImage = ICPUImage::create(promotedCPUImageCreationParams); + core::rational bytesPerPixel = asset::getBytesPerPixel(promotedCPUImageCreationParams.format); + + const auto extent = loadedCPUImageCreationParams.extent; + const uint32_t mipLevels = loadedCPUImageCreationParams.mipLevels; + const uint32_t arrayLayers = loadedCPUImageCreationParams.arrayLayers; + + // Only supporting 1 mip, it's just for test.. 
+ const size_t byteSize = (bytesPerPixel * extent.width * extent.height * extent.depth * arrayLayers).getIntegerApprox(); // TODO: consider mips + ICPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = byteSize; + smart_refctd_ptr promotedCPUImageBuffer = ICPUBuffer::create(std::move(bufferCreationParams)); + + auto newRegions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = newRegions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; // TODO + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = arrayLayers; + region.bufferOffset = 0u; + region.bufferRowLength = 0u; + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = extent; + promotedCPUImage->setBufferAndRegions(std::move(promotedCPUImageBuffer), newRegions); + + performImageFormatPromotionCopy(loadedCPUImage, promotedCPUImage); + return promotedCPUImage; + } + else + { + return loadedCPUImage; + } + }; + + // TODO: Unhardcode + const std::string basePath = "../../media/tiled_grid_mip_"; + smart_refctd_ptr img = loadImage(basePath + "0.exr"); + const uint32_t sidelength = img->getCreationParameters().extent.width; + + const uint32_t maxMipLevel = nbl::hlsl::findMSB(sidelength / 128u); + mipLevels.reserve(maxMipLevel + 1); + mipLevels.emplace_back(std::move(img)); + for (auto i = 1u; i <= maxMipLevel; i++) + { + mipLevels.emplace_back(loadImage(basePath + std::to_string(i) + ".exr")); + } + } + + uint32_t2 getExtents(std::filesystem::path imagePath) override + { + uint32_t sidelength = mipLevels[0]->getCreationParameters().extent.width; + return uint32_t2(sidelength, sidelength); + } + + asset::E_FORMAT getFormat(std::filesystem::path imagePath) override + { + return mipLevels[0]->getCreationParameters().format; + } + +private: + // These are here for the example, might not be class members when porting to 
n4ce + asset::IAssetManager* m_assetMgr = {}; + system::ILogger* m_logger = {}; + video::IPhysicalDevice* m_physicalDevice = {}; + // We're going to fake it in the example so it's easier to work with, but the interface remains + core::vector> mipLevels = {}; +}; + class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication { using device_base_t = nbl::examples::SimpleWindowedApplication; @@ -1266,7 +1433,11 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio gridDTMHeightMap = loadImage("../../media/gridDTMHeightMap.exr"); - bigTiledGrid = loadImage("../../media/tiled_grid.exr"); + // Create case 12 image loader + if constexpr (mode == ExampleMode::CASE_12) + { + drawResourcesFiller.setGeoreferencedImageLoader(make_smart_refctd_ptr(m_assetMgr.get(), m_logger.get(), m_physicalDevice)); + } // set diagonals of cells to TOP_LEFT_TO_BOTTOM_RIGHT or BOTTOM_LEFT_TO_TOP_RIGHT randomly { @@ -1498,7 +1669,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio projectionToNDC = m_Camera.constructViewProjection(); // TEST CAMERA ROTATION - if constexpr (textCameraRotation) + if constexpr (testCameraRotation) projectionToNDC = rotateBasedOnTime(projectionToNDC); Globals globalData = {}; @@ -3680,18 +3851,20 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } else if (mode == ExampleMode::CASE_12) { - const static float64_t3 topLeftViewportH = float64_t3(-1.0, -1.0, 1.0); - const static float64_t3 topRightViewportH = float64_t3(1.0, -1.0, 1.0); - const static float64_t3 bottomLeftViewportH = float64_t3(-1.0, 1.0, 1.0); - const static float64_t3 bottomRightViewportH = float64_t3(1.0, 1.0, 1.0); + // placeholder, actual path is right now hardcoded into the loader + const static std::string tiledGridPath = "../../media/tiled_grid_mip_0.exr"; + + constexpr float64_t3 topLeftViewportH = float64_t3(-1.0, -1.0, 1.0); 
+ constexpr float64_t3 topRightViewportH = float64_t3(1.0, -1.0, 1.0); + constexpr float64_t3 bottomLeftViewportH = float64_t3(-1.0, 1.0, 1.0); + constexpr float64_t3 bottomRightViewportH = float64_t3(1.0, 1.0, 1.0); image_id tiledGridID = 6996; - static GeoreferencedImageParams tiledGridParams; - auto& tiledGridCreationParams = bigTiledGrid->getCreationParameters(); + GeoreferencedImageParams tiledGridParams; // Position at topLeft viewport auto projectionToNDC = m_Camera.constructViewProjection(); // TEST CAMERA ROTATION - if constexpr (textCameraRotation) + if constexpr (testCameraRotation) projectionToNDC = rotateBasedOnTime(projectionToNDC); auto inverseViewProj = nbl::hlsl::inverse(projectionToNDC); @@ -3701,20 +3874,17 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU const static float64_t startingImagePixelsPerViewportPixels = 1.5; const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); - const static auto dirU = startingViewportWidthVector * float64_t(bigTiledGrid->getCreationParameters().extent.width) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); + const static auto dirU = startingViewportWidthVector * float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); tiledGridParams.worldspaceOBB.dirU = dirU; tiledGridParams.worldspaceOBB.aspectRatio = 1.0; - tiledGridParams.imageExtents = { tiledGridCreationParams.extent.width, tiledGridCreationParams.extent.height}; + tiledGridParams.imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath); tiledGridParams.viewportExtents = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; - tiledGridParams.format = tiledGridCreationParams.format; - - static 
auto bigTileGridPtr = bigTiledGrid; - static DrawResourcesFiller::ImageLoader loader(std::move(bigTileGridPtr)); - static DrawResourcesFiller::StreamedImageManager tiledGridManager(tiledGridID, std::move(tiledGridParams), std::move(loader)); + tiledGridParams.format = drawResourcesFiller.queryGeoreferencedImageFormat(tiledGridPath); + tiledGridParams.storagePath = tiledGridPath; - drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridID, tiledGridManager.georeferencedImageParams, intendedNextSubmit); + drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridID, std::move(tiledGridParams), intendedNextSubmit); - drawResourcesFiller.addGeoreferencedImage(tiledGridManager, inverseViewProj, tiledGridParams.viewportExtents, intendedNextSubmit); + drawResourcesFiller.addGeoreferencedImage(tiledGridID, inverseViewProj, intendedNextSubmit); } } @@ -3800,7 +3970,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio std::vector> sampleImages; smart_refctd_ptr gridDTMHeightMap; - smart_refctd_ptr bigTiledGrid; static constexpr char FirstGeneratedCharacter = ' '; static constexpr char LastGeneratedCharacter = '~'; diff --git a/62_CAD/scripts/generate_mipmaps.py b/62_CAD/scripts/generate_mipmaps.py new file mode 100644 index 000000000..78420cda5 --- /dev/null +++ b/62_CAD/scripts/generate_mipmaps.py @@ -0,0 +1,47 @@ +import OpenEXR +import Imath +import numpy as np + +def read_exr(path): + exr = OpenEXR.InputFile(path) + dw = exr.header()['dataWindow'] + size = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1) + + pt = Imath.PixelType(Imath.PixelType.FLOAT) + channels = ['R', 'G', 'B'] + data = [np.frombuffer(exr.channel(c, pt), dtype=np.float32).reshape(size[1], size[0]) for c in channels] + return np.stack(data, axis=-1) # shape: (H, W, 3) + +def write_exr(path, arr): + H, W, C = arr.shape + assert C == 3, "Only RGB supported" + header = OpenEXR.Header(W, H) + pt = 
Imath.PixelType(Imath.PixelType.FLOAT) + channels = { + 'R': arr[:, :, 0].astype(np.float32).tobytes(), + 'G': arr[:, :, 1].astype(np.float32).tobytes(), + 'B': arr[:, :, 2].astype(np.float32).tobytes() + } + exr = OpenEXR.OutputFile(path, header) + exr.writePixels(channels) + +def mipmap_exr(): + img = read_exr("../../media/tiled_grid_mip_0.exr") + h, w, _ = img.shape + base_path = "../../media/tiled_grid_mip_" + tile_size = 128 + mip_level = 1 + tile_length = h // (2 * tile_size) + + while tile_length > 0: + # Reshape and average 2x2 blocks + reshaped = img.reshape(h//2, 2, w//2, 2, 3) + mipmap = reshaped.mean(axis=(1, 3)) + write_exr(base_path + str(mip_level) + ".exr", mipmap) + img = mipmap + mip_level = mip_level + 1 + tile_length = tile_length // 2 + h = h // 2 + w = w // 2 + +mipmap_exr() \ No newline at end of file diff --git a/62_CAD/scripts/tiled_grid.py b/62_CAD/scripts/tiled_grid.py index 737c3463e..89c637338 100644 --- a/62_CAD/scripts/tiled_grid.py +++ b/62_CAD/scripts/tiled_grid.py @@ -250,7 +250,7 @@ def generate_interpolated_grid_image(tile_size, count, font_path=None): try: final_image = generate_interpolated_grid_image(tile_sidelength, grid_count, font_path=font_to_use) - output_filename = "../../media/tiled_grid.exr" + output_filename = "../../media/tiled_grid_mip_0.exr" np_img = np.array(final_image).astype(np.float32) / 255.0 # Normalize for EXR spec = oiio.ImageSpec(final_image.width, final_image.height, 3, oiio.TypeDesc("float")) out = oiio.ImageOutput.create(output_filename) diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 9842e67b0..a22e1d883 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -754,7 +754,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) const float32_t2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); const float32_t2 ndcDirV = 
_static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - const bool2 corner = bool2(vertexIdx & 0x1u, vertexIdx & 0x2u); + const bool2 corner = bool2(vertexIdx & 0x1u, vertexIdx >> 1u); const float32_t2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; const float32_t2 uv = float32_t2(corner.x ? maxUV.x : minUV.x, corner.y ? maxUV.y : minUV.y); From 89af347f30d8305248d5a00e5e841c8b18da6a70 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 20 Aug 2025 16:06:27 -0300 Subject: [PATCH 14/29] Checkpoint for Phase 2 --- 62_CAD/DrawResourcesFiller.cpp | 49 ++++++++++--------------- 62_CAD/DrawResourcesFiller.h | 39 -------------------- 62_CAD/Images.h | 66 ++++++++++++++++++++++++++++++++-- 62_CAD/main.cpp | 11 +++--- 4 files changed, 89 insertions(+), 76 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 6d160cb15..5ff20f35e 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2548,7 +2548,7 @@ ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::as outImageParams.extent = { gpuImageSidelength, gpuImageSidelength, 1u }; } - outImageParams.mipLevels = 1u; // TODO: Later do mipmapping + outImageParams.mipLevels = 2u; outImageParams.arrayLayers = 1u; return imageType; @@ -2688,35 +2688,6 @@ void DrawResourcesFiller::flushDrawObjects() } } -smart_refctd_ptr GeoreferencedImageStreamingState::create(GeoreferencedImageParams&& _georeferencedImageParams) -{ - smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); - retVal->georeferencedImageParams = std::move(_georeferencedImageParams); - // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point - // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). 
- // The composition of these matrices there fore transforms any point in worldspace into uv coordinates in imagespace - - - // 1. Displacement. The following matrix computes the offset for an input point `p` with homogenous worldspace coordinates. - // By foregoing the homogenous coordinate we can keep only the vector part, that's why it's `2x3` and not `3x3` - float64_t2 topLeftWorld = retVal->georeferencedImageParams.worldspaceOBB.topLeft; - float64_t2x3 displacementMatrix(1., 0., - topLeftWorld.x, 0., 1., - topLeftWorld.y); - - // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression - // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) - float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; - float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * retVal->georeferencedImageParams.worldspaceOBB.aspectRatio; - float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); - float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); - float64_t2 firstRow = dirU / dirULengthSquared; - float64_t2 secondRow = dirV / dirVLengthSquared; - float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); - - // Put them all together - retVal->world2UV = nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix); - return retVal; -} - DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, GeoreferencedImageStreamingState* imageStreamingState) { // I think eventually it's better to just transform georeferenced images that aren't big enough into static images and forget about them @@ -2836,6 +2807,24 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); + // Upload the smaller tile to mip 1 
+ tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX * (GeoreferencedImageTileSize >> 1), tileY * (GeoreferencedImageTileSize >> 1)), uint32_t2(GeoreferencedImageTileSize >> 1, GeoreferencedImageTileSize >> 1), imageStreamingState->currentMappedRegion.baseMipLevel + 1); + bufCopy = {}; + + bufCopy.bufferOffset = 0; + bufCopy.bufferRowLength = GeoreferencedImageTileSize >> 1; + bufCopy.bufferImageHeight = 0; + bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + bufCopy.imageSubresource.mipLevel = 1u; + bufCopy.imageSubresource.baseArrayLayer = 0u; + bufCopy.imageSubresource.layerCount = 1u; + bufCopy.imageOffset = { (tileX - imageStreamingState->currentMappedRegion.topLeft.x) * (GeoreferencedImageTileSize >> 1), (tileY - imageStreamingState->currentMappedRegion.topLeft.y) * (GeoreferencedImageTileSize >> 1), 0u }; + bufCopy.imageExtent.width = GeoreferencedImageTileSize >> 1; + bufCopy.imageExtent.height = GeoreferencedImageTileSize >> 1; + bufCopy.imageExtent.depth = 1; + + tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); + // Mark tile as resident imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; } diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 7a4f5962a..8ddffb61b 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -20,45 +20,6 @@ static_assert(sizeof(LineStyle) == 88u); //TODO[Francisco]: Update briefs for geotex related functions -// Measured in tile coordinates in the image that the range spans, and the mip level the tiles correspond to -struct GeoreferencedImageTileRange -{ - uint32_t2 topLeft; - uint32_t2 bottomRight; - uint32_t baseMipLevel; -}; - -// @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. 
-struct GeoreferencedImageStreamingState : public IReferenceCounted -{ - friend class DrawResourcesFiller; - -protected: - static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams); - - //image_id imageID = {}; - GeoreferencedImageParams georeferencedImageParams = {}; - std::vector> currentMappedRegionOccupancy = {}; - - // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image - // Tile coords are always in mip 0 tile size. Translating to other mips levels is trivial - float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoordsH) const { return nbl::hlsl::mul(world2UV, worldCoordsH); } - float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoordsH) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoordsH); } - float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoordsH, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoordsH); } - - // When the current mapped region is inadequate to fit the viewport, we compute a new mapped region - void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); - - // Sidelength of the gpu image, in tiles that are `GeoreferencedImageTileSize` pixels wide - uint32_t maxResidentTiles = {}; - // Size of the image (minus 1), in tiles of `GeoreferencedImageTileSize` sidelength - uint32_t2 maxImageTileIndices = {}; - // Set topLeft to extreme value so it gets recreated on first iteration - GeoreferencedImageTileRange currentMappedRegion = { .topLeft = uint32_t2(std::numeric_limits::max(), std::numeric_limits::max()) }; - // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) - float64_t2x3 world2UV = {}; -}; - // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. // ! 
Drawing new objects (polylines, hatches, etc.) should go through this function. diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 5d81d7d78..bba1736b7 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -109,8 +109,70 @@ struct ImageCleanup : public core::IReferenceCounted }; -// Forward declared so we can have a smart pointer in the cached record -struct GeoreferencedImageStreamingState; +// Measured in tile coordinates in the image that the range spans, and the mip level the tiles correspond to +struct GeoreferencedImageTileRange +{ + uint32_t2 topLeft; + uint32_t2 bottomRight; + uint32_t baseMipLevel; +}; + +// @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. +struct GeoreferencedImageStreamingState : public IReferenceCounted +{ + friend class DrawResourcesFiller; + +protected: + static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams) + { + smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); + retVal->georeferencedImageParams = std::move(_georeferencedImageParams); + // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point + // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). + // The composition of these matrices there fore transforms any point in worldspace into uv coordinates in imagespace + + + // 1. Displacement. The following matrix computes the offset for an input point `p` with homogenous worldspace coordinates. + // By foregoing the homogenous coordinate we can keep only the vector part, that's why it's `2x3` and not `3x3` + float64_t2 topLeftWorld = retVal->georeferencedImageParams.worldspaceOBB.topLeft; + float64_t2x3 displacementMatrix(1., 0., - topLeftWorld.x, 0., 1., - topLeftWorld.y); + + // 2. Change of Basis. 
Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression + // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) + float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; + float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * retVal->georeferencedImageParams.worldspaceOBB.aspectRatio; + float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); + float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); + float64_t2 firstRow = dirU / dirULengthSquared; + float64_t2 secondRow = dirV / dirVLengthSquared; + float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); + + // Put them all together + retVal->world2UV = nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix); + return retVal; + } + + GeoreferencedImageParams georeferencedImageParams = {}; + std::vector> currentMappedRegionOccupancy = {}; + + // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image + // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial + float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoordsH) const { return nbl::hlsl::mul(world2UV, worldCoordsH); } + float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoordsH) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoordsH); } + float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoordsH, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoordsH); } + + // When the current mapped region is inadequate to fit the viewport, we compute a new mapped region + void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); + + // Sidelength of the gpu image, in tiles that are `GeoreferencedImageTileSize` pixels wide + uint32_t maxResidentTiles = {}; + // Size of the image (minus 1), in tiles of `GeoreferencedImageTileSize` sidelength + uint32_t2 maxImageTileIndices = {}; + // Set topLeft to extreme value so it gets recreated on first iteration + GeoreferencedImageTileRange currentMappedRegion = { .topLeft = uint32_t2(std::numeric_limits::max(), std::numeric_limits::max()) }; + // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) + float64_t2x3 world2UV = {}; +}; struct CachedImageRecord { diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 759ba5692..2739f45f5 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -49,7 +49,7 @@ static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic -static constexpr bool testCameraRotation = false; +static constexpr bool testCameraRotation = true; enum class ExampleMode { @@ -664,10 +664,11 @@ 
class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio // Static Image Sampler { + constexpr auto wrapMode = mode == ExampleMode::CASE_12 ? IGPUSampler::E_TEXTURE_CLAMP::ETC_REPEAT : IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; IGPUSampler::SParams samplerParams = {}; - samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; - samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; - samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.TextureWrapU = wrapMode; + samplerParams.TextureWrapV = wrapMode; + samplerParams.TextureWrapW = wrapMode; samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_TRANSPARENT_BLACK; samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; @@ -3872,7 +3873,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio tiledGridParams.worldspaceOBB.topLeft = startingTopLeft; // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU - const static float64_t startingImagePixelsPerViewportPixels = 1.5; + const static float64_t startingImagePixelsPerViewportPixels = 2.0; const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); const static auto dirU = startingViewportWidthVector * float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); tiledGridParams.worldspaceOBB.dirU = dirU; From f3532feff216b9cf47e5ee246c7637102ac103c0 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 22 Aug 2025 16:57:59 -0300 Subject: [PATCH 15/29] Addressed Erfan PR messages --- 62_CAD/DrawResourcesFiller.cpp | 151 +++++++++++++----------------- 62_CAD/DrawResourcesFiller.h | 12 +-- 62_CAD/Images.h | 165 ++++++++++++++++++++++++++++----- 62_CAD/main.cpp | 7 +- 4 files changed, 214 insertions(+), 121 
deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 5ff20f35e..f0aae8c40 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -739,13 +739,10 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(std::move(params)); // This is because gpu image is square - cachedImageRecord->georeferencedImageState->maxResidentTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; + cachedImageRecord->georeferencedImageState->gpuImageSideLengthTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; - auto& maxImageTileIndices = cachedImageRecord->georeferencedImageState->maxImageTileIndices; - maxImageTileIndices = cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents / uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize); - // If it fits perfectly along any dimension, we need one less tile with this scheme - maxImageTileIndices -= uint32_t2(maxImageTileIndices.x * GeoreferencedImageTileSize == cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents.x, - maxImageTileIndices.y * GeoreferencedImageTileSize == cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents.y); + auto& fullImageLastTileIndices = cachedImageRecord->georeferencedImageState->fullImageLastTileIndices; + fullImageLastTileIndices = (cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents - 1u) / GeoreferencedImageTileSize; } else { @@ -913,6 +910,7 @@ void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_ { m_logger.log("addGeoreferencedImage was not called immediately after enforceGeoreferencedImageAvailability!", 
nbl::system::ILogger::ELL_ERROR); assert(false); + return; } // Generate upload data @@ -923,9 +921,9 @@ void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_ queueGeoreferencedImageCopy_Internal(imageID, imageCopy); GeoreferencedImageInfo info = {}; - info.topLeft = uploadData.worldspaceOBB.topLeft; - info.dirU = uploadData.worldspaceOBB.dirU; - info.aspectRatio = uploadData.worldspaceOBB.aspectRatio; + info.topLeft = uploadData.viewportEncompassingOBB.topLeft; + info.dirU = uploadData.viewportEncompassingOBB.dirU; + info.aspectRatio = uploadData.viewportEncompassingOBB.aspectRatio; info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory info.minUV = uploadData.minUV; info.maxUV = uploadData.maxUV; @@ -2700,22 +2698,33 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap // the mapped region. 
- const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel >= imageStreamingState->currentMappedRegion.baseMipLevel + 1.0 - || viewportTileRange.baseMipLevel < imageStreamingState->currentMappedRegion.baseMipLevel; + const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel != imageStreamingState->currentMappedRegion.baseMipLevel; - // If any of the corners of the obb encompassing the viewport falls outside the currently mapped region, we have to remap the mapped region - const bool tileBoundaryCrossed = nbl::hlsl::any(viewportTileRange.topLeft < imageStreamingState->currentMappedRegion.topLeft) - || nbl::hlsl::any(viewportTileRange.bottomRight > imageStreamingState->currentMappedRegion.bottomRight); + // If we moved a huge amount in any direction, no tiles will remain resident, so we simply reset state + const bool relativeShiftTooBig = nbl::hlsl::any + ( + nbl::hlsl::abs(int32_t2(viewportTileRange.topLeft) - int32_t2(imageStreamingState->currentMappedRegion.topLeft)) >= int32_t2(imageStreamingState->gpuImageSideLengthTiles, imageStreamingState->gpuImageSideLengthTiles) + ) + || nbl::hlsl::any + ( + nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRight) - int32_t2(imageStreamingState->currentMappedRegion.bottomRight)) >= int32_t2(imageStreamingState->gpuImageSideLengthTiles, imageStreamingState->gpuImageSideLengthTiles) + ); - if (mipBoundaryCrossed || tileBoundaryCrossed) + if (mipBoundaryCrossed || relativeShiftTooBig) { imageStreamingState->remapCurrentRegion(viewportTileRange); } + else + { + imageStreamingState->shiftAndExpandCurrentRegion(viewportTileRange); + } + + // DEBUG - Sampled mip level { // Get world coordinates for each corner of the mapped region - const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->maxImageTileIndices.x + 1u) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); + const float32_t2 oneTileDirU = 
imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageLastTileIndices.x + 1u) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * imageStreamingState->georeferencedImageParams.worldspaceOBB.aspectRatio; float64_t2 topLeftMappedRegionWorld = imageStreamingState->georeferencedImageParams.worldspaceOBB.topLeft; topLeftMappedRegionWorld += oneTileDirU * float32_t(imageStreamingState->currentMappedRegion.topLeft.x) + oneTileDirV * float32_t(imageStreamingState->currentMappedRegion.topLeft.y); @@ -2748,13 +2757,13 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( float64_t2x3 toPixelCoordsMatrix = nbl::hlsl::mul(scalingMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); // Map viewport points to world - const float64_t3 topLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDCH); - const float64_t3 topRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, topRightViewportNDCH); - const float64_t3 bottomLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDCH); + const float64_t3 topLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDC); + const float64_t3 topRightViewportWorld = nbl::hlsl::mul(NDCToWorld, topRightViewportNDC); + const float64_t3 bottomLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDC); // Get pixel coordinates vectors for each side - const float64_t2 viewportWidthPixelLengthVector = nbl::hlsl::mul(toPixelCoordsMatrix, topRightViewportWorldH - topLeftViewportWorldH); - const float64_t2 viewportHeightPixelLengthVector = nbl::hlsl::mul(toPixelCoordsMatrix, bottomLeftViewportWorldH - topLeftViewportWorldH); + const float64_t2 viewportWidthPixelLengthVector = nbl::hlsl::mul(toPixelCoordsMatrix, topRightViewportWorld - topLeftViewportWorld); + const float64_t2 viewportHeightPixelLengthVector = 
nbl::hlsl::mul(toPixelCoordsMatrix, bottomLeftViewportWorld - topLeftViewportWorld); // Get pixel length for each of these vectors const auto viewportWidthPixelLength = nbl::hlsl::length(viewportWidthPixelLengthVector); @@ -2767,9 +2776,16 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( std::cout << "Sampled mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; } - // We need to make every tile that covers the viewport resident, so we create the vector of `StreamedImageCopies`, 1 such copy per tile. + // We need to make every tile that covers the viewport resident. We reserve the amount of tiles needed for upload. core::vector tiles; - uint32_t nTiles = (viewportTileRange.bottomRight.x - viewportTileRange.topLeft.x + 1) * (viewportTileRange.bottomRight.y - viewportTileRange.topLeft.y + 1); + uint32_t nTiles = 0; + for (uint32_t tileX = viewportTileRange.topLeft.x; tileX <= viewportTileRange.bottomRight.x; tileX++) + for (uint32_t tileY = viewportTileRange.topLeft.y; tileY <= viewportTileRange.bottomRight.y; tileY++) + { + // Compute tile offset relative to `currentMappedRegion.topLeft`, to get tile index into the gpu image + uint32_t2 gpuImageTileIndex = ((uint32_t2(tileX, tileY) - imageStreamingState->currentMappedRegion.topLeft) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles; + nTiles += !imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]; + } tiles.reserve(nTiles); // Assuming a 1 pixel per block format - otherwise math here gets a bit trickier @@ -2784,13 +2800,13 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( for (uint32_t tileY = viewportTileRange.topLeft.y; tileY <= viewportTileRange.bottomRight.y; tileY++) { // Compute tile offset relative to `currentMappedRegion.topLeft`, to get tile index into the gpu image - uint32_t2 gpuImageTileIndex = uint32_t2(tileX, tileY) - 
imageStreamingState->currentMappedRegion.topLeft; + uint32_t2 gpuImageTileIndex = ((uint32_t2(tileX, tileY) - imageStreamingState->currentMappedRegion.topLeft) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles; // If tile already resident, do nothing if (imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) continue; - auto tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX * GeoreferencedImageTileSize, tileY * GeoreferencedImageTileSize), uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize), imageStreamingState->currentMappedRegion.baseMipLevel); + auto tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX, tileY) * GeoreferencedImageTileSize, uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize), imageStreamingState->currentMappedRegion.baseMipLevel); asset::IImage::SBufferCopy bufCopy; bufCopy.bufferOffset = 0; @@ -2800,7 +2816,8 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( bufCopy.imageSubresource.mipLevel = 0u; bufCopy.imageSubresource.baseArrayLayer = 0u; bufCopy.imageSubresource.layerCount = 1u; - bufCopy.imageOffset = { (tileX - imageStreamingState->currentMappedRegion.topLeft.x) * GeoreferencedImageTileSize, (tileY - imageStreamingState->currentMappedRegion.topLeft.y) * GeoreferencedImageTileSize, 0u }; + uint32_t2 imageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; + bufCopy.imageOffset = { imageOffset.x, imageOffset.y, 0u }; bufCopy.imageExtent.width = GeoreferencedImageTileSize; bufCopy.imageExtent.height = GeoreferencedImageTileSize; bufCopy.imageExtent.depth = 1; @@ -2808,19 +2825,20 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); // Upload 
the smaller tile to mip 1 - tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX * (GeoreferencedImageTileSize >> 1), tileY * (GeoreferencedImageTileSize >> 1)), uint32_t2(GeoreferencedImageTileSize >> 1, GeoreferencedImageTileSize >> 1), imageStreamingState->currentMappedRegion.baseMipLevel + 1); + tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX, tileY) * (GeoreferencedImageTileSize >> 1), uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize) >> 1u, imageStreamingState->currentMappedRegion.baseMipLevel + 1); bufCopy = {}; bufCopy.bufferOffset = 0; - bufCopy.bufferRowLength = GeoreferencedImageTileSize >> 1; + bufCopy.bufferRowLength = GeoreferencedImageTileSize >> 1u; bufCopy.bufferImageHeight = 0; bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; bufCopy.imageSubresource.mipLevel = 1u; bufCopy.imageSubresource.baseArrayLayer = 0u; bufCopy.imageSubresource.layerCount = 1u; - bufCopy.imageOffset = { (tileX - imageStreamingState->currentMappedRegion.topLeft.x) * (GeoreferencedImageTileSize >> 1), (tileY - imageStreamingState->currentMappedRegion.topLeft.y) * (GeoreferencedImageTileSize >> 1), 0u }; - bufCopy.imageExtent.width = GeoreferencedImageTileSize >> 1; - bufCopy.imageExtent.height = GeoreferencedImageTileSize >> 1; + imageOffset /= 2; // Half tile size! 
+ bufCopy.imageOffset = { imageOffset.x, imageOffset.y, 0u }; + bufCopy.imageExtent.width = GeoreferencedImageTileSize >> 1u; + bufCopy.imageExtent.height = GeoreferencedImageTileSize >> 1u; bufCopy.imageExtent.depth = 1; tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); @@ -2836,7 +2854,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // The original image `dirU` corresponds to `maxImageTileIndices.x + 1` mip 0 tiles (provided it's exactly that length in tiles) // Dividing dirU by `maxImageTileIndices + (1,1)` we therefore get a vector that spans exactly one mip 0 tile (in the u direction) in worldspace. // Multiplying that by `2^mipLevel` we get a vector that spans exactly one mip `mipLevel` tile (in the u direction) - const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->maxImageTileIndices.x + 1u) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); + const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageLastTileIndices.x + 1u) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * viewportWorldspaceOBB.aspectRatio; viewportWorldspaceOBB.topLeft += oneTileDirU * float32_t(viewportTileRange.topLeft.x); viewportWorldspaceOBB.topLeft += oneTileDirV * float32_t(viewportTileRange.topLeft.y); @@ -2851,10 +2869,8 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // The UV logic will have to change to consider what happens to the last loaded tile (or, alternatively, we can also fill the empty tiles with alpha=0 pixels) // Compute minUV, maxUV - - const uint32_t2 mappedRegionTileLength = imageStreamingState->currentMappedRegion.bottomRight - 
imageStreamingState->currentMappedRegion.topLeft + uint32_t2(1, 1); - const float32_t2 uvPerTile = float32_t2(1.f, 1.f) / float32_t2(imageStreamingState->maxResidentTiles, imageStreamingState->maxResidentTiles); - float32_t2 minUV = uvPerTile * float32_t2(viewportTileRange.topLeft - imageStreamingState->currentMappedRegion.topLeft); + const float32_t2 uvPerTile = float32_t2(1.f, 1.f) / float32_t2(imageStreamingState->gpuImageSideLengthTiles, imageStreamingState->gpuImageSideLengthTiles); + float32_t2 minUV = uvPerTile * float32_t2(((viewportTileRange.topLeft - imageStreamingState->currentMappedRegion.topLeft) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles); float32_t2 maxUV = minUV + uvPerTile * float32_t2(viewportTileLength); return TileUploadData{ std::move(tiles), viewportWorldspaceOBB, minUV, maxUV }; @@ -2864,16 +2880,16 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const float64_t3x3& NDCToWorld, const GeoreferencedImageStreamingState* imageStreamingState) { // First get world coordinates for each of the viewport's corners - const float64_t3 topLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDCH); - const float64_t3 topRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, topRightViewportNDCH); - const float64_t3 bottomLeftViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDCH); - const float64_t3 bottomRightViewportWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightViewportNDCH); + const float64_t3 topLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDC); + const float64_t3 topRightViewportWorld = nbl::hlsl::mul(NDCToWorld, topRightViewportNDC); + const float64_t3 bottomLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDC); + const float64_t3 bottomRightViewportWorld = nbl::hlsl::mul(NDCToWorld, bottomRightViewportNDC); // Then we get mip 0 tiles coordinates for 
each of them, into the image - const float64_t2 topLeftTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(topLeftViewportWorldH, GeoreferencedImageTileSize); - const float64_t2 topRightTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(topRightViewportWorldH, GeoreferencedImageTileSize); - const float64_t2 bottomLeftTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(bottomLeftViewportWorldH, GeoreferencedImageTileSize); - const float64_t2 bottomRightTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(bottomRightViewportWorldH, GeoreferencedImageTileSize); + const float64_t2 topLeftTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(topLeftViewportWorld, GeoreferencedImageTileSize); + const float64_t2 topRightTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(topRightViewportWorld, GeoreferencedImageTileSize); + const float64_t2 bottomLeftTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(bottomLeftViewportWorld, GeoreferencedImageTileSize); + const float64_t2 bottomRightTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(bottomRightViewportWorld, GeoreferencedImageTileSize); // Get the min and max of each lattice coordinate to get a bounding rectangle const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); @@ -2916,49 +2932,8 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const maxAllFloored >>= retVal.baseMipLevel; // Clamp them to reasonable tile indices - retVal.topLeft = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(imageStreamingState->maxImageTileIndices >> retVal.baseMipLevel)); - retVal.bottomRight = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), int32_t2(imageStreamingState->maxImageTileIndices >> retVal.baseMipLevel)); + retVal.topLeft = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(imageStreamingState->fullImageLastTileIndices >> 
retVal.baseMipLevel)); + retVal.bottomRight = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), int32_t2(imageStreamingState->fullImageLastTileIndices >> retVal.baseMipLevel)); return retVal; -} - -void GeoreferencedImageStreamingState::remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) -{ - // Zoomed out - if (viewportTileRange.baseMipLevel > currentMappedRegion.baseMipLevel + 1.0) - { - // TODO: Here we would move some mip 1 tiles to mip 0 image to save the work of reuploading them, reflect that in the tracked tiles - } - // Zoomed in - else if (viewportTileRange.baseMipLevel < currentMappedRegion.baseMipLevel) - { - // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles - } - // Tile boundary crossing - else - { - // TODO: Here we would shuffle some tiles around to save the work of reuploading them, reflect that in the tracked tiles - } - currentMappedRegion.baseMipLevel = viewportTileRange.baseMipLevel; - - // Some variation of this code would go into each branch above - uint32_t2 viewportTileLength = viewportTileRange.bottomRight - viewportTileRange.topLeft + uint32_t2(1, 1); - int32_t2 nextTopLeft = int32_t2(viewportTileRange.topLeft) - int32_t2((uint32_t2(maxResidentTiles, maxResidentTiles) - viewportTileLength) / 2u); - int32_t2 nextBottomRight = nextTopLeft + int32_t2(maxResidentTiles, maxResidentTiles) - int32_t2(1, 1); - // Clamp to the left/up, and add the difference to the right/down - int32_t2 clampedTopLeft = nbl::hlsl::max(nextTopLeft, int32_t2(0, 0)); - nextBottomRight += clampedTopLeft - nextTopLeft; - nextTopLeft = clampedTopLeft; - // Now clamp to the right/down, and add the difference to the left/up, this time clamping it for sure - int32_t2 clampedBottomRight = nbl::hlsl::min(nextBottomRight, int32_t2(maxImageTileIndices) >> int32_t(currentMappedRegion.baseMipLevel)); - nextTopLeft = nbl::hlsl::max(nextTopLeft - nextBottomRight + 
clampedBottomRight, int32_t2(0, 0)); - currentMappedRegion.topLeft = nextTopLeft; - currentMappedRegion.bottomRight = clampedBottomRight; - - currentMappedRegionOccupancy.resize(maxResidentTiles); - for (auto i = 0u; i < maxResidentTiles; i++) - { - currentMappedRegionOccupancy[i].clear(); - currentMappedRegionOccupancy[i].resize(maxResidentTiles, false); - } -} +} \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 8ddffb61b..682bc0247 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -148,10 +148,10 @@ struct DrawResourcesFiller } // These are vulkan standard, might be different in n4ce! - constexpr static float64_t3 topLeftViewportNDCH = float64_t3(-1.0, -1.0, 1.0); - constexpr static float64_t3 topRightViewportNDCH = float64_t3(1.0, -1.0, 1.0); - constexpr static float64_t3 bottomLeftViewportNDCH = float64_t3(-1.0, 1.0, 1.0); - constexpr static float64_t3 bottomRightViewportNDCH = float64_t3(1.0, 1.0, 1.0); + constexpr static float64_t3 topLeftViewportNDC = float64_t3(-1.0, -1.0, 1.0); + constexpr static float64_t3 topRightViewportNDC = float64_t3(1.0, -1.0, 1.0); + constexpr static float64_t3 bottomLeftViewportNDC = float64_t3(-1.0, 1.0, 1.0); + constexpr static float64_t3 bottomRightViewportNDC = float64_t3(1.0, 1.0, 1.0); DrawResourcesFiller(); @@ -692,7 +692,7 @@ struct DrawResourcesFiller * * @param[out] outImageParams Structure to be filled with image creation parameters (format, size, etc.). * @param[out] outImageType Indicates whether the image should be fully resident or streamed. 
- * @param[in] manager Manager for the georeferenced image + * @param[in] params Parameters for the georeferenced image */ ImageType determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params); @@ -841,7 +841,7 @@ struct DrawResourcesFiller struct TileUploadData { core::vector tiles; - OrientedBoundingBox2D worldspaceOBB; + OrientedBoundingBox2D viewportEncompassingOBB; float32_t2 minUV; float32_t2 maxUV; }; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index bba1736b7..a65a087ff 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -129,26 +129,28 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted retVal->georeferencedImageParams = std::move(_georeferencedImageParams); // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). - // The composition of these matrices there fore transforms any point in worldspace into uv coordinates in imagespace - - - // 1. Displacement. The following matrix computes the offset for an input point `p` with homogenous worldspace coordinates. 
- // By foregoing the homogenous coordinate we can keep only the vector part, that's why it's `2x3` and not `3x3` - float64_t2 topLeftWorld = retVal->georeferencedImageParams.worldspaceOBB.topLeft; - float64_t2x3 displacementMatrix(1., 0., - topLeftWorld.x, 0., 1., - topLeftWorld.y); + // The composition of these matrices therefore transforms any point in worldspace into uv coordinates in imagespace + // To reduce code complexity, instead of computing the product of these matrices, since the first is a pure displacement matrix + // (non-homogenous 2x2 upper left is identity matrix) and the other is a pure rotation matrix (2x2) we can just put them together + // by putting the rotation in the upper left 2x2 of the result and the post-rotated displacement in the upper right 2x1. + // The result is also 2x3 and not 3x3 because we can drop he homogenous since the displacement yields a vector // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) - float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; - float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * retVal->georeferencedImageParams.worldspaceOBB.aspectRatio; - float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); - float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); - float64_t2 firstRow = dirU / dirULengthSquared; - float64_t2 secondRow = dirV / dirVLengthSquared; - float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); + const float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; + const float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * retVal->georeferencedImageParams.worldspaceOBB.aspectRatio; + const float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); + const float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); + const float64_t2 
firstRow = dirU / dirULengthSquared; + const float64_t2 secondRow = dirV / dirVLengthSquared; + + const float64_t2 displacement = - retVal->georeferencedImageParams.worldspaceOBB.topLeft; + // This is the same as multiplying the change of basis matrix by the displacement vector + const float64_t postRotatedShiftX = nbl::hlsl::dot(firstRow, displacement); + const float64_t postRotatedShiftY = nbl::hlsl::dot(secondRow, displacement); // Put them all together - retVal->world2UV = nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix); + retVal->world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); return retVal; } @@ -157,19 +159,136 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. Translating to other mips levels is trivial - float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoordsH) const { return nbl::hlsl::mul(world2UV, worldCoordsH); } - float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoordsH) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoordsH); } - float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoordsH, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoordsH); } + float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoords) const { return nbl::hlsl::mul(world2UV, worldCoords); } + float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoords) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoords); } + float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoords); } // When the current mapped region is 
inadequate to fit the viewport, we compute a new mapped region - void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); + void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) + { + // Zoomed out + if (viewportTileRange.baseMipLevel > currentMappedRegion.baseMipLevel) + { + // TODO: Here we would move some mip 1 tiles to mip 0 image to save the work of reuploading them, reflect that in the tracked tiles + } + // Zoomed in + else if (viewportTileRange.baseMipLevel < currentMappedRegion.baseMipLevel) + { + // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles + } + currentMappedRegion = viewportTileRange; + + currentMappedRegionOccupancy.resize(gpuImageSideLengthTiles); + for (auto i = 0u; i < gpuImageSideLengthTiles; i++) + { + currentMappedRegionOccupancy[i].clear(); + currentMappedRegionOccupancy[i].resize(gpuImageSideLengthTiles, false); + } + gpuImageTopLeft = uint32_t2(0, 0); + } + + // When we can shift the mapped a region a bit and avoid tile uploads by using toroidal shifting + void shiftAndExpandCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) + { + // `topLeftDiff` starts as the vector (in tiles) from the current mapped region's top left to the top left of the range encompassing the viewport + int32_t2 topLeftDiff = int32_t2(viewportTileRange.topLeft) - int32_t2(currentMappedRegion.topLeft); + // Since we only consider expanding the mapped region by moving the top left up and to the left, we clamp the above vector to `(-infty, 0] x (-infty, 0]` + topLeftDiff = nbl::hlsl::min(topLeftDiff, int32_t2(0, 0)); + int32_t2 nextTopLeft = int32_t2(currentMappedRegion.topLeft) + topLeftDiff; + // Same logic for bottom right but considering it only moves down and to the right, so clamped to `[0, infty) x [0, infty)` + int32_t2 bottomRightDiff = int32_t2(viewportTileRange.bottomRight) - 
int32_t2(currentMappedRegion.bottomRight); + bottomRightDiff = nbl::hlsl::max(bottomRightDiff, int32_t2(0, 0)); + int32_t2 nextBottomRight = int32_t2(currentMappedRegion.bottomRight) + bottomRightDiff; + + // If the number of tiles resident in this new mapped region along any axis becomes bigger than the max number of tiles the gpu image can hold, + // we need to shrink this next mapped region. For this to happen, we have to have expanded in only one direction, the one that has `diff != 0` + // Therefore, we need to shrink the mapped region along the axis that has `diff = 0`, just enough tiles so that the mapped region's tile size stays within + // the max number of tiles the gpu image can hold. + int32_t2 nextMappedRegionDimensions = nextBottomRight - nextTopLeft + 1; + uint32_t2 currentMappedRegionDimensions = currentMappedRegion.bottomRight - currentMappedRegion.topLeft + 1u; + uint32_t2 gpuImageBottomRight = (gpuImageTopLeft + currentMappedRegionDimensions - 1u) % gpuImageSideLengthTiles; + + // Shrink along x axis + if (nextMappedRegionDimensions.x > gpuImageSideLengthTiles) + { + int32_t tilesToFit = nextMappedRegionDimensions.x - gpuImageSideLengthTiles; + if (0 == topLeftDiff.x) + { + // Move topLeft to the right to fit tiles on the other side + nextTopLeft.x += tilesToFit; + topLeftDiff.x += tilesToFit; + // Mark all these tiles as non-resident + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (tile + gpuImageTopLeft.x) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[tileIdx][i] = false; + } + } + else + { + // Move bottomRight to the left to fit tiles on the other side + nextBottomRight.x -= tilesToFit; + // Mark all these tiles as non-resident + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (gpuImageBottomRight.x + (gpuImageSideLengthTiles - 
tile)) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[tileIdx][i] = false; + } + } + } + // Shrink along y axis + if (nextMappedRegionDimensions.y > gpuImageSideLengthTiles) + { + int32_t tilesToFit = nextMappedRegionDimensions.y - gpuImageSideLengthTiles; + if (0 == topLeftDiff.y) + { + // Move topLeft down to fit tiles on the other side + nextTopLeft.y += tilesToFit; + topLeftDiff.y += tilesToFit; + // Mark all these tiles as non-resident + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (tile + gpuImageTopLeft.y) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[i][tileIdx] = false; + } + } + else + { + // Move bottomRight up to fit tiles on the other side + nextBottomRight.y -= tilesToFit; + // Mark all these tiles as non-resident + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (gpuImageBottomRight.y + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[i][tileIdx] = false; + } + } + } + + // Set new values for mapped region + currentMappedRegion.topLeft = nextTopLeft; + currentMappedRegion.bottomRight = nextBottomRight; + + // Toroidal shift for the gpu image top left + gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftDiff + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; + } // Sidelength of the gpu image, in tiles that are `GeoreferencedImageTileSize` pixels wide - uint32_t maxResidentTiles = {}; + uint32_t gpuImageSideLengthTiles = {}; // Size of the image (minus 1), in tiles of `GeoreferencedImageTileSize` sidelength - uint32_t2 maxImageTileIndices = {}; - // Set topLeft to extreme value so it gets recreated on first iteration - GeoreferencedImageTileRange 
currentMappedRegion = { .topLeft = uint32_t2(std::numeric_limits::max(), std::numeric_limits::max()) }; + uint32_t2 fullImageLastTileIndices = {}; + // Set mip level to extreme value so it gets recreated on first iteration + GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; + // Indicates on which tile of the gpu image the current mapped region's `topLeft` resides + uint32_t2 gpuImageTopLeft = {}; // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) float64_t2x3 world2UV = {}; }; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 2739f45f5..ecd05e00d 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -664,11 +664,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio // Static Image Sampler { - constexpr auto wrapMode = mode == ExampleMode::CASE_12 ? IGPUSampler::E_TEXTURE_CLAMP::ETC_REPEAT : IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; IGPUSampler::SParams samplerParams = {}; - samplerParams.TextureWrapU = wrapMode; - samplerParams.TextureWrapV = wrapMode; - samplerParams.TextureWrapW = wrapMode; + samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_REPEAT; + samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_REPEAT; + samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_REPEAT; samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_TRANSPARENT_BLACK; samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; From 888bcb1f7635950e2dcf0c796726cb472e704bca Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 27 Aug 2025 18:28:08 -0300 Subject: [PATCH 16/29] Addressed some PR comments, checkpoint before modifying UV logic --- 62_CAD/DrawResourcesFiller.cpp | 179 +++++++++++++++++------------ 62_CAD/DrawResourcesFiller.h | 9 +- 62_CAD/Images.h | 204 ++++++++++++++++++++------------- 3 files changed, 230 insertions(+), 162 deletions(-) diff --git 
a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index f0aae8c40..679187f9c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -741,8 +741,8 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( // This is because gpu image is square cachedImageRecord->georeferencedImageState->gpuImageSideLengthTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; - auto& fullImageLastTileIndices = cachedImageRecord->georeferencedImageState->fullImageLastTileIndices; - fullImageLastTileIndices = (cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents - 1u) / GeoreferencedImageTileSize; + auto& fullImageTileLength = cachedImageRecord->georeferencedImageState->fullImageTileLength; + fullImageTileLength = (cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents - 1u) / GeoreferencedImageTileSize + 1u; } else { @@ -2693,42 +2693,23 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( return TileUploadData{ {}, imageStreamingState->georeferencedImageParams.worldspaceOBB }; // Compute the mip level and tile range we would need to encompass the viewport - GeoreferencedImageTileRange viewportTileRange = computeViewportTileRange(NDCToWorld, imageStreamingState); - - // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 - // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap - // the mapped region. 
- const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel != imageStreamingState->currentMappedRegion.baseMipLevel; - - // If we moved a huge amount in any direction, no tiles will remain resident, so we simply reset state - const bool relativeShiftTooBig = nbl::hlsl::any - ( - nbl::hlsl::abs(int32_t2(viewportTileRange.topLeft) - int32_t2(imageStreamingState->currentMappedRegion.topLeft)) >= int32_t2(imageStreamingState->gpuImageSideLengthTiles, imageStreamingState->gpuImageSideLengthTiles) - ) - || nbl::hlsl::any - ( - nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRight) - int32_t2(imageStreamingState->currentMappedRegion.bottomRight)) >= int32_t2(imageStreamingState->gpuImageSideLengthTiles, imageStreamingState->gpuImageSideLengthTiles) - ); - - if (mipBoundaryCrossed || relativeShiftTooBig) - { - imageStreamingState->remapCurrentRegion(viewportTileRange); - } - else - { - imageStreamingState->shiftAndExpandCurrentRegion(viewportTileRange); - } + // `viewportTileRange` is always should be a subset of `currentMappedRegion`, covering only the tiles visible in the viewport + // This also computes the optimal mip level for these tiles (basically a measure of how zoomed in or out the viewport is from the image) + GeoreferencedImageTileRange viewportTileRange = computeViewportTileRange(NDCToWorld, imageStreamingState); + // Slide or remap the current mapped region to ensure the viewport falls inside it + imageStreamingState->ensureMappedRegionCoversViewport(viewportTileRange); // DEBUG - Sampled mip level { // Get world coordinates for each corner of the mapped region - const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageLastTileIndices.x + 1u) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * imageStreamingState->georeferencedImageParams.worldspaceOBB.aspectRatio; + const 
float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); + const float32_t2 fullImageDirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x); + const float32_t2 oneTileDirV = fullImageDirV / float32_t(imageStreamingState->fullImageTileLength.y) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); float64_t2 topLeftMappedRegionWorld = imageStreamingState->georeferencedImageParams.worldspaceOBB.topLeft; - topLeftMappedRegionWorld += oneTileDirU * float32_t(imageStreamingState->currentMappedRegion.topLeft.x) + oneTileDirV * float32_t(imageStreamingState->currentMappedRegion.topLeft.y); - const uint32_t2 mappedRegionTileLength = imageStreamingState->currentMappedRegion.bottomRight - imageStreamingState->currentMappedRegion.topLeft + uint32_t2(1, 1); + topLeftMappedRegionWorld += oneTileDirU * float32_t(imageStreamingState->currentMappedRegion.topLeftTile.x) + oneTileDirV * float32_t(imageStreamingState->currentMappedRegion.topLeftTile.y); + const uint32_t2 mappedRegionTileLength = imageStreamingState->currentMappedRegion.bottomRightTile - imageStreamingState->currentMappedRegion.topLeftTile + uint32_t2(1, 1); float64_t2 bottomRightMappedRegionWorld = topLeftMappedRegionWorld; bottomRightMappedRegionWorld += oneTileDirU * float32_t(mappedRegionTileLength.x) + oneTileDirV * float32_t(mappedRegionTileLength.y); @@ -2756,6 +2737,12 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // Put them all together float64_t2x3 toPixelCoordsMatrix = nbl::hlsl::mul(scalingMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); + // These are vulkan standard, might be different in n4ce! 
+ constexpr static float64_t3 topLeftViewportNDC = float64_t3(-1.0, -1.0, 1.0); + constexpr static float64_t3 topRightViewportNDC = float64_t3(1.0, -1.0, 1.0); + constexpr static float64_t3 bottomLeftViewportNDC = float64_t3(-1.0, 1.0, 1.0); + constexpr static float64_t3 bottomRightViewportNDC = float64_t3(1.0, 1.0, 1.0); + // Map viewport points to world const float64_t3 topLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDC); const float64_t3 topRightViewportWorld = nbl::hlsl::mul(NDCToWorld, topRightViewportNDC); @@ -2778,29 +2765,61 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // We need to make every tile that covers the viewport resident. We reserve the amount of tiles needed for upload. core::vector tiles; - uint32_t nTiles = 0; - for (uint32_t tileX = viewportTileRange.topLeft.x; tileX <= viewportTileRange.bottomRight.x; tileX++) - for (uint32_t tileY = viewportTileRange.topLeft.y; tileY <= viewportTileRange.bottomRight.y; tileY++) - { - // Compute tile offset relative to `currentMappedRegion.topLeft`, to get tile index into the gpu image - uint32_t2 gpuImageTileIndex = ((uint32_t2(tileX, tileY) - imageStreamingState->currentMappedRegion.topLeft) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles; - nTiles += !imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]; - } - tiles.reserve(nTiles); - - // Assuming a 1 pixel per block format - otherwise math here gets a bit trickier - auto bytesPerPixel = getTexelOrBlockBytesize(imageStreamingState->georeferencedImageParams.format); - const size_t bytesPerSide = bytesPerPixel * GeoreferencedImageTileSize; + auto tilesToLoad = imageStreamingState->tilesToLoad(viewportTileRange); + tiles.reserve(tilesToLoad.size()); + + for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) + { + auto tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, 
imageTileIndex * GeoreferencedImageTileSize, uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize), imageStreamingState->currentMappedRegion.baseMipLevel); + + asset::IImage::SBufferCopy bufCopy; + bufCopy.bufferOffset = 0; + bufCopy.bufferRowLength = GeoreferencedImageTileSize; + bufCopy.bufferImageHeight = 0; + bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + bufCopy.imageSubresource.mipLevel = 0u; + bufCopy.imageSubresource.baseArrayLayer = 0u; + bufCopy.imageSubresource.layerCount = 1u; + uint32_t2 gpuImageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; + bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; + bufCopy.imageExtent.width = GeoreferencedImageTileSize; + bufCopy.imageExtent.height = GeoreferencedImageTileSize; + bufCopy.imageExtent.depth = 1; + + tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); + + // Upload the smaller tile to mip 1 + tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSizeMip1, uint32_t2(GeoreferencedImageTileSizeMip1, GeoreferencedImageTileSizeMip1), imageStreamingState->currentMappedRegion.baseMipLevel + 1); + bufCopy = {}; + + bufCopy.bufferOffset = 0; + bufCopy.bufferRowLength = GeoreferencedImageTileSizeMip1; + bufCopy.bufferImageHeight = 0; + bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + bufCopy.imageSubresource.mipLevel = 1u; + bufCopy.imageSubresource.baseArrayLayer = 0u; + bufCopy.imageSubresource.layerCount = 1u; + gpuImageOffset /= 2; // Half tile size! 
+ bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; + bufCopy.imageExtent.width = GeoreferencedImageTileSizeMip1; + bufCopy.imageExtent.height = GeoreferencedImageTileSizeMip1; + bufCopy.imageExtent.depth = 1; + + tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); + + // Mark tile as resident + imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; + } // Dangerous code - assumes image can be perfectly covered with tiles. Otherwise will need to handle edge cases // TODO: All of this code only works for mip 0. Needs to be changed next to upload mip 1. // Eventually this is all replaced by a few uploads to staging buffer + CS mip calc - for (uint32_t tileX = viewportTileRange.topLeft.x; tileX <= viewportTileRange.bottomRight.x; tileX++) + for (uint32_t tileX = viewportTileRange.topLeftTile.x; tileX <= viewportTileRange.bottomRightTile.x; tileX++) { - for (uint32_t tileY = viewportTileRange.topLeft.y; tileY <= viewportTileRange.bottomRight.y; tileY++) + for (uint32_t tileY = viewportTileRange.topLeftTile.y; tileY <= viewportTileRange.bottomRightTile.y; tileY++) { - // Compute tile offset relative to `currentMappedRegion.topLeft`, to get tile index into the gpu image - uint32_t2 gpuImageTileIndex = ((uint32_t2(tileX, tileY) - imageStreamingState->currentMappedRegion.topLeft) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles; + // Compute tile offset relative to `currentMappedRegion.topLeftTile`, to get tile index into the gpu image + uint32_t2 gpuImageTileIndex = ((uint32_t2(tileX, tileY) - imageStreamingState->currentMappedRegion.topLeftTile) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles; // If tile already resident, do nothing if (imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) @@ -2816,8 +2835,8 @@ 
DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( bufCopy.imageSubresource.mipLevel = 0u; bufCopy.imageSubresource.baseArrayLayer = 0u; bufCopy.imageSubresource.layerCount = 1u; - uint32_t2 imageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; - bufCopy.imageOffset = { imageOffset.x, imageOffset.y, 0u }; + uint32_t2 gpuImageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; + bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; bufCopy.imageExtent.width = GeoreferencedImageTileSize; bufCopy.imageExtent.height = GeoreferencedImageTileSize; bufCopy.imageExtent.depth = 1; @@ -2825,20 +2844,20 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); // Upload the smaller tile to mip 1 - tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX, tileY) * (GeoreferencedImageTileSize >> 1), uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize) >> 1u, imageStreamingState->currentMappedRegion.baseMipLevel + 1); + tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX, tileY) * GeoreferencedImageTileSizeMip1, uint32_t2(GeoreferencedImageTileSizeMip1, GeoreferencedImageTileSizeMip1), imageStreamingState->currentMappedRegion.baseMipLevel + 1); bufCopy = {}; bufCopy.bufferOffset = 0; - bufCopy.bufferRowLength = GeoreferencedImageTileSize >> 1u; + bufCopy.bufferRowLength = GeoreferencedImageTileSizeMip1; bufCopy.bufferImageHeight = 0; bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; bufCopy.imageSubresource.mipLevel = 1u; bufCopy.imageSubresource.baseArrayLayer = 0u; bufCopy.imageSubresource.layerCount = 1u; - imageOffset /= 2; // Half tile size! 
- bufCopy.imageOffset = { imageOffset.x, imageOffset.y, 0u }; - bufCopy.imageExtent.width = GeoreferencedImageTileSize >> 1u; - bufCopy.imageExtent.height = GeoreferencedImageTileSize >> 1u; + gpuImageOffset /= 2; // Half tile size! + bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; + bufCopy.imageExtent.width = GeoreferencedImageTileSizeMip1; + bufCopy.imageExtent.height = GeoreferencedImageTileSizeMip1; bufCopy.imageExtent.depth = 1; tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); @@ -2850,18 +2869,19 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // Last, we need to figure out an obb that covers only the currently loaded tiles - OrientedBoundingBox2D viewportWorldspaceOBB = imageStreamingState->georeferencedImageParams.worldspaceOBB; + OrientedBoundingBox2D viewportEncompassingOBB = imageStreamingState->georeferencedImageParams.worldspaceOBB; // The original image `dirU` corresponds to `maxImageTileIndices.x + 1` mip 0 tiles (provided it's exactly that length in tiles) // Dividing dirU by `maxImageTileIndices + (1,1)` we therefore get a vector that spans exactly one mip 0 tile (in the u direction) in worldspace. 
// Multiplying that by `2^mipLevel` we get a vector that spans exactly one mip `mipLevel` tile (in the u direction) - const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageLastTileIndices.x + 1u) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - const float32_t2 oneTileDirV = float32_t2(oneTileDirU.y, -oneTileDirU.x) * viewportWorldspaceOBB.aspectRatio; - viewportWorldspaceOBB.topLeft += oneTileDirU * float32_t(viewportTileRange.topLeft.x); - viewportWorldspaceOBB.topLeft += oneTileDirV * float32_t(viewportTileRange.topLeft.y); - - const uint32_t2 viewportTileLength = viewportTileRange.bottomRight - viewportTileRange.topLeft + uint32_t2(1, 1); - viewportWorldspaceOBB.dirU = oneTileDirU * float32_t(viewportTileLength.x); - viewportWorldspaceOBB.aspectRatio = float32_t(viewportTileLength.y) / float32_t(viewportTileLength.x); + const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); + const float32_t2 fullImageDirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x); + const float32_t2 oneTileDirV = fullImageDirV / float32_t(imageStreamingState->fullImageTileLength.y) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); + viewportEncompassingOBB.topLeft += oneTileDirU * float32_t(viewportTileRange.topLeftTile.x); + viewportEncompassingOBB.topLeft += oneTileDirV * float32_t(viewportTileRange.topLeftTile.y); + + const uint32_t2 viewportTileLength = viewportTileRange.bottomRightTile - viewportTileRange.topLeftTile + uint32_t2(1, 1); + viewportEncompassingOBB.dirU = oneTileDirU * float32_t(viewportTileLength.x); + viewportEncompassingOBB.aspectRatio = 
float32_t(viewportTileLength.y) / float32_t(viewportTileLength.x); // UV logic currently ONLY works when the image not only fits an integer amount of tiles, but also when it's a PoT amount of them // (this means every mip level also gets an integer amount of tiles). @@ -2870,15 +2890,21 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // Compute minUV, maxUV const float32_t2 uvPerTile = float32_t2(1.f, 1.f) / float32_t2(imageStreamingState->gpuImageSideLengthTiles, imageStreamingState->gpuImageSideLengthTiles); - float32_t2 minUV = uvPerTile * float32_t2(((viewportTileRange.topLeft - imageStreamingState->currentMappedRegion.topLeft) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles); + float32_t2 minUV = uvPerTile * float32_t2(((viewportTileRange.topLeftTile - imageStreamingState->currentMappedRegion.topLeftTile) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles); float32_t2 maxUV = minUV + uvPerTile * float32_t2(viewportTileLength); - return TileUploadData{ std::move(tiles), viewportWorldspaceOBB, minUV, maxUV }; + return TileUploadData{ std::move(tiles), viewportEncompassingOBB, minUV, maxUV }; } GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const float64_t3x3& NDCToWorld, const GeoreferencedImageStreamingState* imageStreamingState) { + // These are vulkan standard, might be different in n4ce! 
+ constexpr static float64_t3 topLeftViewportNDC = float64_t3(-1.0, -1.0, 1.0); + constexpr static float64_t3 topRightViewportNDC = float64_t3(1.0, -1.0, 1.0); + constexpr static float64_t3 bottomLeftViewportNDC = float64_t3(-1.0, 1.0, 1.0); + constexpr static float64_t3 bottomRightViewportNDC = float64_t3(1.0, 1.0, 1.0); + // First get world coordinates for each of the viewport's corners const float64_t3 topLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDC); const float64_t3 topRightViewportWorld = nbl::hlsl::mul(NDCToWorld, topRightViewportNDC); @@ -2906,17 +2932,20 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const // We're undoing a previous division. Could be avoided but won't restructure the code atp. // Here we compute how many image pixels each side of the viewport spans - const auto viewportWidthImagePixelLengthVector = float64_t(GeoreferencedImageTileSize) * (topRightTileLattice - topLeftTileLattice); - const auto viewportHeightImagePixelLengthVector = float64_t(GeoreferencedImageTileSize) * (bottomLeftTileLattice - topLeftTileLattice); + const float64_t2 viewportSideUImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (topRightTileLattice - topLeftTileLattice); + const float64_t2 viewportSideVImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (bottomLeftTileLattice - topLeftTileLattice); // WARNING: This assumes pixels in the image are the same size along each axis. If the image is nonuniformly scaled or sheared, I *think* it should not matter // (since the pixel span takes that transformation into account), BUT we have to check if we plan on allowing those - const auto viewportWidthImagePixelLength = nbl::hlsl::length(viewportWidthImagePixelLengthVector); - const auto viewportHeightImagePixelLength = nbl::hlsl::length(viewportHeightImagePixelLengthVector); + // Compute the side vectors of the viewport in image pixel(texel) space. 
+ // These vectors represent how many image pixels each side of the viewport spans. + // They correspond to the local axes of the mapped OBB (not the mapped region one, the viewport one) in texel coordinates. + const float64_t viewportSideUImageTexels = nbl::hlsl::length(viewportSideUImageTexelsVector); + const float64_t viewportSideVImageTexels = nbl::hlsl::length(viewportSideVImageTexelsVector); // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportWidthImagePixelLength / imageStreamingState->georeferencedImageParams.viewportExtents.x, - viewportHeightImagePixelLength / imageStreamingState->georeferencedImageParams.viewportExtents.y); + float64_t pixelRatio = nbl::hlsl::max(viewportSideUImageTexels / imageStreamingState->georeferencedImageParams.viewportExtents.x, + viewportSideVImageTexels / imageStreamingState->georeferencedImageParams.viewportExtents.y); pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; // DEBUG - Clamped at 0 for magnification @@ -2932,8 +2961,8 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const maxAllFloored >>= retVal.baseMipLevel; // Clamp them to reasonable tile indices - retVal.topLeft = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(imageStreamingState->fullImageLastTileIndices >> retVal.baseMipLevel)); - retVal.bottomRight = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), int32_t2(imageStreamingState->fullImageLastTileIndices >> retVal.baseMipLevel)); + retVal.topLeftTile = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2((imageStreamingState->fullImageTileLength - 1u) >> retVal.baseMipLevel)); + retVal.bottomRightTile = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), int32_t2((imageStreamingState->fullImageTileLength - 1u) >> retVal.baseMipLevel)); return retVal; } \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 682bc0247..42a080fce 100644 --- a/62_CAD/DrawResourcesFiller.h +++ 
b/62_CAD/DrawResourcesFiller.h @@ -146,12 +146,6 @@ struct DrawResourcesFiller { return georeferencedImageLoader->getFormat(imagePath); } - - // These are vulkan standard, might be different in n4ce! - constexpr static float64_t3 topLeftViewportNDC = float64_t3(-1.0, -1.0, 1.0); - constexpr static float64_t3 topRightViewportNDC = float64_t3(1.0, -1.0, 1.0); - constexpr static float64_t3 bottomLeftViewportNDC = float64_t3(-1.0, 1.0, 1.0); - constexpr static float64_t3 bottomRightViewportNDC = float64_t3(1.0, 1.0, 1.0); DrawResourcesFiller(); @@ -829,6 +823,9 @@ struct DrawResourcesFiller // These are mip 0 pixels per tile, also size of each physical tile into the gpu resident image constexpr static uint32_t GeoreferencedImageTileSize = 128u; + // Mip 1 tiles are naturally half the size + constexpr static uint32_t GeoreferencedImageTileSizeMip1 = GeoreferencedImageTileSize / 2; + // How many tiles of extra padding we give to the gpu image holding the tiles for a georeferenced image constexpr static uint32_t GeoreferencedImagePaddingTiles = 2; // Returns a tile range that encompasses the whole viewport in "image-world". 
Tiles are measured in the mip level required to fit the viewport entirely diff --git a/62_CAD/Images.h b/62_CAD/Images.h index a65a087ff..e40e9823f 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -109,11 +109,11 @@ struct ImageCleanup : public core::IReferenceCounted }; -// Measured in tile coordinates in the image that the range spans, and the mip level the tiles correspond to +// Measures a range of mip `baseMipLevel` tiles in the georeferenced image, starting at `topLeftTile` that is `nTiles` long struct GeoreferencedImageTileRange { - uint32_t2 topLeft; - uint32_t2 bottomRight; + uint32_t2 topLeftTile; + uint32_t2 bottomRightTile; uint32_t baseMipLevel; }; @@ -163,6 +163,31 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoords) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoords); } float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoords); } + void ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange) + { + // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 + // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap + // the mapped region. 
+ const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel != currentMappedRegion.baseMipLevel; + + // If we moved a huge amount in any direction, no tiles will remain resident, so we simply reset state + // This only need be evaluated if the mip boundary was not already crossed + const bool relativeShiftTooBig = !mipBoundaryCrossed && + nbl::hlsl::any + ( + nbl::hlsl::abs(int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegion.topLeftTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) + ) + || nbl::hlsl::any + ( + nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) + ); + + if (mipBoundaryCrossed || relativeShiftTooBig) + remapCurrentRegion(viewportTileRange); + else + slideCurrentRegion(viewportTileRange); + } + // When the current mapped region is inadequate to fit the viewport, we compute a new mapped region void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) { @@ -177,6 +202,18 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles } currentMappedRegion = viewportTileRange; + // We can expand the currentMappedRegion to make it as big as possible, at no extra cost since we only upload tiles on demand + // Since we use toroidal updating it's kinda the same which way we expand the region. 
We first try to expand it downwards to the right + const uint32_t2 currentTileExtents = currentMappedRegion.bottomRightTile - currentMappedRegion.topLeftTile + uint32_t2(1, 1); + // Extend extent up to `gpuImageSideLengthTiles` by moving the `bottomRightTile` an appropriate amount downwards to the right + currentMappedRegion.bottomRightTile += uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - currentTileExtents; + // This extension can cause the mapped region to fall out of bounds on border cases, therefore we clamp it and extend it in the other direction + // by the amount of tiles we removed during clamping + const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.bottomRightTile + 1u) - int32_t2(fullImageTileLength))); + currentMappedRegion.bottomRightTile -= excessTiles; + // Now, on some pathological cases (such as an image that is not long along one dimension but very long along the other) shifting of the topLeftTile + // could fall out of bounds.
So we shift if possible, otherwise set it to 0 + currentMappedRegion.topLeftTile = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.topLeftTile) - int32_t2(excessTiles))); currentMappedRegionOccupancy.resize(gpuImageSideLengthTiles); for (auto i = 0u; i < gpuImageSideLengthTiles; i++) @@ -187,104 +224,109 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted gpuImageTopLeft = uint32_t2(0, 0); } - // When we can shift the mapped a region a bit and avoid tile uploads by using toroidal shifting - void shiftAndExpandCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) + // Checks whether the viewport falls entirely within the current mapped region and slides the latter otherwise, just enough until it covers the viewport + void slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) { - // `topLeftDiff` starts as the vector (in tiles) from the current mapped region's top left to the top left of the range encompassing the viewport - int32_t2 topLeftDiff = int32_t2(viewportTileRange.topLeft) - int32_t2(currentMappedRegion.topLeft); - // Since we only consider expanding the mapped region by moving the top left up and to the left, we clamp the above vector to `(-infty, 0] x (-infty, 0]` - topLeftDiff = nbl::hlsl::min(topLeftDiff, int32_t2(0, 0)); - int32_t2 nextTopLeft = int32_t2(currentMappedRegion.topLeft) + topLeftDiff; - // Same logic for bottom right but considering it only moves down and to the right, so clamped to `[0, infty) x [0, infty)` - int32_t2 bottomRightDiff = int32_t2(viewportTileRange.bottomRight) - int32_t2(currentMappedRegion.bottomRight); - bottomRightDiff = nbl::hlsl::max(bottomRightDiff, int32_t2(0, 0)); - int32_t2 nextBottomRight = int32_t2(currentMappedRegion.bottomRight) + bottomRightDiff; - - // If the number of tiles resident in this new mapped region along any axis becomes bigger than the max number of tiles the gpu image can hold, - // we need to shrink this next mapped
region. For this to happen, we have to have expanded in only one direction, the one that has `diff != 0` - // Therefore, we need to shrink the mapped region along the axis that has `diff = 0`, just enough tiles so that the mapped region's tile size stays within - // the max number of tiles the gpu image can hold. - int32_t2 nextMappedRegionDimensions = nextBottomRight - nextTopLeft + 1; - uint32_t2 currentMappedRegionDimensions = currentMappedRegion.bottomRight - currentMappedRegion.topLeft + 1u; - uint32_t2 gpuImageBottomRight = (gpuImageTopLeft + currentMappedRegionDimensions - 1u) % gpuImageSideLengthTiles; - - // Shrink along x axis - if (nextMappedRegionDimensions.x > gpuImageSideLengthTiles) + // `topLeftShift` represents how many tiles up and to the left we have to move the mapped region to fit the viewport. + // First we compute a vector from the current mapped region's topleft to the viewport's topleft. If this vector is positive along a dimension it means + // the viewport's topleft is to the right or below the current mapped region's topleft, so we don't have to shift the mapped region to the left/up in that case + const int32_t2 topLeftShift = nbl::hlsl::min(int32_t2(0, 0), int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegion.topLeftTile)); + // `bottomRightShift` represents the same as above but in the other direction. 
+ const int32_t2 bottomRightShift = nbl::hlsl::max(int32_t2(0, 0), int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)); + + // Mark dropped tiles as dirty/non-resident + // The following is not necessarily equal to `gpuImageSideLengthTiles` since there can be pathological cases, as explained in the remapping method + const uint32_t2 mappedRegionDimensions = currentMappedRegion.bottomRightTile - currentMappedRegion.topLeftTile + 1u; + const uint32_t2 gpuImageBottomRight = (gpuImageTopLeft + mappedRegionDimensions - 1u) % gpuImageSideLengthTiles; + + if (topLeftShift.x < 0) { - int32_t tilesToFit = nextMappedRegionDimensions.x - gpuImageSideLengthTiles; - if (0 == topLeftDiff.x) + // Shift left + const uint32_t tilesToFit = -topLeftShift.x; + for (uint32_t tile = 0; tile < tilesToFit; tile++) { - // Move topLeft to the right to fit tiles on the other side - nextTopLeft.x += tilesToFit; - topLeftDiff.x += tilesToFit; - // Mark all these tiles as non-resident - for (uint32_t tile = 0; tile < tilesToFit; tile++) - { - // Get actual tile index with wraparound - uint32_t tileIdx = (tile + gpuImageTopLeft.x) % gpuImageSideLengthTiles; - for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) - currentMappedRegionOccupancy[tileIdx][i] = false; - } + // Get actual tile index with wraparound + uint32_t tileIdx = (gpuImageBottomRight.x + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[tileIdx][i] = false; } - else + } + else if (bottomRightShift.x > 0) + { + //Shift right + const uint32_t tilesToFit = bottomRightShift.x; + for (uint32_t tile = 0; tile < tilesToFit; tile++) { - // Move bottomRight to the left to fit tiles on the other side - nextBottomRight.x -= tilesToFit; - // Mark all these tiles as non-resident - for (uint32_t tile = 0; tile < tilesToFit; tile++) - { - // Get actual tile index with wraparound - uint32_t tileIdx = 
(gpuImageBottomRight.x + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; - for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) - currentMappedRegionOccupancy[tileIdx][i] = false; - } + // Get actual tile index with wraparound + uint32_t tileIdx = (tile + gpuImageTopLeft.x) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[tileIdx][i] = false; } } - // Shrink along y axis - if (nextMappedRegionDimensions.y > gpuImageSideLengthTiles) + + if (topLeftShift.y < 0) { - int32_t tilesToFit = nextMappedRegionDimensions.y - gpuImageSideLengthTiles; - if (0 == topLeftDiff.y) + // Shift up + const uint32_t tilesToFit = -topLeftShift.y; + for (uint32_t tile = 0; tile < tilesToFit; tile++) { - // Move topLeft down to fit tiles on the other side - nextTopLeft.y += tilesToFit; - topLeftDiff.y += tilesToFit; - // Mark all these tiles as non-resident - for (uint32_t tile = 0; tile < tilesToFit; tile++) - { - // Get actual tile index with wraparound - uint32_t tileIdx = (tile + gpuImageTopLeft.y) % gpuImageSideLengthTiles; - for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) - currentMappedRegionOccupancy[i][tileIdx] = false; - } + // Get actual tile index with wraparound + uint32_t tileIdx = (gpuImageBottomRight.y + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[i][tileIdx] = false; } - else + } + else if (bottomRightShift.y > 0) + { + //Shift down + const uint32_t tilesToFit = bottomRightShift.y; + for (uint32_t tile = 0; tile < tilesToFit; tile++) { - // Move bottomRight up to fit tiles on the other side - nextBottomRight.y -= tilesToFit; - // Mark all these tiles as non-resident - for (uint32_t tile = 0; tile < tilesToFit; tile++) - { - // Get actual tile index with wraparound - uint32_t tileIdx = (gpuImageBottomRight.y + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; - for 
(uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) - currentMappedRegionOccupancy[i][tileIdx] = false; - } + // Get actual tile index with wraparound + uint32_t tileIdx = (tile + gpuImageTopLeft.y) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[i][tileIdx] = false; } } - // Set new values for mapped region - currentMappedRegion.topLeft = nextTopLeft; - currentMappedRegion.bottomRight = nextBottomRight; + // Shift the mapped region accordingly + // A nice consequence of the mapped region being always maximally - sized is that + // along any dimension, only a shift in one direction is necessary, so we can simply add up the shifts + currentMappedRegion.topLeftTile = uint32_t2(int32_t2(currentMappedRegion.topLeftTile) + topLeftShift + bottomRightShift); + currentMappedRegion.bottomRightTile = uint32_t2(int32_t2(currentMappedRegion.bottomRightTile) + topLeftShift + bottomRightShift); // Toroidal shift for the gpu image top left - gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftDiff + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; + gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftShift + bottomRightShift + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; + } + + // This can become a rectangle if we implement the by-rectangle upload instead of tile-by-tile to reduce loader calls + struct ImageTileToGPUTileCorrespondence + { + uint32_t2 imageTileIndex; + uint32_t2 gpuImageTileIndex; + }; + + // Given a tile range covering the viewport, returns which tiles (at the mip level of the current mapped region) need to be made resident to draw it, + // returning a vector of `ImageTileToGPUTileCorrespondence`, each indicating that tile `imageTileIndex` in the full image needs to be uploaded to tile + // `gpuImageTileIndex` in the gpu image + core::vector tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) + { + core::vector retVal; + for (uint32_t tileX = 
viewportTileRange.topLeftTile.x; tileX <= viewportTileRange.bottomRightTile.x; tileX++) + for (uint32_t tileY = viewportTileRange.topLeftTile.y; tileY <= viewportTileRange.bottomRightTile.y; tileY++) + { + uint32_t2 imageTileIndex = uint32_t2(tileX, tileY); + uint32_t2 gpuImageTileIndex = ((imageTileIndex - currentMappedRegion.topLeftTile) + gpuImageTopLeft) % gpuImageSideLengthTiles; + if (!currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) + retVal.push_back({ imageTileIndex , gpuImageTileIndex }); + } + return retVal; } // Sidelength of the gpu image, in tiles that are `GeoreferencedImageTileSize` pixels wide uint32_t gpuImageSideLengthTiles = {}; - // Size of the image (minus 1), in tiles of `GeoreferencedImageTileSize` sidelength - uint32_t2 fullImageLastTileIndices = {}; + // Size of the image in tiles of `GeoreferencedImageTileSize` sidelength + uint32_t2 fullImageTileLength = {}; // Set mip level to extreme value so it gets recreated on first iteration GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; // Indicates on which tile of the gpu image the current mapped region's `topLeft` resides From f0ba40fedfba85bb6839c592ac4270890107c346 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 29 Aug 2025 17:32:27 -0300 Subject: [PATCH 17/29] Another checkpoint before modifying UV logic --- 62_CAD/DrawResourcesFiller.cpp | 9 ++++----- 62_CAD/DrawResourcesFiller.h | 4 ++-- 62_CAD/Images.h | 22 +++++++++++++++++----- 62_CAD/main.cpp | 8 ++++++-- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 679187f9c..21ca0250c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -736,13 +736,10 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = 
allocResults.gpuImageView; cachedImageRecord->staticCPUImage = nullptr; - cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(std::move(params)); + cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(std::move(params), GeoreferencedImageTileSize); // This is because gpu image is square cachedImageRecord->georeferencedImageState->gpuImageSideLengthTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; - - auto& fullImageTileLength = cachedImageRecord->georeferencedImageState->fullImageTileLength; - fullImageTileLength = (cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents - 1u) / GeoreferencedImageTileSize + 1u; } else { @@ -2954,7 +2951,9 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const } GeoreferencedImageTileRange retVal = {}; - retVal.baseMipLevel = nbl::hlsl::findMSB(uint32_t(nbl::hlsl::floor(pixelRatio))); + // Clamp mip level so we don't consider tiles that are too small along one dimension + // If on a pathological case this gets too expensive because the GPU starts sampling a lot, we can consider changing this, but I doubt that will happen + retVal.baseMipLevel = nbl::hlsl::min(nbl::hlsl::findMSB(uint32_t(nbl::hlsl::floor(pixelRatio))), int32_t(imageStreamingState->maxMipLevel)); // Current tiles are measured in mip 0. We want the result to measure mip `retVal.baseMipLevel` tiles. Each next mip level divides by 2. 
minAllFloored >>= retVal.baseMipLevel; diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 42a080fce..d569005e6 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -127,7 +127,7 @@ struct DrawResourcesFiller { virtual core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel) = 0; - virtual uint32_t2 getExtents(std::filesystem::path imagePath) = 0; + virtual uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) = 0; virtual asset::E_FORMAT getFormat(std::filesystem::path imagePath) = 0; }; @@ -139,7 +139,7 @@ struct DrawResourcesFiller uint32_t2 queryGeoreferencedImageExtents(std::filesystem::path imagePath) { - return georeferencedImageLoader->getExtents(imagePath); + return georeferencedImageLoader->getExtents(imagePath, 0); } asset::E_FORMAT queryGeoreferencedImageFormat(std::filesystem::path imagePath) diff --git a/62_CAD/Images.h b/62_CAD/Images.h index e40e9823f..8b380ad7e 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -123,7 +123,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted friend class DrawResourcesFiller; protected: - static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams) + static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize) { smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); retVal->georeferencedImageParams = std::move(_georeferencedImageParams); @@ -151,6 +151,14 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // Put them all together retVal->world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); + + // Also set the maxMipLevel + uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(retVal->georeferencedImageParams.imageExtents / TileSize)); + retVal->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, 
maxMipLevels.y); + + // Set max number of mip 0 tiles + retVal->fullImageTileLength = (retVal->georeferencedImageParams.imageExtents - 1u) / TileSize + 1u; + return retVal; } @@ -323,16 +331,20 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted return retVal; } - // Sidelength of the gpu image, in tiles that are `GeoreferencedImageTileSize` pixels wide + // Sidelength of the gpu image, in tiles that are `TileSize` pixels wide uint32_t gpuImageSideLengthTiles = {}; - // Size of the image in tiles of `GeoreferencedImageTileSize` sidelength + // We establish a max mipLevel for the image, which is the mip level at which any of width, height fit in a single Tile + uint32_t maxMipLevel = {}; + // Size of the image in tiles of `TileSize` sidelength uint32_t2 fullImageTileLength = {}; - // Set mip level to extreme value so it gets recreated on first iteration - GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; // Indicates on which tile of the gpu image the current mapped region's `topLeft` resides uint32_t2 gpuImageTopLeft = {}; // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) float64_t2x3 world2UV = {}; + // If the image dimensions are not exactly divisible by `TileSize`, then the last tile along a dimension only holds a proportion of `lastTileFraction` pixels along that dimension + float64_t lastTileFraction = {}; + // Set mip level to extreme value so it gets recreated on first iteration + GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; }; struct CachedImageRecord diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index ecd05e00d..ebc0a1e8a 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -367,8 +367,12 @@ bool performImageFormatPromotionCopy(const core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel) override { + auto 
mippedImageExtents = getExtents(imagePath, mipLevel); + // If `offset + extent` exceeds the extent of the image at the current mip level, we clamp it + extent = nbl::hlsl::min(mippedImageExtents - offset, extent); // Image path ignored for this hardcoded example const auto& image = mipLevels[mipLevel]; const auto& imageBuffer = image->getBuffer(); @@ -511,9 +515,9 @@ struct ImageLoader : public DrawResourcesFiller::IGeoreferencedImageLoader } } - uint32_t2 getExtents(std::filesystem::path imagePath) override + uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) override { - uint32_t sidelength = mipLevels[0]->getCreationParameters().extent.width; + uint32_t sidelength = mipLevels[0]->getCreationParameters().extent.width >> mipLevel; return uint32_t2(sidelength, sidelength); } From dc322da29be98401c876fb3f4216c8f9afcec3a8 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 10 Sep 2025 18:24:35 -0300 Subject: [PATCH 18/29] Checkpoint: example mip level emulated computation --- 62_CAD/DrawResourcesFiller.cpp | 64 +++------------------------------- 62_CAD/DrawResourcesFiller.h | 2 +- 62_CAD/main.cpp | 41 ++++++++++++---------- 3 files changed, 28 insertions(+), 79 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 21ca0250c..2ddf5dafb 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -1565,7 +1565,7 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende .baseArrayLayer = 0u, .layerCount = ICPUImageView::remaining_array_layers }, - .oldLayout = IImage::LAYOUT::UNDEFINED, + .oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, }); } @@ -2767,7 +2767,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) { - auto tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, 
imageTileIndex * GeoreferencedImageTileSize, uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize), imageStreamingState->currentMappedRegion.baseMipLevel); + auto tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSize, uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize), imageStreamingState->currentMappedRegion.baseMipLevel, true); asset::IImage::SBufferCopy bufCopy; bufCopy.bufferOffset = 0; @@ -2786,7 +2786,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); // Upload the smaller tile to mip 1 - tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSizeMip1, uint32_t2(GeoreferencedImageTileSizeMip1, GeoreferencedImageTileSizeMip1), imageStreamingState->currentMappedRegion.baseMipLevel + 1); + tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSizeMip1, uint32_t2(GeoreferencedImageTileSizeMip1, GeoreferencedImageTileSizeMip1), imageStreamingState->currentMappedRegion.baseMipLevel, false); bufCopy = {}; bufCopy.bufferOffset = 0; @@ -2808,62 +2808,6 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; } - // Dangerous code - assumes image can be perfectly covered with tiles. Otherwise will need to handle edge cases - // TODO: All of this code only works for mip 0. Needs to be changed next to upload mip 1. 
- // Eventually this is all replaced by a few uploads to staging buffer + CS mip calc - for (uint32_t tileX = viewportTileRange.topLeftTile.x; tileX <= viewportTileRange.bottomRightTile.x; tileX++) - { - for (uint32_t tileY = viewportTileRange.topLeftTile.y; tileY <= viewportTileRange.bottomRightTile.y; tileY++) - { - // Compute tile offset relative to `currentMappedRegion.topLeftTile`, to get tile index into the gpu image - uint32_t2 gpuImageTileIndex = ((uint32_t2(tileX, tileY) - imageStreamingState->currentMappedRegion.topLeftTile) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles; - - // If tile already resident, do nothing - if (imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) - continue; - - auto tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX, tileY) * GeoreferencedImageTileSize, uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize), imageStreamingState->currentMappedRegion.baseMipLevel); - - asset::IImage::SBufferCopy bufCopy; - bufCopy.bufferOffset = 0; - bufCopy.bufferRowLength = GeoreferencedImageTileSize; - bufCopy.bufferImageHeight = 0; - bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; - bufCopy.imageSubresource.mipLevel = 0u; - bufCopy.imageSubresource.baseArrayLayer = 0u; - bufCopy.imageSubresource.layerCount = 1u; - uint32_t2 gpuImageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; - bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; - bufCopy.imageExtent.width = GeoreferencedImageTileSize; - bufCopy.imageExtent.height = GeoreferencedImageTileSize; - bufCopy.imageExtent.depth = 1; - - tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); - - // Upload the smaller tile to mip 1 - tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, uint32_t2(tileX, tileY) * 
GeoreferencedImageTileSizeMip1, uint32_t2(GeoreferencedImageTileSizeMip1, GeoreferencedImageTileSizeMip1), imageStreamingState->currentMappedRegion.baseMipLevel + 1); - bufCopy = {}; - - bufCopy.bufferOffset = 0; - bufCopy.bufferRowLength = GeoreferencedImageTileSizeMip1; - bufCopy.bufferImageHeight = 0; - bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; - bufCopy.imageSubresource.mipLevel = 1u; - bufCopy.imageSubresource.baseArrayLayer = 0u; - bufCopy.imageSubresource.layerCount = 1u; - gpuImageOffset /= 2; // Half tile size! - bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; - bufCopy.imageExtent.width = GeoreferencedImageTileSizeMip1; - bufCopy.imageExtent.height = GeoreferencedImageTileSizeMip1; - bufCopy.imageExtent.depth = 1; - - tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); - - // Mark tile as resident - imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; - } - } - // Last, we need to figure out an obb that covers only the currently loaded tiles OrientedBoundingBox2D viewportEncompassingOBB = imageStreamingState->georeferencedImageParams.worldspaceOBB; @@ -2871,7 +2815,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // Dividing dirU by `maxImageTileIndices + (1,1)` we therefore get a vector that spans exactly one mip 0 tile (in the u direction) in worldspace. 
// Multiplying that by `2^mipLevel` we get a vector that spans exactly one mip `mipLevel` tile (in the u direction) const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - const float32_t2 fullImageDirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x); + const float32_t2 fullImageDirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x) * imageStreamingState->georeferencedImageParams.worldspaceOBB.aspectRatio; const float32_t2 oneTileDirV = fullImageDirV / float32_t(imageStreamingState->fullImageTileLength.y) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); viewportEncompassingOBB.topLeft += oneTileDirU * float32_t(viewportTileRange.topLeftTile.x); viewportEncompassingOBB.topLeft += oneTileDirV * float32_t(viewportTileRange.topLeftTile.y); diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index d569005e6..b381f996c 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -125,7 +125,7 @@ struct DrawResourcesFiller struct IGeoreferencedImageLoader : IReferenceCounted { - virtual core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel) = 0; + virtual core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) = 0; virtual uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) = 0; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index ebc0a1e8a..ca60d5e3c 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -49,7 +49,7 @@ static constexpr bool DebugRotatingViewProj = false; static 
constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic -static constexpr bool testCameraRotation = true; +static constexpr bool testCameraRotation = false; enum class ExampleMode { @@ -136,7 +136,7 @@ class Camera2D if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) { - m_bounds = m_bounds + float64_t2{ (double)ev.scrollEvent.verticalScroll * -0.0025 * m_aspectRatio, (double)ev.scrollEvent.verticalScroll * -0.0025}; + m_bounds = m_bounds + float64_t2{ (double)ev.scrollEvent.verticalScroll * -0.025 * m_aspectRatio, (double)ev.scrollEvent.verticalScroll * -0.025}; m_bounds = float64_t2{ core::max(m_aspectRatio, m_bounds.x), core::max(1.0, m_bounds.y) }; } } @@ -368,13 +368,13 @@ bool performImageFormatPromotionCopy(const core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel) override + core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) override { auto mippedImageExtents = getExtents(imagePath, mipLevel); // If `offset + extent` exceeds the extent of the image at the current mip level, we clamp it extent = nbl::hlsl::min(mippedImageExtents - offset, extent); // Image path ignored for this hardcoded example - const auto& image = mipLevels[mipLevel]; + const auto& image = downsample ? 
baseMipLevels[mipLevel] : downsampledMipLevels[mipLevel]; const auto& imageBuffer = image->getBuffer(); const core::rational bytesPerPixel = asset::getBytesPerPixel(image->getCreationParameters().format); const size_t bytesPerRow = (bytesPerPixel * extent.x).getIntegerApprox(); @@ -501,29 +501,33 @@ struct ImageLoader : public DrawResourcesFiller::IGeoreferencedImageLoader } }; - // TODO: Unhardcode - const std::string basePath = "../../media/tiled_grid_mip_"; - smart_refctd_ptr img = loadImage(basePath + "0.exr"); - const uint32_t sidelength = img->getCreationParameters().extent.width; + // This is all hardcoded for the example + const std::string basePath = "../../media/npot_geotex_mip_"; + smart_refctd_ptr img = loadImage(basePath + "0_base.png"); - const uint32_t maxMipLevel = nbl::hlsl::findMSB(sidelength / 128u); - mipLevels.reserve(maxMipLevel + 1); - mipLevels.emplace_back(std::move(img)); + // This is hardcoded + const uint32_t maxMipLevel = 7; + baseMipLevels.reserve(maxMipLevel + 1); + baseMipLevels.emplace_back(std::move(img)); for (auto i = 1u; i <= maxMipLevel; i++) { - mipLevels.emplace_back(loadImage(basePath + std::to_string(i) + ".exr")); + baseMipLevels.emplace_back(loadImage(basePath + std::to_string(i) + "_base.png")); + } + downsampledMipLevels.reserve(maxMipLevel + 1); + for (auto i = 0u; i <= maxMipLevel; i++) + { + downsampledMipLevels.emplace_back(loadImage(basePath + std::to_string(i) + "_downsampled.png")); } } uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) override { - uint32_t sidelength = mipLevels[0]->getCreationParameters().extent.width >> mipLevel; - return uint32_t2(sidelength, sidelength); + return { baseMipLevels[mipLevel]->getCreationParameters().extent.width, baseMipLevels[mipLevel]->getCreationParameters().extent.height }; } asset::E_FORMAT getFormat(std::filesystem::path imagePath) override { - return mipLevels[0]->getCreationParameters().format; + return 
baseMipLevels[0]->getCreationParameters().format; } private: @@ -532,7 +536,8 @@ struct ImageLoader : public DrawResourcesFiller::IGeoreferencedImageLoader system::ILogger* m_logger = {}; video::IPhysicalDevice* m_physicalDevice = {}; // We're going to fake it in the example so it's easier to work with, but the interface remains - core::vector> mipLevels = {}; + core::vector> baseMipLevels = {}; + core::vector> downsampledMipLevels = {}; }; class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication @@ -3876,12 +3881,12 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio tiledGridParams.worldspaceOBB.topLeft = startingTopLeft; // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU - const static float64_t startingImagePixelsPerViewportPixels = 2.0; + const static float64_t startingImagePixelsPerViewportPixels = 1.0; const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); const static auto dirU = startingViewportWidthVector * float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); tiledGridParams.worldspaceOBB.dirU = dirU; - tiledGridParams.worldspaceOBB.aspectRatio = 1.0; tiledGridParams.imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath); + tiledGridParams.worldspaceOBB.aspectRatio = float32_t(tiledGridParams.imageExtents.y) / tiledGridParams.imageExtents.x; tiledGridParams.viewportExtents = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; tiledGridParams.format = drawResourcesFiller.queryGeoreferencedImageFormat(tiledGridPath); tiledGridParams.storagePath = tiledGridPath; From 2be88a50d78ebb267366f8cc9325b6eddd5e09d3 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 12 Sep 2025 01:30:47 -0300 
Subject: [PATCH 19/29] nPoT handled! --- 62_CAD/DrawResourcesFiller.cpp | 83 ++++++++++++++++++++++++++-------- 62_CAD/DrawResourcesFiller.h | 21 ++++++++- 62_CAD/Images.h | 26 +++++++++-- 62_CAD/main.cpp | 67 +++++++++++++++------------ 4 files changed, 145 insertions(+), 52 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 2ddf5dafb..d20a8c98c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2767,11 +2767,49 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) { - auto tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSize, uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize), imageStreamingState->currentMappedRegion.baseMipLevel, true); + uint32_t2 gpuMip0Texels(GeoreferencedImageTileSize,GeoreferencedImageTileSize); + core::smart_refctd_ptr gpuMip0Tile = nullptr; + core::smart_refctd_ptr gpuMip1Tile = nullptr; + { + uint32_t2 georeferencedImageMip0SampledTexels = uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; + const uint32_t2 georeferencedImageMip0SamplingOffset = (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; + const uint32_t2 lastTileIndex = imageStreamingState->getLastTileIndex(viewportTileRange.baseMipLevel); + + if (imageTileIndex.x == lastTileIndex.x) + { + georeferencedImageMip0SampledTexels.x = imageStreamingState->georeferencedImageParams.imageExtents.x - georeferencedImageMip0SamplingOffset.x; + uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.x >> (viewportTileRange.baseMipLevel + 1); + gpuMip0Texels.x = 2 * gpuMip1Texels; + imageStreamingState->lastGPUTileTexels.x = gpuMip0Texels.x; + } + if (imageTileIndex.y == lastTileIndex.y) + { + georeferencedImageMip0SampledTexels.y = 
imageStreamingState->georeferencedImageParams.imageExtents.y - georeferencedImageMip0SamplingOffset.y; + uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.y >> (viewportTileRange.baseMipLevel + 1); + gpuMip0Texels.y = 2 * gpuMip1Texels; + imageStreamingState->lastGPUTileTexels.y = gpuMip0Texels.y; + } + + // If the last tile is too small just ignore it + // If this looks bad we can do fractional pixelage by moving the uv an even tinier amount but at high zoom levels it should be imperceptible + if (!imageStreamingState->lastGPUTileTexels.x || !imageStreamingState->lastGPUTileTexels.y) + continue; + if (!georeferencedImageLoader->hasPrecomputedMips(imageStreamingState->georeferencedImageParams.storagePath)) + { + gpuMip0Tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel, georeferencedImageMip0SampledTexels, gpuMip0Texels); + gpuMip1Tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel, georeferencedImageMip0SampledTexels, gpuMip0Texels / 2u); + } + else + { + gpuMip0Tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSize, gpuMip0Texels, imageStreamingState->currentMappedRegion.baseMipLevel, false); + gpuMip1Tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSizeMip1, gpuMip0Texels / 2u, imageStreamingState->currentMappedRegion.baseMipLevel, true); + } + } + asset::IImage::SBufferCopy bufCopy; bufCopy.bufferOffset = 0; - bufCopy.bufferRowLength = GeoreferencedImageTileSize; + bufCopy.bufferRowLength = gpuMip0Texels.x; bufCopy.bufferImageHeight = 0; bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; bufCopy.imageSubresource.mipLevel = 
0u; @@ -2779,18 +2817,17 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( bufCopy.imageSubresource.layerCount = 1u; uint32_t2 gpuImageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; - bufCopy.imageExtent.width = GeoreferencedImageTileSize; - bufCopy.imageExtent.height = GeoreferencedImageTileSize; + bufCopy.imageExtent.width = gpuMip0Texels.x; + bufCopy.imageExtent.height = gpuMip0Texels.y; bufCopy.imageExtent.depth = 1; - tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); + tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(gpuMip0Tile), std::move(bufCopy)); // Upload the smaller tile to mip 1 - tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSizeMip1, uint32_t2(GeoreferencedImageTileSizeMip1, GeoreferencedImageTileSizeMip1), imageStreamingState->currentMappedRegion.baseMipLevel, false); bufCopy = {}; bufCopy.bufferOffset = 0; - bufCopy.bufferRowLength = GeoreferencedImageTileSizeMip1; + bufCopy.bufferRowLength = gpuMip0Texels.x / 2; bufCopy.bufferImageHeight = 0; bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; bufCopy.imageSubresource.mipLevel = 1u; @@ -2798,11 +2835,11 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( bufCopy.imageSubresource.layerCount = 1u; gpuImageOffset /= 2; // Half tile size! 
bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; - bufCopy.imageExtent.width = GeoreferencedImageTileSizeMip1; - bufCopy.imageExtent.height = GeoreferencedImageTileSizeMip1; + bufCopy.imageExtent.width = gpuMip0Texels.x / 2; + bufCopy.imageExtent.height = gpuMip0Texels.y / 2; bufCopy.imageExtent.depth = 1; - tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(tile), std::move(bufCopy)); + tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(gpuMip1Tile), std::move(bufCopy)); // Mark tile as resident imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; @@ -2821,19 +2858,25 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( viewportEncompassingOBB.topLeft += oneTileDirV * float32_t(viewportTileRange.topLeftTile.y); const uint32_t2 viewportTileLength = viewportTileRange.bottomRightTile - viewportTileRange.topLeftTile + uint32_t2(1, 1); - viewportEncompassingOBB.dirU = oneTileDirU * float32_t(viewportTileLength.x); - viewportEncompassingOBB.aspectRatio = float32_t(viewportTileLength.y) / float32_t(viewportTileLength.x); - + // If the last tile is visible, we use the fractional span for the last tile. Otherwise it's just a normal tile + const float32_t2 lastGeoreferencedImageTileFractionalSpan = float32_t2(imageStreamingState->lastGPUTileTexels) / float32_t(GeoreferencedImageTileSize); + const bool2 isLastTileLoaded = imageStreamingState->isLastTileVisible(viewportTileRange.bottomRightTile); + const float32_t2 lastGPUImageTileFractionalSpan = { isLastTileLoaded.x ? lastGeoreferencedImageTileFractionalSpan.x : 1.f, isLastTileLoaded.y ? 
lastGeoreferencedImageTileFractionalSpan.y : 1.f }; + + viewportEncompassingOBB.dirU = oneTileDirU * (float32_t(viewportTileLength.x - 1u) + lastGPUImageTileFractionalSpan.x); + viewportEncompassingOBB.aspectRatio = (float32_t(viewportTileLength.y - 1u) + lastGPUImageTileFractionalSpan.y) / (float32_t(viewportTileLength.x - 1u) + lastGPUImageTileFractionalSpan.x); + // UV logic currently ONLY works when the image not only fits an integer amount of tiles, but also when it's a PoT amount of them // (this means every mip level also gets an integer amount of tiles). // When porting to n4ce, for the image to fit an integer amount of tiles (instead of rewriting the logic) we can just pad the right/bottom sides with alpha=0 pixels // The UV logic will have to change to consider what happens to the last loaded tile (or, alternatively, we can also fill the empty tiles with alpha=0 pixels) - + // Compute minUV, maxUV const float32_t2 uvPerTile = float32_t2(1.f, 1.f) / float32_t2(imageStreamingState->gpuImageSideLengthTiles, imageStreamingState->gpuImageSideLengthTiles); - float32_t2 minUV = uvPerTile * float32_t2(((viewportTileRange.topLeftTile - imageStreamingState->currentMappedRegion.topLeftTile) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles); - float32_t2 maxUV = minUV + uvPerTile * float32_t2(viewportTileLength); - + const float32_t2 minUV = uvPerTile * float32_t2(((viewportTileRange.topLeftTile - imageStreamingState->currentMappedRegion.topLeftTile) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles); + float32_t2 maxUV = minUV + uvPerTile * float32_t2(viewportTileLength - 1u); + // uvPerTile is the uv per GeoreferencedImageTileSize pixels. 
Since the last tile might not be fully resident with pixels, we don't add the uv for it above and add the proper uv it should be sampled at here + maxUV += uvPerTile * lastGPUImageTileFractionalSpan; return TileUploadData{ std::move(tiles), viewportEncompassingOBB, minUV, maxUV }; } @@ -2903,9 +2946,11 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const minAllFloored >>= retVal.baseMipLevel; maxAllFloored >>= retVal.baseMipLevel; + // Clamp them to reasonable tile indices - retVal.topLeftTile = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2((imageStreamingState->fullImageTileLength - 1u) >> retVal.baseMipLevel)); - retVal.bottomRightTile = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), int32_t2((imageStreamingState->fullImageTileLength - 1u) >> retVal.baseMipLevel)); + int32_t2 lastTileIndex = imageStreamingState->getLastTileIndex(retVal.baseMipLevel); + retVal.topLeftTile = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), lastTileIndex); + retVal.bottomRightTile = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), lastTileIndex); return retVal; } \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index b381f996c..ce9a33fa8 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -123,13 +123,32 @@ struct DrawResourcesFiller } }; + // TODO: Figure out how to do this statically, this is essentially the same as having two templated versions and dynamic casting + // Probably CRTP/F-bound but it might be overkill struct IGeoreferencedImageLoader : IReferenceCounted { - virtual core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) = 0; + core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) + { + assert(hasPrecomputedMips(imagePath)); + return load_impl(imagePath, offset, extent, mipLevel, downsample); + } 
+ + core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) + { + assert(!hasPrecomputedMips(imagePath)); + return load_impl(imagePath, offset, extent, targetExtent); + } virtual uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) = 0; virtual asset::E_FORMAT getFormat(std::filesystem::path imagePath) = 0; + + virtual bool hasPrecomputedMips(std::filesystem::path imagePath) const = 0; + private: + + virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) { return nullptr; } + + virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) { return nullptr; } }; void setGeoreferencedImageLoader(core::smart_refctd_ptr&& _georeferencedImageLoader) diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 8b380ad7e..0da6dc0ea 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -159,11 +159,12 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // Set max number of mip 0 tiles retVal->fullImageTileLength = (retVal->georeferencedImageParams.imageExtents - 1u) / TileSize + 1u; + retVal->lastGPUTileTexels = {TileSize, TileSize}; + return retVal; } - GeoreferencedImageParams georeferencedImageParams = {}; - std::vector> currentMappedRegionOccupancy = {}; + // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial @@ -217,7 +218,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted currentMappedRegion.bottomRightTile += uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - currentTileExtents; // This extension can cause the mapped region to fall out of bounds on border cases, therefore we clamp it and extend it in the other direction // by the amount of tiles we removed during clamping - const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.bottomRightTile + 1u) - int32_t2(fullImageTileLength))); + const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.bottomRightTile) - int32_t2(getLastTileIndex(currentMappedRegion.baseMipLevel)))); currentMappedRegion.bottomRightTile -= excessTiles; // Now, on some pathological cases (such as an image that is not long along one dimension but very long along the other) shifting of the topLeftTile // could fall out of bounds. 
So we shift if possible, otherwise set it to 0 @@ -317,7 +318,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // Given a tile range covering the viewport, returns which tiles (at the mip level of the current mapped region) need to be made resident to draw it, // returning a vector of `ImageTileToGPUTileCorrespondence`, each indicating that tile `imageTileIndex` in the full image needs to be uploaded to tile // `gpuImageTileIndex` in the gpu image - core::vector tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) + core::vector tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) const { core::vector retVal; for (uint32_t tileX = viewportTileRange.topLeftTile.x; tileX <= viewportTileRange.bottomRightTile.x; tileX++) @@ -331,6 +332,21 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted return retVal; } + // Returns the index of the last tile when covering the image with `mipLevel` tiles + uint32_t2 getLastTileIndex(uint32_t mipLevel) const + { + return (fullImageTileLength - 1u) >> mipLevel; + } + + bool2 isLastTileVisible(const uint32_t2 viewportBottomRightTile) const + { + const uint32_t2 lastTileIndex = getLastTileIndex(currentMappedRegion.baseMipLevel); + return bool2(lastTileIndex.x == viewportBottomRightTile.x, lastTileIndex.y == viewportBottomRightTile.y); + } + + GeoreferencedImageParams georeferencedImageParams = {}; + std::vector> currentMappedRegionOccupancy = {}; + // Sidelength of the gpu image, in tiles that are `TileSize` pixels wide uint32_t gpuImageSideLengthTiles = {}; // We establish a max mipLevel for the image, which is the mip level at which any of width, height fit in a single Tile @@ -343,6 +359,8 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted float64_t2x3 world2UV = {}; // If the image dimensions are not exactly divisible by `TileSize`, then the last tile along a dimension only holds a proportion of `lastTileFraction` pixels along that dimension 
float64_t lastTileFraction = {}; + // Stores the number of texels currently loaded into the last tile - this matters for tiles at the right/bottom borders + uint32_t2 lastGPUTileTexels = {}; // Set mip level to extreme value so it gets recreated on first iteration GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; }; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index ca60d5e3c..5e6310cbd 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -367,34 +367,6 @@ bool performImageFormatPromotionCopy(const core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) override - { - auto mippedImageExtents = getExtents(imagePath, mipLevel); - // If `offset + extent` exceeds the extent of the image at the current mip level, we clamp it - extent = nbl::hlsl::min(mippedImageExtents - offset, extent); - // Image path ignored for this hardcoded example - const auto& image = downsample ? 
baseMipLevels[mipLevel] : downsampledMipLevels[mipLevel]; - const auto& imageBuffer = image->getBuffer(); - const core::rational bytesPerPixel = asset::getBytesPerPixel(image->getCreationParameters().format); - const size_t bytesPerRow = (bytesPerPixel * extent.x).getIntegerApprox(); - const size_t loadedImageBytes = bytesPerRow * extent.y; - asset::IBuffer::SCreationParams bufCreationParams = { .size = loadedImageBytes, .usage = imageBuffer->getCreationParams().usage}; - ICPUBuffer::SCreationParams cpuBufCreationParams(std::move(bufCreationParams)); - core::smart_refctd_ptr retVal = ICPUBuffer::create(std::move(cpuBufCreationParams)); - - // Copy row by row into the new buffer - uint8_t* dataPtr = reinterpret_cast(retVal->getPointer()); - const uint8_t* imageBufferDataPtr = reinterpret_cast(imageBuffer->getPointer()); - const size_t bytesPerImageRow = (bytesPerPixel * image->getCreationParameters().extent.width).getIntegerApprox(); - for (auto row = 0u; row < extent.y; row++) - { - const size_t imageBufferOffset = bytesPerImageRow * (offset.y + row) + (bytesPerPixel * offset.x).getIntegerApprox(); - std::memcpy(dataPtr + row * bytesPerRow, imageBufferDataPtr + imageBufferOffset, bytesPerRow); - } - return retVal; - } - ImageLoader(asset::IAssetManager* assetMgr, system::ILogger* logger, video::IPhysicalDevice* physicalDevice) : m_assetMgr(assetMgr), m_logger(logger), m_physicalDevice(physicalDevice) { @@ -530,7 +502,42 @@ struct ImageLoader : public DrawResourcesFiller::IGeoreferencedImageLoader return baseMipLevels[0]->getCreationParameters().format; } + bool hasPrecomputedMips(std::filesystem::path imagePath) const override + { + return true; + } + private: + + // Assume offset always fits in the image, but maybe offset + extent doesn't + // Example of a precomputed mip loader with 2x mip levels + core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) override + { + auto 
mippedImageExtents = getExtents(imagePath, mipLevel); + // If `offset + extent` exceeds the extent of the image at the current mip level, we clamp it + extent = nbl::hlsl::min(mippedImageExtents - offset, extent); + // Image path ignored for this hardcoded example + const auto& image = downsample ? downsampledMipLevels[mipLevel] : baseMipLevels[mipLevel]; + const auto& imageBuffer = image->getBuffer(); + const core::rational bytesPerPixel = asset::getBytesPerPixel(image->getCreationParameters().format); + const size_t bytesPerRow = (bytesPerPixel * extent.x).getIntegerApprox(); + const size_t loadedImageBytes = bytesPerRow * extent.y; + asset::IBuffer::SCreationParams bufCreationParams = { .size = loadedImageBytes, .usage = imageBuffer->getCreationParams().usage }; + ICPUBuffer::SCreationParams cpuBufCreationParams(std::move(bufCreationParams)); + core::smart_refctd_ptr retVal = ICPUBuffer::create(std::move(cpuBufCreationParams)); + + // Copy row by row into the new buffer + uint8_t* dataPtr = reinterpret_cast(retVal->getPointer()); + const uint8_t* imageBufferDataPtr = reinterpret_cast(imageBuffer->getPointer()); + const size_t bytesPerImageRow = (bytesPerPixel * image->getCreationParameters().extent.width).getIntegerApprox(); + for (auto row = 0u; row < extent.y; row++) + { + const size_t imageBufferOffset = bytesPerImageRow * (offset.y + row) + (bytesPerPixel * offset.x).getIntegerApprox(); + std::memcpy(dataPtr + row * bytesPerRow, imageBufferDataPtr + imageBufferOffset, bytesPerRow); + } + return retVal; + } + // These are here for the example, might not be class members when porting to n4ce asset::IAssetManager* m_assetMgr = {}; system::ILogger* m_logger = {}; @@ -3884,6 +3891,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const static float64_t startingImagePixelsPerViewportPixels = 1.0; const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); const static 
auto dirU = startingViewportWidthVector * float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); + + // DEBUG + tiledGridParams.worldspaceOBB.topLeft += float32_t2(startingViewportWidthVector - dirU); + tiledGridParams.worldspaceOBB.dirU = dirU; tiledGridParams.imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath); tiledGridParams.worldspaceOBB.aspectRatio = float32_t(tiledGridParams.imageExtents.y) / tiledGridParams.imageExtents.x; From 8a023798107556c5441b6695a91a12ef8d4b90da Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 15 Sep 2025 13:18:26 -0300 Subject: [PATCH 20/29] Cleanup, some precomputes --- 62_CAD/DrawResourcesFiller.cpp | 16 ++++----- 62_CAD/DrawResourcesFiller.h | 1 - 62_CAD/Images.h | 36 ++++++++++--------- .../shaders/main_pipeline/vertex_shader.hlsl | 6 ++-- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index d20a8c98c..1e8ece738 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2767,7 +2767,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) { - uint32_t2 gpuMip0Texels(GeoreferencedImageTileSize,GeoreferencedImageTileSize); + uint32_t2 gpuMip0Texels(GeoreferencedImageTileSize, GeoreferencedImageTileSize); core::smart_refctd_ptr gpuMip0Tile = nullptr; core::smart_refctd_ptr gpuMip1Tile = nullptr; @@ -2781,19 +2781,20 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( georeferencedImageMip0SampledTexels.x = imageStreamingState->georeferencedImageParams.imageExtents.x - georeferencedImageMip0SamplingOffset.x; uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.x >> (viewportTileRange.baseMipLevel + 1); gpuMip0Texels.x = 2 * gpuMip1Texels; - 
imageStreamingState->lastGPUTileTexels.x = gpuMip0Texels.x; + imageStreamingState->lastImageTileFractionalSpan.x = float32_t(gpuMip0Texels.x) / GeoreferencedImageTileSize; } if (imageTileIndex.y == lastTileIndex.y) { georeferencedImageMip0SampledTexels.y = imageStreamingState->georeferencedImageParams.imageExtents.y - georeferencedImageMip0SamplingOffset.y; uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.y >> (viewportTileRange.baseMipLevel + 1); gpuMip0Texels.y = 2 * gpuMip1Texels; - imageStreamingState->lastGPUTileTexels.y = gpuMip0Texels.y; + imageStreamingState->lastImageTileFractionalSpan.y = float32_t(gpuMip0Texels.y) / GeoreferencedImageTileSize; } - // If the last tile is too small just ignore it + // If the last tile is too small just ignore it - given the way we set up stuff it's valid to check if these floats are exactly equal to 0, + // they're always a fraction of the form `x / GeoreferencedImageTileSize` with `0 <= x <= GeoreferencedImageTileSize` and `GeoreferencedImageTileSize` is PoT // If this looks bad we can do fractional pixelage by moving the uv an even tinier amount but at high zoom levels it should be imperceptible - if (!imageStreamingState->lastGPUTileTexels.x || !imageStreamingState->lastGPUTileTexels.y) + if ((imageStreamingState->lastImageTileFractionalSpan.x == 0.f) || (imageStreamingState->lastImageTileFractionalSpan.y == 0.f)) continue; if (!georeferencedImageLoader->hasPrecomputedMips(imageStreamingState->georeferencedImageParams.storagePath)) { @@ -2859,9 +2860,8 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( const uint32_t2 viewportTileLength = viewportTileRange.bottomRightTile - viewportTileRange.topLeftTile + uint32_t2(1, 1); // If the last tile is visible, we use the fractional span for the last tile. 
Otherwise it's just a normal tile - const float32_t2 lastGeoreferencedImageTileFractionalSpan = float32_t2(imageStreamingState->lastGPUTileTexels) / float32_t(GeoreferencedImageTileSize); - const bool2 isLastTileLoaded = imageStreamingState->isLastTileVisible(viewportTileRange.bottomRightTile); - const float32_t2 lastGPUImageTileFractionalSpan = { isLastTileLoaded.x ? lastGeoreferencedImageTileFractionalSpan.x : 1.f, isLastTileLoaded.y ? lastGeoreferencedImageTileFractionalSpan.y : 1.f }; + const bool2 isLastTileVisible = imageStreamingState->isLastTileVisible(viewportTileRange.bottomRightTile); + const float32_t2 lastGPUImageTileFractionalSpan = { isLastTileVisible.x ? imageStreamingState->lastImageTileFractionalSpan.x : 1.f, isLastTileVisible.y ? imageStreamingState->lastImageTileFractionalSpan.y : 1.f }; viewportEncompassingOBB.dirU = oneTileDirU * (float32_t(viewportTileLength.x - 1u) + lastGPUImageTileFractionalSpan.x); viewportEncompassingOBB.aspectRatio = (float32_t(viewportTileLength.y - 1u) + lastGPUImageTileFractionalSpan.y) / (float32_t(viewportTileLength.x - 1u) + lastGPUImageTileFractionalSpan.x); diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index ce9a33fa8..f5a8309b1 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -853,7 +853,6 @@ struct DrawResourcesFiller // Holds gpu image upload info (what tiles to upload and where to upload them), an obb that encompasses the viewport and uv coords into the gpu image // for the corners of that obb - struct TileUploadData { core::vector tiles; diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 0da6dc0ea..a48b8df5d 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -152,15 +152,16 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // Put them all together retVal->world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); - // Also set the maxMipLevel + // Also set the maxMipLevel 
- to keep stuff simple, we don't consider having less than one tile per dimension + // If you're zoomed out enough then at that point the whole image is just sampled as one tile along that dimension + // In pathological cases, such as images that are way bigger on one side than the other, this could cause aliasing and slow down sampling if zoomed out too much. + // If we were ever to observe such pathological cases, then maybe we should consider doing something else here. For example, making the loader able to handle different tile lengths per dimension + // (so for example a 128x64 tile) but again for now it should be left as-is. uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(retVal->georeferencedImageParams.imageExtents / TileSize)); retVal->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); - // Set max number of mip 0 tiles retVal->fullImageTileLength = (retVal->georeferencedImageParams.imageExtents - 1u) / TileSize + 1u; - retVal->lastGPUTileTexels = {TileSize, TileSize}; - return retVal; } @@ -220,16 +221,17 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // by the amount of tiles we removed during clamping const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.bottomRightTile) - int32_t2(getLastTileIndex(currentMappedRegion.baseMipLevel)))); currentMappedRegion.bottomRightTile -= excessTiles; - // Now, on some pathological cases (such as an image that is not long along one dimension but very long along the other) shifting of the topLeftTile - // could fall out of bounds. 
So we shift if possible, otherwise set it to 0 + // Shifting of the topLeftTile could fall out of bounds in pathological cases or at very high mip levels (zooming out too much), so we shift if possible, otherwise set it to 0 currentMappedRegion.topLeftTile = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.topLeftTile) - int32_t2(excessTiles))); + // Mark all gpu tiles as dirty currentMappedRegionOccupancy.resize(gpuImageSideLengthTiles); for (auto i = 0u; i < gpuImageSideLengthTiles; i++) { currentMappedRegionOccupancy[i].clear(); currentMappedRegionOccupancy[i].resize(gpuImageSideLengthTiles, false); } + // Reset state for gpu image so that it starts loading tiles at top left. Not really necessary. gpuImageTopLeft = uint32_t2(0, 0); } @@ -243,11 +245,11 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // `bottomRightShift` represents the same as above but in the other direction. const int32_t2 bottomRightShift = nbl::hlsl::max(int32_t2(0, 0), int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)); - // Mark dropped tiles as dirty/non-resident // The following is not necessarily equal to `gpuImageSideLengthTiles` since there can be pathological cases, as explained in the remapping method const uint32_t2 mappedRegionDimensions = currentMappedRegion.bottomRightTile - currentMappedRegion.topLeftTile + 1u; const uint32_t2 gpuImageBottomRight = (gpuImageTopLeft + mappedRegionDimensions - 1u) % gpuImageSideLengthTiles; + // Mark dropped tiles as dirty/non-resident if (topLeftShift.x < 0) { // Shift left @@ -256,8 +258,8 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted { // Get actual tile index with wraparound uint32_t tileIdx = (gpuImageBottomRight.x + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; - for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) - currentMappedRegionOccupancy[tileIdx][i] = false; + currentMappedRegionOccupancy[tileIdx].clear(); + 
currentMappedRegionOccupancy[tileIdx].resize(gpuImageSideLengthTiles, false); } } else if (bottomRightShift.x > 0) @@ -268,8 +270,8 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted { // Get actual tile index with wraparound uint32_t tileIdx = (tile + gpuImageTopLeft.x) % gpuImageSideLengthTiles; - for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) - currentMappedRegionOccupancy[tileIdx][i] = false; + currentMappedRegionOccupancy[tileIdx].clear(); + currentMappedRegionOccupancy[tileIdx].resize(gpuImageSideLengthTiles, false); } } @@ -338,6 +340,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted return (fullImageTileLength - 1u) >> mipLevel; } + // Returns whether the last tile in the image (along each dimension) is visible from the current viewport bool2 isLastTileVisible(const uint32_t2 viewportBottomRightTile) const { const uint32_t2 lastTileIndex = getLastTileIndex(currentMappedRegion.baseMipLevel); @@ -347,11 +350,11 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted GeoreferencedImageParams georeferencedImageParams = {}; std::vector> currentMappedRegionOccupancy = {}; - // Sidelength of the gpu image, in tiles that are `TileSize` pixels wide + // Sidelength of the gpu image, in mip 0 tiles that are `TileSize` (creation parameter) texels wide uint32_t gpuImageSideLengthTiles = {}; - // We establish a max mipLevel for the image, which is the mip level at which any of width, height fit in a single Tile + // We establish a max mipLevel for the image, which is the mip level at which any of width, height fit in a single tile uint32_t maxMipLevel = {}; - // Size of the image in tiles of `TileSize` sidelength + // Number of mip 0 tiles needed to cover the whole image, counting the last tile that might be fractional if the image size is not perfectly divisible by TileSize uint32_t2 fullImageTileLength = {}; // Indicates on which tile of the gpu image the current mapped region's `topLeft` resides uint32_t2 
gpuImageTopLeft = {}; @@ -359,8 +362,9 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted float64_t2x3 world2UV = {}; // If the image dimensions are not exactly divisible by `TileSize`, then the last tile along a dimension only holds a proportion of `lastTileFraction` pixels along that dimension float64_t lastTileFraction = {}; - // Stores the number of texels currently loaded into the last tile - this matters for tiles at the right/bottom borders - uint32_t2 lastGPUTileTexels = {}; + // Reflects what fraction of a FULL tile the LAST tile in the image at the current mip level actually spans. + // It only gets set when necessary, and should always be updated correctly before being used, since it's related to the current `baseMipLevel` of the `currentMappedRegion` + float32_t2 lastImageTileFractionalSpan = {1.f, 1.f}; // Set mip level to extreme value so it gets recreated on first iteration GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; }; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index a22e1d883..6ee6c263f 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -743,12 +743,11 @@ PSInput vtxMain(uint vertexID : SV_VertexID) const float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); const float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float32_t2), 4u); const uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float32_t2) + sizeof(float32_t), 4u); + // Remember we are constructing a quad in worldspace whose corners are matched to a quad in our toroidally-resident gpu image. 
`minUV` and `maxUV` are used to indicate where to sample from + // the gpu image to reconstruct the toroidal quad. const float32_t2 minUV = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float32_t2) + sizeof(float32_t) + sizeof(uint32_t), 4u); const float32_t2 maxUV = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(float32_t2) + sizeof(float32_t) + sizeof(uint32_t), 4u); - //printf("%f %f", minUV.x, minUV.y); - //printf("%f %f", maxUV.x, maxUV.y); - const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; const float32_t2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); const float32_t2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); @@ -758,7 +757,6 @@ PSInput vtxMain(uint vertexID : SV_VertexID) const float32_t2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; const float32_t2 uv = float32_t2(corner.x ? maxUV.x : minUV.x, corner.y ? 
maxUV.y : minUV.y); - printf("%f %f", ndcCorner.x, ndcCorner.y); outV.position = float4(ndcCorner, 0.f, 1.f); outV.setImageUV(uv); From 73303836b839dbd1667ada6f930cadc792569537 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 15 Sep 2025 21:10:10 -0300 Subject: [PATCH 21/29] Some more brief updates --- 62_CAD/DrawResourcesFiller.cpp | 7 ++-- 62_CAD/DrawResourcesFiller.h | 60 +++++++++++++++++++++++++---- 62_CAD/Images.h | 69 ++++++++++++++++++++++++++-------- 3 files changed, 109 insertions(+), 27 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 1e8ece738..501fd50de 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2692,7 +2692,6 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // Compute the mip level and tile range we would need to encompass the viewport // `viewportTileRange` is always should be a subset of `currentMappedRegion`, covering only the tiles visible in the viewport // This also computes the optimal mip level for these tiles (basically a measure of how zoomed in or out the viewport is from the image) - GeoreferencedImageTileRange viewportTileRange = computeViewportTileRange(NDCToWorld, imageStreamingState); // Slide or remap the current mapped region to ensure the viewport falls inside it @@ -2776,6 +2775,8 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( const uint32_t2 georeferencedImageMip0SamplingOffset = (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; const uint32_t2 lastTileIndex = imageStreamingState->getLastTileIndex(viewportTileRange.baseMipLevel); + // If on the last tile, we might not load a full `GeoreferencedImageTileSize x GeoreferencedImageTileSize` tile, so we figure out how many pixels to load in this case to have + // minimal artifacts and no stretching if (imageTileIndex.x == lastTileIndex.x) { georeferencedImageMip0SampledTexels.x = 
imageStreamingState->georeferencedImageParams.imageExtents.x - georeferencedImageMip0SamplingOffset.x; @@ -2846,8 +2847,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; } - // Last, we need to figure out an obb that covers only the currently loaded tiles - + // Figure out an obb that covers only the currently loaded tiles OrientedBoundingBox2D viewportEncompassingOBB = imageStreamingState->georeferencedImageParams.worldspaceOBB; // The original image `dirU` corresponds to `maxImageTileIndices.x + 1` mip 0 tiles (provided it's exactly that length in tiles) // Dividing dirU by `maxImageTileIndices + (1,1)` we therefore get a vector that spans exactly one mip 0 tile (in the u direction) in worldspace. @@ -2878,7 +2878,6 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // uvPerTile is the uv per GeoreferencedImageTileSize pixels. 
Since the last tile might not be fully resident with pixels, we don't add the uv for it above and add the proper uv it should be sampled at here maxUV += uvPerTile * lastGPUImageTileFractionalSpan; return TileUploadData{ std::move(tiles), viewportEncompassingOBB, minUV, maxUV }; - } GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const float64_t3x3& NDCToWorld, const GeoreferencedImageStreamingState* imageStreamingState) diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index f5a8309b1..9a2a92051 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -123,34 +123,64 @@ struct DrawResourcesFiller } }; - // TODO: Figure out how to do this statically, this is essentially the same as having two templated versions and dynamic casting - // Probably CRTP/F-bound but it might be overkill + /// @brief Abstract class with two overridable methods to load a region of an image, either by requesting a region at a target extent (like the loaders in n4ce do) or to request a specific region from a mip level + // (like precomputed mips solution would use). struct IGeoreferencedImageLoader : IReferenceCounted { + /** + * @brief Load a region from an image - used to load from images with precomputed mips + * + * @param imagePath Path to file holding the image data + * @param offset Offset into the image (at requested mipLevel!) at which the region begins + * @param extent Extent of the region to load (at requested mipLevel!) 
+ * @param mipLevel Mip level of the image from which to retrieve the data + * @param downsample True if this request is supposed to go into GPU mip level 1, false otherwise + * + * @return ICPUBuffer with the requested image data + */ core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) { assert(hasPrecomputedMips(imagePath)); return load_impl(imagePath, offset, extent, mipLevel, downsample); } + /** + * @brief Load a region from an image - used to load from images using the n4ce loaders. Loads a region given by `offset, extent` as an image of size `targetExtent` + * where `targetExtent <= extent` so the loader is in charge of downsampling. + * + * @param imagePath Path to file holding the image data + * @param offset Offset into the image at which the region begins + * @param extent Extent of the region to load + * @param targetExtent Extent of the resulting image. Should NEVER be bigger than `extent` + * + * @return ICPUBuffer with the requested image data + */ core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) { assert(!hasPrecomputedMips(imagePath)); return load_impl(imagePath, offset, extent, targetExtent); } + // @brief Get the extents (in texels) of an image's `mipLevel` mip. virtual uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) = 0; + /** + * @brief Get the texel format for an image. + */ virtual asset::E_FORMAT getFormat(std::filesystem::path imagePath) = 0; + // @brief Returns whether the image should be loaded with the precomputed mip method or the n4ce loader method.
virtual bool hasPrecomputedMips(std::filesystem::path imagePath) const = 0; private: + // @brief Override to support loading with precomputed mips virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) { return nullptr; } + // @brief Override to support loading with n4ce-style loaders virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) { return nullptr; } }; + // @brief Register a loader void setGeoreferencedImageLoader(core::smart_refctd_ptr&& _georeferencedImageLoader) { georeferencedImageLoader = _georeferencedImageLoader; @@ -840,19 +870,26 @@ struct DrawResourcesFiller uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); - // These are mip 0 pixels per tile, also size of each physical tile into the gpu resident image + // These are mip 0 texels of the image per tile, also size of each physical tile into the gpu resident image constexpr static uint32_t GeoreferencedImageTileSize = 128u; // Mip 1 tiles are naturally half the size constexpr static uint32_t GeoreferencedImageTileSizeMip1 = GeoreferencedImageTileSize / 2; // How many tiles of extra padding we give to the gpu image holding the tiles for a georeferenced image constexpr static uint32_t GeoreferencedImagePaddingTiles = 2; - // Returns a tile range that encompasses the whole viewport in "image-world". Tiles are measured in the mip level required to fit the viewport entirely - // withing the gpu image. + /* + * @brief Returns a tile range (+mip level) which is the smallest region of the image consisting of whole tiles (at specified mip level) that encompasses the current viewport + * + * @param NDCToWorld Affine matrix that represents a linear transform from NDC coordinates (related to viewport) to world coordinates. 
+ * + * @param imageStreamingState Image for which we want to compute said tile range + */ GeoreferencedImageTileRange computeViewportTileRange(const float64_t3x3& NDCToWorld, const GeoreferencedImageStreamingState* imageStreamingState); - // Holds gpu image upload info (what tiles to upload and where to upload them), an obb that encompasses the viewport and uv coords into the gpu image - // for the corners of that obb + /* + * @struct TileUploadData + * @brief Holds gpu image upload info (what tiles to upload and where to upload them), an obb that encompasses the viewport and uv coords into the gpu image for the corners of that obb + */ struct TileUploadData { core::vector tiles; @@ -861,6 +898,15 @@ struct DrawResourcesFiller float32_t2 maxUV; }; + /* + * @brief Generates all the tile upload data needed to render the image on the current viewport + * + * @param imageType Type of the image (static or georeferenced) + * + * @param NDCToWorld Affine matrix that represents a linear transform from NDC coordinates (related to viewport) to world coordinates. + * + * @param imageStreamingState Image for which we want to generate the `TileUploadData` + */ // Right now it's generating tile-by-tile. Can be improved to produce at worst 4 different rectangles to load (depending on how we need to load tiles) TileUploadData generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, GeoreferencedImageStreamingState* imageStreamingState); diff --git a/62_CAD/Images.h b/62_CAD/Images.h index a48b8df5d..77b31f4dc 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -22,13 +22,17 @@ enum class ImageType : uint8_t GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant }; +/** + * @struct GeoreferencedImageParams + * @brief Info needed to add a georeferenced image. 
+ */ struct GeoreferencedImageParams { - OrientedBoundingBox2D worldspaceOBB = {}; - uint32_t2 imageExtents = {}; - uint32_t2 viewportExtents = {}; - asset::E_FORMAT format = {}; - std::filesystem::path storagePath = {}; + OrientedBoundingBox2D worldspaceOBB = {}; // Position and extents of the image in worldspace + uint32_t2 imageExtents = {}; // Real extents (in texels) of the image + uint32_t2 viewportExtents = {}; // Extents (in pixels) of the viewport on which the image is to be displayed + asset::E_FORMAT format = {}; // Texel format of the image + std::filesystem::path storagePath = {}; // Path to the file where image data is stored }; /** @@ -123,6 +127,12 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted friend class DrawResourcesFiller; protected: + /* + * @brief Create a streaming state for a georeferenced image + * + * @param _georeferencedImageParams Info relating to the georeferenced image for which to create a streaming state. + * @param TileSize Size of the tiles used to break up the image. Also size of the tiles in the GPU image backing this georeferenced image. + */ static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize) { smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); @@ -169,10 +179,21 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial - float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoords) const { return nbl::hlsl::mul(world2UV, worldCoords); } - float64_t2 transformWorldCoordsToPixelCoords(const float64_t3 worldCoords) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoords); } - float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToPixelCoords(worldCoords); } + // @brief Transform worldspace coordinates into UV coordinates into the image + float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoords) const { return nbl::hlsl::mul(world2UV, worldCoords); } + // @brief Transform worldspace coordinates into texel coordinates into the image + float64_t2 transformWorldCoordsToTexelCoords(const float64_t3 worldCoords) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoords); } + // @brief Transform worldspace coordinates into tile coordinates into the image, where the image is broken up into tiles of size `TileSize` + float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToTexelCoords(worldCoords); } + + /* + * @brief The GPU image backs a mapped region which is a rectangular sub-region of the original image. Note that a region being mapped does NOT imply it's currently resident in GPU memory. + * To display the image on the screen, before even checking that the tiles needed to render the portion of the image currently visible are resident in GPU memory, we first must ensure that + * said region is included (as a sub-rectangle) in the mapped region. 
+ * + * @param viewportTileRange Range of tiles + mip level indicating what sub-rectangle (and at which mip level) of the image is going to be visible from the viewport + */ void ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange) { // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 @@ -192,13 +213,19 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) ); + // If there is no overlap between previous mapped region and the next, just reset everything if (mipBoundaryCrossed || relativeShiftTooBig) remapCurrentRegion(viewportTileRange); + // Otherwise we can get away with (at worst) sliding the mapped region along the real image, preserving the residency of the tiles that overlap between previous mapped region and the next else slideCurrentRegion(viewportTileRange); } - // When the current mapped region is inadequate to fit the viewport, we compute a new mapped region + /* + * @brief Sets the mapped region into the image so it at least covers the sub-rectangle currently visible from the viewport. 
Also marks all gpu tiles dirty since none can be recycled + * + * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image (visible from viewport) that the mapped region needs to cover + */ void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) { // Zoomed out @@ -235,7 +262,12 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted gpuImageTopLeft = uint32_t2(0, 0); } - // Checks whether the viewport falls entirely withing the current mapped region and slides the latter otherwise, just enough until it covers the viewport + /* + * @brief Slides the mapped region along the image, marking the tiles dropped as dirty but preserving the residency for tiles that are inside both the previous and new mapped regions. + * Note that the checks for whether this is valid to do happen outside of this function. + * + * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image (visible from viewport) that the mapped region needs to cover + */ void slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) { // `topLeftShift` represents how many tiles up and to the left we have to move the mapped region to fit the viewport. 
@@ -310,16 +342,19 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftShift + bottomRightShift + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; } - // This can become a rectangle if we implement the by-rectangle upload instead of tile-by-tile to reduce loader calls + // @brief Info to match a gpu tile to the tile in the real image it should hold image data for struct ImageTileToGPUTileCorrespondence { uint32_t2 imageTileIndex; uint32_t2 gpuImageTileIndex; }; - // Given a tile range covering the viewport, returns which tiles (at the mip level of the current mapped region) need to be made resident to draw it, - // returning a vector of `ImageTileToGPUTileCorrespondence`, each indicating that tile `imageTileIndex` in the full image needs to be uploaded to tile - // `gpuImageTileIndex` in the gpu image + /* + * @brief Given a tile range covering the viewport, returns which tiles (at the mip level of the current mapped region) need to be made resident to draw it, and to which tile of the gpu image each tile should be + * uploaded to + * + * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image covering the viewport + */ core::vector tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) const { core::vector retVal; @@ -327,20 +362,22 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted for (uint32_t tileY = viewportTileRange.topLeftTile.y; tileY <= viewportTileRange.bottomRightTile.y; tileY++) { uint32_t2 imageTileIndex = uint32_t2(tileX, tileY); + // Toroidal shift to find which gpu tile the image tile corresponds to uint32_t2 gpuImageTileIndex = ((imageTileIndex - currentMappedRegion.topLeftTile) + gpuImageTopLeft) % gpuImageSideLengthTiles; + // Don't bother scheduling an upload if the tile is already resident if (!currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) retVal.push_back({ 
imageTileIndex , gpuImageTileIndex }); } return retVal; } - // Returns the index of the last tile when covering the image with `mipLevel` tiles + // @brief Returns the index of the last tile when covering the image with `mipLevel` tiles uint32_t2 getLastTileIndex(uint32_t mipLevel) const { return (fullImageTileLength - 1u) >> mipLevel; } - // Returns whether the last tile in the image (along each dimension) is visible from the current viewport + // @brief Returns whether the last tile in the image (along each dimension) is visible from the current viewport bool2 isLastTileVisible(const uint32_t2 viewportBottomRightTile) const { const uint32_t2 lastTileIndex = getLastTileIndex(currentMappedRegion.baseMipLevel); From 452bee7be3f75ef84de3d7143e65ac385b7614ae Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 16 Sep 2025 16:52:40 -0300 Subject: [PATCH 22/29] Some minor refactors, added some padding to max tile comp for viewport since there's some numerical shenaningans I couldn't figure out --- 62_CAD/DrawResourcesFiller.cpp | 3 ++- 62_CAD/DrawResourcesFiller.h | 6 +++--- 62_CAD/Images.h | 4 +--- 62_CAD/main.cpp | 8 +++++++- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 501fd50de..a7b707976 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2907,7 +2907,8 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); - const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); + // Edge case padding - there seems to be some numerical error going on when really close to tile boundaries + const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom) + float64_t2(0.5, 0.5); // Floor them to get an integer coordinate (index) for the tiles they fall in int32_t2 
minAllFloored = nbl::hlsl::floor(minAll); diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 9a2a92051..cd25ab3a5 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -161,8 +161,8 @@ struct DrawResourcesFiller return load_impl(imagePath, offset, extent, targetExtent); } - // @brief Get the extents (in texels) of an image's `mipLevel` mip. - virtual uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) = 0; + // @brief Get the extents (in texels) of an image. + virtual uint32_t2 getExtents(std::filesystem::path imagePath) = 0; /** * @brief Get the texel format for an image. @@ -188,7 +188,7 @@ struct DrawResourcesFiller uint32_t2 queryGeoreferencedImageExtents(std::filesystem::path imagePath) { - return georeferencedImageLoader->getExtents(imagePath, 0); + return georeferencedImageLoader->getExtents(imagePath); } asset::E_FORMAT queryGeoreferencedImageFormat(std::filesystem::path imagePath) diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 77b31f4dc..2619e33ed 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -148,7 +148,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // 2. Change of Basis. 
Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) const float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; - const float64_t2 dirV = float32_t2(dirU.y, -dirU.x) * retVal->georeferencedImageParams.worldspaceOBB.aspectRatio; + const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(retVal->georeferencedImageParams.worldspaceOBB.aspectRatio); const float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); const float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); const float64_t2 firstRow = dirU / dirULengthSquared; @@ -175,8 +175,6 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted return retVal; } - - // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 5e6310cbd..97da0b51d 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -492,11 +492,16 @@ struct ImageLoader : public DrawResourcesFiller::IGeoreferencedImageLoader } } - uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) override + uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) { return { baseMipLevels[mipLevel]->getCreationParameters().extent.width, baseMipLevels[mipLevel]->getCreationParameters().extent.height }; } + uint32_t2 getExtents(std::filesystem::path imagePath) override + { + return getExtents(imagePath, 0); + } + asset::E_FORMAT getFormat(std::filesystem::path imagePath) override { return baseMipLevels[0]->getCreationParameters().format; @@ -513,6 +518,7 @@ struct ImageLoader : public DrawResourcesFiller::IGeoreferencedImageLoader // Example of a precomputed mip loader with 2x mip levels core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) override { + // Hardcoded tile size that's not accessible auto mippedImageExtents = getExtents(imagePath, mipLevel); // If `offset + extent` exceeds the extent of the image at the current mip level, we clamp it extent = nbl::hlsl::min(mippedImageExtents - offset, extent); From 932cb74d38acffc53a39bbd3a1a638e29b3603b5 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 17 Sep 2025 17:07:22 -0300 Subject: [PATCH 23/29] Mirrored changes on n4ce after PR review --- 62_CAD/CMakeLists.txt | 1 + 62_CAD/DrawResourcesFiller.cpp | 4 +- 62_CAD/Images.cpp | 196 ++++++++++++++++++++++++++++++++ 62_CAD/Images.h | 198 ++------------------------------- 4 files changed, 208 insertions(+), 191 deletions(-) create mode 100644 62_CAD/Images.cpp diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index c3a0fa47e..756965c85 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -16,6 +16,7 
@@ set(EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/SingleLineText.h" "${CMAKE_CURRENT_SOURCE_DIR}/GeoTexture.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/GeoTexture.h" + "${CMAKE_CURRENT_SOURCE_DIR}/Images.cpp" "../../src/nbl/ext/TextRendering/TextRendering.cpp" # TODO: this one will be a part of dedicated Nabla ext called "TextRendering" later on which uses MSDF + Freetype ) set(EXAMPLE_INCLUDES diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index a7b707976..cd75dfe77 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -2513,7 +2513,9 @@ ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::as // Decide whether the image can reside fully into memory rather than get streamed. // TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not // TODO: add criterial that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) - const bool betterToResideFullyInMem = params.imageExtents.x * params.imageExtents.y <= params.viewportExtents.x * params.viewportExtents.y; + const size_t mainImagePixels = ((size_t)params.imageExtents.x * (size_t)params.imageExtents.y); + const size_t viewportImagePixels = ((size_t)params.viewportExtents.x * (size_t)params.viewportExtents.y); + const bool betterToResideFullyInMem = params.imageExtents.x < (1u << 14) && params.imageExtents.y < (1u << 14) && mainImagePixels <= viewportImagePixels; /* 1u << 14 == 16384; `2 ^ 14` would be XOR (== 12) in C++, not a power of two */ ImageType imageType; diff --git a/62_CAD/Images.cpp b/62_CAD/Images.cpp new file mode 100644 index 000000000..38fac0069 --- /dev/null +++ b/62_CAD/Images.cpp @@ -0,0 +1,196 @@ +#include "Images.h" + +using namespace nbl::hlsl; + +smart_refctd_ptr GeoreferencedImageStreamingState::create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize) +{ + smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); + 
retVal->georeferencedImageParams = std::move(_georeferencedImageParams); + // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point + // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). + // The composition of these matrices therefore transforms any point in worldspace into uv coordinates in imagespace + // To reduce code complexity, instead of computing the product of these matrices, since the first is a pure displacement matrix + // (non-homogeneous 2x2 upper left is identity matrix) and the other is a pure rotation matrix (2x2) we can just put them together + // by putting the rotation in the upper left 2x2 of the result and the post-rotated displacement in the upper right 2x1. + // The result is also 2x3 and not 3x3 because we can drop the homogeneous row since the displacement yields a vector + + // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression + // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) + const float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; + const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(retVal->georeferencedImageParams.worldspaceOBB.aspectRatio); + const float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); + const float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); + const float64_t2 firstRow = dirU / dirULengthSquared; + const float64_t2 secondRow = dirV / dirVLengthSquared; + + const float64_t2 displacement = -retVal->georeferencedImageParams.worldspaceOBB.topLeft; + // This is the same as multiplying the change of basis matrix by the displacement vector + const float64_t postRotatedShiftX = nbl::hlsl::dot(firstRow, displacement); + const float64_t 
postRotatedShiftY = nbl::hlsl::dot(secondRow, displacement); + + // Put them all together + retVal->world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); + + // Also set the maxMipLevel - to keep stuff simple, we don't consider having less than one tile per dimension + // If you're zoomed out enough then at that point the whole image is just sampled as one tile along that dimension + // In pathological cases, such as images that are way bigger on one side than the other, this could cause aliasing and slow down sampling if zoomed out too much. + // If we were ever to observe such pathological cases, then maybe we should consider doing something else here. For example, making the loader able to handle different tile lengths per dimension + // (so for example a 128x64 tile) but again for now it should be left as-is. + uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(retVal->georeferencedImageParams.imageExtents / TileSize)); + retVal->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); + + retVal->fullImageTileLength = (retVal->georeferencedImageParams.imageExtents - 1u) / TileSize + 1u; + + return retVal; +} + +void GeoreferencedImageStreamingState::ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange) +{ + // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 + // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap + // the mapped region. 
+ const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel != currentMappedRegion.baseMipLevel; + + // If we moved a huge amount in any direction, no tiles will remain resident, so we simply reset state + // This only need be evaluated if the mip boundary was not already crossed (hence the parentheses: `&&` binds tighter than `||`) + const bool relativeShiftTooBig = !mipBoundaryCrossed && ( + nbl::hlsl::any + ( + nbl::hlsl::abs(int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegion.topLeftTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) + ) + || nbl::hlsl::any + ( + nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) + )); + + // If there is no overlap between previous mapped region and the next, just reset everything + if (mipBoundaryCrossed || relativeShiftTooBig) + remapCurrentRegion(viewportTileRange); + // Otherwise we can get away with (at worst) sliding the mapped region along the real image, preserving the residency of the tiles that overlap between previous mapped region and the next + else + slideCurrentRegion(viewportTileRange); +} + +void GeoreferencedImageStreamingState::remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) +{ + // Zoomed out + if (viewportTileRange.baseMipLevel > currentMappedRegion.baseMipLevel) + { + // TODO: Here we would move some mip 1 tiles to mip 0 image to save the work of reuploading them, reflect that in the tracked tiles + } + // Zoomed in + else if (viewportTileRange.baseMipLevel < currentMappedRegion.baseMipLevel) + { + // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles + } + currentMappedRegion = viewportTileRange; + // We can expand the currentMappedRegion to make it as big as possible, at no extra cost since we only upload tiles on demand + // Since we use toroidal updating it's kinda the same which way we expand the 
region. We first try to make the extent be `gpuImageSideLengthTiles` + currentMappedRegion.bottomRightTile = currentMappedRegion.topLeftTile + uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - uint32_t2(1, 1); + // This extension can cause the mapped region to fall out of bounds on border cases, therefore we clamp it and extend it in the other direction + // by the amount of tiles we removed during clamping + const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.bottomRightTile) - int32_t2(getLastTileIndex(currentMappedRegion.baseMipLevel)))); + currentMappedRegion.bottomRightTile -= excessTiles; + // Shifting of the topLeftTile could fall out of bounds in pathological cases or at very high mip levels (zooming out too much), so we shift if possible, otherwise set it to 0 + currentMappedRegion.topLeftTile = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.topLeftTile) - int32_t2(excessTiles))); + + // Mark all gpu tiles as dirty + currentMappedRegionOccupancy.resize(gpuImageSideLengthTiles); + for (auto i = 0u; i < gpuImageSideLengthTiles; i++) + { + currentMappedRegionOccupancy[i].clear(); + currentMappedRegionOccupancy[i].resize(gpuImageSideLengthTiles, false); + } + // Reset state for gpu image so that it starts loading tiles at top left. Not really necessary. + gpuImageTopLeft = uint32_t2(0, 0); +} + +void GeoreferencedImageStreamingState::slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) +{ + // `topLeftShift` represents how many tiles up and to the left we have to move the mapped region to fit the viewport. + // First we compute a vector from the current mapped region's topleft to the viewport's topleft. 
If this vector is positive along a dimension it means + // the viewport's topleft is to the right or below the current mapped region's topleft, so we don't have to shift the mapped region to the left/up in that case + const int32_t2 topLeftShift = nbl::hlsl::min(int32_t2(0, 0), int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegion.topLeftTile)); + // `bottomRightShift` represents the same as above but in the other direction. + const int32_t2 bottomRightShift = nbl::hlsl::max(int32_t2(0, 0), int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)); + + // The following is not necessarily equal to `gpuImageSideLengthTiles` since there can be pathological cases, as explained in the remapping method + const uint32_t2 mappedRegionDimensions = currentMappedRegion.bottomRightTile - currentMappedRegion.topLeftTile + 1u; + const uint32_t2 gpuImageBottomRight = (gpuImageTopLeft + mappedRegionDimensions - 1u) % gpuImageSideLengthTiles; + + // Mark dropped tiles as dirty/non-resident + if (topLeftShift.x < 0) + { + // Shift left + const uint32_t tilesToFit = -topLeftShift.x; + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (gpuImageBottomRight.x + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; + currentMappedRegionOccupancy[tileIdx].clear(); + currentMappedRegionOccupancy[tileIdx].resize(gpuImageSideLengthTiles, false); + } + } + else if (bottomRightShift.x > 0) + { + //Shift right + const uint32_t tilesToFit = bottomRightShift.x; + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (tile + gpuImageTopLeft.x) % gpuImageSideLengthTiles; + currentMappedRegionOccupancy[tileIdx].clear(); + currentMappedRegionOccupancy[tileIdx].resize(gpuImageSideLengthTiles, false); + } + } + + if (topLeftShift.y < 0) + { + // Shift up + const uint32_t tilesToFit = -topLeftShift.y; + for 
(uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (gpuImageBottomRight.y + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[i][tileIdx] = false; + } + } + else if (bottomRightShift.y > 0) + { + //Shift down + const uint32_t tilesToFit = bottomRightShift.y; + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (tile + gpuImageTopLeft.y) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[i][tileIdx] = false; + } + } + + // Shift the mapped region accordingly + // A nice consequence of the mapped region being always maximally - sized is that + // along any dimension, only a shift in one direction is necessary, so we can simply add up the shifts + currentMappedRegion.topLeftTile = uint32_t2(int32_t2(currentMappedRegion.topLeftTile) + topLeftShift + bottomRightShift); + currentMappedRegion.bottomRightTile = uint32_t2(int32_t2(currentMappedRegion.bottomRightTile) + topLeftShift + bottomRightShift); + + // Toroidal shift for the gpu image top left + gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftShift + bottomRightShift + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; +} + +core::vector GeoreferencedImageStreamingState::tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) const +{ + core::vector retVal; + for (uint32_t tileY = viewportTileRange.topLeftTile.y; tileY <= viewportTileRange.bottomRightTile.y; tileY++) + for (uint32_t tileX = viewportTileRange.topLeftTile.x; tileX <= viewportTileRange.bottomRightTile.x; tileX++) + { + uint32_t2 imageTileIndex = uint32_t2(tileX, tileY); + // Toroidal shift to find which gpu tile the image tile corresponds to + uint32_t2 gpuImageTileIndex = ((imageTileIndex - currentMappedRegion.topLeftTile) + 
gpuImageTopLeft) % gpuImageSideLengthTiles; + // Don't bother scheduling an upload if the tile is already resident + if (!currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) + retVal.push_back({ imageTileIndex , gpuImageTileIndex }); + } + return retVal; +} \ No newline at end of file diff --git a/62_CAD/Images.h b/62_CAD/Images.h index 2619e33ed..df3eed4b6 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -1,4 +1,7 @@ #pragma once + +#include "shaders/globals.hlsl" + using namespace nbl; using namespace nbl::video; using namespace nbl::core; @@ -133,47 +136,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted * @param _georeferencedImageParams Info relating to the georeferenced image for which to create a streaming state. * @param TileSize Size of the tiles used to break up the image. Also size of the tiles in the GPU image backing this georeferenced image. */ - static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize) - { - smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); - retVal->georeferencedImageParams = std::move(_georeferencedImageParams); - // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point - // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). - // The composition of these matrices therefore transforms any point in worldspace into uv coordinates in imagespace - // To reduce code complexity, instead of computing the product of these matrices, since the first is a pure displacement matrix - // (non-homogenous 2x2 upper left is identity matrix) and the other is a pure rotation matrix (2x2) we can just put them together - // by putting the rotation in the upper left 2x2 of the result and the post-rotated displacement in the upper right 2x1. 
- // The result is also 2x3 and not 3x3 because we can drop he homogenous since the displacement yields a vector - - // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression - // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) - const float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; - const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(retVal->georeferencedImageParams.worldspaceOBB.aspectRatio); - const float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); - const float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); - const float64_t2 firstRow = dirU / dirULengthSquared; - const float64_t2 secondRow = dirV / dirVLengthSquared; - - const float64_t2 displacement = - retVal->georeferencedImageParams.worldspaceOBB.topLeft; - // This is the same as multiplying the change of basis matrix by the displacement vector - const float64_t postRotatedShiftX = nbl::hlsl::dot(firstRow, displacement); - const float64_t postRotatedShiftY = nbl::hlsl::dot(secondRow, displacement); - - // Put them all together - retVal->world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); - - // Also set the maxMipLevel - to keep stuff simple, we don't consider having less than one tile per dimension - // If you're zoomed out enough then at that point the whole image is just sampled as one tile along that dimension - // In pathological cases, such as images that are way bigger on one side than the other, this could cause aliasing and slow down sampling if zoomed out too much. - // If we were ever to observe such pathological cases, then maybe we should consider doing something else here. 
For example, making the loader able to handle different tile lengths per dimension - // (so for example a 128x64 tile) but again for now it should be left as-is. - uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(retVal->georeferencedImageParams.imageExtents / TileSize)); - retVal->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); - - retVal->fullImageTileLength = (retVal->georeferencedImageParams.imageExtents - 1u) / TileSize + 1u; - - return retVal; - } + static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize); // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. Translating to other mips levels is trivial @@ -192,73 +155,14 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted * * @param viewportTileRange Range of tiles + mip level indicating what sub-rectangle (and at which mip level) of the image is going to be visible from the viewport */ - void ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange) - { - // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 - // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap - // the mapped region. 
- const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel != currentMappedRegion.baseMipLevel; - - // If we moved a huge amount in any direction, no tiles will remain resident, so we simply reset state - // This only need be evaluated if the mip boundary was not already crossed - const bool relativeShiftTooBig = !mipBoundaryCrossed && - nbl::hlsl::any - ( - nbl::hlsl::abs(int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegion.topLeftTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - ) - || nbl::hlsl::any - ( - nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - ); - - // If there is no overlap between previous mapped region and the next, just reset everything - if (mipBoundaryCrossed || relativeShiftTooBig) - remapCurrentRegion(viewportTileRange); - // Otherwise we can get away with (at worst) sliding the mapped region along the real image, preserving the residency of the tiles that overlap between previous mapped region and the next - else - slideCurrentRegion(viewportTileRange); - } + void ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange); /* * @brief Sets the mapped region into the image so it at least covers the sub-rectangle currently visible from the viewport. 
Also marks all gpu tiles dirty since none can be recycled * * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image (visible from viewport) that the mapped region needs to cover */ - void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) - { - // Zoomed out - if (viewportTileRange.baseMipLevel > currentMappedRegion.baseMipLevel) - { - // TODO: Here we would move some mip 1 tiles to mip 0 image to save the work of reuploading them, reflect that in the tracked tiles - } - // Zoomed in - else if (viewportTileRange.baseMipLevel < currentMappedRegion.baseMipLevel) - { - // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles - } - currentMappedRegion = viewportTileRange; - // We can expand the currentMappedRegion to make it as big as possible, at no extra cost since we only upload tiles on demand - // Since we use toroidal updating it's kinda the same which way we expand the region. 
We first tryo to expand it downwards to the right - const uint32_t2 currentTileExtents = currentMappedRegion.bottomRightTile - currentMappedRegion.topLeftTile + uint32_t2(1, 1); - // Extend extent up to `gpuImageSideLengthTiles` by moving the `bottomRightTile` an appropriate amount downwards to the right - currentMappedRegion.bottomRightTile += uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - currentTileExtents; - // This extension can cause the mapped region to fall out of bounds on border cases, therefore we clamp it and extend it in the other direction - // by the amount of tiles we removed during clamping - const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.bottomRightTile) - int32_t2(getLastTileIndex(currentMappedRegion.baseMipLevel)))); - currentMappedRegion.bottomRightTile -= excessTiles; - // Shifting of the topLeftTile could fall out of bounds in pathological cases or at very high mip levels (zooming out too much), so we shift if possible, otherwise set it to 0 - currentMappedRegion.topLeftTile = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.topLeftTile) - int32_t2(excessTiles))); - - // Mark all gpu tiles as dirty - currentMappedRegionOccupancy.resize(gpuImageSideLengthTiles); - for (auto i = 0u; i < gpuImageSideLengthTiles; i++) - { - currentMappedRegionOccupancy[i].clear(); - currentMappedRegionOccupancy[i].resize(gpuImageSideLengthTiles, false); - } - // Reset state for gpu image so that it starts loading tiles at top left. Not really necessary. - gpuImageTopLeft = uint32_t2(0, 0); - } + void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); /* * @brief Slides the mapped region along the image, marking the tiles dropped as dirty but preserving the residency for tiles that are inside both the previous and new mapped regions. 
@@ -266,79 +170,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted * * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image (visible from viewport) that the mapped region needs to cover */ - void slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) - { - // `topLeftShift` represents how many tiles up and to the left we have to move the mapped region to fit the viewport. - // First we compute a vector from the current mapped region's topleft to the viewport's topleft. If this vector is positive along a dimension it means - // the viewport's topleft is to the right or below the current mapped region's topleft, so we don't have to shift the mapped region to the left/up in that case - const int32_t2 topLeftShift = nbl::hlsl::min(int32_t2(0, 0), int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegion.topLeftTile)); - // `bottomRightShift` represents the same as above but in the other direction. - const int32_t2 bottomRightShift = nbl::hlsl::max(int32_t2(0, 0), int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)); - - // The following is not necessarily equal to `gpuImageSideLengthTiles` since there can be pathological cases, as explained in the remapping method - const uint32_t2 mappedRegionDimensions = currentMappedRegion.bottomRightTile - currentMappedRegion.topLeftTile + 1u; - const uint32_t2 gpuImageBottomRight = (gpuImageTopLeft + mappedRegionDimensions - 1u) % gpuImageSideLengthTiles; - - // Mark dropped tiles as dirty/non-resident - if (topLeftShift.x < 0) - { - // Shift left - const uint32_t tilesToFit = -topLeftShift.x; - for (uint32_t tile = 0; tile < tilesToFit; tile++) - { - // Get actual tile index with wraparound - uint32_t tileIdx = (gpuImageBottomRight.x + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; - currentMappedRegionOccupancy[tileIdx].clear(); - 
currentMappedRegionOccupancy[tileIdx].resize(gpuImageSideLengthTiles, false); - } - } - else if (bottomRightShift.x > 0) - { - //Shift right - const uint32_t tilesToFit = bottomRightShift.x; - for (uint32_t tile = 0; tile < tilesToFit; tile++) - { - // Get actual tile index with wraparound - uint32_t tileIdx = (tile + gpuImageTopLeft.x) % gpuImageSideLengthTiles; - currentMappedRegionOccupancy[tileIdx].clear(); - currentMappedRegionOccupancy[tileIdx].resize(gpuImageSideLengthTiles, false); - } - } - - if (topLeftShift.y < 0) - { - // Shift up - const uint32_t tilesToFit = -topLeftShift.y; - for (uint32_t tile = 0; tile < tilesToFit; tile++) - { - // Get actual tile index with wraparound - uint32_t tileIdx = (gpuImageBottomRight.y + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; - for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) - currentMappedRegionOccupancy[i][tileIdx] = false; - } - } - else if (bottomRightShift.y > 0) - { - //Shift down - const uint32_t tilesToFit = bottomRightShift.y; - for (uint32_t tile = 0; tile < tilesToFit; tile++) - { - // Get actual tile index with wraparound - uint32_t tileIdx = (tile + gpuImageTopLeft.y) % gpuImageSideLengthTiles; - for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) - currentMappedRegionOccupancy[i][tileIdx] = false; - } - } - - // Shift the mapped region accordingly - // A nice consequence of the mapped region being always maximally - sized is that - // along any dimension, only a shift in one direction is necessary, so we can simply add up the shifts - currentMappedRegion.topLeftTile = uint32_t2(int32_t2(currentMappedRegion.topLeftTile) + topLeftShift + bottomRightShift); - currentMappedRegion.bottomRightTile = uint32_t2(int32_t2(currentMappedRegion.bottomRightTile) + topLeftShift + bottomRightShift); - - // Toroidal shift for the gpu image top left - gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftShift + bottomRightShift + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; 
- } + void slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); // @brief Info to match a gpu tile to the tile in the real image it should hold image data for struct ImageTileToGPUTileCorrespondence @@ -353,21 +185,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted * * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image covering the viewport */ - core::vector tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) const - { - core::vector retVal; - for (uint32_t tileX = viewportTileRange.topLeftTile.x; tileX <= viewportTileRange.bottomRightTile.x; tileX++) - for (uint32_t tileY = viewportTileRange.topLeftTile.y; tileY <= viewportTileRange.bottomRightTile.y; tileY++) - { - uint32_t2 imageTileIndex = uint32_t2(tileX, tileY); - // Toroidal shift to find which gpu tile the image tile corresponds to - uint32_t2 gpuImageTileIndex = ((imageTileIndex - currentMappedRegion.topLeftTile) + gpuImageTopLeft) % gpuImageSideLengthTiles; - // Don't bother scheduling an upload if the tile is already resident - if (!currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) - retVal.push_back({ imageTileIndex , gpuImageTileIndex }); - } - return retVal; - } + core::vector tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) const; // @brief Returns the index of the last tile when covering the image with `mipLevel` tiles uint32_t2 getLastTileIndex(uint32_t mipLevel) const From bb3c3e84f93447884b70b94c9dbd3c4c2bce9885 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 17 Sep 2025 22:20:46 -0300 Subject: [PATCH 24/29] Changes following PR review, to be moved to n4ce --- 62_CAD/DrawResourcesFiller.cpp | 80 +++++++++++++++++++--------------- 62_CAD/DrawResourcesFiller.h | 15 +++++-- 62_CAD/Images.cpp | 31 ++++++++----- 62_CAD/Images.h | 40 ++++++++--------- 62_CAD/main.cpp | 34 ++++++++------- 5 files changed, 113 insertions(+), 87 deletions(-) diff --git 
a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index cd75dfe77..231fee09c 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -652,7 +652,7 @@ bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::spangetLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); @@ -662,9 +662,9 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENCED TYPE + // TODO: Function call that gets you image creaation params based on the georeferenced image, it will also get you the GEOREFERENCED TYPE IGPUImage::SCreationParams imageCreationParams = {}; - ImageType imageType = determineGeoreferencedImageCreationParams(imageCreationParams, params); + ImageType imageType = determineGeoreferencedImageCreationParams(imageCreationParams, imageStoragePath); // imageParams = cpuImage->getCreationParameters(); imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; @@ -736,10 +736,10 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; cachedImageRecord->staticCPUImage = nullptr; - cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(std::move(params), GeoreferencedImageTileSize); - // This is because gpu image is square - cachedImageRecord->georeferencedImageState->gpuImageSideLengthTiles = 
cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; + const uint32_t gpuImageSideLengthTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; + const uint32_t2 imageExtents = queryGeoreferencedImageExtents(imageStoragePath); + cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(imageStoragePath, imageExtents, gpuImageSideLengthTiles); } else { @@ -889,7 +889,7 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin endMainObject(); } -void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, OrientedBoundingBox2D&& worldspaceOBB, SIntendedSubmitInfo& intendedNextSubmit) { beginMainObject(MainObjectType::STREAMED_IMAGE); @@ -910,6 +910,10 @@ void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_ return; } + // Exploit the fact that this is default-initialized to 0, and only instantiate if it's not already instantiated + if (!cachedImageRecord->georeferencedImageState->fullImageTileLength.x) + cachedImageRecord->georeferencedImageState->initialize(std::move(worldspaceOBB), GeoreferencedImageTileSize); + // Generate upload data auto uploadData = generateTileUploadData(cachedImageRecord->type, NDCToWorld, cachedImageRecord->georeferencedImageState.get()); @@ -2508,14 +2512,17 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc return ret; } -ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params) +ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& 
outImageParams, const std::filesystem::path imageStoragePath) { // Decide whether the image can reside fully into memory rather than get streamed. // TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not // TODO: add criterial that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) - const size_t mainImagePixels = ((size_t)params.imageExtents.x * (size_t)params.imageExtents.y); - const size_t viewportImagePixels = ((size_t)params.viewportExtents.x * (size_t)params.viewportExtents.y); - const bool betterToResideFullyInMem = params.imageExtents.x < (2 ^ 14) && params.imageExtents.y < (2 ^ 14) && mainImagePixels <= viewportImagePixels; + const uint32_t2 imageExtents = queryGeoreferencedImageExtents(imageStoragePath); + const nbl::asset::E_FORMAT imageFormat = queryGeoreferencedImageFormat(imageStoragePath); + + const size_t mainImagePixels = ((size_t)imageExtents.x * (size_t)imageExtents.y); + const size_t viewportImagePixels = ((size_t)viewportExtents.x * (size_t)viewportExtents.y); + const bool betterToResideFullyInMem = imageExtents.x < (2 ^ 14) && imageExtents.y < (2 ^ 14) && mainImagePixels <= viewportImagePixels; ImageType imageType; @@ -2526,19 +2533,19 @@ ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::as outImageParams.type = asset::IImage::ET_2D; outImageParams.samples = asset::IImage::ESCF_1_BIT; - outImageParams.format = params.format; + outImageParams.format = imageFormat; if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) { - outImageParams.extent = { params.imageExtents.x, params.imageExtents.y, 1u }; + outImageParams.extent = { imageExtents.x, imageExtents.y, 1u }; } else { // Enough to cover twice the viewport at mip 0 (so that when zooming out to mip 1 the whole viewport still gets covered with mip 0 tiles) // and in any rotation (taking the longest side 
suffices). Can be increased to avoid frequent tile eviction when moving the camera at mip close to 1 const uint32_t diagonal = static_cast(nbl::hlsl::ceil( - nbl::hlsl::sqrt(static_cast(params.viewportExtents.x * params.viewportExtents.x - + params.viewportExtents.y * params.viewportExtents.y)) + nbl::hlsl::sqrt(static_cast(viewportExtents.x * viewportExtents.x + + viewportExtents.y * viewportExtents.y)) ) ); const uint32_t gpuImageSidelength = 2 * core::roundUp(diagonal, GeoreferencedImageTileSize) + GeoreferencedImagePaddingTiles * GeoreferencedImageTileSize; @@ -2689,7 +2696,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( { // I think eventually it's better to just transform georeferenced images that aren't big enough into static images and forget about them if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) //Pass imageID as parameter, down from the addGeoRef call - return TileUploadData{ {}, imageStreamingState->georeferencedImageParams.worldspaceOBB }; + return TileUploadData{ {}, imageStreamingState->worldspaceOBB }; // Compute the mip level and tile range we would need to encompass the viewport // `viewportTileRange` is always should be a subset of `currentMappedRegion`, covering only the tiles visible in the viewport @@ -2702,10 +2709,10 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // DEBUG - Sampled mip level { // Get world coordinates for each corner of the mapped region - const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - const float32_t2 fullImageDirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x); + const float32_t2 oneTileDirU = imageStreamingState->worldspaceOBB.dirU / 
float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); + const float32_t2 fullImageDirV = float32_t2(imageStreamingState->worldspaceOBB.dirU.y, -imageStreamingState->worldspaceOBB.dirU.x); const float32_t2 oneTileDirV = fullImageDirV / float32_t(imageStreamingState->fullImageTileLength.y) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - float64_t2 topLeftMappedRegionWorld = imageStreamingState->georeferencedImageParams.worldspaceOBB.topLeft; + float64_t2 topLeftMappedRegionWorld = imageStreamingState->worldspaceOBB.topLeft; topLeftMappedRegionWorld += oneTileDirU * float32_t(imageStreamingState->currentMappedRegion.topLeftTile.x) + oneTileDirV * float32_t(imageStreamingState->currentMappedRegion.topLeftTile.y); const uint32_t2 mappedRegionTileLength = imageStreamingState->currentMappedRegion.bottomRightTile - imageStreamingState->currentMappedRegion.topLeftTile + uint32_t2(1, 1); float64_t2 bottomRightMappedRegionWorld = topLeftMappedRegionWorld; @@ -2755,7 +2762,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( const auto viewportHeightPixelLength = nbl::hlsl::length(viewportHeightPixelLengthVector); // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportWidthPixelLength / imageStreamingState->georeferencedImageParams.viewportExtents.x, viewportHeightPixelLength / imageStreamingState->georeferencedImageParams.viewportExtents.y); + float64_t pixelRatio = nbl::hlsl::max(viewportWidthPixelLength / viewportExtents.x, viewportHeightPixelLength / viewportExtents.y); pixelRatio = pixelRatio < 1.0 ? 
1.0 : pixelRatio; std::cout << "Sampled mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; @@ -2766,6 +2773,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( auto tilesToLoad = imageStreamingState->tilesToLoad(viewportTileRange); tiles.reserve(tilesToLoad.size()); + const uint32_t2 imageExtents = imageStreamingState->imageExtents; for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) { uint32_t2 gpuMip0Texels(GeoreferencedImageTileSize, GeoreferencedImageTileSize); @@ -2781,14 +2789,14 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // minimal artifacts and no stretching if (imageTileIndex.x == lastTileIndex.x) { - georeferencedImageMip0SampledTexels.x = imageStreamingState->georeferencedImageParams.imageExtents.x - georeferencedImageMip0SamplingOffset.x; + georeferencedImageMip0SampledTexels.x = imageExtents.x - georeferencedImageMip0SamplingOffset.x; uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.x >> (viewportTileRange.baseMipLevel + 1); gpuMip0Texels.x = 2 * gpuMip1Texels; imageStreamingState->lastImageTileFractionalSpan.x = float32_t(gpuMip0Texels.x) / GeoreferencedImageTileSize; } if (imageTileIndex.y == lastTileIndex.y) { - georeferencedImageMip0SampledTexels.y = imageStreamingState->georeferencedImageParams.imageExtents.y - georeferencedImageMip0SamplingOffset.y; + georeferencedImageMip0SampledTexels.y = imageExtents.y - georeferencedImageMip0SamplingOffset.y; uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.y >> (viewportTileRange.baseMipLevel + 1); gpuMip0Texels.y = 2 * gpuMip1Texels; imageStreamingState->lastImageTileFractionalSpan.y = float32_t(gpuMip0Texels.y) / GeoreferencedImageTileSize; @@ -2799,18 +2807,20 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // If this looks bad we can do fractional pixelage by moving the uv an even tinier amount but at high zoom levels it should be imperceptible if 
((imageStreamingState->lastImageTileFractionalSpan.x == 0.f) || (imageStreamingState->lastImageTileFractionalSpan.y == 0.f)) continue; - if (!georeferencedImageLoader->hasPrecomputedMips(imageStreamingState->georeferencedImageParams.storagePath)) + if (!georeferencedImageLoader->hasPrecomputedMips(imageStreamingState->imageStoragePath)) { - gpuMip0Tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel, georeferencedImageMip0SampledTexels, gpuMip0Texels); - gpuMip1Tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel, georeferencedImageMip0SampledTexels, gpuMip0Texels / 2u); + gpuMip0Tile = georeferencedImageLoader->load(imageStreamingState->imageStoragePath, (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel, georeferencedImageMip0SampledTexels, gpuMip0Texels); + gpuMip1Tile = georeferencedImageLoader->load(imageStreamingState->imageStoragePath, (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel, georeferencedImageMip0SampledTexels, gpuMip0Texels / 2u); } else { - gpuMip0Tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSize, gpuMip0Texels, imageStreamingState->currentMappedRegion.baseMipLevel, false); - gpuMip1Tile = georeferencedImageLoader->load(imageStreamingState->georeferencedImageParams.storagePath, imageTileIndex * GeoreferencedImageTileSizeMip1, gpuMip0Texels / 2u, imageStreamingState->currentMappedRegion.baseMipLevel, true); + gpuMip0Tile = georeferencedImageLoader->load(imageStreamingState->imageStoragePath, imageTileIndex * GeoreferencedImageTileSize, gpuMip0Texels, imageStreamingState->currentMappedRegion.baseMipLevel, false); + gpuMip1Tile = 
georeferencedImageLoader->load(imageStreamingState->imageStoragePath, imageTileIndex * GeoreferencedImageTileSizeMip1, gpuMip0Texels / 2u, imageStreamingState->currentMappedRegion.baseMipLevel, true); } } - + + const nbl::asset::E_FORMAT imageFormat = queryGeoreferencedImageFormat(imageStreamingState->imageStoragePath); + asset::IImage::SBufferCopy bufCopy; bufCopy.bufferOffset = 0; bufCopy.bufferRowLength = gpuMip0Texels.x; @@ -2825,7 +2835,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( bufCopy.imageExtent.height = gpuMip0Texels.y; bufCopy.imageExtent.depth = 1; - tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(gpuMip0Tile), std::move(bufCopy)); + tiles.emplace_back(imageFormat, std::move(gpuMip0Tile), std::move(bufCopy)); // Upload the smaller tile to mip 1 bufCopy = {}; @@ -2843,19 +2853,19 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( bufCopy.imageExtent.height = gpuMip0Texels.y / 2; bufCopy.imageExtent.depth = 1; - tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(gpuMip1Tile), std::move(bufCopy)); + tiles.emplace_back(imageFormat, std::move(gpuMip1Tile), std::move(bufCopy)); // Mark tile as resident imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; } // Figure out an obb that covers only the currently loaded tiles - OrientedBoundingBox2D viewportEncompassingOBB = imageStreamingState->georeferencedImageParams.worldspaceOBB; + OrientedBoundingBox2D viewportEncompassingOBB = imageStreamingState->worldspaceOBB; // The original image `dirU` corresponds to `maxImageTileIndices.x + 1` mip 0 tiles (provided it's exactly that length in tiles) // Dividing dirU by `maxImageTileIndices + (1,1)` we therefore get a vector that spans exactly one mip 0 tile (in the u direction) in worldspace. 
// Multiplying that by `2^mipLevel` we get a vector that spans exactly one mip `mipLevel` tile (in the u direction) - const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - const float32_t2 fullImageDirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x) * imageStreamingState->georeferencedImageParams.worldspaceOBB.aspectRatio; + const float32_t2 oneTileDirU = imageStreamingState->worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); + const float32_t2 fullImageDirV = float32_t2(imageStreamingState->worldspaceOBB.dirU.y, -imageStreamingState->worldspaceOBB.dirU.x) * imageStreamingState->worldspaceOBB.aspectRatio; const float32_t2 oneTileDirV = fullImageDirV / float32_t(imageStreamingState->fullImageTileLength.y) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); viewportEncompassingOBB.topLeft += oneTileDirU * float32_t(viewportTileRange.topLeftTile.x); viewportEncompassingOBB.topLeft += oneTileDirV * float32_t(viewportTileRange.topLeftTile.y); @@ -2930,8 +2940,8 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const const float64_t viewportSideVImageTexels = nbl::hlsl::length(viewportSideVImageTexelsVector); // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportSideUImageTexels / imageStreamingState->georeferencedImageParams.viewportExtents.x, - viewportSideVImageTexels / imageStreamingState->georeferencedImageParams.viewportExtents.y); + float64_t pixelRatio = nbl::hlsl::max(viewportSideUImageTexels / viewportExtents.x, + viewportSideVImageTexels / viewportExtents.y); pixelRatio = pixelRatio < 1.0 ? 
1.0 : pixelRatio; // DEBUG - Clamped at 0 for magnification diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index cd25ab3a5..20b309176 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -203,6 +203,11 @@ struct DrawResourcesFiller typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); + void setViewportExtents(const uint32_t2 _viewportExtents) + { + viewportExtents = _viewportExtents; + } + // DrawResourcesFiller needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding void setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding); @@ -417,7 +422,7 @@ struct DrawResourcesFiller * @return true if the image was successfully cached and is ready for use; false if allocation failed. * [TODO]: should be internal protected member function. */ - bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, GeoreferencedImageParams&& params, SIntendedSubmitInfo& intendedNextSubmit); + bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const std::filesystem::path imageStoragePath, SIntendedSubmitInfo& intendedNextSubmit); // [TODO]: should be internal protected member function. bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); @@ -426,7 +431,7 @@ struct DrawResourcesFiller void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); // This function must be called immediately after `ensureGeoreferencedImageAvailability_AllocateIfNeeded` for the same imageID. 
- void addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit); + void addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, OrientedBoundingBox2D&& worldspaceOBB, SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. @@ -737,7 +742,7 @@ struct DrawResourcesFiller * @param[out] outImageType Indicates whether the image should be fully resident or streamed. * @param[in] params Parameters for the georeferenced image */ - ImageType determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params); + ImageType determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const std::filesystem::path imageStoragePath); /** * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter @@ -977,7 +982,9 @@ struct DrawResourcesFiller uint32_t imagesArrayBinding = 0u; // Georef - pushed here rn for simplicity core::smart_refctd_ptr georeferencedImageLoader; - std::unordered_map> streamedImageCopies; + + // Viewport state + uint32_t2 viewportExtents = {}; }; diff --git a/62_CAD/Images.cpp b/62_CAD/Images.cpp index 38fac0069..131b836a3 100644 --- a/62_CAD/Images.cpp +++ b/62_CAD/Images.cpp @@ -2,10 +2,21 @@ using namespace nbl::hlsl; -smart_refctd_ptr GeoreferencedImageStreamingState::create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize) +smart_refctd_ptr GeoreferencedImageStreamingState::create(std::filesystem::path imageStoragePath, const uint32_t2 imageExtents, const uint32_t gpuImageSideLengthTiles) { smart_refctd_ptr retVal(new 
GeoreferencedImageStreamingState{}); - retVal->georeferencedImageParams = std::move(_georeferencedImageParams); + + retVal->imageStoragePath = imageStoragePath; + retVal->imageExtents = imageExtents; + retVal->gpuImageSideLengthTiles = gpuImageSideLengthTiles; + + return retVal; +} + +void GeoreferencedImageStreamingState::initialize(OrientedBoundingBox2D&& _worldspaceOBB, uint32_t TileSize) +{ + worldspaceOBB = std::move(_worldspaceOBB); + // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). // The composition of these matrices therefore transforms any point in worldspace into uv coordinates in imagespace @@ -16,32 +27,30 @@ smart_refctd_ptr GeoreferencedImageStreamingSt // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) - const float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; - const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(retVal->georeferencedImageParams.worldspaceOBB.aspectRatio); + const float64_t2 dirU = worldspaceOBB.dirU; + const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(worldspaceOBB.aspectRatio); const float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); const float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); const float64_t2 firstRow = dirU / dirULengthSquared; const float64_t2 secondRow = dirV / dirVLengthSquared; - const float64_t2 displacement = -retVal->georeferencedImageParams.worldspaceOBB.topLeft; + const float64_t2 displacement = -worldspaceOBB.topLeft; // This is the same as multiplying the change of basis matrix by the 
displacement vector const float64_t postRotatedShiftX = nbl::hlsl::dot(firstRow, displacement); const float64_t postRotatedShiftY = nbl::hlsl::dot(secondRow, displacement); // Put them all together - retVal->world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); + world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); // Also set the maxMipLevel - to keep stuff simple, we don't consider having less than one tile per dimension // If you're zoomed out enough then at that point the whole image is just sampled as one tile along that dimension // In pathological cases, such as images that are way bigger on one side than the other, this could cause aliasing and slow down sampling if zoomed out too much. // If we were ever to observe such pathological cases, then maybe we should consider doing something else here. For example, making the loader able to handle different tile lengths per dimension // (so for example a 128x64 tile) but again for now it should be left as-is. 
- uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(retVal->georeferencedImageParams.imageExtents / TileSize)); - retVal->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); + uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(imageExtents / TileSize)); + maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); - retVal->fullImageTileLength = (retVal->georeferencedImageParams.imageExtents - 1u) / TileSize + 1u; - - return retVal; + fullImageTileLength = (imageExtents - 1u) / TileSize + 1u; } void GeoreferencedImageStreamingState::ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange) diff --git a/62_CAD/Images.h b/62_CAD/Images.h index df3eed4b6..cb135b8c1 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -25,19 +25,6 @@ enum class ImageType : uint8_t GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant }; -/** - * @struct GeoreferencedImageParams - * @brief Info needed to add a georeferenced image. - */ -struct GeoreferencedImageParams -{ - OrientedBoundingBox2D worldspaceOBB = {}; // Position and extents of the image in worldspace - uint32_t2 imageExtents = {}; // Real extents (in texels) of the image - uint32_t2 viewportExtents = {}; // Extents (in pixels) of the viewport on which the image is to be displayed - asset::E_FORMAT format = {}; // Texel format of the image - std::filesystem::path storagePath = {}; // Path to the file where image data is stored -}; - /** * @class ImagesMemorySubAllocator * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. 
@@ -131,12 +118,21 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted protected: /* - * @brief Create a streaming state for a georeferenced image + * @brief Create an image streaming state and only set the fields that are passed as parameters + * + * @param _imageStoragePath Path to the file where image data is stored. + * @param imageExtents Extents of the image in texels. Some internal algos of the class require knowing this and the class doesn-t have access to the image loader + * @param _gpuImageSideLengthTiles Length of each side of the gpu image, in tiles (it's always square) + */ + static smart_refctd_ptr create(std::filesystem::path imageStoragePath, const uint32_t2 imageExtents, const uint32_t gpuImageSideLengthTiles); + + /* + * @brief Set most fields for the imageStreamingState * - * @param _georeferencedImageParams Info relating to the georeferenced image for which to create a streaming state. + * @param _worldspaceOBB Worldspace oriented bounding box where image needs to be drawn. * @param TileSize Size of the tiles used to break up the image. Also size of the tiles in the GPU image backing this georeferenced image. */ - static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize); + void initialize(OrientedBoundingBox2D&& _worldspaceOBB, uint32_t TileSize); // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial @@ -144,7 +140,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // @brief Transform worldspace coordinates into UV coordinates into the image float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoords) const { return nbl::hlsl::mul(world2UV, worldCoords); } // @brief Transform worldspace coordinates into texel coordinates into the image - float64_t2 transformWorldCoordsToTexelCoords(const float64_t3 worldCoords) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoords); } + float64_t2 transformWorldCoordsToTexelCoords(const float64_t3 worldCoords) const { return float64_t2(imageExtents) * transformWorldCoordsToUV(worldCoords); } // @brief Transform worldspace coordinates into tile coordinates into the image, where the image is broken up into tiles of size `TileSize` float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToTexelCoords(worldCoords); } @@ -199,27 +195,29 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted const uint32_t2 lastTileIndex = getLastTileIndex(currentMappedRegion.baseMipLevel); return bool2(lastTileIndex.x == viewportBottomRightTile.x, lastTileIndex.y == viewportBottomRightTile.y); } - - GeoreferencedImageParams georeferencedImageParams = {}; std::vector> currentMappedRegionOccupancy = {}; // Sidelength of the gpu image, in mip 0 tiles that are `TileSize` (creation parameter) texels wide uint32_t gpuImageSideLengthTiles = {}; // We establish a max mipLevel for the image, which is the mip level at which any of width, height fit in a single tile uint32_t maxMipLevel = {}; + // Dimensions of the stored image, in texels + uint32_t2 imageExtents = {}; // Number of mip 0 tiles needed to cover the whole image, counting the last tile that might be fractional if the image size is not perfectly divisible by TileSize 
uint32_t2 fullImageTileLength = {}; // Indicates on which tile of the gpu image the current mapped region's `topLeft` resides uint32_t2 gpuImageTopLeft = {}; + // Worldspace bounding box for the image + OrientedBoundingBox2D worldspaceOBB; // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) float64_t2x3 world2UV = {}; - // If the image dimensions are not exactly divisible by `TileSize`, then the last tile along a dimension only holds a proportion of `lastTileFraction` pixels along that dimension - float64_t lastTileFraction = {}; // Reflects what fraction of a FULL tile the LAST tile in the image at the current mip level actually spans. // It only gets set when necessary, and should always be updated correctly before being used, since it's related to the current `baseMipLevel` of the `currentMappedRegion` float32_t2 lastImageTileFractionalSpan = {1.f, 1.f}; // Set mip level to extreme value so it gets recreated on first iteration GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; + // Path to the file where image data is stored + std::filesystem::path imageStoragePath; }; struct CachedImageRecord diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 97da0b51d..a89fb0bbd 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -568,7 +568,9 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio void allocateResources() { drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); - + + drawResourcesFiller.setViewportExtents(uint32_t2(m_window->getWidth(), m_window->getHeight())); + size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize); @@ -3874,15 +3876,14 @@ class 
ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio else if (mode == ExampleMode::CASE_12) { // placeholder, actual path is right now hardcoded into the loader - const static std::string tiledGridPath = "../../media/tiled_grid_mip_0.exr"; + const static std::string georeferencedImagePath = "../../media/tiled_grid_mip_0.exr"; constexpr float64_t3 topLeftViewportH = float64_t3(-1.0, -1.0, 1.0); constexpr float64_t3 topRightViewportH = float64_t3(1.0, -1.0, 1.0); constexpr float64_t3 bottomLeftViewportH = float64_t3(-1.0, 1.0, 1.0); constexpr float64_t3 bottomRightViewportH = float64_t3(1.0, 1.0, 1.0); - image_id tiledGridID = 6996; - GeoreferencedImageParams tiledGridParams; + image_id georefImageID = 6996; // Position at topLeft viewport auto projectionToNDC = m_Camera.constructViewProjection(); // TEST CAMERA ROTATION @@ -3891,26 +3892,27 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio auto inverseViewProj = nbl::hlsl::inverse(projectionToNDC); const static auto startingTopLeft = nbl::hlsl::mul(inverseViewProj, topLeftViewportH); - tiledGridParams.worldspaceOBB.topLeft = startingTopLeft; + OrientedBoundingBox2D georefImageOBB = {}; + georefImageOBB.topLeft = startingTopLeft; // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU const static float64_t startingImagePixelsPerViewportPixels = 1.0; const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); - const static auto dirU = startingViewportWidthVector * float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); + const static auto dirU = startingViewportWidthVector * float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); // DEBUG - 
tiledGridParams.worldspaceOBB.topLeft += float32_t2(startingViewportWidthVector - dirU); - - tiledGridParams.worldspaceOBB.dirU = dirU; - tiledGridParams.imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(tiledGridPath); - tiledGridParams.worldspaceOBB.aspectRatio = float32_t(tiledGridParams.imageExtents.y) / tiledGridParams.imageExtents.x; - tiledGridParams.viewportExtents = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; - tiledGridParams.format = drawResourcesFiller.queryGeoreferencedImageFormat(tiledGridPath); - tiledGridParams.storagePath = tiledGridPath; + georefImageOBB.topLeft += float32_t2(startingViewportWidthVector - dirU); + + georefImageOBB.dirU = dirU; + const uint32_t2 imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); + georefImageOBB.aspectRatio = float32_t(imageExtents.y) / imageExtents.x; + + // Unnecessary but should go into a callback if window can change dimensions during execution + drawResourcesFiller.setViewportExtents(uint32_t2(m_window->getWidth(), m_window->getHeight())); - drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(tiledGridID, std::move(tiledGridParams), intendedNextSubmit); + drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(georefImageID, georeferencedImagePath, intendedNextSubmit); - drawResourcesFiller.addGeoreferencedImage(tiledGridID, inverseViewProj, intendedNextSubmit); + drawResourcesFiller.addGeoreferencedImage(georefImageID, inverseViewProj, std::move(georefImageOBB), intendedNextSubmit); } } From b232c218aaba0e6bf614cc59aaa65bd50312dc60 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 23 Sep 2025 00:16:49 -0300 Subject: [PATCH 25/29] Add a whole texel shift --- 62_CAD/DrawResourcesFiller.cpp | 155 +++++++++++++++++---------------- 62_CAD/DrawResourcesFiller.h | 12 +-- 62_CAD/Images.cpp | 33 +++---- 62_CAD/Images.h | 45 +++++----- 62_CAD/main.cpp | 23 +++-- 5 files changed, 134 insertions(+), 134 
deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 231fee09c..b030981b0 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -652,7 +652,7 @@ bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::spangetLogicalDevice(); auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); @@ -664,7 +664,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( // TODO: Function call that gets you image creaation params based on the georeferenced image, it will also get you the GEOREFERENCED TYPE IGPUImage::SCreationParams imageCreationParams = {}; - ImageType imageType = determineGeoreferencedImageCreationParams(imageCreationParams, imageStoragePath); + ImageType imageType = determineGeoreferencedImageCreationParams(imageCreationParams, params); // imageParams = cpuImage->getCreationParameters(); imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; @@ -736,10 +736,13 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded( cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; cachedImageRecord->staticCPUImage = nullptr; + cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(std::move(params), GeoreferencedImageTileSize); + // This is because gpu image is square - const uint32_t gpuImageSideLengthTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; - const uint32_t2 imageExtents = queryGeoreferencedImageExtents(imageStoragePath); - cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(imageStoragePath, imageExtents, gpuImageSideLengthTiles); + cachedImageRecord->georeferencedImageState->gpuImageSideLengthTiles = 
cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; + + auto& fullImageTileLength = cachedImageRecord->georeferencedImageState->fullImageTileLength; + fullImageTileLength = (cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents - 1u) / GeoreferencedImageTileSize + 1u; } else { @@ -889,7 +892,7 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin endMainObject(); } -void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, OrientedBoundingBox2D&& worldspaceOBB, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit) { beginMainObject(MainObjectType::STREAMED_IMAGE); @@ -910,10 +913,6 @@ void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_ return; } - // Exploit the fact that this is default-initialized to 0, and only instantiate if it's not already instantiated - if (!cachedImageRecord->georeferencedImageState->fullImageTileLength.x) - cachedImageRecord->georeferencedImageState->initialize(std::move(worldspaceOBB), GeoreferencedImageTileSize); - // Generate upload data auto uploadData = generateTileUploadData(cachedImageRecord->type, NDCToWorld, cachedImageRecord->georeferencedImageState.get()); @@ -2512,17 +2511,18 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc return ret; } -ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const std::filesystem::path imageStoragePath) +ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params) { // Decide whether the image can reside fully into memory rather than get streamed. 
// TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not // TODO: add criterial that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) - const uint32_t2 imageExtents = queryGeoreferencedImageExtents(imageStoragePath); - const nbl::asset::E_FORMAT imageFormat = queryGeoreferencedImageFormat(imageStoragePath); + const uint32_t2 imageExtents = queryGeoreferencedImageExtents(params.storagePath); + const nbl::asset::E_FORMAT imageFormat = queryGeoreferencedImageFormat(params.storagePath); const size_t mainImagePixels = ((size_t)imageExtents.x * (size_t)imageExtents.y); - const size_t viewportImagePixels = ((size_t)viewportExtents.x * (size_t)viewportExtents.y); - const bool betterToResideFullyInMem = imageExtents.x < (2 ^ 14) && imageExtents.y < (2 ^ 14) && mainImagePixels <= viewportImagePixels; + const size_t viewportImagePixels = ((size_t)viewportExtent.x * (size_t)viewportExtent.y); + // If it's too long along any dimension it's obviously going to be streamed + const bool betterToResideFullyInMem = imageExtents.x < (1u << 14u) && imageExtents.y < (1u << 14u) && mainImagePixels <= viewportImagePixels; ImageType imageType; @@ -2544,8 +2544,8 @@ ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::as // Enough to cover twice the viewport at mip 0 (so that when zooming out to mip 1 the whole viewport still gets covered with mip 0 tiles) // and in any rotation (taking the longest side suffices). 
Can be increased to avoid frequent tile eviction when moving the camera at mip close to 1 const uint32_t diagonal = static_cast(nbl::hlsl::ceil( - nbl::hlsl::sqrt(static_cast(viewportExtents.x * viewportExtents.x - + viewportExtents.y * viewportExtents.y)) + nbl::hlsl::sqrt(static_cast(viewportExtent.x * viewportExtent.x + + viewportExtent.y * viewportExtent.y)) ) ); const uint32_t gpuImageSidelength = 2 * core::roundUp(diagonal, GeoreferencedImageTileSize) + GeoreferencedImagePaddingTiles * GeoreferencedImageTileSize; @@ -2696,7 +2696,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( { // I think eventually it's better to just transform georeferenced images that aren't big enough into static images and forget about them if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) //Pass imageID as parameter, down from the addGeoRef call - return TileUploadData{ {}, imageStreamingState->worldspaceOBB }; + return TileUploadData{ {}, imageStreamingState->georeferencedImageParams.worldspaceOBB }; // Compute the mip level and tile range we would need to encompass the viewport // `viewportTileRange` is always should be a subset of `currentMappedRegion`, covering only the tiles visible in the viewport @@ -2709,10 +2709,10 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // DEBUG - Sampled mip level { // Get world coordinates for each corner of the mapped region - const float32_t2 oneTileDirU = imageStreamingState->worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - const float32_t2 fullImageDirV = float32_t2(imageStreamingState->worldspaceOBB.dirU.y, -imageStreamingState->worldspaceOBB.dirU.x); + const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << 
imageStreamingState->currentMappedRegion.baseMipLevel); + const float32_t2 fullImageDirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x); const float32_t2 oneTileDirV = fullImageDirV / float32_t(imageStreamingState->fullImageTileLength.y) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - float64_t2 topLeftMappedRegionWorld = imageStreamingState->worldspaceOBB.topLeft; + float64_t2 topLeftMappedRegionWorld = imageStreamingState->georeferencedImageParams.worldspaceOBB.topLeft; topLeftMappedRegionWorld += oneTileDirU * float32_t(imageStreamingState->currentMappedRegion.topLeftTile.x) + oneTileDirV * float32_t(imageStreamingState->currentMappedRegion.topLeftTile.y); const uint32_t2 mappedRegionTileLength = imageStreamingState->currentMappedRegion.bottomRightTile - imageStreamingState->currentMappedRegion.topLeftTile + uint32_t2(1, 1); float64_t2 bottomRightMappedRegionWorld = topLeftMappedRegionWorld; @@ -2721,7 +2721,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // With the above, get an affine transform that maps points in worldspace to their pixel coordinates in the mapped region tile space. This can be done by mapping // `topLeftMappedRegionWorld -> (0,0)` and `bottomRightMappedRegionWorld -> mappedRegionPixelLength - 1` const uint32_t2 mappedRegionPixelLength = GeoreferencedImageTileSize * mappedRegionTileLength; - + // 1. 
Displacement // Multiplying a (homogenous) point p by this matrix yields the displacement vector `p - topLeftMappedRegionWorld` float64_t2x3 displacementMatrix(1., 0., -topLeftMappedRegionWorld.x, 0., 1., -topLeftMappedRegionWorld.y); @@ -2760,20 +2760,21 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( // Get pixel length for each of these vectors const auto viewportWidthPixelLength = nbl::hlsl::length(viewportWidthPixelLengthVector); const auto viewportHeightPixelLength = nbl::hlsl::length(viewportHeightPixelLengthVector); - + // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportWidthPixelLength / viewportExtents.x, viewportHeightPixelLength / viewportExtents.y); + float64_t pixelRatio = nbl::hlsl::max(viewportWidthPixelLength / viewportExtent.x, viewportHeightPixelLength / viewportExtent.y); pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; std::cout << "Sampled mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; } - + // We need to make every tile that covers the viewport resident. We reserve the amount of tiles needed for upload. 
core::vector tiles; auto tilesToLoad = imageStreamingState->tilesToLoad(viewportTileRange); tiles.reserve(tilesToLoad.size()); - const uint32_t2 imageExtents = imageStreamingState->imageExtents; + const uint32_t2 imageExtents = imageStreamingState->georeferencedImageParams.imageExtents; + const std::filesystem::path imageStoragePath = imageStreamingState->georeferencedImageParams.storagePath; for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) { uint32_t2 gpuMip0Texels(GeoreferencedImageTileSize, GeoreferencedImageTileSize); @@ -2792,35 +2793,34 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( georeferencedImageMip0SampledTexels.x = imageExtents.x - georeferencedImageMip0SamplingOffset.x; uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.x >> (viewportTileRange.baseMipLevel + 1); gpuMip0Texels.x = 2 * gpuMip1Texels; - imageStreamingState->lastImageTileFractionalSpan.x = float32_t(gpuMip0Texels.x) / GeoreferencedImageTileSize; + imageStreamingState->lastImageTileTexels.x = gpuMip0Texels.x; + // If the last tile is too small just ignore it + if (!imageStreamingState->lastImageTileTexels.x) + continue; } if (imageTileIndex.y == lastTileIndex.y) { georeferencedImageMip0SampledTexels.y = imageExtents.y - georeferencedImageMip0SamplingOffset.y; uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.y >> (viewportTileRange.baseMipLevel + 1); gpuMip0Texels.y = 2 * gpuMip1Texels; - imageStreamingState->lastImageTileFractionalSpan.y = float32_t(gpuMip0Texels.y) / GeoreferencedImageTileSize; + imageStreamingState->lastImageTileTexels.y = gpuMip0Texels.y; + // If the last tile is too small just ignore it + if (!imageStreamingState->lastImageTileTexels.y) + continue; } - // If the last tile is too small just ignore it - given the way we set up stuff it's valid to check if these floats are exactly equal to 0, - // they're always a fraction of the form `x / GeoreferencedImageTileSize` with `0 <= x <= 
GeoreferencedImageTileSize` and `GeoreferencedImageTileSize` is PoT - // If this looks bad we can do fractional pixelage by moving the uv an even tinier amount but at high zoom levels it should be imperceptible - if ((imageStreamingState->lastImageTileFractionalSpan.x == 0.f) || (imageStreamingState->lastImageTileFractionalSpan.y == 0.f)) - continue; - if (!georeferencedImageLoader->hasPrecomputedMips(imageStreamingState->imageStoragePath)) + if (!georeferencedImageLoader->hasPrecomputedMips(imageStoragePath)) { - gpuMip0Tile = georeferencedImageLoader->load(imageStreamingState->imageStoragePath, (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel, georeferencedImageMip0SampledTexels, gpuMip0Texels); - gpuMip1Tile = georeferencedImageLoader->load(imageStreamingState->imageStoragePath, (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel, georeferencedImageMip0SampledTexels, gpuMip0Texels / 2u); + gpuMip0Tile = georeferencedImageLoader->load(imageStoragePath, georeferencedImageMip0SamplingOffset, georeferencedImageMip0SampledTexels, gpuMip0Texels); + gpuMip1Tile = georeferencedImageLoader->load(imageStoragePath, georeferencedImageMip0SamplingOffset, georeferencedImageMip0SampledTexels, gpuMip0Texels / 2u); } else { - gpuMip0Tile = georeferencedImageLoader->load(imageStreamingState->imageStoragePath, imageTileIndex * GeoreferencedImageTileSize, gpuMip0Texels, imageStreamingState->currentMappedRegion.baseMipLevel, false); - gpuMip1Tile = georeferencedImageLoader->load(imageStreamingState->imageStoragePath, imageTileIndex * GeoreferencedImageTileSizeMip1, gpuMip0Texels / 2u, imageStreamingState->currentMappedRegion.baseMipLevel, true); + gpuMip0Tile = georeferencedImageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSize, gpuMip0Texels, imageStreamingState->currentMappedRegion.baseMipLevel, false); + gpuMip1Tile = georeferencedImageLoader->load(imageStoragePath, imageTileIndex * 
GeoreferencedImageTileSizeMip1, gpuMip0Texels / 2u, imageStreamingState->currentMappedRegion.baseMipLevel, true); } } - const nbl::asset::E_FORMAT imageFormat = queryGeoreferencedImageFormat(imageStreamingState->imageStoragePath); - asset::IImage::SBufferCopy bufCopy; bufCopy.bufferOffset = 0; bufCopy.bufferRowLength = gpuMip0Texels.x; @@ -2835,7 +2835,7 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( bufCopy.imageExtent.height = gpuMip0Texels.y; bufCopy.imageExtent.depth = 1; - tiles.emplace_back(imageFormat, std::move(gpuMip0Tile), std::move(bufCopy)); + tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(gpuMip0Tile), std::move(bufCopy)); // Upload the smaller tile to mip 1 bufCopy = {}; @@ -2853,42 +2853,49 @@ DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData( bufCopy.imageExtent.height = gpuMip0Texels.y / 2; bufCopy.imageExtent.depth = 1; - tiles.emplace_back(imageFormat, std::move(gpuMip1Tile), std::move(bufCopy)); + tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(gpuMip1Tile), std::move(bufCopy)); // Mark tile as resident imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; } // Figure out an obb that covers only the currently loaded tiles - OrientedBoundingBox2D viewportEncompassingOBB = imageStreamingState->worldspaceOBB; - // The original image `dirU` corresponds to `maxImageTileIndices.x + 1` mip 0 tiles (provided it's exactly that length in tiles) - // Dividing dirU by `maxImageTileIndices + (1,1)` we therefore get a vector that spans exactly one mip 0 tile (in the u direction) in worldspace. 
- // Multiplying that by `2^mipLevel` we get a vector that spans exactly one mip `mipLevel` tile (in the u direction) - const float32_t2 oneTileDirU = imageStreamingState->worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - const float32_t2 fullImageDirV = float32_t2(imageStreamingState->worldspaceOBB.dirU.y, -imageStreamingState->worldspaceOBB.dirU.x) * imageStreamingState->worldspaceOBB.aspectRatio; - const float32_t2 oneTileDirV = fullImageDirV / float32_t(imageStreamingState->fullImageTileLength.y) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - viewportEncompassingOBB.topLeft += oneTileDirU * float32_t(viewportTileRange.topLeftTile.x); - viewportEncompassingOBB.topLeft += oneTileDirV * float32_t(viewportTileRange.topLeftTile.y); + OrientedBoundingBox2D viewportEncompassingOBB = imageStreamingState->georeferencedImageParams.worldspaceOBB; + // The image's worldspace dirU corresponds to `imageExtents.x` texels of the image, therefore one image texel in the U direction has a worldspace span of `dirU / imageExtents.x`. + // One mip 0 tiles therefore spans `dirU * GeoreferencedImageTileSize/ imageExtents.x`. A mip `n` tile spans `2^n` this amount, since each texel at that mip level spans + // `2^n` mip texels. 
Therefore the dirU offset from the image wordlspace's topLeft of the tile of index `viewportTileRange.topLeftTile.x` at mip level `currentMappedRegion.baseMipLevel` can be calculated as + const uint32_t oneTileTexelSpan = GeoreferencedImageTileSize << imageStreamingState->currentMappedRegion.baseMipLevel; + viewportEncompassingOBB.topLeft += imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU * float32_t(viewportTileRange.topLeftTile.x * oneTileTexelSpan) / float32_t(imageStreamingState->georeferencedImageParams.imageExtents.x); + // Same reasoning for offset in v direction + const float32_t2 dirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x) * imageStreamingState->georeferencedImageParams.worldspaceOBB.aspectRatio; + viewportEncompassingOBB.topLeft += dirV * float32_t(viewportTileRange.topLeftTile.y * oneTileTexelSpan) / float32_t(imageStreamingState->georeferencedImageParams.imageExtents.y); const uint32_t2 viewportTileLength = viewportTileRange.bottomRightTile - viewportTileRange.topLeftTile + uint32_t2(1, 1); // If the last tile is visible, we use the fractional span for the last tile. Otherwise it's just a normal tile const bool2 isLastTileVisible = imageStreamingState->isLastTileVisible(viewportTileRange.bottomRightTile); - const float32_t2 lastGPUImageTileFractionalSpan = { isLastTileVisible.x ? imageStreamingState->lastImageTileFractionalSpan.x : 1.f, isLastTileVisible.y ? 
imageStreamingState->lastImageTileFractionalSpan.y : 1.f }; - - viewportEncompassingOBB.dirU = oneTileDirU * (float32_t(viewportTileLength.x - 1u) + lastGPUImageTileFractionalSpan.x); - viewportEncompassingOBB.aspectRatio = (float32_t(viewportTileLength.y - 1u) + lastGPUImageTileFractionalSpan.y) / (float32_t(viewportTileLength.x - 1u) + lastGPUImageTileFractionalSpan.x); - - // UV logic currently ONLY works when the image not only fits an integer amount of tiles, but also when it's a PoT amount of them - // (this means every mip level also gets an integer amount of tiles). - // When porting to n4ce, for the image to fit an integer amount of tiles (instead of rewriting the logic) we can just pad the right/bottom sides with alpha=0 pixels - // The UV logic will have to change to consider what happens to the last loaded tile (or, alternatively, we can also fill the empty tiles with alpha=0 pixels) - - // Compute minUV, maxUV - const float32_t2 uvPerTile = float32_t2(1.f, 1.f) / float32_t2(imageStreamingState->gpuImageSideLengthTiles, imageStreamingState->gpuImageSideLengthTiles); - const float32_t2 minUV = uvPerTile * float32_t2(((viewportTileRange.topLeftTile - imageStreamingState->currentMappedRegion.topLeftTile) + imageStreamingState->gpuImageTopLeft) % imageStreamingState->gpuImageSideLengthTiles); - float32_t2 maxUV = minUV + uvPerTile * float32_t2(viewportTileLength - 1u); - // uvPerTile is the uv per GeoreferencedImageTileSize pixels. Since the last tile might not be fully resident with pixels, we don't add the uv for it above and add the proper uv it should be sampled at here - maxUV += uvPerTile * lastGPUImageTileFractionalSpan; + const uint32_t2 lastGPUImageTileTexels = { isLastTileVisible.x ? imageStreamingState->lastImageTileTexels.x : GeoreferencedImageTileSize, isLastTileVisible.y ? 
imageStreamingState->lastImageTileTexels.y : GeoreferencedImageTileSize }; + + // Instead of grouping per tile like in the offset case, we group per texel: the same reasoning leads to a single texel at current mip level having a span of `dirU * 2^(currentMappedRegion.baseMipLevel)/ imageExtents.x` + // in the U direction. Therefore the span in worldspace of the OBB we construct is just this number multiplied by the number of gpu texels spanned to draw. + // The number of texels is just `GeoreferencedImageTileSize` times the number of full tiles (all but the last) + the number of texels of the last tile, which might not be a full tile if near the right boundary + viewportEncompassingOBB.dirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU * float32_t((GeoreferencedImageTileSize * (viewportTileLength.x - 1) + lastGPUImageTileTexels.x) << imageStreamingState->currentMappedRegion.baseMipLevel) / float32_t(imageStreamingState->georeferencedImageParams.imageExtents.x); + // Simply number of gpu texels in the y direction divided by number of texels in the x direction. + viewportEncompassingOBB.aspectRatio = float32_t(GeoreferencedImageTileSize * (viewportTileLength.y - 1) + lastGPUImageTileTexels.y) / float32_t(GeoreferencedImageTileSize * (viewportTileLength.x - 1) + lastGPUImageTileTexels.x); + + // GPU tile corresponding to the real image tile containing the viewport top left - we can let it be negative since wrapping mode is repeat, negative tiles are correct modulo `gpuImageSideLengthTiles` + const uint32_t2 viewportTopLeftGPUTile = viewportTileRange.topLeftTile - imageStreamingState->currentMappedRegion.topLeftTile + imageStreamingState->gpuImageTopLeft; + // To get the uv corresponding to the above, simply divide the tile index by the number of tiles in the GPU image. 
+ // However to consider a one-texel shift inward (to prevent color bleeding at the edges) we map both numerator and denominator to texel units (by multiplying with `GeoreferencedImageTileSize`) and add + // a single texel to the numerator + const float32_t2 minUV = float32_t2(GeoreferencedImageTileSize * viewportTopLeftGPUTile + 1u) / float32_t(GeoreferencedImageTileSize * imageStreamingState->gpuImageSideLengthTiles); + // If the image was perfectly partitioned into tiles, we could get the maxUV in a similar fashion to minUV: Just compute `bottomRightTile - currentMappedRegion.topLeftTile` to get a tile + // then divide by `gpuImageSideLengthTiles` to get a coord in `(0,1)` (correct modulo `gpuImageSideLengthTiles`) + // However the last tile might not have all `GeoreferencedImageTileSize` texels in it. Therefore maxUV computation can be separated into a UV contribution by all full tiles (all but the last) + a contribution from the last tile + // UV contribution from full tiles will therefore be `(bottomRightTile - currentMappedRegion.topLeftTile) / gpuImageSideLengthTiles` while last tile contribution will be + // `lastGPUImageTileTexels / (gpuImageSideLengthTiles * GeoreferencedImageTileSize)`. We group terms below to reduce number of float ops. + // Again we first map to texel units then subtract one to add a single texel uv shift. 
+ const uint32_t2 viewportBottomRightGPUTile = viewportTileRange.bottomRightTile - imageStreamingState->currentMappedRegion.topLeftTile + imageStreamingState->gpuImageTopLeft; + const float32_t2 maxUV = float32_t2(GeoreferencedImageTileSize * viewportBottomRightGPUTile + lastGPUImageTileTexels - 1u) / float32_t(GeoreferencedImageTileSize * imageStreamingState->gpuImageSideLengthTiles); return TileUploadData{ std::move(tiles), viewportEncompassingOBB, minUV, maxUV }; } @@ -2919,13 +2926,12 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); - // Edge case padding - there seems to be some numerical error going on when really close to tile boundaries - const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom) + float64_t2(0.5, 0.5); + const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); // Floor them to get an integer coordinate (index) for the tiles they fall in int32_t2 minAllFloored = nbl::hlsl::floor(minAll); int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll); - + // We're undoing a previous division. Could be avoided but won't restructure the code atp. 
// Here we compute how many image pixels each side of the viewport spans const float64_t2 viewportSideUImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (topRightTileLattice - topLeftTileLattice); @@ -2940,20 +2946,19 @@ GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const const float64_t viewportSideVImageTexels = nbl::hlsl::length(viewportSideVImageTexelsVector); // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportSideUImageTexels / viewportExtents.x, - viewportSideVImageTexels / viewportExtents.y); + float64_t pixelRatio = nbl::hlsl::max(viewportSideUImageTexels / viewportExtent.x, viewportSideVImageTexels / viewportExtent.y); pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; - + // DEBUG - Clamped at 0 for magnification { std::cout << "Real mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; } - + GeoreferencedImageTileRange retVal = {}; // Clamp mip level so we don't consider tiles that are too small along one dimension // If on a pathological case this gets too expensive because the GPU starts sampling a lot, we can consider changing this, but I doubt that will happen retVal.baseMipLevel = nbl::hlsl::min(nbl::hlsl::findMSB(uint32_t(nbl::hlsl::floor(pixelRatio))), int32_t(imageStreamingState->maxMipLevel)); - + // Current tiles are measured in mip 0. We want the result to measure mip `retVal.baseMipLevel` tiles. Each next mip level divides by 2. 
minAllFloored >>= retVal.baseMipLevel; maxAllFloored >>= retVal.baseMipLevel; diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 20b309176..6c28d6135 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -203,9 +203,9 @@ struct DrawResourcesFiller typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); - void setViewportExtents(const uint32_t2 _viewportExtents) + void setViewportExtent(const uint32_t2 _viewportExtent) { - viewportExtents = _viewportExtents; + viewportExtent = _viewportExtent; } // DrawResourcesFiller needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding @@ -422,7 +422,7 @@ struct DrawResourcesFiller * @return true if the image was successfully cached and is ready for use; false if allocation failed. * [TODO]: should be internal protected member function. */ - bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const std::filesystem::path imageStoragePath, SIntendedSubmitInfo& intendedNextSubmit); + bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, GeoreferencedImageParams&& params, SIntendedSubmitInfo& intendedNextSubmit); // [TODO]: should be internal protected member function. bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); @@ -431,7 +431,7 @@ struct DrawResourcesFiller void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); // This function must be called immediately after `ensureGeoreferencedImageAvailability_AllocateIfNeeded` for the same imageID. 
- void addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, OrientedBoundingBox2D&& worldspaceOBB, SIntendedSubmitInfo& intendedNextSubmit); + void addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. @@ -742,7 +742,7 @@ struct DrawResourcesFiller * @param[out] outImageType Indicates whether the image should be fully resident or streamed. * @param[in] params Parameters for the georeferenced image */ - ImageType determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const std::filesystem::path imageStoragePath); + ImageType determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params); /** * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter @@ -985,6 +985,6 @@ struct DrawResourcesFiller std::unordered_map> streamedImageCopies; // Viewport state - uint32_t2 viewportExtents = {}; + uint32_t2 viewportExtent = {}; }; diff --git a/62_CAD/Images.cpp b/62_CAD/Images.cpp index 131b836a3..8b7dd1075 100644 --- a/62_CAD/Images.cpp +++ b/62_CAD/Images.cpp @@ -2,21 +2,10 @@ using namespace nbl::hlsl; -smart_refctd_ptr GeoreferencedImageStreamingState::create(std::filesystem::path imageStoragePath, const uint32_t2 imageExtents, const uint32_t gpuImageSideLengthTiles) +smart_refctd_ptr GeoreferencedImageStreamingState::create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize) { smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); - - retVal->imageStoragePath = imageStoragePath; - 
retVal->imageExtents = imageExtents; - retVal->gpuImageSideLengthTiles = gpuImageSideLengthTiles; - - return retVal; -} - -void GeoreferencedImageStreamingState::initialize(OrientedBoundingBox2D&& _worldspaceOBB, uint32_t TileSize) -{ - worldspaceOBB = std::move(_worldspaceOBB); - + retVal->georeferencedImageParams = std::move(_georeferencedImageParams); // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). // The composition of these matrices therefore transforms any point in worldspace into uv coordinates in imagespace @@ -27,30 +16,32 @@ void GeoreferencedImageStreamingState::initialize(OrientedBoundingBox2D&& _world // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) - const float64_t2 dirU = worldspaceOBB.dirU; - const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(worldspaceOBB.aspectRatio); + const float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; + const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(retVal->georeferencedImageParams.worldspaceOBB.aspectRatio); const float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); const float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); const float64_t2 firstRow = dirU / dirULengthSquared; const float64_t2 secondRow = dirV / dirVLengthSquared; - const float64_t2 displacement = -worldspaceOBB.topLeft; + const float64_t2 displacement = -retVal->georeferencedImageParams.worldspaceOBB.topLeft; // This is the same as multiplying the change of basis matrix by the displacement vector const float64_t postRotatedShiftX = 
nbl::hlsl::dot(firstRow, displacement); const float64_t postRotatedShiftY = nbl::hlsl::dot(secondRow, displacement); // Put them all together - world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); + retVal->world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); // Also set the maxMipLevel - to keep stuff simple, we don't consider having less than one tile per dimension // If you're zoomed out enough then at that point the whole image is just sampled as one tile along that dimension // In pathological cases, such as images that are way bigger on one side than the other, this could cause aliasing and slow down sampling if zoomed out too much. // If we were ever to observe such pathological cases, then maybe we should consider doing something else here. For example, making the loader able to handle different tile lengths per dimension // (so for example a 128x64 tile) but again for now it should be left as-is. 
- uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(imageExtents / TileSize)); - maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); + uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(retVal->georeferencedImageParams.imageExtents / TileSize)); + retVal->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); - fullImageTileLength = (imageExtents - 1u) / TileSize + 1u; + retVal->fullImageTileLength = (retVal->georeferencedImageParams.imageExtents - 1u) / TileSize + 1u; + + return retVal; } void GeoreferencedImageStreamingState::ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange) @@ -93,6 +84,8 @@ void GeoreferencedImageStreamingState::remapCurrentRegion(const GeoreferencedIma // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles } currentMappedRegion = viewportTileRange; + // Roughly center the viewport in the mapped region + currentMappedRegion.topLeftTile = nbl::hlsl::max(uint32_t2(0, 0), currentMappedRegion.topLeftTile - (gpuImageSideLengthTiles / 2)); // We can expand the currentMappedRegion to make it as big as possible, at no extra cost since we only upload tiles on demand // Since we use toroidal updating it's kinda the same which way we expand the region. 
We first try to make the extent be `gpuImageSideLengthTiles` currentMappedRegion.bottomRightTile = currentMappedRegion.topLeftTile + uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - uint32_t2(1, 1); diff --git a/62_CAD/Images.h b/62_CAD/Images.h index cb135b8c1..df4186020 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -25,6 +25,14 @@ enum class ImageType : uint8_t GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant }; +struct GeoreferencedImageParams +{ + OrientedBoundingBox2D worldspaceOBB = {}; + uint32_t2 imageExtents = {}; + asset::E_FORMAT format = {}; + std::filesystem::path storagePath = {}; +}; + /** * @class ImagesMemorySubAllocator * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. @@ -118,21 +126,12 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted protected: /* - * @brief Create an image streaming state and only set the fields that are passed as parameters + * @brief Create a streaming state for a georeferenced image * - * @param _imageStoragePath Path to the file where image data is stored. - * @param imageExtents Extents of the image in texels. Some internal algos of the class require knowing this and the class doesn-t have access to the image loader - * @param _gpuImageSideLengthTiles Length of each side of the gpu image, in tiles (it's always square) - */ - static smart_refctd_ptr create(std::filesystem::path imageStoragePath, const uint32_t2 imageExtents, const uint32_t gpuImageSideLengthTiles); - - /* - * @brief Set most fields for the imageStreamingState - * - * @param _worldspaceOBB Worldspace oriented bounding box where image needs to be drawn. + * @param _georeferencedImageParams Info relating to the georeferenced image for which to create a streaming state. * @param TileSize Size of the tiles used to break up the image. 
Also size of the tiles in the GPU image backing this georeferenced image. - */ - void initialize(OrientedBoundingBox2D&& _worldspaceOBB, uint32_t TileSize); + */ + static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize); // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. Translating to other mips levels is trivial @@ -140,14 +139,14 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // @brief Transform worldspace coordinates into UV coordinates into the image float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoords) const { return nbl::hlsl::mul(world2UV, worldCoords); } // @brief Transform worldspace coordinates into texel coordinates into the image - float64_t2 transformWorldCoordsToTexelCoords(const float64_t3 worldCoords) const { return float64_t2(imageExtents) * transformWorldCoordsToUV(worldCoords); } + float64_t2 transformWorldCoordsToTexelCoords(const float64_t3 worldCoords) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoords); } // @brief Transform worldspace coordinates into tile coordinates into the image, where the image is broken up into tiles of size `TileSize` float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToTexelCoords(worldCoords); } /* * @brief The GPU image backs a mapped region which is a rectangular sub-region of the original image. Note that a region being mapped does NOT imply it's currently resident in GPU memory. - * To display the iomage on the screen, before even checking that the tiles needed to render the portion of the image currently visible are resident in GPU memory, we first must ensure that - * said region is included (as a sub-rectangle) in the mapped region. 
+ * To display the iomage on the screen, before even checking that the tiles needed to render the portion of the image currently visible are resident in GPU memory, we first must ensure that + * said region is included (as a sub-rectangle) in the mapped region. * * @param viewportTileRange Range of tiles + mip level indicating what sub-rectangle (and at which mip level) of the image is going to be visible from the viewport */ @@ -178,7 +177,7 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted /* * @brief Given a tile range covering the viewport, returns which tiles (at the mip level of the current mapped region) need to be made resident to draw it, and to which tile of the gpu image each tile should be * uploaded to - * + * * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image covering the viewport */ core::vector tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) const; @@ -195,29 +194,27 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted const uint32_t2 lastTileIndex = getLastTileIndex(currentMappedRegion.baseMipLevel); return bool2(lastTileIndex.x == viewportBottomRightTile.x, lastTileIndex.y == viewportBottomRightTile.y); } + + GeoreferencedImageParams georeferencedImageParams = {}; std::vector> currentMappedRegionOccupancy = {}; // Sidelength of the gpu image, in mip 0 tiles that are `TileSize` (creation parameter) texels wide uint32_t gpuImageSideLengthTiles = {}; // We establish a max mipLevel for the image, which is the mip level at which any of width, height fit in a single tile uint32_t maxMipLevel = {}; - // Dimensions of the stored image, in texels - uint32_t2 imageExtents = {}; // Number of mip 0 tiles needed to cover the whole image, counting the last tile that might be fractional if the image size is not perfectly divisible by TileSize uint32_t2 fullImageTileLength = {}; // Indicates on which tile of the gpu image the current mapped region's `topLeft` resides 
uint32_t2 gpuImageTopLeft = {}; - // Worldspace bounding box for the image - OrientedBoundingBox2D worldspaceOBB; // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) float64_t2x3 world2UV = {}; + // If the image dimensions are not exactly divisible by `TileSize`, then the last tile along a dimension only holds a proportion of `lastTileFraction` pixels along that dimension + float64_t lastTileFraction = {}; // Reflects what fraction of a FULL tile the LAST tile in the image at the current mip level actually spans. // It only gets set when necessary, and should always be updated correctly before being used, since it's related to the current `baseMipLevel` of the `currentMappedRegion` - float32_t2 lastImageTileFractionalSpan = {1.f, 1.f}; + uint32_t2 lastImageTileTexels = {}; // Set mip level to extreme value so it gets recreated on first iteration GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; - // Path to the file where image data is stored - std::filesystem::path imageStoragePath; }; struct CachedImageRecord diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index a89fb0bbd..fc0ad404d 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -569,7 +569,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio { drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); - drawResourcesFiller.setViewportExtents(uint32_t2(m_window->getWidth(), m_window->getHeight())); + drawResourcesFiller.setViewportExtent(uint32_t2(m_window->getWidth(), m_window->getHeight())); size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB @@ -3883,6 +3883,12 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio constexpr float64_t3 bottomLeftViewportH = float64_t3(-1.0, 1.0, 1.0); 
constexpr float64_t3 bottomRightViewportH = float64_t3(1.0, 1.0, 1.0); + GeoreferencedImageParams georeferencedImageParams; + georeferencedImageParams.storagePath = georeferencedImagePath; + georeferencedImageParams.format = drawResourcesFiller.queryGeoreferencedImageFormat(georeferencedImagePath); + georeferencedImageParams.imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); + + image_id georefImageID = 6996; // Position at topLeft viewport auto projectionToNDC = m_Camera.constructViewProjection(); @@ -3892,8 +3898,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio auto inverseViewProj = nbl::hlsl::inverse(projectionToNDC); const static auto startingTopLeft = nbl::hlsl::mul(inverseViewProj, topLeftViewportH); - OrientedBoundingBox2D georefImageOBB = {}; - georefImageOBB.topLeft = startingTopLeft; + georeferencedImageParams.worldspaceOBB.topLeft = startingTopLeft; // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU const static float64_t startingImagePixelsPerViewportPixels = 1.0; @@ -3901,18 +3906,18 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const static auto dirU = startingViewportWidthVector * float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); // DEBUG - georefImageOBB.topLeft += float32_t2(startingViewportWidthVector - dirU); + //georefImageOBB.topLeft += float32_t2(startingViewportWidthVector - dirU); - georefImageOBB.dirU = dirU; + georeferencedImageParams.worldspaceOBB.dirU = dirU; const uint32_t2 imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); - georefImageOBB.aspectRatio = float32_t(imageExtents.y) / imageExtents.x; + georeferencedImageParams.worldspaceOBB.aspectRatio = float32_t(imageExtents.y) / imageExtents.x; // 
Unnecessary but should go into a callback if window can change dimensions during execution - drawResourcesFiller.setViewportExtents(uint32_t2(m_window->getWidth(), m_window->getHeight())); + drawResourcesFiller.setViewportExtent(uint32_t2(m_window->getWidth(), m_window->getHeight())); - drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(georefImageID, georeferencedImagePath, intendedNextSubmit); + drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(georefImageID, std::move(georeferencedImageParams), intendedNextSubmit); - drawResourcesFiller.addGeoreferencedImage(georefImageID, inverseViewProj, std::move(georefImageOBB), intendedNextSubmit); + drawResourcesFiller.addGeoreferencedImage(georefImageID, inverseViewProj, intendedNextSubmit); } } From 1c128978a114e1f4edd125d63e7c50d0558a8f7f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 14 Oct 2025 12:02:35 -0300 Subject: [PATCH 26/29] Linking errors on TextRendering --- 62_CAD/DrawResourcesFiller.cpp | 1600 ++++++++++++++++---------------- 62_CAD/DrawResourcesFiller.h | 713 +++++++------- 62_CAD/Images.cpp | 248 ++++- 62_CAD/Images.h | 324 +++++-- 62_CAD/main.cpp | 47 +- 5 files changed, 1628 insertions(+), 1304 deletions(-) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index b030981b0..88f4914e9 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -1,7 +1,10 @@ #include "DrawResourcesFiller.h" +using namespace nbl; + DrawResourcesFiller::DrawResourcesFiller() -{} +{ +} DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : m_utilities(std::move(utils)), @@ -27,6 +30,7 @@ void DrawResourcesFiller::setTexturesDescriptorSetAndBinding(core::smart_refctd_ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize) { + // requiredImageMemorySize = 
core::alignUp(50'399'744 * 2, 1024); // single memory allocation sectioned into images+buffers (images start at offset=0) const size_t adjustedImagesMemorySize = core::alignUp(requiredImageMemorySize, GPUStructsMaxNaturalAlignment); const size_t adjustedBuffersMemorySize = core::max(requiredBufferMemorySize, getMinimumRequiredResourcesBufferSize()); @@ -39,7 +43,7 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); - + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuBufferMemoryReqs = resourcesGPUBuffer->getMemoryReqs(); const bool memoryRequirementsMatch = (logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits() & gpuBufferMemoryReqs.memoryTypeBits) != 0 && // should have device local memory compatible @@ -50,7 +54,7 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s m_logger.log("Shouldn't happen: Buffer Memory Requires Dedicated Allocation or can't biind to device local memory.", nbl::system::ILogger::ELL_ERROR); return false; } - + const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); uint32_t memoryTypeIdx = ~0u; @@ -71,7 +75,7 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s }; allocation = logicalDevice->allocate(allocationInfo); - + if (allocation.isValid()) break; } @@ -102,7 +106,7 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s .buffer = resourcesGPUBuffer.get(), .binding = { .memory = buffersMemoryArena.memory.get(), - .offset = buffersMemoryArena.offset, + .offset = buffersMemoryArena.offset, } }; @@ -137,7 +141,10 @@ bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevic while ((currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries) { if 
(allocateDrawResources(logicalDevice, currentBufferSize, currentImageSize)) + { + m_logger.log("Successfully allocated memory for images (%zu) and buffers (%zu).", system::ILogger::ELL_INFO, currentImageSize, currentBufferSize); return true; + } currentBufferSize = (currentBufferSize * (100 - reductionPercent)) / 100; currentImageSize = (currentImageSize * (100 - reductionPercent)) / 100; @@ -153,13 +160,13 @@ bool DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui { // TODO: Make this function failable and report insufficient memory asset::E_FORMAT msdfFormat = MSDFTextureFormat; - asset::VkExtent3D MSDFsExtent = { msdfsExtent.x, msdfsExtent.y, 1u }; + asset::VkExtent3D MSDFsExtent = { msdfsExtent.x, msdfsExtent.y, 1u }; if (maxMSDFs > logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers) { m_logger.log("requested maxMSDFs is greater than maxImageArrayLayers. lowering the limit...", nbl::system::ILogger::ELL_WARNING); maxMSDFs = logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers; } - + IPhysicalDevice::SImageFormatPromotionRequest promotionRequest = {}; promotionRequest.originalFormat = msdfFormat; promotionRequest.usages = {}; @@ -171,7 +178,7 @@ bool DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui imgInfo.format = msdfFormat; imgInfo.type = IGPUImage::ET_2D; imgInfo.extent = MSDFsExtent; - imgInfo.mipLevels = MSDFMips; + imgInfo.mipLevels = MSDFMips; imgInfo.arrayLayers = maxMSDFs; imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; imgInfo.flags = asset::IImage::E_CREATE_FLAGS::ECF_NONE; @@ -217,7 +224,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const Line return; setActiveLineStyle(lineStyleInfo); - + beginMainObject(MainObjectType::POLYLINE, TransformationType::TT_NORMAL); drawPolyline(polyline, intendedNextSubmit); endMainObject(); @@ -229,7 +236,7 @@ void DrawResourcesFiller::drawFixedGeometryPolyline(const CPolylineBase& polylin return; 
setActiveLineStyle(lineStyleInfo); - + pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); beginMainObject(MainObjectType::POLYLINE, transformationType); drawPolyline(polyline, intendedNextSubmit); @@ -246,7 +253,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedS assert(false); return; } - + const auto sectionsCount = polyline.getSectionsCount(); uint32_t currentSectionIdx = 0u; @@ -297,7 +304,7 @@ void DrawResourcesFiller::drawTriangleMesh( return; } - DrawCallData drawCallData = {}; + DrawCallData drawCallData = {}; drawCallData.isDTMRendering = true; ICPUBuffer::SCreationParams geometryBuffParams; @@ -306,7 +313,7 @@ void DrawResourcesFiller::drawTriangleMesh( const auto& indexBuffer = mesh.getIndices(); const auto& vertexBuffer = mesh.getVertices(); assert(indexBuffer.size() == vertexBuffer.size()); // We don't have any vertex re-use due to other limitations at the moemnt. - + const uint32_t numTriangles = indexBuffer.size() / 3u; uint32_t trianglesUploaded = 0; @@ -326,18 +333,18 @@ void DrawResourcesFiller::drawTriangleMesh( void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; // the actual bda address will be determined only after all copies are finalized, later we will do += `baseBDAAddress + geometryInfo.bufferOffset` // the - is a small hack because index buffer grows but vertex buffer needs to start from 0, remove that once we either get rid of the index buffer or implement an algorithm that can have vertex reuse - drawCallData.dtm.triangleMeshVerticesBaseAddress = geometryBufferOffset - (sizeof(CTriangleMesh::vertex_t) * trianglesUploaded * 3); + drawCallData.dtm.triangleMeshVerticesBaseAddress = geometryBufferOffset - (sizeof(CTriangleMesh::vertex_t) * trianglesUploaded * 3); memcpy(dst, &vertexBuffer[trianglesUploaded * 3u], vtxBuffByteSize); - geometryBufferOffset += vtxBuffByteSize; + geometryBufferOffset += vtxBuffByteSize; // Copy IndexBuffer 
dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; drawCallData.dtm.indexBufferOffset = geometryBufferOffset; memcpy(dst, &indexBuffer[trianglesUploaded * 3u], indexBuffByteSize); geometryBufferOffset += indexBuffByteSize; - + trianglesUploaded += trianglesToUpload; - + drawCallData.dtm.triangleMeshMainObjectIndex = mainObjectIdx; drawCallData.dtm.indexCount = trianglesToUpload * 3u; drawCalls.push_back(drawCallData); @@ -359,11 +366,11 @@ void DrawResourcesFiller::drawTriangleMesh( // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + patternColor void DrawResourcesFiller::drawHatch( - const Hatch& hatch, - const float32_t4& foregroundColor, - const float32_t4& backgroundColor, - const HatchFillPattern fillPattern, - SIntendedSubmitInfo& intendedNextSubmit) + const Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit) { // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) @@ -374,10 +381,10 @@ void DrawResourcesFiller::drawHatch( } void DrawResourcesFiller::drawHatch( - const Hatch& hatch, - const float32_t4& color, - const HatchFillPattern fillPattern, - SIntendedSubmitInfo& intendedNextSubmit) + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit) { drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit); } @@ -388,13 +395,13 @@ void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, } void DrawResourcesFiller::drawFixedGeometryHatch( - const Hatch& hatch, - const float32_t4& foregroundColor, - const float32_t4& backgroundColor, - const HatchFillPattern fillPattern, - const float64_t3x3& transformation, - TransformationType transformationType, - 
SIntendedSubmitInfo& intendedNextSubmit) + const Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) { // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) @@ -467,13 +474,13 @@ void DrawResourcesFiller::drawHatch_impl( } void DrawResourcesFiller::drawFontGlyph( - nbl::ext::TextRendering::FontFace* fontFace, - uint32_t glyphIdx, - float64_t2 topLeft, - float32_t2 dirU, - float32_t aspectRatio, - float32_t2 minUV, - SIntendedSubmitInfo& intendedNextSubmit) + nbl::ext::TextRendering::FontFace* fontFace, + uint32_t glyphIdx, + float64_t2 topLeft, + float32_t2 dirU, + float32_t aspectRatio, + float32_t2 minUV, + SIntendedSubmitInfo& intendedNextSubmit) { uint32_t textureIdx = InvalidTextureIndex; const MSDFInputInfo msdfInput = MSDFInputInfo(fontFace->getHash(), glyphIdx); @@ -488,7 +495,7 @@ void DrawResourcesFiller::drawFontGlyph( assert(false); return; } - + if (textureIdx != InvalidTextureIndex) { GlyphInfo glyphInfo = GlyphInfo(topLeft, dirU, aspectRatio, textureIdx, minUV); @@ -531,7 +538,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s { // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena along with it's allocated array slot from the suballocated descriptor set evictCallback(staticImage.imageID, *cachedImageRecord); - + // Instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image // imagesCache->erase(imageID); // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, 
evictCallback); @@ -541,7 +548,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s { // Doesn't need image recreation, we'll use the same array index in descriptor set + the same bound memory. // reset it's state + update the cpu image used for copying. - cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; cachedImageRecord->staticCPUImage = staticImage.cpuImage; } } @@ -567,14 +574,14 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s IGPUImage::SCreationParams imageParams = {}; imageParams = staticImage.cpuImage->getCreationParameters(); - imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT; // promote format because RGB8 and friends don't actually exist in HW { const IPhysicalDevice::SImageFormatPromotionRequest request = { .originalFormat = imageParams.format, .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) }; - imageParams.format = physDev->promoteImageFormat(request,imageParams.tiling); + imageParams.format = physDev->promoteImageFormat(request, imageParams.tiling); } // Attempt to create a GPU image and image view for this texture. 
@@ -584,6 +591,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s { cachedImageRecord->type = ImageType::STATIC; cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN cachedImageRecord->allocationOffset = allocResults.allocationOffset; cachedImageRecord->allocationSize = allocResults.allocationSize; @@ -625,8 +633,8 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s cachedImageRecord->arrayIndex = InvalidTextureIndex; } } - - + + // cached or just inserted, we update the lastUsedFrameIndex cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; @@ -652,149 +660,6 @@ bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::spangetLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); - - // Try inserting or updating the image usage in the cache. - // If the image is already present, updates its semaphore value. 
- auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - - // TODO: Function call that gets you image creaation params based on the georeferenced image, it will also get you the GEOREFERENCED TYPE - IGPUImage::SCreationParams imageCreationParams = {}; - ImageType imageType = determineGeoreferencedImageCreationParams(imageCreationParams, params); - - // imageParams = cpuImage->getCreationParameters(); - imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; - // promote format because RGB8 and friends don't actually exist in HW - { - const IPhysicalDevice::SImageFormatPromotionRequest request = { - .originalFormat = imageCreationParams.format, - .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageCreationParams.usage) - }; - imageCreationParams.format = physDev->promoteImageFormat(request,imageCreationParams.tiling); - } - - // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema - // But we need to check if the cached image needs resizing/recreation. - if (cachedImageRecord->arrayIndex != InvalidTextureIndex) - { - // found in cache, but does it require resize? recreation? 
- if (cachedImageRecord->gpuImageView) - { - auto imgViewParams = cachedImageRecord->gpuImageView->getCreationParameters(); - if (imgViewParams.image) - { - const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); - const auto cachedImageType = cachedImageRecord->type; - // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus - const auto currentParams = static_cast(imageCreationParams); - const bool needsRecreation = cachedImageType != imageType || cachedParams != currentParams; - if (needsRecreation) - { - // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. - evictCallback(imageID, *cachedImageRecord); - - // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image - *cachedImageRecord = CachedImageRecord(currentFrameIndex); - // imagesCache->erase(imageID); - // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - } - } - else - { - m_logger.log("Cached georeferenced image has invalid gpu image.", nbl::system::ILogger::ELL_ERROR); - } - } - else - { - m_logger.log("Cached georeferenced image has invalid gpu image view.", nbl::system::ILogger::ELL_ERROR); - } - } - - // in which case we don't queue anything for upload, and return the idx - if (cachedImageRecord->arrayIndex == InvalidTextureIndex) - { - // This is a new image (cache miss). Allocate a descriptor index for it. - cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; - // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
- suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - - if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) - { - // Attempt to create a GPU image and image view for this texture. - ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); - - if (allocResults.isValid()) - { - cachedImageRecord->type = imageType; - cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; - cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN - cachedImageRecord->allocationOffset = allocResults.allocationOffset; - cachedImageRecord->allocationSize = allocResults.allocationSize; - cachedImageRecord->gpuImageView = allocResults.gpuImageView; - cachedImageRecord->staticCPUImage = nullptr; - cachedImageRecord->georeferencedImageState = GeoreferencedImageStreamingState::create(std::move(params), GeoreferencedImageTileSize); - - // This is because gpu image is square - cachedImageRecord->georeferencedImageState->gpuImageSideLengthTiles = cachedImageRecord->gpuImageView->getCreationParameters().image->getCreationParameters().extent.width / GeoreferencedImageTileSize; - - auto& fullImageTileLength = cachedImageRecord->georeferencedImageState->fullImageTileLength; - fullImageTileLength = (cachedImageRecord->georeferencedImageState->georeferencedImageParams.imageExtents - 1u) / GeoreferencedImageTileSize + 1u; - } - else - { - // All attempts to try create the GPU image and its corresponding view have failed. - // Most likely cause: insufficient GPU memory or unsupported image parameters. 
- - m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); - _NBL_DEBUG_BREAK_IF(true); - - if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) - { - // We previously successfully create and allocated memory for the Image - // but failed to bind and create image view - // It's crucial to deallocate the offset+size form our images memory suballocator - imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); - } - - if (cachedImageRecord->arrayIndex != InvalidTextureIndex) - { - // We previously allocated a descriptor index, but failed to create a usable GPU image. - // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. - // No semaphore wait needed here, as the GPU never got to use this slot. - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); - cachedImageRecord->arrayIndex = InvalidTextureIndex; - } - - // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation - imagesCache->erase(imageID); - } - } - else - { - m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed index allocation. 
shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); - cachedImageRecord->arrayIndex = InvalidTextureIndex; - } - } - - - // cached or just inserted, we update the lastUsedFrameIndex - cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; - - assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed - return (cachedImageRecord->arrayIndex != InvalidTextureIndex); -} - -bool DrawResourcesFiller::queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy) -{ - auto& vec = streamedImageCopies[imageID]; - vec.emplace_back(imageCopy); - return true; -} - // TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo // We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) void DrawResourcesFiller::drawGridDTM( @@ -892,54 +757,450 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin endMainObject(); } -void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit) +uint32_t2 DrawResourcesFiller::computeStreamingImageExtentsForViewportCoverage(const uint32_t2 viewportExtents) { - beginMainObject(MainObjectType::STREAMED_IMAGE); + const uint32_t diagonal = static_cast(nbl::hlsl::ceil( + nbl::hlsl::sqrt(static_cast( + viewportExtents.x * viewportExtents.x + viewportExtents.y * viewportExtents.y)) + )); - uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); - if 
(mainObjIdx == InvalidMainObjectIdx) + const uint32_t gpuImageSidelength = + 2 * core::roundUp(diagonal, GeoreferencedImageTileSize) + + GeoreferencedImagePaddingTiles * GeoreferencedImageTileSize; + + return { gpuImageSidelength, gpuImageSidelength }; +} + +nbl::core::smart_refctd_ptr DrawResourcesFiller::ensureGeoreferencedImageEntry(image_id imageID, const OrientedBoundingBox2D& worldSpaceOBB, const std::filesystem::path& storagePath) +{ + nbl::core::smart_refctd_ptr ret = nullptr; + + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + if (!imageLoader) { - m_logger.log("addGeoreferencedImage: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); - assert(false); - return; + m_logger.log("imageLoader is null/empty. make sure to register your loader!", nbl::system::ILogger::ELL_ERROR); + return nullptr; + } + + uint32_t2 fullResImageExtents = imageLoader->getExtents(storagePath); + asset::E_FORMAT format = imageLoader->getFormat(storagePath); + + uint32_t2 gpuImageExtents = computeStreamingImageExtentsForViewportCoverage(currentViewportExtents); + + IGPUImage::SCreationParams gpuImageCreationParams = {}; + gpuImageCreationParams.type = asset::IImage::ET_2D; + gpuImageCreationParams.samples = asset::IImage::ESCF_1_BIT; + gpuImageCreationParams.format = format; + gpuImageCreationParams.extent = { .width = gpuImageExtents.x, .height = gpuImageExtents.y, .depth = 1u }; + gpuImageCreationParams.mipLevels = 2u; + gpuImageCreationParams.arrayLayers = 1u; + + gpuImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = gpuImageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(gpuImageCreationParams.usage) + }; + gpuImageCreationParams.format = 
physDev->promoteImageFormat(request, gpuImageCreationParams.tiling); } - // Query imageType - auto cachedImageRecord = imagesCache->peek(imageID); + CachedImageRecord* cachedImageRecord = imagesCache->get(imageID); if (!cachedImageRecord) { - m_logger.log("addGeoreferencedImage was not called immediately after enforceGeoreferencedImageAvailability!", nbl::system::ILogger::ELL_ERROR); + ret = nbl::core::make_smart_refctd_ptr(); + const bool initSuccess = ret->init(worldSpaceOBB, fullResImageExtents, format, storagePath); + if (!initSuccess) + m_logger.log("Failed to init GeoreferencedImageStreamingState!", nbl::system::ILogger::ELL_ERROR); + } + else + { + // StreamingState already in cache, we return it; + if (!cachedImageRecord->georeferencedImageState) + m_logger.log("image had entry in the cache but cachedImageRecord->georeferencedImageState was nullptr, this shouldn't happen!", nbl::system::ILogger::ELL_ERROR); + ret = cachedImageRecord->georeferencedImageState; + } + + // Update GeoreferencedImageState with new viewport width/height and requirements + + // width only because gpu image is square + const uint32_t newGPUImageSideLengthTiles = gpuImageCreationParams.extent.width / GeoreferencedImageTileSize; + + // This will reset the residency state after a resize. it makes sense because when gpu image is resized, it's recreated and no previous tile is resident anymore + // We don't copy tiles between prev/next resized image, we're more focused on optimizing pan/zoom with a fixed window size. 
+ if (ret->gpuImageSideLengthTiles != newGPUImageSideLengthTiles) + { + ret->gpuImageSideLengthTiles = newGPUImageSideLengthTiles; + ret->ResetTileOccupancyState(); + } + + // DONT UNCOMMENT, IT WILL SLOW DOWN LOADING: It's Test For full re-cache loading speed + // ret->ResetTileOccupancyState(); + + ret->gpuImageCreationParams = std::move(gpuImageCreationParams); + // Update with current viewport + ret->updateStreamingStateForViewport(currentViewportExtents, ndcToWorldTransformationMatrix); + + return ret; +} + +bool DrawResourcesFiller::launchGeoreferencedImageTileLoads(image_id imageID, GeoreferencedImageStreamingState* imageStreamingState, const WorldClipRect clipRect) +{ + if (!imageStreamingState) + { + m_logger.log("imageStreamingState is null/empty, make sure `ensureGeoreferencedImageEntry` was called beforehand!", nbl::system::ILogger::ELL_ERROR); assert(false); - return; + return false; } - // Generate upload data - auto uploadData = generateTileUploadData(cachedImageRecord->type, NDCToWorld, cachedImageRecord->georeferencedImageState.get()); + auto& thisImageQueuedCopies = streamedImageCopies[imageID]; - // Queue image uploads - for (const auto& imageCopy : uploadData.tiles) - queueGeoreferencedImageCopy_Internal(imageID, imageCopy); + const auto& viewportTileRange = imageStreamingState->currentViewportTileRange; + const uint32_t2 lastTileIndex = imageStreamingState->getLastTileIndex(viewportTileRange.baseMipLevel); - GeoreferencedImageInfo info = {}; - info.topLeft = uploadData.viewportEncompassingOBB.topLeft; - info.dirU = uploadData.viewportEncompassingOBB.dirU; - info.aspectRatio = uploadData.viewportEncompassingOBB.aspectRatio; - info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory - info.minUV = uploadData.minUV; - info.maxUV = uploadData.maxUV; - if (!addGeoreferencedImageInfo_Internal(info, 
mainObjIdx)) + // We need to make every tile that covers the viewport resident. We reserve the amount of tiles needed for upload. + auto tilesToLoad = imageStreamingState->tilesToLoad(); + + const uint32_t2 imageExtents = imageStreamingState->fullResImageExtents; + const std::filesystem::path imageStoragePath = imageStreamingState->storagePath; + + // Figure out worldspace coordinates for each of the tile's corners - these are used if there's a clip rect + const float64_t2 imageTopLeft = imageStreamingState->worldspaceOBB.topLeft; + const float64_t2 dirU = float64_t2(imageStreamingState->worldspaceOBB.dirU); + const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(imageStreamingState->worldspaceOBB.aspectRatio); + const uint32_t tileMipLevel = imageStreamingState->currentViewportTileRange.baseMipLevel; + + uint32_t ignored = 0; + for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) { - // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - const bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); - if (!success) + // clip against current rect, if valid + if (clipRect.minClip.x != std::numeric_limits::signaling_NaN()) + { + float64_t2 topLeftWorld = imageTopLeft + dirU * (float64_t(GeoreferencedImageTileSize * imageTileIndex.x << tileMipLevel) / float64_t(imageExtents.x)) + dirV * (float64_t(GeoreferencedImageTileSize * imageTileIndex.y << tileMipLevel) / float64_t(imageExtents.y)); + float64_t2 topRightWorld = imageTopLeft + dirU * (float64_t(GeoreferencedImageTileSize * (imageTileIndex.x + 1) << tileMipLevel) / float64_t(imageExtents.x)) + dirV * (float64_t(GeoreferencedImageTileSize * imageTileIndex.y << tileMipLevel) / float64_t(imageExtents.y)); + float64_t2 bottomLeftWorld = imageTopLeft + dirU * (float64_t(GeoreferencedImageTileSize * imageTileIndex.x << tileMipLevel) 
/ float64_t(imageExtents.x)) + dirV * (float64_t(GeoreferencedImageTileSize * (imageTileIndex.y + 1) << tileMipLevel) / float64_t(imageExtents.y)); + float64_t2 bottomRightWorld = imageTopLeft + dirU * (float64_t(GeoreferencedImageTileSize * (imageTileIndex.x + 1) << tileMipLevel) / float64_t(imageExtents.x)) + dirV * (float64_t(GeoreferencedImageTileSize * (imageTileIndex.y + 1) << tileMipLevel) / float64_t(imageExtents.y)); + + float64_t minX = std::min({ topLeftWorld.x, topRightWorld.x, bottomLeftWorld.x, bottomRightWorld.x }); + float64_t minY = std::min({ topLeftWorld.y, topRightWorld.y, bottomLeftWorld.y, bottomRightWorld.y }); + float64_t maxX = std::max({ topLeftWorld.x, topRightWorld.x, bottomLeftWorld.x, bottomRightWorld.x }); + float64_t maxY = std::max({ topLeftWorld.y, topRightWorld.y, bottomLeftWorld.y, bottomRightWorld.y }); + + // Check if the tile intersects clip rect at all. Note that y clips are inverted + if (maxX < clipRect.minClip.x || minX > clipRect.maxClip.x || maxY < clipRect.maxClip.y || minY > clipRect.minClip.y) + continue; + } + + uint32_t2 targetExtentMip0(GeoreferencedImageTileSize, GeoreferencedImageTileSize); + std::future> gpuMip0Tile; + std::future> gpuMip1Tile; + { - m_logger.log("addGeoreferencedImageInfo_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + uint32_t2 samplingExtentMip0 = uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; + const uint32_t2 samplingOffsetMip0 = (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; + + // If on the last tile, we might not load a full `GeoreferencedImageTileSize x GeoreferencedImageTileSize` tile, so we figure out how many pixels to load in this case to have + // minimal artifacts and no stretching + if (imageTileIndex.x == lastTileIndex.x) + { + samplingExtentMip0.x = imageStreamingState->lastTileSamplingExtent.x; + targetExtentMip0.x = 
imageStreamingState->lastTileTargetExtent.x; + // If the last tile is too small just ignore it + if (targetExtentMip0.x == 0u) + continue; + } + if (imageTileIndex.y == lastTileIndex.y) + { + samplingExtentMip0.y = imageStreamingState->lastTileSamplingExtent.y; + targetExtentMip0.y = imageStreamingState->lastTileTargetExtent.y; + // If the last tile is too small just ignore it + if (targetExtentMip0.y == 0u) + continue; + } + + if (!imageLoader->hasPrecomputedMips(imageStoragePath)) + { + gpuMip0Tile = std::async(std::launch::async, [=, this]() { + return imageLoader->load(imageStoragePath, samplingOffsetMip0, samplingExtentMip0, targetExtentMip0); + }); + gpuMip1Tile = std::async(std::launch::async, [=, this]() { + return imageLoader->load(imageStoragePath, samplingOffsetMip0, samplingExtentMip0, targetExtentMip0 / 2u); + }); + } + else + { + gpuMip0Tile = std::async(std::launch::async, [=, this]() { + return imageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSize, targetExtentMip0, imageStreamingState->currentMappedRegionTileRange.baseMipLevel, false); + }); + gpuMip1Tile = std::async(std::launch::async, [=, this]() { + return imageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSizeMip1, targetExtentMip0 / 2u, imageStreamingState->currentMappedRegionTileRange.baseMipLevel, true); + }); + } + } + + asset::IImage::SBufferCopy bufCopy; + bufCopy.bufferOffset = 0; + bufCopy.bufferRowLength = targetExtentMip0.x; + bufCopy.bufferImageHeight = 0; + bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + bufCopy.imageSubresource.mipLevel = 0u; + bufCopy.imageSubresource.baseArrayLayer = 0u; + bufCopy.imageSubresource.layerCount = 1u; + uint32_t2 gpuImageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; + bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; + bufCopy.imageExtent.width = targetExtentMip0.x; + bufCopy.imageExtent.height = targetExtentMip0.y; + bufCopy.imageExtent.depth = 1; + + 
thisImageQueuedCopies.emplace_back(imageStreamingState->sourceImageFormat, std::move(gpuMip0Tile), std::move(bufCopy)); + + // Upload the smaller tile to mip 1 + bufCopy = {}; + + bufCopy.bufferOffset = 0; + bufCopy.bufferRowLength = targetExtentMip0.x / 2; + bufCopy.bufferImageHeight = 0; + bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + bufCopy.imageSubresource.mipLevel = 1u; + bufCopy.imageSubresource.baseArrayLayer = 0u; + bufCopy.imageSubresource.layerCount = 1u; + gpuImageOffset /= 2; // Half tile size! + bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; + bufCopy.imageExtent.width = targetExtentMip0.x / 2; + bufCopy.imageExtent.height = targetExtentMip0.y / 2; + bufCopy.imageExtent.depth = 1; + + thisImageQueuedCopies.emplace_back(imageStreamingState->sourceImageFormat, std::move(gpuMip1Tile), std::move(bufCopy)); + + // Mark tile as resident + imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; + } + + return true; +} + +void DrawResourcesFiller::drawGeoreferencedImage(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit) +{ + // OutputDebugStringA(std::format("Image Cache Size = {} ", imagesCache->size()).c_str()); + + const bool resourcesEnsured = ensureGeoreferencedImageResources_AllocateIfNeeded(imageID, std::move(imageStreamingState), intendedNextSubmit); + if (resourcesEnsured) + { + // Georefernced Image Data in the cache was already pre-transformed from local to main worldspace coordinates for tile calculation purposes + // Because of this reason, the pre-transformed obb in the cache doesn't need to be transformed by custom projection again anymore. + // we push the identity transform to prevent any more tranformation on the obb which is already in worldspace units. 
+ float64_t3x3 identity = float64_t3x3(1, 0, 0, 0, 1, 0, 0, 0, 1); + pushCustomProjection(identity); + + beginMainObject(MainObjectType::STREAMED_IMAGE); + + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx != InvalidMainObjectIdx) + { + // Query imageType + auto cachedImageRecord = imagesCache->peek(imageID); + if (cachedImageRecord) + { + GeoreferencedImageInfo info = cachedImageRecord->georeferencedImageState->computeGeoreferencedImageAddressingAndPositioningInfo(); + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) + { + // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + const bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); + if (!success) + { + m_logger.log("addGeoreferencedImageInfo_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } + } + else + { + m_logger.log("drawGeoreferencedImage was not called immediately after enforceGeoreferencedImageAvailability!", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } + else + { + m_logger.log("drawGeoreferencedImage: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); assert(false); } + + endMainObject(); + + popCustomProjection(); + } + else + { + m_logger.log("Failed to ensure resources (memory and descriptorIndex) for georeferencedImage", nbl::system::ILogger::ELL_ERROR); } +} - endMainObject(); +bool DrawResourcesFiller::finalizeGeoreferencedImageTileLoads(SIntendedSubmitInfo& intendedNextSubmit) +{ + bool success = true; + + if 
(streamedImageCopies.size() > 0ull) + { + auto* device = m_utilities->getLogicalDevice(); + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); + + if (cmdBuffInfo) + { + std::vector validCopies; + validCopies.reserve(streamedImageCopies.size()); + + // Step 1: collect valid image iters + for (auto it = streamedImageCopies.begin(); it != streamedImageCopies.end(); ++it) + { + const auto& imageID = it->first; + auto* imageRecord = imagesCache->peek(imageID); + + if (imageRecord && imageRecord->gpuImageView && imageRecord->georeferencedImageState) + validCopies.push_back(it); + else + m_logger.log(std::format("Can't upload to imageId {} yet. (no gpu record yet).", imageID).c_str(), nbl::system::ILogger::ELL_INFO); + } + + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + + std::vector beforeCopyImageBarriers; + beforeCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers before imageCopy + for (auto it : validCopies) + { + auto& [imageID, imageCopies] = *it; + // OutputDebugStringA(std::format("Copying {} copies for Id = {} \n", imageCopies.size(), imageID).c_str()); + + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + { + m_logger.log(std::format("`pushStreamedImagesUploads` failed, no image record found for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); + continue; + } + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + IImage::LAYOUT newLayout = IImage::LAYOUT::GENERAL; + + beforeCopyImageBarriers.push_back( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } + // .ownershipOp. 
No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = imageRecord->currentLayout, + .newLayout = newLayout, + }); + imageRecord->currentLayout = newLayout; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); + + for (auto it : validCopies) + { + auto& [imageID, imageCopies] = *it; + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + for (auto& imageCopy : imageCopies) + { + auto srcBuffer = imageCopy.srcBufferFuture.get(); + if (srcBuffer) + { + success &= m_utilities->updateImageViaStagingBuffer( + intendedNextSubmit, + srcBuffer->getPointer(), imageCopy.srcFormat, + gpuImg.get(), IImage::LAYOUT::GENERAL, + { &imageCopy.region, 1u }); + } + else + m_logger.log(std::format("srcBuffer was invalid for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); + } + } + + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change + + std::vector afterCopyImageBarriers; + afterCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers after imageCopy + for (auto it : validCopies) + { + auto& [imageID, imageCopies] = *it; + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + { + m_logger.log(std::format("`pushStreamedImagesUploads` failed, no image record found for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); + continue; + } + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + IImage::LAYOUT newLayout 
= IImage::LAYOUT::GENERAL; + + afterCopyImageBarriers.push_back( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = imageRecord->currentLayout, + .newLayout = newLayout, + }); + imageRecord->currentLayout = newLayout; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + + // Remove the processed valid ones, keep invalids for later retries + for (auto it : validCopies) + streamedImageCopies.erase(it); + } + else + { + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } + + if (!success) + { + m_logger.log("Failure in `pushStreamedImagesUploads`.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + } + return success; } bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) @@ -961,12 +1222,13 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit // Push Static Images Uploads from replay cache, all the work below is necessary to detect whether our image to replay is already in the cache in the exact form OR we need to create new image + bind memory and set array index auto* device = m_utilities->getLogicalDevice(); bool replayCacheFullyCovered = true; - for (auto& [imageID, toReplayRecord] : *currentReplayCache->imagesCache) + + for (auto& [toReplayImageID, toReplayRecord] : *currentReplayCache->imagesCache) { if (toReplayRecord.type != 
ImageType::STATIC) // non-static images (Georeferenced) won't be replayed like this continue; - auto* cachedRecord = imagesCache->peek(imageID); + auto* cachedRecord = imagesCache->peek(toReplayImageID); bool alreadyResident = false; // compare with existing state, and check whether image id is already resident. @@ -981,63 +1243,92 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit alreadyResident = allocationMatches && arrayIndexMatches && cachedRecord->state == ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; } - // if already resident, just update the state to the cached state (to make sure it doesn't get issued for upload again) and move on. - if (alreadyResident) + // if already resident, ignore, no need to insert into cache anymore + // if bot already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads + if (!alreadyResident) { - toReplayRecord.state = cachedRecord->state; // update the toReplayImageRecords's state, to completely match the currently resident state - continue; - } + replayCacheFullyCovered = false; - replayCacheFullyCovered = false; - - bool successCreateNewImage = false; + // make sure to evict any cache entry that conflicts with the new entry (either in memory allocation or descriptor index) + for (auto& [cachedImageID, cachedRecord] : *imagesCache) + { + bool cachedImageConflictsWithImageToReplay = true; - // Not already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads - auto existingGPUImageViewParams = toReplayRecord.gpuImageView->getCreationParameters(); - IGPUImage::SCreationParams imageParams = {}; - imageParams = existingGPUImageViewParams.image->getCreationParameters(); + // Case 1: Same imageID, but params differ (offset/size/arrayIndex mismatch) conflict + if (cachedImageID == toReplayImageID) + { + // this will 
always return true, because if it was a exact param match, we wouldn't need to insert a new one and handle evictions + cachedImageConflictsWithImageToReplay = true; + } + else + { + // Case 2: Different imageID but overlap in memory range on the same array index conflict + if (cachedRecord.arrayIndex == toReplayRecord.arrayIndex && + (cachedRecord.allocationOffset < toReplayRecord.allocationOffset + toReplayRecord.allocationSize) && + (toReplayRecord.allocationOffset < cachedRecord.allocationOffset + cachedRecord.allocationSize)) + { + cachedImageConflictsWithImageToReplay = true; + } + } - auto newGPUImage = device->createImage(std::move(imageParams)); - if (newGPUImage) - { - nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = - { - .image = newGPUImage.get(), - .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + toReplayRecord.allocationOffset } - }; + if (cachedImageConflictsWithImageToReplay) + { + evictImage_SubmitIfNeeded(cachedImageID, cachedRecord, intendedNextSubmit); + imagesCache->erase(cachedImageID); + } + } - const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); - if (boundToMemorySuccessfully) + // creating and inserting new entry + bool successCreateNewImage = false; { - newGPUImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); - IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; - viewParams.image = newGPUImage; + // Not already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads + auto existingGPUImageViewParams = toReplayRecord.gpuImageView->getCreationParameters(); + IGPUImage::SCreationParams imageParams = {}; + imageParams = existingGPUImageViewParams.image->getCreationParameters(); - auto newGPUImageView = device->createImageView(std::move(viewParams)); - if (newGPUImageView) + auto newGPUImage = 
device->createImage(std::move(imageParams)); + if (newGPUImage) { - successCreateNewImage = true; - toReplayRecord.gpuImageView = newGPUImageView; - toReplayRecord.state = ImageState::CREATED_AND_MEMORY_BOUND; - newGPUImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = newGPUImage.get(), + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + toReplayRecord.allocationOffset } + }; + + const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + newGPUImage->setObjectDebugName((std::to_string(toReplayImageID) + " Static Image 2D").c_str()); + IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; + viewParams.image = newGPUImage; + + auto newGPUImageView = device->createImageView(std::move(viewParams)); + if (newGPUImageView) + { + successCreateNewImage = true; + toReplayRecord.gpuImageView = newGPUImageView; + toReplayRecord.state = ImageState::CREATED_AND_MEMORY_BOUND; + newGPUImageView->setObjectDebugName((std::to_string(toReplayImageID) + " Static Image View 2D").c_str()); + } + + } } + } + if (successCreateNewImage) + { + // inserting the new entry into the cache (With new image and memory binding) + imagesCache->base_t::insert(toReplayImageID, toReplayRecord); + } + else + { + m_logger.log("Couldn't create new gpu image in pushAllUploads: cache and replay mode.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + success = false; } - } - if (!successCreateNewImage) - { - m_logger.log("Couldn't create new gpu image in pushAllUploads: cache and replay mode.", nbl::system::ILogger::ELL_ERROR); - _NBL_DEBUG_BREAK_IF(true); - success = false; } } - - // Our actual `imageCache` (which represents GPU state) didn't cover the replayCache fully, so new images had to be created, bound to memory. 
and they need to be written into their respective descriptor array indices again. - // imagesCache = std::make_unique(*currentReplayCache->imagesCache); - imagesCache->clear(); - for (auto it = currentReplayCache->imagesCache->rbegin(); it != currentReplayCache->imagesCache->rend(); it++) - imagesCache->base_t::insert(it->first, it->second); if (!replayCacheFullyCovered) { @@ -1053,7 +1344,7 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit success &= bindImagesToArrayIndices(*imagesCache); success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); - // Streamed uploads in cache&replay?! + // There should be no georeferenced image and thus streamed uploads in replay mode: georeferenced/streamed images should be drawn in a separate isolated submit } else { @@ -1062,7 +1353,6 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit success &= pushMSDFImagesUploads(intendedNextSubmit, msdfImagesState); success &= bindImagesToArrayIndices(*imagesCache); success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); - success &= pushStreamedImagesUploads(intendedNextSubmit); } return success; } @@ -1123,7 +1413,8 @@ void DrawResourcesFiller::pushCustomClipRect(const WorldClipRect& clipRect) } void DrawResourcesFiller::popCustomClipRect() -{ if (activeClipRects.empty()) +{ + if (activeClipRects.empty()) return; activeClipRects.pop_back(); @@ -1171,6 +1462,29 @@ void DrawResourcesFiller::unsetReplayCache() currentReplayCache = nullptr; } +uint64_t DrawResourcesFiller::getImagesMemoryConsumption() const +{ + uint64_t ret = 0ull; + for (auto& [imageID, record] : *imagesCache) + ret += record.allocationSize; + return ret; +} + +DrawResourcesFiller::UsageData DrawResourcesFiller::getCurrentUsageData() +{ + UsageData ret = {}; + const auto& resources = getResourcesCollection(); + ret.lineStyleCount = resources.lineStyles.getCount(); + ret.dtmSettingsCount = resources.dtmSettings.getCount(); + 
ret.customProjectionsCount = resources.customProjections.getCount(); + ret.mainObjectCount = resources.mainObjects.getCount(); + ret.drawObjectCount = resources.drawObjects.getCount(); + ret.geometryBufferSize = resources.geometryInfo.getStorageSize(); + ret.bufferMemoryConsumption = resources.calculateTotalConsumption(); + ret.imageMemoryConsumption = getImagesMemoryConsumption(); + return ret; +} + bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resources) { copiedResourcesSize = 0ull; @@ -1185,7 +1499,7 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool { // drawBuffer must be of type CPUGeneratedResource - SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer}; + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer }; if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) { @@ -1203,11 +1517,11 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub } return true; }; - + auto addComputeReservedFilledDrawBuffer = [&](auto& drawBuffer) -> bool { // drawBuffer must be of type ReservedComputeResource - SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer}; + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer }; if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) { @@ -1228,14 +1542,14 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub copyCPUFilledDrawBuffer(resources.drawObjects); copyCPUFilledDrawBuffer(resources.indexBuffer); copyCPUFilledDrawBuffer(resources.geometryInfo); - + return true; } bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages) { auto* cmdBuffInfo = 
intendedNextSubmit.getCommandBufferForRecording(); - + if (cmdBuffInfo) { IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; @@ -1361,7 +1675,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) { bool success = true; - + auto* device = m_utilities->getLogicalDevice(); auto* descriptorSet = suballocatedDescriptorSet->getDescriptorSet(); @@ -1410,7 +1724,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN std::vector nonResidentImageRecords; for (auto& [id, record] : imagesCache) { - if (record.staticCPUImage && (record.type == ImageType::STATIC || record.type == ImageType::GEOREFERENCED_FULL_RESOLUTION) && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA) + if (record.staticCPUImage && record.type == ImageType::STATIC && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA) nonResidentImageRecords.push_back(&record); } @@ -1418,7 +1732,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN { auto* device = m_utilities->getLogicalDevice(); auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); - + if (cmdBuffInfo) { IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; @@ -1450,9 +1764,10 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN .baseArrayLayer = 0u, .layerCount = ICPUImageView::remaining_array_layers }, - .oldLayout = IImage::LAYOUT::UNDEFINED, + .oldLayout = imageRecord.currentLayout, .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, }; + imageRecord.currentLayout = beforeCopyImageBarriers[i].newLayout; } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); @@ -1503,9 +1818,10 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN .baseArrayLayer = 0u, .layerCount = ICPUImageView::remaining_array_layers }, - 
.oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .oldLayout = imageRecord.currentLayout, .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, }; + imageRecord.currentLayout = afterCopyImageBarriers[i].newLayout; } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); } @@ -1524,128 +1840,129 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN return success; } -bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::ensureGeoreferencedImageResources_AllocateIfNeeded(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit) { - bool success = true; + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. + auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + + // Setting the image streaming state returned in `ensureGeoreferencedImageEntry` which was either creating anew or gotten from this very own cache + cachedImageRecord->georeferencedImageState = std::move(imageStreamingState); + + if (cachedImageRecord == nullptr) + { + m_logger.log("Couldn't insert image in cache; make sure you called `ensureGeoreferencedImageEntry` before anything else.", nbl::system::ILogger::ELL_ERROR); + return false; + } + + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // But we need to check if the cached image needs resizing/recreation. 
+ if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // found in cache, but does it require resize? recreation? + if (cachedImageRecord->gpuImageView) + { + auto imgViewParams = cachedImageRecord->gpuImageView->getCreationParameters(); + if (imgViewParams.image) + { + const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); + // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus + const auto toCreateParams = static_cast(cachedImageRecord->georeferencedImageState->gpuImageCreationParams); + const bool needsRecreation = cachedParams != toCreateParams; + if (needsRecreation) + { + // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. + // note: it doesn't remove the entry from lru cache. + evictCallback(imageID, *cachedImageRecord); + + // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image + CachedImageRecord newRecord = CachedImageRecord(currentFrameIndex); //reser everything except image streaming state + newRecord.georeferencedImageState = std::move(cachedImageRecord->georeferencedImageState); + newRecord.type = cachedImageRecord->type; + *cachedImageRecord = std::move(newRecord); + } + } + else + { + m_logger.log("Cached georeferenced image has invalid gpu image.", nbl::system::ILogger::ELL_ERROR); + } + } + else + { + m_logger.log("Cached georeferenced image has invalid gpu image view.", nbl::system::ILogger::ELL_ERROR); + } + } + + // in which case we don't queue anything for upload, and return the idx + if (cachedImageRecord->arrayIndex == InvalidTextureIndex) + { + // This is a new image (cache miss). Allocate a descriptor index for it. 
+ cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - if (streamedImageCopies.size() > 0ull) - { - auto* device = m_utilities->getLogicalDevice(); - auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); - - if (cmdBuffInfo) + if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { - IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; - - std::vector beforeCopyImageBarriers; - beforeCopyImageBarriers.reserve(streamedImageCopies.size()); + const auto& imageCreationParams = cachedImageRecord->georeferencedImageState->gpuImageCreationParams; + // Attempt to create a GPU image and image view for this texture. + ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); - // Pipeline Barriers before imageCopy - for (auto& [imageID, imageCopies] : streamedImageCopies) + if (allocResults.isValid()) { - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) - continue; - - const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; - - beforeCopyImageBarriers.push_back( - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - } - // .ownershipOp. 
No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - }, - .oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - }); + cachedImageRecord->type = ImageType::GEOREFERENCED_STREAMED; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageRecord->allocationOffset = allocResults.allocationOffset; + cachedImageRecord->allocationSize = allocResults.allocationSize; + cachedImageRecord->gpuImageView = allocResults.gpuImageView; + cachedImageRecord->staticCPUImage = nullptr; } - success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - - for (auto& [imageID, imageCopies] : streamedImageCopies) + else { - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) - continue; + // All attempts to try create the GPU image and its corresponding view have failed. + // Most likely cause: insufficient GPU memory or unsupported image parameters. 
- const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); - for (auto& imageCopy : imageCopies) + if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) { - success &= m_utilities->updateImageViaStagingBuffer( - intendedNextSubmit, - imageCopy.srcBuffer->getPointer(), imageCopy.srcFormat, - gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - { &imageCopy.region, 1u }); + // We previously successfully create and allocated memory for the Image + // but failed to bind and create image view + // It's crucial to deallocate the offset+size form our images memory suballocator + imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); } - } - - commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change - - std::vector afterCopyImageBarriers; - afterCopyImageBarriers.reserve(streamedImageCopies.size()); - - // Pipeline Barriers after imageCopy - for (auto& [imageID, imageCopies] : streamedImageCopies) - { - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) - continue; - const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // We previously allocated a descriptor index, but failed to create a usable GPU image. + // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. + // No semaphore wait needed here, as the GPU never got to use this slot. 
+ suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + cachedImageRecord->arrayIndex = InvalidTextureIndex; + } - afterCopyImageBarriers.push_back ( - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - } - // .ownershipOp. No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - }, - .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - }); + // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(imageID); } - success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); - - streamedImageCopies.clear(); } else { - _NBL_DEBUG_BREAK_IF(true); - success = false; + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed index allocation. 
shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); + cachedImageRecord->arrayIndex = InvalidTextureIndex; } } - if (!success) - { - m_logger.log("Failure in `pushStreamedImagesUploads`.", nbl::system::ILogger::ELL_ERROR); - _NBL_DEBUG_BREAK_IF(true); - } - return success; + // cached or just inserted, we update the lastUsedFrameIndex + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; + + assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + return (cachedImageRecord->arrayIndex != InvalidTextureIndex); } const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const @@ -1791,7 +2108,7 @@ uint32_t DrawResourcesFiller::acquireActiveLineStyleIndex_SubmitIfNeeded(SIntend { if (activeLineStyleIndex == InvalidStyleIdx) activeLineStyleIndex = addLineStyle_SubmitIfNeeded(activeLineStyle, intendedNextSubmit); - + return activeLineStyleIndex; } @@ -1799,7 +2116,7 @@ uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SInte { if (activeDTMSettingsIndex == InvalidDTMSettingsIdx) activeDTMSettingsIndex = addDTMSettings_SubmitIfNeeded(activeDTMSettings, intendedNextSubmit); - + return activeDTMSettingsIndex; } @@ -1810,7 +2127,7 @@ uint32_t DrawResourcesFiller::acquireActiveCustomProjectionIndex_SubmitIfNeeded( if (activeProjectionIndices.back() == InvalidCustomProjectionIndex) activeProjectionIndices.back() = addCustomProjection_SubmitIfNeeded(activeProjections.back(), intendedNextSubmit); - + return activeProjectionIndices.back(); } @@ -1821,7 +2138,7 @@ uint32_t DrawResourcesFiller::acquireActiveCustomClipRectIndex_SubmitIfNeeded(SI if (activeClipRectIndices.back() == InvalidCustomClipRectIndex) activeClipRectIndices.back() = addCustomClipRect_SubmitIfNeeded(activeClipRects.back(), intendedNextSubmit); - + return activeClipRectIndices.back(); } @@ 
-1853,14 +2170,14 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? const bool needToOverflowSubmit = (!enoughMem) || (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects); - + if (needToOverflowSubmit) { // failed to fit into remaining resources mem or exceeded max indexable mainobj submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! } - + MainObject mainObject = {}; // These 3 calls below shouldn't need to Submit because we made sure there is enough memory for all of them. // if something here triggers a auto-submit it's a possible bug with calculating `memRequired` above, TODO: assert that somehow? @@ -1916,7 +2233,7 @@ uint32_t DrawResourcesFiller::addCustomProjection_SubmitIfNeeded(const float64_t submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! } - + resourcesCollection.customProjections.vector.push_back(projection); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers return resourcesCollection.customProjections.vector.size() - 1u; } @@ -1932,7 +2249,7 @@ uint32_t DrawResourcesFiller::addCustomClipRect_SubmitIfNeeded(const WorldClipRe submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! 
} - + resourcesCollection.customClipRects.vector.push_back(clipRect); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers return resourcesCollection.customClipRects.vector.size() - 1u; } @@ -1953,7 +2270,7 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(PolylineConnector) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); // TODO[ERFAN]: later take into account: our maximum indexable vertex - + const uint32_t connectorCount = static_cast(polyline.getConnectors().size()); const uint32_t remainingObjects = connectorCount - currentPolylineConnectorObj; const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); @@ -1973,12 +2290,12 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < objectsToUpload; ++i) { - indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; - indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; } // Add DrawObjs @@ -1991,7 +2308,7 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po { drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(PolylineConnector); - } + } 
currentPolylineConnectorObj += objectsToUpload; } @@ -2029,12 +2346,12 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < objectsToUpload; ++i) { - indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; - indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; } // Add DrawObjs @@ -2047,7 +2364,7 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const { drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(LinePointInfo); - } + } currentObjectInSection += objectsToUpload; } @@ -2063,7 +2380,7 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, // memConsumption = quadBezCount * (sizeof(QuadraticBezierInfo) + 3*(sizeof(DrawObject)+6u*sizeof(uint32_t)) const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(QuadraticBezierInfo) + (sizeof(DrawObject) + 6u * sizeof(uint32_t)) * CagesPerQuadBezier); // TODO[ERFAN]: later take into account: our maximum indexable vertex - + const uint32_t beziersCount = section.count; const uint32_t remainingObjects = beziersCount - currentObjectInSection; const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); @@ -2071,7 +2388,7 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& 
polyline, if (objectsToUpload <= 0u) return; - + // Add Geometry const auto beziersByteSize = sizeof(QuadraticBezierInfo) * (objectsToUpload); size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(beziersByteSize, alignof(QuadraticBezierInfo)); @@ -2082,18 +2399,18 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, // Push Indices, remove later when compute fills this - uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u*cagesCount); + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * cagesCount); const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < cagesCount; ++i) { - indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; - indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; } - + // Add DrawObjs DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(cagesCount); DrawObject drawObj = {}; @@ -2119,7 +2436,7 @@ void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& curren const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(Hatch::CurveHatchBox) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); // TODO[ERFAN]: later take into account: our maximum indexable vertex - + uint32_t remainingObjects = 
hatch.getHatchBoxCount() - currentObjectInSection; const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); @@ -2133,20 +2450,20 @@ void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& curren void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; const Hatch::CurveHatchBox& hatchBox = hatch.getHatchBox(currentObjectInSection); // WARNING: This is assuming hatch boxes are contigous in memory, TODO: maybe make that more obvious through Hatch interface memcpy(dst, &hatchBox, curveBoxesByteSize); - + // Push Indices, remove later when compute fills this uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < objectsToUpload; ++i) { - indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; - indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; } - + // Add DrawObjs DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); DrawObject drawObj = {}; @@ -2169,7 +2486,7 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GlyphInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); // TODO[ERFAN]: later take into 
account: our maximum indexable vertex - + if (uploadableObjects <= 0u) return false; @@ -2182,12 +2499,12 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); const uint32_t startObj = resourcesCollection.drawObjects.getCount(); uint32_t i = 0u; - indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; - indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; - indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; - indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; // Add DrawObjs DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); @@ -2384,7 +2701,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc // Try creating the image and allocating memory for it: nbl::video::IGPUImage::SCreationParams params = {}; params = imageParams; - + if (imageViewFormatOverride != asset::E_FORMAT::EF_COUNT && imageViewFormatOverride != imageParams.format) { params.viewFormats.set(static_cast(imageViewFormatOverride), true); @@ -2403,7 +2720,11 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc if (imageMemoryRequirementsMatch) { + // OutputDebugStringA(std::format("ALlocating {} !!!!\n", gpuImageMemoryRequirements.size).c_str()); + // m_logger.log(std::format(" [BEFORE] Allocator Free Size={} \n",imagesMemorySubAllocator->getFreeSize()).c_str(), 
nbl::system::ILogger::ELL_INFO); ret.allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); + // m_logger.log(std::format(" [AFTER] Alloc Size = {}, Alloc Offset = {}, Alignment = {} \n",gpuImageMemoryRequirements.size, ret.allocationOffset, 1u << gpuImageMemoryRequirements.alignmentLog2).c_str(), nbl::system::ILogger::ELL_INFO); + // m_logger.log(std::format(" [AFTER] Allocator Free Size={} \n",imagesMemorySubAllocator->getFreeSize()).c_str(), nbl::system::ILogger::ELL_INFO); const bool allocationFromImagesMemoryArenaSuccessfull = ret.allocationOffset != ImagesMemorySubAllocator::InvalidAddress; if (allocationFromImagesMemoryArenaSuccessfull) { @@ -2411,7 +2732,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = { .image = gpuImage.get(), - .binding = { .memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + ret.allocationOffset } + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + ret.allocationOffset } }; const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); if (boundToMemorySuccessfully) @@ -2459,7 +2780,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc } else { - // printf(std::format("Allocation Failed, Trying again, ImageID={} Size={} \n", imageID, gpuImageMemoryRequirements.size).c_str()); + m_logger.log(std::format("Retrying Allocation after failure with Allocation Size={}, Allocator Free Size={} \n", gpuImageMemoryRequirements.size, imagesMemorySubAllocator->getFreeSize()).c_str(), nbl::system::ILogger::ELL_INFO); // recoverable error when allocation fails, we don't log anything, next code will try evicting other images and retry } } @@ -2511,53 +2832,6 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc 
return ret; } -ImageType DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params) -{ - // Decide whether the image can reside fully into memory rather than get streamed. - // TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not - // TODO: add criterial that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) - const uint32_t2 imageExtents = queryGeoreferencedImageExtents(params.storagePath); - const nbl::asset::E_FORMAT imageFormat = queryGeoreferencedImageFormat(params.storagePath); - - const size_t mainImagePixels = ((size_t)imageExtents.x * (size_t)imageExtents.y); - const size_t viewportImagePixels = ((size_t)viewportExtent.x * (size_t)viewportExtent.y); - // If it's too long along any dimension it's obviously going to be streamed - const bool betterToResideFullyInMem = imageExtents.x < (1u << 14u) && imageExtents.y < (1u << 14u) && mainImagePixels <= viewportImagePixels; - - ImageType imageType; - - if (betterToResideFullyInMem) - imageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; - else - imageType = ImageType::GEOREFERENCED_STREAMED; - - outImageParams.type = asset::IImage::ET_2D; - outImageParams.samples = asset::IImage::ESCF_1_BIT; - outImageParams.format = imageFormat; - - if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) - { - outImageParams.extent = { imageExtents.x, imageExtents.y, 1u }; - } - else - { - // Enough to cover twice the viewport at mip 0 (so that when zooming out to mip 1 the whole viewport still gets covered with mip 0 tiles) - // and in any rotation (taking the longest side suffices). 
Can be increased to avoid frequent tile eviction when moving the camera at mip close to 1 - const uint32_t diagonal = static_cast(nbl::hlsl::ceil( - nbl::hlsl::sqrt(static_cast(viewportExtent.x * viewportExtent.x - + viewportExtent.y * viewportExtent.y)) - ) - ); - const uint32_t gpuImageSidelength = 2 * core::roundUp(diagonal, GeoreferencedImageTileSize) + GeoreferencedImagePaddingTiles * GeoreferencedImageTileSize; - outImageParams.extent = { gpuImageSidelength, gpuImageSidelength, 1u }; - } - - outImageParams.mipLevels = 2u; - outImageParams.arrayLayers = 1u; - - return imageType; -} - void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) { getGlyphMSDF = func; @@ -2604,54 +2878,54 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor } /* - * The `msdfTextureArrayIndexAllocator` manages indices (slots) into a texture array for MSDF images. - * When all slots are occupied, the least recently used entry is evicted via `msdfLRUCache`. - * This callback is invoked on eviction, and must: - * - Ensure safe deallocation of the slot. - * - Submit any pending draw calls if the evicted MSDF was scheduled to be used in the upcoming submission. - */ + * The `msdfTextureArrayIndexAllocator` manages indices (slots) into a texture array for MSDF images. + * When all slots are occupied, the least recently used entry is evicted via `msdfLRUCache`. + * This callback is invoked on eviction, and must: + * - Ensure safe deallocation of the slot. + * - Submit any pending draw calls if the evicted MSDF was scheduled to be used in the upcoming submission. + */ auto evictionCallback = [&](const MSDFReference& evicted) - { - // `deallocationWaitInfo` is used to prepare wait info to defer index deallocation until the GPU has finished using the resource. 
- // NOTE: `deallocationWaitInfo` is currently *not* required for correctness because: - // - Both the image upload (msdfImagesState) and usage occur within the same timeline (`intendedNextSubmit`). - // - timeline semaphores guarantee proper ordering: the next submit's msdfImagesState will wait on the prior usage. - // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. - // - // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images - // outside the `intendedNextSubmit` timeline for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. + { + // `deallocationWaitInfo` is used to prepare wait info to defer index deallocation until the GPU has finished using the resource. + // NOTE: `deallocationWaitInfo` is currently *not* required for correctness because: + // - Both the image upload (msdfImagesState) and usage occur within the same timeline (`intendedNextSubmit`). + // - timeline semaphores guarantee proper ordering: the next submit's msdfImagesState will wait on the prior usage. + // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. + // + // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images + // outside the `intendedNextSubmit` timeline for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. + + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); + + if (imageUsedForNextIntendedSubmit) + { + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. 
+ submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); + } + else + { + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit (scratchSemaphore.value). + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); + } - const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); + // Clear CPU-side metadata associated with the evicted slot. + msdfImagesState[evicted.alloc_idx].evict(); + }; - if (imageUsedForNextIntendedSubmit) - { - // The evicted image is scheduled for use in the upcoming submit. - // To avoid rendering artifacts, we must flush the current draw queue now. - // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. 
- submitDraws(intendedNextSubmit); - reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded - - // Prepare wait info to defer index deallocation until the GPU has finished using the resource. - // we wait on the signal semaphore for the submit we just did above. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); - } - else - { - // The image is not used in the current frame, so we can deallocate without submitting any draws. - // Still wait on the semaphore to ensure past GPU usage is complete. - // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit (scratchSemaphore.value). - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); - } - - // Clear CPU-side metadata associated with the evicted slot. 
- msdfImagesState[evicted.alloc_idx].evict(); - }; - // We pass nextSemaValue instead of constructing a new MSDFReference and passing it into `insert` that's because we might get a cache hit and only update the value of the nextSema MSDFReference* inserted = msdfLRUCache->insert(msdfInput, currentFrameIndex, evictionCallback); - + inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN // if cachedImageRecord->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx @@ -2673,7 +2947,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor inserted->alloc_idx = InvalidTextureIndex; } } - + assert(inserted->alloc_idx != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed return inserted->alloc_idx; @@ -2690,284 +2964,4 @@ void DrawResourcesFiller::flushDrawObjects() drawCalls.push_back(drawCall); drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount(); } -} - -DrawResourcesFiller::TileUploadData DrawResourcesFiller::generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, GeoreferencedImageStreamingState* imageStreamingState) -{ - // I think eventually it's better to just transform georeferenced images that aren't big enough into static images and forget about them - if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) //Pass imageID as parameter, down from the addGeoRef call - return TileUploadData{ {}, imageStreamingState->georeferencedImageParams.worldspaceOBB }; - - // Compute the mip level and tile range we would need to encompass the viewport - // `viewportTileRange` is always should be a subset of `currentMappedRegion`, covering only the tiles visible in the viewport - // 
This also computes the optimal mip level for these tiles (basically a measure of how zoomed in or out the viewport is from the image) - GeoreferencedImageTileRange viewportTileRange = computeViewportTileRange(NDCToWorld, imageStreamingState); - - // Slide or remap the current mapped region to ensure the viewport falls inside it - imageStreamingState->ensureMappedRegionCoversViewport(viewportTileRange); - - // DEBUG - Sampled mip level - { - // Get world coordinates for each corner of the mapped region - const float32_t2 oneTileDirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU / float32_t(imageStreamingState->fullImageTileLength.x) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - const float32_t2 fullImageDirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x); - const float32_t2 oneTileDirV = fullImageDirV / float32_t(imageStreamingState->fullImageTileLength.y) * float32_t(1u << imageStreamingState->currentMappedRegion.baseMipLevel); - float64_t2 topLeftMappedRegionWorld = imageStreamingState->georeferencedImageParams.worldspaceOBB.topLeft; - topLeftMappedRegionWorld += oneTileDirU * float32_t(imageStreamingState->currentMappedRegion.topLeftTile.x) + oneTileDirV * float32_t(imageStreamingState->currentMappedRegion.topLeftTile.y); - const uint32_t2 mappedRegionTileLength = imageStreamingState->currentMappedRegion.bottomRightTile - imageStreamingState->currentMappedRegion.topLeftTile + uint32_t2(1, 1); - float64_t2 bottomRightMappedRegionWorld = topLeftMappedRegionWorld; - bottomRightMappedRegionWorld += oneTileDirU * float32_t(mappedRegionTileLength.x) + oneTileDirV * float32_t(mappedRegionTileLength.y); - - // With the above, get an affine transform that maps points in worldspace to their pixel coordinates in the mapped region tile space. 
This can be done by mapping - // `topLeftMappedRegionWorld -> (0,0)` and `bottomRightMappedRegionWorld -> mappedRegionPixelLength - 1` - const uint32_t2 mappedRegionPixelLength = GeoreferencedImageTileSize * mappedRegionTileLength; - - // 1. Displacement - // Multiplying a (homogenous) point p by this matrix yields the displacement vector `p - topLeftMappedRegionWorld` - float64_t2x3 displacementMatrix(1., 0., -topLeftMappedRegionWorld.x, 0., 1., -topLeftMappedRegionWorld.y); - - // 2. Change of Basis. We again abuse the fact that the basis vectors are orthogonal - float64_t2 dirU = oneTileDirU * float32_t(mappedRegionTileLength.x); - float64_t2 dirV = oneTileDirV * float32_t(mappedRegionTileLength.y); - float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); - float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); - float64_t2 firstRow = dirU / dirULengthSquared; - float64_t2 secondRow = dirV / dirVLengthSquared; - float64_t2x2 changeOfBasisMatrix(firstRow, secondRow); - - // 3. Rescaling. The above matrix yields uv coordinates in the rectangle spanned by the mapped region. To get pixel coordinates, we simply multiply each coordinate by - // how many pixels they span in the gpu image - float64_t2x2 scalingMatrix(mappedRegionTileLength.x * GeoreferencedImageTileSize, 0.0, 0.0, mappedRegionTileLength.y * GeoreferencedImageTileSize); - - // Put them all together - float64_t2x3 toPixelCoordsMatrix = nbl::hlsl::mul(scalingMatrix, nbl::hlsl::mul(changeOfBasisMatrix, displacementMatrix)); - - // These are vulkan standard, might be different in n4ce! 
- constexpr static float64_t3 topLeftViewportNDC = float64_t3(-1.0, -1.0, 1.0); - constexpr static float64_t3 topRightViewportNDC = float64_t3(1.0, -1.0, 1.0); - constexpr static float64_t3 bottomLeftViewportNDC = float64_t3(-1.0, 1.0, 1.0); - constexpr static float64_t3 bottomRightViewportNDC = float64_t3(1.0, 1.0, 1.0); - - // Map viewport points to world - const float64_t3 topLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDC); - const float64_t3 topRightViewportWorld = nbl::hlsl::mul(NDCToWorld, topRightViewportNDC); - const float64_t3 bottomLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDC); - - // Get pixel coordinates vectors for each side - const float64_t2 viewportWidthPixelLengthVector = nbl::hlsl::mul(toPixelCoordsMatrix, topRightViewportWorld - topLeftViewportWorld); - const float64_t2 viewportHeightPixelLengthVector = nbl::hlsl::mul(toPixelCoordsMatrix, bottomLeftViewportWorld - topLeftViewportWorld); - - // Get pixel length for each of these vectors - const auto viewportWidthPixelLength = nbl::hlsl::length(viewportWidthPixelLengthVector); - const auto viewportHeightPixelLength = nbl::hlsl::length(viewportHeightPixelLengthVector); - - // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportWidthPixelLength / viewportExtent.x, viewportHeightPixelLength / viewportExtent.y); - pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; - - std::cout << "Sampled mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; - } - - // We need to make every tile that covers the viewport resident. We reserve the amount of tiles needed for upload. 
- core::vector tiles; - auto tilesToLoad = imageStreamingState->tilesToLoad(viewportTileRange); - tiles.reserve(tilesToLoad.size()); - - const uint32_t2 imageExtents = imageStreamingState->georeferencedImageParams.imageExtents; - const std::filesystem::path imageStoragePath = imageStreamingState->georeferencedImageParams.storagePath; - for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) - { - uint32_t2 gpuMip0Texels(GeoreferencedImageTileSize, GeoreferencedImageTileSize); - core::smart_refctd_ptr gpuMip0Tile = nullptr; - core::smart_refctd_ptr gpuMip1Tile = nullptr; - - { - uint32_t2 georeferencedImageMip0SampledTexels = uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; - const uint32_t2 georeferencedImageMip0SamplingOffset = (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; - const uint32_t2 lastTileIndex = imageStreamingState->getLastTileIndex(viewportTileRange.baseMipLevel); - - // If on the last tile, we might not load a full `GeoreferencedImageTileSize x GeoreferencedImageTileSize` tile, so we figure out how many pixels to load in this case to have - // minimal artifacts and no stretching - if (imageTileIndex.x == lastTileIndex.x) - { - georeferencedImageMip0SampledTexels.x = imageExtents.x - georeferencedImageMip0SamplingOffset.x; - uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.x >> (viewportTileRange.baseMipLevel + 1); - gpuMip0Texels.x = 2 * gpuMip1Texels; - imageStreamingState->lastImageTileTexels.x = gpuMip0Texels.x; - // If the last tile is too small just ignore it - if (!imageStreamingState->lastImageTileTexels.x) - continue; - } - if (imageTileIndex.y == lastTileIndex.y) - { - georeferencedImageMip0SampledTexels.y = imageExtents.y - georeferencedImageMip0SamplingOffset.y; - uint32_t gpuMip1Texels = georeferencedImageMip0SampledTexels.y >> (viewportTileRange.baseMipLevel + 1); - gpuMip0Texels.y = 2 * gpuMip1Texels; - 
imageStreamingState->lastImageTileTexels.y = gpuMip0Texels.y; - // If the last tile is too small just ignore it - if (!imageStreamingState->lastImageTileTexels.y) - continue; - } - - if (!georeferencedImageLoader->hasPrecomputedMips(imageStoragePath)) - { - gpuMip0Tile = georeferencedImageLoader->load(imageStoragePath, georeferencedImageMip0SamplingOffset, georeferencedImageMip0SampledTexels, gpuMip0Texels); - gpuMip1Tile = georeferencedImageLoader->load(imageStoragePath, georeferencedImageMip0SamplingOffset, georeferencedImageMip0SampledTexels, gpuMip0Texels / 2u); - } - else - { - gpuMip0Tile = georeferencedImageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSize, gpuMip0Texels, imageStreamingState->currentMappedRegion.baseMipLevel, false); - gpuMip1Tile = georeferencedImageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSizeMip1, gpuMip0Texels / 2u, imageStreamingState->currentMappedRegion.baseMipLevel, true); - } - } - - asset::IImage::SBufferCopy bufCopy; - bufCopy.bufferOffset = 0; - bufCopy.bufferRowLength = gpuMip0Texels.x; - bufCopy.bufferImageHeight = 0; - bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; - bufCopy.imageSubresource.mipLevel = 0u; - bufCopy.imageSubresource.baseArrayLayer = 0u; - bufCopy.imageSubresource.layerCount = 1u; - uint32_t2 gpuImageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; - bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; - bufCopy.imageExtent.width = gpuMip0Texels.x; - bufCopy.imageExtent.height = gpuMip0Texels.y; - bufCopy.imageExtent.depth = 1; - - tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(gpuMip0Tile), std::move(bufCopy)); - - // Upload the smaller tile to mip 1 - bufCopy = {}; - - bufCopy.bufferOffset = 0; - bufCopy.bufferRowLength = gpuMip0Texels.x / 2; - bufCopy.bufferImageHeight = 0; - bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; - bufCopy.imageSubresource.mipLevel = 1u; - 
bufCopy.imageSubresource.baseArrayLayer = 0u; - bufCopy.imageSubresource.layerCount = 1u; - gpuImageOffset /= 2; // Half tile size! - bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; - bufCopy.imageExtent.width = gpuMip0Texels.x / 2; - bufCopy.imageExtent.height = gpuMip0Texels.y / 2; - bufCopy.imageExtent.depth = 1; - - tiles.emplace_back(imageStreamingState->georeferencedImageParams.format, std::move(gpuMip1Tile), std::move(bufCopy)); - - // Mark tile as resident - imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; - } - - // Figure out an obb that covers only the currently loaded tiles - OrientedBoundingBox2D viewportEncompassingOBB = imageStreamingState->georeferencedImageParams.worldspaceOBB; - // The image's worldspace dirU corresponds to `imageExtents.x` texels of the image, therefore one image texel in the U direction has a worldspace span of `dirU / imageExtents.x`. - // One mip 0 tiles therefore spans `dirU * GeoreferencedImageTileSize/ imageExtents.x`. A mip `n` tile spans `2^n` this amount, since each texel at that mip level spans - // `2^n` mip texels. 
Therefore the dirU offset from the image wordlspace's topLeft of the tile of index `viewportTileRange.topLeftTile.x` at mip level `currentMappedRegion.baseMipLevel` can be calculated as - const uint32_t oneTileTexelSpan = GeoreferencedImageTileSize << imageStreamingState->currentMappedRegion.baseMipLevel; - viewportEncompassingOBB.topLeft += imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU * float32_t(viewportTileRange.topLeftTile.x * oneTileTexelSpan) / float32_t(imageStreamingState->georeferencedImageParams.imageExtents.x); - // Same reasoning for offset in v direction - const float32_t2 dirV = float32_t2(imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.y, -imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU.x) * imageStreamingState->georeferencedImageParams.worldspaceOBB.aspectRatio; - viewportEncompassingOBB.topLeft += dirV * float32_t(viewportTileRange.topLeftTile.y * oneTileTexelSpan) / float32_t(imageStreamingState->georeferencedImageParams.imageExtents.y); - - const uint32_t2 viewportTileLength = viewportTileRange.bottomRightTile - viewportTileRange.topLeftTile + uint32_t2(1, 1); - // If the last tile is visible, we use the fractional span for the last tile. Otherwise it's just a normal tile - const bool2 isLastTileVisible = imageStreamingState->isLastTileVisible(viewportTileRange.bottomRightTile); - const uint32_t2 lastGPUImageTileTexels = { isLastTileVisible.x ? imageStreamingState->lastImageTileTexels.x : GeoreferencedImageTileSize, isLastTileVisible.y ? imageStreamingState->lastImageTileTexels.y : GeoreferencedImageTileSize }; - - // Instead of grouping per tile like in the offset case, we group per texel: the same reasoning leads to a single texel at current mip level having a span of `dirU * 2^(currentMappedRegion.baseMipLevel)/ imageExtents.x` - // in the U direction. Therefore the span in worldspace of the OBB we construct is just this number multiplied by the number of gpu texels spanned to draw. 
- // The number of texels is just `GeoreferencedImageTileSize` times the number of full tiles (all but the last) + the number of texels of the last tile, which might not be a full tile if near the right boundary - viewportEncompassingOBB.dirU = imageStreamingState->georeferencedImageParams.worldspaceOBB.dirU * float32_t((GeoreferencedImageTileSize * (viewportTileLength.x - 1) + lastGPUImageTileTexels.x) << imageStreamingState->currentMappedRegion.baseMipLevel) / float32_t(imageStreamingState->georeferencedImageParams.imageExtents.x); - // Simply number of gpu texels in the y direction divided by number of texels in the x direction. - viewportEncompassingOBB.aspectRatio = float32_t(GeoreferencedImageTileSize * (viewportTileLength.y - 1) + lastGPUImageTileTexels.y) / float32_t(GeoreferencedImageTileSize * (viewportTileLength.x - 1) + lastGPUImageTileTexels.x); - - // GPU tile corresponding to the real image tile containing the viewport top left - we can let it be negative since wrapping mode is repeat, negative tiles are correct modulo `gpuImageSideLengthTiles` - const uint32_t2 viewportTopLeftGPUTile = viewportTileRange.topLeftTile - imageStreamingState->currentMappedRegion.topLeftTile + imageStreamingState->gpuImageTopLeft; - // To get the uv corresponding to the above, simply divide the tile index by the number of tiles in the GPU image. 
- // However to consider a one-texel shift inward (to prevent color bleeding at the edges) we map both numerator and denominator to texel units (by multiplying with `GeoreferencedImageTileSize`) and add - // a single texel to the numerator - const float32_t2 minUV = float32_t2(GeoreferencedImageTileSize * viewportTopLeftGPUTile + 1u) / float32_t(GeoreferencedImageTileSize * imageStreamingState->gpuImageSideLengthTiles); - // If the image was perfectly partitioned into tiles, we could get the maxUV in a similar fashion to minUV: Just compute `bottomRightTile - currentMappedRegion.topLeftTile` to get a tile - // then divide by `gpuImageSideLengthTiles` to get a coord in `(0,1)` (correct modulo `gpuImageSideLengthTiles`) - // However the last tile might not have all `GeoreferencedImageTileSize` texels in it. Therefore maxUV computation can be separated into a UV contribution by all full tiles (all but the last) + a contribution from the last tile - // UV contribution from full tiles will therefore be `(bottomRightTile - currentMappedRegion.topLeftTile) / gpuImageSideLengthTiles` while last tile contribution will be - // `lastGPUImageTileTexels / (gpuImageSideLengthTiles * GeoreferencedImageTileSize)`. We group terms below to reduce number of float ops. - // Again we first map to texel units then subtract one to add a single texel uv shift. 
- const uint32_t2 viewportBottomRightGPUTile = viewportTileRange.bottomRightTile - imageStreamingState->currentMappedRegion.topLeftTile + imageStreamingState->gpuImageTopLeft; - const float32_t2 maxUV = float32_t2(GeoreferencedImageTileSize * viewportBottomRightGPUTile + lastGPUImageTileTexels - 1u) / float32_t(GeoreferencedImageTileSize * imageStreamingState->gpuImageSideLengthTiles); - return TileUploadData{ std::move(tiles), viewportEncompassingOBB, minUV, maxUV }; -} - -GeoreferencedImageTileRange DrawResourcesFiller::computeViewportTileRange(const float64_t3x3& NDCToWorld, const GeoreferencedImageStreamingState* imageStreamingState) -{ - // These are vulkan standard, might be different in n4ce! - constexpr static float64_t3 topLeftViewportNDC = float64_t3(-1.0, -1.0, 1.0); - constexpr static float64_t3 topRightViewportNDC = float64_t3(1.0, -1.0, 1.0); - constexpr static float64_t3 bottomLeftViewportNDC = float64_t3(-1.0, 1.0, 1.0); - constexpr static float64_t3 bottomRightViewportNDC = float64_t3(1.0, 1.0, 1.0); - - // First get world coordinates for each of the viewport's corners - const float64_t3 topLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, topLeftViewportNDC); - const float64_t3 topRightViewportWorld = nbl::hlsl::mul(NDCToWorld, topRightViewportNDC); - const float64_t3 bottomLeftViewportWorld = nbl::hlsl::mul(NDCToWorld, bottomLeftViewportNDC); - const float64_t3 bottomRightViewportWorld = nbl::hlsl::mul(NDCToWorld, bottomRightViewportNDC); - - // Then we get mip 0 tiles coordinates for each of them, into the image - const float64_t2 topLeftTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(topLeftViewportWorld, GeoreferencedImageTileSize); - const float64_t2 topRightTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(topRightViewportWorld, GeoreferencedImageTileSize); - const float64_t2 bottomLeftTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(bottomLeftViewportWorld, GeoreferencedImageTileSize); - 
const float64_t2 bottomRightTileLattice = imageStreamingState->transformWorldCoordsToTileCoords(bottomRightViewportWorld, GeoreferencedImageTileSize); - - // Get the min and max of each lattice coordinate to get a bounding rectangle - const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); - const float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); - const float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); - - const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); - const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); - const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); - - // Floor them to get an integer coordinate (index) for the tiles they fall in - int32_t2 minAllFloored = nbl::hlsl::floor(minAll); - int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll); - - // We're undoing a previous division. Could be avoided but won't restructure the code atp. - // Here we compute how many image pixels each side of the viewport spans - const float64_t2 viewportSideUImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (topRightTileLattice - topLeftTileLattice); - const float64_t2 viewportSideVImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (bottomLeftTileLattice - topLeftTileLattice); - - // WARNING: This assumes pixels in the image are the same size along each axis. If the image is nonuniformly scaled or sheared, I *think* it should not matter - // (since the pixel span takes that transformation into account), BUT we have to check if we plan on allowing those - // Compute the side vectors of the viewport in image pixel(texel) space. - // These vectors represent how many image pixels each side of the viewport spans. - // They correspond to the local axes of the mapped OBB (not the mapped region one, the viewport one) in texel coordinates. 
- const float64_t viewportSideUImageTexels = nbl::hlsl::length(viewportSideUImageTexelsVector); - const float64_t viewportSideVImageTexels = nbl::hlsl::length(viewportSideVImageTexelsVector); - - // Mip is decided based on max of these - float64_t pixelRatio = nbl::hlsl::max(viewportSideUImageTexels / viewportExtent.x, viewportSideVImageTexels / viewportExtent.y); - pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; - - // DEBUG - Clamped at 0 for magnification - { - std::cout << "Real mip level: " << nbl::hlsl::log2(pixelRatio) << std::endl; - } - - GeoreferencedImageTileRange retVal = {}; - // Clamp mip level so we don't consider tiles that are too small along one dimension - // If on a pathological case this gets too expensive because the GPU starts sampling a lot, we can consider changing this, but I doubt that will happen - retVal.baseMipLevel = nbl::hlsl::min(nbl::hlsl::findMSB(uint32_t(nbl::hlsl::floor(pixelRatio))), int32_t(imageStreamingState->maxMipLevel)); - - // Current tiles are measured in mip 0. We want the result to measure mip `retVal.baseMipLevel` tiles. Each next mip level divides by 2. - minAllFloored >>= retVal.baseMipLevel; - maxAllFloored >>= retVal.baseMipLevel; - - - // Clamp them to reasonable tile indices - int32_t2 lastTileIndex = imageStreamingState->getLastTileIndex(retVal.baseMipLevel); - retVal.topLeftTile = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), lastTileIndex); - retVal.bottomRightTile = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), lastTileIndex); - - return retVal; } \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 6c28d6135..4a0d7b490 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -1,25 +1,38 @@ +/******************************************************************************/ +/* DrawResourcesFiller: This class provides important functionality to manage resources needed for a draw. 
+/******************************************************************************/ #pragma once + +#if __has_include("glm/glm/glm.hpp") // legacy +#include "glm/glm/glm.hpp" +#else +#include "glm/glm.hpp" // new build system +#endif +#include +#include +#include +#include +#include #include "Polyline.h" -#include "CTriangleMesh.h" #include "Hatch.h" #include "IndexAllocator.h" -#include "Images.h" #include -#include +#include "CTriangleMesh.h" +#include "Shaders/globals.hlsl" +#include "Images.h" + +//#include #include -// #include + using namespace nbl; using namespace nbl::video; using namespace nbl::core; using namespace nbl::asset; -using namespace nbl::ext::TextRendering; static_assert(sizeof(DrawObject) == 16u); static_assert(sizeof(MainObject) == 20u); static_assert(sizeof(LineStyle) == 88u); -//TODO[Francisco]: Update briefs for geotex related functions - // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. // ! Drawing new objects (polylines, hatches, etc.) should go through this function. 
@@ -29,7 +42,7 @@ static_assert(sizeof(LineStyle) == 88u); struct DrawResourcesFiller { public: - + // We pack multiple data types in a single buffer, we need to makes sure each offset starts aligned to avoid mis-aligned accesses static constexpr size_t GPUStructsMaxNaturalAlignment = 8u; static constexpr size_t MinimumDrawResourcesMemorySize = 512u * 1 << 20u; // 512MB @@ -50,7 +63,7 @@ struct DrawResourcesFiller { size_t count = 0ull; size_t getCount() const override { return count; } - size_t getStorageSize() const override { return count * sizeof(T); } + size_t getStorageSize() const override { return count * sizeof(T); } }; /// @brief ResourceBase which is filled by CPU, packed and sent to GPU @@ -60,9 +73,9 @@ struct DrawResourcesFiller core::vector vector; size_t getCount() const { return vector.size(); } size_t getStorageSize() const { return vector.size() * sizeof(T); } - + /// @return pointer to start of the data to be filled, up to additionalCount - T* increaseCountAndGetPtr(size_t additionalCount) + T* increaseCountAndGetPtr(size_t additionalCount) { size_t offset = vector.size(); vector.resize(offset + additionalCount); @@ -72,14 +85,14 @@ struct DrawResourcesFiller /// @brief increases size of general-purpose resources that hold bytes /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= GPUStructsMaxNaturalAlignment, only use this if storing raw bytes in vector /// @return pointer to start of the data to be filled, up to additional size - size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) + size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) { assert(core::isPoT(alignment) && alignment <= GPUStructsMaxNaturalAlignment); size_t offset = core::alignUp(vector.size(), alignment); vector.resize(offset + additionalSize); return offset; } - + uint32_t addAndGetOffset(const T& val) { vector.push_back(val); @@ -98,7 +111,7 @@ struct DrawResourcesFiller CPUGeneratedResource 
dtmSettings; CPUGeneratedResource customProjections; CPUGeneratedResource customClipRects; - + // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) CPUGeneratedResource mainObjects; @@ -123,90 +136,29 @@ struct DrawResourcesFiller } }; - /// @brief Abstract class with two overridable methods to load a region of an image, either by requesting a region at a target extent (like the loaders in n4ce do) or to request a specific region from a mip level - // (like precomputed mips solution would use). - struct IGeoreferencedImageLoader : IReferenceCounted - { - /** - * @brief Load a region from an image - used to load from images with precomputed mips - * - * @param imagePath Path to file holding the image data - * @param offset Offset into the image (at requested mipLevel!) at which the region begins - * @param extent Extent of the region to load (at requested mipLevel!) - * @param mipLevel From which mip level image to retrieve the data from - * @param downsample True if this request is supposed to go into GPU mip level 1, false otherwise - * - * @return ICPUBuffer with the requested image data - */ - core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) - { - assert(hasPrecomputedMips(imagePath)); - return load_impl(imagePath, offset, extent, mipLevel, downsample); - } - - /** - * @brief Load a region from an image - used to load from images using the n4ce loaders. Loads a region given by `offset, extent` as an image of size `targetExtent` - * where `targetExtent <= extent` so the loader is in charge of downsampling. - * - * @param imagePath Path to file holding the image data - * @param offset Offset into the image at which the region begins - * @param extent Extent of the region to load - * @param targetExtent Extent of the resulting image. 
Should NEVER be bigger than `extent` - * - * @return ICPUBuffer with the requested image data - */ - core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) - { - assert(!hasPrecomputedMips(imagePath)); - return load_impl(imagePath, offset, extent, targetExtent); - } - - // @brief Get the extents (in texels) of an image. - virtual uint32_t2 getExtents(std::filesystem::path imagePath) = 0; - - /** - * @brief Get the texel format for an image. - */ - virtual asset::E_FORMAT getFormat(std::filesystem::path imagePath) = 0; - - // @brief Returns whether the image should be loaded with the precomputed mip method or the n4ce loader method. - virtual bool hasPrecomputedMips(std::filesystem::path imagePath) const = 0; - private: - - // @brief Override to support loading with precomputed mips - virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) { return nullptr; } - - // @brief Override to support loading with n4ce-style loaders - virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) { return nullptr; } - }; - // @brief Register a loader - void setGeoreferencedImageLoader(core::smart_refctd_ptr&& _georeferencedImageLoader) + void setGeoreferencedImageLoader(core::smart_refctd_ptr&& _imageLoader) { - georeferencedImageLoader = _georeferencedImageLoader; + imageLoader = _imageLoader; } uint32_t2 queryGeoreferencedImageExtents(std::filesystem::path imagePath) { - return georeferencedImageLoader->getExtents(imagePath); + return imageLoader->getExtents(imagePath); } asset::E_FORMAT queryGeoreferencedImageFormat(std::filesystem::path imagePath) { - return georeferencedImageLoader->getFormat(imagePath); + return imageLoader->getFormat(imagePath); } - + DrawResourcesFiller(); DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, 
core::smart_refctd_ptr&& logger); typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); - - void setViewportExtent(const uint32_t2 _viewportExtent) - { - viewportExtent = _viewportExtent; - } + // DrawResourcesFiller needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding void setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding); @@ -220,33 +172,33 @@ struct DrawResourcesFiller } /** - * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. - * - * The function allocates a single memory block and splits it into image and buffer arenas. - * - * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. - * @param requiredImageMemorySize The size in bytes of the memory required for images. - * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. - * - * @return true if the memory allocation and resource setup succeeded; false otherwise. - */ + * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. + * + * The function allocates a single memory block and splits it into image and buffer arenas. + * + * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. + * @param requiredImageMemorySize The size in bytes of the memory required for images. + * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. + * + * @return true if the memory allocation and resource setup succeeded; false otherwise. 
+ */ bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize); - + /** - * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. - * - * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, - * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory - * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. - * - * @param logicalDevice Pointer to the logical device used for allocation. - * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. - * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. - * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). - * @param maxTries Maximum number of attempts to try reducing and allocating memory. - * - * @return true if the allocation succeeded at any iteration; false if all attempts failed. - */ + * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. + * + * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, + * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory + * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. + * + * @param logicalDevice Pointer to the logical device used for allocation. + * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. + * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. 
+ * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). + * @param maxTries Maximum number of attempts to try reducing and allocating memory. + * + * @return true if the allocation succeeded at any iteration; false if all attempts failed. + */ bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); bool allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); @@ -276,11 +228,11 @@ struct DrawResourcesFiller //! Draws a fixed-geometry polyline using a custom transformation. //! TODO: Change `polyline` input to an ID referencing a possibly cached instance in our buffers, allowing reuse and avoiding redundant uploads. void drawFixedGeometryPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); - + /// Use this in a begin/endMainObject scope when you want to draw different polylines that should essentially be a single main object (no self-blending between components of a single main object) /// WARNING: make sure this function is called within begin/endMainObject scope void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); - + void drawTriangleMesh( const CTriangleMesh& mesh, const DTMSettingsInfo& dtmSettingsInfo, @@ -289,11 +241,11 @@ struct DrawResourcesFiller // ! Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( const Hatch& hatch, - const float32_t4& foregroundColor, + const float32_t4& foregroundColor, const float32_t4& backgroundColor, const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit); - + // ! 
Hatch with MSDF Pattern void drawHatch( const Hatch& hatch, @@ -306,7 +258,7 @@ struct DrawResourcesFiller const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit); - + //! Convinience function for fixed-geometry Hatch with MSDF Pattern and a solid background void drawFixedGeometryHatch( const Hatch& hatch, @@ -333,7 +285,7 @@ struct DrawResourcesFiller const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); - + /// Used by SingleLineText, Issue drawing a font glyph /// WARNING: make sure this function is called within begin/endMainObject scope void drawFontGlyph( @@ -353,85 +305,168 @@ struct DrawResourcesFiller SIntendedSubmitInfo& intendedNextSubmit); /** - * @brief Adds a static 2D image to the draw resource set for rendering. - * - * This function ensures that a given image is available as a GPU-resident texture for future draw submissions. - * It uses an LRU cache to manage descriptor set slots and evicts old images if necessary to make room for new ones. - * - * If the image is already cached and its slot is valid, it returns true; - * Otherwise, it performs the following: - * - Allocates a new descriptor set slot. - * - Promotes the image format to be GPU-compatible. - * - Creates a GPU image and GPU image view. - * - Queues the image for uploading via staging in the next submit. - * - If memory is constrained, attempts to evict other images to free up space. - * - * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. - * @param staticImage::forceUpdate If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. - * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. 
- * - * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. - * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers - * a flush of pending draws to preserve correctness. - * - * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. - * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. - * @return true if the image was successfully cached and is ready for use; false if allocation failed most likely due to the image being larger than the memory arena allocated for all images. - */ + * @brief Adds a static 2D image to the draw resource set for rendering. + * + * This function ensures that a given image is available as a GPU-resident texture for future draw submissions. + * It uses an LRU cache to manage descriptor set slots and evicts old images if necessary to make room for new ones. + * + * If the image is already cached and its slot is valid, it returns true; + * Otherwise, it performs the following: + * - Allocates a new descriptor set slot. + * - Promotes the image format to be GPU-compatible. + * - Creates a GPU image and GPU image view. + * - Queues the image for uploading via staging in the next submit. + * - If memory is constrained, attempts to evict other images to free up space. + * + * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. + * @param staticImage::forceUpdate If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. 
+ * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers + * a flush of pending draws to preserve correctness. + * + * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. + * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. + * @return true if the image was successfully cached and is ready for use; false if allocation failed most likely due to the image being larger than the memory arena allocated for all images. + */ bool ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit); - - /** - * @brief Ensures that multiple static 2D images are resident and ready for rendering. - * - * Attempts to make all provided static images GPU-resident by calling `ensureStaticImageAvailability` - * for each. Afterward, it verifies that none of the newly ensured images have been evicted, - * which could happen due to limited VRAM or memory fragmentation. - * - * This function is expected to succeed if: - * - The number of images does not exceed `ImagesBindingArraySize`. - * - Each image individually fits into the image memory arena. - * - There is enough VRAM to hold all images simultaneously. - * - * @param staticImages A span of StaticImageInfo structures describing the images to be ensured. - * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. - * - * @return true If all images were successfully made resident and none were evicted during the process. - * @return false If: - * - The number of images exceeds the descriptor binding array size. - * - Any individual image could not be made resident (e.g., larger than the allocator can support). 
- * - Some images were evicted due to VRAM pressure or allocator fragmentation, in which case Clearing the image cache and retrying MIGHT be a success (TODO: handle internally) - */ - bool ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); /** - * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. - * - * If the specified image ID is not already present in the cache, or if the cached version is incompatible - * with the requested parameters (e.g. extent, format, or type), this function allocates GPU memory, - * creates the image and its view, to be bound to a descriptor binding in the future. - * - * If the image already exists and matches the requested parameters, its usage metadata is updated. - * In either case, the cache is updated to reflect usage in the current frame. - * - * This function also handles automatic eviction of old images via an LRU policy when space is limited. - * - * @param imageID Unique identifier of the image to add or reuse. - * @param params Georeferenced Image Params - * @param intendedNextSubmit Submit info object used to track resources pending GPU submission. - * - * @return true if the image was successfully cached and is ready for use; false if allocation failed. - * [TODO]: should be internal protected member function. - */ - bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, GeoreferencedImageParams&& params, SIntendedSubmitInfo& intendedNextSubmit); - - // [TODO]: should be internal protected member function. - bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); + * @brief Ensures that multiple static 2D images are resident and ready for rendering. + * + * Attempts to make all provided static images GPU-resident by calling `ensureStaticImageAvailability` + * for each. 
Afterward, it verifies that none of the newly ensured images have been evicted, + * which could happen due to limited VRAM or memory fragmentation. + * + * This function is expected to succeed if: + * - The number of images does not exceed `ImagesBindingArraySize`. + * - Each image individually fits into the image memory arena. + * - There is enough VRAM to hold all images simultaneously. + * + * @param staticImages A span of StaticImageInfo structures describing the images to be ensured. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @return true If all images were successfully made resident and none were evicted during the process. + * @return false If: + * - The number of images exceeds the descriptor binding array size. + * - Any individual image could not be made resident (e.g., larger than the allocator can support). + * - Some images were evicted due to VRAM pressure or allocator fragmentation, in which case Clearing the image cache and retrying MIGHT be a success (TODO: handle internally) + */ + bool ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); // This function must be called immediately after `addStaticImage` for the same imageID. void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); - - // This function must be called immediately after `ensureGeoreferencedImageAvailability_AllocateIfNeeded` for the same imageID. - void addGeoreferencedImage(image_id imageID, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit); + + /* + Georeferenced Image Functions: + */ + + /** + * @brief Computes the recommended GPU image extents for streamed (georeferenced) imagery. 
+ * + * This function estimates the required GPU-side image size to safely cover the current viewport, accounting for: + * - Full coverage of twice the viewport at mip 0 + * - Arbitrary rotation (by considering the diagonal) + * - Padding + * + * The resulting size is always rounded up to a multiple of the georeferenced tile size. + * + * @param viewportExtents The width and height of the viewport in pixels. + * @return A uint32_t2 representing the GPU image width and height for streamed imagery. + */ + static uint32_t2 computeStreamingImageExtentsForViewportCoverage(const uint32_t2 viewportExtents); + + /** + * @brief Updates viewport information for georeferenced image calculations. + * + * This function sets the current viewport extents and the NDC-to-world transform, + * which are used by georeferenced image streaming logic (e.g., computing tile ranges, + * determining GPU image size, and checking for tile loading or GPU image resize). + * + * Note: This class handles many rendering tasks; this function affects only the + * georeferenced image streaming and positioning calculations. + * + * @param viewportExtent Extent of the current viewport in pixels. + * @param ndcToWorldMat 3x3 matrix transforming NDC coordinates to world coordinates. + */ + inline void updateViewportInfo(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat) + { + currentViewportExtents = viewportExtent; + ndcToWorldTransformationMatrix = ndcToWorldMat; + } + + /** + * @brief Creates a streaming state for a georeferenced image. + * + * This function prepares the required state for streaming and rendering a georeferenced image. + * + * WARNING: User should make sure to: + * - Transforms the OBB into world space if custom projections (such as dwg/symbols) are active. + * + * Specifically, this function: + * - Builds a new GeoreferencedImageStreamingState for the given image ID, OBB, and storage path. 
+ * - Looks up image info such as format and extents from the registered loader and the storage path + * - Updates the returned state with current viewport. + * + * @note The returned state is not managed by the cache. The caller is responsible for + * storing it and passing the same state to subsequent streaming and draw functions. + * + * This function does **not** insert the image into the internal cache, because doing so could lead to + * premature eviction (either of this image or of another resource) before the draw call is made. + * + * @param imageID Unique identifier of the image. + * @param worldSpaceOBB Oriented bounding box of the image in world space. + * @param storagePath Filesystem path where the image data is stored. + * @return A GeoreferencedImageStreamingState object initialized for this image. + */ + nbl::core::smart_refctd_ptr ensureGeoreferencedImageEntry(image_id imageID, const OrientedBoundingBox2D& worldSpaceOBB, const std::filesystem::path& storagePath); + + /** + * @brief Launches tile loading for a cached georeferenced image. + * + * Queues all tiles visible in the current viewport for GPU upload. + * + * The work includes: + * - Calculating visible tile coverage from the OBB and viewport. + * - Loading the necessary tiles from disk via the registered `imageLoader`. + * - Preparing staging buffers and `IImage::SBufferCopy` upload regions for GPU transfer. + * - Appending the upload commands into `streamedImageCopies` for later execution. + * - Updating the state's tile occupancy map to reflect newly resident tiles. + * + * Context: this function is dedicated to streaming tiles for georeferenced images only. + * This function should be called anywhere between `ensureGeoreferencedImageEntry` and `finalizeGeoreferencedImageTileLoads`, + * but it is preferred to start loading as soon as possible to hide the latency of loading tiles from disk.
+ * + * @note The `imageStreamingState` passed in must be exactly the one returned by `ensureGeoreferencedImageEntry` with same image_id. Passing a stale or unrelated state is undefined. + * @note This function only queues uploads; GPU transfer happens later when queued copies are executed. + * + * @param imageID Unique identifier of the image. + * @param imageStreamingState Reference to the GeoreferencedImageStreamingState created or returned by `ensureGeoreferencedImageEntry` with same image_id. + */ + bool launchGeoreferencedImageTileLoads(image_id imageID, GeoreferencedImageStreamingState* imageStreamingState, const WorldClipRect clipRect); + + /** + * @brief Issue Drawing a GeoreferencedImage + * + * Ensures streaming resources are allocated, computes addressing and positioning info (OBB and min/max UV), and pushes the image info to the geometry buffer for rendering. + * + * This function should be called anywhere between `ensureGeoreferencedImageEntry` and `finalizeGeoreferencedImageTileLoads` + * + * @note The `imageStreamingState` must be the one returned by `ensureGeoreferencedImageEntry`. + * + * @param imageID Unique identifier of the image. + * @param imageStreamingState Reference to the GeoreferencedImageStreamingState created or returned by `ensureGeoreferencedImageEntry` with same image_id. + * @param intendedNextSubmit Submission info describing synchronization and barriers for the next batch. + */ + void drawGeoreferencedImage(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief copies the queued up streamed copies. + * @note call this function after `drawGeoreferencedImage` to make sure there is a gpu resource to copy to. 
+ * This is because `drawGeoreferencedImage` internally calls `ensureGeoreferencedImageResources_AllocateIfNeeded`. + */ + bool finalizeGeoreferencedImageTileLoads(SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. @@ -462,7 +497,7 @@ struct DrawResourcesFiller // Setting Active Resources: void setActiveLineStyle(const LineStyleInfo& lineStyle); - + void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo); void beginMainObject(MainObjectType type, TransformationType transformationType = TransformationType::TT_NORMAL); @@ -470,7 +505,7 @@ struct DrawResourcesFiller void pushCustomProjection(const float64_t3x3& projection); void popCustomProjection(); - + void pushCustomClipRect(const WorldClipRect& clipRect); void popCustomClipRect(); @@ -563,18 +598,61 @@ struct DrawResourcesFiller /// /// User is responsible for management of cache and making sure it's alive in the ReplayCache scope void setReplayCache(ReplayCache* cache); - + /// @brief Reverts internal logic to use the default internal staging and resource accumulation cache. /// Must be called once per corresponding `pushReplayCacheUse()`.
void unsetReplayCache(); + uint64_t getImagesMemoryConsumption() const; + + struct UsageData + { + uint32_t lineStyleCount = 0u; + uint32_t dtmSettingsCount = 0u; + uint32_t customProjectionsCount = 0u; + uint32_t mainObjectCount = 0u; + uint32_t drawObjectCount = 0u; + uint32_t geometryBufferSize = 0u; + uint64_t bufferMemoryConsumption = 0ull; + uint64_t imageMemoryConsumption = 0ull; + + void add(const UsageData& other) + { + lineStyleCount += other.lineStyleCount; + dtmSettingsCount += other.dtmSettingsCount; + customProjectionsCount += other.customProjectionsCount; + mainObjectCount += other.mainObjectCount; + drawObjectCount += other.drawObjectCount; + geometryBufferSize += other.geometryBufferSize; + bufferMemoryConsumption = nbl::hlsl::max(bufferMemoryConsumption, other.bufferMemoryConsumption); + imageMemoryConsumption = nbl::hlsl::max(imageMemoryConsumption, other.imageMemoryConsumption); + } + + std::string toString() const + { + std::ostringstream oss; + oss << "Usage Data:\n"; + oss << " lineStyles (Count): " << lineStyleCount << "\n"; + oss << " dtmSettings (Count): " << dtmSettingsCount << "\n"; + oss << " customProjections (Count): " << customProjectionsCount << "\n"; + oss << " mainObject (Count): " << mainObjectCount << "\n"; + oss << " drawObject (Count): " << drawObjectCount << "\n"; + oss << " geometryBufferSize (Bytes): " << geometryBufferSize << "\n"; + oss << " Max Buffer Memory Consumption (Bytes): " << bufferMemoryConsumption << "\n"; + oss << " Max Image Memory Consumption (Bytes):" << imageMemoryConsumption; + return oss.str(); + } + }; + + UsageData getCurrentUsageData(); + protected: SubmitFunc submitDraws; /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); - + /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. 
bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& msdfImagesState); @@ -583,9 +661,31 @@ struct DrawResourcesFiller /// @brief Records GPU copy commands for all staged images into the active command buffer. bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache); - - /// @brief copies the queued up streamed copies. - bool pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit); + + + /* + GeoreferencedImage Protected Functions: + */ + + /** + * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. + * + * If the specified image ID is not already present in the cache, or if the cached version is incompatible + * with the requested parameters (e.g. extent, format, or type), this function allocates GPU memory, + * creates the image and its view, to be bound to a descriptor binding in the future. + * + * If the image already exists and matches the requested parameters, its usage metadata is updated. + * In either case, the cache is updated to reflect usage in the current frame. + * + * This function also handles automatic eviction of old images via an LRU policy when space is limited. + * + * @param imageID Unique identifier of the image to add or reuse. + * @param imageStreamingState Reference to the GeoreferencedImageStreamingState created or returned by `ensureGeoreferencedImageEntry` with the same image_id. + * @param intendedNextSubmit Submit info object used to track resources pending GPU submission. + * + * @return true if the image was successfully cached and is ready for use; false if allocation failed.
+ */ + bool ensureGeoreferencedImageResources_AllocateIfNeeded(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit); const size_t calculateRemainingResourcesSize() const; @@ -597,7 +697,7 @@ struct DrawResourcesFiller // Gets resource index to the active linestyle data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - + // Gets resource index to the active linestyle data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); @@ -605,99 +705,99 @@ struct DrawResourcesFiller // Gets resource index to the active projection data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveCustomProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - + // Gets resource index to the active clip data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveCustomClipRectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - + // Gets resource index to the active main object data // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); /// Attempts to add lineStyle to resources. 
If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); - + /// Attempts to add dtmSettings to resources. If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); - + /// Attempts to add custom projection to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addCustomProjection_SubmitIfNeeded(const float64_t3x3& projection, SIntendedSubmitInfo& intendedNextSubmit); - + /// Attempts to add custom clip to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addCustomClipRect_SubmitIfNeeded(const WorldClipRect& clipRect, SIntendedSubmitInfo& intendedNextSubmit); - + /// returns index to added LineStyleInfo, returns Invalid index if it exceeds resource limitations uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); - + /// returns index to added DTMSettingsInfo, returns Invalid index if it exceeds resource limitations uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); - + /** - * @brief Computes the final transformation matrix for fixed geometry rendering, - * considering any active custom projections and the transformation type. - * - * This function handles how a given transformation should be applied depending on the - * current transformation type and the presence of any active projection matrices. - * - * - If no active projection exists, the input transformation is returned unmodified. - * - * - If an active projection exists: - * - For TT_NORMAL, the input transformation is simply multiplied by the top of the projection stack. 
- * - For TT_FIXED_SCREENSPACE_SIZE, the input transformation is multiplied by the top of the projection stack, - * but the resulting scale is replaced with the screen-space scale from the original input `transformation`. - * - * @param transformation The input 3x3 transformation matrix to apply. - * @param transformationType The type of transformation to apply (e.g., TT_NORMAL or TT_FIXED_SCREENSPACE_SIZE). - * - */ + * @brief Computes the final transformation matrix for fixed geometry rendering, + * considering any active custom projections and the transformation type. + * + * This function handles how a given transformation should be applied depending on the + * current transformation type and the presence of any active projection matrices. + * + * - If no active projection exists, the input transformation is returned unmodified. + * + * - If an active projection exists: + * - For TT_NORMAL, the input transformation is simply multiplied by the top of the projection stack. + * - For TT_FIXED_SCREENSPACE_SIZE, the input transformation is multiplied by the top of the projection stack, + * but the resulting scale is replaced with the screen-space scale from the original input `transformation`. + * + * @param transformation The input 3x3 transformation matrix to apply. + * @param transformationType The type of transformation to apply (e.g., TT_NORMAL or TT_FIXED_SCREENSPACE_SIZE). 
+ * + */ float64_t3x3 getFixedGeometryFinalTransformationMatrix(const float64_t3x3& transformation, TransformationType transformationType) const; /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + /// Attempts to upload as many draw objects as possible within the given polyline connectors considering resource limitations void addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx); - + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addLines_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addQuadBeziers_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + /// Attempts to upload as many draw objects as possible within the given hatch considering resource limitations void addHatch_Internal(const Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex); - + /// Attempts to upload a single GlyphInfo considering resource limitations bool addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx); - + /// Attempts to upload a single GridDTMInfo considering resource limitations bool addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx); /// Attempts to upload a single image object considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) bool addImageObject_Internal(const 
ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx);; - + /// Attempts to upload a georeferenced image info considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx); - + uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); /** - * @brief Evicts a GPU image and deallocates its associated descriptor and memory, flushing draws if needed. - * - * This function is called when an image must be removed from GPU memory (typically due to VRAM pressure). - * If the evicted image is scheduled to be used in the next draw submission, a flush is performed to avoid - * use-after-free issues. Otherwise, it proceeds with deallocation immediately. - * - * It prepares a cleanup object that ensures the memory range used by the image will be returned to the suballocator - * only after the GPU has finished using it, guarded by a semaphore wait. - * - * @param imageID The unique ID of the image being evicted. - * @param evicted A reference to the evicted image, containing metadata such as allocation offset, size, usage frame, etc. - * @param intendedNextSubmit Reference to the intended submit information. Used for synchronizing draw submission and safe deallocation. - * - * @warning Deallocation may use a conservative semaphore wait value if exact usage information is unavailable. [future todo: fix] - */ + * @brief Evicts a GPU image and deallocates its associated descriptor and memory, flushing draws if needed. + * + * This function is called when an image must be removed from GPU memory (typically due to VRAM pressure). + * If the evicted image is scheduled to be used in the next draw submission, a flush is performed to avoid + * use-after-free issues. Otherwise, it proceeds with deallocation immediately. 
+ * + * It prepares a cleanup object that ensures the memory range used by the image will be returned to the suballocator + * only after the GPU has finished using it, guarded by a semaphore wait. + * + * @param imageID The unique ID of the image being evicted. + * @param evicted A reference to the evicted image, containing metadata such as allocation offset, size, usage frame, etc. + * @param intendedNextSubmit Reference to the intended submit information. Used for synchronizing draw submission and safe deallocation. + * + * @warning Deallocation may use a conservative semaphore wait value if exact usage information is unavailable. [future todo: fix] + */ void evictImage_SubmitIfNeeded(image_id imageID, const CachedImageRecord& evicted, SIntendedSubmitInfo& intendedNextSubmit); - + struct ImageAllocateResults { nbl::core::smart_refctd_ptr gpuImageView = nullptr; @@ -707,45 +807,32 @@ struct DrawResourcesFiller }; /** - * @brief Attempts to create and allocate a GPU image and its view, with fallback eviction on failure. - * - * This function tries to create a GPU image using the specified creation parameters, allocate memory - * from the shared image memory arena, bind it to device-local memory, and create an associated image view. - * If memory allocation fails (e.g. due to VRAM exhaustion), the function will evict textures from the internal - * LRU cache and retry the operation until successful, or until only the currently-inserted image remains. - * - * This is primarily used by the draw resource filler to manage GPU image memory for streamed or cached images. - * - * @param imageParams Creation parameters for the image. Should match `nbl::asset::IImage::SCreationParams`. - * @param imageViewFormatOverride Specifies whether the image view format should differ from the image format. If set to asset::E_FORMAT_ET_COUNT, the image view uses the same format as the image - * @param intendedNextSubmit Reference to the current intended submit info. 
Used for synchronizing evictions. - * @param imageDebugName Debug name assigned to the image and its view for easier profiling/debugging. - * - * @return ImageAllocateResults A struct containing: - * - `allocationOffset`: Offset into the memory arena (or InvalidAddress on failure). - * - `allocationSize`: Size of the allocated memory region. - * - `gpuImageView`: The created GPU image view (nullptr if creation failed). - */ + * @brief Attempts to create and allocate a GPU image and its view, with fallback eviction on failure. + * + * This function tries to create a GPU image using the specified creation parameters, allocate memory + * from the shared image memory arena, bind it to device-local memory, and create an associated image view. + * If memory allocation fails (e.g. due to VRAM exhaustion), the function will evict textures from the internal + * LRU cache and retry the operation until successful, or until only the currently-inserted image remains. + * + * This is primarily used by the draw resource filler to manage GPU image memory for streamed or cached images. + * + * @param imageParams Creation parameters for the image. Should match `nbl::asset::IImage::SCreationParams`. + * @param imageViewFormatOverride Specifies whether the image view format should differ from the image format. If set to asset::E_FORMAT_ET_COUNT, the image view uses the same format as the image + * @param intendedNextSubmit Reference to the current intended submit info. Used for synchronizing evictions. + * @param imageDebugName Debug name assigned to the image and its view for easier profiling/debugging. + * + * @return ImageAllocateResults A struct containing: + * - `allocationOffset`: Offset into the memory arena (or InvalidAddress on failure). + * - `allocationSize`: Size of the allocated memory region. + * - `gpuImageView`: The created GPU image view (nullptr if creation failed). 
+ */ ImageAllocateResults tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, const asset::E_FORMAT imageViewFormatOverride, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string imageDebugName); /** - * @brief Determines creation parameters for a georeferenced image based on heuristics. - * - * This function decides whether a georeferenced image should be treated as a fully resident GPU texture - * or as a streamable image based on the relationship between its total resolution and the viewport size. - * It then fills out the appropriate Nabla image creation parameters. - * - * @param[out] outImageParams Structure to be filled with image creation parameters (format, size, etc.). - * @param[out] outImageType Indicates whether the image should be fully resident or streamed. - * @param[in] params Parameters for the georeferenced image - */ - ImageType determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, const GeoreferencedImageParams& params); - - /** - * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter + * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter */ void drawHatch_impl( const Hatch& hatch, @@ -771,7 +858,7 @@ struct DrawResourcesFiller void resetCustomProjections() { resourcesCollection.customProjections.vector.clear(); - + // Invalidate all the clip projection addresses because activeProjections buffer got reset for (auto& addr : activeProjectionIndices) addr = InvalidCustomProjectionIndex; @@ -780,7 +867,7 @@ struct DrawResourcesFiller void resetCustomClipRects() { resourcesCollection.customClipRects.vector.clear(); - + // Invalidate all the clip projection addresses because activeProjections buffer got reset for (auto& addr : activeClipRectIndices) addr = InvalidCustomClipRectIndex; @@ -797,7 +884,7 @@ struct 
DrawResourcesFiller resourcesCollection.dtmSettings.vector.clear(); activeDTMSettingsIndex = InvalidDTMSettingsIdx; } - + // MSDF Hashing and Caching Internal Functions enum class MSDFType : uint8_t { @@ -824,9 +911,10 @@ struct DrawResourcesFiller { computeBlake3Hash(); } - + bool operator==(const MSDFInputInfo& rhs) const - { return hash == rhs.hash && glyphIndex == rhs.glyphIndex && type == rhs.type; + { + return hash == rhs.hash && glyphIndex == rhs.glyphIndex && type == rhs.type; } MSDFType type; @@ -837,13 +925,13 @@ struct DrawResourcesFiller HatchFillPattern fillPattern; }; static_assert(sizeof(uint32_t) == sizeof(HatchFillPattern)); - + core::blake3_hash_t faceHash = {}; core::blake3_hash_t hash = {}; // actual hash, we will check in == operator size_t lookupHash = 0ull; // for containers expecting size_t hash private: - + void computeBlake3Hash() { core::blake3_hasher hasher; @@ -857,7 +945,7 @@ struct DrawResourcesFiller }; struct MSDFInputInfoHash { std::size_t operator()(const MSDFInputInfo& info) const { return info.lookupHash; } }; - + struct MSDFReference { uint32_t alloc_idx; @@ -868,52 +956,12 @@ struct DrawResourcesFiller MSDFReference() : MSDFReference(InvalidTextureIndex, ~0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value to MSDFReference without changing `alloc_idx` - inline MSDFReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } + inline MSDFReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; - - uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); - - uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); - - // These are mip 0 texels of the image per tile, also size of each physical tile into the gpu resident image - constexpr static uint32_t 
GeoreferencedImageTileSize = 128u; - // Mip 1 tiles are naturally half the size - constexpr static uint32_t GeoreferencedImageTileSizeMip1 = GeoreferencedImageTileSize / 2; - // How many tiles of extra padding we give to the gpu image holding the tiles for a georeferenced image - constexpr static uint32_t GeoreferencedImagePaddingTiles = 2; - /* - * @brief Returns a tile range (+mip level) which is the smallest region of the image consisting of whole tiles (at specified mip level) that encompasses the current viewport - * - * @param NDCToWorld Affine matrix that represents a linear transform from NDC coordinates (related to viewport) to world coordinates. - * - * @param imageStreamingState Image for which we want to compute said tile range - */ - GeoreferencedImageTileRange computeViewportTileRange(const float64_t3x3& NDCToWorld, const GeoreferencedImageStreamingState* imageStreamingState); + uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); - /* - * @struct TileUploadData - * @brief Holds gpu image upload info (what tiles to upload and where to upload them), an obb that encompasses the viewport and uv coords into the gpu image for the corners of that obb - */ - struct TileUploadData - { - core::vector tiles; - OrientedBoundingBox2D viewportEncompassingOBB; - float32_t2 minUV; - float32_t2 maxUV; - }; - - /* - * @brief Generates all the tile upload data needed to render the image on the current viewport - * - * @param imageType Type of the image (static or georeferenced) - * - * @param NDCToWorld Affine matrix that represents a linear transform from NDC coordinates (related to viewport) to world coordinates. - * - * @param imageStreamingState Image for which we want to generate the `TileUploadData` - */ - // Right now it's generating tile-by-tile. 
Can be improved to produce at worst 4 different rectangles to load (depending on how we need to load tiles) - TileUploadData generateTileUploadData(const ImageType imageType, const float64_t3x3& NDCToWorld, GeoreferencedImageStreamingState* imageStreamingState); + uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); // Flushes Current Draw Call and adds to drawCalls void flushDrawObjects(); @@ -960,7 +1008,7 @@ struct DrawResourcesFiller // The ClipRects & Projections are stack, because user can push/pop ClipRects & Projections in any order std::deque activeProjections; // stack of projections stored so we can resubmit them if geometry buffer got reset. std::deque activeProjectionIndices; // stack of projection gpu addresses in geometry buffer. to keep track of them in push/pops - + std::deque activeClipRects; // stack of clips stored so we can resubmit them if geometry buffer got reset. std::deque activeClipRectIndices; // stack of clips gpu addresses in geometry buffer. 
to keep track of them in push/pops @@ -975,16 +1023,17 @@ struct DrawResourcesFiller std::vector msdfImagesState = {}; // cached cpu imaged + their status, size equals to LRUCache size static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; bool m_hasInitializedMSDFTextureArrays = false; - + // Images: std::unique_ptr imagesCache; smart_refctd_ptr suballocatedDescriptorSet; uint32_t imagesArrayBinding = 0u; // Georef - pushed here rn for simplicity - core::smart_refctd_ptr georeferencedImageLoader; + core::smart_refctd_ptr imageLoader; + std::unordered_map> streamedImageCopies; // Viewport state - uint32_t2 viewportExtent = {}; -}; - + uint32_t2 currentViewportExtents = {}; + float64_t3x3 ndcToWorldTransformationMatrix = {}; +}; \ No newline at end of file diff --git a/62_CAD/Images.cpp b/62_CAD/Images.cpp index 8b7dd1075..0570338fe 100644 --- a/62_CAD/Images.cpp +++ b/62_CAD/Images.cpp @@ -2,10 +2,26 @@ using namespace nbl::hlsl; -smart_refctd_ptr GeoreferencedImageStreamingState::create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize) +ImageCleanup::ImageCleanup() + : imagesMemorySuballocator(nullptr) + , addr(ImagesMemorySubAllocator::InvalidAddress) + , size(0ull) { - smart_refctd_ptr retVal(new GeoreferencedImageStreamingState{}); - retVal->georeferencedImageParams = std::move(_georeferencedImageParams); +} + +ImageCleanup::~ImageCleanup() +{ + // printf(std::format("Actual Eviction size={}, offset={} \n", size, addr).c_str()); + if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) + imagesMemorySuballocator->deallocate(addr, size); +} + +bool GeoreferencedImageStreamingState::init(const OrientedBoundingBox2D& worldspaceOBB, const uint32_t2 fullResImageExtents, const asset::E_FORMAT sourceImageFormat, const std::filesystem::path& storagePath) +{ + this->worldspaceOBB = std::move(worldspaceOBB); + this->fullResImageExtents = fullResImageExtents; + this->sourceImageFormat = 
sourceImageFormat; + this->storagePath = storagePath; // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). // The composition of these matrices therefore transforms any point in worldspace into uv coordinates in imagespace @@ -16,30 +32,178 @@ smart_refctd_ptr GeoreferencedImageStreamingSt // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) - const float64_t2 dirU = retVal->georeferencedImageParams.worldspaceOBB.dirU; - const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(retVal->georeferencedImageParams.worldspaceOBB.aspectRatio); + const float64_t2 dirU = this->worldspaceOBB.dirU; + const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(this->worldspaceOBB.aspectRatio); const float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); const float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); const float64_t2 firstRow = dirU / dirULengthSquared; const float64_t2 secondRow = dirV / dirVLengthSquared; - const float64_t2 displacement = -retVal->georeferencedImageParams.worldspaceOBB.topLeft; + const float64_t2 displacement = -(this->worldspaceOBB.topLeft); // This is the same as multiplying the change of basis matrix by the displacement vector const float64_t postRotatedShiftX = nbl::hlsl::dot(firstRow, displacement); const float64_t postRotatedShiftY = nbl::hlsl::dot(secondRow, displacement); // Put them all together - retVal->world2UV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); + this->worldToUV = float64_t2x3(firstRow.x, 
firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); // Also set the maxMipLevel - to keep stuff simple, we don't consider having less than one tile per dimension // If you're zoomed out enough then at that point the whole image is just sampled as one tile along that dimension // In pathological cases, such as images that are way bigger on one side than the other, this could cause aliasing and slow down sampling if zoomed out too much. // If we were ever to observe such pathological cases, then maybe we should consider doing something else here. For example, making the loader able to handle different tile lengths per dimension // (so for example a 128x64 tile) but again for now it should be left as-is. - uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(retVal->georeferencedImageParams.imageExtents / TileSize)); - retVal->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); + uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(this->fullResImageExtents / GeoreferencedImageTileSize)); + this->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); + + this->fullImageTileLength = (this->fullResImageExtents - 1u) / GeoreferencedImageTileSize + 1u; + + return true; +} + +void GeoreferencedImageStreamingState::updateStreamingStateForViewport(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat) +{ + currentViewportTileRange = computeViewportTileRange(viewportExtent, ndcToWorldMat); + // Slide or remap the current mapped region to ensure the viewport falls inside it + ensureMappedRegionCoversViewport(currentViewportTileRange); + + const uint32_t2 lastTileIndex = getLastTileIndex(currentViewportTileRange.baseMipLevel); + const uint32_t2 lastTileSampligOffsetMip0 = (lastTileIndex * GeoreferencedImageTileSize) << currentViewportTileRange.baseMipLevel; + lastTileSamplingExtent = fullResImageExtents - lastTileSampligOffsetMip0; + const uint32_t2 lastTileTargetExtentMip1 = 
lastTileSamplingExtent >> (currentViewportTileRange.baseMipLevel + 1); + lastTileTargetExtent = lastTileTargetExtentMip1 << 1u; +} + +core::vector GeoreferencedImageStreamingState::tilesToLoad() const +{ + core::vector retVal; + for (uint32_t tileY = currentViewportTileRange.topLeftTile.y; tileY <= currentViewportTileRange.bottomRightTile.y; tileY++) + for (uint32_t tileX = currentViewportTileRange.topLeftTile.x; tileX <= currentViewportTileRange.bottomRightTile.x; tileX++) + { + uint32_t2 imageTileIndex = uint32_t2(tileX, tileY); + // Toroidal shift to find which gpu tile the image tile corresponds to + uint32_t2 gpuImageTileIndex = ((imageTileIndex - currentMappedRegionTileRange.topLeftTile) + gpuImageTopLeft) % gpuImageSideLengthTiles; + // Don't bother scheduling an upload if the tile is already resident + if (!currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) + retVal.push_back({ imageTileIndex , gpuImageTileIndex }); + } + return retVal; +} + +GeoreferencedImageInfo GeoreferencedImageStreamingState::computeGeoreferencedImageAddressingAndPositioningInfo() +{ + GeoreferencedImageInfo ret = {}; + + // Figure out an obb that covers only the currently loaded tiles + OrientedBoundingBox2D viewportEncompassingOBB = worldspaceOBB; + // The image's worldspace dirU corresponds to `fullResImageExtents.x` texels of the image, therefore one image texel in the U direction has a worldspace span of `dirU / fullResImageExtents.x`. + // One mip 0 tile therefore spans `dirU * GeoreferencedImageTileSize / fullResImageExtents.x`. A mip `n` tile spans `2^n` times this amount, since each texel at that mip level spans + // `2^n` mip 0 texels.
Therefore the dirU offset from the image's worldspace topLeft of the tile of index `currentViewportTileRange.topLeftTile.x` at mip level `currentMappedRegionTileRange.baseMipLevel` can be calculated as + const uint32_t oneTileTexelSpan = GeoreferencedImageTileSize << currentMappedRegionTileRange.baseMipLevel; + viewportEncompassingOBB.topLeft += worldspaceOBB.dirU * float32_t(currentViewportTileRange.topLeftTile.x * oneTileTexelSpan) / float32_t(fullResImageExtents.x); + // Same reasoning for offset in v direction + const float32_t2 dirV = float32_t2(worldspaceOBB.dirU.y, -worldspaceOBB.dirU.x) * worldspaceOBB.aspectRatio; + viewportEncompassingOBB.topLeft += dirV * float32_t(currentViewportTileRange.topLeftTile.y * oneTileTexelSpan) / float32_t(fullResImageExtents.y); + + const uint32_t2 viewportTileLength = currentViewportTileRange.bottomRightTile - currentViewportTileRange.topLeftTile + uint32_t2(1, 1); + // If the last tile is visible, we use the fractional span for the last tile. Otherwise it's just a normal tile + const bool2 lastTileVisible = isLastTileVisible(currentViewportTileRange.bottomRightTile); + const uint32_t2 lastGPUImageTileTexels = { lastTileVisible.x ? lastTileTargetExtent.x : GeoreferencedImageTileSize, lastTileVisible.y ? lastTileTargetExtent.y : GeoreferencedImageTileSize }; + + // Instead of grouping per tile like in the offset case, we group per texel: the same reasoning leads to a single texel at current mip level having a span of `dirU * 2^(currentMappedRegionTileRange.baseMipLevel)/ fullResImageExtents.x` + // in the U direction. Therefore the span in worldspace of the OBB we construct is just this number multiplied by the number of gpu texels spanned to draw.
+ // The number of texels is just `GeoreferencedImageTileSize` times the number of full tiles (all but the last) + the number of texels of the last tile, which might not be a full tile if near the right boundary + viewportEncompassingOBB.dirU = worldspaceOBB.dirU * float32_t((GeoreferencedImageTileSize * (viewportTileLength.x - 1) + lastGPUImageTileTexels.x) << currentMappedRegionTileRange.baseMipLevel) / float32_t(fullResImageExtents.x); + // Simply number of gpu texels in the y direction divided by number of texels in the x direction. + viewportEncompassingOBB.aspectRatio = float32_t(GeoreferencedImageTileSize * (viewportTileLength.y - 1) + lastGPUImageTileTexels.y) / float32_t(GeoreferencedImageTileSize * (viewportTileLength.x - 1) + lastGPUImageTileTexels.x); + + // GPU tile corresponding to the real image tile containing the viewport top left - we can let it be negative since wrapping mode is repeat, negative tiles are correct modulo `gpuImageSideLengthTiles` + const uint32_t2 viewportTopLeftGPUTile = currentViewportTileRange.topLeftTile - currentMappedRegionTileRange.topLeftTile + gpuImageTopLeft; + // To get the uv corresponding to the above, simply divide the tile index by the number of tiles in the GPU image. 
+ // However to consider a one-texel shift inward (to prevent color bleeding at the edges) we map both numerator and denominator to texel units (by multiplying with `GeoreferencedImageTileSize`) and add + // a single texel to the numerator + const float32_t2 minUV = float32_t2(GeoreferencedImageTileSize * viewportTopLeftGPUTile + 1u) / float32_t(GeoreferencedImageTileSize * gpuImageSideLengthTiles); + // If the image was perfectly partitioned into tiles, we could get the maxUV in a similar fashion to minUV: Just compute `bottomRightTile - currentMappedRegionTileRange.topLeftTile` to get a tile + // then divide by `gpuImageSideLengthTiles` to get a coord in `(0,1)` (correct modulo `gpuImageSideLengthTiles`) + // However the last tile might not have all `GeoreferencedImageTileSize` texels in it. Therefore maxUV computation can be separated into a UV contribution by all full tiles (all but the last) + a contribution from the last tile + // UV contribution from full tiles will therefore be `(bottomRightTile - currentMappedRegionTileRange.topLeftTile) / gpuImageSideLengthTiles` while last tile contribution will be + // `lastGPUImageTileTexels / (gpuImageSideLengthTiles * GeoreferencedImageTileSize)`. We group terms below to reduce number of float ops. + // Again we first map to texel units then subtract one to add a single texel uv shift. 
+ const uint32_t2 viewportBottomRightGPUTile = currentViewportTileRange.bottomRightTile - currentMappedRegionTileRange.topLeftTile + gpuImageTopLeft; + const float32_t2 maxUV = float32_t2(GeoreferencedImageTileSize * viewportBottomRightGPUTile + lastGPUImageTileTexels - 1u) / float32_t(GeoreferencedImageTileSize * gpuImageSideLengthTiles); + + ret.minUV = minUV; + ret.maxUV = maxUV; + ret.topLeft = viewportEncompassingOBB.topLeft; + ret.dirU = viewportEncompassingOBB.dirU; + ret.aspectRatio = viewportEncompassingOBB.aspectRatio; + + return ret; +} + +GeoreferencedImageTileRange GeoreferencedImageStreamingState::computeViewportTileRange(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat) +{ + // These are vulkan standard, might be different in n4ce! + constexpr static float64_t3 topLeftViewportNDC = float64_t3(-1.0, -1.0, 1.0); + constexpr static float64_t3 topRightViewportNDC = float64_t3(1.0, -1.0, 1.0); + constexpr static float64_t3 bottomLeftViewportNDC = float64_t3(-1.0, 1.0, 1.0); + constexpr static float64_t3 bottomRightViewportNDC = float64_t3(1.0, 1.0, 1.0); + + // First get world coordinates for each of the viewport's corners + const float64_t3 topLeftViewportWorld = nbl::hlsl::mul(ndcToWorldMat, topLeftViewportNDC); + const float64_t3 topRightViewportWorld = nbl::hlsl::mul(ndcToWorldMat, topRightViewportNDC); + const float64_t3 bottomLeftViewportWorld = nbl::hlsl::mul(ndcToWorldMat, bottomLeftViewportNDC); + const float64_t3 bottomRightViewportWorld = nbl::hlsl::mul(ndcToWorldMat, bottomRightViewportNDC); + + // Then we get mip 0 tiles coordinates for each of them, into the image + const float64_t2 topLeftTileLattice = transformWorldCoordsToTileCoords(topLeftViewportWorld); + const float64_t2 topRightTileLattice = transformWorldCoordsToTileCoords(topRightViewportWorld); + const float64_t2 bottomLeftTileLattice = transformWorldCoordsToTileCoords(bottomLeftViewportWorld); + const float64_t2 bottomRightTileLattice = 
transformWorldCoordsToTileCoords(bottomRightViewportWorld); + + // Get the min and max of each lattice coordinate to get a bounding rectangle + const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); + const float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); + const float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); + + const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); + const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); + const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); + + // Floor them to get an integer coordinate (index) for the tiles they fall in + int32_t2 minAllFloored = nbl::hlsl::floor(minAll); + int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll); + + // We're undoing a previous division. Could be avoided but won't restructure the code atp. + // Here we compute how many image pixels each side of the viewport spans + const float64_t2 viewportSideUImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (topRightTileLattice - topLeftTileLattice); + const float64_t2 viewportSideVImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (bottomLeftTileLattice - topLeftTileLattice); - retVal->fullImageTileLength = (retVal->georeferencedImageParams.imageExtents - 1u) / TileSize + 1u; + // WARNING: This assumes pixels in the image are the same size along each axis. If the image is nonuniformly scaled or sheared, I *think* it should not matter + // (since the pixel span takes that transformation into account), BUT we have to check if we plan on allowing those + // Compute the side vectors of the viewport in image pixel(texel) space. + // These vectors represent how many image pixels each side of the viewport spans. + // They correspond to the local axes of the mapped OBB (not the mapped region one, the viewport one) in texel coordinates. 
+ const float64_t viewportSideUImageTexels = nbl::hlsl::length(viewportSideUImageTexelsVector); + const float64_t viewportSideVImageTexels = nbl::hlsl::length(viewportSideVImageTexelsVector); + + // Mip is decided based on max of these + float64_t pixelRatio = nbl::hlsl::max(viewportSideUImageTexels / viewportExtent.x, viewportSideVImageTexels / viewportExtent.y); + pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; + + GeoreferencedImageTileRange retVal = {}; + // Clamp mip level so we don't consider tiles that are too small along one dimension + // If on a pathological case this gets too expensive because the GPU starts sampling a lot, we can consider changing this, but I doubt that will happen + retVal.baseMipLevel = nbl::hlsl::min(nbl::hlsl::findMSB(uint32_t(nbl::hlsl::floor(pixelRatio))), int32_t(maxMipLevel)); + + // Current tiles are measured in mip 0. We want the result to measure mip `retVal.baseMipLevel` tiles. Each next mip level divides by 2. + minAllFloored >>= retVal.baseMipLevel; + maxAllFloored >>= retVal.baseMipLevel; + + + // Clamp them to reasonable tile indices + int32_t2 lastTileIndex = getLastTileIndex(retVal.baseMipLevel); + retVal.topLeftTile = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), lastTileIndex); + retVal.bottomRightTile = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), lastTileIndex); return retVal; } @@ -49,18 +213,18 @@ void GeoreferencedImageStreamingState::ensureMappedRegionCoversViewport(const Ge // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap // the mapped region. 
- const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel != currentMappedRegion.baseMipLevel; + const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel != currentMappedRegionTileRange.baseMipLevel; // If we moved a huge amount in any direction, no tiles will remain resident, so we simply reset state // This only need be evaluated if the mip boundary was not already crossed const bool relativeShiftTooBig = !mipBoundaryCrossed && nbl::hlsl::any ( - nbl::hlsl::abs(int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegion.topLeftTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) + nbl::hlsl::abs(int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegionTileRange.topLeftTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) ) || nbl::hlsl::any ( - nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) + nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegionTileRange.bottomRightTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) ); // If there is no overlap between previous mapped region and the next, just reset everything @@ -74,50 +238,50 @@ void GeoreferencedImageStreamingState::ensureMappedRegionCoversViewport(const Ge void GeoreferencedImageStreamingState::remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) { // Zoomed out - if (viewportTileRange.baseMipLevel > currentMappedRegion.baseMipLevel) + if (viewportTileRange.baseMipLevel > currentMappedRegionTileRange.baseMipLevel) { // TODO: Here we would move some mip 1 tiles to mip 0 image to save the work of reuploading them, reflect that in the tracked tiles } // Zoomed in - else if (viewportTileRange.baseMipLevel < currentMappedRegion.baseMipLevel) + else if (viewportTileRange.baseMipLevel < currentMappedRegionTileRange.baseMipLevel) { // TODO: Here we would 
move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles } - currentMappedRegion = viewportTileRange; + currentMappedRegionTileRange = viewportTileRange; // Roughly center the viewport in the mapped region - currentMappedRegion.topLeftTile = nbl::hlsl::max(uint32_t2(0, 0), currentMappedRegion.topLeftTile - (gpuImageSideLengthTiles / 2)); - // We can expand the currentMappedRegion to make it as big as possible, at no extra cost since we only upload tiles on demand + currentMappedRegionTileRange.topLeftTile = nbl::hlsl::max(uint32_t2(0, 0), currentMappedRegionTileRange.topLeftTile - (gpuImageSideLengthTiles / 2)); + // We can expand the currentMappedRegionTileRange to make it as big as possible, at no extra cost since we only upload tiles on demand // Since we use toroidal updating it's kinda the same which way we expand the region. We first try to make the extent be `gpuImageSideLengthTiles` - currentMappedRegion.bottomRightTile = currentMappedRegion.topLeftTile + uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - uint32_t2(1, 1); + currentMappedRegionTileRange.bottomRightTile = currentMappedRegionTileRange.topLeftTile + uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - uint32_t2(1, 1); // This extension can cause the mapped region to fall out of bounds on border cases, therefore we clamp it and extend it in the other direction // by the amount of tiles we removed during clamping - const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.bottomRightTile) - int32_t2(getLastTileIndex(currentMappedRegion.baseMipLevel)))); - currentMappedRegion.bottomRightTile -= excessTiles; + const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegionTileRange.bottomRightTile) - int32_t2(getLastTileIndex(currentMappedRegionTileRange.baseMipLevel)))); + currentMappedRegionTileRange.bottomRightTile -= excessTiles; // Shifting of 
the topLeftTile could fall out of bounds in pathological cases or at very high mip levels (zooming out too much), so we shift if possible, otherwise set it to 0 - currentMappedRegion.topLeftTile = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegion.topLeftTile) - int32_t2(excessTiles))); + currentMappedRegionTileRange.topLeftTile = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegionTileRange.topLeftTile) - int32_t2(excessTiles))); - // Mark all gpu tiles as dirty - currentMappedRegionOccupancy.resize(gpuImageSideLengthTiles); - for (auto i = 0u; i < gpuImageSideLengthTiles; i++) - { - currentMappedRegionOccupancy[i].clear(); - currentMappedRegionOccupancy[i].resize(gpuImageSideLengthTiles, false); - } + ResetTileOccupancyState(); // Reset state for gpu image so that it starts loading tiles at top left. Not really necessary. gpuImageTopLeft = uint32_t2(0, 0); } +void GeoreferencedImageStreamingState::ResetTileOccupancyState() +{ + // Mark all gpu tiles as dirty + currentMappedRegionOccupancy.assign(gpuImageSideLengthTiles, std::vector(gpuImageSideLengthTiles, false)); +} + void GeoreferencedImageStreamingState::slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) { // `topLeftShift` represents how many tiles up and to the left we have to move the mapped region to fit the viewport. // First we compute a vector from the current mapped region's topleft to the viewport's topleft. 
If this vector is positive along a dimension it means // the viewport's topleft is to the right or below the current mapped region's topleft, so we don't have to shift the mapped region to the left/up in that case - const int32_t2 topLeftShift = nbl::hlsl::min(int32_t2(0, 0), int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegion.topLeftTile)); + const int32_t2 topLeftShift = nbl::hlsl::min(int32_t2(0, 0), int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegionTileRange.topLeftTile)); // `bottomRightShift` represents the same as above but in the other direction. - const int32_t2 bottomRightShift = nbl::hlsl::max(int32_t2(0, 0), int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegion.bottomRightTile)); + const int32_t2 bottomRightShift = nbl::hlsl::max(int32_t2(0, 0), int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegionTileRange.bottomRightTile)); // The following is not necessarily equal to `gpuImageSideLengthTiles` since there can be pathological cases, as explained in the remapping method - const uint32_t2 mappedRegionDimensions = currentMappedRegion.bottomRightTile - currentMappedRegion.topLeftTile + 1u; + const uint32_t2 mappedRegionDimensions = currentMappedRegionTileRange.bottomRightTile - currentMappedRegionTileRange.topLeftTile + 1u; const uint32_t2 gpuImageBottomRight = (gpuImageTopLeft + mappedRegionDimensions - 1u) % gpuImageSideLengthTiles; // Mark dropped tiles as dirty/non-resident @@ -174,25 +338,9 @@ void GeoreferencedImageStreamingState::slideCurrentRegion(const GeoreferencedIma // Shift the mapped region accordingly // A nice consequence of the mapped region being always maximally - sized is that // along any dimension, only a shift in one direction is necessary, so we can simply add up the shifts - currentMappedRegion.topLeftTile = uint32_t2(int32_t2(currentMappedRegion.topLeftTile) + topLeftShift + bottomRightShift); - currentMappedRegion.bottomRightTile = 
uint32_t2(int32_t2(currentMappedRegion.bottomRightTile) + topLeftShift + bottomRightShift); + currentMappedRegionTileRange.topLeftTile = uint32_t2(int32_t2(currentMappedRegionTileRange.topLeftTile) + topLeftShift + bottomRightShift); + currentMappedRegionTileRange.bottomRightTile = uint32_t2(int32_t2(currentMappedRegionTileRange.bottomRightTile) + topLeftShift + bottomRightShift); // Toroidal shift for the gpu image top left gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftShift + bottomRightShift + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; -} - -core::vector GeoreferencedImageStreamingState::tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) const -{ - core::vector retVal; - for (uint32_t tileY = viewportTileRange.topLeftTile.y; tileY <= viewportTileRange.bottomRightTile.y; tileY++) - for (uint32_t tileX = viewportTileRange.topLeftTile.x; tileX <= viewportTileRange.bottomRightTile.x; tileX++) - { - uint32_t2 imageTileIndex = uint32_t2(tileX, tileY); - // Toroidal shift to find which gpu tile the image tile corresponds to - uint32_t2 gpuImageTileIndex = ((imageTileIndex - currentMappedRegion.topLeftTile) + gpuImageTopLeft) % gpuImageSideLengthTiles; - // Don't bother scheduling an upload if the tile is already resident - if (!currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) - retVal.push_back({ imageTileIndex , gpuImageTileIndex }); - } - return retVal; } \ No newline at end of file diff --git a/62_CAD/Images.h b/62_CAD/Images.h index df4186020..e727ed46a 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -1,6 +1,9 @@ +/* DrawResourcesFiller: This class provides important functionality to manage resources needed for a draw. 
+/******************************************************************************/ #pragma once #include "shaders/globals.hlsl" +#include using namespace nbl; using namespace nbl::video; @@ -9,6 +12,13 @@ using namespace nbl::asset; using image_id = uint64_t; // Could later be templated or replaced with a stronger type or hash key. +// These are mip 0 pixels per tile, also size of each physical tile into the gpu resident image +constexpr static uint32_t GeoreferencedImageTileSize = 128u; +// Mip 1 tiles are naturally half the size +constexpr static uint32_t GeoreferencedImageTileSizeMip1 = GeoreferencedImageTileSize / 2; +// How many tiles of extra padding we give to the gpu image holding the tiles for a georeferenced image +constexpr static uint32_t GeoreferencedImagePaddingTiles = 2; + enum class ImageState : uint8_t { INVALID = 0, @@ -21,28 +31,19 @@ enum class ImageType : uint8_t { INVALID = 0, STATIC, // Regular non-georeferenced image, fully loaded once - GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view - GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant -}; - -struct GeoreferencedImageParams -{ - OrientedBoundingBox2D worldspaceOBB = {}; - uint32_t2 imageExtents = {}; - asset::E_FORMAT format = {}; - std::filesystem::path storagePath = {}; + GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view // TODO[DEVSH]: Probably best to rename this to STREAMED image }; /** - * @class ImagesMemorySubAllocator - * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. - * - * This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation - * for image resources within a contiguous block of GPU memory. - * - * @note This class only manages address offsets. The actual memory must be bound separately. 
- */ -class ImagesMemorySubAllocator : public core::IReferenceCounted +* @class ImagesMemorySubAllocator +* @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. +* +* This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation +* for image resources within a contiguous block of GPU memory. +* +* @note This class only manages address offsets. The actual memory must be bound separately. +*/ +class ImagesMemorySubAllocator : public core::IReferenceCounted { public: using AddressAllocator = nbl::core::GeneralpurposeAddressAllocator; @@ -72,12 +73,17 @@ class ImagesMemorySubAllocator : public core::IReferenceCounted m_addressAllocator->free_addr(addr, size); } + uint64_t getFreeSize() const + { + return m_addressAllocator->get_free_size(); + } + ~ImagesMemorySubAllocator() { if (m_reservedAlloc) m_reservedAllocator->deallocate(reinterpret_cast(m_reservedAlloc), m_reservedAllocSize); } - + private: std::unique_ptr m_addressAllocator = nullptr; @@ -92,18 +98,9 @@ class ImagesMemorySubAllocator : public core::IReferenceCounted // Destructor will then deallocate from GeneralPurposeAllocator, making the previously allocated range of the image available/free again. struct ImageCleanup : public core::IReferenceCounted { - ImageCleanup() - : imagesMemorySuballocator(nullptr) - , addr(ImagesMemorySubAllocator::InvalidAddress) - , size(0ull) - {} + ImageCleanup(); - ~ImageCleanup() override - { - // printf(std::format("Actual Eviction size={}, offset={} \n", size, addr).c_str()); - if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) - imagesMemorySuballocator->deallocate(addr, size); - } + ~ImageCleanup() override; smart_refctd_ptr imagesMemorySuballocator; uint64_t addr; @@ -122,26 +119,109 @@ struct GeoreferencedImageTileRange // @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. 
struct GeoreferencedImageStreamingState : public IReferenceCounted { - friend class DrawResourcesFiller; +public: + + GeoreferencedImageStreamingState() + { + } + + //! Creates a new streaming state for a georeferenced image + /* + Initializes CPU-side state for image streaming. + Sets up world-to-UV transform, computes mip hierarchy parameters, + and stores metadata about the image. + + @param worldspaceOBB Oriented bounding box of the image in world space + @param fullResImageExtents Full resolution image size in pixels (width, height) + @param format Pixel format of the image + @param storagePath Filesystem path for image tiles + */ + bool init(const OrientedBoundingBox2D& worldSpaceOBB, const uint32_t2 fullResImageExtents, const asset::E_FORMAT format, const std::filesystem::path& storagePath); + + /** + * @brief Update the mapped region to cover the current viewport. + * + * Computes the required tile range from the viewport and updates + * `currentMappedRegion` by remapping or sliding as needed. + * + * @param currentViewportExtents Viewport size in pixels. + * @param ndcToWorldMat NDC to world space mattix. + * + * @see tilesToLoad + */ + void updateStreamingStateForViewport(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat); + + // @brief Info to match a gpu tile to the tile in the real image it should hold image data for + struct ImageTileToGPUTileCorrespondence + { + uint32_t2 imageTileIndex; + uint32_t2 gpuImageTileIndex; + }; -protected: /* - * @brief Create a streaming state for a georeferenced image + * @brief Get the tiles required for rendering the current viewport. + * Uses the region set by `updateStreamingStateForViewport()` to return + * which image tiles need loading and their target GPU tile indices. 
+ */ + core::vector tilesToLoad() const; + + // @brief Returns the index of the last tile when covering the image with `mipLevel` tiles + inline uint32_t2 getLastTileIndex(uint32_t mipLevel) const + { + return (fullImageTileLength - 1u) >> mipLevel; + } + + // @brief Returns whether the last tile in the image (along each dimension) is visible from the current viewport + inline bool2 isLastTileVisible(const uint32_t2 viewportBottomRightTile) const + { + const uint32_t2 lastTileIndex = getLastTileIndex(currentMappedRegionTileRange.baseMipLevel); + return bool2(lastTileIndex.x == viewportBottomRightTile.x, lastTileIndex.y == viewportBottomRightTile.y); + } + + /** + * @brief Compute viewport positioning and UV addressing for a georeferenced image. + * + * Returns a `GeoreferencedImageInfo` filled with: + * - `topLeft`, `dirU`, `aspectRatio` (world-space OBB) + * - `minUV`, `maxUV` (UV addressing for the viewport) + * + * Leaves `textureID` unmodified. + * + * @note Make sure to call `updateStreamingStateForViewport()` first so that + * the OBB and UVs reflect the latest viewport. * - * @param _georeferencedImageParams Info relating to the georeferenced image for which to create a streaming state. - * @param TileSize Size of the tiles used to break up the image. Also size of the tiles in the GPU image backing this georeferenced image. + * @param imageStreamingState The streaming state of the georeferenced image. + * @return GeoreferencedImageInfo containing viewport positioning and UV info. */ - static smart_refctd_ptr create(GeoreferencedImageParams&& _georeferencedImageParams, uint32_t TileSize); + GeoreferencedImageInfo computeGeoreferencedImageAddressingAndPositioningInfo(); +private: // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial // @brief Transform worldspace coordinates into UV coordinates into the image - float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoords) const { return nbl::hlsl::mul(world2UV, worldCoords); } + float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoords) const { return nbl::hlsl::mul(worldToUV, worldCoords); } // @brief Transform worldspace coordinates into texel coordinates into the image - float64_t2 transformWorldCoordsToTexelCoords(const float64_t3 worldCoords) const { return float64_t2(georeferencedImageParams.imageExtents) * transformWorldCoordsToUV(worldCoords); } - // @brief Transform worldspace coordinates into tile coordinates into the image, where the image is broken up into tiles of size `TileSize` - float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords, const uint32_t TileSize) const { return (1.0 / TileSize) * transformWorldCoordsToTexelCoords(worldCoords); } + float64_t2 transformWorldCoordsToTexelCoords(const float64_t3 worldCoords) const { return float64_t2(fullResImageExtents) * transformWorldCoordsToUV(worldCoords); } + // @brief Transform worldspace coordinates into tile coordinates into the image, where the image is broken up into tiles of size `GeoreferencedImageTileSize` + float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords) const { return (1.0 / GeoreferencedImageTileSize) * transformWorldCoordsToTexelCoords(worldCoords); } + + /** + * @brief Compute the tile range and mip level needed to cover the viewport. + * + * Calculates which portion of the source image is visible through the given + * viewport and chooses the optimal mip level based on zoom (viewport size + * relative to the image). The returned range is always a subset of + * `currentMappedRegion` and covers only the visible tiles. + * + * @param currentViewportExtents Size of the viewport in pixels. 
+ * @param ndcToWorldMat Transform from NDC to world space, used to project + * the viewport onto the image. + * + * @return A tile range (`GeoreferencedImageTileRange`) representing the + * visible region at the chosen mip level. + */ + GeoreferencedImageTileRange computeViewportTileRange(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat); /* * @brief The GPU image backs a mapped region which is a rectangular sub-region of the original image. Note that a region being mapped does NOT imply it's currently resident in GPU memory. @@ -159,6 +239,14 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted */ void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); + /** + * @brief Resets the streaming state's GPU tile occupancy map. + * - Clears all previously marked resident tiles. + * - After this call, every entry in `currentMappedRegionOccupancy` is `false`, + * meaning the GPU image is considered completely dirty (no tiles mapped). + */ + void ResetTileOccupancyState(); + /* * @brief Slides the mapped region along the image, marking the tiles dropped as dirty but preserving the residency for tiles that are inside both the previous and new mapped regions. * Note that the checks for whether this is valid to do happen outside of this function. 
@@ -167,37 +255,21 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted */ void slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); - // @brief Info to match a gpu tile to the tile in the real image it should hold image data for - struct ImageTileToGPUTileCorrespondence - { - uint32_t2 imageTileIndex; - uint32_t2 gpuImageTileIndex; - }; - - /* - * @brief Given a tile range covering the viewport, returns which tiles (at the mip level of the current mapped region) need to be made resident to draw it, and to which tile of the gpu image each tile should be - * uploaded to - * - * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image covering the viewport - */ - core::vector tilesToLoad(const GeoreferencedImageTileRange& viewportTileRange) const; - - // @brief Returns the index of the last tile when covering the image with `mipLevel` tiles - uint32_t2 getLastTileIndex(uint32_t mipLevel) const - { - return (fullImageTileLength - 1u) >> mipLevel; - } - - // @brief Returns whether the last tile in the image (along each dimension) is visible from the current viewport - bool2 isLastTileVisible(const uint32_t2 viewportBottomRightTile) const - { - const uint32_t2 lastTileIndex = getLastTileIndex(currentMappedRegion.baseMipLevel); - return bool2(lastTileIndex.x == viewportBottomRightTile.x, lastTileIndex.y == viewportBottomRightTile.y); - } +protected: + friend class DrawResourcesFiller; - GeoreferencedImageParams georeferencedImageParams = {}; + // Oriented bounding box of the original image in world space (position + orientation) + OrientedBoundingBox2D worldspaceOBB = {}; + // Full resolution original image size in pixels (width, height) + uint32_t2 fullResImageExtents = {}; + // Pixel format of the image as provided by storage/loader (may differ from GPU format) + asset::E_FORMAT sourceImageFormat = {}; + // Filesystem path where image tiles are stored + std::filesystem::path storagePath = {}; + // 
GPU Image Params for the image to be created with + IGPUImage::SCreationParams gpuImageCreationParams = {}; + // 2D bool set for tile validity of the currentMappedRegionTileRange std::vector> currentMappedRegionOccupancy = {}; - // Sidelength of the gpu image, in mip 0 tiles that are `TileSize` (creation parameter) texels wide uint32_t gpuImageSideLengthTiles = {}; // We establish a max mipLevel for the image, which is the mip level at which any of width, height fit in a single tile @@ -207,32 +279,34 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // Indicates on which tile of the gpu image the current mapped region's `topLeft` resides uint32_t2 gpuImageTopLeft = {}; // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) - float64_t2x3 world2UV = {}; - // If the image dimensions are not exactly divisible by `TileSize`, then the last tile along a dimension only holds a proportion of `lastTileFraction` pixels along that dimension - float64_t lastTileFraction = {}; - // Reflects what fraction of a FULL tile the LAST tile in the image at the current mip level actually spans. 
- // It only gets set when necessary, and should always be updated correctly before being used, since it's related to the current `baseMipLevel` of the `currentMappedRegion` - uint32_t2 lastImageTileTexels = {}; - // Set mip level to extreme value so it gets recreated on first iteration - GeoreferencedImageTileRange currentMappedRegion = { .baseMipLevel = std::numeric_limits::max() }; + float64_t2x3 worldToUV = {}; + // The GPU-mapped region covering a subrectangle of the source image + GeoreferencedImageTileRange currentMappedRegionTileRange = { .baseMipLevel = std::numeric_limits::max() }; + // Tile range covering only the tiles currently visible in the viewport + GeoreferencedImageTileRange currentViewportTileRange = { .baseMipLevel = std::numeric_limits::max() }; + // Extents used for sampling the last tile (handles partial tiles / NPOT images); gets updated with `updateStreamingStateForViewport` + uint32_t2 lastTileSamplingExtent; + // Extents used when writing/updating the last tile in GPU memory (handles partial tiles / NPOT images); gets updated with `updateStreamingStateForViewport` + uint32_t2 lastTileTargetExtent; }; struct CachedImageRecord { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; - + uint32_t arrayIndex = InvalidTextureIndex; // index in our array of textures binding ImageType type = ImageType::INVALID; ImageState state = ImageState::INVALID; + nbl::asset::IImage::LAYOUT currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; uint64_t lastUsedFrameIndex = 0ull; // last used semaphore value on this image uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; uint64_t allocationSize = 0ull; core::smart_refctd_ptr gpuImageView = nullptr; core::smart_refctd_ptr staticCPUImage = nullptr; // cached cpu image for uploading to gpuImageView when needed. 
core::smart_refctd_ptr georeferencedImageState = nullptr; // Used to track tile residency for georeferenced images - + // In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value - CachedImageRecord(uint64_t currentFrameIndex) + CachedImageRecord(uint64_t currentFrameIndex) : arrayIndex(InvalidTextureIndex) , type(ImageType::INVALID) , state(ImageState::INVALID) @@ -241,14 +315,16 @@ struct CachedImageRecord , allocationSize(0ull) , gpuImageView(nullptr) , staticCPUImage(nullptr) - {} - - CachedImageRecord() + { + } + + CachedImageRecord() : CachedImageRecord(0ull) - {} + { + } // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value without changing `index` - inline CachedImageRecord& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } + inline CachedImageRecord& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; // A resource-aware image cache with an LRU eviction policy. @@ -261,10 +337,11 @@ class ImagesCache : public core::ResizableLRUCache { public: using base_t = core::ResizableLRUCache; - - ImagesCache(size_t capacity) + + ImagesCache(size_t capacity) : base_t(capacity) - {} + { + } // Attempts to insert a new image into the cache. // If the cache is full, invokes the provided `evictCallback` to evict an image. @@ -274,13 +351,13 @@ class ImagesCache : public core::ResizableLRUCache { return base_t::insert(imageID, lastUsedSema, evictCallback); } - + // Retrieves the image associated with `imageID`, updating its LRU position. inline CachedImageRecord* get(image_id imageID) { return base_t::get(imageID); } - + // Retrieves the ImageReference without updating LRU order. 
inline CachedImageRecord* peek(image_id imageID) { @@ -288,10 +365,10 @@ class ImagesCache : public core::ResizableLRUCache } inline size_t size() const { return base_t::size(); } - + // Selects an eviction candidate based on LRU policy. // In the future, this could factor in memory pressure or semaphore sync requirements. - inline image_id select_eviction_candidate() + inline image_id select_eviction_candidate() { const image_id* lru = base_t::get_least_recently_used(); if (lru) @@ -303,7 +380,7 @@ class ImagesCache : public core::ResizableLRUCache return ~0ull; } } - + // Removes a specific image from the cache (manual eviction). inline void erase(image_id imageID) { @@ -314,7 +391,7 @@ class ImagesCache : public core::ResizableLRUCache struct StreamedImageCopy { asset::E_FORMAT srcFormat; - smart_refctd_ptr srcBuffer; // Make it 'std::future' later? + std::future> srcBufferFuture; asset::IImage::SBufferCopy region; }; @@ -326,3 +403,60 @@ struct StaticImageInfo bool forceUpdate = false; // If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. asset::E_FORMAT imageViewFormatOverride = asset::E_FORMAT::EF_COUNT; // if asset::E_FORMAT::EF_COUNT then image view will have the same format as `cpuImage` }; + +/// @brief Abstract class with two overridable methods to load a region of an image, either by requesting a region at a target extent (like the loaders in n4ce do) or to request a specific region from a mip level +// (like precomputed mips solution would use). +struct IImageRegionLoader : IReferenceCounted +{ + /** + * @brief Load a region from an image - used to load from images with precomputed mips + * + * @param imagePath Path to file holding the image data + * @param offset Offset into the image (at requested mipLevel!) at which the region begins + * @param extent Extent of the region to load (at requested mipLevel!) 
+ * @param mipLevel From which mip level image to retrieve the data from + * @param downsample True if this request is supposed to go into GPU mip level 1, false otherwise + * + * @return ICPUBuffer with the requested image data + */ + core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) + { + assert(hasPrecomputedMips(imagePath)); + return load_impl(imagePath, offset, extent, mipLevel, downsample); + } + + /** + * @brief Load a region from an image - used to load from images using the n4ce loaders. Loads a region given by `offset, extent` as an image of size `targetExtent` + * where `targetExtent <= extent` so the loader is in charge of downsampling. + * + * @param imagePath Path to file holding the image data + * @param offset Offset into the image at which the region begins + * @param extent Extent of the region to load + * @param targetExtent Extent of the resulting image. Should NEVER be bigger than `extent` + * + * @return ICPUBuffer with the requested image data + */ + core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) + { + assert(!hasPrecomputedMips(imagePath)); + return load_impl(imagePath, offset, extent, targetExtent); + } + + // @brief Get the extents (in texels) of an image. + virtual uint32_t2 getExtents(std::filesystem::path imagePath) = 0; + + /** + * @brief Get the texel format for an image. + */ + virtual asset::E_FORMAT getFormat(std::filesystem::path imagePath) = 0; + + // @brief Returns whether the image should be loaded with the precomputed mip method or the n4ce loader method. 
+ virtual bool hasPrecomputedMips(std::filesystem::path imagePath) const = 0; +private: + + // @brief Override to support loading with precomputed mips + virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) { return nullptr; } + + // @brief Override to support loading with n4ce-style loaders + virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) { return nullptr; } +}; \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index fc0ad404d..0e3060e9d 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -365,7 +365,7 @@ bool performImageFormatPromotionCopy(const core::smart_refctd_ptrgetWidth(), m_window->getHeight())); - size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize); @@ -1716,7 +1714,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio 0.0f, 0.0f, 1.0f); globalData.miterLimit = 10.0f; globalData.currentlyActiveMainObjectIndex = drawResourcesFiller.getActiveMainObjectIndex(); - SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; + SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer}; bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); assert(updateSuccess); @@ -1806,7 +1804,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio { if (drawCall.isDTMRendering) { - cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); + cb->bindIndexBuffer({ 
.offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer()}, asset::EIT_32BIT); PushConstants pc = { .triangleMeshVerticesBaseAddress = drawCall.dtm.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, @@ -1828,7 +1826,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; // assert(currentIndexCount == resourcesCollection.indexBuffer.getCount()); - cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer}, asset::EIT_32BIT); cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); } } @@ -1854,7 +1852,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; // assert(currentIndexCount == resourcesCollection.indexBuffer.getCount()); - cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer}, asset::EIT_32BIT); cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); } @@ -3883,11 +3881,10 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio constexpr float64_t3 bottomLeftViewportH = float64_t3(-1.0, 1.0, 1.0); constexpr float64_t3 bottomRightViewportH = float64_t3(1.0, 1.0, 1.0); - GeoreferencedImageParams georeferencedImageParams; - georeferencedImageParams.storagePath = georeferencedImagePath; - 
georeferencedImageParams.format = drawResourcesFiller.queryGeoreferencedImageFormat(georeferencedImagePath); - georeferencedImageParams.imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); - + //GeoreferencedImageParams georeferencedImageParams; + //georeferencedImageParams.storagePath = georeferencedImagePath; + //georeferencedImageParams.format = drawResourcesFiller.queryGeoreferencedImageFormat(georeferencedImagePath); + //georeferencedImageParams.imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); image_id georefImageID = 6996; // Position at topLeft viewport @@ -3896,28 +3893,30 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio if constexpr (testCameraRotation) projectionToNDC = rotateBasedOnTime(projectionToNDC); auto inverseViewProj = nbl::hlsl::inverse(projectionToNDC); - - const static auto startingTopLeft = nbl::hlsl::mul(inverseViewProj, topLeftViewportH); - georeferencedImageParams.worldspaceOBB.topLeft = startingTopLeft; // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU const static float64_t startingImagePixelsPerViewportPixels = 1.0; const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); const static auto dirU = startingViewportWidthVector * float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); - - // DEBUG - //georefImageOBB.topLeft += float32_t2(startingViewportWidthVector - dirU); - georeferencedImageParams.worldspaceOBB.dirU = dirU; + // Unnecessary but should go into a callback if window can change dimensions during execution + drawResourcesFiller.updateViewportInfo(uint32_t2(m_window->getWidth(), m_window->getHeight()), inverseViewProj); + + const static auto startingTopLeft = 
nbl::hlsl::mul(inverseViewProj, topLeftViewportH); const uint32_t2 imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); - georeferencedImageParams.worldspaceOBB.aspectRatio = float32_t(imageExtents.y) / imageExtents.x; + OrientedBoundingBox2D georefImageBB = { .topLeft = startingTopLeft, .dirU = dirU, .aspectRatio = float32_t(imageExtents.y) / imageExtents.x }; - // Unnecessary but should go into a callback if window can change dimensions during execution - drawResourcesFiller.setViewportExtent(uint32_t2(m_window->getWidth(), m_window->getHeight())); + auto streamingState = drawResourcesFiller.ensureGeoreferencedImageEntry(georefImageID, georefImageBB, georeferencedImagePath); + constexpr static WorldClipRect invalidClipRect = { .minClip = float64_t2(std::numeric_limits::signaling_NaN()) }; + drawResourcesFiller.launchGeoreferencedImageTileLoads(georefImageID, streamingState.get(), invalidClipRect); + + drawResourcesFiller.drawGeoreferencedImage(georefImageID, std::move(streamingState), intendedNextSubmit); + + drawResourcesFiller.finalizeGeoreferencedImageTileLoads(intendedNextSubmit); - drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(georefImageID, std::move(georeferencedImageParams), intendedNextSubmit); + //drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(georefImageID, std::move(georeferencedImageParams), intendedNextSubmit); - drawResourcesFiller.addGeoreferencedImage(georefImageID, inverseViewProj, intendedNextSubmit); + //drawResourcesFiller.addGeoreferencedImage(georefImageID, inverseViewProj, intendedNextSubmit); } } From 9c1dfdce935a3a1d7eb2896dfb5375fb29205693 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 11 Nov 2025 09:30:22 +0400 Subject: [PATCH 27/29] 6 month old Polyline and Hatch changes brought over from n4ce --- 62_CAD/Hatch.cpp | 66 ++++++++++++++++++++++++++++++--------------- 62_CAD/Polyline.cpp | 2 +- 2 files changed, 46 insertions(+), 22 
deletions(-) diff --git a/62_CAD/Hatch.cpp b/62_CAD/Hatch.cpp index cfb10b9af..b383e7d81 100644 --- a/62_CAD/Hatch.cpp +++ b/62_CAD/Hatch.cpp @@ -812,7 +812,7 @@ static constexpr float64_t FillPatternShapeExtent = 32.0; void line(std::vector& polylines, float64_t2 begin, float64_t2 end) { - std::vector points = { + std::array points = { begin, end }; CPolyline polyline; @@ -846,15 +846,23 @@ void checkered(std::vector& polylines, const float64_t2& offset) float64_t2(0.0, 1.0), }; { - std::vector points; - points.reserve(squarePointsCW.size()); - for (const auto& p : squarePointsCW) points.push_back(p * FillPatternShapeExtent + offset); + std::array points; + auto i = 0u; + for (const auto& p : squarePointsCW) + { + points[i] = p * FillPatternShapeExtent + offset; + i++; + } polyline.addLinePoints(points); } { - std::vector points; - points.reserve(squarePointsCW.size()); - for (const auto& p : squarePointsCW) points.push_back((p + float64_t2(0.5, -0.5)) * FillPatternShapeExtent + offset); + std::array points; + auto i = 0u; + for (const auto& p : squarePointsCW) + { + points[i] = (p + float64_t2(0.5, -0.5)) * FillPatternShapeExtent + offset; + i++; + } polyline.addLinePoints(points); } polylines.push_back(std::move(polyline)); @@ -885,16 +893,24 @@ void diamonds(std::vector& polylines, const float64_t2& offset) // Outer { - std::vector points; - points.reserve(diamondPointsCW.size()); - for (const auto& p : diamondPointsCW) points.push_back(p * outerSize + origin); + std::array points; + auto i = 0u; + for (const auto& p : diamondPointsCW) + { + points[i] = p * outerSize + origin; + i++; + } polyline.addLinePoints(points); } // Inner { - std::vector points; - points.reserve(diamondPointsCCW.size()); - for (const auto& p : diamondPointsCCW) points.push_back(p * innerSize + origin); + std::array points; + auto i = 0u; + for (const auto& p : diamondPointsCCW) + { + points[i] = p * innerSize + origin; + i++; + } polyline.addLinePoints(points); } 
polylines.push_back(std::move(polyline)); @@ -915,9 +931,13 @@ void crossHatch(std::vector& polylines, const float64_t2& offset) float64_t2(0.375, 0.0), }; { - std::vector points; - points.reserve(outerPointsCW.size()); - for (const auto& p : outerPointsCW) points.push_back(p * FillPatternShapeExtent + offset); + std::array points; + auto i = 0u; + for (const auto& p : outerPointsCW) + { + points[i] = p * FillPatternShapeExtent + offset; + i++; + } polyline.addLinePoints(points); } @@ -930,9 +950,13 @@ void crossHatch(std::vector& polylines, const float64_t2& offset) }; { float64_t2 origin = float64_t2(FillPatternShapeExtent/2.0, FillPatternShapeExtent/2.0) + offset; - std::vector points; - points.reserve(diamondPointsCCW.size()); - for (const auto& p : diamondPointsCCW) points.push_back(p * 0.75 * FillPatternShapeExtent + origin); + std::array points; + auto i = 0u; + for (const auto& p : diamondPointsCCW) + { + points[i] = p * 0.75 * FillPatternShapeExtent + origin; + i++; + } polyline.addLinePoints(points); } polylines.push_back(std::move(polyline)); @@ -948,7 +972,7 @@ void hatch(std::vector& polylines, const float64_t2& offset) { float64_t2 radiusOffsetTL = float64_t2(+lineDiameter / 2.0, +lineDiameter / 2.0) * FillPatternShapeExtent / 8.0; float64_t2 radiusOffsetBL = float64_t2(-lineDiameter / 2.0, -lineDiameter / 2.0) * FillPatternShapeExtent / 8.0; - std::vector points = { + std::array points = { basePt0 + radiusOffsetTL, basePt0 + radiusOffsetBL, // 0 basePt1 + radiusOffsetBL, // 1 @@ -1052,7 +1076,7 @@ void reverseHatch(std::vector& polylines, const float64_t2& offset) { float64_t2 radiusOffsetTL = float64_t2(-lineDiameter / 2.0, +lineDiameter / 2.0) * FillPatternShapeExtent / 8.0; float64_t2 radiusOffsetBL = float64_t2(+lineDiameter / 2.0, -lineDiameter / 2.0) * FillPatternShapeExtent / 8.0; - std::vector points = { + std::array points = { basePt0 + radiusOffsetTL, basePt1 + radiusOffsetTL, // 0 basePt1 + radiusOffsetBL, // 1 diff --git 
a/62_CAD/Polyline.cpp b/62_CAD/Polyline.cpp index b00a64a88..4149942c7 100644 --- a/62_CAD/Polyline.cpp +++ b/62_CAD/Polyline.cpp @@ -387,7 +387,7 @@ CPolyline CPolyline::generateParallelPolyline(float64_t offset, const float64_t { // TODO: try merging lines if they have same tangent (resultin in less points) std::vector newLinePoints; - newLinePoints.reserve(m_linePoints.size()); + newLinePoints.reserve(section.count + 1); for (uint32_t j = 0; j < section.count + 1; ++j) { const uint32_t linePointIdx = section.index + j; From 57912507f4d9fac493c4127ebf9ba9969449eabe Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: Tue, 11 Nov 2025 10:16:54 +0400 Subject: [PATCH 28/29] Bringing changes from n4ce --- 62_CAD/CMakeLists.txt | 2 - 62_CAD/DrawResourcesFiller.cpp | 996 ++++++++++-------- 62_CAD/DrawResourcesFiller.h | 476 ++++----- 62_CAD/GeoTexture.cpp | 117 -- 62_CAD/GeoTexture.h | 64 -- 62_CAD/Images.cpp | 56 +- 62_CAD/Images.h | 130 ++- 62_CAD/main.cpp | 61 +- 62_CAD/shaders/geotexture/common.hlsl | 35 - .../shaders/geotexture/fragment_shader.hlsl | 9 - 62_CAD/shaders/geotexture/vertex_shader.hlsl | 25 - 62_CAD/shaders/globals.hlsl | 15 +- .../main_pipeline/fragment_shader.hlsl | 13 +- 13 files changed, 982 insertions(+), 1017 deletions(-) delete mode 100644 62_CAD/GeoTexture.cpp delete mode 100644 62_CAD/GeoTexture.h delete mode 100644 62_CAD/shaders/geotexture/common.hlsl delete mode 100644 62_CAD/shaders/geotexture/fragment_shader.hlsl delete mode 100644 62_CAD/shaders/geotexture/vertex_shader.hlsl diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index 756965c85..bde4ade92 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -14,8 +14,6 @@ set(EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.h" "${CMAKE_CURRENT_SOURCE_DIR}/SingleLineText.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/SingleLineText.h" - "${CMAKE_CURRENT_SOURCE_DIR}/GeoTexture.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/GeoTexture.h" "${CMAKE_CURRENT_SOURCE_DIR}/Images.cpp" 
"../../src/nbl/ext/TextRendering/TextRendering.cpp" # TODO: this one will be a part of dedicated Nabla ext called "TextRendering" later on which uses MSDF + Freetype ) diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 88f4914e9..97ae6621b 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -3,11 +3,12 @@ using namespace nbl; DrawResourcesFiller::DrawResourcesFiller() -{ -} +{} -DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : - m_utilities(std::move(utils)), +DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& device, smart_refctd_ptr&& bufferUploadUtils, smart_refctd_ptr&& imageUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : + m_device(std::move(device)), + m_bufferUploadUtils(std::move(bufferUploadUtils)), + m_imageUploadUtils(std::move(imageUploadUtils)), m_copyQueue(copyQueue), m_logger(std::move(logger)) { @@ -25,10 +26,10 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) void DrawResourcesFiller::setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding) { imagesArrayBinding = binding; - suballocatedDescriptorSet = core::make_smart_refctd_ptr(std::move(descriptorSet)); + imagesDescriptorIndexAllocator = core::make_smart_refctd_ptr(std::move(descriptorSet)); } -bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize) +bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder) { // requiredImageMemorySize = core::alignUp(50'399'744 * 2, 1024); // single memory allocation sectioned into images+buffers (images start at offset=0) @@ -40,10 +41,17 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s 
resourcesBufferCreationParams.size = adjustedBuffersMemorySize; resourcesBufferCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; resourcesGPUBuffer = logicalDevice->createBuffer(std::move(resourcesBufferCreationParams)); + + if (!resourcesGPUBuffer) + { + m_logger.log("Failed to create resourcesGPUBuffer.", nbl::system::ILogger::ELL_ERROR); + return false; + } + resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); - + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuBufferMemoryReqs = resourcesGPUBuffer->getMemoryReqs(); const bool memoryRequirementsMatch = (logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits() & gpuBufferMemoryReqs.memoryTypeBits) != 0 && // should have device local memory compatible @@ -54,42 +62,32 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s m_logger.log("Shouldn't happen: Buffer Memory Requires Dedicated Allocation or can't biind to device local memory.", nbl::system::ILogger::ELL_ERROR); return false; } - + const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); - uint32_t memoryTypeIdx = ~0u; - video::IDeviceMemoryAllocator::SAllocation allocation = {}; - for (uint32_t i = 0u; i < memoryProperties.memoryTypeCount; ++i) + for (const auto& memoryTypeIdx : memoryTypeIndexTryOrder) { - if (memoryProperties.memoryTypes[i].propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + IDeviceMemoryAllocator::SAllocateInfo allocationInfo = { - memoryTypeIdx = i; - - IDeviceMemoryAllocator::SAllocateInfo allocationInfo = - { - .size = totalResourcesSize, - .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers - .memoryTypeIndex = memoryTypeIdx, - .dedication = nullptr, - }; - - allocation = 
logicalDevice->allocate(allocationInfo); + .size = totalResourcesSize, + .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers + .memoryTypeIndex = memoryTypeIdx, + .dedication = nullptr, + }; - if (allocation.isValid()) - break; - } + allocation = logicalDevice->allocate(allocationInfo); + + if (allocation.isValid()) + break; } - if (memoryTypeIdx == ~0u) + if (!allocation.isValid()) { - m_logger.log("allocateResourcesBuffer: no device local memory type found!", nbl::system::ILogger::ELL_ERROR); + m_logger.log("Failed Allocation for draw resources!", nbl::system::ILogger::ELL_ERROR); return false; } - if (!allocation.isValid()) - return false; - imagesMemoryArena = { .memory = allocation.memory, .offset = allocation.offset, @@ -106,7 +104,7 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s .buffer = resourcesGPUBuffer.get(), .binding = { .memory = buffersMemoryArena.memory.get(), - .offset = buffersMemoryArena.offset, + .offset = buffersMemoryArena.offset, } }; @@ -119,7 +117,7 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s return true; } -bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent, uint32_t maxTries) +bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent, uint32_t maxTries) { const size_t minimumAcceptableSize = core::max(MinimumDrawResourcesMemorySize, getMinimumRequiredResourcesBufferSize()); @@ -140,16 +138,16 @@ bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevic uint32_t numTries = 0u; while ((currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries) { - if 
(allocateDrawResources(logicalDevice, currentBufferSize, currentImageSize)) + if (allocateDrawResources(logicalDevice, currentImageSize, currentBufferSize, memoryTypeIndexTryOrder)) { m_logger.log("Successfully allocated memory for images (%zu) and buffers (%zu).", system::ILogger::ELL_INFO, currentImageSize, currentBufferSize); return true; } + m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); currentBufferSize = (currentBufferSize * (100 - reductionPercent)) / 100; currentImageSize = (currentImageSize * (100 - reductionPercent)) / 100; numTries++; - m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); } m_logger.log("All attempts to allocate memory for images(%zu) and buffers(%zu) failed.", system::ILogger::ELL_ERROR, currentImageSize, currentBufferSize); @@ -160,13 +158,13 @@ bool DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui { // TODO: Make this function failable and report insufficient memory asset::E_FORMAT msdfFormat = MSDFTextureFormat; - asset::VkExtent3D MSDFsExtent = { msdfsExtent.x, msdfsExtent.y, 1u }; + asset::VkExtent3D MSDFsExtent = { msdfsExtent.x, msdfsExtent.y, 1u }; if (maxMSDFs > logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers) { m_logger.log("requested maxMSDFs is greater than maxImageArrayLayers. 
lowering the limit...", nbl::system::ILogger::ELL_WARNING); maxMSDFs = logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers; } - + IPhysicalDevice::SImageFormatPromotionRequest promotionRequest = {}; promotionRequest.originalFormat = msdfFormat; promotionRequest.usages = {}; @@ -178,7 +176,7 @@ bool DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui imgInfo.format = msdfFormat; imgInfo.type = IGPUImage::ET_2D; imgInfo.extent = MSDFsExtent; - imgInfo.mipLevels = MSDFMips; + imgInfo.mipLevels = MSDFMips; imgInfo.arrayLayers = maxMSDFs; imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; imgInfo.flags = asset::IImage::E_CREATE_FLAGS::ECF_NONE; @@ -224,7 +222,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const Line return; setActiveLineStyle(lineStyleInfo); - + beginMainObject(MainObjectType::POLYLINE, TransformationType::TT_NORMAL); drawPolyline(polyline, intendedNextSubmit); endMainObject(); @@ -236,7 +234,7 @@ void DrawResourcesFiller::drawFixedGeometryPolyline(const CPolylineBase& polylin return; setActiveLineStyle(lineStyleInfo); - + pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); beginMainObject(MainObjectType::POLYLINE, transformationType); drawPolyline(polyline, intendedNextSubmit); @@ -253,7 +251,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedS assert(false); return; } - + const auto sectionsCount = polyline.getSectionsCount(); uint32_t currentSectionIdx = 0u; @@ -304,7 +302,7 @@ void DrawResourcesFiller::drawTriangleMesh( return; } - DrawCallData drawCallData = {}; + DrawCallData drawCallData = {}; drawCallData.isDTMRendering = true; ICPUBuffer::SCreationParams geometryBuffParams; @@ -313,7 +311,7 @@ void DrawResourcesFiller::drawTriangleMesh( const auto& indexBuffer = mesh.getIndices(); const auto& vertexBuffer = mesh.getVertices(); assert(indexBuffer.size() == vertexBuffer.size()); // We don't have any 
vertex re-use due to other limitations at the moemnt. - + const uint32_t numTriangles = indexBuffer.size() / 3u; uint32_t trianglesUploaded = 0; @@ -333,18 +331,18 @@ void DrawResourcesFiller::drawTriangleMesh( void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; // the actual bda address will be determined only after all copies are finalized, later we will do += `baseBDAAddress + geometryInfo.bufferOffset` // the - is a small hack because index buffer grows but vertex buffer needs to start from 0, remove that once we either get rid of the index buffer or implement an algorithm that can have vertex reuse - drawCallData.dtm.triangleMeshVerticesBaseAddress = geometryBufferOffset - (sizeof(CTriangleMesh::vertex_t) * trianglesUploaded * 3); + drawCallData.dtm.triangleMeshVerticesBaseAddress = geometryBufferOffset - (sizeof(CTriangleMesh::vertex_t) * trianglesUploaded * 3); memcpy(dst, &vertexBuffer[trianglesUploaded * 3u], vtxBuffByteSize); - geometryBufferOffset += vtxBuffByteSize; + geometryBufferOffset += vtxBuffByteSize; // Copy IndexBuffer dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; drawCallData.dtm.indexBufferOffset = geometryBufferOffset; memcpy(dst, &indexBuffer[trianglesUploaded * 3u], indexBuffByteSize); geometryBufferOffset += indexBuffByteSize; - + trianglesUploaded += trianglesToUpload; - + drawCallData.dtm.triangleMeshMainObjectIndex = mainObjectIdx; drawCallData.dtm.indexCount = trianglesToUpload * 3u; drawCalls.push_back(drawCallData); @@ -366,11 +364,11 @@ void DrawResourcesFiller::drawTriangleMesh( // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + patternColor void DrawResourcesFiller::drawHatch( - const Hatch& hatch, - const float32_t4& foregroundColor, - const float32_t4& backgroundColor, - const HatchFillPattern fillPattern, - SIntendedSubmitInfo& intendedNextSubmit) + const Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const 
HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit) { // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) @@ -381,10 +379,10 @@ void DrawResourcesFiller::drawHatch( } void DrawResourcesFiller::drawHatch( - const Hatch& hatch, - const float32_t4& color, - const HatchFillPattern fillPattern, - SIntendedSubmitInfo& intendedNextSubmit) + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit) { drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit); } @@ -395,13 +393,13 @@ void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, } void DrawResourcesFiller::drawFixedGeometryHatch( - const Hatch& hatch, - const float32_t4& foregroundColor, - const float32_t4& backgroundColor, - const HatchFillPattern fillPattern, - const float64_t3x3& transformation, - TransformationType transformationType, - SIntendedSubmitInfo& intendedNextSubmit) + const Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) { // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) @@ -474,13 +472,13 @@ void DrawResourcesFiller::drawHatch_impl( } void DrawResourcesFiller::drawFontGlyph( - nbl::ext::TextRendering::FontFace* fontFace, - uint32_t glyphIdx, - float64_t2 topLeft, - float32_t2 dirU, - float32_t aspectRatio, - float32_t2 minUV, - SIntendedSubmitInfo& intendedNextSubmit) + nbl::ext::TextRendering::FontFace* fontFace, + uint32_t glyphIdx, + float64_t2 topLeft, + float32_t2 
dirU, + float32_t aspectRatio, + float32_t2 minUV, + SIntendedSubmitInfo& intendedNextSubmit) { uint32_t textureIdx = InvalidTextureIndex; const MSDFInputInfo msdfInput = MSDFInputInfo(fontFace->getHash(), glyphIdx); @@ -495,7 +493,7 @@ void DrawResourcesFiller::drawFontGlyph( assert(false); return; } - + if (textureIdx != InvalidTextureIndex) { GlyphInfo glyphInfo = GlyphInfo(topLeft, dirU, aspectRatio, textureIdx, minUV); @@ -520,13 +518,13 @@ void DrawResourcesFiller::drawFontGlyph( bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit) { - // Try inserting or updating the image usage in the cache. - // If the image is already present, updates its semaphore value. - auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - CachedImageRecord* cachedImageRecord = imagesCache->insert(staticImage.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN - - if (cachedImageRecord->arrayIndex != InvalidTextureIndex && staticImage.forceUpdate) + // imagesCache->logState(m_logger); + + // Check if image already exists and requires force update. We do this before insertion and updating `lastUsedFrameIndex` to get correct overflow-submit behaviour + // otherwise we'd always overflow submit, even if not needed and image was not queued/intended to use in the next submit. 
+ CachedImageRecord* cachedImageRecord = imagesCache->get(staticImage.imageID); + + if (cachedImageRecord && cachedImageRecord->arrayIndex != InvalidTextureIndex && staticImage.forceUpdate) { // found in cache, and we want to force new data into the image if (cachedImageRecord->staticCPUImage) @@ -537,8 +535,8 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s if (needsRecreation) { // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena along with it's allocated array slot from the suballocated descriptor set - evictCallback(staticImage.imageID, *cachedImageRecord); - + evictImage_SubmitIfNeeded(staticImage.imageID, *cachedImageRecord, intendedNextSubmit); + // Instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image // imagesCache->erase(imageID); // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); @@ -548,7 +546,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s { // Doesn't need image recreation, we'll use the same array index in descriptor set + the same bound memory. // reset it's state + update the cpu image used for copying. - cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; cachedImageRecord->staticCPUImage = staticImage.cpuImage; } } @@ -558,6 +556,13 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s } } + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. 
+ auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + cachedImageRecord = imagesCache->insert(staticImage.imageID, currentFrameIndex, evictCallback); + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // in which case we don't queue anything for upload, and return the idx if (cachedImageRecord->arrayIndex == InvalidTextureIndex) @@ -565,23 +570,23 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s // This is a new image (cache miss). Allocate a descriptor index for it. cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
- suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + imagesDescriptorIndexAllocator->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + cachedImageRecord->arrayIndexAllocatedUsingImageDescriptorIndexAllocator = true; if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { - auto* device = m_utilities->getLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + auto* physDev = m_device->getPhysicalDevice(); IGPUImage::SCreationParams imageParams = {}; imageParams = staticImage.cpuImage->getCreationParameters(); - imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT; + imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; // promote format because RGB8 and friends don't actually exist in HW { const IPhysicalDevice::SImageFormatPromotionRequest request = { .originalFormat = imageParams.format, .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) }; - imageParams.format = physDev->promoteImageFormat(request, imageParams.tiling); + imageParams.format = physDev->promoteImageFormat(request,imageParams.tiling); } // Attempt to create a GPU image and image view for this texture. 
@@ -591,13 +596,14 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s { cachedImageRecord->type = ImageType::STATIC; cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; - cachedImageRecord->currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; + cachedImageRecord->currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN cachedImageRecord->allocationOffset = allocResults.allocationOffset; cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; cachedImageRecord->staticCPUImage = staticImage.cpuImage; cachedImageRecord->georeferencedImageState = nullptr; + evictConflictingImagesInCache_SubmitIfNeeded(staticImage.imageID, *cachedImageRecord, intendedNextSubmit); } else { @@ -619,7 +625,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s // We previously allocated a descriptor index, but failed to create a usable GPU image. // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. // No semaphore wait needed here, as the GPU never got to use this slot. 
- suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + imagesDescriptorIndexAllocator->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); cachedImageRecord->arrayIndex = InvalidTextureIndex; } @@ -633,8 +639,8 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s cachedImageRecord->arrayIndex = InvalidTextureIndex; } } - - + + // cached or just inserted, we update the lastUsedFrameIndex cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; @@ -771,12 +777,11 @@ uint32_t2 DrawResourcesFiller::computeStreamingImageExtentsForViewportCoverage(c return { gpuImageSidelength, gpuImageSidelength }; } -nbl::core::smart_refctd_ptr DrawResourcesFiller::ensureGeoreferencedImageEntry(image_id imageID, const OrientedBoundingBox2D& worldSpaceOBB, const std::filesystem::path& storagePath) +nbl::core::smart_refctd_ptr DrawResourcesFiller::ensureGeoreferencedImageEntry(image_id imageID, const OrientedBoundingBox2D& worldSpaceOBB, const uint32_t2 currentViewportExtents, const float64_t3x3& ndcToWorldMat, const std::filesystem::path& storagePath) { nbl::core::smart_refctd_ptr ret = nullptr; - auto* device = m_utilities->getLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + auto* physDev = m_device->getPhysicalDevice(); if (!imageLoader) { @@ -797,16 +802,16 @@ nbl::core::smart_refctd_ptr DrawResourcesFille gpuImageCreationParams.mipLevels = 2u; gpuImageCreationParams.arrayLayers = 1u; - gpuImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT; + gpuImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; // promote format because RGB8 and friends don't actually exist in HW { const IPhysicalDevice::SImageFormatPromotionRequest request = { .originalFormat = gpuImageCreationParams.format, .usages = 
IPhysicalDevice::SFormatImageUsages::SUsage(gpuImageCreationParams.usage) }; - gpuImageCreationParams.format = physDev->promoteImageFormat(request, gpuImageCreationParams.tiling); + gpuImageCreationParams.format = physDev->promoteImageFormat(request,gpuImageCreationParams.tiling); } - + CachedImageRecord* cachedImageRecord = imagesCache->get(imageID); if (!cachedImageRecord) { @@ -822,26 +827,24 @@ nbl::core::smart_refctd_ptr DrawResourcesFille m_logger.log("image had entry in the cache but cachedImageRecord->georeferencedImageState was nullptr, this shouldn't happen!", nbl::system::ILogger::ELL_ERROR); ret = cachedImageRecord->georeferencedImageState; } - + // Update GeoreferencedImageState with new viewport width/height and requirements // width only because gpu image is square const uint32_t newGPUImageSideLengthTiles = gpuImageCreationParams.extent.width / GeoreferencedImageTileSize; - + // This will reset the residency state after a resize. it makes sense because when gpu image is resized, it's recreated and no previous tile is resident anymore // We don't copy tiles between prev/next resized image, we're more focused on optimizing pan/zoom with a fixed window size. 
if (ret->gpuImageSideLengthTiles != newGPUImageSideLengthTiles) { ret->gpuImageSideLengthTiles = newGPUImageSideLengthTiles; ret->ResetTileOccupancyState(); + ret->currentMappedRegionTileRange = { .baseMipLevel = std::numeric_limits::max() }; } - - // DONT UNCOMMENT, IT WILL SLOW DOWN LOADING: It's Test For full re-cache loading speed - // ret->ResetTileOccupancyState(); - + ret->gpuImageCreationParams = std::move(gpuImageCreationParams); // Update with current viewport - ret->updateStreamingStateForViewport(currentViewportExtents, ndcToWorldTransformationMatrix); + ret->updateStreamingStateForViewport(currentViewportExtents, ndcToWorldMat); return ret; } @@ -863,6 +866,10 @@ bool DrawResourcesFiller::launchGeoreferencedImageTileLoads(image_id imageID, Ge // We need to make every tile that covers the viewport resident. We reserve the amount of tiles needed for upload. auto tilesToLoad = imageStreamingState->tilesToLoad(); + + // m_logger.log(std::format("Tiles to Load = {}.", tilesToLoad.size()).c_str(), nbl::system::ILogger::ELL_INFO); + + const uint32_t2 imageExtents = imageStreamingState->fullResImageExtents; const std::filesystem::path imageStoragePath = imageStreamingState->storagePath; @@ -887,7 +894,7 @@ bool DrawResourcesFiller::launchGeoreferencedImageTileLoads(image_id imageID, Ge float64_t minY = std::min({ topLeftWorld.y, topRightWorld.y, bottomLeftWorld.y, bottomRightWorld.y }); float64_t maxX = std::max({ topLeftWorld.x, topRightWorld.x, bottomLeftWorld.x, bottomRightWorld.x }); float64_t maxY = std::max({ topLeftWorld.y, topRightWorld.y, bottomLeftWorld.y, bottomRightWorld.y }); - + // Check if the tile intersects clip rect at all. 
Note that y clips are inverted if (maxX < clipRect.minClip.x || minX > clipRect.maxClip.x || maxY < clipRect.maxClip.y || minY > clipRect.minClip.y) continue; @@ -924,19 +931,19 @@ bool DrawResourcesFiller::launchGeoreferencedImageTileLoads(image_id imageID, Ge { gpuMip0Tile = std::async(std::launch::async, [=, this]() { return imageLoader->load(imageStoragePath, samplingOffsetMip0, samplingExtentMip0, targetExtentMip0); - }); + }); gpuMip1Tile = std::async(std::launch::async, [=, this]() { return imageLoader->load(imageStoragePath, samplingOffsetMip0, samplingExtentMip0, targetExtentMip0 / 2u); - }); + }); } else { gpuMip0Tile = std::async(std::launch::async, [=, this]() { return imageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSize, targetExtentMip0, imageStreamingState->currentMappedRegionTileRange.baseMipLevel, false); - }); + }); gpuMip1Tile = std::async(std::launch::async, [=, this]() { return imageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSizeMip1, targetExtentMip0 / 2u, imageStreamingState->currentMappedRegionTileRange.baseMipLevel, true); - }); + }); } } @@ -981,6 +988,15 @@ bool DrawResourcesFiller::launchGeoreferencedImageTileLoads(image_id imageID, Ge return true; } +bool DrawResourcesFiller::cancelGeoreferencedImageTileLoads(image_id imageID) +{ + auto it = streamedImageCopies.find(imageID); + if (it != streamedImageCopies.end()) + it->second.clear(); // clear the vector of copies for this image + + return true; +} + void DrawResourcesFiller::drawGeoreferencedImage(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit) { // OutputDebugStringA(std::format("Image Cache Size = {} ", imagesCache->size()).c_str()); @@ -1045,7 +1061,6 @@ bool DrawResourcesFiller::finalizeGeoreferencedImageTileLoads(SIntendedSubmitInf if (streamedImageCopies.size() > 0ull) { - auto* device = m_utilities->getLogicalDevice(); auto* cmdBuffInfo = 
intendedNextSubmit.getCommandBufferForRecording(); if (cmdBuffInfo) @@ -1059,134 +1074,148 @@ bool DrawResourcesFiller::finalizeGeoreferencedImageTileLoads(SIntendedSubmitInf const auto& imageID = it->first; auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord && imageRecord->gpuImageView && imageRecord->georeferencedImageState) - validCopies.push_back(it); - else - m_logger.log(std::format("Can't upload to imageId {} yet. (no gpu record yet).", imageID).c_str(), nbl::system::ILogger::ELL_INFO); + if (it->second.size() > 0u) + { + if (imageRecord && imageRecord->gpuImageView && imageRecord->georeferencedImageState) + validCopies.push_back(it); + else + m_logger.log(std::format("Can't upload to imageId {} yet. (no gpu record yet).", imageID).c_str(), nbl::system::ILogger::ELL_INFO); + } } + + // m_logger.log(std::format("{} Valid Copies, Frame Idx = {}.", validCopies.size(), currentFrameIndex).c_str(), nbl::system::ILogger::ELL_INFO); - IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; - - std::vector beforeCopyImageBarriers; - beforeCopyImageBarriers.reserve(streamedImageCopies.size()); - - // Pipeline Barriers before imageCopy - for (auto it : validCopies) + if (validCopies.size() > 0u) { - auto& [imageID, imageCopies] = *it; - // OutputDebugStringA(std::format("Copying {} copies for Id = {} \n", imageCopies.size(), imageID).c_str()); + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + std::vector beforeCopyImageBarriers; + beforeCopyImageBarriers.reserve(streamedImageCopies.size()); - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) + // Pipeline Barriers before imageCopy + for (auto it : validCopies) { - m_logger.log(std::format("`pushStreamedImagesUploads` failed, no image record found for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); - continue; - } + auto& [imageID, imageCopies] = *it; + // OutputDebugStringA(std::format("Copying {} copies for Id = {} \n", imageCopies.size(), 
imageID).c_str()); - const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; - - IImage::LAYOUT newLayout = IImage::LAYOUT::GENERAL; - - beforeCopyImageBarriers.push_back( + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - } - // .ownershipOp. No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - }, - .oldLayout = imageRecord->currentLayout, - .newLayout = newLayout, - }); - imageRecord->currentLayout = newLayout; - } - success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); + m_logger.log(std::format("`pushStreamedImagesUploads` failed, no image record found for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); + continue; + } - for (auto it : validCopies) - { - auto& [imageID, imageCopies] = *it; - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) - continue; + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + IImage::LAYOUT newLayout = IImage::LAYOUT::GENERAL; - const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + beforeCopyImageBarriers.push_back( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } + // 
.ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = imageRecord->currentLayout, + .newLayout = newLayout, + }); + imageRecord->currentLayout = newLayout; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - for (auto& imageCopy : imageCopies) + for (auto it : validCopies) { - auto srcBuffer = imageCopy.srcBufferFuture.get(); - if (srcBuffer) + auto& [imageID, imageCopies] = *it; + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + for (auto& imageCopy : imageCopies) { - success &= m_utilities->updateImageViaStagingBuffer( - intendedNextSubmit, - srcBuffer->getPointer(), imageCopy.srcFormat, - gpuImg.get(), IImage::LAYOUT::GENERAL, - { &imageCopy.region, 1u }); + auto srcBuffer = imageCopy.srcBufferFuture.get(); + if (srcBuffer) + { + const bool copySuccess = m_imageUploadUtils->updateImageViaStagingBuffer( + intendedNextSubmit, + srcBuffer->getPointer(), imageCopy.srcFormat, + gpuImg.get(), IImage::LAYOUT::GENERAL, + { &imageCopy.region, 1u }); + success &= copySuccess; + if (!copySuccess) + { + m_logger.log(std::format("updateImageViaStagingBuffer failed. 
region offset = ({}, {}), region size = ({}, {}), gpu image size = ({}, {})", + imageCopy.region.imageOffset.x,imageCopy.region.imageOffset.y, + imageCopy.region.imageExtent.width, imageCopy.region.imageExtent.height, + gpuImg->getCreationParameters().extent.width, gpuImg->getCreationParameters().extent.height).c_str(), nbl::system::ILogger::ELL_ERROR); + } + } + else + m_logger.log(std::format("srcBuffer was invalid for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); } - else - m_logger.log(std::format("srcBuffer was invalid for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); } - } - commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change - std::vector afterCopyImageBarriers; - afterCopyImageBarriers.reserve(streamedImageCopies.size()); + std::vector afterCopyImageBarriers; + afterCopyImageBarriers.reserve(streamedImageCopies.size()); - // Pipeline Barriers after imageCopy - for (auto it : validCopies) - { - auto& [imageID, imageCopies] = *it; - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) + // Pipeline Barriers after imageCopy + for (auto it : validCopies) { - m_logger.log(std::format("`pushStreamedImagesUploads` failed, no image record found for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); - continue; - } + auto& [imageID, imageCopies] = *it; + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + { + m_logger.log(std::format("`pushStreamedImagesUploads` failed, no image record found for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); + continue; + } - const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + const auto& 
gpuImg = imageRecord->gpuImageView->getCreationParameters().image; - IImage::LAYOUT newLayout = IImage::LAYOUT::GENERAL; + IImage::LAYOUT newLayout = IImage::LAYOUT::GENERAL; - afterCopyImageBarriers.push_back( - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - } - // .ownershipOp. No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - }, - .oldLayout = imageRecord->currentLayout, - .newLayout = newLayout, - }); - imageRecord->currentLayout = newLayout; + afterCopyImageBarriers.push_back ( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + } + // .ownershipOp. 
No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = imageRecord->currentLayout, + .newLayout = newLayout, + }); + imageRecord->currentLayout = newLayout; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + // Remove the processed valid ones, keep invalids for later retries + for (auto it : validCopies) + streamedImageCopies.erase(it); } - success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); - - // Remove the processed valid ones, keep invalids for later retries - for (auto it : validCopies) - streamedImageCopies.erase(it); } else { @@ -1213,16 +1242,19 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit } bool success = true; + if (currentReplayCache) { + // In rare cases, we need to wait for the previous frame's submit to ensure all GPU usage of the any images has completed. 
+ nbl::video::ISemaphore::SWaitInfo previousSubmitWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + // This means we're in a replay cache scope, use the replay cache to push to GPU instead of internal accumulation success &= pushBufferUploads(intendedNextSubmit, currentReplayCache->resourcesCollection); success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfImagesState); - // Push Static Images Uploads from replay cache, all the work below is necessary to detect whether our image to replay is already in the cache in the exact form OR we need to create new image + bind memory and set array index - auto* device = m_utilities->getLogicalDevice(); - bool replayCacheFullyCovered = true; + bool evictedAnotherImage = false; + // Push Static Images Uploads from replay cache, all the work below is necessary to detect whether our image to replay is already in the cache in the exact form OR we need to create new image + bind memory and set array index for (auto& [toReplayImageID, toReplayRecord] : *currentReplayCache->imagesCache) { if (toReplayRecord.type != ImageType::STATIC) // non-static images (Georeferenced) won't be replayed like this @@ -1240,43 +1272,19 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit const bool arrayIndexMatches = cachedRecord->arrayIndex == toReplayRecord.arrayIndex; - alreadyResident = allocationMatches && arrayIndexMatches && cachedRecord->state == ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; + alreadyResident = allocationMatches && arrayIndexMatches && cachedRecord->state != ImageState::INVALID; } // if already resident, ignore, no need to insert into cache anymore - // if bot already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads - if (!alreadyResident) + if (alreadyResident) + { + 
cachedRecord->lastUsedFrameIndex = currentFrameIndex; + } + else { - replayCacheFullyCovered = false; - // make sure to evict any cache entry that conflicts with the new entry (either in memory allocation or descriptor index) - for (auto& [cachedImageID, cachedRecord] : *imagesCache) - { - bool cachedImageConflictsWithImageToReplay = true; - - // Case 1: Same imageID, but params differ (offset/size/arrayIndex mismatch) conflict - if (cachedImageID == toReplayImageID) - { - // this will always return true, because if it was a exact param match, we wouldn't need to insert a new one and handle evictions - cachedImageConflictsWithImageToReplay = true; - } - else - { - // Case 2: Different imageID but overlap in memory range on the same array index conflict - if (cachedRecord.arrayIndex == toReplayRecord.arrayIndex && - (cachedRecord.allocationOffset < toReplayRecord.allocationOffset + toReplayRecord.allocationSize) && - (toReplayRecord.allocationOffset < cachedRecord.allocationOffset + cachedRecord.allocationSize)) - { - cachedImageConflictsWithImageToReplay = true; - } - } - - if (cachedImageConflictsWithImageToReplay) - { - evictImage_SubmitIfNeeded(cachedImageID, cachedRecord, intendedNextSubmit); - imagesCache->erase(cachedImageID); - } - } + if (evictConflictingImagesInCache_SubmitIfNeeded(toReplayImageID, toReplayRecord, intendedNextSubmit)) + evictedAnotherImage = true; // creating and inserting new entry bool successCreateNewImage = false; @@ -1286,7 +1294,7 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit IGPUImage::SCreationParams imageParams = {}; imageParams = existingGPUImageViewParams.image->getCreationParameters(); - auto newGPUImage = device->createImage(std::move(imageParams)); + auto newGPUImage = m_device->createImage(std::move(imageParams)); if (newGPUImage) { nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = @@ -1295,19 +1303,22 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& 
intendedNextSubmit .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + toReplayRecord.allocationOffset } }; - const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + const bool boundToMemorySuccessfully = m_device->bindImageMemory({ &bindImageMemoryInfo, 1u }); if (boundToMemorySuccessfully) { newGPUImage->setObjectDebugName((std::to_string(toReplayImageID) + " Static Image 2D").c_str()); IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; viewParams.image = newGPUImage; - auto newGPUImageView = device->createImageView(std::move(viewParams)); + auto newGPUImageView = m_device->createImageView(std::move(viewParams)); if (newGPUImageView) { successCreateNewImage = true; + toReplayRecord.arrayIndexAllocatedUsingImageDescriptorIndexAllocator = false; // array index wasn't allocated useing desc set suballocator. it's being replayed toReplayRecord.gpuImageView = newGPUImageView; toReplayRecord.state = ImageState::CREATED_AND_MEMORY_BOUND; + toReplayRecord.currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; + toReplayRecord.lastUsedFrameIndex = currentFrameIndex; newGPUImageView->setObjectDebugName((std::to_string(toReplayImageID) + " Static Image View 2D").c_str()); } @@ -1326,34 +1337,39 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit _NBL_DEBUG_BREAK_IF(true); success = false; } - + } } - if (!replayCacheFullyCovered) + success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + + if (evictedAnotherImage) { - // We need to block for previous submit in order to safely update the descriptor set array index next. - // - // [FUTURE_CONSIDERATION]: To avoid stalling the CPU when replaying caches that overflow GPU memory, - // we could recreate the image and image view, binding them to entirely new memory locations. 
- // This would require an indirection mechanism in the shader to remap references from cached geometry or objects to the new image array indices. - // Note: This isn't a problem if the replayed scene fits in memory and doesn't require overflow submissions due to image memory exhaustion. - nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - device->blockForSemaphores({ &waitInfo, 1u }); + // We're about to update the descriptor set binding using the replay's array indices. + // Normally, descriptor-set allocation and updates are synchronized to ensure the GPU + // isn't still using the same descriptor indices we're about to overwrite. + // + // However, in this case we bypassed the descriptor-set allocator (imagesDescriptorIndexAllocator) and are writing directly into the set. + // This means proper synchronization is not guaranteed. + // + // Since evicting another image can happen due to array index conflicts, + // we must ensure that any prior GPU work using those descriptor indices has finished before we update them. 
+ // Therefore, wait for the previous frame (and any usage of these indices) to complete before proceeding to bind/write our images to their descriptor + m_device->blockForSemaphores({ &previousSubmitWaitInfo, 1u }); } - success &= bindImagesToArrayIndices(*imagesCache); - success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); - // There should be no georeferenced image and thus streamed uploads in replay mode: georeferenced/streamed images should be drawn in a separate isolated submit + success &= updateDescriptorSetImageBindings(*imagesCache); } else { flushDrawObjects(); success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); success &= pushMSDFImagesUploads(intendedNextSubmit, msdfImagesState); - success &= bindImagesToArrayIndices(*imagesCache); success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + success &= updateDescriptorSetImageBindings(*imagesCache); } + + return success; } @@ -1413,8 +1429,7 @@ void DrawResourcesFiller::pushCustomClipRect(const WorldClipRect& clipRect) } void DrawResourcesFiller::popCustomClipRect() -{ - if (activeClipRects.empty()) +{ if (activeClipRects.empty()) return; activeClipRects.pop_back(); @@ -1448,13 +1463,25 @@ std::unique_ptr DrawResourcesFiller::createRep stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. 
ret->drawCallsData = drawCalls; ret->activeMainObjectIndex = activeMainObjectIndex; - ret->imagesCache = std::unique_ptr(new ImagesCache(*imagesCache)); + ret->imagesCache = std::unique_ptr(new ImagesCache(ImagesBindingArraySize)); + + // m_logger.log(std::format("== createReplayCache, currentFrameIndex = {} ==", currentFrameIndex).c_str(), nbl::system::ILogger::ELL_INFO); + // imagesCache->logState(m_logger); + + for (auto& [imageID, record] : *imagesCache) + { + // Only return images in the cache used within the last frame + if (record.lastUsedFrameIndex == currentFrameIndex) + ret->imagesCache->base_t::insert(imageID, record); + } + return ret; } void DrawResourcesFiller::setReplayCache(ReplayCache* cache) { currentReplayCache = cache; + // currentReplayCache->imagesCache->logState(m_logger); } void DrawResourcesFiller::unsetReplayCache() @@ -1499,7 +1526,7 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool { // drawBuffer must be of type CPUGeneratedResource - SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer }; + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer}; if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) { @@ -1511,17 +1538,17 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub drawBuffer.bufferOffset = copyRange.offset; if (copyRange.size > 0ull) { - if (!m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) + if (!m_bufferUploadUtils->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) return false; copiedResourcesSize += drawBuffer.getAlignedStorageSize(); } return true; }; - + auto addComputeReservedFilledDrawBuffer = [&](auto& drawBuffer) -> bool { // drawBuffer must be of type ReservedComputeResource - SBufferRange 
copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer }; + SBufferRange copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer}; if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()) { @@ -1542,14 +1569,14 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub copyCPUFilledDrawBuffer(resources.drawObjects); copyCPUFilledDrawBuffer(resources.indexBuffer); copyCPUFilledDrawBuffer(resources.geometryInfo); - + return true; } bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages) { auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); - + if (cmdBuffInfo) { IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; @@ -1610,7 +1637,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex auto buffer = reinterpret_cast(stagedMSDF.image->getBuffer()->getPointer()); auto bufferOffset = mipImageRegion->bufferOffset; - stagedMSDF.uploadedToGPU = m_utilities->updateImageViaStagingBuffer( + stagedMSDF.uploadedToGPU = m_bufferUploadUtils->updateImageViaStagingBuffer( intendedNextSubmit, buffer + bufferOffset, nbl::ext::TextRendering::TextRenderer::MSDFTextureFormat, @@ -1672,12 +1699,11 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex } } -bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) +bool DrawResourcesFiller::updateDescriptorSetImageBindings(ImagesCache& imagesCache) { bool success = true; - - auto* device = m_utilities->getLogicalDevice(); - auto* descriptorSet = suballocatedDescriptorSet->getDescriptorSet(); + + auto* descriptorSet = imagesDescriptorIndexAllocator->getDescriptorSet(); // DescriptorSet Updates std::vector descriptorInfos; @@ -1685,15 +1711,30 @@ bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) descriptorInfos.resize(imagesCache.size()); 
descriptorWrites.resize(imagesCache.size()); + // Potential GPU waits before writing to descriptor bindings that were previously deallocated manually (bypassing the imagesDescriptorIndexAllocator). + // The allocator normally guarantees safe reuse of array indices by synchronizing allocations and deallocations internally. + // but since these bindings were queued for deferred deallocation, we must ensure their previous GPU usage has completed before writing new data into those slots. + std::vector waitInfos; + waitInfos.reserve(deferredDescriptorIndexDeallocations.size()); + uint32_t descriptorWriteCount = 0u; for (auto& [id, record] : imagesCache) { if (record.state >= ImageState::BOUND_TO_DESCRIPTOR_SET || !record.gpuImageView) continue; + + // Check if this writing to this array index has a deferred deallocation pending + if (auto it = deferredDescriptorIndexDeallocations.find(record.arrayIndex); it != deferredDescriptorIndexDeallocations.end()) + { + // TODO: Assert we're not waiting for a value which hasn't been submitted yet. + waitInfos.push_back(it->second); + // erase -> it's a one-time wait: + deferredDescriptorIndexDeallocations.erase(it); + } // Bind gpu image view to descriptor set video::IGPUDescriptorSet::SDescriptorInfo descriptorInfo = {}; - descriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo.info.image.imageLayout = (record.type == ImageType::STATIC) ? 
IImage::LAYOUT::READ_ONLY_OPTIMAL : IImage::LAYOUT::GENERAL; // WARN: don't use `record.currentLayout`, it's the layout "At the time" the image is going to be accessed descriptorInfo.desc = record.gpuImageView; descriptorInfos[descriptorWriteCount] = descriptorInfo; @@ -1706,12 +1747,17 @@ bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) descriptorWrite.info = &descriptorInfos[descriptorWriteCount]; descriptorWrites[descriptorWriteCount] = descriptorWrite; + record.state = ImageState::BOUND_TO_DESCRIPTOR_SET; descriptorWriteCount++; } + if (!waitInfos.empty()) + m_device->blockForSemaphores(waitInfos, /*waitAll=*/true); + if (descriptorWriteCount > 0u) - success &= device->updateDescriptorSets(descriptorWriteCount, descriptorWrites.data(), 0u, nullptr); + success &= m_device->updateDescriptorSets(descriptorWriteCount, descriptorWrites.data(), 0u, nullptr); + return success; } @@ -1730,9 +1776,8 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN if (nonResidentImageRecords.size() > 0ull) { - auto* device = m_utilities->getLogicalDevice(); auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); - + if (cmdBuffInfo) { IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; @@ -1775,7 +1820,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN { auto& imageRecord = *nonResidentImageRecords[i]; auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image; - success &= m_utilities->updateImageViaStagingBuffer( + success &= m_imageUploadUtils->updateImageViaStagingBuffer( intendedNextSubmit, imageRecord.staticCPUImage->getBuffer()->getPointer(), imageRecord.staticCPUImage->getCreationParameters().format, gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, @@ -1840,28 +1885,58 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN return success; } -bool 
DrawResourcesFiller::ensureGeoreferencedImageResources_AllocateIfNeeded(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::evictConflictingImagesInCache_SubmitIfNeeded(image_id toInsertImageID, const CachedImageRecord& toInsertRecord, nbl::video::SIntendedSubmitInfo& intendedNextSubmit) { - auto* device = m_utilities->getLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); - - // Try inserting or updating the image usage in the cache. - // If the image is already present, updates its semaphore value. - auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + bool evictedSomething = false; + for (auto& [cachedImageID, cachedRecord] : *imagesCache) + { + bool cachedImageConflictsWithImageToReplay = false; - // Setting the image streaming state returned in `ensureGeoreferencedImageEntry` which was either creating anew or gotten from this very own cache - cachedImageRecord->georeferencedImageState = std::move(imageStreamingState); + // Case 1: Same imageID, but params differ (offset/size/arrayIndex mismatch) conflict + if (cachedImageID == toInsertImageID) + { + const bool allocationMatches = + cachedRecord.allocationOffset == toInsertRecord.allocationOffset && + cachedRecord.allocationSize == toInsertRecord.allocationSize; + const bool arrayIndexMatches = cachedRecord.arrayIndex == toInsertRecord.arrayIndex; + const bool exactSameImage = allocationMatches && arrayIndexMatches; + if (!exactSameImage) + cachedImageConflictsWithImageToReplay = true; + } + else + { + // Different Image ID: + // Conflicted if: 1. same array index or 2. 
conflict in allocation/mem + const bool sameArrayIndex = cachedRecord.arrayIndex == toInsertRecord.arrayIndex; + const bool conflictingMemory = + (cachedRecord.allocationOffset < toInsertRecord.allocationOffset + toInsertRecord.allocationSize) && + (toInsertRecord.allocationOffset < cachedRecord.allocationOffset + cachedRecord.allocationSize); + + if (sameArrayIndex || conflictingMemory) + cachedImageConflictsWithImageToReplay = true; + } - if (cachedImageRecord == nullptr) - { - m_logger.log("Couldn't insert image in cache; make sure you called `ensureGeoreferencedImageEntry` before anything else.", nbl::system::ILogger::ELL_ERROR); - return false; + if (cachedImageConflictsWithImageToReplay) + { + evictImage_SubmitIfNeeded(cachedImageID, cachedRecord, intendedNextSubmit); + imagesCache->erase(cachedImageID); + evictedSomething = true; + } } + return evictedSomething; +} + +bool DrawResourcesFiller::ensureGeoreferencedImageResources_AllocateIfNeeded(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit) +{ + auto* physDev = m_device->getPhysicalDevice(); + // Check if image already exists and requires resize. We do this before insertion and updating `lastUsedFrameIndex` to get correct overflow-submit behaviour + // otherwise we'd always overflow submit, even if not needed and image was not queued/intended to use in the next submit. + CachedImageRecord* cachedImageRecord = imagesCache->get(imageID); + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // But we need to check if the cached image needs resizing/recreation. - if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + if (cachedImageRecord && cachedImageRecord->arrayIndex != InvalidTextureIndex) { // found in cache, but does it require resize? recreation? 
if (cachedImageRecord->gpuImageView) @@ -1871,18 +1946,16 @@ bool DrawResourcesFiller::ensureGeoreferencedImageResources_AllocateIfNeeded(ima { const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus - const auto toCreateParams = static_cast(cachedImageRecord->georeferencedImageState->gpuImageCreationParams); + const auto toCreateParams = static_cast(imageStreamingState->gpuImageCreationParams); const bool needsRecreation = cachedParams != toCreateParams; if (needsRecreation) { // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. // note: it doesn't remove the entry from lru cache. - evictCallback(imageID, *cachedImageRecord); - + evictImage_SubmitIfNeeded(imageID, *cachedImageRecord, intendedNextSubmit); + // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image - CachedImageRecord newRecord = CachedImageRecord(currentFrameIndex); //reser everything except image streaming state - newRecord.georeferencedImageState = std::move(cachedImageRecord->georeferencedImageState); - newRecord.type = cachedImageRecord->type; + CachedImageRecord newRecord = CachedImageRecord(currentFrameIndex); //reset everything except image streaming state *cachedImageRecord = std::move(newRecord); } } @@ -1897,36 +1970,57 @@ bool DrawResourcesFiller::ensureGeoreferencedImageResources_AllocateIfNeeded(ima } } + + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. 
+ auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + cachedImageRecord = imagesCache->insert(imageID, currentFrameIndex, evictCallback); + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + + // Setting the image streaming state returned in `ensureGeoreferencedImageEntry` which was either creating anew or gotten from this very own cache + cachedImageRecord->georeferencedImageState = std::move(imageStreamingState); + cachedImageRecord->georeferencedImageState->outOfDate = false; + + if (cachedImageRecord == nullptr) + { + m_logger.log("Couldn't insert image in cache; make sure you called `ensureGeoreferencedImageEntry` before anything else.", nbl::system::ILogger::ELL_ERROR); + return false; + } + // in which case we don't queue anything for upload, and return the idx if (cachedImageRecord->arrayIndex == InvalidTextureIndex) { // This is a new image (cache miss). Allocate a descriptor index for it. cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
- suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + imagesDescriptorIndexAllocator->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + cachedImageRecord->arrayIndexAllocatedUsingImageDescriptorIndexAllocator = true; if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { const auto& imageCreationParams = cachedImageRecord->georeferencedImageState->gpuImageCreationParams; - // Attempt to create a GPU image and image view for this texture. - ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); + std::string debugName = cachedImageRecord->georeferencedImageState->storagePath.string(); + + // Attempt to create a GPU image and image view for this texture. 
+ ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, debugName); if (allocResults.isValid()) { cachedImageRecord->type = ImageType::GEOREFERENCED_STREAMED; cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; - cachedImageRecord->currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; + cachedImageRecord->currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN cachedImageRecord->allocationOffset = allocResults.allocationOffset; cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; cachedImageRecord->staticCPUImage = nullptr; + evictConflictingImagesInCache_SubmitIfNeeded(imageID, *cachedImageRecord, intendedNextSubmit); } else { // All attempts to try create the GPU image and its corresponding view have failed. // Most likely cause: insufficient GPU memory or unsupported image parameters. - + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); @@ -1943,10 +2037,10 @@ bool DrawResourcesFiller::ensureGeoreferencedImageResources_AllocateIfNeeded(ima // We previously allocated a descriptor index, but failed to create a usable GPU image. // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. // No semaphore wait needed here, as the GPU never got to use this slot. 
- suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + imagesDescriptorIndexAllocator->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); cachedImageRecord->arrayIndex = InvalidTextureIndex; } - + // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation imagesCache->erase(imageID); } @@ -2108,7 +2202,7 @@ uint32_t DrawResourcesFiller::acquireActiveLineStyleIndex_SubmitIfNeeded(SIntend { if (activeLineStyleIndex == InvalidStyleIdx) activeLineStyleIndex = addLineStyle_SubmitIfNeeded(activeLineStyle, intendedNextSubmit); - + return activeLineStyleIndex; } @@ -2116,7 +2210,7 @@ uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SInte { if (activeDTMSettingsIndex == InvalidDTMSettingsIdx) activeDTMSettingsIndex = addDTMSettings_SubmitIfNeeded(activeDTMSettings, intendedNextSubmit); - + return activeDTMSettingsIndex; } @@ -2127,7 +2221,7 @@ uint32_t DrawResourcesFiller::acquireActiveCustomProjectionIndex_SubmitIfNeeded( if (activeProjectionIndices.back() == InvalidCustomProjectionIndex) activeProjectionIndices.back() = addCustomProjection_SubmitIfNeeded(activeProjections.back(), intendedNextSubmit); - + return activeProjectionIndices.back(); } @@ -2138,7 +2232,7 @@ uint32_t DrawResourcesFiller::acquireActiveCustomClipRectIndex_SubmitIfNeeded(SI if (activeClipRectIndices.back() == InvalidCustomClipRectIndex) activeClipRectIndices.back() = addCustomClipRect_SubmitIfNeeded(activeClipRects.back(), intendedNextSubmit); - + return activeClipRectIndices.back(); } @@ -2170,14 +2264,14 @@ uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SInten const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? 
const bool needToOverflowSubmit = (!enoughMem) || (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects); - + if (needToOverflowSubmit) { // failed to fit into remaining resources mem or exceeded max indexable mainobj submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! } - + MainObject mainObject = {}; // These 3 calls below shouldn't need to Submit because we made sure there is enough memory for all of them. // if something here triggers a auto-submit it's a possible bug with calculating `memRequired` above, TODO: assert that somehow? @@ -2233,7 +2327,7 @@ uint32_t DrawResourcesFiller::addCustomProjection_SubmitIfNeeded(const float64_t submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! } - + resourcesCollection.customProjections.vector.push_back(projection); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers return resourcesCollection.customProjections.vector.size() - 1u; } @@ -2249,7 +2343,7 @@ uint32_t DrawResourcesFiller::addCustomClipRect_SubmitIfNeeded(const WorldClipRe submitDraws(intendedNextSubmit); reset(); // resets everything! be careful! 
} - + resourcesCollection.customClipRects.vector.push_back(clipRect); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers return resourcesCollection.customClipRects.vector.size() - 1u; } @@ -2270,7 +2364,7 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(PolylineConnector) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); // TODO[ERFAN]: later take into account: our maximum indexable vertex - + const uint32_t connectorCount = static_cast(polyline.getConnectors().size()); const uint32_t remainingObjects = connectorCount - currentPolylineConnectorObj; const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); @@ -2290,12 +2384,12 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < objectsToUpload; ++i) { - indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; - indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; } // Add DrawObjs @@ -2308,7 +2402,7 @@ void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& po { drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(PolylineConnector); - } + } 
currentPolylineConnectorObj += objectsToUpload; } @@ -2346,12 +2440,12 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < objectsToUpload; ++i) { - indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; - indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; } // Add DrawObjs @@ -2364,7 +2458,7 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const { drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(LinePointInfo); - } + } currentObjectInSection += objectsToUpload; } @@ -2380,7 +2474,7 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, // memConsumption = quadBezCount * (sizeof(QuadraticBezierInfo) + 3*(sizeof(DrawObject)+6u*sizeof(uint32_t)) const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(QuadraticBezierInfo) + (sizeof(DrawObject) + 6u * sizeof(uint32_t)) * CagesPerQuadBezier); // TODO[ERFAN]: later take into account: our maximum indexable vertex - + const uint32_t beziersCount = section.count; const uint32_t remainingObjects = beziersCount - currentObjectInSection; const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); @@ -2388,7 +2482,7 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& 
polyline, if (objectsToUpload <= 0u) return; - + // Add Geometry const auto beziersByteSize = sizeof(QuadraticBezierInfo) * (objectsToUpload); size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(beziersByteSize, alignof(QuadraticBezierInfo)); @@ -2399,18 +2493,18 @@ void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, // Push Indices, remove later when compute fills this - uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * cagesCount); + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u*cagesCount); const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < cagesCount; ++i) { - indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; - indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; } - + // Add DrawObjs DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(cagesCount); DrawObject drawObj = {}; @@ -2436,7 +2530,7 @@ void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& curren const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(Hatch::CurveHatchBox) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); // TODO[ERFAN]: later take into account: our maximum indexable vertex - + uint32_t remainingObjects = 
hatch.getHatchBoxCount() - currentObjectInSection; const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); @@ -2450,20 +2544,20 @@ void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& curren void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; const Hatch::CurveHatchBox& hatchBox = hatch.getHatchBox(currentObjectInSection); // WARNING: This is assuming hatch boxes are contigous in memory, TODO: maybe make that more obvious through Hatch interface memcpy(dst, &hatchBox, curveBoxesByteSize); - + // Push Indices, remove later when compute fills this uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); const uint32_t startObj = resourcesCollection.drawObjects.getCount(); for (uint32_t i = 0u; i < objectsToUpload; ++i) { - indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; - indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; } - + // Add DrawObjs DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); DrawObject drawObj = {}; @@ -2486,7 +2580,7 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GlyphInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); // TODO[ERFAN]: later take into 
account: our maximum indexable vertex - + if (uploadableObjects <= 0u) return false; @@ -2499,12 +2593,12 @@ bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); const uint32_t startObj = resourcesCollection.drawObjects.getCount(); uint32_t i = 0u; - indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; - indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; - indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; - indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; // Add DrawObjs DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); @@ -2648,35 +2742,65 @@ void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const Cach _NBL_DEBUG_BREAK_IF(true); return; } - // Later used to release the image's memory range. 
- core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); - cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; - cleanupObject->addr = evicted.allocationOffset; - cleanupObject->size = evicted.allocationSize; +#if 0 + m_logger.log(("Evicting Image: \n" + evicted.toString(imageID)).c_str(), nbl::system::ILogger::ELL_INFO); +#endif + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); - // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. - if (imageUsedForNextIntendedSubmit) + if (evicted.arrayIndexAllocatedUsingImageDescriptorIndexAllocator) { - // The evicted image is scheduled for use in the upcoming submit. - // To avoid rendering artifacts, we must flush the current draw queue now. - // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. - submitDraws(intendedNextSubmit); - reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + // Image being evicted was allocated using image descriptor set allocator + // Later used to release the image's memory range. + core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); + cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; + cleanupObject->addr = evicted.allocationOffset; + cleanupObject->size = evicted.allocationSize; + + if (evicted.type == ImageType::GEOREFERENCED_STREAMED) + { + // Important to mark this as out of date. + // because any other place still holding on to the state (which is possible) need to know the image associated with the state has been evicted and the state is no longer valid and needs to "ensure"d again. 
+ evicted.georeferencedImageState->outOfDate = true; + // cancelGeoreferencedImageTileLoads(imageID); // clear any of the pending loads/futures requested for the image + } - // Prepare wait info to defer index deallocation until the GPU has finished using the resource. - // we wait on the signal semaphore for the submit we just did above. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. + if (imageUsedForNextIntendedSubmit) + { + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + imagesDescriptorIndexAllocator->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + } + else + { + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. 
+ // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value conservatively, which is the immediate prev submit. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + imagesDescriptorIndexAllocator->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + } } else { - // The image is not used in the current frame, so we can deallocate without submitting any draws. - // Still wait on the semaphore to ensure past GPU usage is complete. - // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value conservatively, which is the immediate prev submit. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + // Less often case: index wasn't allocated using imageDescriptorSetAllocator, like replayed images which skip the allocator to write to the set directly. + // we won't cleanup + multi_dealloc in this case, instead we queue the deallocations and wait for them before any next image writes into the same index. + if (!imageUsedForNextIntendedSubmit) + deferredDescriptorIndexDeallocations[evicted.arrayIndex] = ISemaphore::SWaitInfo{ .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + else + { + m_logger.log(std::format("Image which is being evicted and had skipped descriptor set allocator requires overflow submit; This shouldn't happen. 
Image Info = {}", evicted.toString(imageID)).c_str(), nbl::system::ILogger::ELL_ERROR); + imagesCache->logState(m_logger); + } + } } @@ -2688,8 +2812,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc { ImageAllocateResults ret = {}; - auto* device = m_utilities->getLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + auto* physDev = m_device->getPhysicalDevice(); bool alreadyBlockedForDeferredFrees = false; @@ -2701,13 +2824,13 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc // Try creating the image and allocating memory for it: nbl::video::IGPUImage::SCreationParams params = {}; params = imageParams; - + if (imageViewFormatOverride != asset::E_FORMAT::EF_COUNT && imageViewFormatOverride != imageParams.format) { params.viewFormats.set(static_cast(imageViewFormatOverride), true); params.flags |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; } - auto gpuImage = device->createImage(std::move(params)); + auto gpuImage = m_device->createImage(std::move(params)); if (gpuImage) { @@ -2732,9 +2855,9 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = { .image = gpuImage.get(), - .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + ret.allocationOffset } + .binding = { .memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + ret.allocationOffset } }; - const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + const bool boundToMemorySuccessfully = m_device->bindImageMemory({ &bindImageMemoryInfo, 1u }); if (boundToMemorySuccessfully) { gpuImage->setObjectDebugName(imageDebugName.c_str()); @@ -2754,7 +2877,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc viewParams.components.a = 
nbl::asset::IImageViewBase::SComponentMapping::E_SWIZZLE::ES_ONE; } - ret.gpuImageView = device->createImageView(std::move(viewParams)); + ret.gpuImageView = m_device->createImageView(std::move(viewParams)); if (ret.gpuImageView) { // SUCCESS! @@ -2822,7 +2945,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc imagesCache->erase(evictionCandidate); } - while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. + while (imagesDescriptorIndexAllocator->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. alreadyBlockedForDeferredFrees = true; // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference @@ -2844,6 +2967,7 @@ void DrawResourcesFiller::setHatchFillMSDFTextureFunction(const GetHatchFillPatt void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue) { + // m_logger.log(std::format("Finished Frame Idx = {}", currentFrameIndex).c_str(), nbl::system::ILogger::ELL_INFO); currentFrameIndex++; // TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index // Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphoroe value will signal the completion of the (to be evicted) resource's usage @@ -2878,54 +3002,54 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor } /* - * The `msdfTextureArrayIndexAllocator` manages indices (slots) into a texture array for MSDF images. - * When all slots are occupied, the least recently used entry is evicted via `msdfLRUCache`. - * This callback is invoked on eviction, and must: - * - Ensure safe deallocation of the slot. 
- * - Submit any pending draw calls if the evicted MSDF was scheduled to be used in the upcoming submission. - */ + * The `msdfTextureArrayIndexAllocator` manages indices (slots) into a texture array for MSDF images. + * When all slots are occupied, the least recently used entry is evicted via `msdfLRUCache`. + * This callback is invoked on eviction, and must: + * - Ensure safe deallocation of the slot. + * - Submit any pending draw calls if the evicted MSDF was scheduled to be used in the upcoming submission. + */ auto evictionCallback = [&](const MSDFReference& evicted) - { - // `deallocationWaitInfo` is used to prepare wait info to defer index deallocation until the GPU has finished using the resource. - // NOTE: `deallocationWaitInfo` is currently *not* required for correctness because: - // - Both the image upload (msdfImagesState) and usage occur within the same timeline (`intendedNextSubmit`). - // - timeline semaphores guarantee proper ordering: the next submit's msdfImagesState will wait on the prior usage. - // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. - // - // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images - // outside the `intendedNextSubmit` timeline for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. - - const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); - - if (imageUsedForNextIntendedSubmit) - { - // The evicted image is scheduled for use in the upcoming submit. - // To avoid rendering artifacts, we must flush the current draw queue now. - // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. 
- submitDraws(intendedNextSubmit); - reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded - - // Prepare wait info to defer index deallocation until the GPU has finished using the resource. - // we wait on the signal semaphore for the submit we just did above. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); - } - else - { - // The image is not used in the current frame, so we can deallocate without submitting any draws. - // Still wait on the semaphore to ensure past GPU usage is complete. - // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit (scratchSemaphore.value). - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); - } + { + // `deallocationWaitInfo` is used to prepare wait info to defer index deallocation until the GPU has finished using the resource. + // NOTE: `deallocationWaitInfo` is currently *not* required for correctness because: + // - Both the image upload (msdfImagesState) and usage occur within the same timeline (`intendedNextSubmit`). + // - timeline semaphores guarantee proper ordering: the next submit's msdfImagesState will wait on the prior usage. + // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. 
+ // + // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images + // outside the `intendedNextSubmit` timeline for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. - // Clear CPU-side metadata associated with the evicted slot. - msdfImagesState[evicted.alloc_idx].evict(); - }; + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); + if (imageUsedForNextIntendedSubmit) + { + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); + } + else + { + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit (scratchSemaphore.value). 
+ ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); + } + + // Clear CPU-side metadata associated with the evicted slot. + msdfImagesState[evicted.alloc_idx].evict(); + }; + // We pass nextSemaValue instead of constructing a new MSDFReference and passing it into `insert` that's because we might get a cache hit and only update the value of the nextSema MSDFReference* inserted = msdfLRUCache->insert(msdfInput, currentFrameIndex, evictionCallback); - + inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN // if cachedImageRecord->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx @@ -2947,7 +3071,7 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor inserted->alloc_idx = InvalidTextureIndex; } } - + assert(inserted->alloc_idx != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed return inserted->alloc_idx; diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 4a0d7b490..3b0e0c4bb 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -42,7 +42,7 @@ static_assert(sizeof(LineStyle) == 88u); struct DrawResourcesFiller { public: - + // We pack multiple data types in a single buffer, we need to makes sure each offset starts aligned to avoid mis-aligned accesses static constexpr size_t GPUStructsMaxNaturalAlignment = 8u; static constexpr size_t MinimumDrawResourcesMemorySize = 512u * 1 << 20u; // 512MB @@ -63,7 +63,7 @@ struct DrawResourcesFiller { size_t count = 0ull; 
size_t getCount() const override { return count; } - size_t getStorageSize() const override { return count * sizeof(T); } + size_t getStorageSize() const override { return count * sizeof(T); } }; /// @brief ResourceBase which is filled by CPU, packed and sent to GPU @@ -73,9 +73,9 @@ struct DrawResourcesFiller core::vector vector; size_t getCount() const { return vector.size(); } size_t getStorageSize() const { return vector.size() * sizeof(T); } - + /// @return pointer to start of the data to be filled, up to additionalCount - T* increaseCountAndGetPtr(size_t additionalCount) + T* increaseCountAndGetPtr(size_t additionalCount) { size_t offset = vector.size(); vector.resize(offset + additionalCount); @@ -85,14 +85,14 @@ struct DrawResourcesFiller /// @brief increases size of general-purpose resources that hold bytes /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= GPUStructsMaxNaturalAlignment, only use this if storing raw bytes in vector /// @return pointer to start of the data to be filled, up to additional size - size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) + size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) { assert(core::isPoT(alignment) && alignment <= GPUStructsMaxNaturalAlignment); size_t offset = core::alignUp(vector.size(), alignment); vector.resize(offset + additionalSize); return offset; } - + uint32_t addAndGetOffset(const T& val) { vector.push_back(val); @@ -111,7 +111,7 @@ struct DrawResourcesFiller CPUGeneratedResource dtmSettings; CPUGeneratedResource customProjections; CPUGeneratedResource customClipRects; - + // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) CPUGeneratedResource mainObjects; @@ -151,15 +151,15 @@ struct DrawResourcesFiller { return imageLoader->getFormat(imagePath); } - + DrawResourcesFiller(); - 
DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); + DrawResourcesFiller(smart_refctd_ptr&& device, smart_refctd_ptr&& bufferUploadUtils, smart_refctd_ptr&& imageUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); - + // DrawResourcesFiller needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding void setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding); @@ -172,34 +172,36 @@ struct DrawResourcesFiller } /** - * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. - * - * The function allocates a single memory block and splits it into image and buffer arenas. - * - * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. - * @param requiredImageMemorySize The size in bytes of the memory required for images. - * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. - * - * @return true if the memory allocation and resource setup succeeded; false otherwise. - */ - bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize); - + * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. + * + * The function allocates a single memory block and splits it into image and buffer arenas. + * + * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. + * @param requiredImageMemorySize The size in bytes of the memory required for images. + * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. 
+ * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. + * + * @return true if the memory allocation and resource setup succeeded; false otherwise. + */ + bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder); + /** - * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. - * - * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, - * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory - * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. - * - * @param logicalDevice Pointer to the logical device used for allocation. - * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. - * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. - * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). - * @param maxTries Maximum number of attempts to try reducing and allocating memory. - * - * @return true if the allocation succeeded at any iteration; false if all attempts failed. - */ - bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); + * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. + * + * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, + * it scales them up to meet a minimum required threshold. 
On allocation failure, it reduces the memory + * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. + * + * @param logicalDevice Pointer to the logical device used for allocation. + * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. + * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. + * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. + * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). + * @param maxTries Maximum number of attempts to try reducing and allocating memory. + * + * @return true if the allocation succeeded at any iteration; false if all attempts failed. + */ + bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); bool allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); @@ -228,11 +230,11 @@ struct DrawResourcesFiller //! Draws a fixed-geometry polyline using a custom transformation. //! TODO: Change `polyline` input to an ID referencing a possibly cached instance in our buffers, allowing reuse and avoiding redundant uploads. 
void drawFixedGeometryPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); - + /// Use this in a begin/endMainObject scope when you want to draw different polylines that should essentially be a single main object (no self-blending between components of a single main object) /// WARNING: make sure this function is called within begin/endMainObject scope void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); - + void drawTriangleMesh( const CTriangleMesh& mesh, const DTMSettingsInfo& dtmSettingsInfo, @@ -241,11 +243,11 @@ struct DrawResourcesFiller // ! Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( const Hatch& hatch, - const float32_t4& foregroundColor, + const float32_t4& foregroundColor, const float32_t4& backgroundColor, const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit); - + // ! Hatch with MSDF Pattern void drawHatch( const Hatch& hatch, @@ -258,7 +260,7 @@ struct DrawResourcesFiller const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit); - + //! Convinience function for fixed-geometry Hatch with MSDF Pattern and a solid background void drawFixedGeometryHatch( const Hatch& hatch, @@ -285,7 +287,7 @@ struct DrawResourcesFiller const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); - + /// Used by SingleLineText, Issue drawing a font glyph /// WARNING: make sure this function is called within begin/endMainObject scope void drawFontGlyph( @@ -305,54 +307,54 @@ struct DrawResourcesFiller SIntendedSubmitInfo& intendedNextSubmit); /** - * @brief Adds a static 2D image to the draw resource set for rendering. - * - * This function ensures that a given image is available as a GPU-resident texture for future draw submissions. 
- * It uses an LRU cache to manage descriptor set slots and evicts old images if necessary to make room for new ones. - * - * If the image is already cached and its slot is valid, it returns true; - * Otherwise, it performs the following: - * - Allocates a new descriptor set slot. - * - Promotes the image format to be GPU-compatible. - * - Creates a GPU image and GPU image view. - * - Queues the image for uploading via staging in the next submit. - * - If memory is constrained, attempts to evict other images to free up space. - * - * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. - * @param staticImage::forceUpdate If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. - * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. - * - * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. - * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers - * a flush of pending draws to preserve correctness. - * - * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. - * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. - * @return true if the image was successfully cached and is ready for use; false if allocation failed most likely due to the image being larger than the memory arena allocated for all images. - */ + * @brief Adds a static 2D image to the draw resource set for rendering. + * + * This function ensures that a given image is available as a GPU-resident texture for future draw submissions. + * It uses an LRU cache to manage descriptor set slots and evicts old images if necessary to make room for new ones. 
+ * + * If the image is already cached and its slot is valid, it returns true; + * Otherwise, it performs the following: + * - Allocates a new descriptor set slot. + * - Promotes the image format to be GPU-compatible. + * - Creates a GPU image and GPU image view. + * - Queues the image for uploading via staging in the next submit. + * - If memory is constrained, attempts to evict other images to free up space. + * + * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. + * @param staticImage::forceUpdate If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. + * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers + * a flush of pending draws to preserve correctness. + * + * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. + * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. + * @return true if the image was successfully cached and is ready for use; false if allocation failed most likely due to the image being larger than the memory arena allocated for all images. + */ bool ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit); - + /** - * @brief Ensures that multiple static 2D images are resident and ready for rendering. - * - * Attempts to make all provided static images GPU-resident by calling `ensureStaticImageAvailability` - * for each. 
Afterward, it verifies that none of the newly ensured images have been evicted, - * which could happen due to limited VRAM or memory fragmentation. - * - * This function is expected to succeed if: - * - The number of images does not exceed `ImagesBindingArraySize`. - * - Each image individually fits into the image memory arena. - * - There is enough VRAM to hold all images simultaneously. - * - * @param staticImages A span of StaticImageInfo structures describing the images to be ensured. - * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. - * - * @return true If all images were successfully made resident and none were evicted during the process. - * @return false If: - * - The number of images exceeds the descriptor binding array size. - * - Any individual image could not be made resident (e.g., larger than the allocator can support). - * - Some images were evicted due to VRAM pressure or allocator fragmentation, in which case Clearing the image cache and retrying MIGHT be a success (TODO: handle internally) - */ + * @brief Ensures that multiple static 2D images are resident and ready for rendering. + * + * Attempts to make all provided static images GPU-resident by calling `ensureStaticImageAvailability` + * for each. Afterward, it verifies that none of the newly ensured images have been evicted, + * which could happen due to limited VRAM or memory fragmentation. + * + * This function is expected to succeed if: + * - The number of images does not exceed `ImagesBindingArraySize`. + * - Each image individually fits into the image memory arena. + * - There is enough VRAM to hold all images simultaneously. + * + * @param staticImages A span of StaticImageInfo structures describing the images to be ensured. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. 
+ * + * @return true If all images were successfully made resident and none were evicted during the process. + * @return false If: + * - The number of images exceeds the descriptor binding array size. + * - Any individual image could not be made resident (e.g., larger than the allocator can support). + * - Some images were evicted due to VRAM pressure or allocator fragmentation, in which case Clearing the image cache and retrying MIGHT be a success (TODO: handle internally) + */ bool ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); // This function must be called immediately after `addStaticImage` for the same imageID. @@ -361,49 +363,30 @@ struct DrawResourcesFiller /* Georeferenced Image Functions: */ - + /** - * @brief Computes the recommended GPU image extents for streamed (georeferenced) imagery. - * - * This function estimates the required GPU-side image size to safely cover the current viewport, accounting for: - * - Full coverage of twice the viewport at mip 0 - * - Arbitrary rotation (by considering the diagonal) - * - Padding - * - * The resulting size is always rounded up to a multiple of the georeferenced tile size. - * - * @param viewportExtents The width and height of the viewport in pixels. - * @return A uint32_t2 representing the GPU image width and height for streamed imagery. + * @brief Computes the recommended GPU image extents for streamed (georeferenced) imagery. + * + * This function estimates the required GPU-side image size to safely cover the current viewport, accounting for: + * - Full coverage of twice the viewport at mip 0 + * - Arbitrary rotation (by considering the diagonal) + * - Padding + * + * The resulting size is always rounded up to a multiple of the georeferenced tile size. + * + * @param viewportExtents The width and height of the viewport in pixels. + * @return A uint32_t2 representing the GPU image width and height for streamed imagery. 
*/ static uint32_t2 computeStreamingImageExtentsForViewportCoverage(const uint32_t2 viewportExtents); - /** - * @brief Updates viewport information for georeferenced image calculations. - * - * This function sets the current viewport extents and the NDC-to-world transform, - * which are used by georeferenced image streaming logic (e.g., computing tile ranges, - * determining GPU image size, and checking for tile loading or GPU image resize). - * - * Note: This class handles many rendering tasks; this function affects only the - * georeferenced image streaming and positioning calculations. - * - * @param viewportExtent Extent of the current viewport in pixels. - * @param ndcToWorldMat 3x3 matrix transforming NDC coordinates to world coordinates. - */ - inline void updateViewportInfo(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat) - { - currentViewportExtents = viewportExtent; - ndcToWorldTransformationMatrix = ndcToWorldMat; - } - /** * @brief Creates a streaming state for a georeferenced image. - * + * * This function prepares the required state for streaming and rendering a georeferenced image. - * + * * WARNING: User should make sure to: * - Transforms the OBB into world space if custom projections (such as dwg/symbols) are active. - * + * * Specifically, this function: * - Builds a new GeoreferencedImageStreamingState for the given image ID, OBB, and storage path. * - Looks up image info such as format and extents from the registered loader and the storage path @@ -417,16 +400,18 @@ struct DrawResourcesFiller * * @param imageID Unique identifier of the image. * @param worldspaceOBB Oriented bounding box of the image in world space. - * @param storagePath Filesystem path where the image data is stored. + * @param viewportExtent Extent of the current viewport in pixels. + * @param ndcToWorldMat 3x3 matrix transforming NDC coordinates to world coordinates. + * @param storagePath Filesystem path where the image data is stored. 
* @return A GeoreferencedImageStreamingState object initialized for this image. */ - nbl::core::smart_refctd_ptr ensureGeoreferencedImageEntry(image_id imageID, const OrientedBoundingBox2D& worldSpaceOBB, const std::filesystem::path& storagePath); + nbl::core::smart_refctd_ptr ensureGeoreferencedImageEntry(image_id imageID, const OrientedBoundingBox2D& worldSpaceOBB, const uint32_t2 currentViewportExtents, const float64_t3x3& ndcToWorldMat, const std::filesystem::path& storagePath); /** * @brief Launches tile loading for a cached georeferenced image. - * + * * Queues all tiles visible in the current viewport for GPU upload. - * + * * The work includes: * - Calculating visible tile coverage from the OBB and viewport. * - Loading the necessary tiles from disk via the registered `imageLoader`. @@ -446,11 +431,13 @@ struct DrawResourcesFiller */ bool launchGeoreferencedImageTileLoads(image_id imageID, GeoreferencedImageStreamingState* imageStreamingState, const WorldClipRect clipRect); + bool cancelGeoreferencedImageTileLoads(image_id imageID); + /** * @brief Issue Drawing a GeoreferencedImage - * + * * Ensures streaming resources are allocated, computes addressing and positioning info (OBB and min/max UV), and pushes the image info to the geometry buffer for rendering. - * + * * This function should be called anywhere between `ensureGeoreferencedImageEntry` and `finalizeGeoreferencedImageTileLoads` * * @note The `imageStreamingState` must be the one returned by `ensureGeoreferencedImageEntry`. @@ -460,7 +447,7 @@ struct DrawResourcesFiller * @param intendedNextSubmit Submission info describing synchronization and barriers for the next batch. */ void drawGeoreferencedImage(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit); - + /** * @brief copies the queued up streamed copies. * @note call this function after `drawGeoreferencedImage` to make sure there is a gpu resource to copy to. 
@@ -497,7 +484,7 @@ struct DrawResourcesFiller // Setting Active Resources: void setActiveLineStyle(const LineStyleInfo& lineStyle); - + void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo); void beginMainObject(MainObjectType type, TransformationType transformationType = TransformationType::TT_NORMAL); @@ -505,7 +492,7 @@ struct DrawResourcesFiller void pushCustomProjection(const float64_t3x3& projection); void popCustomProjection(); - + void pushCustomClipRect(const WorldClipRect& clipRect); void popCustomClipRect(); @@ -598,7 +585,7 @@ struct DrawResourcesFiller /// /// User is responsible for management of cache and making sure it's alive in the ReplayCache scope void setReplayCache(ReplayCache* cache); - + /// @brief Reverts internal logic to use the default internal staging and resource accumulation cache. /// Must be called once per corresponding `pushReplayCacheUse()`. void unsetReplayCache(); @@ -652,37 +639,48 @@ struct DrawResourcesFiller /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); - + /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& msdfImagesState); /// @brief binds cached images into their correct descriptor set slot if not already resident. - bool bindImagesToArrayIndices(ImagesCache& imagesCache); + bool updateDescriptorSetImageBindings(ImagesCache& imagesCache); /// @brief Records GPU copy commands for all staged images into the active command buffer. bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache); - - + + /// @brief Handles eviction of images with conflicting memory regions or array indices in cache & replay mode. 
+ /// + /// In cache & replay mode, image allocations bypass the standard arena allocator and are rebound + /// to their original GPU memory locations. Since we can't depend on the allocator to avoid conflicting memory location, + /// this function scans the image cache for potential overlaps with the given image and evicts any conflicting entries, submitting work if necessary. + /// + /// @param toInsertImageID Identifier of the image being inserted. + /// @param toInsertRecord Record describing the image and its intended memory placement. + /// @param intendedNextSubmit Reference to the intended GPU submit info; may be used if eviction requires submission. + /// @return true if something was evicted, false otherwise + bool evictConflictingImagesInCache_SubmitIfNeeded(image_id toInsertImageID, const CachedImageRecord& toInsertRecord, nbl::video::SIntendedSubmitInfo& intendedNextSubmit); + /* GeoreferencesImage Protected Functions: */ - + /** * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. - * + * * If the specified image ID is not already present in the cache, or if the cached version is incompatible * with the requested parameters (e.g. extent, format, or type), this function allocates GPU memory, * creates the image and its view, to be bound to a descriptor binding in the future. - * + * * If the image already exists and matches the requested parameters, its usage metadata is updated. * In either case, the cache is updated to reflect usage in the current frame. - * + * * This function also handles automatic eviction of old images via an LRU policy when space is limited. - * + * * @param imageID Unique identifier of the image to add or reuse. * @param imageStreamingState Reference to the GeoreferencedImageStreamingState created or returned by `ensureGeoreferencedImageEntry` with same image_id. * @param intendedNextSubmit Submit info object used to track resources pending GPU submission. 
- * + * * @return true if the image was successfully cached and is ready for use; false if allocation failed. */ bool ensureGeoreferencedImageResources_AllocateIfNeeded(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit); @@ -697,7 +695,7 @@ struct DrawResourcesFiller // Gets resource index to the active linestyle data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - + // Gets resource index to the active linestyle data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); @@ -705,99 +703,99 @@ struct DrawResourcesFiller // Gets resource index to the active projection data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveCustomProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - + // Gets resource index to the active clip data from the top of stack // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveCustomClipRectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - + // Gets resource index to the active main object data // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) uint32_t acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); /// Attempts to add lineStyle to resources. 
If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); - + /// Attempts to add dtmSettings to resources. If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); - + /// Attempts to add custom projection to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addCustomProjection_SubmitIfNeeded(const float64_t3x3& projection, SIntendedSubmitInfo& intendedNextSubmit); - + /// Attempts to add custom clip to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. uint32_t addCustomClipRect_SubmitIfNeeded(const WorldClipRect& clipRect, SIntendedSubmitInfo& intendedNextSubmit); - + /// returns index to added LineStyleInfo, returns Invalid index if it exceeds resource limitations uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); - + /// returns index to added DTMSettingsInfo, returns Invalid index if it exceeds resource limitations uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); - + /** - * @brief Computes the final transformation matrix for fixed geometry rendering, - * considering any active custom projections and the transformation type. - * - * This function handles how a given transformation should be applied depending on the - * current transformation type and the presence of any active projection matrices. - * - * - If no active projection exists, the input transformation is returned unmodified. - * - * - If an active projection exists: - * - For TT_NORMAL, the input transformation is simply multiplied by the top of the projection stack. 
- * - For TT_FIXED_SCREENSPACE_SIZE, the input transformation is multiplied by the top of the projection stack, - * but the resulting scale is replaced with the screen-space scale from the original input `transformation`. - * - * @param transformation The input 3x3 transformation matrix to apply. - * @param transformationType The type of transformation to apply (e.g., TT_NORMAL or TT_FIXED_SCREENSPACE_SIZE). - * - */ + * @brief Computes the final transformation matrix for fixed geometry rendering, + * considering any active custom projections and the transformation type. + * + * This function handles how a given transformation should be applied depending on the + * current transformation type and the presence of any active projection matrices. + * + * - If no active projection exists, the input transformation is returned unmodified. + * + * - If an active projection exists: + * - For TT_NORMAL, the input transformation is simply multiplied by the top of the projection stack. + * - For TT_FIXED_SCREENSPACE_SIZE, the input transformation is multiplied by the top of the projection stack, + * but the resulting scale is replaced with the screen-space scale from the original input `transformation`. + * + * @param transformation The input 3x3 transformation matrix to apply. + * @param transformationType The type of transformation to apply (e.g., TT_NORMAL or TT_FIXED_SCREENSPACE_SIZE). 
+ * + */ float64_t3x3 getFixedGeometryFinalTransformationMatrix(const float64_t3x3& transformation, TransformationType transformationType) const; /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + /// Attempts to upload as many draw objects as possible within the given polyline connectors considering resource limitations void addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx); - + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addLines_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addQuadBeziers_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + /// Attempts to upload as many draw objects as possible within the given hatch considering resource limitations void addHatch_Internal(const Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex); - + /// Attempts to upload a single GlyphInfo considering resource limitations bool addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx); - + /// Attempts to upload a single GridDTMInfo considering resource limitations bool addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx); /// Attempts to upload a single image object considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) bool addImageObject_Internal(const 
ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx);; - + /// Attempts to upload a georeferenced image info considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx); - + uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); /** - * @brief Evicts a GPU image and deallocates its associated descriptor and memory, flushing draws if needed. - * - * This function is called when an image must be removed from GPU memory (typically due to VRAM pressure). - * If the evicted image is scheduled to be used in the next draw submission, a flush is performed to avoid - * use-after-free issues. Otherwise, it proceeds with deallocation immediately. - * - * It prepares a cleanup object that ensures the memory range used by the image will be returned to the suballocator - * only after the GPU has finished using it, guarded by a semaphore wait. - * - * @param imageID The unique ID of the image being evicted. - * @param evicted A reference to the evicted image, containing metadata such as allocation offset, size, usage frame, etc. - * @param intendedNextSubmit Reference to the intended submit information. Used for synchronizing draw submission and safe deallocation. - * - * @warning Deallocation may use a conservative semaphore wait value if exact usage information is unavailable. [future todo: fix] - */ + * @brief Evicts a GPU image and deallocates its associated descriptor and memory, flushing draws if needed. + * + * This function is called when an image must be removed from GPU memory (typically due to VRAM pressure). + * If the evicted image is scheduled to be used in the next draw submission, a flush is performed to avoid + * use-after-free issues. Otherwise, it proceeds with deallocation immediately. 
+ * + * It prepares a cleanup object that ensures the memory range used by the image will be returned to the suballocator + * only after the GPU has finished using it, guarded by a semaphore wait. + * + * @param imageID The unique ID of the image being evicted. + * @param evicted A reference to the evicted image, containing metadata such as allocation offset, size, usage frame, etc. + * @param intendedNextSubmit Reference to the intended submit information. Used for synchronizing draw submission and safe deallocation. + * + * @warning Deallocation may use a conservative semaphore wait value if exact usage information is unavailable. [future todo: fix] + */ void evictImage_SubmitIfNeeded(image_id imageID, const CachedImageRecord& evicted, SIntendedSubmitInfo& intendedNextSubmit); - + struct ImageAllocateResults { nbl::core::smart_refctd_ptr gpuImageView = nullptr; @@ -807,32 +805,32 @@ struct DrawResourcesFiller }; /** - * @brief Attempts to create and allocate a GPU image and its view, with fallback eviction on failure. - * - * This function tries to create a GPU image using the specified creation parameters, allocate memory - * from the shared image memory arena, bind it to device-local memory, and create an associated image view. - * If memory allocation fails (e.g. due to VRAM exhaustion), the function will evict textures from the internal - * LRU cache and retry the operation until successful, or until only the currently-inserted image remains. - * - * This is primarily used by the draw resource filler to manage GPU image memory for streamed or cached images. - * - * @param imageParams Creation parameters for the image. Should match `nbl::asset::IImage::SCreationParams`. - * @param imageViewFormatOverride Specifies whether the image view format should differ from the image format. If set to asset::E_FORMAT_ET_COUNT, the image view uses the same format as the image - * @param intendedNextSubmit Reference to the current intended submit info. 
Used for synchronizing evictions. - * @param imageDebugName Debug name assigned to the image and its view for easier profiling/debugging. - * - * @return ImageAllocateResults A struct containing: - * - `allocationOffset`: Offset into the memory arena (or InvalidAddress on failure). - * - `allocationSize`: Size of the allocated memory region. - * - `gpuImageView`: The created GPU image view (nullptr if creation failed). - */ + * @brief Attempts to create and allocate a GPU image and its view, with fallback eviction on failure. + * + * This function tries to create a GPU image using the specified creation parameters, allocate memory + * from the shared image memory arena, bind it to device-local memory, and create an associated image view. + * If memory allocation fails (e.g. due to VRAM exhaustion), the function will evict textures from the internal + * LRU cache and retry the operation until successful, or until only the currently-inserted image remains. + * + * This is primarily used by the draw resource filler to manage GPU image memory for streamed or cached images. + * + * @param imageParams Creation parameters for the image. Should match `nbl::asset::IImage::SCreationParams`. + * @param imageViewFormatOverride Specifies whether the image view format should differ from the image format. If set to asset::E_FORMAT_ET_COUNT, the image view uses the same format as the image + * @param intendedNextSubmit Reference to the current intended submit info. Used for synchronizing evictions. + * @param imageDebugName Debug name assigned to the image and its view for easier profiling/debugging. + * + * @return ImageAllocateResults A struct containing: + * - `allocationOffset`: Offset into the memory arena (or InvalidAddress on failure). + * - `allocationSize`: Size of the allocated memory region. + * - `gpuImageView`: The created GPU image view (nullptr if creation failed). 
+ */ ImageAllocateResults tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, const asset::E_FORMAT imageViewFormatOverride, nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string imageDebugName); /** - * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter + * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter */ void drawHatch_impl( const Hatch& hatch, @@ -858,7 +856,7 @@ struct DrawResourcesFiller void resetCustomProjections() { resourcesCollection.customProjections.vector.clear(); - + // Invalidate all the clip projection addresses because activeProjections buffer got reset for (auto& addr : activeProjectionIndices) addr = InvalidCustomProjectionIndex; @@ -867,7 +865,7 @@ struct DrawResourcesFiller void resetCustomClipRects() { resourcesCollection.customClipRects.vector.clear(); - + // Invalidate all the clip projection addresses because activeProjections buffer got reset for (auto& addr : activeClipRectIndices) addr = InvalidCustomClipRectIndex; @@ -884,7 +882,7 @@ struct DrawResourcesFiller resourcesCollection.dtmSettings.vector.clear(); activeDTMSettingsIndex = InvalidDTMSettingsIdx; } - + // MSDF Hashing and Caching Internal Functions enum class MSDFType : uint8_t { @@ -911,7 +909,6 @@ struct DrawResourcesFiller { computeBlake3Hash(); } - bool operator==(const MSDFInputInfo& rhs) const { return hash == rhs.hash && glyphIndex == rhs.glyphIndex && type == rhs.type; @@ -925,13 +922,13 @@ struct DrawResourcesFiller HatchFillPattern fillPattern; }; static_assert(sizeof(uint32_t) == sizeof(HatchFillPattern)); - + core::blake3_hash_t faceHash = {}; core::blake3_hash_t hash = {}; // actual hash, we will check in == operator size_t lookupHash = 0ull; // for containers expecting size_t hash private: - + void computeBlake3Hash() { core::blake3_hasher hasher; @@ -945,7 +942,7 
@@ struct DrawResourcesFiller }; struct MSDFInputInfoHash { std::size_t operator()(const MSDFInputInfo& info) const { return info.lookupHash; } }; - + struct MSDFReference { uint32_t alloc_idx; @@ -956,11 +953,11 @@ struct DrawResourcesFiller MSDFReference() : MSDFReference(InvalidTextureIndex, ~0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value to MSDFReference without changing `alloc_idx` - inline MSDFReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } + inline MSDFReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; - + uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); - + uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); // Flushes Current Draw Call and adds to drawCalls @@ -985,12 +982,11 @@ struct DrawResourcesFiller nbl::core::smart_refctd_ptr resourcesGPUBuffer; size_t copiedResourcesSize; - // GPUImages Memory Arena + AddressAllocator - IDeviceMemoryAllocator::SAllocation imagesMemoryArena; - smart_refctd_ptr imagesMemorySubAllocator; - // Members - smart_refctd_ptr m_utilities; + smart_refctd_ptr m_device; + core::smart_refctd_ptr m_bufferUploadUtils; + core::smart_refctd_ptr m_imageUploadUtils; + IQueue* m_copyQueue; // Active Resources we need to keep track of and push to resources buffer if needed. @@ -1008,7 +1004,7 @@ struct DrawResourcesFiller // The ClipRects & Projections are stack, because user can push/pop ClipRects & Projections in any order std::deque activeProjections; // stack of projections stored so we can resubmit them if geometry buffer got reset. std::deque activeProjectionIndices; // stack of projection gpu addresses in geometry buffer. 
to keep track of them in push/pops - + std::deque activeClipRects; // stack of clips stored so we can resubmit them if geometry buffer got reset. std::deque activeClipRectIndices; // stack of clips gpu addresses in geometry buffer. to keep track of them in push/pops @@ -1023,17 +1019,21 @@ struct DrawResourcesFiller std::vector msdfImagesState = {}; // cached cpu imaged + their status, size equals to LRUCache size static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; bool m_hasInitializedMSDFTextureArrays = false; - + // Images: + core::smart_refctd_ptr imageLoader; + // A. Image Cache std::unique_ptr imagesCache; - smart_refctd_ptr suballocatedDescriptorSet; + // B. GPUImages Memory Arena + AddressAllocator + IDeviceMemoryAllocator::SAllocation imagesMemoryArena; + smart_refctd_ptr imagesMemorySubAllocator; + // C. Images Descriptor Set Allocation/Deallocation uint32_t imagesArrayBinding = 0u; - // Georef - pushed here rn for simplicity - core::smart_refctd_ptr imageLoader; - + smart_refctd_ptr imagesDescriptorIndexAllocator; + // Tracks descriptor array indices that have been logically deallocated independant of the `imagesDescriptorSetAllocator` but may still be in use by the GPU. + // Notes: If `imagesDescriptorIndexAllocator` could give us functionality to force allocate and exact index, that would allow us to replay the cache perfectly + // remove the variable below and only rely on the `imagesDescriptorIndexAllocator` to synchronize accesses to descriptor sets for us. but unfortuantely it doesn't have that functionality yet. + std::unordered_map deferredDescriptorIndexDeallocations; + // D. 
Queued Up Copies/Futures for Streamed Images std::unordered_map> streamedImageCopies; - - // Viewport state - uint32_t2 currentViewportExtents = {}; - float64_t3x3 ndcToWorldTransformationMatrix = {}; }; \ No newline at end of file diff --git a/62_CAD/GeoTexture.cpp b/62_CAD/GeoTexture.cpp deleted file mode 100644 index de8a974d0..000000000 --- a/62_CAD/GeoTexture.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include "GeoTexture.h" - -bool GeoTextureRenderer::initialize( - IShader* vertexShader, - IShader* fragmentShader, - IGPURenderpass* compatibleRenderPass, - const smart_refctd_ptr& globalsBuffer) -{ - video::IGPUDescriptorSetLayout::SBinding bindingsSet0[] = { - { - .binding = 0u, - .type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - }; - m_descriptorSetLayout0 = m_device->createDescriptorSetLayout(bindingsSet0); - if (!m_descriptorSetLayout0) - return logFail("Failed to Create Descriptor Layout 0"); - - video::IGPUDescriptorSetLayout::SBinding bindingsSet1[] = { - { - .binding = 0u, - .type = asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 1u, - .type = asset::IDescriptor::E_TYPE::ET_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - }; - m_descriptorSetLayout1 = m_device->createDescriptorSetLayout(bindingsSet1); - if (!m_descriptorSetLayout1) - return logFail("Failed to Create Descriptor Layout 1"); - - const video::IGPUDescriptorSetLayout* const layouts[2u] = { m_descriptorSetLayout0.get(), m_descriptorSetLayout1.get() }; - - { - const uint32_t setCounts[2u] = { 1u, 
MaxGeoTextures}; - m_descriptorPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - if (!m_descriptorPool) - return logFail("Failed to Create Descriptor Pool"); - } - - - asset::SPushConstantRange pushConstantRanges[1u] = - { - {.stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0ull, .size = sizeof(GeoTextureOBB)}, - }; - m_pipelineLayout = m_device->createPipelineLayout(pushConstantRanges, core::smart_refctd_ptr(m_descriptorSetLayout0), core::smart_refctd_ptr(m_descriptorSetLayout1), nullptr, nullptr); - - // Set 0 Create and Bind - m_descriptorSet0 = m_descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout0)); - constexpr uint32_t DescriptorCountSet0 = 1u; - IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[DescriptorCountSet0] = {}; - - descriptorInfosSet0[0u].info.buffer.offset = 0u; - descriptorInfosSet0[0u].info.buffer.size = globalsBuffer->getCreationParams().size; - descriptorInfosSet0[0u].desc = globalsBuffer; - - constexpr uint32_t DescriptorUpdatesCount = DescriptorCountSet0; - video::IGPUDescriptorSet::SWriteDescriptorSet descriptorUpdates[DescriptorUpdatesCount] = {}; - - descriptorUpdates[0u].dstSet = m_descriptorSet0.get(); - descriptorUpdates[0u].binding = 0u; - descriptorUpdates[0u].arrayElement = 0u; - descriptorUpdates[0u].count = 1u; - descriptorUpdates[0u].info = &descriptorInfosSet0[0u]; - m_device->updateDescriptorSets(DescriptorUpdatesCount, descriptorUpdates, 0u, nullptr); - - // Shared Blend Params between pipelines - //TODO: Where does GeoTexture rendering fit into pipelines, separate renderpass? separate submit? under blending? over blending? 
- SBlendParams blendParams = {}; - blendParams.blendParams[0u].srcColorFactor = asset::EBF_SRC_ALPHA; - blendParams.blendParams[0u].dstColorFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; - blendParams.blendParams[0u].colorBlendOp = asset::EBO_ADD; - blendParams.blendParams[0u].srcAlphaFactor = asset::EBF_ONE; - blendParams.blendParams[0u].dstAlphaFactor = asset::EBF_ZERO; - blendParams.blendParams[0u].alphaBlendOp = asset::EBO_ADD; - blendParams.blendParams[0u].colorWriteMask = (1u << 4u) - 1u; - - // Create Main Graphics Pipelines - { - video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = { - {.shader=vertexShader }, - {.shader=fragmentShader }, - }; - - IGPUGraphicsPipeline::SCreationParams params[1] = {}; - params[0].layout = m_pipelineLayout.get(); - params[0].vertexShader = specInfo[0]; - params[0].fragmentShader = specInfo[1]; - params[0].cached = { - .vertexInput = {}, - .primitiveAssembly = { - .primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST, - }, - .rasterization = { - .polygonMode = EPM_FILL, - .faceCullingMode = EFCM_NONE, - .depthWriteEnable = false, - }, - .blend = blendParams, - }; - params[0].renderpass = compatibleRenderPass; - - if (!m_device->createGraphicsPipelines(nullptr,params,&m_graphicsPipeline)) - return logFail("Graphics Pipeline Creation Failed."); - } - -} diff --git a/62_CAD/GeoTexture.h b/62_CAD/GeoTexture.h deleted file mode 100644 index f471009fc..000000000 --- a/62_CAD/GeoTexture.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -using namespace nbl::hlsl; -#include "shaders/geotexture/common.hlsl" - -using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; - -class GeoTexture : public nbl::core::IReferenceCounted -{ - GeoTextureOBB obbInfo = {}; - smart_refctd_ptr descriptorSet; // or index allocated in main geo texture renderer - smart_refctd_ptr texture; -}; - -class GeoTextureRenderer -{ -public: - static constexpr const char* 
VertexShaderRelativePath = "../shaders/geotexture/vertex_shader.hlsl"; - static constexpr const char* FragmentShaderRelativePath = "../shaders/geotexture/fragment_shader.hlsl"; - - GeoTextureRenderer(smart_refctd_ptr&& device, smart_refctd_ptr&& logger) - : m_device(device) - , m_logger(logger) - {} - - bool initialize( - IShader* vertexShader, - IShader* fragmentShader, - IGPURenderpass* compatibleRenderPass, - const smart_refctd_ptr& globalsBuffer); - - void createGeoTexture(const nbl::system::path& geoTexturePath); // + OBB Info (center, rotation, aspect ratio from image?) - - void bindPipeline(video::IGPUCommandBuffer* commandBuffer); - - void drawGeoTexture(const GeoTexture* geoTexture, video::IGPUCommandBuffer* commandBuffer); - -private: - - // made it return false so we can save some lines writing `if (failCond) {logFail(); return false;}` - template - inline bool logFail(const char* msg, Args&&... args) - { - m_logger->log(msg,system::ILogger::ELL_ERROR,std::forward(args)...); - return false; - } - -private: - smart_refctd_ptr m_device; - smart_refctd_ptr m_logger; - - smart_refctd_ptr m_pipelineLayout; - smart_refctd_ptr m_graphicsPipeline; - smart_refctd_ptr m_sampler; - smart_refctd_ptr m_descriptorPool; - smart_refctd_ptr m_descriptorSetLayout0; // globals - smart_refctd_ptr m_descriptorSet0; - smart_refctd_ptr m_descriptorSetLayout1; // contains geo texture -}; diff --git a/62_CAD/Images.cpp b/62_CAD/Images.cpp index 0570338fe..f29ba1f61 100644 --- a/62_CAD/Images.cpp +++ b/62_CAD/Images.cpp @@ -65,7 +65,7 @@ void GeoreferencedImageStreamingState::updateStreamingStateForViewport(const uin currentViewportTileRange = computeViewportTileRange(viewportExtent, ndcToWorldMat); // Slide or remap the current mapped region to ensure the viewport falls inside it ensureMappedRegionCoversViewport(currentViewportTileRange); - + const uint32_t2 lastTileIndex = getLastTileIndex(currentViewportTileRange.baseMipLevel); const uint32_t2 lastTileSampligOffsetMip0 = 
(lastTileIndex * GeoreferencedImageTileSize) << currentViewportTileRange.baseMipLevel; lastTileSamplingExtent = fullResImageExtents - lastTileSampligOffsetMip0; @@ -248,8 +248,6 @@ void GeoreferencedImageStreamingState::remapCurrentRegion(const GeoreferencedIma // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles } currentMappedRegionTileRange = viewportTileRange; - // Roughly center the viewport in the mapped region - currentMappedRegionTileRange.topLeftTile = nbl::hlsl::max(uint32_t2(0, 0), currentMappedRegionTileRange.topLeftTile - (gpuImageSideLengthTiles / 2)); // We can expand the currentMappedRegionTileRange to make it as big as possible, at no extra cost since we only upload tiles on demand // Since we use toroidal updating it's kinda the same which way we expand the region. We first try to make the extent be `gpuImageSideLengthTiles` currentMappedRegionTileRange.bottomRightTile = currentMappedRegionTileRange.topLeftTile + uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - uint32_t2(1, 1); @@ -343,4 +341,56 @@ void GeoreferencedImageStreamingState::slideCurrentRegion(const GeoreferencedIma // Toroidal shift for the gpu image top left gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftShift + bottomRightShift + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; +} + +std::string CachedImageRecord::toString(uint64_t imageID) const +{ + auto stringifyImageState = [](ImageState state) -> std::string { + switch (state) + { + case ImageState::INVALID: return "INVALID"; + case ImageState::CREATED_AND_MEMORY_BOUND: return "CREATED_AND_MEMORY_BOUND"; + case ImageState::BOUND_TO_DESCRIPTOR_SET: return "BOUND_TO_DESCRIPTOR_SET"; + case ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA: return "GPU_RESIDENT_WITH_VALID_STATIC_DATA"; + default: return "UNKNOWN_STATE"; + } + }; + + auto stringifyImageType = [](ImageType type) -> std::string { + switch (type) + { + case 
ImageType::INVALID: return "INVALID"; + case ImageType::STATIC: return "STATIC"; + case ImageType::GEOREFERENCED_STREAMED: return "GEOREFERENCED_STREAMED"; + default: return "UNKNOWN_TYPE"; + } + }; + + std::string result; + if (imageID != std::numeric_limits::max()) + result += std::format(" ImageID: {}\n", imageID); + + result += std::format( + " Type: {}\n" + " State: {}\n" + " Array Index: {}\n" + " Allocation Offset: {}\n" + " Allocation Size: {}\n" + " Current Layout: {}\n" + " Last Used Frame Index: {}\n" + " GPU ImageView: {}\n" + " CPU Image: {}\n" + " Georeferenced Image State: {}\n", + stringifyImageType(type), + stringifyImageState(state), + arrayIndex, + allocationOffset, + allocationSize, + static_cast(currentLayout), + lastUsedFrameIndex, + gpuImageView ? "VALID" : "NULL", + staticCPUImage ? "VALID" : "NULL", + georeferencedImageState ? "VALID" : "NULL" + ); + return result; } \ No newline at end of file diff --git a/62_CAD/Images.h b/62_CAD/Images.h index e727ed46a..d397141d1 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -23,8 +23,8 @@ enum class ImageState : uint8_t { INVALID = 0, CREATED_AND_MEMORY_BOUND, // GPU image created, not bound to descriptor set yet - BOUND_TO_DESCRIPTOR_SET, // Bound to descriptor set, GPU resident, but may contain uninitialized or partial data - GPU_RESIDENT_WITH_VALID_STATIC_DATA, // When data for static images gets issued for upload successfully + GPU_RESIDENT_WITH_VALID_STATIC_DATA, // When data for static images gets issued for upload successfully, may not be bound to it's descriptor binding array index yet + BOUND_TO_DESCRIPTOR_SET, // Bound to descriptor set, GPU resident }; enum class ImageType : uint8_t @@ -35,15 +35,15 @@ enum class ImageType : uint8_t }; /** -* @class ImagesMemorySubAllocator -* @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. 
-* -* This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation -* for image resources within a contiguous block of GPU memory. -* -* @note This class only manages address offsets. The actual memory must be bound separately. -*/ -class ImagesMemorySubAllocator : public core::IReferenceCounted + * @class ImagesMemorySubAllocator + * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. + * + * This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation + * for image resources within a contiguous block of GPU memory. + * + * @note This class only manages address offsets. The actual memory must be bound separately. + */ +class ImagesMemorySubAllocator : public core::IReferenceCounted { public: using AddressAllocator = nbl::core::GeneralpurposeAddressAllocator; @@ -72,7 +72,7 @@ class ImagesMemorySubAllocator : public core::IReferenceCounted { m_addressAllocator->free_addr(addr, size); } - + uint64_t getFreeSize() const { return m_addressAllocator->get_free_size(); @@ -83,7 +83,7 @@ class ImagesMemorySubAllocator : public core::IReferenceCounted if (m_reservedAlloc) m_reservedAllocator->deallocate(reinterpret_cast(m_reservedAlloc), m_reservedAllocSize); } - + private: std::unique_ptr m_addressAllocator = nullptr; @@ -120,35 +120,34 @@ struct GeoreferencedImageTileRange struct GeoreferencedImageStreamingState : public IReferenceCounted { public: - + GeoreferencedImageStreamingState() - { - } + { } //! Creates a new streaming state for a georeferenced image /* - Initializes CPU-side state for image streaming. - Sets up world-to-UV transform, computes mip hierarchy parameters, - and stores metadata about the image. 
- - @param worldspaceOBB Oriented bounding box of the image in world space - @param fullResImageExtents Full resolution image size in pixels (width, height) - @param format Pixel format of the image - @param storagePath Filesystem path for image tiles + Initializes CPU-side state for image streaming. + Sets up world-to-UV transform, computes mip hierarchy parameters, + and stores metadata about the image. + + @param worldspaceOBB Oriented bounding box of the image in world space + @param fullResImageExtents Full resolution image size in pixels (width, height) + @param format Pixel format of the image + @param storagePath Filesystem path for image tiles */ bool init(const OrientedBoundingBox2D& worldSpaceOBB, const uint32_t2 fullResImageExtents, const asset::E_FORMAT format, const std::filesystem::path& storagePath); /** - * @brief Update the mapped region to cover the current viewport. - * - * Computes the required tile range from the viewport and updates - * `currentMappedRegion` by remapping or sliding as needed. - * - * @param currentViewportExtents Viewport size in pixels. - * @param ndcToWorldMat NDC to world space mattix. - * - * @see tilesToLoad - */ + * @brief Update the mapped region to cover the current viewport. + * + * Computes the required tile range from the viewport and updates + * `currentMappedRegion` by remapping or sliding as needed. + * + * @param currentViewportExtents Viewport size in pixels. + * @param ndcToWorldMat NDC to world space mattix. + * + * @see tilesToLoad + */ void updateStreamingStateForViewport(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat); // @brief Info to match a gpu tile to the tile in the real image it should hold image data for @@ -159,10 +158,10 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted }; /* - * @brief Get the tiles required for rendering the current viewport. 
- * Uses the region set by `updateStreamingStateForViewport()` to return - * which image tiles need loading and their target GPU tile indices. - */ + * @brief Get the tiles required for rendering the current viewport. + * Uses the region set by `updateStreamingStateForViewport()` to return + * which image tiles need loading and their target GPU tile indices. + */ core::vector tilesToLoad() const; // @brief Returns the index of the last tile when covering the image with `mipLevel` tiles @@ -195,6 +194,8 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted */ GeoreferencedImageInfo computeGeoreferencedImageAddressingAndPositioningInfo(); + bool isOutOfDate() const { return outOfDate; } + private: // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image // Tile coords are always in mip 0 tile size. Translating to other mips levels is trivial @@ -285,16 +286,19 @@ struct GeoreferencedImageStreamingState : public IReferenceCounted // Tile range covering only the tiles currently visible in the viewport GeoreferencedImageTileRange currentViewportTileRange = { .baseMipLevel = std::numeric_limits::max() }; // Extents used for sampling the last tile (handles partial tiles / NPOT images); gets updated with `updateStreamingStateForViewport` - uint32_t2 lastTileSamplingExtent; + uint32_t2 lastTileSamplingExtent; // Extents used when writing/updating the last tile in GPU memory (handles partial tiles / NPOT images); gets updated with `updateStreamingStateForViewport` uint32_t2 lastTileTargetExtent; + // We set this to true when image is evicted from cache, hinting at other places holding a smart_refctd_ptr to this objet that the GeoreferencedImageStreamingState isn't valid anymore and needs recreation/update + bool outOfDate = false; }; struct CachedImageRecord { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; - + uint32_t arrayIndex = InvalidTextureIndex; // index in our array of 
textures binding + bool arrayIndexAllocatedUsingImageDescriptorIndexAllocator; // whether the index of this cache entry was allocated using suballocated descriptor set which ensures correct synchronized access to a set index. (if not extra synchro is needed) ImageType type = ImageType::INVALID; ImageState state = ImageState::INVALID; nbl::asset::IImage::LAYOUT currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; @@ -304,10 +308,11 @@ struct CachedImageRecord core::smart_refctd_ptr gpuImageView = nullptr; core::smart_refctd_ptr staticCPUImage = nullptr; // cached cpu image for uploading to gpuImageView when needed. core::smart_refctd_ptr georeferencedImageState = nullptr; // Used to track tile residency for georeferenced images - + // In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value - CachedImageRecord(uint64_t currentFrameIndex) + CachedImageRecord(uint64_t currentFrameIndex) : arrayIndex(InvalidTextureIndex) + , arrayIndexAllocatedUsingImageDescriptorIndexAllocator(false) , type(ImageType::INVALID) , state(ImageState::INVALID) , lastUsedFrameIndex(currentFrameIndex) @@ -315,16 +320,16 @@ struct CachedImageRecord , allocationSize(0ull) , gpuImageView(nullptr) , staticCPUImage(nullptr) - { - } - - CachedImageRecord() + {} + + CachedImageRecord() : CachedImageRecord(0ull) - { - } + {} + + std::string toString(uint64_t imageID = std::numeric_limits::max()) const; // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value without changing `index` - inline CachedImageRecord& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } + inline CachedImageRecord& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; // A resource-aware image cache with an LRU eviction policy. 
@@ -337,27 +342,26 @@ class ImagesCache : public core::ResizableLRUCache { public: using base_t = core::ResizableLRUCache; - - ImagesCache(size_t capacity) + + ImagesCache(size_t capacity) : base_t(capacity) - { - } + {} // Attempts to insert a new image into the cache. // If the cache is full, invokes the provided `evictCallback` to evict an image. // Returns a pointer to the inserted or existing ImageReference. - template EvictionCallback> + template EvictionCallback> inline CachedImageRecord* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) { return base_t::insert(imageID, lastUsedSema, evictCallback); } - + // Retrieves the image associated with `imageID`, updating its LRU position. inline CachedImageRecord* get(image_id imageID) { return base_t::get(imageID); } - + // Retrieves the ImageReference without updating LRU order. inline CachedImageRecord* peek(image_id imageID) { @@ -365,10 +369,10 @@ class ImagesCache : public core::ResizableLRUCache } inline size_t size() const { return base_t::size(); } - + // Selects an eviction candidate based on LRU policy. // In the future, this could factor in memory pressure or semaphore sync requirements. - inline image_id select_eviction_candidate() + inline image_id select_eviction_candidate() { const image_id* lru = base_t::get_least_recently_used(); if (lru) @@ -380,6 +384,16 @@ class ImagesCache : public core::ResizableLRUCache return ~0ull; } } + + inline void logState(nbl::system::logger_opt_smart_ptr logger) + { + logger.log("=== Image Cache Status ===", nbl::system::ILogger::ELL_INFO); + for (const auto& [imageID, record] : *this) + { + logger.log(("\n" + record.toString(imageID)).c_str(), nbl::system::ILogger::ELL_INFO); + } + logger.log("=== End of Image Cache ===", nbl::system::ILogger::ELL_INFO); + } // Removes a specific image from the cache (manual eviction). 
inline void erase(image_id imageID) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 0e3060e9d..fd3d99aa2 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -32,7 +32,6 @@ using namespace video; #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "HatchGlyphBuilder.h" -#include "GeoTexture.h" #include @@ -86,7 +85,7 @@ constexpr std::array cameraExtents = 10.0 // CASE_12 }; -constexpr ExampleMode mode = ExampleMode::CASE_12; +constexpr ExampleMode mode = ExampleMode::CASE_5; class Camera2D { @@ -567,11 +566,22 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio void allocateResources() { - drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); + // TODO: currently using the same utils for buffers and images, make them separate staging buffers + drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_device), core::smart_refctd_ptr(m_utils), core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); + + // Just wanting to try memory type indices with device local flag, TODO: later improve to prioritize pure device local + std::vector deviceLocalMemoryTypeIndices; + for (uint32_t i = 0u; i < m_physicalDevice->getMemoryProperties().memoryTypeCount; ++i) + { + const auto& memType = m_physicalDevice->getMemoryProperties().memoryTypes[i]; + if (memType.propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + deviceLocalMemoryTypeIndices.push_back(i); + } size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB - drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize); + + drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize, deviceLocalMemoryTypeIndices); 
drawResourcesFiller.allocateMSDFTextures(m_device.get(), 256u, uint32_t2(MSDFSize, MSDFSize)); { @@ -1201,16 +1211,27 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } } + // Shared Blend Params between pipelines - SBlendParams blendParams = {}; - blendParams.blendParams[0u].srcColorFactor = asset::EBF_SRC_ALPHA; - blendParams.blendParams[0u].dstColorFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; - blendParams.blendParams[0u].colorBlendOp = asset::EBO_ADD; - blendParams.blendParams[0u].srcAlphaFactor = asset::EBF_ONE; - blendParams.blendParams[0u].dstAlphaFactor = asset::EBF_ZERO; - blendParams.blendParams[0u].alphaBlendOp = asset::EBO_ADD; - blendParams.blendParams[0u].colorWriteMask = (1u << 4u) - 1u; - + // Premultiplied over-blend (back-to-front) + SBlendParams premultipliedOverBlendParams = {}; + premultipliedOverBlendParams.blendParams[0u].srcColorFactor = asset::EBF_ONE; + premultipliedOverBlendParams.blendParams[0u].dstColorFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; + premultipliedOverBlendParams.blendParams[0u].colorBlendOp = asset::EBO_ADD; + premultipliedOverBlendParams.blendParams[0u].srcAlphaFactor = asset::EBF_ONE; + premultipliedOverBlendParams.blendParams[0u].dstAlphaFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; + premultipliedOverBlendParams.blendParams[0u].alphaBlendOp = asset::EBO_ADD; + premultipliedOverBlendParams.blendParams[0u].colorWriteMask = (1u << 4u) - 1u; + // Premultiplied UNDER-blend (front-to-back) + SBlendParams premultipliedUnderBlendParams = {}; + premultipliedUnderBlendParams.blendParams[0u].srcColorFactor = asset::EBF_ONE_MINUS_DST_ALPHA; + premultipliedUnderBlendParams.blendParams[0u].dstColorFactor = asset::EBF_ONE; + premultipliedUnderBlendParams.blendParams[0u].colorBlendOp = asset::EBO_ADD; + premultipliedUnderBlendParams.blendParams[0u].srcAlphaFactor = asset::EBF_ONE; + premultipliedUnderBlendParams.blendParams[0u].dstAlphaFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; + 
premultipliedUnderBlendParams.blendParams[0u].alphaBlendOp = asset::EBO_ADD; + premultipliedUnderBlendParams.blendParams[0u].colorWriteMask = (1u << 4u) - 1u; + // Create Alpha Resovle Pipeline { // Load FSTri Shader @@ -1218,7 +1239,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const video::IGPUPipelineBase::SShaderSpecInfo fragSpec = { .shader = mainPipelineFragmentShaders.get(), .entryPoint = "resolveAlphaMain" }; - resolveAlphaGraphicsPipeline = fsTriangleProtoPipe.createPipeline(fragSpec, pipelineLayout.get(), compatibleRenderPass.get(), 0u, blendParams); + resolveAlphaGraphicsPipeline = fsTriangleProtoPipe.createPipeline(fragSpec, pipelineLayout.get(), compatibleRenderPass.get(), 0u, premultipliedOverBlendParams); if (!resolveAlphaGraphicsPipeline) return logFail("Graphics Pipeline Creation Failed."); @@ -1252,7 +1273,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio .faceCullingMode = EFCM_NONE, .depthWriteEnable = false, }, - .blend = blendParams, + .blend = premultipliedOverBlendParams, }; params[0].renderpass = compatibleRenderPass.get(); @@ -1309,9 +1330,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } ); - m_geoTextureRenderer = std::unique_ptr(new GeoTextureRenderer(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger))); - // m_geoTextureRenderer->initialize(geoTexturePipelineShaders[0].get(), geoTexturePipelineShaders[1].get(), compatibleRenderPass.get(), m_globalsBuffer); - // Create the Semaphores m_renderSemaphore = m_device->createSemaphore(0ull); m_renderSemaphore->setObjectDebugName("m_renderSemaphore"); @@ -3899,14 +3917,11 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); const static auto dirU = startingViewportWidthVector * 
float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); - // Unnecessary but should go into a callback if window can change dimensions during execution - drawResourcesFiller.updateViewportInfo(uint32_t2(m_window->getWidth(), m_window->getHeight()), inverseViewProj); - const static auto startingTopLeft = nbl::hlsl::mul(inverseViewProj, topLeftViewportH); const uint32_t2 imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); OrientedBoundingBox2D georefImageBB = { .topLeft = startingTopLeft, .dirU = dirU, .aspectRatio = float32_t(imageExtents.y) / imageExtents.x }; - auto streamingState = drawResourcesFiller.ensureGeoreferencedImageEntry(georefImageID, georefImageBB, georeferencedImagePath); + auto streamingState = drawResourcesFiller.ensureGeoreferencedImageEntry(georefImageID, georefImageBB, uint32_t2(m_window->getWidth(), m_window->getHeight()), inverseViewProj, georeferencedImagePath); constexpr static WorldClipRect invalidClipRect = { .minClip = float64_t2(std::numeric_limits::signaling_NaN()) }; drawResourcesFiller.launchGeoreferencedImageTileLoads(georefImageID, streamingState.get(), invalidClipRect); @@ -4012,8 +4027,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_resolution_clock::now(); bool stopBenchamrkFlag = false; #endif - - std::unique_ptr m_geoTextureRenderer; }; NBL_MAIN_FUNC(ComputerAidedDesign) diff --git a/62_CAD/shaders/geotexture/common.hlsl b/62_CAD/shaders/geotexture/common.hlsl deleted file mode 100644 index 691cd3d3b..000000000 --- a/62_CAD/shaders/geotexture/common.hlsl +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _CAD_EXAMPLE_GEOTEXTURE_COMMON_HLSL_INCLUDED_ -#define _CAD_EXAMPLE_GEOTEXTURE_COMMON_HLSL_INCLUDED_ - -#include "../globals.hlsl" - -// Handle multiple geo textures, separate set, 
array of texture? index allocator? or multiple sets? -NBL_CONSTEXPR uint32_t MaxGeoTextures = 256; - -// GeoTexture Oriented Bounding Box -struct GeoTextureOBB -{ - pfloat64_t2 topLeft; // 2 * 8 = 16 bytes - float32_t2 dirU; // 2 * 4 = 8 bytes (24) - float32_t aspectRatio; // 4 bytes (32) -}; - -#ifdef __HLSL_VERSION -struct PSInput -{ - float4 position : SV_Position; - [[vk::location(0)]] float2 uv : COLOR0; -}; - -// Push Constant -[[vk::push_constant]] GeoTextureOBB geoTextureOBB; - -// Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated -// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl - -// Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes -[[vk::binding(0, 1)]] Texture2D geoTexture : register(t0); -[[vk::binding(1, 1)]] SamplerState geoTextureSampler : register(s0); -#endif - -#endif \ No newline at end of file diff --git a/62_CAD/shaders/geotexture/fragment_shader.hlsl b/62_CAD/shaders/geotexture/fragment_shader.hlsl deleted file mode 100644 index a7022cf03..000000000 --- a/62_CAD/shaders/geotexture/fragment_shader.hlsl +++ /dev/null @@ -1,9 +0,0 @@ -#pragma shader_stage(fragment) - -#include "common.hlsl" - -float4 main(PSInput input) : SV_TARGET -{ - const float2 uv = input.uv; - return geoTexture.Sample(geoTextureSampler, uv); -} \ No newline at end of file diff --git a/62_CAD/shaders/geotexture/vertex_shader.hlsl b/62_CAD/shaders/geotexture/vertex_shader.hlsl deleted file mode 100644 index e442b40fc..000000000 --- a/62_CAD/shaders/geotexture/vertex_shader.hlsl +++ /dev/null @@ -1,25 +0,0 @@ -#pragma shader_stage(vertex) - -#include "common.hlsl" - -PSInput main(uint vertexID : SV_VertexID) -{ - const uint vertexIdx = vertexID & 0x3u; - - PSInput outV; - ClipProjectionData clipProjectionData = globals.defaultClipProjection; - outV.position.z = 0.0; - - const float32_t2 dirV = 
float32_t2(geoTextureOBB.dirU.y, -geoTextureOBB.dirU.x) * geoTextureOBB.aspectRatio; - const float2 screenTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, geoTextureOBB.topLeft)); - const float2 screenDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(geoTextureOBB.dirU))); - const float2 screenDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - - const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); // corners of square from (0, 0) to (1, 1) - - const float2 coord = screenTopLeft + corner.x * screenDirU + corner.y * screenDirV; - outV.position.xy = coord; - outV.uv = corner; - - return outV; -} diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 5a1bb0d2f..11122b4cb 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -43,6 +43,7 @@ struct PushConstants uint32_t isDTMRendering; }; +// Please note minClip.y > maxClip.y --> TODO[Erfan]: fix later, because I get confused everytime dealing with min/max clip stuff struct WorldClipRect { pfloat64_t2 minClip; // min clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) @@ -446,7 +447,7 @@ struct DTMHeightShadingSettings // height-color map float intervalLength; - float intervalIndexToHeightMultiplier; + float intervalIndexToHeightMultiplier; int isCenteredShading; uint32_t heightColorEntryCount; @@ -582,6 +583,18 @@ struct OrientedBoundingBox2D pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) float32_t2 dirU; // 2 * 4 = 8 bytes (24) float32_t aspectRatio; // 4 bytes (28) + +#ifndef __HLSL_VERSION + void transform(pfloat64_t3x3 transformation) + { + // We want to do tile streaming and clipping calculations in the same space; hence, we transform the obb (defined in local DWG or symbol) space to worldspace, and we use ndcToWorldTransformation + worldToUV to calculate which tiles are visible in current view) + 
const pfloat64_t2 prevDirV = pfloat64_t2(dirU.y, -dirU.x) * pfloat64_t(aspectRatio); + topLeft = nbl::hlsl::mul(transformation, pfloat64_t3(topLeft, 1)); + dirU = nbl::hlsl::mul(transformation, pfloat64_t3(dirU, 0)); + const pfloat64_t2 newDirV = nbl::hlsl::mul(transformation, pfloat64_t3(prevDirV, 0)); + aspectRatio = nbl::hlsl::length(newDirV) / nbl::hlsl::length(dirU); // TODO: maybe we could compute new transformed aspect ratio "smarter" + } +#endif }; #ifdef __HLSL_VERSION diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 1783cb145..7e9edab4d 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -82,15 +82,18 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, co template<> float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) { + float32_t4 color; uint32_t styleIdx = loadMainObject(currentMainObjectIdx).styleIdx; if (!colorFromTexture) { - float32_t4 col = loadLineStyle(styleIdx).color; - col.w *= localAlpha; - return float4(col); + color = loadLineStyle(styleIdx).color; + color.w *= localAlpha; } else - return float4(localTextureColor, localAlpha); + color = float4(localTextureColor, localAlpha); + + color.rgb *= color.a; + return color; } template<> float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) @@ -141,6 +144,7 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp color.a *= float(storedQuantizedAlpha) / 255.f; + color.rgb *= color.a; return color; } @@ -695,7 +699,6 @@ float4 fragMain(PSInput input) : SV_TARGET } } - if (localAlpha <= 0) discard; From 3820eb17c371626f189b242f1b0bdb36abb7ee85 Mon Sep 17 00:00:00 2001 From: Erfan Ahmadi Date: 
Tue, 11 Nov 2025 10:33:50 +0400 Subject: [PATCH 29/29] Small MonoDeviceApp fix and update examples --- common/include/nbl/examples/common/MonoWindowApplication.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/include/nbl/examples/common/MonoWindowApplication.hpp b/common/include/nbl/examples/common/MonoWindowApplication.hpp index 0f18012c0..881519c64 100644 --- a/common/include/nbl/examples/common/MonoWindowApplication.hpp +++ b/common/include/nbl/examples/common/MonoWindowApplication.hpp @@ -150,7 +150,8 @@ class MonoWindowApplication : public virtual SimpleWindowedApplication virtual inline bool onAppTerminated() { m_inputSystem = nullptr; - m_device->waitIdle(); + if (m_device) + m_device->waitIdle(); m_framesInFlight.clear(); m_surface = nullptr; m_window = nullptr;