diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index c3a0fa47e..bde4ade92 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -14,8 +14,7 @@ set(EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/DrawResourcesFiller.h" "${CMAKE_CURRENT_SOURCE_DIR}/SingleLineText.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/SingleLineText.h" - "${CMAKE_CURRENT_SOURCE_DIR}/GeoTexture.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/GeoTexture.h" + "${CMAKE_CURRENT_SOURCE_DIR}/Images.cpp" "../../src/nbl/ext/TextRendering/TextRendering.cpp" # TODO: this one will be a part of dedicated Nabla ext called "TextRendering" later on which uses MSDF + Freetype ) set(EXAMPLE_INCLUDES diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index ec5058232..97ae6621b 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -1,10 +1,14 @@ #include "DrawResourcesFiller.h" +using namespace nbl; + DrawResourcesFiller::DrawResourcesFiller() {} -DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : - m_utilities(std::move(utils)), +DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& device, smart_refctd_ptr&& bufferUploadUtils, smart_refctd_ptr&& imageUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : + m_device(std::move(device)), + m_bufferUploadUtils(std::move(bufferUploadUtils)), + m_imageUploadUtils(std::move(imageUploadUtils)), m_copyQueue(copyQueue), m_logger(std::move(logger)) { @@ -22,11 +26,12 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) void DrawResourcesFiller::setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding) { imagesArrayBinding = binding; - suballocatedDescriptorSet = core::make_smart_refctd_ptr(std::move(descriptorSet)); + imagesDescriptorIndexAllocator = core::make_smart_refctd_ptr(std::move(descriptorSet)); } -bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* 
logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize) +bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder) { + // requiredImageMemorySize = core::alignUp(50'399'744 * 2, 1024); // single memory allocation sectioned into images+buffers (images start at offset=0) const size_t adjustedImagesMemorySize = core::alignUp(requiredImageMemorySize, GPUStructsMaxNaturalAlignment); const size_t adjustedBuffersMemorySize = core::max(requiredBufferMemorySize, getMinimumRequiredResourcesBufferSize()); @@ -36,6 +41,13 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s resourcesBufferCreationParams.size = adjustedBuffersMemorySize; resourcesBufferCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; resourcesGPUBuffer = logicalDevice->createBuffer(std::move(resourcesBufferCreationParams)); + + if (!resourcesGPUBuffer) + { + m_logger.log("Failed to create resourcesGPUBuffer.", nbl::system::ILogger::ELL_ERROR); + return false; + } + resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); @@ -53,39 +65,29 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); - uint32_t memoryTypeIdx = ~0u; - video::IDeviceMemoryAllocator::SAllocation allocation = {}; - for (uint32_t i = 0u; i < memoryProperties.memoryTypeCount; ++i) + for (const auto& memoryTypeIdx : memoryTypeIndexTryOrder) { - if (memoryProperties.memoryTypes[i].propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + IDeviceMemoryAllocator::SAllocateInfo allocationInfo = { - memoryTypeIdx = i; - - 
IDeviceMemoryAllocator::SAllocateInfo allocationInfo = - { - .size = totalResourcesSize, - .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers - .memoryTypeIndex = memoryTypeIdx, - .dedication = nullptr, - }; + .size = totalResourcesSize, + .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers + .memoryTypeIndex = memoryTypeIdx, + .dedication = nullptr, + }; - allocation = logicalDevice->allocate(allocationInfo); + allocation = logicalDevice->allocate(allocationInfo); - if (allocation.isValid()) - break; - } + if (allocation.isValid()) + break; } - if (memoryTypeIdx == ~0u) + if (!allocation.isValid()) { - m_logger.log("allocateResourcesBuffer: no device local memory type found!", nbl::system::ILogger::ELL_ERROR); + m_logger.log("Failed Allocation for draw resources!", nbl::system::ILogger::ELL_ERROR); return false; } - if (!allocation.isValid()) - return false; - imagesMemoryArena = { .memory = allocation.memory, .offset = allocation.offset, @@ -115,7 +117,7 @@ bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, s return true; } -bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent, uint32_t maxTries) +bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent, uint32_t maxTries) { const size_t minimumAcceptableSize = core::max(MinimumDrawResourcesMemorySize, getMinimumRequiredResourcesBufferSize()); @@ -136,13 +138,16 @@ bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevic uint32_t numTries = 0u; while ((currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries) { - if (allocateDrawResources(logicalDevice, 
currentBufferSize, currentImageSize)) + if (allocateDrawResources(logicalDevice, currentImageSize, currentBufferSize, memoryTypeIndexTryOrder)) + { + m_logger.log("Successfully allocated memory for images (%zu) and buffers (%zu).", system::ILogger::ELL_INFO, currentImageSize, currentBufferSize); return true; + } + m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); currentBufferSize = (currentBufferSize * (100 - reductionPercent)) / 100; currentImageSize = (currentImageSize * (100 - reductionPercent)) / 100; numTries++; - m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); } m_logger.log("All attempts to allocate memory for images(%zu) and buffers(%zu) failed.", system::ILogger::ELL_ERROR, currentImageSize, currentBufferSize); @@ -513,13 +518,13 @@ void DrawResourcesFiller::drawFontGlyph( bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit) { - // Try inserting or updating the image usage in the cache. - // If the image is already present, updates its semaphore value. 
- auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - CachedImageRecord* cachedImageRecord = imagesCache->insert(staticImage.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN - - if (cachedImageRecord->arrayIndex != InvalidTextureIndex && staticImage.forceUpdate) + // imagesCache->logState(m_logger); + + // Check if image already exists and requires force update. We do this before insertion and updating `lastUsedFrameIndex` to get correct overflow-submit behaviour + // otherwise we'd always overflow submit, even if not needed and image was not queued/intended to use in the next submit. + CachedImageRecord* cachedImageRecord = imagesCache->get(staticImage.imageID); + + if (cachedImageRecord && cachedImageRecord->arrayIndex != InvalidTextureIndex && staticImage.forceUpdate) { // found in cache, and we want to force new data into the image if (cachedImageRecord->staticCPUImage) @@ -530,7 +535,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s if (needsRecreation) { // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena along with it's allocated array slot from the suballocated descriptor set - evictCallback(staticImage.imageID, *cachedImageRecord); + evictImage_SubmitIfNeeded(staticImage.imageID, *cachedImageRecord, intendedNextSubmit); // Instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image // imagesCache->erase(imageID); @@ -551,6 +556,13 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s } } + // Try inserting or updating the image usage in the cache. 
+ // If the image is already present, updates its semaphore value. + auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + cachedImageRecord = imagesCache->insert(staticImage.imageID, currentFrameIndex, evictCallback); + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema // in which case we don't queue anything for upload, and return the idx if (cachedImageRecord->arrayIndex == InvalidTextureIndex) @@ -558,12 +570,12 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s // This is a new image (cache miss). Allocate a descriptor index for it. cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
- suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + imagesDescriptorIndexAllocator->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + cachedImageRecord->arrayIndexAllocatedUsingImageDescriptorIndexAllocator = true; if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) { - auto* device = m_utilities->getLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + auto* physDev = m_device->getPhysicalDevice(); IGPUImage::SCreationParams imageParams = {}; imageParams = staticImage.cpuImage->getCreationParameters(); @@ -584,11 +596,14 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s { cachedImageRecord->type = ImageType::STATIC; cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN cachedImageRecord->allocationOffset = allocResults.allocationOffset; cachedImageRecord->allocationSize = allocResults.allocationSize; cachedImageRecord->gpuImageView = allocResults.gpuImageView; cachedImageRecord->staticCPUImage = staticImage.cpuImage; + cachedImageRecord->georeferencedImageState = nullptr; + evictConflictingImagesInCache_SubmitIfNeeded(staticImage.imageID, *cachedImageRecord, intendedNextSubmit); } else { @@ -610,7 +625,7 @@ bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& s // We previously allocated a descriptor index, but failed to create a usable GPU image. 
// It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. // No semaphore wait needed here, as the GPU never got to use this slot. - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + imagesDescriptorIndexAllocator->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); cachedImageRecord->arrayIndex = InvalidTextureIndex; } @@ -651,143 +666,6 @@ bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::spangetLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); - - // Try inserting or updating the image usage in the cache. - // If the image is already present, updates its semaphore value. - auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; - CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - - // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE - IGPUImage::SCreationParams imageCreationParams = {}; - ImageType georeferenceImageType; - determineGeoreferencedImageCreationParams(imageCreationParams, georeferenceImageType, params); - - // imageParams = cpuImage->getCreationParameters(); - imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; - // promote format because RGB8 and friends don't actually exist in HW - { - const IPhysicalDevice::SImageFormatPromotionRequest request = { - .originalFormat = imageCreationParams.format, - .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageCreationParams.usage) - }; - imageCreationParams.format = physDev->promoteImageFormat(request,imageCreationParams.tiling); - } - - // if cachedImageRecord->index was not InvalidTextureIndex then 
it means we had a cache hit and updated the value of our sema - // But we need to check if the cached image needs resizing/recreation. - if (cachedImageRecord->arrayIndex != InvalidTextureIndex) - { - // found in cache, but does it require resize? recreation? - if (cachedImageRecord->gpuImageView) - { - auto imgViewParams = cachedImageRecord->gpuImageView->getCreationParameters(); - if (imgViewParams.image) - { - const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); - const auto cachedImageType = cachedImageRecord->type; - // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus - const auto currentParams = static_cast(imageCreationParams); - const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; - if (needsRecreation) - { - // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. - evictCallback(imageID, *cachedImageRecord); - - // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image - *cachedImageRecord = CachedImageRecord(currentFrameIndex); - // imagesCache->erase(imageID); - // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); - } - } - else - { - m_logger.log("Cached georeferenced image has invalid gpu image.", nbl::system::ILogger::ELL_ERROR); - } - } - else - { - m_logger.log("Cached georeferenced image has invalid gpu image view.", nbl::system::ILogger::ELL_ERROR); - } - } - - // in which case we don't queue anything for upload, and return the idx - if (cachedImageRecord->arrayIndex == InvalidTextureIndex) - { - // This is a new image (cache miss). Allocate a descriptor index for it. 
- cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; - // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. - suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - - if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) - { - // Attempt to create a GPU image and image view for this texture. - ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); - - if (allocResults.isValid()) - { - cachedImageRecord->type = georeferenceImageType; - cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; - cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN - cachedImageRecord->allocationOffset = allocResults.allocationOffset; - cachedImageRecord->allocationSize = allocResults.allocationSize; - cachedImageRecord->gpuImageView = allocResults.gpuImageView; - cachedImageRecord->staticCPUImage = nullptr; - } - else - { - // All attempts to try create the GPU image and its corresponding view have failed. - // Most likely cause: insufficient GPU memory or unsupported image parameters. 
- - m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); - _NBL_DEBUG_BREAK_IF(true); - - if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) - { - // We previously successfully create and allocated memory for the Image - // but failed to bind and create image view - // It's crucial to deallocate the offset+size form our images memory suballocator - imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); - } - - if (cachedImageRecord->arrayIndex != InvalidTextureIndex) - { - // We previously allocated a descriptor index, but failed to create a usable GPU image. - // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. - // No semaphore wait needed here, as the GPU never got to use this slot. - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); - cachedImageRecord->arrayIndex = InvalidTextureIndex; - } - - // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation - imagesCache->erase(imageID); - } - } - else - { - m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed index allocation. 
shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); - cachedImageRecord->arrayIndex = InvalidTextureIndex; - } - } - - - // cached or just inserted, we update the lastUsedFrameIndex - cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; - - assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed - return (cachedImageRecord->arrayIndex != InvalidTextureIndex); -} - -bool DrawResourcesFiller::queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy) -{ - auto& vec = streamedImageCopies[imageID]; - vec.emplace_back(imageCopy); - return true; -} - // TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo // We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) void DrawResourcesFiller::drawGridDTM( @@ -885,36 +763,473 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin endMainObject(); } -void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) -{ - beginMainObject(MainObjectType::STREAMED_IMAGE); +uint32_t2 DrawResourcesFiller::computeStreamingImageExtentsForViewportCoverage(const uint32_t2 viewportExtents) +{ + const uint32_t diagonal = static_cast(nbl::hlsl::ceil( + nbl::hlsl::sqrt(static_cast( + viewportExtents.x * viewportExtents.x + viewportExtents.y * viewportExtents.y)) + )); + + const uint32_t gpuImageSidelength = + 2 * core::roundUp(diagonal, 
GeoreferencedImageTileSize) + + GeoreferencedImagePaddingTiles * GeoreferencedImageTileSize; + + return { gpuImageSidelength, gpuImageSidelength }; +} + +nbl::core::smart_refctd_ptr DrawResourcesFiller::ensureGeoreferencedImageEntry(image_id imageID, const OrientedBoundingBox2D& worldSpaceOBB, const uint32_t2 currentViewportExtents, const float64_t3x3& ndcToWorldMat, const std::filesystem::path& storagePath) +{ + nbl::core::smart_refctd_ptr ret = nullptr; + + auto* physDev = m_device->getPhysicalDevice(); + + if (!imageLoader) + { + m_logger.log("imageLoader is null/empty. make sure to register your loader!", nbl::system::ILogger::ELL_ERROR); + return nullptr; + } + + uint32_t2 fullResImageExtents = imageLoader->getExtents(storagePath); + asset::E_FORMAT format = imageLoader->getFormat(storagePath); + + uint32_t2 gpuImageExtents = computeStreamingImageExtentsForViewportCoverage(currentViewportExtents); + + IGPUImage::SCreationParams gpuImageCreationParams = {}; + gpuImageCreationParams.type = asset::IImage::ET_2D; + gpuImageCreationParams.samples = asset::IImage::ESCF_1_BIT; + gpuImageCreationParams.format = format; + gpuImageCreationParams.extent = { .width = gpuImageExtents.x, .height = gpuImageExtents.y, .depth = 1u }; + gpuImageCreationParams.mipLevels = 2u; + gpuImageCreationParams.arrayLayers = 1u; + + gpuImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = gpuImageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(gpuImageCreationParams.usage) + }; + gpuImageCreationParams.format = physDev->promoteImageFormat(request,gpuImageCreationParams.tiling); + } + + CachedImageRecord* cachedImageRecord = imagesCache->get(imageID); + if (!cachedImageRecord) + { + ret = nbl::core::make_smart_refctd_ptr(); + const bool initSuccess = 
ret->init(worldSpaceOBB, fullResImageExtents, format, storagePath); + if (!initSuccess) + m_logger.log("Failed to init GeoreferencedImageStreamingState!", nbl::system::ILogger::ELL_ERROR); + } + else + { + // StreamingState already in cache, we return it; + if (!cachedImageRecord->georeferencedImageState) + m_logger.log("image had entry in the cache but cachedImageRecord->georeferencedImageState was nullptr, this shouldn't happen!", nbl::system::ILogger::ELL_ERROR); + ret = cachedImageRecord->georeferencedImageState; + } + + // Update GeoreferencedImageState with new viewport width/height and requirements + + // width only because gpu image is square + const uint32_t newGPUImageSideLengthTiles = gpuImageCreationParams.extent.width / GeoreferencedImageTileSize; + + // This will reset the residency state after a resize. it makes sense because when gpu image is resized, it's recreated and no previous tile is resident anymore + // We don't copy tiles between prev/next resized image, we're more focused on optimizing pan/zoom with a fixed window size. 
+ if (ret->gpuImageSideLengthTiles != newGPUImageSideLengthTiles) + { + ret->gpuImageSideLengthTiles = newGPUImageSideLengthTiles; + ret->ResetTileOccupancyState(); + ret->currentMappedRegionTileRange = { .baseMipLevel = std::numeric_limits::max() }; + } + + ret->gpuImageCreationParams = std::move(gpuImageCreationParams); + // Update with current viewport + ret->updateStreamingStateForViewport(currentViewportExtents, ndcToWorldMat); + + return ret; +} + +bool DrawResourcesFiller::launchGeoreferencedImageTileLoads(image_id imageID, GeoreferencedImageStreamingState* imageStreamingState, const WorldClipRect clipRect) +{ + if (!imageStreamingState) + { + m_logger.log("imageStreamingState is null/empty, make sure `ensureGeoreferencedImageEntry` was called beforehand!", nbl::system::ILogger::ELL_ERROR); + assert(false); + return false; + } + + auto& thisImageQueuedCopies = streamedImageCopies[imageID]; + + const auto& viewportTileRange = imageStreamingState->currentViewportTileRange; + const uint32_t2 lastTileIndex = imageStreamingState->getLastTileIndex(viewportTileRange.baseMipLevel); + + // We need to make every tile that covers the viewport resident. We reserve the amount of tiles needed for upload. 
+ auto tilesToLoad = imageStreamingState->tilesToLoad(); + + + // m_logger.log(std::format("Tiles to Load = {}.", tilesToLoad.size()).c_str(), nbl::system::ILogger::ELL_INFO); + + + const uint32_t2 imageExtents = imageStreamingState->fullResImageExtents; + const std::filesystem::path imageStoragePath = imageStreamingState->storagePath; + + // Figure out worldspace coordinates for each of the tile's corners - these are used if there's a clip rect + const float64_t2 imageTopLeft = imageStreamingState->worldspaceOBB.topLeft; + const float64_t2 dirU = float64_t2(imageStreamingState->worldspaceOBB.dirU); + const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(imageStreamingState->worldspaceOBB.aspectRatio); + const uint32_t tileMipLevel = imageStreamingState->currentViewportTileRange.baseMipLevel; + + uint32_t ignored = 0; + for (auto [imageTileIndex, gpuImageTileIndex] : tilesToLoad) + { + // clip against current rect, if valid + if (clipRect.minClip.x != std::numeric_limits::signaling_NaN()) + { + float64_t2 topLeftWorld = imageTopLeft + dirU * (float64_t(GeoreferencedImageTileSize * imageTileIndex.x << tileMipLevel) / float64_t(imageExtents.x)) + dirV * (float64_t(GeoreferencedImageTileSize * imageTileIndex.y << tileMipLevel) / float64_t(imageExtents.y)); + float64_t2 topRightWorld = imageTopLeft + dirU * (float64_t(GeoreferencedImageTileSize * (imageTileIndex.x + 1) << tileMipLevel) / float64_t(imageExtents.x)) + dirV * (float64_t(GeoreferencedImageTileSize * imageTileIndex.y << tileMipLevel) / float64_t(imageExtents.y)); + float64_t2 bottomLeftWorld = imageTopLeft + dirU * (float64_t(GeoreferencedImageTileSize * imageTileIndex.x << tileMipLevel) / float64_t(imageExtents.x)) + dirV * (float64_t(GeoreferencedImageTileSize * (imageTileIndex.y + 1) << tileMipLevel) / float64_t(imageExtents.y)); + float64_t2 bottomRightWorld = imageTopLeft + dirU * (float64_t(GeoreferencedImageTileSize * (imageTileIndex.x + 1) << tileMipLevel) / float64_t(imageExtents.x)) + 
dirV * (float64_t(GeoreferencedImageTileSize * (imageTileIndex.y + 1) << tileMipLevel) / float64_t(imageExtents.y)); + + float64_t minX = std::min({ topLeftWorld.x, topRightWorld.x, bottomLeftWorld.x, bottomRightWorld.x }); + float64_t minY = std::min({ topLeftWorld.y, topRightWorld.y, bottomLeftWorld.y, bottomRightWorld.y }); + float64_t maxX = std::max({ topLeftWorld.x, topRightWorld.x, bottomLeftWorld.x, bottomRightWorld.x }); + float64_t maxY = std::max({ topLeftWorld.y, topRightWorld.y, bottomLeftWorld.y, bottomRightWorld.y }); + + // Check if the tile intersects clip rect at all. Note that y clips are inverted + if (maxX < clipRect.minClip.x || minX > clipRect.maxClip.x || maxY < clipRect.maxClip.y || minY > clipRect.minClip.y) + continue; + } + + uint32_t2 targetExtentMip0(GeoreferencedImageTileSize, GeoreferencedImageTileSize); + std::future> gpuMip0Tile; + std::future> gpuMip1Tile; + + { + uint32_t2 samplingExtentMip0 = uint32_t2(GeoreferencedImageTileSize, GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; + const uint32_t2 samplingOffsetMip0 = (imageTileIndex * GeoreferencedImageTileSize) << viewportTileRange.baseMipLevel; + + // If on the last tile, we might not load a full `GeoreferencedImageTileSize x GeoreferencedImageTileSize` tile, so we figure out how many pixels to load in this case to have + // minimal artifacts and no stretching + if (imageTileIndex.x == lastTileIndex.x) + { + samplingExtentMip0.x = imageStreamingState->lastTileSamplingExtent.x; + targetExtentMip0.x = imageStreamingState->lastTileTargetExtent.x; + // If the last tile is too small just ignore it + if (targetExtentMip0.x == 0u) + continue; + } + if (imageTileIndex.y == lastTileIndex.y) + { + samplingExtentMip0.y = imageStreamingState->lastTileSamplingExtent.y; + targetExtentMip0.y = imageStreamingState->lastTileTargetExtent.y; + // If the last tile is too small just ignore it + if (targetExtentMip0.y == 0u) + continue; + } + + if 
(!imageLoader->hasPrecomputedMips(imageStoragePath)) + { + gpuMip0Tile = std::async(std::launch::async, [=, this]() { + return imageLoader->load(imageStoragePath, samplingOffsetMip0, samplingExtentMip0, targetExtentMip0); + }); + gpuMip1Tile = std::async(std::launch::async, [=, this]() { + return imageLoader->load(imageStoragePath, samplingOffsetMip0, samplingExtentMip0, targetExtentMip0 / 2u); + }); + } + else + { + gpuMip0Tile = std::async(std::launch::async, [=, this]() { + return imageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSize, targetExtentMip0, imageStreamingState->currentMappedRegionTileRange.baseMipLevel, false); + }); + gpuMip1Tile = std::async(std::launch::async, [=, this]() { + return imageLoader->load(imageStoragePath, imageTileIndex * GeoreferencedImageTileSizeMip1, targetExtentMip0 / 2u, imageStreamingState->currentMappedRegionTileRange.baseMipLevel, true); + }); + } + } + + asset::IImage::SBufferCopy bufCopy; + bufCopy.bufferOffset = 0; + bufCopy.bufferRowLength = targetExtentMip0.x; + bufCopy.bufferImageHeight = 0; + bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + bufCopy.imageSubresource.mipLevel = 0u; + bufCopy.imageSubresource.baseArrayLayer = 0u; + bufCopy.imageSubresource.layerCount = 1u; + uint32_t2 gpuImageOffset = gpuImageTileIndex * GeoreferencedImageTileSize; + bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; + bufCopy.imageExtent.width = targetExtentMip0.x; + bufCopy.imageExtent.height = targetExtentMip0.y; + bufCopy.imageExtent.depth = 1; + + thisImageQueuedCopies.emplace_back(imageStreamingState->sourceImageFormat, std::move(gpuMip0Tile), std::move(bufCopy)); + + // Upload the smaller tile to mip 1 + bufCopy = {}; + + bufCopy.bufferOffset = 0; + bufCopy.bufferRowLength = targetExtentMip0.x / 2; + bufCopy.bufferImageHeight = 0; + bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + bufCopy.imageSubresource.mipLevel = 1u; + bufCopy.imageSubresource.baseArrayLayer = 
0u; + bufCopy.imageSubresource.layerCount = 1u; + gpuImageOffset /= 2; // Half tile size! + bufCopy.imageOffset = { gpuImageOffset.x, gpuImageOffset.y, 0u }; + bufCopy.imageExtent.width = targetExtentMip0.x / 2; + bufCopy.imageExtent.height = targetExtentMip0.y / 2; + bufCopy.imageExtent.depth = 1; + + thisImageQueuedCopies.emplace_back(imageStreamingState->sourceImageFormat, std::move(gpuMip1Tile), std::move(bufCopy)); + + // Mark tile as resident + imageStreamingState->currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y] = true; + } + + return true; +} + +bool DrawResourcesFiller::cancelGeoreferencedImageTileLoads(image_id imageID) +{ + auto it = streamedImageCopies.find(imageID); + if (it != streamedImageCopies.end()) + it->second.clear(); // clear the vector of copies for this image + + return true; +} + +void DrawResourcesFiller::drawGeoreferencedImage(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit) +{ + // OutputDebugStringA(std::format("Image Cache Size = {} ", imagesCache->size()).c_str()); + + const bool resourcesEnsured = ensureGeoreferencedImageResources_AllocateIfNeeded(imageID, std::move(imageStreamingState), intendedNextSubmit); + if (resourcesEnsured) + { + // Georeferenced Image Data in the cache was already pre-transformed from local to main worldspace coordinates for tile calculation purposes + // Because of this reason, the pre-transformed obb in the cache doesn't need to be transformed by custom projection again anymore. + // we push the identity transform to prevent any more transformation on the obb which is already in worldspace units. 
+ float64_t3x3 identity = float64_t3x3(1, 0, 0, 0, 1, 0, 0, 0, 1); + pushCustomProjection(identity); + + beginMainObject(MainObjectType::STREAMED_IMAGE); + + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx != InvalidMainObjectIdx) + { + // Query imageType + auto cachedImageRecord = imagesCache->peek(imageID); + if (cachedImageRecord) + { + GeoreferencedImageInfo info = cachedImageRecord->georeferencedImageState->computeGeoreferencedImageAddressingAndPositioningInfo(); + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) + { + // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + const bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); + if (!success) + { + m_logger.log("addGeoreferencedImageInfo_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } + } + else + { + m_logger.log("drawGeoreferencedImage was not called immediately after enforceGeoreferencedImageAvailability!", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } + else + { + m_logger.log("drawGeoreferencedImage: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + + endMainObject(); + + popCustomProjection(); + } + else + { + m_logger.log("Failed to ensure resources (memory and descriptorIndex) for georeferencedImage", nbl::system::ILogger::ELL_ERROR); + } +} + +bool DrawResourcesFiller::finalizeGeoreferencedImageTileLoads(SIntendedSubmitInfo& intendedNextSubmit) +{ + bool success = true; + + if 
(streamedImageCopies.size() > 0ull) + { + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); + + if (cmdBuffInfo) + { + std::vector validCopies; + validCopies.reserve(streamedImageCopies.size()); + + // Step 1: collect valid image iters + for (auto it = streamedImageCopies.begin(); it != streamedImageCopies.end(); ++it) + { + const auto& imageID = it->first; + auto* imageRecord = imagesCache->peek(imageID); + + if (it->second.size() > 0u) + { + if (imageRecord && imageRecord->gpuImageView && imageRecord->georeferencedImageState) + validCopies.push_back(it); + else + m_logger.log(std::format("Can't upload to imageId {} yet. (no gpu record yet).", imageID).c_str(), nbl::system::ILogger::ELL_INFO); + } + } + + // m_logger.log(std::format("{} Valid Copies, Frame Idx = {}.", validCopies.size(), currentFrameIndex).c_str(), nbl::system::ILogger::ELL_INFO); + + if (validCopies.size() > 0u) + { + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + std::vector beforeCopyImageBarriers; + beforeCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers before imageCopy + for (auto it : validCopies) + { + auto& [imageID, imageCopies] = *it; + // OutputDebugStringA(std::format("Copying {} copies for Id = {} \n", imageCopies.size(), imageID).c_str()); + + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + { + m_logger.log(std::format("`pushStreamedImagesUploads` failed, no image record found for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); + continue; + } + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + IImage::LAYOUT newLayout = IImage::LAYOUT::GENERAL; + + beforeCopyImageBarriers.push_back( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = 
ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = imageRecord->currentLayout, + .newLayout = newLayout, + }); + imageRecord->currentLayout = newLayout; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); + + for (auto it : validCopies) + { + auto& [imageID, imageCopies] = *it; + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + for (auto& imageCopy : imageCopies) + { + auto srcBuffer = imageCopy.srcBufferFuture.get(); + if (srcBuffer) + { + const bool copySuccess = m_imageUploadUtils->updateImageViaStagingBuffer( + intendedNextSubmit, + srcBuffer->getPointer(), imageCopy.srcFormat, + gpuImg.get(), IImage::LAYOUT::GENERAL, + { &imageCopy.region, 1u }); + success &= copySuccess; + if (!copySuccess) + { + m_logger.log(std::format("updateImageViaStagingBuffer failed. 
region offset = ({}, {}), region size = ({}, {}), gpu image size = ({}, {})", + imageCopy.region.imageOffset.x,imageCopy.region.imageOffset.y, + imageCopy.region.imageExtent.width, imageCopy.region.imageExtent.height, + gpuImg->getCreationParameters().extent.width, gpuImg->getCreationParameters().extent.height).c_str(), nbl::system::ILogger::ELL_ERROR); + } + } + else + m_logger.log(std::format("srcBuffer was invalid for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); + } + } + + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change + + std::vector afterCopyImageBarriers; + afterCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers after imageCopy + for (auto it : validCopies) + { + auto& [imageID, imageCopies] = *it; + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + { + m_logger.log(std::format("`pushStreamedImagesUploads` failed, no image record found for image id {}.", imageID).c_str(), nbl::system::ILogger::ELL_ERROR); + continue; + } + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; - uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); - if (mainObjIdx == InvalidMainObjectIdx) - { - m_logger.log("addGeoreferencedImage: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); - assert(false); - return; - } + IImage::LAYOUT newLayout = IImage::LAYOUT::GENERAL; - GeoreferencedImageInfo info = {}; - info.topLeft = params.worldspaceOBB.topLeft; - info.dirU = params.worldspaceOBB.dirU; - info.aspectRatio = params.worldspaceOBB.aspectRatio; - info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory - if 
(!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) - { - // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - const bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); - if (!success) + afterCopyImageBarriers.push_back ( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + } + // .ownershipOp. No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = imageRecord->currentLayout, + .newLayout = newLayout, + }); + imageRecord->currentLayout = newLayout; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + // Remove the processed valid ones, keep invalids for later retries + for (auto it : validCopies) + streamedImageCopies.erase(it); + } + } + else { - m_logger.log("addGeoreferencedImageInfo_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); - assert(false); + _NBL_DEBUG_BREAK_IF(true); + success = false; } } - endMainObject(); + if (!success) + { + m_logger.log("Failure in `pushStreamedImagesUploads`.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + } + return success; } bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) @@ -927,21 +1242,25 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& 
intendedNextSubmit } bool success = true; + if (currentReplayCache) { + // In rare cases, we need to wait for the previous frame's submit to ensure all GPU usage of the any images has completed. + nbl::video::ISemaphore::SWaitInfo previousSubmitWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + // This means we're in a replay cache scope, use the replay cache to push to GPU instead of internal accumulation success &= pushBufferUploads(intendedNextSubmit, currentReplayCache->resourcesCollection); success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfImagesState); + bool evictedAnotherImage = false; + // Push Static Images Uploads from replay cache, all the work below is necessary to detect whether our image to replay is already in the cache in the exact form OR we need to create new image + bind memory and set array index - auto* device = m_utilities->getLogicalDevice(); - bool replayCacheFullyCovered = true; - for (auto& [imageID, toReplayRecord] : *currentReplayCache->imagesCache) + for (auto& [toReplayImageID, toReplayRecord] : *currentReplayCache->imagesCache) { if (toReplayRecord.type != ImageType::STATIC) // non-static images (Georeferenced) won't be replayed like this continue; - auto* cachedRecord = imagesCache->peek(imageID); + auto* cachedRecord = imagesCache->peek(toReplayImageID); bool alreadyResident = false; // compare with existing state, and check whether image id is already resident. 
@@ -953,92 +1272,104 @@ bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit const bool arrayIndexMatches = cachedRecord->arrayIndex == toReplayRecord.arrayIndex; - alreadyResident = allocationMatches && arrayIndexMatches && cachedRecord->state == ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; + alreadyResident = allocationMatches && arrayIndexMatches && cachedRecord->state != ImageState::INVALID; } - // if already resident, just update the state to the cached state (to make sure it doesn't get issued for upload again) and move on. + // if already resident, ignore, no need to insert into cache anymore if (alreadyResident) { - toReplayRecord.state = cachedRecord->state; // update the toReplayImageRecords's state, to completely match the currently resident state - continue; + cachedRecord->lastUsedFrameIndex = currentFrameIndex; } - - replayCacheFullyCovered = false; - - bool successCreateNewImage = false; - - // Not already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads - auto existingGPUImageViewParams = toReplayRecord.gpuImageView->getCreationParameters(); - IGPUImage::SCreationParams imageParams = {}; - imageParams = existingGPUImageViewParams.image->getCreationParameters(); - - auto newGPUImage = device->createImage(std::move(imageParams)); - if (newGPUImage) + else { - nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = - { - .image = newGPUImage.get(), - .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + toReplayRecord.allocationOffset } - }; + // make sure to evict any cache entry that conflicts with the new entry (either in memory allocation or descriptor index) + if (evictConflictingImagesInCache_SubmitIfNeeded(toReplayImageID, toReplayRecord, intendedNextSubmit)) + evictedAnotherImage = true; - const bool boundToMemorySuccessfully = device->bindImageMemory({ 
&bindImageMemoryInfo, 1u }); - if (boundToMemorySuccessfully) + // creating and inserting new entry + bool successCreateNewImage = false; { - newGPUImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); - IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; - viewParams.image = newGPUImage; + // Not already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads + auto existingGPUImageViewParams = toReplayRecord.gpuImageView->getCreationParameters(); + IGPUImage::SCreationParams imageParams = {}; + imageParams = existingGPUImageViewParams.image->getCreationParameters(); - auto newGPUImageView = device->createImageView(std::move(viewParams)); - if (newGPUImageView) + auto newGPUImage = m_device->createImage(std::move(imageParams)); + if (newGPUImage) { - successCreateNewImage = true; - toReplayRecord.gpuImageView = newGPUImageView; - toReplayRecord.state = ImageState::CREATED_AND_MEMORY_BOUND; - newGPUImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); - } + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = newGPUImage.get(), + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + toReplayRecord.allocationOffset } + }; + + const bool boundToMemorySuccessfully = m_device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + newGPUImage->setObjectDebugName((std::to_string(toReplayImageID) + " Static Image 2D").c_str()); + IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; + viewParams.image = newGPUImage; + + auto newGPUImageView = m_device->createImageView(std::move(viewParams)); + if (newGPUImageView) + { + successCreateNewImage = true; + toReplayRecord.arrayIndexAllocatedUsingImageDescriptorIndexAllocator = false; // array index wasn't allocated useing desc set 
suballocator. it's being replayed + toReplayRecord.gpuImageView = newGPUImageView; + toReplayRecord.state = ImageState::CREATED_AND_MEMORY_BOUND; + toReplayRecord.currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; + toReplayRecord.lastUsedFrameIndex = currentFrameIndex; + newGPUImageView->setObjectDebugName((std::to_string(toReplayImageID) + " Static Image View 2D").c_str()); + } + } + } } - } - if (!successCreateNewImage) - { - m_logger.log("Couldn't create new gpu image in pushAllUploads: cache and replay mode.", nbl::system::ILogger::ELL_ERROR); - _NBL_DEBUG_BREAK_IF(true); - success = false; + if (successCreateNewImage) + { + // inserting the new entry into the cache (With new image and memory binding) + imagesCache->base_t::insert(toReplayImageID, toReplayRecord); + } + else + { + m_logger.log("Couldn't create new gpu image in pushAllUploads: cache and replay mode.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } } - - // Our actual `imageCache` (which represents GPU state) didn't cover the replayCache fully, so new images had to be created, bound to memory. and they need to be written into their respective descriptor array indices again. - // imagesCache = std::make_unique(*currentReplayCache->imagesCache); - imagesCache->clear(); - for (auto it = currentReplayCache->imagesCache->rbegin(); it != currentReplayCache->imagesCache->rend(); it++) - imagesCache->base_t::insert(it->first, it->second); - if (!replayCacheFullyCovered) + success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + + if (evictedAnotherImage) { - // We need to block for previous submit in order to safely update the descriptor set array index next. - // - // [FUTURE_CONSIDERATION]: To avoid stalling the CPU when replaying caches that overflow GPU memory, - // we could recreate the image and image view, binding them to entirely new memory locations. 
- // This would require an indirection mechanism in the shader to remap references from cached geometry or objects to the new image array indices. - // Note: This isn't a problem if the replayed scene fits in memory and doesn't require overflow submissions due to image memory exhaustion. - nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - device->blockForSemaphores({ &waitInfo, 1u }); + // We're about to update the descriptor set binding using the replay's array indices. + // Normally, descriptor-set allocation and updates are synchronized to ensure the GPU + // isn't still using the same descriptor indices we're about to overwrite. + // + // However, in this case we bypassed the descriptor-set allocator (imagesDescriptorIndexAllocator) and are writing directly into the set. + // This means proper synchronization is not guaranteed. + // + // Since evicting another image can happen due to array index conflicts, + // we must ensure that any prior GPU work using those descriptor indices has finished before we update them. + // Therefore, wait for the previous frame (and any usage of these indices) to complete before proceeding to bind/write our images to their descriptor + m_device->blockForSemaphores({ &previousSubmitWaitInfo, 1u }); } - success &= bindImagesToArrayIndices(*imagesCache); - success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); - // Streamed uploads in cache&replay?! 
+ success &= updateDescriptorSetImageBindings(*imagesCache); } else { flushDrawObjects(); success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); success &= pushMSDFImagesUploads(intendedNextSubmit, msdfImagesState); - success &= bindImagesToArrayIndices(*imagesCache); success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); - success &= pushStreamedImagesUploads(intendedNextSubmit); + success &= updateDescriptorSetImageBindings(*imagesCache); } + + return success; } @@ -1132,13 +1463,25 @@ std::unique_ptr DrawResourcesFiller::createRep stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. ret->drawCallsData = drawCalls; ret->activeMainObjectIndex = activeMainObjectIndex; - ret->imagesCache = std::unique_ptr(new ImagesCache(*imagesCache)); + ret->imagesCache = std::unique_ptr(new ImagesCache(ImagesBindingArraySize)); + + // m_logger.log(std::format("== createReplayCache, currentFrameIndex = {} ==", currentFrameIndex).c_str(), nbl::system::ILogger::ELL_INFO); + // imagesCache->logState(m_logger); + + for (auto& [imageID, record] : *imagesCache) + { + // Only return images in the cache used within the last frame + if (record.lastUsedFrameIndex == currentFrameIndex) + ret->imagesCache->base_t::insert(imageID, record); + } + return ret; } void DrawResourcesFiller::setReplayCache(ReplayCache* cache) { currentReplayCache = cache; + // currentReplayCache->imagesCache->logState(m_logger); } void DrawResourcesFiller::unsetReplayCache() @@ -1146,6 +1489,29 @@ void DrawResourcesFiller::unsetReplayCache() currentReplayCache = nullptr; } +uint64_t DrawResourcesFiller::getImagesMemoryConsumption() const +{ + uint64_t ret = 0ull; + for (auto& [imageID, record] : *imagesCache) + ret += record.allocationSize; + return ret; +} + +DrawResourcesFiller::UsageData DrawResourcesFiller::getCurrentUsageData() +{ + UsageData ret = {}; + const auto& resources = getResourcesCollection(); + ret.lineStyleCount = 
resources.lineStyles.getCount(); + ret.dtmSettingsCount = resources.dtmSettings.getCount(); + ret.customProjectionsCount = resources.customProjections.getCount(); + ret.mainObjectCount = resources.mainObjects.getCount(); + ret.drawObjectCount = resources.drawObjects.getCount(); + ret.geometryBufferSize = resources.geometryInfo.getStorageSize(); + ret.bufferMemoryConsumption = resources.calculateTotalConsumption(); + ret.imageMemoryConsumption = getImagesMemoryConsumption(); + return ret; +} + bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resources) { copiedResourcesSize = 0ull; @@ -1172,7 +1538,7 @@ bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSub drawBuffer.bufferOffset = copyRange.offset; if (copyRange.size > 0ull) { - if (!m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) + if (!m_bufferUploadUtils->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data())) return false; copiedResourcesSize += drawBuffer.getAlignedStorageSize(); } @@ -1271,7 +1637,7 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex auto buffer = reinterpret_cast(stagedMSDF.image->getBuffer()->getPointer()); auto bufferOffset = mipImageRegion->bufferOffset; - stagedMSDF.uploadedToGPU = m_utilities->updateImageViaStagingBuffer( + stagedMSDF.uploadedToGPU = m_bufferUploadUtils->updateImageViaStagingBuffer( intendedNextSubmit, buffer + bufferOffset, nbl::ext::TextRendering::TextRenderer::MSDFTextureFormat, @@ -1333,12 +1699,11 @@ bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNex } } -bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) +bool DrawResourcesFiller::updateDescriptorSetImageBindings(ImagesCache& imagesCache) { bool success = true; - auto* device = m_utilities->getLogicalDevice(); - auto* descriptorSet = 
suballocatedDescriptorSet->getDescriptorSet(); + auto* descriptorSet = imagesDescriptorIndexAllocator->getDescriptorSet(); // DescriptorSet Updates std::vector descriptorInfos; @@ -1346,15 +1711,30 @@ bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) descriptorInfos.resize(imagesCache.size()); descriptorWrites.resize(imagesCache.size()); + // Potential GPU waits before writing to descriptor bindings that were previously deallocated manually (bypassing the imagesDescriptorIndexAllocator). + // The allocator normally guarantees safe reuse of array indices by synchronizing allocations and deallocations internally. + // but since these bindings were queued for deferred deallocation, we must ensure their previous GPU usage has completed before writing new data into those slots. + std::vector waitInfos; + waitInfos.reserve(deferredDescriptorIndexDeallocations.size()); + uint32_t descriptorWriteCount = 0u; for (auto& [id, record] : imagesCache) { if (record.state >= ImageState::BOUND_TO_DESCRIPTOR_SET || !record.gpuImageView) continue; + + // Check if this writing to this array index has a deferred deallocation pending + if (auto it = deferredDescriptorIndexDeallocations.find(record.arrayIndex); it != deferredDescriptorIndexDeallocations.end()) + { + // TODO: Assert we're not waiting for a value which hasn't been submitted yet. + waitInfos.push_back(it->second); + // erase -> it's a one-time wait: + deferredDescriptorIndexDeallocations.erase(it); + } // Bind gpu image view to descriptor set video::IGPUDescriptorSet::SDescriptorInfo descriptorInfo = {}; - descriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo.info.image.imageLayout = (record.type == ImageType::STATIC) ? 
IImage::LAYOUT::READ_ONLY_OPTIMAL : IImage::LAYOUT::GENERAL; // WARN: don't use `record.currentLayout`, it's the layout "At the time" the image is going to be accessed descriptorInfo.desc = record.gpuImageView; descriptorInfos[descriptorWriteCount] = descriptorInfo; @@ -1367,12 +1747,17 @@ bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache) descriptorWrite.info = &descriptorInfos[descriptorWriteCount]; descriptorWrites[descriptorWriteCount] = descriptorWrite; + record.state = ImageState::BOUND_TO_DESCRIPTOR_SET; descriptorWriteCount++; } + if (!waitInfos.empty()) + m_device->blockForSemaphores(waitInfos, /*waitAll=*/true); + if (descriptorWriteCount > 0u) - success &= device->updateDescriptorSets(descriptorWriteCount, descriptorWrites.data(), 0u, nullptr); + success &= m_device->updateDescriptorSets(descriptorWriteCount, descriptorWrites.data(), 0u, nullptr); + return success; } @@ -1391,7 +1776,6 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN if (nonResidentImageRecords.size() > 0ull) { - auto* device = m_utilities->getLogicalDevice(); auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); if (cmdBuffInfo) @@ -1425,9 +1809,10 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN .baseArrayLayer = 0u, .layerCount = ICPUImageView::remaining_array_layers }, - .oldLayout = IImage::LAYOUT::UNDEFINED, + .oldLayout = imageRecord.currentLayout, .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, }; + imageRecord.currentLayout = beforeCopyImageBarriers[i].newLayout; } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); @@ -1435,7 +1820,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN { auto& imageRecord = *nonResidentImageRecords[i]; auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image; - success &= m_utilities->updateImageViaStagingBuffer( + success 
&= m_imageUploadUtils->updateImageViaStagingBuffer( intendedNextSubmit, imageRecord.staticCPUImage->getBuffer()->getPointer(), imageRecord.staticCPUImage->getCreationParameters().format, gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, @@ -1478,9 +1863,10 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN .baseArrayLayer = 0u, .layerCount = ICPUImageView::remaining_array_layers }, - .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .oldLayout = imageRecord.currentLayout, .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, }; + imageRecord.currentLayout = afterCopyImageBarriers[i].newLayout; } success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); } @@ -1499,128 +1885,178 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN return success; } -bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::evictConflictingImagesInCache_SubmitIfNeeded(image_id toInsertImageID, const CachedImageRecord& toInsertRecord, nbl::video::SIntendedSubmitInfo& intendedNextSubmit) { - bool success = true; - - if (streamedImageCopies.size() > 0ull) + bool evictedSomething = false; + for (auto& [cachedImageID, cachedRecord] : *imagesCache) { - auto* device = m_utilities->getLogicalDevice(); - auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); - - if (cmdBuffInfo) - { - IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + bool cachedImageConflictsWithImageToReplay = false; - std::vector beforeCopyImageBarriers; - beforeCopyImageBarriers.reserve(streamedImageCopies.size()); + // Case 1: Same imageID, but params differ (offset/size/arrayIndex mismatch) conflict + if (cachedImageID == toInsertImageID) + { + const bool allocationMatches = + cachedRecord.allocationOffset == toInsertRecord.allocationOffset && + cachedRecord.allocationSize == toInsertRecord.allocationSize; + const bool 
arrayIndexMatches = cachedRecord.arrayIndex == toInsertRecord.arrayIndex; + const bool exactSameImage = allocationMatches && arrayIndexMatches; + if (!exactSameImage) + cachedImageConflictsWithImageToReplay = true; + } + else + { + // Different Image ID: + // Conflicted if: 1. same array index or 2. conflict in allocation/mem + const bool sameArrayIndex = cachedRecord.arrayIndex == toInsertRecord.arrayIndex; + const bool conflictingMemory = + (cachedRecord.allocationOffset < toInsertRecord.allocationOffset + toInsertRecord.allocationSize) && + (toInsertRecord.allocationOffset < cachedRecord.allocationOffset + cachedRecord.allocationSize); + + if (sameArrayIndex || conflictingMemory) + cachedImageConflictsWithImageToReplay = true; + } - // Pipeline Barriers before imageCopy - for (auto& [imageID, imageCopies] : streamedImageCopies) - { - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) - continue; + if (cachedImageConflictsWithImageToReplay) + { + evictImage_SubmitIfNeeded(cachedImageID, cachedRecord, intendedNextSubmit); + imagesCache->erase(cachedImageID); + evictedSomething = true; + } + } + return evictedSomething; +} - const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; +bool DrawResourcesFiller::ensureGeoreferencedImageResources_AllocateIfNeeded(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit) +{ + auto* physDev = m_device->getPhysicalDevice(); - beforeCopyImageBarriers.push_back( - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - } - // .ownershipOp. 
No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - }); - } - success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - - for (auto& [imageID, imageCopies] : streamedImageCopies) + // Check if image already exists and requires resize. We do this before insertion and updating `lastUsedFrameIndex` to get correct overflow-submit behaviour + // otherwise we'd always overflow submit, even if not needed and image was not queued/intended to use in the next submit. + CachedImageRecord* cachedImageRecord = imagesCache->get(imageID); + + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // But we need to check if the cached image needs resizing/recreation. + if (cachedImageRecord && cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // found in cache, but does it require resize? recreation? 
+ if (cachedImageRecord->gpuImageView) + { + auto imgViewParams = cachedImageRecord->gpuImageView->getCreationParameters(); + if (imgViewParams.image) { - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) - continue; - - const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; - - for (auto& imageCopy : imageCopies) + const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); + // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus + const auto toCreateParams = static_cast(imageStreamingState->gpuImageCreationParams); + const bool needsRecreation = cachedParams != toCreateParams; + if (needsRecreation) { - success &= m_utilities->updateImageViaStagingBuffer( - intendedNextSubmit, - imageCopy.srcBuffer->getPointer(), imageCopy.srcFormat, - gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - { &imageCopy.region, 1u }); + // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. + // note: it doesn't remove the entry from lru cache. 
+ evictImage_SubmitIfNeeded(imageID, *cachedImageRecord, intendedNextSubmit); + + // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image + CachedImageRecord newRecord = CachedImageRecord(currentFrameIndex); //reset everything except image streaming state + *cachedImageRecord = std::move(newRecord); } } + else + { + m_logger.log("Cached georeferenced image has invalid gpu image.", nbl::system::ILogger::ELL_ERROR); + } + } + else + { + m_logger.log("Cached georeferenced image has invalid gpu image view.", nbl::system::ILogger::ELL_ERROR); + } + } - commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change - std::vector afterCopyImageBarriers; - afterCopyImageBarriers.reserve(streamedImageCopies.size()); + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. 
+ auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + cachedImageRecord = imagesCache->insert(imageID, currentFrameIndex, evictCallback); + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + + // Setting the image streaming state returned in `ensureGeoreferencedImageEntry` which was either creating anew or gotten from this very own cache + cachedImageRecord->georeferencedImageState = std::move(imageStreamingState); + cachedImageRecord->georeferencedImageState->outOfDate = false; - // Pipeline Barriers before imageCopy - for (auto& [imageID, imageCopies] : streamedImageCopies) - { - auto* imageRecord = imagesCache->peek(imageID); - if (imageRecord == nullptr) - continue; + if (cachedImageRecord == nullptr) + { + m_logger.log("Couldn't insert image in cache; make sure you called `ensureGeoreferencedImageEntry` before anything else.", nbl::system::ILogger::ELL_ERROR); + return false; + } - const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + // in which case we don't queue anything for upload, and return the idx + if (cachedImageRecord->arrayIndex == InvalidTextureIndex) + { + // This is a new image (cache miss). Allocate a descriptor index for it. + cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. 
+ imagesDescriptorIndexAllocator->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + cachedImageRecord->arrayIndexAllocatedUsingImageDescriptorIndexAllocator = true; - afterCopyImageBarriers.push_back ( - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - } - // .ownershipOp. No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - }, - .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - }); + if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + { + const auto& imageCreationParams = cachedImageRecord->georeferencedImageState->gpuImageCreationParams; + + std::string debugName = cachedImageRecord->georeferencedImageState->storagePath.string(); + + // Attempt to create a GPU image and image view for this texture. 
+ ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, debugName); + if (allocResults.isValid()) + { + cachedImageRecord->type = ImageType::GEOREFERENCED_STREAMED; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageRecord->allocationOffset = allocResults.allocationOffset; + cachedImageRecord->allocationSize = allocResults.allocationSize; + cachedImageRecord->gpuImageView = allocResults.gpuImageView; + cachedImageRecord->staticCPUImage = nullptr; + evictConflictingImagesInCache_SubmitIfNeeded(imageID, *cachedImageRecord, intendedNextSubmit); } - success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + else + { + // All attempts to try create the GPU image and its corresponding view have failed. + // Most likely cause: insufficient GPU memory or unsupported image parameters. + + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + + if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + { + // We previously successfully create and allocated memory for the Image + // but failed to bind and create image view + // It's crucial to deallocate the offset+size form our images memory suballocator + imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); + } - streamedImageCopies.clear(); + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // We previously allocated a descriptor index, but failed to create a usable GPU image. 
+ // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. + // No semaphore wait needed here, as the GPU never got to use this slot. + imagesDescriptorIndexAllocator->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + cachedImageRecord->arrayIndex = InvalidTextureIndex; + } + + // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(imageID); + } } else { - _NBL_DEBUG_BREAK_IF(true); - success = false; + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed index allocation. shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); + cachedImageRecord->arrayIndex = InvalidTextureIndex; } } - if (!success) - { - m_logger.log("Failure in `pushStreamedImagesUploads`.", nbl::system::ILogger::ELL_ERROR); - _NBL_DEBUG_BREAK_IF(true); - } - return success; + // cached or just inserted, we update the lastUsedFrameIndex + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; + + assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + return (cachedImageRecord->arrayIndex != InvalidTextureIndex); } const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const @@ -2306,35 +2742,65 @@ void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const Cach _NBL_DEBUG_BREAK_IF(true); return; } - // Later used to release the image's memory range. 
- core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); - cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; - cleanupObject->addr = evicted.allocationOffset; - cleanupObject->size = evicted.allocationSize; +#if 0 + m_logger.log(("Evicting Image: \n" + evicted.toString(imageID)).c_str(), nbl::system::ILogger::ELL_INFO); +#endif + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); - // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. - if (imageUsedForNextIntendedSubmit) + if (evicted.arrayIndexAllocatedUsingImageDescriptorIndexAllocator) { - // The evicted image is scheduled for use in the upcoming submit. - // To avoid rendering artifacts, we must flush the current draw queue now. - // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. - submitDraws(intendedNextSubmit); - reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + // Image being evicted was allocated using image descriptor set allocator + // Later used to release the image's memory range. + core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); + cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; + cleanupObject->addr = evicted.allocationOffset; + cleanupObject->size = evicted.allocationSize; + + if (evicted.type == ImageType::GEOREFERENCED_STREAMED) + { + // Important to mark this as out of date. + // because any other place still holding on to the state (which is possible) need to know the image associated with the state has been evicted and the state is no longer valid and needs to "ensure"d again. 
+ evicted.georeferencedImageState->outOfDate = true; + // cancelGeoreferencedImageTileLoads(imageID); // clear any of the pending loads/futures requested for the image + } + + // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. + if (imageUsedForNextIntendedSubmit) + { + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded - // Prepare wait info to defer index deallocation until the GPU has finished using the resource. - // we wait on the signal semaphore for the submit we just did above. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + imagesDescriptorIndexAllocator->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + } + else + { + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. 
+ // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value conservatively, which is the immediate prev submit. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + imagesDescriptorIndexAllocator->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + } } else { - // The image is not used in the current frame, so we can deallocate without submitting any draws. - // Still wait on the semaphore to ensure past GPU usage is complete. - // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value conservatively, which is the immediate prev submit. - ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; - suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + // Less often case: index wasn't allocated using imageDescriptorSetAllocator, like replayed images which skip the allocator to write to the set directly. + // we won't cleanup + multi_dealloc in this case, instead we queue the deallocations and wait for them before any next image writes into the same index. + if (!imageUsedForNextIntendedSubmit) + deferredDescriptorIndexDeallocations[evicted.arrayIndex] = ISemaphore::SWaitInfo{ .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + else + { + m_logger.log(std::format("Image which is being evicted and had skipped descriptor set allocator requires overflow submit; This shouldn't happen. 
Image Info = {}", evicted.toString(imageID)).c_str(), nbl::system::ILogger::ELL_ERROR); + imagesCache->logState(m_logger); + } + } } @@ -2346,8 +2812,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc { ImageAllocateResults ret = {}; - auto* device = m_utilities->getLogicalDevice(); - auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + auto* physDev = m_device->getPhysicalDevice(); bool alreadyBlockedForDeferredFrees = false; @@ -2365,7 +2830,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc params.viewFormats.set(static_cast(imageViewFormatOverride), true); params.flags |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; } - auto gpuImage = device->createImage(std::move(params)); + auto gpuImage = m_device->createImage(std::move(params)); if (gpuImage) { @@ -2378,7 +2843,11 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc if (imageMemoryRequirementsMatch) { + // OutputDebugStringA(std::format("ALlocating {} !!!!\n", gpuImageMemoryRequirements.size).c_str()); + // m_logger.log(std::format(" [BEFORE] Allocator Free Size={} \n",imagesMemorySubAllocator->getFreeSize()).c_str(), nbl::system::ILogger::ELL_INFO); ret.allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); + // m_logger.log(std::format(" [AFTER] Alloc Size = {}, Alloc Offset = {}, Alignment = {} \n",gpuImageMemoryRequirements.size, ret.allocationOffset, 1u << gpuImageMemoryRequirements.alignmentLog2).c_str(), nbl::system::ILogger::ELL_INFO); + // m_logger.log(std::format(" [AFTER] Allocator Free Size={} \n",imagesMemorySubAllocator->getFreeSize()).c_str(), nbl::system::ILogger::ELL_INFO); const bool allocationFromImagesMemoryArenaSuccessfull = ret.allocationOffset != ImagesMemorySubAllocator::InvalidAddress; if (allocationFromImagesMemoryArenaSuccessfull) { @@ -2388,7 +2857,7 @@ 
DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc .image = gpuImage.get(), .binding = { .memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + ret.allocationOffset } }; - const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + const bool boundToMemorySuccessfully = m_device->bindImageMemory({ &bindImageMemoryInfo, 1u }); if (boundToMemorySuccessfully) { gpuImage->setObjectDebugName(imageDebugName.c_str()); @@ -2408,7 +2877,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc viewParams.components.a = nbl::asset::IImageViewBase::SComponentMapping::E_SWIZZLE::ES_ONE; } - ret.gpuImageView = device->createImageView(std::move(viewParams)); + ret.gpuImageView = m_device->createImageView(std::move(viewParams)); if (ret.gpuImageView) { // SUCCESS! @@ -2434,7 +2903,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc } else { - // printf(std::format("Allocation Failed, Trying again, ImageID={} Size={} \n", imageID, gpuImageMemoryRequirements.size).c_str()); + m_logger.log(std::format("Retrying Allocation after failure with Allocation Size={}, Allocator Free Size={} \n", gpuImageMemoryRequirements.size, imagesMemorySubAllocator->getFreeSize()).c_str(), nbl::system::ILogger::ELL_INFO); // recoverable error when allocation fails, we don't log anything, next code will try evicting other images and retry } } @@ -2476,7 +2945,7 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc imagesCache->erase(evictionCandidate); } - while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. + while (imagesDescriptorIndexAllocator->cull_frees()) {}; // to make sure deallocation requests in eviction callback are blocked for. 
alreadyBlockedForDeferredFrees = true; // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference @@ -2486,37 +2955,6 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc return ret; } -void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams) -{ - // Decide whether the image can reside fully into memory rather than get streamed. - // TODO: Improve logic, currently just a simple check to see if the full-screen image has more pixels that viewport or not - // TODO: add criterial that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) - const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y; - - if (betterToResideFullyInMem) - outImageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; - else - outImageType = ImageType::GEOREFERENCED_STREAMED; - - outImageParams.type = asset::IImage::ET_2D; - outImageParams.samples = asset::IImage::ESCF_1_BIT; - outImageParams.format = georeferencedImageParams.format; - - if (outImageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) - { - outImageParams.extent = { georeferencedImageParams.imageExtents.x, georeferencedImageParams.imageExtents.y, 1u }; - } - else - { - // TODO: Better Logic, area around the view, etc... 
- outImageParams.extent = { georeferencedImageParams.viewportExtents.x, georeferencedImageParams.viewportExtents.y, 1u }; - } - - - outImageParams.mipLevels = 1u; // TODO: Later do mipmapping - outImageParams.arrayLayers = 1u; -} - void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) { getGlyphMSDF = func; @@ -2529,6 +2967,7 @@ void DrawResourcesFiller::setHatchFillMSDFTextureFunction(const GetHatchFillPatt void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue) { + // m_logger.log(std::format("Finished Frame Idx = {}", currentFrameIndex).c_str(), nbl::system::ILogger::ELL_INFO); currentFrameIndex++; // TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index // Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphoroe value will signal the completion of the (to be evicted) resource's usage diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index 547926767..3b0e0c4bb 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -1,18 +1,33 @@ +/******************************************************************************/ +/* DrawResourcesFiller: This class provides important functionality to manage resources needed for a draw. 
+/******************************************************************************/ #pragma once + +#if __has_include("glm/glm/glm.hpp") // legacy +#include "glm/glm/glm.hpp" +#else +#include "glm/glm.hpp" // new build system +#endif +#include +#include +#include +#include +#include #include "Polyline.h" -#include "CTriangleMesh.h" #include "Hatch.h" #include "IndexAllocator.h" -#include "Images.h" #include -#include +#include "CTriangleMesh.h" +#include "Shaders/globals.hlsl" +#include "Images.h" + +//#include #include -// #include + using namespace nbl; using namespace nbl::video; using namespace nbl::core; using namespace nbl::asset; -using namespace nbl::ext::TextRendering; static_assert(sizeof(DrawObject) == 16u); static_assert(sizeof(MainObject) == 20u); @@ -120,13 +135,30 @@ struct DrawResourcesFiller geometryInfo.getAlignedStorageSize(); } }; + + // @brief Register a loader + void setGeoreferencedImageLoader(core::smart_refctd_ptr&& _imageLoader) + { + imageLoader = _imageLoader; + } + + uint32_t2 queryGeoreferencedImageExtents(std::filesystem::path imagePath) + { + return imageLoader->getExtents(imagePath); + } + + asset::E_FORMAT queryGeoreferencedImageFormat(std::filesystem::path imagePath) + { + return imageLoader->getFormat(imagePath); + } DrawResourcesFiller(); - DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); + DrawResourcesFiller(smart_refctd_ptr&& device, smart_refctd_ptr&& bufferUploadUtils, smart_refctd_ptr&& imageUploadUtils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); typedef std::function SubmitFunc; void setSubmitDrawsFunction(const SubmitFunc& func); + // DrawResourcesFiller needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding void setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding); @@ -147,10 +179,11 @@ struct DrawResourcesFiller * @param logicalDevice Pointer to the logical device used 
for memory allocation and resource creation. * @param requiredImageMemorySize The size in bytes of the memory required for images. * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. + * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. * * @return true if the memory allocation and resource setup succeeded; false otherwise. */ - bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize); + bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize, std::span memoryTypeIndexTryOrder); /** * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. @@ -162,12 +195,13 @@ struct DrawResourcesFiller * @param logicalDevice Pointer to the logical device used for allocation. * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. + * @param memoryTypeIndexTryOrder Ordered list of memory type indices to attempt allocation with, in the order they should be tried. * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). * @param maxTries Maximum number of attempts to try reducing and allocating memory. * * @return true if the allocation succeeded at any iteration; false if all attempts failed. 
*/ - bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); + bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, std::span memoryTypeIndexTryOrder, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); bool allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); @@ -323,35 +357,103 @@ struct DrawResourcesFiller */ bool ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); + // This function must be called immediately after `addStaticImage` for the same imageID. + void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); + + /* + Georeferenced Image Functions: + */ + /** - * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. - * - * If the specified image ID is not already present in the cache, or if the cached version is incompatible - * with the requested parameters (e.g. extent, format, or type), this function allocates GPU memory, - * creates the image and its view, to be bound to a descriptor binding in the future. - * - * If the image already exists and matches the requested parameters, its usage metadata is updated. - * In either case, the cache is updated to reflect usage in the current frame. + * @brief Computes the recommended GPU image extents for streamed (georeferenced) imagery. * - * This function also handles automatic eviction of old images via an LRU policy when space is limited. 
+ * This function estimates the required GPU-side image size to safely cover the current viewport, accounting for: + * - Full coverage of twice the viewport at mip 0 + * - Arbitrary rotation (by considering the diagonal) + * - Padding * - * @param imageID Unique identifier of the image to add or reuse. - * @param params Georeferenced Image Params - * @param intendedNextSubmit Submit info object used to track resources pending GPU submission. + * The resulting size is always rounded up to a multiple of the georeferenced tile size. * - * @return true if the image was successfully cached and is ready for use; false if allocation failed. - * [TODO]: should be internal protected member function. - */ - bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + * @param viewportExtents The width and height of the viewport in pixels. + * @return A uint32_t2 representing the GPU image width and height for streamed imagery. + */ + static uint32_t2 computeStreamingImageExtentsForViewportCoverage(const uint32_t2 viewportExtents); + + /** + * @brief Creates a streaming state for a georeferenced image. + * + * This function prepares the required state for streaming and rendering a georeferenced image. + * + * WARNING: User should make sure to: + * - Transforms the OBB into world space if custom projections (such as dwg/symbols) are active. + * + * Specifically, this function: + * - Builds a new GeoreferencedImageStreamingState for the given image ID, OBB, and storage path. + * - Looks up image info such as format and extents from the registered loader and the storage path + * - Updates the returned state with current viewport. + * + * @note The returned state is not managed by the cache. The caller is responsible for + * storing it and passing the same state to subsequent streaming and draw functions. 
+ * + * this function does **not** insert the image into the internal cache, because doing so could lead to + * premature eviction (either of this image or of another resource) before the draw call is made. + * + * @param imageID Unique identifier of the image. + * @param worldspaceOBB Oriented bounding box of the image in world space. + * @param viewportExtent Extent of the current viewport in pixels. + * @param ndcToWorldMat 3x3 matrix transforming NDC coordinates to world coordinates. + * @param storagePath Filesystem path where the image data is stored. + * @return A GeoreferencedImageStreamingState object initialized for this image. + */ + nbl::core::smart_refctd_ptr ensureGeoreferencedImageEntry(image_id imageID, const OrientedBoundingBox2D& worldSpaceOBB, const uint32_t2 currentViewportExtents, const float64_t3x3& ndcToWorldMat, const std::filesystem::path& storagePath); - // [TODO]: should be internal protected member function. - bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); + /** + * @brief Launches tile loading for a cached georeferenced image. + * + * Queues all tiles visible in the current viewport for GPU upload. + * + * The work includes: + * - Calculating visible tile coverage from the OBB and viewport. + * - Loading the necessary tiles from disk via the registered `imageLoader`. + * - Preparing staging buffers and `IImage::SBufferCopy` upload regions for GPU transfer. + * - Appending the upload commands into `streamedImageCopies` for later execution. + * - Updating the state's tile occupancy map to reflect newly resident tiles. + * + * Context: this function is dedicated to streaming tiles for georeferenced images only. + * This function should be called anywhere between `ensureGeoreferencedImageEntry` and `finalizeGeoreferencedImageTileLoads` + * But It's prefered to start loading as soon as possible to hide the latency of loading tiles from disk. 
+ * + * @note The `imageStreamingState` passed in must be exactly the one returned by `ensureGeoreferencedImageEntry` with same image_id. Passing a stale or unrelated state is undefined. + * @note This function only queues uploads; GPU transfer happens later when queued copies are executed. + * + * @param imageID Unique identifier of the image. + * @param imageStreamingState Reference to the GeoreferencedImageStreamingState created or returned by `ensureGeoreferencedImageEntry` with same image_id. + */ + bool launchGeoreferencedImageTileLoads(image_id imageID, GeoreferencedImageStreamingState* imageStreamingState, const WorldClipRect clipRect); - // This function must be called immediately after `addStaticImage` for the same imageID. - void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); + bool cancelGeoreferencedImageTileLoads(image_id imageID); + + /** + * @brief Issue Drawing a GeoreferencedImage + * + * Ensures streaming resources are allocated, computes addressing and positioning info (OBB and min/max UV), and pushes the image info to the geometry buffer for rendering. + * + * This function should be called anywhere between `ensureGeoreferencedImageEntry` and `finalizeGeoreferencedImageTileLoads` + * + * @note The `imageStreamingState` must be the one returned by `ensureGeoreferencedImageEntry`. + * + * @param imageID Unique identifier of the image. + * @param imageStreamingState Reference to the GeoreferencedImageStreamingState created or returned by `ensureGeoreferencedImageEntry` with same image_id. + * @param intendedNextSubmit Submission info describing synchronization and barriers for the next batch. + */ + void drawGeoreferencedImage(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit); - // This function must be called immediately after `addStaticImage` for the same imageID. 
- void addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + /** + * @brief copies the queued up streamed copies. + * @note call this function after `drawGeoreferencedImage` to make sure there is a gpu resource to copy to. + * @because`drawGeoreferencedImage` internally calls `ensureGeoreferencedImageResources_AllocateIfNeeded` + */ + bool finalizeGeoreferencedImageTileLoads(SIntendedSubmitInfo& intendedNextSubmit); /// @brief call this function before submitting to ensure all buffer and textures resourcesCollection requested via drawing calls are copied to GPU /// records copy command into intendedNextSubmit's active command buffer and might possibly submits if fails allocation on staging upload memory. @@ -488,6 +590,49 @@ struct DrawResourcesFiller /// Must be called once per corresponding `pushReplayCacheUse()`. void unsetReplayCache(); + uint64_t getImagesMemoryConsumption() const; + + struct UsageData + { + uint32_t lineStyleCount = 0u; + uint32_t dtmSettingsCount = 0u; + uint32_t customProjectionsCount = 0u; + uint32_t mainObjectCount = 0u; + uint32_t drawObjectCount = 0u; + uint32_t geometryBufferSize = 0u; + uint64_t bufferMemoryConsumption = 0ull; + uint64_t imageMemoryConsumption = 0ull; + + void add(const UsageData& other) + { + lineStyleCount += other.lineStyleCount; + dtmSettingsCount += other.dtmSettingsCount; + customProjectionsCount += other.customProjectionsCount; + mainObjectCount += other.mainObjectCount; + drawObjectCount += other.drawObjectCount; + geometryBufferSize += other.geometryBufferSize; + bufferMemoryConsumption = nbl::hlsl::max(bufferMemoryConsumption, other.bufferMemoryConsumption); + imageMemoryConsumption = nbl::hlsl::max(imageMemoryConsumption, other.imageMemoryConsumption); + } + + std::string toString() const + { + std::ostringstream oss; + oss << "Usage Data:\n"; + oss << " lineStyles (Count): " << lineStyleCount << "\n"; + oss << " dtmSettings (Count): " << 
dtmSettingsCount << "\n"; + oss << " customProjections (Count): " << customProjectionsCount << "\n"; + oss << " mainObject (Count): " << mainObjectCount << "\n"; + oss << " drawObject (Count): " << drawObjectCount << "\n"; + oss << " geometryBufferSize (Bytes): " << geometryBufferSize << "\n"; + oss << " Max Buffer Memory Consumption (Bytes): " << bufferMemoryConsumption << "\n"; + oss << " Max Image Memory Consumption (Bytes):" << imageMemoryConsumption; + return oss.str(); + } + }; + + UsageData getCurrentUsageData(); + protected: SubmitFunc submitDraws; @@ -499,13 +644,46 @@ struct DrawResourcesFiller bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& msdfImagesState); /// @brief binds cached images into their correct descriptor set slot if not already resident. - bool bindImagesToArrayIndices(ImagesCache& imagesCache); + bool updateDescriptorSetImageBindings(ImagesCache& imagesCache); /// @brief Records GPU copy commands for all staged images into the active command buffer. bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache); - /// @brief copies the queued up streamed copies. - bool pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief Handles eviction of images with conflicting memory regions or array indices in cache & replay mode. + /// + /// In cache & replay mode, image allocations bypass the standard arena allocator and are rebound + /// to their original GPU memory locations. Since we can't depend on the allocator to avoid conflicting memory location, + /// this function scans the image cache for potential overlaps with the given image and evicts any conflicting entries, submitting work if necessary. + /// + /// @param toInsertImageID Identifier of the image being inserted. + /// @param toInsertRecord Record describing the image and its intended memory placement. 
+ /// @param intendedNextSubmit Reference to the intended GPU submit info; may be used if eviction requires submission. + /// @return true if something was evicted, false otherwise + bool evictConflictingImagesInCache_SubmitIfNeeded(image_id toInsertImageID, const CachedImageRecord& toInsertRecord, nbl::video::SIntendedSubmitInfo& intendedNextSubmit); + + /* + GeoreferencesImage Protected Functions: + */ + + /** + * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. + * + * If the specified image ID is not already present in the cache, or if the cached version is incompatible + * with the requested parameters (e.g. extent, format, or type), this function allocates GPU memory, + * creates the image and its view, to be bound to a descriptor binding in the future. + * + * If the image already exists and matches the requested parameters, its usage metadata is updated. + * In either case, the cache is updated to reflect usage in the current frame. + * + * This function also handles automatic eviction of old images via an LRU policy when space is limited. + * + * @param imageID Unique identifier of the image to add or reuse. + * @param imageStreamingState Reference to the GeoreferencedImageStreamingState created or returned by `ensureGeoreferencedImageEntry` with same image_id. + * @param intendedNextSubmit Submit info object used to track resources pending GPU submission. + * + * @return true if the image was successfully cached and is ready for use; false if allocation failed. 
+ */ + bool ensureGeoreferencedImageResources_AllocateIfNeeded(image_id imageID, nbl::core::smart_refctd_ptr&& imageStreamingState, SIntendedSubmitInfo& intendedNextSubmit); const size_t calculateRemainingResourcesSize() const; @@ -596,7 +774,7 @@ struct DrawResourcesFiller bool addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx);; /// Attempts to upload a georeferenced image info considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) - bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx);; + bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx); uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); @@ -651,19 +829,6 @@ struct DrawResourcesFiller nbl::video::SIntendedSubmitInfo& intendedNextSubmit, std::string imageDebugName); - /** - * @brief Determines creation parameters for a georeferenced image based on heuristics. - * - * This function decides whether a georeferenced image should be treated as a fully resident GPU texture - * or as a streamable image based on the relationship between its total resolution and the viewport size. - * It then fills out the appropriate Nabla image creation parameters. - * - * @param[out] outImageParams Structure to be filled with image creation parameters (format, size, etc.). - * @param[out] outImageType Indicates whether the image should be fully resident or streamed. - * @param[in] georeferencedImageParams Parameters describing the full image extents, viewport extents, and format. 
- */ - void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams); - /** * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter */ @@ -744,9 +909,9 @@ struct DrawResourcesFiller { computeBlake3Hash(); } - bool operator==(const MSDFInputInfo& rhs) const - { return hash == rhs.hash && glyphIndex == rhs.glyphIndex && type == rhs.type; + { + return hash == rhs.hash && glyphIndex == rhs.glyphIndex && type == rhs.type; } MSDFType type; @@ -762,7 +927,6 @@ struct DrawResourcesFiller core::blake3_hash_t hash = {}; // actual hash, we will check in == operator size_t lookupHash = 0ull; // for containers expecting size_t hash - private: void computeBlake3Hash() @@ -795,7 +959,7 @@ struct DrawResourcesFiller uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); - + // Flushes Current Draw Call and adds to drawCalls void flushDrawObjects(); @@ -818,12 +982,11 @@ struct DrawResourcesFiller nbl::core::smart_refctd_ptr resourcesGPUBuffer; size_t copiedResourcesSize; - // GPUImages Memory Arena + AddressAllocator - IDeviceMemoryAllocator::SAllocation imagesMemoryArena; - smart_refctd_ptr imagesMemorySubAllocator; - // Members - smart_refctd_ptr m_utilities; + smart_refctd_ptr m_device; + core::smart_refctd_ptr m_bufferUploadUtils; + core::smart_refctd_ptr m_imageUploadUtils; + IQueue* m_copyQueue; // Active Resources we need to keep track of and push to resources buffer if needed. @@ -858,10 +1021,19 @@ struct DrawResourcesFiller bool m_hasInitializedMSDFTextureArrays = false; // Images: + core::smart_refctd_ptr imageLoader; + // A. 
Image Cache std::unique_ptr imagesCache; - smart_refctd_ptr suballocatedDescriptorSet; + // B. GPUImages Memory Arena + AddressAllocator + IDeviceMemoryAllocator::SAllocation imagesMemoryArena; + smart_refctd_ptr imagesMemorySubAllocator; + // C. Images Descriptor Set Allocation/Deallocation uint32_t imagesArrayBinding = 0u; - + smart_refctd_ptr imagesDescriptorIndexAllocator; + // Tracks descriptor array indices that have been logically deallocated independant of the `imagesDescriptorSetAllocator` but may still be in use by the GPU. + // Notes: If `imagesDescriptorIndexAllocator` could give us functionality to force allocate and exact index, that would allow us to replay the cache perfectly + // remove the variable below and only rely on the `imagesDescriptorIndexAllocator` to synchronize accesses to descriptor sets for us. but unfortuantely it doesn't have that functionality yet. + std::unordered_map deferredDescriptorIndexDeallocations; + // D. Queued Up Copies/Futures for Streamed Images std::unordered_map> streamedImageCopies; -}; - +}; \ No newline at end of file diff --git a/62_CAD/GeoTexture.cpp b/62_CAD/GeoTexture.cpp deleted file mode 100644 index de8a974d0..000000000 --- a/62_CAD/GeoTexture.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include "GeoTexture.h" - -bool GeoTextureRenderer::initialize( - IShader* vertexShader, - IShader* fragmentShader, - IGPURenderpass* compatibleRenderPass, - const smart_refctd_ptr& globalsBuffer) -{ - video::IGPUDescriptorSetLayout::SBinding bindingsSet0[] = { - { - .binding = 0u, - .type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - }; - m_descriptorSetLayout0 = m_device->createDescriptorSetLayout(bindingsSet0); - if (!m_descriptorSetLayout0) - return logFail("Failed to Create Descriptor Layout 0"); - - 
video::IGPUDescriptorSetLayout::SBinding bindingsSet1[] = { - { - .binding = 0u, - .type = asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 1u, - .type = asset::IDescriptor::E_TYPE::ET_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - }; - m_descriptorSetLayout1 = m_device->createDescriptorSetLayout(bindingsSet1); - if (!m_descriptorSetLayout1) - return logFail("Failed to Create Descriptor Layout 1"); - - const video::IGPUDescriptorSetLayout* const layouts[2u] = { m_descriptorSetLayout0.get(), m_descriptorSetLayout1.get() }; - - { - const uint32_t setCounts[2u] = { 1u, MaxGeoTextures}; - m_descriptorPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - if (!m_descriptorPool) - return logFail("Failed to Create Descriptor Pool"); - } - - - asset::SPushConstantRange pushConstantRanges[1u] = - { - {.stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0ull, .size = sizeof(GeoTextureOBB)}, - }; - m_pipelineLayout = m_device->createPipelineLayout(pushConstantRanges, core::smart_refctd_ptr(m_descriptorSetLayout0), core::smart_refctd_ptr(m_descriptorSetLayout1), nullptr, nullptr); - - // Set 0 Create and Bind - m_descriptorSet0 = m_descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout0)); - constexpr uint32_t DescriptorCountSet0 = 1u; - IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[DescriptorCountSet0] = {}; - - descriptorInfosSet0[0u].info.buffer.offset = 0u; - descriptorInfosSet0[0u].info.buffer.size = globalsBuffer->getCreationParams().size; - descriptorInfosSet0[0u].desc = globalsBuffer; - - constexpr uint32_t DescriptorUpdatesCount = DescriptorCountSet0; - 
video::IGPUDescriptorSet::SWriteDescriptorSet descriptorUpdates[DescriptorUpdatesCount] = {}; - - descriptorUpdates[0u].dstSet = m_descriptorSet0.get(); - descriptorUpdates[0u].binding = 0u; - descriptorUpdates[0u].arrayElement = 0u; - descriptorUpdates[0u].count = 1u; - descriptorUpdates[0u].info = &descriptorInfosSet0[0u]; - m_device->updateDescriptorSets(DescriptorUpdatesCount, descriptorUpdates, 0u, nullptr); - - // Shared Blend Params between pipelines - //TODO: Where does GeoTexture rendering fit into pipelines, separate renderpass? separate submit? under blending? over blending? - SBlendParams blendParams = {}; - blendParams.blendParams[0u].srcColorFactor = asset::EBF_SRC_ALPHA; - blendParams.blendParams[0u].dstColorFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; - blendParams.blendParams[0u].colorBlendOp = asset::EBO_ADD; - blendParams.blendParams[0u].srcAlphaFactor = asset::EBF_ONE; - blendParams.blendParams[0u].dstAlphaFactor = asset::EBF_ZERO; - blendParams.blendParams[0u].alphaBlendOp = asset::EBO_ADD; - blendParams.blendParams[0u].colorWriteMask = (1u << 4u) - 1u; - - // Create Main Graphics Pipelines - { - video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = { - {.shader=vertexShader }, - {.shader=fragmentShader }, - }; - - IGPUGraphicsPipeline::SCreationParams params[1] = {}; - params[0].layout = m_pipelineLayout.get(); - params[0].vertexShader = specInfo[0]; - params[0].fragmentShader = specInfo[1]; - params[0].cached = { - .vertexInput = {}, - .primitiveAssembly = { - .primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST, - }, - .rasterization = { - .polygonMode = EPM_FILL, - .faceCullingMode = EFCM_NONE, - .depthWriteEnable = false, - }, - .blend = blendParams, - }; - params[0].renderpass = compatibleRenderPass; - - if (!m_device->createGraphicsPipelines(nullptr,params,&m_graphicsPipeline)) - return logFail("Graphics Pipeline Creation Failed."); - } - -} diff --git a/62_CAD/GeoTexture.h b/62_CAD/GeoTexture.h deleted file mode 100644 index 
f471009fc..000000000 --- a/62_CAD/GeoTexture.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -using namespace nbl::hlsl; -#include "shaders/geotexture/common.hlsl" - -using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; - -class GeoTexture : public nbl::core::IReferenceCounted -{ - GeoTextureOBB obbInfo = {}; - smart_refctd_ptr descriptorSet; // or index allocated in main geo texture renderer - smart_refctd_ptr texture; -}; - -class GeoTextureRenderer -{ -public: - static constexpr const char* VertexShaderRelativePath = "../shaders/geotexture/vertex_shader.hlsl"; - static constexpr const char* FragmentShaderRelativePath = "../shaders/geotexture/fragment_shader.hlsl"; - - GeoTextureRenderer(smart_refctd_ptr&& device, smart_refctd_ptr&& logger) - : m_device(device) - , m_logger(logger) - {} - - bool initialize( - IShader* vertexShader, - IShader* fragmentShader, - IGPURenderpass* compatibleRenderPass, - const smart_refctd_ptr& globalsBuffer); - - void createGeoTexture(const nbl::system::path& geoTexturePath); // + OBB Info (center, rotation, aspect ratio from image?) - - void bindPipeline(video::IGPUCommandBuffer* commandBuffer); - - void drawGeoTexture(const GeoTexture* geoTexture, video::IGPUCommandBuffer* commandBuffer); - -private: - - // made it return false so we can save some lines writing `if (failCond) {logFail(); return false;}` - template - inline bool logFail(const char* msg, Args&&... 
args) - { - m_logger->log(msg,system::ILogger::ELL_ERROR,std::forward(args)...); - return false; - } - -private: - smart_refctd_ptr m_device; - smart_refctd_ptr m_logger; - - smart_refctd_ptr m_pipelineLayout; - smart_refctd_ptr m_graphicsPipeline; - smart_refctd_ptr m_sampler; - smart_refctd_ptr m_descriptorPool; - smart_refctd_ptr m_descriptorSetLayout0; // globals - smart_refctd_ptr m_descriptorSet0; - smart_refctd_ptr m_descriptorSetLayout1; // contains geo texture -}; diff --git a/62_CAD/Hatch.cpp b/62_CAD/Hatch.cpp index cfb10b9af..b383e7d81 100644 --- a/62_CAD/Hatch.cpp +++ b/62_CAD/Hatch.cpp @@ -812,7 +812,7 @@ static constexpr float64_t FillPatternShapeExtent = 32.0; void line(std::vector& polylines, float64_t2 begin, float64_t2 end) { - std::vector points = { + std::array points = { begin, end }; CPolyline polyline; @@ -846,15 +846,23 @@ void checkered(std::vector& polylines, const float64_t2& offset) float64_t2(0.0, 1.0), }; { - std::vector points; - points.reserve(squarePointsCW.size()); - for (const auto& p : squarePointsCW) points.push_back(p * FillPatternShapeExtent + offset); + std::array points; + auto i = 0u; + for (const auto& p : squarePointsCW) + { + points[i] = p * FillPatternShapeExtent + offset; + i++; + } polyline.addLinePoints(points); } { - std::vector points; - points.reserve(squarePointsCW.size()); - for (const auto& p : squarePointsCW) points.push_back((p + float64_t2(0.5, -0.5)) * FillPatternShapeExtent + offset); + std::array points; + auto i = 0u; + for (const auto& p : squarePointsCW) + { + points[i] = (p + float64_t2(0.5, -0.5)) * FillPatternShapeExtent + offset; + i++; + } polyline.addLinePoints(points); } polylines.push_back(std::move(polyline)); @@ -885,16 +893,24 @@ void diamonds(std::vector& polylines, const float64_t2& offset) // Outer { - std::vector points; - points.reserve(diamondPointsCW.size()); - for (const auto& p : diamondPointsCW) points.push_back(p * outerSize + origin); + std::array points; + auto i = 0u; + for 
(const auto& p : diamondPointsCW) + { + points[i] = p * outerSize + origin; + i++; + } polyline.addLinePoints(points); } // Inner { - std::vector points; - points.reserve(diamondPointsCCW.size()); - for (const auto& p : diamondPointsCCW) points.push_back(p * innerSize + origin); + std::array points; + auto i = 0u; + for (const auto& p : diamondPointsCCW) + { + points[i] = p * innerSize + origin; + i++; + } polyline.addLinePoints(points); } polylines.push_back(std::move(polyline)); @@ -915,9 +931,13 @@ void crossHatch(std::vector& polylines, const float64_t2& offset) float64_t2(0.375, 0.0), }; { - std::vector points; - points.reserve(outerPointsCW.size()); - for (const auto& p : outerPointsCW) points.push_back(p * FillPatternShapeExtent + offset); + std::array points; + auto i = 0u; + for (const auto& p : outerPointsCW) + { + points[i] = p * FillPatternShapeExtent + offset; + i++; + } polyline.addLinePoints(points); } @@ -930,9 +950,13 @@ void crossHatch(std::vector& polylines, const float64_t2& offset) }; { float64_t2 origin = float64_t2(FillPatternShapeExtent/2.0, FillPatternShapeExtent/2.0) + offset; - std::vector points; - points.reserve(diamondPointsCCW.size()); - for (const auto& p : diamondPointsCCW) points.push_back(p * 0.75 * FillPatternShapeExtent + origin); + std::array points; + auto i = 0u; + for (const auto& p : diamondPointsCCW) + { + points[i] = p * 0.75 * FillPatternShapeExtent + origin; + i++; + } polyline.addLinePoints(points); } polylines.push_back(std::move(polyline)); @@ -948,7 +972,7 @@ void hatch(std::vector& polylines, const float64_t2& offset) { float64_t2 radiusOffsetTL = float64_t2(+lineDiameter / 2.0, +lineDiameter / 2.0) * FillPatternShapeExtent / 8.0; float64_t2 radiusOffsetBL = float64_t2(-lineDiameter / 2.0, -lineDiameter / 2.0) * FillPatternShapeExtent / 8.0; - std::vector points = { + std::array points = { basePt0 + radiusOffsetTL, basePt0 + radiusOffsetBL, // 0 basePt1 + radiusOffsetBL, // 1 @@ -1052,7 +1076,7 @@ void 
reverseHatch(std::vector& polylines, const float64_t2& offset) { float64_t2 radiusOffsetTL = float64_t2(-lineDiameter / 2.0, +lineDiameter / 2.0) * FillPatternShapeExtent / 8.0; float64_t2 radiusOffsetBL = float64_t2(+lineDiameter / 2.0, -lineDiameter / 2.0) * FillPatternShapeExtent / 8.0; - std::vector points = { + std::array points = { basePt0 + radiusOffsetTL, basePt1 + radiusOffsetTL, // 0 basePt1 + radiusOffsetBL, // 1 diff --git a/62_CAD/Images.cpp b/62_CAD/Images.cpp new file mode 100644 index 000000000..f29ba1f61 --- /dev/null +++ b/62_CAD/Images.cpp @@ -0,0 +1,396 @@ +#include "Images.h" + +using namespace nbl::hlsl; + +ImageCleanup::ImageCleanup() + : imagesMemorySuballocator(nullptr) + , addr(ImagesMemorySubAllocator::InvalidAddress) + , size(0ull) +{ +} + +ImageCleanup::~ImageCleanup() +{ + // printf(std::format("Actual Eviction size={}, offset={} \n", size, addr).c_str()); + if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) + imagesMemorySuballocator->deallocate(addr, size); +} + +bool GeoreferencedImageStreamingState::init(const OrientedBoundingBox2D& worldspaceOBB, const uint32_t2 fullResImageExtents, const asset::E_FORMAT sourceImageFormat, const std::filesystem::path& storagePath) +{ + this->worldspaceOBB = std::move(worldspaceOBB); + this->fullResImageExtents = fullResImageExtents; + this->sourceImageFormat = sourceImageFormat; + this->storagePath = storagePath; + // 1. Get the displacement (will be an offset vector in world coords and world units) from the `topLeft` corner of the image to the point + // 2. Transform this displacement vector into the coordinates in the basis {dirU, dirV} (worldspace vectors that span the sides of the image). 
+ // The composition of these matrices therefore transforms any point in worldspace into uv coordinates in imagespace + // To reduce code complexity, instead of computing the product of these matrices, since the first is a pure displacement matrix + // (non-homogenous 2x2 upper left is identity matrix) and the other is a pure rotation matrix (2x2) we can just put them together + // by putting the rotation in the upper left 2x2 of the result and the post-rotated displacement in the upper right 2x1. + // The result is also 2x3 and not 3x3 because we can drop he homogenous since the displacement yields a vector + + // 2. Change of Basis. Since {dirU, dirV} are orthogonal, the matrix to change from world coords to `span{dirU, dirV}` coords has a quite nice expression + // Non-uniform scaling doesn't affect this, but this has to change if we allow for shearing (basis vectors stop being orthogonal) + const float64_t2 dirU = this->worldspaceOBB.dirU; + const float64_t2 dirV = float64_t2(dirU.y, -dirU.x) * float64_t(this->worldspaceOBB.aspectRatio); + const float64_t dirULengthSquared = nbl::hlsl::dot(dirU, dirU); + const float64_t dirVLengthSquared = nbl::hlsl::dot(dirV, dirV); + const float64_t2 firstRow = dirU / dirULengthSquared; + const float64_t2 secondRow = dirV / dirVLengthSquared; + + const float64_t2 displacement = -(this->worldspaceOBB.topLeft); + // This is the same as multiplying the change of basis matrix by the displacement vector + const float64_t postRotatedShiftX = nbl::hlsl::dot(firstRow, displacement); + const float64_t postRotatedShiftY = nbl::hlsl::dot(secondRow, displacement); + + // Put them all together + this->worldToUV = float64_t2x3(firstRow.x, firstRow.y, postRotatedShiftX, secondRow.x, secondRow.y, postRotatedShiftY); + + // Also set the maxMipLevel - to keep stuff simple, we don't consider having less than one tile per dimension + // If you're zoomed out enough then at that point the whole image is just sampled as one tile along that 
dimension + // In pathological cases, such as images that are way bigger on one side than the other, this could cause aliasing and slow down sampling if zoomed out too much. + // If we were ever to observe such pathological cases, then maybe we should consider doing something else here. For example, making the loader able to handle different tile lengths per dimension + // (so for example a 128x64 tile) but again for now it should be left as-is. + uint32_t2 maxMipLevels = nbl::hlsl::findMSB(nbl::hlsl::roundUpToPoT(this->fullResImageExtents / GeoreferencedImageTileSize)); + this->maxMipLevel = nbl::hlsl::min(maxMipLevels.x, maxMipLevels.y); + + this->fullImageTileLength = (this->fullResImageExtents - 1u) / GeoreferencedImageTileSize + 1u; + + return true; +} + +void GeoreferencedImageStreamingState::updateStreamingStateForViewport(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat) +{ + currentViewportTileRange = computeViewportTileRange(viewportExtent, ndcToWorldMat); + // Slide or remap the current mapped region to ensure the viewport falls inside it + ensureMappedRegionCoversViewport(currentViewportTileRange); + + const uint32_t2 lastTileIndex = getLastTileIndex(currentViewportTileRange.baseMipLevel); + const uint32_t2 lastTileSampligOffsetMip0 = (lastTileIndex * GeoreferencedImageTileSize) << currentViewportTileRange.baseMipLevel; + lastTileSamplingExtent = fullResImageExtents - lastTileSampligOffsetMip0; + const uint32_t2 lastTileTargetExtentMip1 = lastTileSamplingExtent >> (currentViewportTileRange.baseMipLevel + 1); + lastTileTargetExtent = lastTileTargetExtentMip1 << 1u; +} + +core::vector GeoreferencedImageStreamingState::tilesToLoad() const +{ + core::vector retVal; + for (uint32_t tileY = currentViewportTileRange.topLeftTile.y; tileY <= currentViewportTileRange.bottomRightTile.y; tileY++) + for (uint32_t tileX = currentViewportTileRange.topLeftTile.x; tileX <= currentViewportTileRange.bottomRightTile.x; tileX++) + { + uint32_t2 
imageTileIndex = uint32_t2(tileX, tileY); + // Toroidal shift to find which gpu tile the image tile corresponds to + uint32_t2 gpuImageTileIndex = ((imageTileIndex - currentMappedRegionTileRange.topLeftTile) + gpuImageTopLeft) % gpuImageSideLengthTiles; + // Don't bother scheduling an upload if the tile is already resident + if (!currentMappedRegionOccupancy[gpuImageTileIndex.x][gpuImageTileIndex.y]) + retVal.push_back({ imageTileIndex , gpuImageTileIndex }); + } + return retVal; +} + +GeoreferencedImageInfo GeoreferencedImageStreamingState::computeGeoreferencedImageAddressingAndPositioningInfo() +{ + GeoreferencedImageInfo ret = {}; + + // Figure out an obb that covers only the currently loaded tiles + OrientedBoundingBox2D viewportEncompassingOBB = worldspaceOBB; + // The image's worldspace dirU corresponds to `fullResImageExtents.x` texels of the image, therefore one image texel in the U direction has a worldspace span of `dirU / fullResImageExtents.x`. + // One mip 0 tiles therefore spans `dirU * GeoreferencedImageTileSize/ fullResImageExtents.x`. A mip `n` tile spans `2^n` this amount, since each texel at that mip level spans + // `2^n` mip texels. 
Therefore the dirU offset from the image wordlspace's topLeft of the tile of index `currentViewportTileRange.topLeftTile.x` at mip level `currentMappedRegion.baseMipLevel` can be calculated as + const uint32_t oneTileTexelSpan = GeoreferencedImageTileSize << currentMappedRegionTileRange.baseMipLevel; + viewportEncompassingOBB.topLeft += worldspaceOBB.dirU * float32_t(currentViewportTileRange.topLeftTile.x * oneTileTexelSpan) / float32_t(fullResImageExtents.x); + // Same reasoning for offset in v direction + const float32_t2 dirV = float32_t2(worldspaceOBB.dirU.y, -worldspaceOBB.dirU.x) * worldspaceOBB.aspectRatio; + viewportEncompassingOBB.topLeft += dirV * float32_t(currentViewportTileRange.topLeftTile.y * oneTileTexelSpan) / float32_t(fullResImageExtents.y); + + const uint32_t2 viewportTileLength = currentViewportTileRange.bottomRightTile - currentViewportTileRange.topLeftTile + uint32_t2(1, 1); + // If the last tile is visible, we use the fractional span for the last tile. Otherwise it's just a normal tile + const bool2 lastTileVisible = isLastTileVisible(currentViewportTileRange.bottomRightTile); + const uint32_t2 lastGPUImageTileTexels = { lastTileVisible.x ? lastTileTargetExtent.x : GeoreferencedImageTileSize, lastTileVisible.y ? lastTileTargetExtent.y : GeoreferencedImageTileSize }; + + // Instead of grouping per tile like in the offset case, we group per texel: the same reasoning leads to a single texel at current mip level having a span of `dirU * 2^(currentMappedRegionTileRange.baseMipLevel)/ fullResImageExtents.x` + // in the U direction. Therefore the span in worldspace of the OBB we construct is just this number multiplied by the number of gpu texels spanned to draw. 
+ // The number of texels is just `GeoreferencedImageTileSize` times the number of full tiles (all but the last) + the number of texels of the last tile, which might not be a full tile if near the right boundary + viewportEncompassingOBB.dirU = worldspaceOBB.dirU * float32_t((GeoreferencedImageTileSize * (viewportTileLength.x - 1) + lastGPUImageTileTexels.x) << currentMappedRegionTileRange.baseMipLevel) / float32_t(fullResImageExtents.x); + // Simply number of gpu texels in the y direction divided by number of texels in the x direction. + viewportEncompassingOBB.aspectRatio = float32_t(GeoreferencedImageTileSize * (viewportTileLength.y - 1) + lastGPUImageTileTexels.y) / float32_t(GeoreferencedImageTileSize * (viewportTileLength.x - 1) + lastGPUImageTileTexels.x); + + // GPU tile corresponding to the real image tile containing the viewport top left - we can let it be negative since wrapping mode is repeat, negative tiles are correct modulo `gpuImageSideLengthTiles` + const uint32_t2 viewportTopLeftGPUTile = currentViewportTileRange.topLeftTile - currentMappedRegionTileRange.topLeftTile + gpuImageTopLeft; + // To get the uv corresponding to the above, simply divide the tile index by the number of tiles in the GPU image. 
+ // However to consider a one-texel shift inward (to prevent color bleeding at the edges) we map both numerator and denominator to texel units (by multiplying with `GeoreferencedImageTileSize`) and add + // a single texel to the numerator + const float32_t2 minUV = float32_t2(GeoreferencedImageTileSize * viewportTopLeftGPUTile + 1u) / float32_t(GeoreferencedImageTileSize * gpuImageSideLengthTiles); + // If the image was perfectly partitioned into tiles, we could get the maxUV in a similar fashion to minUV: Just compute `bottomRightTile - currentMappedRegionTileRange.topLeftTile` to get a tile + // then divide by `gpuImageSideLengthTiles` to get a coord in `(0,1)` (correct modulo `gpuImageSideLengthTiles`) + // However the last tile might not have all `GeoreferencedImageTileSize` texels in it. Therefore maxUV computation can be separated into a UV contribution by all full tiles (all but the last) + a contribution from the last tile + // UV contribution from full tiles will therefore be `(bottomRightTile - currentMappedRegionTileRange.topLeftTile) / gpuImageSideLengthTiles` while last tile contribution will be + // `lastGPUImageTileTexels / (gpuImageSideLengthTiles * GeoreferencedImageTileSize)`. We group terms below to reduce number of float ops. + // Again we first map to texel units then subtract one to add a single texel uv shift. 
+ const uint32_t2 viewportBottomRightGPUTile = currentViewportTileRange.bottomRightTile - currentMappedRegionTileRange.topLeftTile + gpuImageTopLeft; + const float32_t2 maxUV = float32_t2(GeoreferencedImageTileSize * viewportBottomRightGPUTile + lastGPUImageTileTexels - 1u) / float32_t(GeoreferencedImageTileSize * gpuImageSideLengthTiles); + + ret.minUV = minUV; + ret.maxUV = maxUV; + ret.topLeft = viewportEncompassingOBB.topLeft; + ret.dirU = viewportEncompassingOBB.dirU; + ret.aspectRatio = viewportEncompassingOBB.aspectRatio; + + return ret; +} + +GeoreferencedImageTileRange GeoreferencedImageStreamingState::computeViewportTileRange(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat) +{ + // These are vulkan standard, might be different in n4ce! + constexpr static float64_t3 topLeftViewportNDC = float64_t3(-1.0, -1.0, 1.0); + constexpr static float64_t3 topRightViewportNDC = float64_t3(1.0, -1.0, 1.0); + constexpr static float64_t3 bottomLeftViewportNDC = float64_t3(-1.0, 1.0, 1.0); + constexpr static float64_t3 bottomRightViewportNDC = float64_t3(1.0, 1.0, 1.0); + + // First get world coordinates for each of the viewport's corners + const float64_t3 topLeftViewportWorld = nbl::hlsl::mul(ndcToWorldMat, topLeftViewportNDC); + const float64_t3 topRightViewportWorld = nbl::hlsl::mul(ndcToWorldMat, topRightViewportNDC); + const float64_t3 bottomLeftViewportWorld = nbl::hlsl::mul(ndcToWorldMat, bottomLeftViewportNDC); + const float64_t3 bottomRightViewportWorld = nbl::hlsl::mul(ndcToWorldMat, bottomRightViewportNDC); + + // Then we get mip 0 tiles coordinates for each of them, into the image + const float64_t2 topLeftTileLattice = transformWorldCoordsToTileCoords(topLeftViewportWorld); + const float64_t2 topRightTileLattice = transformWorldCoordsToTileCoords(topRightViewportWorld); + const float64_t2 bottomLeftTileLattice = transformWorldCoordsToTileCoords(bottomLeftViewportWorld); + const float64_t2 bottomRightTileLattice = 
transformWorldCoordsToTileCoords(bottomRightViewportWorld); + + // Get the min and max of each lattice coordinate to get a bounding rectangle + const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice); + const float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice); + const float64_t2 minAll = nbl::hlsl::min(minTop, minBottom); + + const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice); + const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice); + const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom); + + // Floor them to get an integer coordinate (index) for the tiles they fall in + int32_t2 minAllFloored = nbl::hlsl::floor(minAll); + int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll); + + // We're undoing a previous division. Could be avoided but won't restructure the code atp. + // Here we compute how many image pixels each side of the viewport spans + const float64_t2 viewportSideUImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (topRightTileLattice - topLeftTileLattice); + const float64_t2 viewportSideVImageTexelsVector = float64_t(GeoreferencedImageTileSize) * (bottomLeftTileLattice - topLeftTileLattice); + + // WARNING: This assumes pixels in the image are the same size along each axis. If the image is nonuniformly scaled or sheared, I *think* it should not matter + // (since the pixel span takes that transformation into account), BUT we have to check if we plan on allowing those + // Compute the side vectors of the viewport in image pixel(texel) space. + // These vectors represent how many image pixels each side of the viewport spans. + // They correspond to the local axes of the mapped OBB (not the mapped region one, the viewport one) in texel coordinates. 
+ const float64_t viewportSideUImageTexels = nbl::hlsl::length(viewportSideUImageTexelsVector); + const float64_t viewportSideVImageTexels = nbl::hlsl::length(viewportSideVImageTexelsVector); + + // Mip is decided based on max of these + float64_t pixelRatio = nbl::hlsl::max(viewportSideUImageTexels / viewportExtent.x, viewportSideVImageTexels / viewportExtent.y); + pixelRatio = pixelRatio < 1.0 ? 1.0 : pixelRatio; + + GeoreferencedImageTileRange retVal = {}; + // Clamp mip level so we don't consider tiles that are too small along one dimension + // If on a pathological case this gets too expensive because the GPU starts sampling a lot, we can consider changing this, but I doubt that will happen + retVal.baseMipLevel = nbl::hlsl::min(nbl::hlsl::findMSB(uint32_t(nbl::hlsl::floor(pixelRatio))), int32_t(maxMipLevel)); + + // Current tiles are measured in mip 0. We want the result to measure mip `retVal.baseMipLevel` tiles. Each next mip level divides by 2. + minAllFloored >>= retVal.baseMipLevel; + maxAllFloored >>= retVal.baseMipLevel; + + + // Clamp them to reasonable tile indices + int32_t2 lastTileIndex = getLastTileIndex(retVal.baseMipLevel); + retVal.topLeftTile = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), lastTileIndex); + retVal.bottomRightTile = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), lastTileIndex); + + return retVal; +} + +void GeoreferencedImageStreamingState::ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange) +{ + // A base mip level of x in the current mapped region means we can handle the viewport having mip level y, with x <= y < x + 1.0 + // without needing to remap the region. When the user starts zooming in or out and the mip level of the viewport falls outside this range, we have to remap + // the mapped region. 
+ const bool mipBoundaryCrossed = viewportTileRange.baseMipLevel != currentMappedRegionTileRange.baseMipLevel; + + // If we moved a huge amount in any direction, no tiles will remain resident, so we simply reset state + // This only need be evaluated if the mip boundary was not already crossed + const bool relativeShiftTooBig = !mipBoundaryCrossed && + nbl::hlsl::any + ( + nbl::hlsl::abs(int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegionTileRange.topLeftTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) + ) + || nbl::hlsl::any + ( + nbl::hlsl::abs(int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegionTileRange.bottomRightTile)) >= int32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) + ); + + // If there is no overlap between previous mapped region and the next, just reset everything + if (mipBoundaryCrossed || relativeShiftTooBig) + remapCurrentRegion(viewportTileRange); + // Otherwise we can get away with (at worst) sliding the mapped region along the real image, preserving the residency of the tiles that overlap between previous mapped region and the next + else + slideCurrentRegion(viewportTileRange); +} + +void GeoreferencedImageStreamingState::remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) +{ + // Zoomed out + if (viewportTileRange.baseMipLevel > currentMappedRegionTileRange.baseMipLevel) + { + // TODO: Here we would move some mip 1 tiles to mip 0 image to save the work of reuploading them, reflect that in the tracked tiles + } + // Zoomed in + else if (viewportTileRange.baseMipLevel < currentMappedRegionTileRange.baseMipLevel) + { + // TODO: Here we would move some mip 0 tiles to mip 1 image to save the work of reuploading them, reflect that in the tracked tiles + } + currentMappedRegionTileRange = viewportTileRange; + // We can expand the currentMappedRegionTileRange to make it as big as possible, at no extra cost since we only upload tiles on demand + // Since we use 
toroidal updating it's kinda the same which way we expand the region. We first try to make the extent be `gpuImageSideLengthTiles` + currentMappedRegionTileRange.bottomRightTile = currentMappedRegionTileRange.topLeftTile + uint32_t2(gpuImageSideLengthTiles, gpuImageSideLengthTiles) - uint32_t2(1, 1); + // This extension can cause the mapped region to fall out of bounds on border cases, therefore we clamp it and extend it in the other direction + // by the amount of tiles we removed during clamping + const uint32_t2 excessTiles = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegionTileRange.bottomRightTile) - int32_t2(getLastTileIndex(currentMappedRegionTileRange.baseMipLevel)))); + currentMappedRegionTileRange.bottomRightTile -= excessTiles; + // Shifting of the topLeftTile could fall out of bounds in pathological cases or at very high mip levels (zooming out too much), so we shift if possible, otherwise set it to 0 + currentMappedRegionTileRange.topLeftTile = uint32_t2(nbl::hlsl::max(int32_t2(0, 0), int32_t2(currentMappedRegionTileRange.topLeftTile) - int32_t2(excessTiles))); + + ResetTileOccupancyState(); + // Reset state for gpu image so that it starts loading tiles at top left. Not really necessary. + gpuImageTopLeft = uint32_t2(0, 0); +} + +void GeoreferencedImageStreamingState::ResetTileOccupancyState() +{ + // Mark all gpu tiles as dirty + currentMappedRegionOccupancy.assign(gpuImageSideLengthTiles, std::vector(gpuImageSideLengthTiles, false)); +} + +void GeoreferencedImageStreamingState::slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange) +{ + // `topLeftShift` represents how many tiles up and to the left we have to move the mapped region to fit the viewport. + // First we compute a vector from the current mapped region's topleft to the viewport's topleft. 
If this vector is positive along a dimension it means + // the viewport's topleft is to the right or below the current mapped region's topleft, so we don't have to shift the mapped region to the left/up in that case + const int32_t2 topLeftShift = nbl::hlsl::min(int32_t2(0, 0), int32_t2(viewportTileRange.topLeftTile) - int32_t2(currentMappedRegionTileRange.topLeftTile)); + // `bottomRightShift` represents the same as above but in the other direction. + const int32_t2 bottomRightShift = nbl::hlsl::max(int32_t2(0, 0), int32_t2(viewportTileRange.bottomRightTile) - int32_t2(currentMappedRegionTileRange.bottomRightTile)); + + // The following is not necessarily equal to `gpuImageSideLengthTiles` since there can be pathological cases, as explained in the remapping method + const uint32_t2 mappedRegionDimensions = currentMappedRegionTileRange.bottomRightTile - currentMappedRegionTileRange.topLeftTile + 1u; + const uint32_t2 gpuImageBottomRight = (gpuImageTopLeft + mappedRegionDimensions - 1u) % gpuImageSideLengthTiles; + + // Mark dropped tiles as dirty/non-resident + if (topLeftShift.x < 0) + { + // Shift left + const uint32_t tilesToFit = -topLeftShift.x; + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (gpuImageBottomRight.x + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; + currentMappedRegionOccupancy[tileIdx].clear(); + currentMappedRegionOccupancy[tileIdx].resize(gpuImageSideLengthTiles, false); + } + } + else if (bottomRightShift.x > 0) + { + //Shift right + const uint32_t tilesToFit = bottomRightShift.x; + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (tile + gpuImageTopLeft.x) % gpuImageSideLengthTiles; + currentMappedRegionOccupancy[tileIdx].clear(); + currentMappedRegionOccupancy[tileIdx].resize(gpuImageSideLengthTiles, false); + } + } + + if (topLeftShift.y < 0) + { + // Shift up + const uint32_t 
tilesToFit = -topLeftShift.y; + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (gpuImageBottomRight.y + (gpuImageSideLengthTiles - tile)) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[i][tileIdx] = false; + } + } + else if (bottomRightShift.y > 0) + { + //Shift down + const uint32_t tilesToFit = bottomRightShift.y; + for (uint32_t tile = 0; tile < tilesToFit; tile++) + { + // Get actual tile index with wraparound + uint32_t tileIdx = (tile + gpuImageTopLeft.y) % gpuImageSideLengthTiles; + for (uint32_t i = 0u; i < gpuImageSideLengthTiles; i++) + currentMappedRegionOccupancy[i][tileIdx] = false; + } + } + + // Shift the mapped region accordingly + // A nice consequence of the mapped region being always maximally - sized is that + // along any dimension, only a shift in one direction is necessary, so we can simply add up the shifts + currentMappedRegionTileRange.topLeftTile = uint32_t2(int32_t2(currentMappedRegionTileRange.topLeftTile) + topLeftShift + bottomRightShift); + currentMappedRegionTileRange.bottomRightTile = uint32_t2(int32_t2(currentMappedRegionTileRange.bottomRightTile) + topLeftShift + bottomRightShift); + + // Toroidal shift for the gpu image top left + gpuImageTopLeft = (gpuImageTopLeft + uint32_t2(topLeftShift + bottomRightShift + int32_t(gpuImageSideLengthTiles))) % gpuImageSideLengthTiles; +} + +std::string CachedImageRecord::toString(uint64_t imageID) const +{ + auto stringifyImageState = [](ImageState state) -> std::string { + switch (state) + { + case ImageState::INVALID: return "INVALID"; + case ImageState::CREATED_AND_MEMORY_BOUND: return "CREATED_AND_MEMORY_BOUND"; + case ImageState::BOUND_TO_DESCRIPTOR_SET: return "BOUND_TO_DESCRIPTOR_SET"; + case ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA: return "GPU_RESIDENT_WITH_VALID_STATIC_DATA"; + default: return "UNKNOWN_STATE"; + } + }; + + auto 
stringifyImageType = [](ImageType type) -> std::string { + switch (type) + { + case ImageType::INVALID: return "INVALID"; + case ImageType::STATIC: return "STATIC"; + case ImageType::GEOREFERENCED_STREAMED: return "GEOREFERENCED_STREAMED"; + default: return "UNKNOWN_TYPE"; + } + }; + + std::string result; + if (imageID != std::numeric_limits::max()) + result += std::format(" ImageID: {}\n", imageID); + + result += std::format( + " Type: {}\n" + " State: {}\n" + " Array Index: {}\n" + " Allocation Offset: {}\n" + " Allocation Size: {}\n" + " Current Layout: {}\n" + " Last Used Frame Index: {}\n" + " GPU ImageView: {}\n" + " CPU Image: {}\n" + " Georeferenced Image State: {}\n", + stringifyImageType(type), + stringifyImageState(state), + arrayIndex, + allocationOffset, + allocationSize, + static_cast(currentLayout), + lastUsedFrameIndex, + gpuImageView ? "VALID" : "NULL", + staticCPUImage ? "VALID" : "NULL", + georeferencedImageState ? "VALID" : "NULL" + ); + return result; +} \ No newline at end of file diff --git a/62_CAD/Images.h b/62_CAD/Images.h index a341eadd6..d397141d1 100644 --- a/62_CAD/Images.h +++ b/62_CAD/Images.h @@ -1,4 +1,10 @@ +/* DrawResourcesFiller: This class provides important functionality to manage resources needed for a draw. +/******************************************************************************/ #pragma once + +#include "shaders/globals.hlsl" +#include + using namespace nbl; using namespace nbl::video; using namespace nbl::core; @@ -6,29 +12,26 @@ using namespace nbl::asset; using image_id = uint64_t; // Could later be templated or replaced with a stronger type or hash key. 
+// These are mip 0 pixels per tile, also size of each physical tile into the gpu resident image +constexpr static uint32_t GeoreferencedImageTileSize = 128u; +// Mip 1 tiles are naturally half the size +constexpr static uint32_t GeoreferencedImageTileSizeMip1 = GeoreferencedImageTileSize / 2; +// How many tiles of extra padding we give to the gpu image holding the tiles for a georeferenced image +constexpr static uint32_t GeoreferencedImagePaddingTiles = 2; + enum class ImageState : uint8_t { INVALID = 0, CREATED_AND_MEMORY_BOUND, // GPU image created, not bound to descriptor set yet - BOUND_TO_DESCRIPTOR_SET, // Bound to descriptor set, GPU resident, but may contain uninitialized or partial data - GPU_RESIDENT_WITH_VALID_STATIC_DATA, // When data for static images gets issued for upload successfully + GPU_RESIDENT_WITH_VALID_STATIC_DATA, // When data for static images gets issued for upload successfully, may not be bound to it's descriptor binding array index yet + BOUND_TO_DESCRIPTOR_SET, // Bound to descriptor set, GPU resident }; enum class ImageType : uint8_t { INVALID = 0, STATIC, // Regular non-georeferenced image, fully loaded once - GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view - GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependant -}; - -struct GeoreferencedImageParams -{ - OrientedBoundingBox2D worldspaceOBB = {}; - uint32_t2 imageExtents = {}; - uint32_t2 viewportExtents = {}; - asset::E_FORMAT format = {}; - // TODO: Need to add other stuff later. 
+ GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view // TODO[DEVSH]: Probably best to rename this to STREAMED image }; /** @@ -69,6 +72,11 @@ class ImagesMemorySubAllocator : public core::IReferenceCounted { m_addressAllocator->free_addr(addr, size); } + + uint64_t getFreeSize() const + { + return m_addressAllocator->get_free_size(); + } ~ImagesMemorySubAllocator() { @@ -90,18 +98,9 @@ class ImagesMemorySubAllocator : public core::IReferenceCounted // Destructor will then deallocate from GeneralPurposeAllocator, making the previously allocated range of the image available/free again. struct ImageCleanup : public core::IReferenceCounted { - ImageCleanup() - : imagesMemorySuballocator(nullptr) - , addr(ImagesMemorySubAllocator::InvalidAddress) - , size(0ull) - {} + ImageCleanup(); - ~ImageCleanup() override - { - // printf(std::format("Actual Eviction size={}, offset={} \n", size, addr).c_str()); - if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) - imagesMemorySuballocator->deallocate(addr, size); - } + ~ImageCleanup() override; smart_refctd_ptr imagesMemorySuballocator; uint64_t addr; @@ -109,22 +108,211 @@ struct ImageCleanup : public core::IReferenceCounted }; +// Measures a range of mip `baseMipLevel` tiles in the georeferenced image, starting at `topLeftTile` that is `nTiles` long +struct GeoreferencedImageTileRange +{ + uint32_t2 topLeftTile; + uint32_t2 bottomRightTile; + uint32_t baseMipLevel; +}; + +// @brief Used to load tiles into VRAM, keep track of loaded tiles, determine how they get sampled etc. +struct GeoreferencedImageStreamingState : public IReferenceCounted +{ +public: + + GeoreferencedImageStreamingState() + { } + + //! Creates a new streaming state for a georeferenced image + /* + Initializes CPU-side state for image streaming. + Sets up world-to-UV transform, computes mip hierarchy parameters, + and stores metadata about the image. 
+ + @param worldspaceOBB Oriented bounding box of the image in world space + @param fullResImageExtents Full resolution image size in pixels (width, height) + @param format Pixel format of the image + @param storagePath Filesystem path for image tiles + */ + bool init(const OrientedBoundingBox2D& worldSpaceOBB, const uint32_t2 fullResImageExtents, const asset::E_FORMAT format, const std::filesystem::path& storagePath); + + /** + * @brief Update the mapped region to cover the current viewport. + * + * Computes the required tile range from the viewport and updates + * `currentMappedRegion` by remapping or sliding as needed. + * + * @param viewportExtent Viewport size in pixels. + * @param ndcToWorldMat NDC to world space matrix. + * + * @see tilesToLoad + */ + void updateStreamingStateForViewport(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat); + + // @brief Info to match a gpu tile to the tile in the real image it should hold image data for + struct ImageTileToGPUTileCorrespondence + { + uint32_t2 imageTileIndex; + uint32_t2 gpuImageTileIndex; + }; + + /* + * @brief Get the tiles required for rendering the current viewport. + * Uses the region set by `updateStreamingStateForViewport()` to return + * which image tiles need loading and their target GPU tile indices.
+ */ + core::vector tilesToLoad() const; + + // @brief Returns the index of the last tile when covering the image with `mipLevel` tiles + inline uint32_t2 getLastTileIndex(uint32_t mipLevel) const + { + return (fullImageTileLength - 1u) >> mipLevel; + } + + // @brief Returns whether the last tile in the image (along each dimension) is visible from the current viewport + inline bool2 isLastTileVisible(const uint32_t2 viewportBottomRightTile) const + { + const uint32_t2 lastTileIndex = getLastTileIndex(currentMappedRegionTileRange.baseMipLevel); + return bool2(lastTileIndex.x == viewportBottomRightTile.x, lastTileIndex.y == viewportBottomRightTile.y); + } + + /** + * @brief Compute viewport positioning and UV addressing for a georeferenced image. + * + * Returns a `GeoreferencedImageInfo` filled with: + * - `topLeft`, `dirU`, `aspectRatio` (world-space OBB) + * - `minUV`, `maxUV` (UV addressing for the viewport) + * + * Leaves `textureID` unmodified. + * + * @note Make sure to call `updateStreamingStateForViewport()` first so that + * the OBB and UVs reflect the latest viewport. + * + * @param imageStreamingState The streaming state of the georeferenced image. + * @return GeoreferencedImageInfo containing viewport positioning and UV info. + */ + GeoreferencedImageInfo computeGeoreferencedImageAddressingAndPositioningInfo(); + + bool isOutOfDate() const { return outOfDate; } + +private: + // These are NOT UV, pixel or tile coords into the mapped image region, rather into the real, huge image + // Tile coords are always in mip 0 tile size. 
Translating to other mips levels is trivial + + // @brief Transform worldspace coordinates into UV coordinates into the image + float64_t2 transformWorldCoordsToUV(const float64_t3 worldCoords) const { return nbl::hlsl::mul(worldToUV, worldCoords); } + // @brief Transform worldspace coordinates into texel coordinates into the image + float64_t2 transformWorldCoordsToTexelCoords(const float64_t3 worldCoords) const { return float64_t2(fullResImageExtents) * transformWorldCoordsToUV(worldCoords); } + // @brief Transform worldspace coordinates into tile coordinates into the image, where the image is broken up into tiles of size `GeoreferencedImageTileSize` + float64_t2 transformWorldCoordsToTileCoords(const float64_t3 worldCoords) const { return (1.0 / GeoreferencedImageTileSize) * transformWorldCoordsToTexelCoords(worldCoords); } + + /** + * @brief Compute the tile range and mip level needed to cover the viewport. + * + * Calculates which portion of the source image is visible through the given + * viewport and chooses the optimal mip level based on zoom (viewport size + * relative to the image). The returned range is always a subset of + * `currentMappedRegion` and covers only the visible tiles. + * + * @param currentViewportExtents Size of the viewport in pixels. + * @param ndcToWorldMat Transform from NDC to world space, used to project + * the viewport onto the image. + * + * @return A tile range (`GeoreferencedImageTileRange`) representing the + * visible region at the chosen mip level. + */ + GeoreferencedImageTileRange computeViewportTileRange(const uint32_t2 viewportExtent, const float64_t3x3& ndcToWorldMat); + + /* + * @brief The GPU image backs a mapped region which is a rectangular sub-region of the original image. Note that a region being mapped does NOT imply it's currently resident in GPU memory. 
+ * To display the image on the screen, before even checking that the tiles needed to render the portion of the image currently visible are resident in GPU memory, we first must ensure that + * said region is included (as a sub-rectangle) in the mapped region. + * + * @param viewportTileRange Range of tiles + mip level indicating what sub-rectangle (and at which mip level) of the image is going to be visible from the viewport + */ + void ensureMappedRegionCoversViewport(const GeoreferencedImageTileRange& viewportTileRange); + + /* + * @brief Sets the mapped region into the image so it at least covers the sub-rectangle currently visible from the viewport. Also marks all gpu tiles dirty since none can be recycled + * + * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image (visible from viewport) that the mapped region needs to cover + */ + void remapCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); + + /** + * @brief Resets the streaming state's GPU tile occupancy map. + * - Clears all previously marked resident tiles. + * - After this call, every entry in `currentMappedRegionOccupancy` is `false`, + * meaning the GPU image is considered completely dirty (no tiles mapped). + */ + void ResetTileOccupancyState(); + + /* + * @brief Slides the mapped region along the image, marking the tiles dropped as dirty but preserving the residency for tiles that are inside both the previous and new mapped regions. + * Note that the checks for whether this is valid to do happen outside of this function.
+ * + * @param viewportTileRange Range of tiles + mip level indicating a sub-rectangle of the image (visible from viewport) that the mapped region needs to cover + */ + void slideCurrentRegion(const GeoreferencedImageTileRange& viewportTileRange); + +protected: + friend class DrawResourcesFiller; + + // Oriented bounding box of the original image in world space (position + orientation) + OrientedBoundingBox2D worldspaceOBB = {}; + // Full resolution original image size in pixels (width, height) + uint32_t2 fullResImageExtents = {}; + // Pixel format of the image as provided by storage/loader (may differ from GPU format) + asset::E_FORMAT sourceImageFormat = {}; + // Filesystem path where image tiles are stored + std::filesystem::path storagePath = {}; + // GPU Image Params for the image to be created with + IGPUImage::SCreationParams gpuImageCreationParams = {}; + // 2D bool set for tile validity of the currentMappedRegionTileRange + std::vector> currentMappedRegionOccupancy = {}; + // Sidelength of the gpu image, in mip 0 tiles that are `TileSize` (creation parameter) texels wide + uint32_t gpuImageSideLengthTiles = {}; + // We establish a max mipLevel for the image, which is the mip level at which any of width, height fit in a single tile + uint32_t maxMipLevel = {}; + // Number of mip 0 tiles needed to cover the whole image, counting the last tile that might be fractional if the image size is not perfectly divisible by TileSize + uint32_t2 fullImageTileLength = {}; + // Indicates on which tile of the gpu image the current mapped region's `topLeft` resides + uint32_t2 gpuImageTopLeft = {}; + // Converts a point (z = 1) in worldspace to UV coordinates in image space (origin shifted to topleft of the image) + float64_t2x3 worldToUV = {}; + // The GPU-mapped region covering a subrectangle of the source image + GeoreferencedImageTileRange currentMappedRegionTileRange = { .baseMipLevel = std::numeric_limits::max() }; + // Tile range covering only the tiles currently 
visible in the viewport + GeoreferencedImageTileRange currentViewportTileRange = { .baseMipLevel = std::numeric_limits::max() }; + // Extents used for sampling the last tile (handles partial tiles / NPOT images); gets updated with `updateStreamingStateForViewport` + uint32_t2 lastTileSamplingExtent; + // Extents used when writing/updating the last tile in GPU memory (handles partial tiles / NPOT images); gets updated with `updateStreamingStateForViewport` + uint32_t2 lastTileTargetExtent; + // We set this to true when image is evicted from cache, hinting at other places holding a smart_refctd_ptr to this object that the GeoreferencedImageStreamingState isn't valid anymore and needs recreation/update + bool outOfDate = false; +}; + struct CachedImageRecord { static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; uint32_t arrayIndex = InvalidTextureIndex; // index in our array of textures binding + bool arrayIndexAllocatedUsingImageDescriptorIndexAllocator; // whether the index of this cache entry was allocated using suballocated descriptor set which ensures correct synchronized access to a set index. (if not extra synchro is needed) ImageType type = ImageType::INVALID; ImageState state = ImageState::INVALID; + nbl::asset::IImage::LAYOUT currentLayout = nbl::asset::IImage::LAYOUT::UNDEFINED; uint64_t lastUsedFrameIndex = 0ull; // last used semaphore value on this image uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; uint64_t allocationSize = 0ull; core::smart_refctd_ptr gpuImageView = nullptr; core::smart_refctd_ptr staticCPUImage = nullptr; // cached cpu image for uploading to gpuImageView when needed.
+ core::smart_refctd_ptr georeferencedImageState = nullptr; // Used to track tile residency for georeferenced images // In LRU Cache `insert` function, in case of cache miss, we need to construct the refereence with semaphore value CachedImageRecord(uint64_t currentFrameIndex) : arrayIndex(InvalidTextureIndex) + , arrayIndexAllocatedUsingImageDescriptorIndexAllocator(false) , type(ImageType::INVALID) , state(ImageState::INVALID) , lastUsedFrameIndex(currentFrameIndex) @@ -138,6 +326,8 @@ struct CachedImageRecord : CachedImageRecord(0ull) {} + std::string toString(uint64_t imageID = std::numeric_limits::max()) const; + // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value without changing `index` inline CachedImageRecord& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; @@ -160,7 +350,7 @@ class ImagesCache : public core::ResizableLRUCache // Attempts to insert a new image into the cache. // If the cache is full, invokes the provided `evictCallback` to evict an image. // Returns a pointer to the inserted or existing ImageReference. - template EvictionCallback> + template EvictionCallback> inline CachedImageRecord* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) { return base_t::insert(imageID, lastUsedSema, evictCallback); @@ -195,6 +385,16 @@ class ImagesCache : public core::ResizableLRUCache } } + inline void logState(nbl::system::logger_opt_smart_ptr logger) + { + logger.log("=== Image Cache Status ===", nbl::system::ILogger::ELL_INFO); + for (const auto& [imageID, record] : *this) + { + logger.log(("\n" + record.toString(imageID)).c_str(), nbl::system::ILogger::ELL_INFO); + } + logger.log("=== End of Image Cache ===", nbl::system::ILogger::ELL_INFO); + } + // Removes a specific image from the cache (manual eviction). 
inline void erase(image_id imageID) { @@ -205,7 +405,7 @@ class ImagesCache : public core::ResizableLRUCache struct StreamedImageCopy { asset::E_FORMAT srcFormat; - core::smart_refctd_ptr srcBuffer; // Make it 'std::future' later? + std::future> srcBufferFuture; asset::IImage::SBufferCopy region; }; @@ -217,3 +417,60 @@ struct StaticImageInfo bool forceUpdate = false; // If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. asset::E_FORMAT imageViewFormatOverride = asset::E_FORMAT::EF_COUNT; // if asset::E_FORMAT::EF_COUNT then image view will have the same format as `cpuImage` }; + +/// @brief Abstract class with two overridable methods to load a region of an image, either by requesting a region at a target extent (like the loaders in n4ce do) or to request a specific region from a mip level +// (like precomputed mips solution would use). +struct IImageRegionLoader : IReferenceCounted +{ + /** + * @brief Load a region from an image - used to load from images with precomputed mips + * + * @param imagePath Path to file holding the image data + * @param offset Offset into the image (at requested mipLevel!) at which the region begins + * @param extent Extent of the region to load (at requested mipLevel!) + * @param mipLevel From which mip level image to retrieve the data from + * @param downsample True if this request is supposed to go into GPU mip level 1, false otherwise + * + * @return ICPUBuffer with the requested image data + */ + core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) + { + assert(hasPrecomputedMips(imagePath)); + return load_impl(imagePath, offset, extent, mipLevel, downsample); + } + + /** + * @brief Load a region from an image - used to load from images using the n4ce loaders. 
Loads a region given by `offset, extent` as an image of size `targetExtent` + * where `targetExtent <= extent` so the loader is in charge of downsampling. + * + * @param imagePath Path to file holding the image data + * @param offset Offset into the image at which the region begins + * @param extent Extent of the region to load + * @param targetExtent Extent of the resulting image. Should NEVER be bigger than `extent` + * + * @return ICPUBuffer with the requested image data + */ + core::smart_refctd_ptr load(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) + { + assert(!hasPrecomputedMips(imagePath)); + return load_impl(imagePath, offset, extent, targetExtent); + } + + // @brief Get the extents (in texels) of an image. + virtual uint32_t2 getExtents(std::filesystem::path imagePath) = 0; + + /** + * @brief Get the texel format for an image. + */ + virtual asset::E_FORMAT getFormat(std::filesystem::path imagePath) = 0; + + // @brief Returns whether the image should be loaded with the precomputed mip method or the n4ce loader method. 
+ virtual bool hasPrecomputedMips(std::filesystem::path imagePath) const = 0; +private: + + // @brief Override to support loading with precomputed mips + virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) { return nullptr; } + + // @brief Override to support loading with n4ce-style loaders + virtual core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t2 targetExtent) { return nullptr; } +}; \ No newline at end of file diff --git a/62_CAD/Polyline.cpp b/62_CAD/Polyline.cpp index b00a64a88..4149942c7 100644 --- a/62_CAD/Polyline.cpp +++ b/62_CAD/Polyline.cpp @@ -387,7 +387,7 @@ CPolyline CPolyline::generateParallelPolyline(float64_t offset, const float64_t { // TODO: try merging lines if they have same tangent (resultin in less points) std::vector newLinePoints; - newLinePoints.reserve(m_linePoints.size()); + newLinePoints.reserve(section.count + 1); for (uint32_t j = 0; j < section.count + 1; ++j) { const uint32_t linePointIdx = section.index + j; diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index f4a886791..fd3d99aa2 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -32,7 +32,6 @@ using namespace video; #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "HatchGlyphBuilder.h" -#include "GeoTexture.h" #include @@ -49,6 +48,7 @@ static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic +static constexpr bool testCameraRotation = false; enum class ExampleMode { @@ -64,6 +64,7 @@ enum class ExampleMode CASE_9, // DTM CASE_10, // testing fixed geometry and emulated fp64 corner cases CASE_11, // grid DTM + CASE_12, // 
Georeferenced streamed images CASE_COUNT }; @@ -80,10 +81,11 @@ constexpr std::array cameraExtents = 600.0, // CASE_8 600.0, // CASE_9 10.0, // CASE_10 - 1000.0 // CASE_11 + 1000.0, // CASE_11 + 10.0 // CASE_12 }; -constexpr ExampleMode mode = ExampleMode::CASE_8; +constexpr ExampleMode mode = ExampleMode::CASE_5; class Camera2D { @@ -133,7 +135,7 @@ class Camera2D if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) { - m_bounds = m_bounds + float64_t2{ (double)ev.scrollEvent.verticalScroll * -0.1 * m_aspectRatio, (double)ev.scrollEvent.verticalScroll * -0.1}; + m_bounds = m_bounds + float64_t2{ (double)ev.scrollEvent.verticalScroll * -0.025 * m_aspectRatio, (double)ev.scrollEvent.verticalScroll * -0.025}; m_bounds = float64_t2{ core::max(m_aspectRatio, m_bounds.x), core::max(1.0, m_bounds.y) }; } } @@ -361,6 +363,195 @@ bool performImageFormatPromotionCopy(const core::smart_refctd_ptr>>(inCPUImage, outCPUImage); } +// Used by case 12 +struct ImageLoader : public IImageRegionLoader +{ + ImageLoader(asset::IAssetManager* assetMgr, system::ILogger* logger, video::IPhysicalDevice* physicalDevice) + : m_assetMgr(assetMgr), m_logger(logger), m_physicalDevice(physicalDevice) + { + auto loadImage = [&](const std::string& imagePath) -> smart_refctd_ptr + { + system::path m_loadCWD = ".."; + constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); + const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger, m_loadCWD); + auto bundle = m_assetMgr->getAsset(imagePath, loadParams); + auto contents = bundle.getContents(); + if (contents.empty()) + { + m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePath).c_str()); + return nullptr; + } + + smart_refctd_ptr cpuImgView; + const auto& asset = contents[0]; + switch (asset->getAssetType()) + { + case IAsset::ET_IMAGE: + { + auto image = 
smart_refctd_ptr_static_cast(asset); + auto& flags = image->getCreationParameters().flags; + // assert if asset is mutable + const_cast&>(flags) |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + const auto format = image->getCreationParameters().format; + + ICPUImageView::SCreationParams viewParams = { + .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, + .image = std::move(image), + .viewType = IImageView::E_TYPE::ET_2D, + .format = format, + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + } + }; + + cpuImgView = ICPUImageView::create(std::move(viewParams)); + } break; + + case IAsset::ET_IMAGE_VIEW: + cpuImgView = smart_refctd_ptr_static_cast(asset); + break; + default: + m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto loadedCPUImage = cpuImgView->getCreationParameters().image; + const auto loadedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + // Promoting the image to a format GPU supports. 
(so that updateImageViaStagingBuffer doesn't have to handle that each frame if overflow-submit needs to happen) + auto promotedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + promotedCPUImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = promotedCPUImageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(promotedCPUImageCreationParams.usage) + }; + promotedCPUImageCreationParams.format = m_physicalDevice->promoteImageFormat(request, video::IGPUImage::TILING::OPTIMAL); + } + + if (loadedCPUImageCreationParams.format != promotedCPUImageCreationParams.format) + { + smart_refctd_ptr promotedCPUImage = ICPUImage::create(promotedCPUImageCreationParams); + core::rational bytesPerPixel = asset::getBytesPerPixel(promotedCPUImageCreationParams.format); + + const auto extent = loadedCPUImageCreationParams.extent; + const uint32_t mipLevels = loadedCPUImageCreationParams.mipLevels; + const uint32_t arrayLayers = loadedCPUImageCreationParams.arrayLayers; + + // Only supporting 1 mip, it's just for test.. 
+ const size_t byteSize = (bytesPerPixel * extent.width * extent.height * extent.depth * arrayLayers).getIntegerApprox(); // TODO: consider mips + ICPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = byteSize; + smart_refctd_ptr promotedCPUImageBuffer = ICPUBuffer::create(std::move(bufferCreationParams)); + + auto newRegions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = newRegions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; // TODO + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = arrayLayers; + region.bufferOffset = 0u; + region.bufferRowLength = 0u; + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = extent; + promotedCPUImage->setBufferAndRegions(std::move(promotedCPUImageBuffer), newRegions); + + performImageFormatPromotionCopy(loadedCPUImage, promotedCPUImage); + return promotedCPUImage; + } + else + { + return loadedCPUImage; + } + }; + + // This is all hardcoded for the example + const std::string basePath = "../../media/npot_geotex_mip_"; + smart_refctd_ptr img = loadImage(basePath + "0_base.png"); + + // This is hardcoded + const uint32_t maxMipLevel = 7; + baseMipLevels.reserve(maxMipLevel + 1); + baseMipLevels.emplace_back(std::move(img)); + for (auto i = 1u; i <= maxMipLevel; i++) + { + baseMipLevels.emplace_back(loadImage(basePath + std::to_string(i) + "_base.png")); + } + downsampledMipLevels.reserve(maxMipLevel + 1); + for (auto i = 0u; i <= maxMipLevel; i++) + { + downsampledMipLevels.emplace_back(loadImage(basePath + std::to_string(i) + "_downsampled.png")); + } + } + + uint32_t2 getExtents(std::filesystem::path imagePath, uint32_t mipLevel) + { + return { baseMipLevels[mipLevel]->getCreationParameters().extent.width, baseMipLevels[mipLevel]->getCreationParameters().extent.height }; + } + + uint32_t2 
getExtents(std::filesystem::path imagePath) override + { + return getExtents(imagePath, 0); + } + + asset::E_FORMAT getFormat(std::filesystem::path imagePath) override + { + return baseMipLevels[0]->getCreationParameters().format; + } + + bool hasPrecomputedMips(std::filesystem::path imagePath) const override + { + return true; + } + +private: + + // Assume offset always fits in the image, but maybe offset + extent doesn't + // Example of a precomputed mip loader with 2x mip levels + core::smart_refctd_ptr load_impl(std::filesystem::path imagePath, uint32_t2 offset, uint32_t2 extent, uint32_t mipLevel, bool downsample) override + { + // Hardcoded tile size that's not accessible + auto mippedImageExtents = getExtents(imagePath, mipLevel); + // If `offset + extent` exceeds the extent of the image at the current mip level, we clamp it + extent = nbl::hlsl::min(mippedImageExtents - offset, extent); + // Image path ignored for this hardcoded example + const auto& image = downsample ? downsampledMipLevels[mipLevel] : baseMipLevels[mipLevel]; + const auto& imageBuffer = image->getBuffer(); + const core::rational bytesPerPixel = asset::getBytesPerPixel(image->getCreationParameters().format); + const size_t bytesPerRow = (bytesPerPixel * extent.x).getIntegerApprox(); + const size_t loadedImageBytes = bytesPerRow * extent.y; + asset::IBuffer::SCreationParams bufCreationParams = { .size = loadedImageBytes, .usage = imageBuffer->getCreationParams().usage }; + ICPUBuffer::SCreationParams cpuBufCreationParams(std::move(bufCreationParams)); + core::smart_refctd_ptr retVal = ICPUBuffer::create(std::move(cpuBufCreationParams)); + + // Copy row by row into the new buffer + uint8_t* dataPtr = reinterpret_cast(retVal->getPointer()); + const uint8_t* imageBufferDataPtr = reinterpret_cast(imageBuffer->getPointer()); + const size_t bytesPerImageRow = (bytesPerPixel * image->getCreationParameters().extent.width).getIntegerApprox(); + for (auto row = 0u; row < extent.y; row++) + { + const 
size_t imageBufferOffset = bytesPerImageRow * (offset.y + row) + (bytesPerPixel * offset.x).getIntegerApprox(); + std::memcpy(dataPtr + row * bytesPerRow, imageBufferDataPtr + imageBufferOffset, bytesPerRow); + } + return retVal; + } + + // These are here for the example, might not be class members when porting to n4ce + asset::IAssetManager* m_assetMgr = {}; + system::ILogger* m_logger = {}; + video::IPhysicalDevice* m_physicalDevice = {}; + // We're going to fake it in the example so it's easier to work with, but the interface remains + core::vector> baseMipLevels = {}; + core::vector> downsampledMipLevels = {}; +}; + class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication { using device_base_t = nbl::examples::SimpleWindowedApplication; @@ -375,11 +566,22 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio void allocateResources() { - drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); - + // TODO: currently using the same utils for buffers and images, make them separate staging buffers + drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_device), core::smart_refctd_ptr(m_utils), core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); + + // Just wanting to try memory type indices with device local flag, TODO: later improve to prioritize pure device local + std::vector deviceLocalMemoryTypeIndices; + for (uint32_t i = 0u; i < m_physicalDevice->getMemoryProperties().memoryTypeCount; ++i) + { + const auto& memType = m_physicalDevice->getMemoryProperties().memoryTypes[i]; + if (memType.propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + deviceLocalMemoryTypeIndices.push_back(i); + } + size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 
MB - drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize); + + drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize, deviceLocalMemoryTypeIndices); drawResourcesFiller.allocateMSDFTextures(m_device.get(), 256u, uint32_t2(MSDFSize, MSDFSize)); { @@ -495,9 +697,9 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio // Static Image Sampler { IGPUSampler::SParams samplerParams = {}; - samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; - samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; - samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_REPEAT; + samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_REPEAT; + samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_REPEAT; samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_TRANSPARENT_BLACK; samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; @@ -1009,16 +1211,27 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } } + // Shared Blend Params between pipelines - SBlendParams blendParams = {}; - blendParams.blendParams[0u].srcColorFactor = asset::EBF_SRC_ALPHA; - blendParams.blendParams[0u].dstColorFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; - blendParams.blendParams[0u].colorBlendOp = asset::EBO_ADD; - blendParams.blendParams[0u].srcAlphaFactor = asset::EBF_ONE; - blendParams.blendParams[0u].dstAlphaFactor = asset::EBF_ZERO; - blendParams.blendParams[0u].alphaBlendOp = asset::EBO_ADD; - blendParams.blendParams[0u].colorWriteMask = (1u << 4u) - 1u; - + // Premultiplied over-blend (back-to-front) + SBlendParams premultipliedOverBlendParams = {}; + premultipliedOverBlendParams.blendParams[0u].srcColorFactor = asset::EBF_ONE; + 
premultipliedOverBlendParams.blendParams[0u].dstColorFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; + premultipliedOverBlendParams.blendParams[0u].colorBlendOp = asset::EBO_ADD; + premultipliedOverBlendParams.blendParams[0u].srcAlphaFactor = asset::EBF_ONE; + premultipliedOverBlendParams.blendParams[0u].dstAlphaFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; + premultipliedOverBlendParams.blendParams[0u].alphaBlendOp = asset::EBO_ADD; + premultipliedOverBlendParams.blendParams[0u].colorWriteMask = (1u << 4u) - 1u; + // Premultiplied UNDER-blend (front-to-back) + SBlendParams premultipliedUnderBlendParams = {}; + premultipliedUnderBlendParams.blendParams[0u].srcColorFactor = asset::EBF_ONE_MINUS_DST_ALPHA; + premultipliedUnderBlendParams.blendParams[0u].dstColorFactor = asset::EBF_ONE; + premultipliedUnderBlendParams.blendParams[0u].colorBlendOp = asset::EBO_ADD; + premultipliedUnderBlendParams.blendParams[0u].srcAlphaFactor = asset::EBF_ONE; + premultipliedUnderBlendParams.blendParams[0u].dstAlphaFactor = asset::EBF_ONE_MINUS_SRC_ALPHA; + premultipliedUnderBlendParams.blendParams[0u].alphaBlendOp = asset::EBO_ADD; + premultipliedUnderBlendParams.blendParams[0u].colorWriteMask = (1u << 4u) - 1u; + // Create Alpha Resovle Pipeline { // Load FSTri Shader @@ -1026,7 +1239,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const video::IGPUPipelineBase::SShaderSpecInfo fragSpec = { .shader = mainPipelineFragmentShaders.get(), .entryPoint = "resolveAlphaMain" }; - resolveAlphaGraphicsPipeline = fsTriangleProtoPipe.createPipeline(fragSpec, pipelineLayout.get(), compatibleRenderPass.get(), 0u, blendParams); + resolveAlphaGraphicsPipeline = fsTriangleProtoPipe.createPipeline(fragSpec, pipelineLayout.get(), compatibleRenderPass.get(), 0u, premultipliedOverBlendParams); if (!resolveAlphaGraphicsPipeline) return logFail("Graphics Pipeline Creation Failed."); @@ -1060,7 +1273,7 @@ class ComputerAidedDesign final : public 
nbl::examples::SimpleWindowedApplicatio .faceCullingMode = EFCM_NONE, .depthWriteEnable = false, }, - .blend = blendParams, + .blend = premultipliedOverBlendParams, }; params[0].renderpass = compatibleRenderPass.get(); @@ -1117,9 +1330,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } ); - m_geoTextureRenderer = std::unique_ptr(new GeoTextureRenderer(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger))); - // m_geoTextureRenderer->initialize(geoTexturePipelineShaders[0].get(), geoTexturePipelineShaders[1].get(), compatibleRenderPass.get(), m_globalsBuffer); - // Create the Semaphores m_renderSemaphore = m_device->createSemaphore(0ull); m_renderSemaphore->setObjectDebugName("m_renderSemaphore"); @@ -1263,6 +1473,12 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio gridDTMHeightMap = loadImage("../../media/gridDTMHeightMap.exr"); + // Create case 12 image loader + if constexpr (mode == ExampleMode::CASE_12) + { + drawResourcesFiller.setGeoreferencedImageLoader(make_smart_refctd_ptr(m_assetMgr.get(), m_logger.get(), m_physicalDevice)); + } + // set diagonals of cells to TOP_LEFT_TO_BOTTOM_RIGHT or BOTTOM_LEFT_TO_TOP_RIGHT randomly { // assumption is that format of the grid DTM height map is *_SRGB, I don't think we need any code to ensure that @@ -1311,7 +1527,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { - m_Camera.mouseProcess(events); + if (m_window->hasMouseFocus()) + m_Camera.mouseProcess(events); } , m_logger.get()); keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void @@ -1492,17 +1709,9 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio projectionToNDC = m_Camera.constructViewProjection(); // TEST CAMERA ROTATION -#if 1 - // double rotation = 0.25 * PI(); - double rotation = abs(cos(m_timeElapsed * 0.0004)) * 
0.25 * PI() ; - float64_t2 rotationVec = float64_t2(cos(rotation), sin(rotation)); - float64_t3x3 rotationParameter = float64_t3x3 { - rotationVec.x, rotationVec.y, 0.0, - -rotationVec.y, rotationVec.x, 0.0, - 0.0, 0.0, 1.0 - }; - projectionToNDC = nbl::hlsl::mul(projectionToNDC, rotationParameter); -#endif + if constexpr (testCameraRotation) + projectionToNDC = rotateBasedOnTime(projectionToNDC); + Globals globalData = {}; uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); globalData.pointers = { @@ -1523,7 +1732,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio 0.0f, 0.0f, 1.0f); globalData.miterLimit = 10.0f; globalData.currentlyActiveMainObjectIndex = drawResourcesFiller.getActiveMainObjectIndex(); - SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; + SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer}; bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); assert(updateSuccess); @@ -1613,7 +1822,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio { if (drawCall.isDTMRendering) { - cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); + cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer()}, asset::EIT_32BIT); PushConstants pc = { .triangleMeshVerticesBaseAddress = drawCall.dtm.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, @@ -1635,7 +1844,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; // assert(currentIndexCount 
== resourcesCollection.indexBuffer.getCount()); - cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer}, asset::EIT_32BIT); cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); } } @@ -1661,7 +1870,7 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; // assert(currentIndexCount == resourcesCollection.indexBuffer.getCount()); - cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer}, asset::EIT_32BIT); cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); } @@ -3127,12 +3336,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio //printf("\n"); } - GeoreferencedImageParams geoRefParams = {}; - geoRefParams.format = asset::EF_R8G8B8A8_SRGB; - geoRefParams.imageExtents = uint32_t2 (2048, 2048); - geoRefParams.viewportExtents = (m_realFrameIx <= 5u) ? 
uint32_t2(1280, 720) : uint32_t2(3840, 2160); // to test trigerring resize/recreation - // drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(6996, geoRefParams, intendedNextSubmit); - LineStyleInfo lineStyle = { .color = float32_t4(1.0f, 0.1f, 0.1f, 0.9f), @@ -3686,6 +3889,50 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } #endif } + else if (mode == ExampleMode::CASE_12) + { + // placeholder, actual path is right now hardcoded into the loader + const static std::string georeferencedImagePath = "../../media/tiled_grid_mip_0.exr"; + + constexpr float64_t3 topLeftViewportH = float64_t3(-1.0, -1.0, 1.0); + constexpr float64_t3 topRightViewportH = float64_t3(1.0, -1.0, 1.0); + constexpr float64_t3 bottomLeftViewportH = float64_t3(-1.0, 1.0, 1.0); + constexpr float64_t3 bottomRightViewportH = float64_t3(1.0, 1.0, 1.0); + + //GeoreferencedImageParams georeferencedImageParams; + //georeferencedImageParams.storagePath = georeferencedImagePath; + //georeferencedImageParams.format = drawResourcesFiller.queryGeoreferencedImageFormat(georeferencedImagePath); + //georeferencedImageParams.imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); + + image_id georefImageID = 6996; + // Position at topLeft viewport + auto projectionToNDC = m_Camera.constructViewProjection(); + // TEST CAMERA ROTATION + if constexpr (testCameraRotation) + projectionToNDC = rotateBasedOnTime(projectionToNDC); + auto inverseViewProj = nbl::hlsl::inverse(projectionToNDC); + + // Get 1 viewport pixel to match `startingImagePixelsPerViewportPixel` pixels of the image by choosing appropriate dirU + const static float64_t startingImagePixelsPerViewportPixels = 1.0; + const static auto startingViewportWidthVector = nbl::hlsl::mul(inverseViewProj, topRightViewportH - topLeftViewportH); + const static auto dirU = startingViewportWidthVector * 
float64_t(drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath).x) / float64_t(startingImagePixelsPerViewportPixels * m_window->getWidth()); + + const static auto startingTopLeft = nbl::hlsl::mul(inverseViewProj, topLeftViewportH); + const uint32_t2 imageExtents = drawResourcesFiller.queryGeoreferencedImageExtents(georeferencedImagePath); + OrientedBoundingBox2D georefImageBB = { .topLeft = startingTopLeft, .dirU = dirU, .aspectRatio = float32_t(imageExtents.y) / imageExtents.x }; + + auto streamingState = drawResourcesFiller.ensureGeoreferencedImageEntry(georefImageID, georefImageBB, uint32_t2(m_window->getWidth(), m_window->getHeight()), inverseViewProj, georeferencedImagePath); + constexpr static WorldClipRect invalidClipRect = { .minClip = float64_t2(std::numeric_limits::signaling_NaN()) }; + drawResourcesFiller.launchGeoreferencedImageTileLoads(georefImageID, streamingState.get(), invalidClipRect); + + drawResourcesFiller.drawGeoreferencedImage(georefImageID, std::move(streamingState), intendedNextSubmit); + + drawResourcesFiller.finalizeGeoreferencedImageTileLoads(intendedNextSubmit); + + //drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(georefImageID, std::move(georeferencedImageParams), intendedNextSubmit); + + //drawResourcesFiller.addGeoreferencedImage(georefImageID, inverseViewProj, intendedNextSubmit); + } } double getScreenToWorldRatio(const float64_t3x3& viewProjectionMatrix, uint32_t2 windowSize) @@ -3695,6 +3942,18 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio return hlsl::length(float64_t2(idx_0_0, idx_1_0)); } + float64_t3x3 rotateBasedOnTime(const float64_t3x3& projectionMatrix) + { + double rotation = abs(cos(m_timeElapsed * 0.0004)) * 0.25 * PI(); + float64_t2 rotationVec = float64_t2(cos(rotation), sin(rotation)); + float64_t3x3 rotationParameter = float64_t3x3{ + rotationVec.x, rotationVec.y, 0.0, + -rotationVec.y, rotationVec.x, 0.0, + 0.0, 0.0, 1.0 + }; + 
return nbl::hlsl::mul(projectionMatrix, rotationParameter); + } + protected: std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); clock_t::time_point start; @@ -3768,8 +4027,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_resolution_clock::now(); bool stopBenchamrkFlag = false; #endif - - std::unique_ptr m_geoTextureRenderer; }; NBL_MAIN_FUNC(ComputerAidedDesign) diff --git a/62_CAD/scripts/generate_mipmaps.py b/62_CAD/scripts/generate_mipmaps.py new file mode 100644 index 000000000..78420cda5 --- /dev/null +++ b/62_CAD/scripts/generate_mipmaps.py @@ -0,0 +1,47 @@ +import OpenEXR +import Imath +import numpy as np + +def read_exr(path): + exr = OpenEXR.InputFile(path) + dw = exr.header()['dataWindow'] + size = (dw.max.x - dw.min.x + 1, dw.max.y - dw.min.y + 1) + + pt = Imath.PixelType(Imath.PixelType.FLOAT) + channels = ['R', 'G', 'B'] + data = [np.frombuffer(exr.channel(c, pt), dtype=np.float32).reshape(size[1], size[0]) for c in channels] + return np.stack(data, axis=-1) # shape: (H, W, 3) + +def write_exr(path, arr): + H, W, C = arr.shape + assert C == 3, "Only RGB supported" + header = OpenEXR.Header(W, H) + pt = Imath.PixelType(Imath.PixelType.FLOAT) + channels = { + 'R': arr[:, :, 0].astype(np.float32).tobytes(), + 'G': arr[:, :, 1].astype(np.float32).tobytes(), + 'B': arr[:, :, 2].astype(np.float32).tobytes() + } + exr = OpenEXR.OutputFile(path, header) + exr.writePixels(channels) + +def mipmap_exr(): + img = read_exr("../../media/tiled_grid_mip_0.exr") + h, w, _ = img.shape + base_path = "../../media/tiled_grid_mip_" + tile_size = 128 + mip_level = 1 + tile_length = h // (2 * tile_size) + + while tile_length > 0: + # Reshape and average 2x2 blocks + reshaped = img.reshape(h//2, 2, w//2, 2, 3) + mipmap = reshaped.mean(axis=(1, 3)) + write_exr(base_path + str(mip_level) + ".exr", mipmap) + img = mipmap + mip_level = mip_level + 1 + 
tile_length = tile_length // 2 + h = h // 2 + w = w // 2 + +mipmap_exr() \ No newline at end of file diff --git a/62_CAD/scripts/tiled_grid.py b/62_CAD/scripts/tiled_grid.py new file mode 100644 index 000000000..89c637338 --- /dev/null +++ b/62_CAD/scripts/tiled_grid.py @@ -0,0 +1,266 @@ +from PIL import Image, ImageDraw, ImageFont +import numpy as np +import os +import OpenImageIO as oiio + + + +def create_single_tile(tile_size, color, x_coord, y_coord, font_path=None): + """ + Creates a single square tile image with a given color and two lines of centered text. + + Args: + tile_size (int): The sidelength of the square tile in pixels. + color (tuple): A tuple of three floats (R, G, B) representing the color (0.0-1.0). + x_coord (int): The X coordinate to display on the tile. + y_coord (int): The Y coordinate to display on the tile. + font_path (str, optional): The path to a TrueType font file (.ttf). + If None, a default PIL font will be used. + Returns: + PIL.Image.Image: The created tile image with text. 
+ """ + # Convert float color (0.0-1.0) to 8-bit integer color (0-255) + int_color = tuple(int(max(0, min(1, c)) * 255) for c in color) # Ensure color components are clamped + + img = Image.new('RGB', (tile_size, tile_size), int_color) + draw = ImageDraw.Draw(img) + + text_line1 = f"x = {x_coord}" + text_line2 = f"y = {y_coord}" + + text_fill_color = (255, 255, 255) + + # --- Dynamic Font Size Adjustment --- + # Start with a relatively large font size and shrink if needed + font_size = int(tile_size * 0.25) # Initial guess for font size + max_font_size = int(tile_size * 0.25) # Don't exceed this + + font = None + max_iterations = 100 # Prevent infinite loops in font size reduction + + for _ in range(max_iterations): + current_font_path = font_path + current_font_size = max(1, font_size) # Ensure font size is at least 1 + + try: + if current_font_path and os.path.exists(current_font_path): + font = ImageFont.truetype(current_font_path, current_font_size) + else: + # Fallback to default font (size argument might not always work perfectly) + font = ImageFont.load_default() + # For default font, try to scale if load_default(size=...) 
is supported and works + try: + scaled_font = ImageFont.load_default(size=current_font_size) + if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: # Check if usable + font = scaled_font + except Exception: + pass # Stick with original default font + + if font is None: # Last resort if no font could be loaded + font = ImageFont.load_default() + + # Measure text dimensions + bbox1 = draw.textbbox((0, 0), text_line1, font=font) + text_width1 = bbox1[2] - bbox1[0] + text_height1 = bbox1[3] - bbox1[1] + + bbox2 = draw.textbbox((0, 0), text_line2, font=font) + text_width2 = bbox2[2] - bbox2[0] + text_height2 = bbox2[3] - bbox2[1] + + # Calculate total height needed for both lines plus some padding + # Let's assume a small gap between lines (e.g., 0.1 * text_height) + line_gap = int(text_height1 * 0.2) # 20% of line height + total_text_height = text_height1 + text_height2 + line_gap + + # Check if text fits vertically and horizontally + if (total_text_height < tile_size * 0.9) and \ + (text_width1 < tile_size * 0.9) and \ + (text_width2 < tile_size * 0.9): + break # Font size is good, break out of loop + else: + font_size -= 1 # Reduce font size + if font_size <= 0: # Prevent infinite loop if text can never fit + font_size = 1 # Smallest possible font size + break + + except Exception as e: + # Handle cases where font loading or textbbox fails + print(f"Error during font sizing: {e}. 
Reducing font size and retrying.") + font_size -= 1 + if font_size <= 0: + font_size = 1 + break # Cannot make font smaller, stop + + # Final check: if font_size became 0 or less, ensure it's at least 1 + if font_size <= 0: + font_size = 1 + # Reload font with minimum size if needed + if font_path and os.path.exists(font_path): + font = ImageFont.truetype(font_path, font_size) + else: + font = ImageFont.load_default() + try: + scaled_font = ImageFont.load_default(size=font_size) + if draw.textbbox((0, 0), text_line1, font=scaled_font)[2] > 0: + font = scaled_font + except Exception: + pass + + + # Re-measure with final font size to ensure accurate positioning + bbox1 = draw.textbbox((0, 0), text_line1, font=font) + text_width1 = bbox1[2] - bbox1[0] + text_height1 = bbox1[3] - bbox1[1] + + bbox2 = draw.textbbox((0, 0), text_line2, font=font) + text_width2 = bbox2[2] - bbox2[0] + text_height2 = bbox2[3] - bbox2[1] + + # Calculate positions for centering + # Line 1: centered horizontally, midpoint at 1/3 tile height + x1 = (tile_size - text_width1) / 2 + y1 = (tile_size / 3) - (text_height1 / 2) + + # Line 2: centered horizontally, midpoint at 2/3 tile height + x2 = (tile_size - text_width2) / 2 + y2 = (tile_size * 2 / 3) - (text_height2 / 2) + + # Draw the text + draw.text((x1, y1), text_line1, fill=text_fill_color, font=font) + draw.text((x2, y2), text_line2, fill=text_fill_color, font=font) + + return img + +def generate_interpolated_grid_image(tile_size, count, font_path=None): + """ + Generates a large image composed of 'count' x 'count' tiles, + with colors bilinearly interpolated from corners and text indicating tile index. + + Args: + tile_size (int): The sidelength of each individual square tile in pixels. + count (int): The number of tiles per side of the large grid (e.g., if count=3, + it's a 3x3 grid of tiles). + font_path (str, optional): Path to a TrueType font file for the tile text. + If None, a default PIL font will be used. 
+ + Returns: + PIL.Image.Image: The generated large grid image. + """ + if count <= 0: + raise ValueError("Count must be a positive integer.") + + total_image_size = count * tile_size + main_img = Image.new('RGB', (total_image_size, total_image_size)) + + # Corner colors (R, G, B) as floats (0.0-1.0) + corner_colors = { + "top_left": (1.0, 0.0, 0.0), # Red + "top_right": (1.0, 0.0, 1.0), # Purple + "bottom_left": (0.0, 1.0, 0.0), # Green + "bottom_right": (0.0, 0.0, 1.0) # Blue + } + + # Handle the edge case where count is 1 + if count == 1: + # If count is 1, there's only one tile, which is the top-left corner + tile_color = corner_colors["top_left"] + tile_image = create_single_tile(tile_size, tile_color, 0, 0, font_path=font_path) + main_img.paste(tile_image, (0, 0)) + return main_img + + for y_tile in range(count): + for x_tile in range(count): + # Calculate normalized coordinates (u, v) for interpolation + # We divide by (count - 1) to ensure 0 and 1 values at the edges + u = x_tile / (count - 1) + v = y_tile / (count - 1) + + # Apply the simplified bilinear interpolation formulas + r_component = 1 - v + g_component = v * (1 - u) + b_component = u + + # Clamp components to be within 0.0 and 1.0 (due to potential floating point inaccuracies) + current_color = ( + max(0.0, min(1.0, r_component)), + max(0.0, min(1.0, g_component)), + max(0.0, min(1.0, b_component)) + ) + + # Create the individual tile + tile_image = create_single_tile(tile_size, current_color, x_tile, y_tile, font_path=font_path) + + # Paste the tile onto the main image + paste_x = x_tile * tile_size + paste_y = y_tile * tile_size + main_img.paste(tile_image, (paste_x, paste_y)) + + return main_img + + + + +import argparse +parser = argparse.ArgumentParser(description="Process two optional named parameters.") +parser.add_argument('--ts', type=int, default=128, help='Tile Size') +parser.add_argument('--gs', type=int, default=128, help='Grid Size') + +# Parse the arguments +args = 
parser.parse_args() + + +# --- Configuration --- +tile_sidelength = args.ts # Size of each individual tile in pixels +grid_count = args.gs # Number of tiles per side (e.g., 15 means 15x15 grid) + +# Path to a font file (adjust this for your system) +# On Windows, you can typically use 'C:/Windows/Fonts/arial.ttf' or similar +# You might need to find a suitable font on your system. +# For testing, you can use None to let PIL use its default font. +# If a specific font path is provided and doesn't exist, it will fall back to default. +windows_font_path = "C:/Windows/Fonts/arial.ttf" # Example path for Windows +# If Arial is not found, try Times New Roman: +# windows_font_path = "C:/Windows/Fonts/times.ttf" + +font_to_use = None +if os.name == 'nt': # Check if OS is Windows + if os.path.exists(windows_font_path): + font_to_use = windows_font_path + print(f"Using font: {windows_font_path}") + else: + print(f"Warning: Windows font not found at '{windows_font_path}'. Using default PIL font.") +else: # Assume Linux/macOS for other OS types + # Common Linux/macOS font paths (adjust as needed) + linux_font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf" + mac_font_path = "/Library/Fonts/Arial.ttf" + if os.path.exists(linux_font_path): + font_to_use = linux_font_path + print(f"Using font: {linux_font_path}") + elif os.path.exists(mac_font_path): + font_to_use = mac_font_path + print(f"Using font: {mac_font_path}") + else: + print("Warning: No common Linux/macOS font found. 
Using default PIL font.") + + +# --- Generate and save the image --- +print(f"Generating a {grid_count}x{grid_count} grid of tiles, each {tile_sidelength}x{tile_sidelength} pixels.") +print(f"Total image size will be {grid_count * tile_sidelength}x{grid_count * tile_sidelength} pixels.") + +try: + final_image = generate_interpolated_grid_image(tile_sidelength, grid_count, font_path=font_to_use) + output_filename = "../../media/tiled_grid_mip_0.exr" + np_img = np.array(final_image).astype(np.float32) / 255.0 # Normalize for EXR + spec = oiio.ImageSpec(final_image.width, final_image.height, 3, oiio.TypeDesc("float")) + out = oiio.ImageOutput.create(output_filename) + out.open(output_filename, spec) + out.write_image(np_img.reshape(-1)) # Flatten for OIIO’s expected input + out.close() + + print(f"Successfully created '{output_filename}'") + +except ValueError as e: + print(f"Error: {e}") +except Exception as e: + print(f"An unexpected error occurred: {e}") \ No newline at end of file diff --git a/62_CAD/shaders/geotexture/common.hlsl b/62_CAD/shaders/geotexture/common.hlsl deleted file mode 100644 index 691cd3d3b..000000000 --- a/62_CAD/shaders/geotexture/common.hlsl +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _CAD_EXAMPLE_GEOTEXTURE_COMMON_HLSL_INCLUDED_ -#define _CAD_EXAMPLE_GEOTEXTURE_COMMON_HLSL_INCLUDED_ - -#include "../globals.hlsl" - -// Handle multiple geo textures, separate set, array of texture? index allocator? or multiple sets? 
-NBL_CONSTEXPR uint32_t MaxGeoTextures = 256; - -// GeoTexture Oriented Bounding Box -struct GeoTextureOBB -{ - pfloat64_t2 topLeft; // 2 * 8 = 16 bytes - float32_t2 dirU; // 2 * 4 = 8 bytes (24) - float32_t aspectRatio; // 4 bytes (32) -}; - -#ifdef __HLSL_VERSION -struct PSInput -{ - float4 position : SV_Position; - [[vk::location(0)]] float2 uv : COLOR0; -}; - -// Push Constant -[[vk::push_constant]] GeoTextureOBB geoTextureOBB; - -// Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated -// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl - -// Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes -[[vk::binding(0, 1)]] Texture2D geoTexture : register(t0); -[[vk::binding(1, 1)]] SamplerState geoTextureSampler : register(s0); -#endif - -#endif \ No newline at end of file diff --git a/62_CAD/shaders/geotexture/fragment_shader.hlsl b/62_CAD/shaders/geotexture/fragment_shader.hlsl deleted file mode 100644 index a7022cf03..000000000 --- a/62_CAD/shaders/geotexture/fragment_shader.hlsl +++ /dev/null @@ -1,9 +0,0 @@ -#pragma shader_stage(fragment) - -#include "common.hlsl" - -float4 main(PSInput input) : SV_TARGET -{ - const float2 uv = input.uv; - return geoTexture.Sample(geoTextureSampler, uv); -} \ No newline at end of file diff --git a/62_CAD/shaders/geotexture/vertex_shader.hlsl b/62_CAD/shaders/geotexture/vertex_shader.hlsl deleted file mode 100644 index e442b40fc..000000000 --- a/62_CAD/shaders/geotexture/vertex_shader.hlsl +++ /dev/null @@ -1,25 +0,0 @@ -#pragma shader_stage(vertex) - -#include "common.hlsl" - -PSInput main(uint vertexID : SV_VertexID) -{ - const uint vertexIdx = vertexID & 0x3u; - - PSInput outV; - ClipProjectionData clipProjectionData = globals.defaultClipProjection; - outV.position.z = 0.0; - - const float32_t2 dirV = float32_t2(geoTextureOBB.dirU.y, -geoTextureOBB.dirU.x) * 
geoTextureOBB.aspectRatio; - const float2 screenTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, geoTextureOBB.topLeft)); - const float2 screenDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(geoTextureOBB.dirU))); - const float2 screenDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - - const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); // corners of square from (0, 0) to (1, 1) - - const float2 coord = screenTopLeft + corner.x * screenDirU + corner.y * screenDirV; - outV.position.xy = coord; - outV.uv = corner; - - return outV; -} diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 5c3681910..11122b4cb 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -43,6 +43,7 @@ struct PushConstants uint32_t isDTMRendering; }; +// Please note minClip.y > maxClip.y --> TODO[Erfan]: fix later, because I get confused everytime dealing with min/max clip stuff struct WorldClipRect { pfloat64_t2 minClip; // min clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) @@ -244,10 +245,12 @@ struct ImageObjectInfo // Currently a simple OBB like ImageObject, but later will be fullscreen with additional info about UV offset for toroidal(mirror) addressing struct GeoreferencedImageInfo { - pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) - float32_t2 dirU; // 2 * 4 = 8 bytes (24) + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + float32_t2 dirU; // 2 * 4 = 8 bytes (24) float32_t aspectRatio; // 4 bytes (28) - uint32_t textureID; // 4 bytes (32) + uint32_t textureID; // 4 bytes (32) + float32_t2 minUV; // 2 * 4 = 8 bytes (40) + float32_t2 maxUV; // 2 * 4 = 8 bytes (48) }; // Goes into geometry buffer, needs to be aligned by 8 @@ -444,7 +447,7 @@ struct DTMHeightShadingSettings // height-color map float intervalLength; - float intervalIndexToHeightMultiplier; + float 
intervalIndexToHeightMultiplier; int isCenteredShading; uint32_t heightColorEntryCount; @@ -580,6 +583,18 @@ struct OrientedBoundingBox2D pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) float32_t2 dirU; // 2 * 4 = 8 bytes (24) float32_t aspectRatio; // 4 bytes (28) + +#ifndef __HLSL_VERSION + void transform(pfloat64_t3x3 transformation) + { + // We want to do tile streaming and clipping calculations in the same space; hence, we transform the obb (defined in local DWG or symbol) space to worldspace, and we use ndcToWorldTransformation + worldToUV to calculate which tiles are visible in current view) + const pfloat64_t2 prevDirV = pfloat64_t2(dirU.y, -dirU.x) * pfloat64_t(aspectRatio); + topLeft = nbl::hlsl::mul(transformation, pfloat64_t3(topLeft, 1)); + dirU = nbl::hlsl::mul(transformation, pfloat64_t3(dirU, 0)); + const pfloat64_t2 newDirV = nbl::hlsl::mul(transformation, pfloat64_t3(prevDirV, 0)); + aspectRatio = nbl::hlsl::length(newDirV) / nbl::hlsl::length(dirU); // TODO: maybe we could compute new transformed aspect ratio "smarter" + } +#endif }; #ifdef __HLSL_VERSION diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index 1783cb145..7e9edab4d 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -82,15 +82,18 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, co template<> float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) { + float32_t4 color; uint32_t styleIdx = loadMainObject(currentMainObjectIdx).styleIdx; if (!colorFromTexture) { - float32_t4 col = loadLineStyle(styleIdx).color; - col.w *= localAlpha; - return float4(col); + color = loadLineStyle(styleIdx).color; + color.w *= localAlpha; } else - return float4(localTextureColor, localAlpha); + color = float4(localTextureColor, 
localAlpha); + + color.rgb *= color.a; + return color; } template<> float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) @@ -141,6 +144,7 @@ float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlp color.a *= float(storedQuantizedAlpha) / 255.f; + color.rgb *= color.a; return color; } @@ -695,7 +699,6 @@ float4 fragMain(PSInput input) : SV_TARGET } } - if (localAlpha <= 0) discard; diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 90394e935..6ee6c263f 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -739,20 +739,24 @@ PSInput vtxMain(uint vertexID : SV_VertexID) } else if (objType == ObjectType::STREAMED_IMAGE) { - pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); - float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + const pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + const float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + const float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float32_t2), 4u); + const uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float32_t2) + sizeof(float32_t), 4u); + 
// Remember we are constructing a quad in worldspace whose corners are matched to a quad in our toroidally-resident gpu image. `minUV` and `maxUV` are used to indicate where to sample from + // the gpu image to reconstruct the toroidal quad. + const float32_t2 minUV = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float32_t2) + sizeof(float32_t) + sizeof(uint32_t), 4u); + const float32_t2 maxUV = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + 2 * sizeof(float32_t2) + sizeof(float32_t) + sizeof(uint32_t), 4u); const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; - const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); - const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); - const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + const float32_t2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); + const float32_t2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); + const float32_t2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - float2 uv = corner; // non-dilated + const bool2 corner = bool2(vertexIdx & 0x1u, vertexIdx >> 1u); - float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; + const float32_t2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; + const float32_t2 uv = float32_t2(corner.x ? maxUV.x : minUV.x, corner.y ? 
maxUV.y : minUV.y); outV.position = float4(ndcCorner, 0.f, 1.f); outV.setImageUV(uv); diff --git a/common/include/nbl/examples/common/MonoWindowApplication.hpp b/common/include/nbl/examples/common/MonoWindowApplication.hpp index 0f18012c0..881519c64 100644 --- a/common/include/nbl/examples/common/MonoWindowApplication.hpp +++ b/common/include/nbl/examples/common/MonoWindowApplication.hpp @@ -150,7 +150,8 @@ class MonoWindowApplication : public virtual SimpleWindowedApplication virtual inline bool onAppTerminated() { m_inputSystem = nullptr; - m_device->waitIdle(); + if (m_device) + m_device->waitIdle(); m_framesInFlight.clear(); m_surface = nullptr; m_window = nullptr;