diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 15b4fc8ea..40b20d121 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -143,6 +143,7 @@ struct Resource { std::optional VKBinding; Buffer *BufferPtr = nullptr; bool HasCounter; + std::optional TilesMapped; bool isRaw() const { switch (Kind) { diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index b3dee60c3..1246b6a1b 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -149,9 +149,14 @@ static D3D12_RESOURCE_DESC getResourceDescription(const Resource &R) { const uint32_t Width = R.isTexture() ? B.OutputProps.Width : getUAVBufferSize(R); const uint32_t Height = R.isTexture() ? B.OutputProps.Height : 1; - const D3D12_TEXTURE_LAYOUT Layout = R.isTexture() - ? D3D12_TEXTURE_LAYOUT_UNKNOWN - : D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + D3D12_TEXTURE_LAYOUT Layout; + if (R.isTexture()) + Layout = getDXKind(R.Kind) == SRV + ? D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE + : D3D12_TEXTURE_LAYOUT_UNKNOWN; + else + Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + const D3D12_RESOURCE_FLAGS Flags = R.isReadWrite() ? 
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; @@ -244,9 +249,11 @@ class DXDevice : public offloadtest::Device { ComPtr Upload; ComPtr Buffer; ComPtr Readback; + ComPtr Heap; ResourceSet(ComPtr Upload, ComPtr Buffer, - ComPtr Readback) - : Upload(Upload), Buffer(Buffer), Readback(Readback) {} + ComPtr Readback, + ComPtr Heap = nullptr) + : Upload(Upload), Buffer(Buffer), Readback(Readback), Heap(Heap) {} }; // ResourceBundle will contain one ResourceSet for a singular resource @@ -521,51 +528,107 @@ class DXDevice : public offloadtest::Device { addUploadEndBarrier(IS, Destination, R.isReadWrite()); } + // Returns the number of 64KB tiles to map. An explicit NumTiles (including + // 0) is returned as-is; otherwise the result is the number of tiles needed + // to cover Width, which is treated as a size in bytes, rounded up to a + // whole tile. + UINT getNumTiles(std::optional NumTiles, uint32_t Width) { + if (NumTiles.has_value()) + return static_cast(*NumTiles); + // Divide first, then round up on a remainder. Adding the tile size + // before dividing wraps for Width near UINT32_MAX, and an assert after + // the arithmetic cannot prevent that (it is compiled out with NDEBUG). + UINT Ret = Width / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + if (Width % D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES != 0) + ++Ret; + return Ret; + } + llvm::Expected createSRV(Resource &R, InvocationState &IS) { ResourceBundle Bundle; - - const D3D12_HEAP_PROPERTIES HeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R); - const D3D12_HEAP_PROPERTIES UploadHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); const D3D12_RESOURCE_DESC UploadResDesc = CD3DX12_RESOURCE_DESC::Buffer(R.size()); + const D3D12_HEAP_PROPERTIES UploadHeapProps = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); uint32_t RegOffset = 0; + for (const auto &ResData : R.BufferPtr->Data) { llvm::outs() << "Creating SRV: { Size = " << R.size() << ", Register = t" << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space << " }\n"; + << ", Space = " << R.DXBinding.Space; + + if (R.TilesMapped) + llvm::outs() << ", TilesMapped = " << *R.TilesMapped; + llvm::outs() 
<< " }\n"; ComPtr Buffer; - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &HeapProp, D3D12_HEAP_FLAG_NONE, &ResDesc, - D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&Buffer)), - "Failed to create committed resource (buffer).")) + if (auto Err = + HR::toError(Device->CreateReservedResource( + &ResDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, + IID_PPV_ARGS(&Buffer)), + "Failed to create reserved resource (buffer).")) return Err; + // Committed upload buffer ComPtr UploadBuffer; if (auto Err = HR::toError( Device->CreateCommittedResource( - &UploadHeapProp, D3D12_HEAP_FLAG_NONE, &UploadResDesc, + &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&UploadBuffer)), "Failed to create committed resource (upload buffer).")) return Err; - // Initialize the SRV data + // Tile mapping setup (only skipped when TilesMapped is set to 0) + const UINT NumTiles = getNumTiles(R.TilesMapped, ResDesc.Width); + ComPtr Heap; // optional, only created if NumTiles > 0 + + if (NumTiles > 0) { + // Create a Heap large enough for the mapped tiles + D3D12_HEAP_DESC HeapDesc = {}; + HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + HeapDesc.SizeInBytes = static_cast(NumTiles) * + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; + + if (auto Err = + HR::toError(Device->CreateHeap(&HeapDesc, IID_PPV_ARGS(&Heap)), + "Failed to create heap for tiled SRV resource.")) + return Err; + + // Define one contiguous mapping region + const D3D12_TILED_RESOURCE_COORDINATE StartCoord = {0, 0, 0, 0}; + D3D12_TILE_REGION_SIZE RegionSize = {}; + RegionSize.NumTiles = NumTiles; + RegionSize.UseBox = FALSE; + + const D3D12_TILE_RANGE_FLAGS RangeFlag = D3D12_TILE_RANGE_FLAG_NONE; + const UINT HeapRangeStartOffset = 0; + const UINT RangeTileCount = NumTiles; + + ID3D12CommandQueue 
*CommandQueue = IS.Queue.Get(); + CommandQueue->UpdateTileMappings( + Buffer.Get(), 1, &StartCoord, &RegionSize, // One region + Heap.Get(), 1, &RangeFlag, &HeapRangeStartOffset, &RangeTileCount, + D3D12_TILE_MAPPING_FLAG_NONE); + } + + // Upload data initialization void *ResDataPtr = nullptr; - if (auto Err = HR::toError(UploadBuffer->Map(0, nullptr, &ResDataPtr), - "Failed to acquire UAV data pointer.")) - return Err; - memcpy(ResDataPtr, ResData.get(), R.size()); - UploadBuffer->Unmap(0, nullptr); + // Hand the HRESULT from Map to HR::toError, as the resource creation + // calls in this function do, instead of discarding it via SUCCEEDED(). + if (auto Err = HR::toError(UploadBuffer->Map(0, nullptr, &ResDataPtr), + "Failed to map SRV upload buffer.")) + return Err; + memcpy(ResDataPtr, ResData.get(), R.size()); + UploadBuffer->Unmap(0, nullptr); addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - Bundle.emplace_back(UploadBuffer, Buffer, nullptr); + Bundle.emplace_back(UploadBuffer, Buffer, nullptr, Heap); RegOffset++; } return Bundle; @@ -684,6 +747,7 @@ class DXDevice : public offloadtest::Device { llvm::outs() << "UAV: HeapIdx = " << HeapIdx << " EltSize = " << EltSize << " NumElts = " << NumElts << " HasCounter = " << R.HasCounter << "\n"; + D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle = UAVHandleHeapStart; UAVHandle.ptr += HeapIdx * DescHandleIncSize; ID3D12Resource *CounterBuffer = R.HasCounter ? 
RS.Buffer.Get() : nullptr; diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index e852c19f0..801487f59 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -776,8 +776,8 @@ class VKDevice : public offloadtest::Device { std::errc::invalid_argument, "No RenderTarget buffer specified for graphics pipeline."); Resource FrameBuffer = { - ResourceKind::Texture2D, "RenderTarget", {}, {}, - P.Bindings.RTargetBufferPtr, false}; + ResourceKind::Texture2D, "RenderTarget", {}, {}, + P.Bindings.RTargetBufferPtr, false, std::nullopt}; IS.FrameBufferResource.Size = P.Bindings.RTargetBufferPtr->size(); IS.FrameBufferResource.BufferPtr = P.Bindings.RTargetBufferPtr; IS.FrameBufferResource.ImageLayout = @@ -804,8 +804,8 @@ class VKDevice : public offloadtest::Device { std::errc::invalid_argument, "No Vertex buffer specified for graphics pipeline."); const Resource VertexBuffer = { - ResourceKind::StructuredBuffer, "VertexBuffer", {}, {}, - P.Bindings.VertexBufferPtr, false}; + ResourceKind::StructuredBuffer, "VertexBuffer", {}, {}, + P.Bindings.VertexBufferPtr, false, std::nullopt}; auto ExVHostBuf = createBuffer(IS, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VertexBuffer.size(), diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 74494f0d0..633456a76 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -291,6 +291,7 @@ void MappingTraits::mapping(IO &I, I.mapRequired("Name", R.Name); I.mapRequired("Kind", R.Kind); I.mapOptional("HasCounter", R.HasCounter, 0); + I.mapOptional("TilesMapped", R.TilesMapped); I.mapRequired("DirectXBinding", R.DXBinding); I.mapOptional("VulkanBinding", R.VKBinding); } diff --git a/test/Feature/HLSLLib/PartiallyMappedResources.test b/test/Feature/HLSLLib/PartiallyMappedResources.test new file mode 100644 index 000000000..30ce97195 --- /dev/null +++ b/test/Feature/HLSLLib/PartiallyMappedResources.test @@ -0,0 +1,157 @@ +#--- source.hlsl + +StructuredBuffer X : 
register(t0); +StructuredBuffer Y : register(t1); + +RWStructuredBuffer Out : register(u2); +RWStructuredBuffer CAFM : register(u3); + +[numthreads(1,1,1)] +void main() { + // 4096 int4's inside X or Y occupy 64KB of data. + // (4096 int4's * 4 ints * 4 bytes per int) + // So, any index into the buffer >= [4096] will access a new "tile" + + uint idx = 0; + + uint status; + int4 Result = X.Load(0, status); + bool CAFMResult = CheckAccessFullyMapped(status); + CAFM[idx] = CAFMResult; + if (CAFMResult) + Out[idx] = Result.x; + else + Out[idx] = 9003; + + idx += 1; + + Result = X.Load(4100, status); + CAFMResult = CheckAccessFullyMapped(status); + CAFM[idx] = CAFMResult; + if (CAFMResult) + Out[idx] = Result.x; + else + Out[idx] = 9003; + + idx += 1; + + Result = Y.Load(0, status); + CAFMResult = CheckAccessFullyMapped(status); + CAFM[idx] = CAFMResult; + if (CAFMResult) + Out[idx] = Result.x; + else + Out[idx] = 9003; + + idx += 1; + + Result = Y.Load(4100, status); + CAFMResult = CheckAccessFullyMapped(status); + CAFM[idx] = CAFMResult; + if (CAFMResult) + Out[idx] = Result.x; + else + Out[idx] = 9003; + + idx += 1; +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: X + Format: Int32 + Stride: 16 + FillSize: 131072 + FillValue: 9001 + - Name: Y + Format: Int32 + Stride: 16 + FillSize: 131072 + FillValue: 9002 + - Name: Out + Format: Int32 + Stride: 4 + FillSize: 16 + - Name: ExpectedOut + Format: Int32 + Stride: 4 + # Each value is the data loaded when the access is fully mapped; otherwise the shader stores the sentinel 9003. 
+ Data: [9001, 9003, 9003, 9003] + - Name: CAFM + Format: Bool + Stride: 4 + FillSize: 16 + FillValue: 0 + - Name: ExpectedCAFM + Format: Bool + Stride: 4 + # Only the first data access should be accessing fully mapped memory + Data: [1, 0, 0, 0] + +Results: + - Result: Test + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + - Result: TestCAFM + Rule: BufferExact + Actual: CAFM + Expected: ExpectedCAFM +DescriptorSets: + - Resources: + - Name: X + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + TilesMapped: 1 + - Name: Y + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + TilesMapped: 0 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: CAFM + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 +#--- end + +# Unimplemented https://github.com/llvm/llvm-project/issues/138910 +# AND https://github.com/llvm/llvm-project/issues/99204 +# XFAIL: Clang + +# Unimplemented https://github.com/llvm/llvm-project/issues/138910 +# AND https://github.com/llvm/llvm-project/issues/99204 +# XFAIL: Vulkan + +# Bug https://github.com/llvm/offload-test-suite/issues/182 +# Metal API seems to have problems with reserved resources +# XFAIL: Metal + +# Bug https://github.com/llvm/offload-test-suite/issues/485 +# XFAIL: Intel + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o