From 2e64f0b6d512ff1659c7aa34c0443c88adb5371a Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 20 Oct 2025 21:21:20 -0700 Subject: [PATCH 1/7] [DirectX] Teach DXILResourceAccess about cbuffers This isn't reachable today but will come into play once we reorder passes for #147352 and #147351. --- .../lib/Target/DirectX/DXILResourceAccess.cpp | 152 ++++++++++++++++-- .../load-cbuffer-array-of-struct.ll | 59 +++++++ .../load-cbuffer-array-of-vector.ll | 45 ++++++ .../load-cbuffer-array-typedgep.ll | 27 ++++ .../ResourceAccess/load-cbuffer-arrays.ll | 129 +++++++++++++++ .../load-cbuffer-dynamic-struct.ll | 64 ++++++++ .../ResourceAccess/load-cbuffer-dynamic.ll | 42 +++++ .../ResourceAccess/load-cbuffer-scalars.ll | 87 ++++++++++ .../ResourceAccess/load-cbuffer-vectors.ll | 109 +++++++++++++ 9 files changed, 704 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp index 6579d3405cf39..e467e3c8a218c 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp @@ -10,6 +10,7 @@ #include "DirectX.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/DXILResource.h" +#include "llvm/Frontend/HLSL/HLSLResource.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -20,6 +21,7 @@ #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Transforms/Utils/ValueMapper.h" #define DEBUG_TYPE "dxil-resource-access" @@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset, APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (GEP->accumulateConstantOffset(DL, ConstantOffset)) { APInt Scaled = ConstantOffset.udiv(ScalarSize); - return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled); + return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled); } - auto IndexIt = GEP->idx_begin(); - assert(cast(IndexIt)->getZExtValue() == 0 && - "GEP is not indexing through pointer"); - ++IndexIt; - Value *Offset = *IndexIt; - assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); - return Offset; + unsigned NumIndices = GEP->getNumIndices(); + + // If we have a single index we're indexing into a top level array. This + // generally only happens with cbuffers. + if (NumIndices == 1) + return *GEP->idx_begin(); + + // If we have two indices, this should be a simple access through a pointer. + if (NumIndices == 2) { + auto IndexIt = GEP->idx_begin(); + assert(cast(IndexIt)->getZExtValue() == 0 && + "GEP is not indexing through pointer"); + ++IndexIt; + Value *Offset = *IndexIt; + assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); + return Offset; + } + + llvm_unreachable("Unhandled GEP structure for resource access"); } static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI, @@ -171,6 +185,123 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) { LI->replaceAllUsesWith(V); } +namespace { +/// Helper for building a `load.cbufferrow` intrinsic given a simple type. +struct CBufferRowIntrin { + Intrinsic::ID IID; + Type *RetTy; + unsigned int EltSize; + unsigned int NumElts; + + CBufferRowIntrin(const DataLayout &DL, Type *Ty) { + assert(Ty == Ty->getScalarType() && "Expected scalar type"); + + switch (DL.getTypeSizeInBits(Ty)) { + case 16: + IID = Intrinsic::dx_resource_load_cbufferrow_8; + RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); + EltSize = 2; + NumElts = 8; + break; + case 32: + IID = Intrinsic::dx_resource_load_cbufferrow_4; + RetTy = StructType::get(Ty, Ty, Ty, Ty); + EltSize = 4; + NumElts = 4; + break; + case 64: + IID = Intrinsic::dx_resource_load_cbufferrow_2; + RetTy = StructType::get(Ty, Ty); + EltSize = 8; + NumElts = 2; + break; + default: + llvm_unreachable("Only 16, 32, and 64 bit types supported"); + } + } +}; +} // namespace + +static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset, + dxil::ResourceTypeInfo &RTI) { + const DataLayout &DL = LI->getDataLayout(); + + Type *Ty = LI->getType(); + assert(!isa(Ty) && "Structs not handled yet"); + CBufferRowIntrin Intrin(DL, Ty->getScalarType()); + + StringRef Name = LI->getName(); + Value *Handle = II->getOperand(0); + + IRBuilder<> Builder(LI); + + ConstantInt *GlobalOffset = dyn_cast(II->getOperand(1)); + assert(GlobalOffset && "CBuffer getpointer index must be constant"); + + unsigned int FixedOffset = GlobalOffset->getZExtValue(); + // If we have a further constant offset we can just fold it in to the fixed + // offset. + if (auto *ConstOffset = dyn_cast_if_present(Offset)) { + FixedOffset += ConstOffset->getZExtValue(); + Offset = nullptr; + } + + Value *CurrentRow = ConstantInt::get( + Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes); + unsigned int CurrentIndex = + (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; + + assert(!(CurrentIndex && Offset) && + "Dynamic indexing into elements of cbuffer rows is not supported"); + if (Offset) + CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset; + + auto *CBufLoad = Builder.CreateIntrinsic( + Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load"); + auto *Elt = + Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract"); + + unsigned int Remaining = + ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; + if (Remaining == 0) { + // We only have a single element, so we're done. + Value *Result = Elt; + + // However, if we loaded a <1 x T>, then we need to adjust the type. + if (auto *VT = dyn_cast(Ty)) { + assert(VT->getNumElements() == 1 && "Can't have multiple elements here"); + Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, + Builder.getInt32(0), Name); + } + LI->replaceAllUsesWith(Result); + return; + } + + // Walk each element and extract it, wrapping to new rows as needed. + SmallVector Extracts{Elt}; + while (Remaining--) { + CurrentIndex %= Intrin.NumElts; + + if (CurrentIndex == 0) { + CurrentRow = Builder.CreateAdd(CurrentRow, + ConstantInt::get(Builder.getInt32Ty(), 1)); + CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID, + {Handle, CurrentRow}, nullptr, + Name + ".load"); + } + + Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, + Name + ".extract")); + } + + // Finally, we build up the original loaded value. + Value *Result = PoisonValue::get(Ty); + for (int I = 0, E = Extracts.size(); I < E; ++I) + Result = Builder.CreateInsertElement( + Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I)); + LI->replaceAllUsesWith(Result); +} + static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, dxil::ResourceTypeInfo &RTI) { switch (RTI.getResourceKind()) { @@ -179,6 +310,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, case dxil::ResourceKind::RawBuffer: case dxil::ResourceKind::StructuredBuffer: return createRawLoad(II, LI, Offset); + case dxil::ResourceKind::CBuffer: + return createCBufferLoad(II, LI, Offset, RTI); case dxil::ResourceKind::Texture1D: case dxil::ResourceKind::Texture2D: case dxil::ResourceKind::Texture2DMS: @@ -190,9 +323,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, case dxil::ResourceKind::TextureCubeArray: case dxil::ResourceKind::FeedbackTexture2D: case dxil::ResourceKind::FeedbackTexture2DArray: - case dxil::ResourceKind::CBuffer: case dxil::ResourceKind::TBuffer: - // TODO: handle these + reportFatalUsageError("Load not yet implemented for resource type"); return; case dxil::ResourceKind::Sampler: case dxil::ResourceKind::RTAccelerationStructure: diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll new file mode 100644 index 0000000000000..22fba8c1d5f8c --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll @@ -0,0 +1,59 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s +; +; Tests for indexed types in dynamically indexed arrays in cbuffers. +; +; struct S { +; float x[2]; +; uint q; +; }; +; cbuffer CB : register(b0) { +; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3 +; S v[3]; // offset 48, size 24 (+8) * 3 +; } +%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }> +%__cblayout_CB = type <{ + <{ + [2 x <{ <3 x i32>, target("dx.Padding", 4) }>], + <3 x i32> + }>, + target("dx.Padding", 4), + <{ + [2 x <{ %S, target("dx.Padding", 8) }>], %S + }> +}> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst, i32 %idx) { +entry: + %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ;; w[2].z + ; + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: store i32 [[X]], ptr %dst + %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_ptr, i32 40 + %w_load = load i32, ptr addrspace(2) %w_gep, align 4 + store i32 %w_load, ptr %dst, align 4 + + ;; v[2].q + ; + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 + ; CHECK: store i32 [[X]], ptr [[PTR]] + %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) + %v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_ptr, i32 84 + %v_load = load i32, ptr addrspace(2) %v_gep, align 4 + %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %v_load, ptr %v.i, align 4 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll new file mode 100644 index 0000000000000..cbd76fc00b813 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll @@ -0,0 +1,45 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s +; +; Test for when we have indices into both the array and the vector: ie, s[1][3] + +; cbuffer CB : register(b0) { +; uint4 s[2]; // offset 0, size 16 * 2 +; } +%__cblayout_CB = type <{ [2 x <4 x i32>] }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ; CHECK: store i32 [[X]], ptr %dst + %i8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %i8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %i8_ptr, i32 28 + %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4 + store i32 %i8_vecext, ptr %dst, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ;; + ;; It would be nice to avoid the redundant vector creation here, but that's + ;; outside of the scope of this pass. + ;; + ; CHECK: [[X_VEC:%.*]] = insertelement <4 x i32> {{%.*}}, i32 [[X]], i32 3 + ; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3 + ; CHECK: store i32 [[X_EXT]], ptr %dst + %typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 1 + %typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16 + %typed_vecext = extractelement <4 x i32> %typed_load, i32 3 + store i32 %typed_vecext, ptr %dst, align 4 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll new file mode 100644 index 0000000000000..a561d8c0aea5d --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll @@ -0,0 +1,27 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s + +; cbuffer CB : register(b0) { +; float a1[3]; +; } +%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }> + +@CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 + %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %a1_gep = getelementptr inbounds <{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) %a1_ptr, i32 0, i32 0, i32 1 + %a1 = load float, ptr addrspace(2) %a1_gep, align 4 + store float %a1, ptr %dst, align 32 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll new file mode 100644 index 0000000000000..68dfbb184083f --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll @@ -0,0 +1,129 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s + +; cbuffer CB : register(b0) { +; float a1[3]; // offset 0, size 4 (+12) * 3 +; double3 a2[2]; // offset 48, size 24 (+8) * 2 +; float16_t a3[2][2]; // offset 112, size 2 (+14) * 4 +; uint64_t a4[3]; // offset 176, size 8 (+8) * 3 +; int4 a5[2][3][4]; // offset 224, size 16 * 24 +; uint16_t a6[1]; // offset 608, size 2 (+14) * 1 +; int64_t a7[2]; // offset 624, size 8 (+8) * 2 +; bool a8[4]; // offset 656, size 4 (+12) * 4 +; } +%__cblayout_CB = type <{ + <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), + <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8), + <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14), + <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8), + [24 x <4 x i32>], + [1 x i16], target("dx.Padding", 14), + <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8), + <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }> +}> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %a1_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a1_ptr, i32 16 + %a1 = load float, ptr addrspace(2) %a1_gep, align 4 + store float %a1, ptr %dst, align 32 + + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5) + ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6) + ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 + ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] + %a2_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) + %a2_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a2_ptr, i32 32 + %a2 = load <3 x double>, ptr addrspace(2) %a2_gep, align 8 + %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 + store <3 x double> %a2, ptr %a2.i, align 32 + + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8) + ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32 + ; CHECK: store half [[X]], ptr [[PTR]] + %a3_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 112) + %a3_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a3_ptr, i32 16 + %a3 = load half, ptr addrspace(2) %a3_gep, align 2 + %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32 + store half %a3, ptr %a3.i, align 2 + + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 12) + ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 + ; CHECK: store i64 [[X]], ptr [[PTR]] + %a4_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 176) + %a4_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a4_ptr, i32 16 + %a4 = load i64, ptr addrspace(2) %a4_gep, align 8 + %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 + store i64 %a4, ptr %a4.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 26) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2 + ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 + ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] + %a5_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 224) + %a5_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a5_ptr, i32 192 + %a5 = load <4 x i32>, ptr addrspace(2) %a5_gep, align 4 + %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 + store <4 x i32> %a5, ptr %a5.i, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 38) + ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64 + ; CHECK: store i16 [[X]], ptr [[PTR]] + %a6_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 608) + %a6 = load i16, ptr addrspace(2) %a6_ptr, align 2 + %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64 + store i16 %a6, ptr %a6.i, align 2 + + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 40) + ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 + ; CHECK: store i64 [[X]], ptr [[PTR]] + %a7_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 624) + %a7_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a7_ptr, i32 16 + %a7 = load i64, ptr addrspace(2) %a7_gep, align 8 + %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 + store i64 %a7, ptr %a7.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 42) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80 + ; CHECK: store i32 [[X]], ptr [[PTR]] + %a8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 656) + %a8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a8_ptr, i32 16 + %a8 = load i32, ptr addrspace(2) %a8_gep, align 4, !range !0, !noundef !1 + %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80 + store i32 %a8, ptr %a8.i, align 4 + + ret void +} + +!0 = !{i32 0, i32 2} +!1 = !{} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll new file mode 100644 index 0000000000000..9c2ec2399d8d5 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll @@ -0,0 +1,64 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s +; +; Tests for indexed types in dynamically indexed arrays in cbuffers. +; +; Bug https://github.com/llvm/llvm-project/issues/164517 +; XFAIL: * +; +; struct S { +; float x[2]; +; uint q; +; }; +; cbuffer CB : register(b0) { +; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3 +; S v[3]; // offset 48, size 24 (+8) * 3 +; } +%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }> +%__cblayout_CB = type <{ + <{ + [2 x <{ <3 x i32>, target("dx.Padding", 4) }>], + <3 x i32> + }>, + target("dx.Padding", 4), + <{ + [2 x <{ %S, target("dx.Padding", 8) }>], %S + }> +}> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst, i32 %idx) { +entry: + %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ;; w[Tid.x].z + ; + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: store i32 [[X]], ptr %dst + %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %w_arrayidx = getelementptr <3 x i32>, ptr addrspace(2) %w_ptr, i32 %idx + %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_arrayidx, i32 4 + %w_load = load i32, ptr addrspace(2) %w_gep, align 4 + store i32 %w_load, ptr %dst, align 4 + + ;; v[Tid.x].q + ; + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 + ; CHECK: store i32 [[X]], ptr [[PTR]] + %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) + %v_arrayidx = getelementptr <{ %struct.S, target("dx.Padding", 4) }>, ptr addrspace(2) %v_ptr, i32 %idx + %v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_arrayidx, i32 8 + %v_load = load i32, ptr addrspace(2) %v_gep, align 4 + %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %v_load, ptr %v.i, align 4 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll new file mode 100644 index 0000000000000..1b486a1f40f22 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll @@ -0,0 +1,42 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s +; +; Tests for dynamic indices into arrays in cbuffers. + +; cbuffer CB : register(b0) { +; uint s[10]; // offset 0, size 4 (+12) * 10 +; uint t[10]; // offset 160, size 4 (+12) * 10 +; } +%__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }> }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst, i32 %idx) { +entry: + %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: store i32 [[X]], ptr %dst + %s_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %s_gep = getelementptr <{ i32, target("dx.Padding", 12) }>, ptr addrspace(2) %s_ptr, i32 %idx + %s_load = load i32, ptr addrspace(2) %s_gep, align 4 + store i32 %s_load, ptr %dst, align 4 + + ; CHECK: [[T_IDX:%.*]] = add i32 10, %idx + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 [[T_IDX]]) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 + ; CHECK: store i32 [[X]], ptr [[PTR]] + %t_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 160) + %t_gep = getelementptr <{ i32, target("dx.Padding", 12) }>, ptr addrspace(2) %t_ptr, i32 %idx + %t_load = load i32, ptr addrspace(2) %t_gep, align 4 + %t.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %t_load, ptr %t.i, align 4 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll new file mode 100644 index 0000000000000..5a8275b4a6c12 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s + +; cbuffer CB { +; float a1; // offset 0, size 4 +; int a2; // offset 4, size 4 +; bool a3; // offset 8, size 4 +; float16_t a4; // offset 12, size 2 +; uint16_t a5; // offset 14, size 2 +; double a6; // offset 16, size 8 +; int64_t a7; // offset 24, size 8 +; } +%__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 + + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[A1]], ptr %dst + %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %a1 = load float, ptr addrspace(2) %a1_ptr, align 4 + store float %a1, ptr %dst, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 + ; CHECK: store i32 [[A2]], ptr [[PTR]] + %a2_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 4) + %a2 = load i32, ptr addrspace(2) %a2_ptr, align 4 + %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %a2, ptr %a2.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 + ; CHECK: store i32 [[A3]], ptr [[PTR]] + %a3_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 8) + %a3 = load i32, ptr addrspace(2) %a3_ptr, align 4 + %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 + store i32 %a3, ptr %a3.i, align 4 + + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12 + ; CHECK: store half [[A4]], ptr [[PTR]] + %a4_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 12) + %a4 = load half, ptr addrspace(2) %a4_ptr, align 2 + %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12 + store half %a4, ptr %a4.i, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14 + ; CHECK: store i16 [[A5]], ptr [[PTR]] + %a5_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 14) + %a5 = load i16, ptr addrspace(2) %a5_ptr, align 2 + %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14 + store i16 %a5, ptr %a5.i, align 2 + + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 + ; CHECK: store double [[A6]], ptr [[PTR]] + %a6_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 16) + %a6 = load double, ptr addrspace(2) %a6_ptr, align 8 + %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 + store double %a6, ptr %a6.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24 + ; CHECK: store i64 [[A7]], ptr [[PTR]] + %a7_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 24) + %a7 = load i64, ptr addrspace(2) %a7_ptr, align 8 + %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 24 + store i64 %a7, ptr %a7.i, align 8 + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll new file mode 100644 index 0000000000000..1f20d0101c151 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll @@ -0,0 +1,109 @@ +; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s + +; cbuffer CB { +; float3 a1; // offset 0, size 12 (+4) +; double3 a2; // offset 16, size 24 +; float16_t2 a3; // offset 40, size 4 (+4) +; uint64_t3 a4; // offset 48, size 24 (+8) +; int4 a5; // offset 80, size 16 +; uint16_t3 a6; // offset 96, size 6 (+10) +; }; +%__cblayout_CB = type <{ <3 x float>, target("dx.Padding", 4), <3 x double>, <2 x half>, target("dx.Padding", 4), <3 x i64>, target("dx.Padding", 8), <4 x i32>, <3 x i16> }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb + %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 + + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x float> poison, float [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x float> [[VEC0]], float [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x float> [[VEC1]], float [[Z]], i32 2 + ; CHECK: store <3 x float> [[VEC2]], ptr %dst + %a1_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) + %a1 = load <3 x float>, ptr addrspace(2) %a1_gep, align 16 + store <3 x float> %a1, ptr %dst, align 4 + + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) + ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 + ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] + %a2_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 16) + %a2 = load <3 x double>, ptr addrspace(2) %a2_gep, align 32 + %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 + store <3 x double> %a2, ptr %a2.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) + ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4 + ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5 + ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <2 x half> [[VEC0]], half [[Y]], i32 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 + ; CHECK: store <2 x half> [[VEC1]], ptr [[PTR]] + %a3_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 40) + %a3 = load <2 x half>, ptr addrspace(2) %a3_gep, align 4 + %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 + store <2 x half> %a3, ptr %a3.i, align 2 + + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 3) + ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 4) + ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x i64> [[VEC1]], i64 [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 + ; CHECK: store <3 x i64> [[VEC2]], ptr [[PTR]] + %a4_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) + %a4 = load <3 x i64>, ptr addrspace(2) %a4_gep, align 32 + %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 + store <3 x i64> %a4, ptr %a4.i, align 8 + + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2 + ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 + ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] + %a5_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 80) + %a5 = load <4 x i32>, ptr addrspace(2) %a5_gep, align 16 + %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 + store <4 x i32> %a5, ptr %a5.i, align 4 + + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6) + ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x i16> poison, i16 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x i16> [[VEC0]], i16 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x i16> [[VEC1]], i16 [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 88 + ; CHECK: store <3 x i16> [[VEC2]], ptr [[PTR]] + %a6_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 96) + %a6 = load <3 x i16>, ptr addrspace(2) %a6_gep, align 8 + %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 88 + store <3 x i16> %a6, ptr %a6.i, align 2 + + ret void +} From 83edf72baffc0c4942e77e5b7c561c63aed7d2b2 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Wed, 22 Oct 2025 17:09:01 -0700 Subject: [PATCH 2/7] fixup - improve test comments --- .../load-cbuffer-array-of-vector.ll | 4 ++++ .../load-cbuffer-array-typedgep.ll | 3 +++ .../ResourceAccess/load-cbuffer-arrays.ll | 16 ++++++++++++++++ .../load-cbuffer-dynamic-struct.ll | 4 ++-- .../ResourceAccess/load-cbuffer-dynamic.ll | 4 ++++ .../ResourceAccess/load-cbuffer-scalars.ll | 14 ++++++++++++++ .../ResourceAccess/load-cbuffer-vectors.ll | 12 ++++++++++++ 7 files changed, 55 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll index cbd76fc00b813..55b0f1f7a03dc 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll @@ -18,6 +18,8 @@ entry: ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + ;; s[1].z + ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 ; CHECK: store i32 [[X]], ptr %dst @@ -26,6 +28,8 @@ entry: %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4 store i32 %i8_vecext, ptr %dst, align 4 + ;; s[1].z + ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 ;; diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll index a561d8c0aea5d..eabc07c2fbb68 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll @@ -13,6 +13,9 @@ entry: %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 + ;; a1[1] + ;; Note that the valid GEPs of a1 are `0, 0, 0`, `0, 0, 1`, and `0, 1`. + ; ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll index 68dfbb184083f..6f6166e820a6f 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll @@ -32,6 +32,8 @@ entry: ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + ;; a1[1] + ; ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 ; CHECK: store float [[X]], ptr %dst @@ -40,6 +42,8 @@ entry: %a1 = load float, ptr addrspace(2) %a1_gep, align 4 store float %a1, ptr %dst, align 32 + ;; a2[1] + ; ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5) ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 @@ -56,6 +60,8 @@ entry: %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 store <3 x double> %a2, ptr %a2.i, align 32 + ;; a3[0][1] + ; ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8) ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32 @@ -66,6 +72,8 @@ entry: %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32 store half %a3, ptr %a3.i, align 2 + ;; a4[1] + ; ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 12) ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 @@ -76,6 +84,8 @@ entry: %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 store i64 %a4, ptr %a4.i, align 8 + ;; a5[1][0][0] + ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 26) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 @@ -93,6 +103,8 @@ entry: %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 store <4 x i32> %a5, ptr %a5.i, align 4 + ;; a6[0] + ; ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 38) ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64 @@ -102,6 +114,8 @@ entry: %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64 store i16 %a6, ptr %a6.i, align 2 + ;; a7[1] + ; ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 40) ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 @@ -112,6 +126,8 @@ entry: %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 store i64 %a7, ptr %a7.i, align 8 + ;; a8[1] + ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 42) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80 diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll index 9c2ec2399d8d5..22994cfc3f48a 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll @@ -36,7 +36,7 @@ entry: ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 - ;; w[Tid.x].z + ;; w[idx].z ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 @@ -47,7 +47,7 @@ entry: %w_load = load i32, ptr addrspace(2) %w_gep, align 4 store i32 %w_load, ptr %dst, align 4 - ;; v[Tid.x].q + ;; v[idx].q ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll index 1b486a1f40f22..56e296bf5c1e0 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll @@ -19,6 +19,8 @@ entry: ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 + ;; s[idx] + ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 ; CHECK: store i32 [[X]], ptr %dst @@ -27,6 +29,8 @@ entry: %s_load = load i32, ptr addrspace(2) %s_gep, align 4 store i32 %s_load, ptr %dst, align 4 + ;; t[idx] + ; ; CHECK: [[T_IDX:%.*]] = add i32 10, %idx ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 [[T_IDX]]) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll index 5a8275b4a6c12..65c9a3ec966e9 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll @@ -22,6 +22,8 @@ entry: ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 + ;; a1 + ; ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 ; CHECK: store float [[A1]], ptr %dst @@ -29,6 +31,8 @@ entry: %a1 = load float, ptr addrspace(2) %a1_ptr, align 4 store float %a1, ptr %dst, align 8 + ;; a2 + ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 @@ -38,6 +42,8 @@ entry: %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 store i32 %a2, ptr %a2.i, align 8 + ;; a3 + ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 @@ -47,6 +53,8 @@ entry: %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 store i32 %a3, ptr %a3.i, align 4 + ;; a4 + ; ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12 @@ -56,6 +64,8 @@ entry: %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12 store half %a4, ptr %a4.i, align 4 + ;; a5 + ; ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14 @@ -65,6 +75,8 @@ entry: %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14 store i16 %a5, ptr %a5.i, align 2 + ;; a6 + ; ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 @@ -74,6 +86,8 @@ entry: %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 store double %a6, ptr %a6.i, align 8 + ;; a7 + ; ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24 diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll index 1f20d0101c151..f6ae6339dcd36 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll @@ -21,6 +21,8 @@ entry: ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 + ;; a1 + ; ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0) ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1 @@ -33,6 +35,8 @@ entry: %a1 = load <3 x float>, ptr addrspace(2) %a1_gep, align 16 store <3 x float> %a1, ptr %dst, align 4 + ;; a2 + ; ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 @@ -48,6 +52,8 @@ entry: %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 store <3 x double> %a2, ptr %a2.i, align 8 + ;; a3 + ; ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4 ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5 @@ -60,6 +66,8 @@ entry: %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 store <2 x half> %a3, ptr %a3.i, align 2 + ;; a4 + ; ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 3) ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 @@ -75,6 +83,8 @@ entry: %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 store <3 x i64> %a4, ptr %a4.i, align 8 + ;; a5 + ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 @@ -91,6 +101,8 @@ entry: %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 store <4 x i32> %a5, ptr %a5.i, align 4 + ;; a6 + ; ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6) ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1 From 8d03da3a5ac7ca8b514715b35d6df12f92cb8cc2 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Thu, 23 Oct 2025 16:54:30 -0700 Subject: [PATCH 3/7] Update llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll Co-authored-by: Helena Kotas --- .../test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll index f6ae6339dcd36..0156a1a0472ab 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll @@ -6,7 +6,7 @@ ; float16_t2 a3; // offset 40, size 4 (+4) ; uint64_t3 a4; // offset 48, size 24 (+8) ; int4 a5; // offset 80, size 16 -; uint16_t3 a6; // offset 96, size 6 (+10) +; uint16_t3 a6; // offset 96, size 6 ; }; %__cblayout_CB = type <{ <3 x float>, target("dx.Padding", 4), <3 x double>, <2 x half>, target("dx.Padding", 4), <3 x i64>, target("dx.Padding", 8), <4 x i32>, <3 x i16> }> From 6f706be3385f1e7a883e114fe8f597e0fb5afdcf Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 10 Nov 2025 09:52:10 -0800 Subject: [PATCH 4/7] fixup: Adjust type in test for clarity --- .../CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll index 56e296bf5c1e0..7daebaed70442 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll @@ -4,9 +4,9 @@ ; cbuffer CB : register(b0) { ; uint s[10]; // offset 0, size 4 (+12) * 10 -; uint t[10]; // offset 160, size 4 (+12) * 10 +; uint t[12]; // offset 160, size 4 (+12) * 12 ; } -%__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }> }> +%__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12), <{ [11 x <{ i32, target("dx.Padding", 12) }>], i32 }> }> @CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison From 25a6b3a6d4c6674c9a76f580b967d792afdde0e4 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 10 Nov 2025 09:53:06 -0800 Subject: [PATCH 5/7] fixup: Fix test comments, adjust type for clarity --- .../ResourceAccess/load-cbuffer-array-of-vector.ll | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll index 55b0f1f7a03dc..615fc5ea07eca 100644 --- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll +++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll @@ -3,7 +3,7 @@ ; Test for when we have indices into both the array and the vector: ie, s[1][3] ; cbuffer CB : register(b0) { -; uint4 s[2]; // offset 0, size 16 * 2 +; uint4 s[3]; // offset 0, size 16 * 3 ; } %__cblayout_CB = type <{ [2 x <4 x i32>] }> @@ -18,7 +18,7 @@ entry: ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 - ;; s[1].z + ;; s[1][3] ; ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 @@ -28,9 +28,9 @@ entry: %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4 store i32 %i8_vecext, ptr %dst, align 4 - ;; s[1].z + ;; s[2].w ; - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 ;; ;; It would be nice to avoid the redundant vector creation here, but that's @@ -40,7 +40,7 @@ entry: ; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3 ; CHECK: store i32 [[X_EXT]], ptr %dst %typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) - %typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 1 + %typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 2 %typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16 %typed_vecext = extractelement <4 x i32> %typed_load, i32 3 store i32 %typed_vecext, ptr %dst, align 4 From ccdeadb1e826f95771f0ba4e28e231305bc08c3e Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 10 Nov 2025 09:58:03 -0800 Subject: [PATCH 6/7] fixup: Add comment about row offset --- llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp index e467e3c8a218c..d6e75b10965b9 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp @@ -253,6 +253,8 @@ static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset, assert(!(CurrentIndex && Offset) && "Dynamic indexing into elements of cbuffer rows is not supported"); + // At this point if we have a non-constant offset it has to be an array + // offset, so we can assume that it's a multiple of the row size. if (Offset) CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset; From 57beafcc1be33d5d8f3730f897135f182aa616e6 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 10 Nov 2025 10:15:28 -0800 Subject: [PATCH 7/7] fixup: add comment clarifying what `Remaining` is --- llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp index d6e75b10965b9..057d87bc3c6a9 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp @@ -263,6 +263,8 @@ static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset, auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract"); + // At this point we've loaded the first scalar of our result, but our original + // type may have been a vector. unsigned int Remaining = ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; if (Remaining == 0) {