-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[DirectX] Let data scalarizer pass account for sub-types when updating GEP type #166200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-directx Author: Finn Plummer (inbelic) ChangesThis pr lets the The pass is updated so that the replaced GEP introduces zero indices such that the result type remains the same (with the vector -> array transform). Please see resolved issue for an annotated example. Resolves: #165473 Full diff: https://github.com/llvm/llvm-project/pull/166200.diff 3 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index d507d71b99fc9..88a9b5084cba2 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -304,40 +304,76 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
GEPOperator *GOp = cast<GEPOperator>(&GEPI);
Value *PtrOperand = GOp->getPointerOperand();
Type *NewGEPType = GOp->getSourceElementType();
- bool NeedsTransform = false;
// Unwrap GEP ConstantExprs to find the base operand and element type
- while (auto *CE = dyn_cast<ConstantExpr>(PtrOperand)) {
- if (auto *GEPCE = dyn_cast<GEPOperator>(CE)) {
- GOp = GEPCE;
- PtrOperand = GEPCE->getPointerOperand();
- NewGEPType = GEPCE->getSourceElementType();
- } else
- break;
+ while (auto *GEPCE = dyn_cast_or_null<GEPOperator>(
+ dyn_cast<ConstantExpr>(PtrOperand))) {
+ GOp = GEPCE;
+ PtrOperand = GEPCE->getPointerOperand();
+ NewGEPType = GEPCE->getSourceElementType();
}
+ Type *const OrigGEPType = NewGEPType;
+ Value *const OrigOperand = PtrOperand;
+
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
NewGEPType = NewGlobal->getValueType();
PtrOperand = NewGlobal;
- NeedsTransform = true;
} else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
Type *AllocatedType = Alloca->getAllocatedType();
if (isa<ArrayType>(AllocatedType) &&
- AllocatedType != GOp->getResultElementType()) {
+ AllocatedType != GOp->getResultElementType())
NewGEPType = AllocatedType;
- NeedsTransform = true;
+ } else
+ return false; // Only GEPs into an alloca or global variable are considered
+
+ // Defer changing i8 GEP types until dxil-flatten-arrays
+ if (OrigGEPType->isIntegerTy(8))
+ NewGEPType = OrigGEPType;
+
+ // If the original type is a "sub-type" of the new type, then ensure the gep
+ // correctly zero-indexes the extra dimensions to keep the offset calculation
+ // correct.
+ // Eg:
+ // i32, [4 x i32] and [8 x [ 4 x i32]] are sub-types of [8 x [4 x i32]], etc.
+ //
+ // So then:
+ // gep [4 x i32] %idx
+ // -> gep [8 x [4 x i32]], i32 0, i32 %idx
+ // gep i32 %idx
+ // -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx
+ uint32_t MissingDims = 0;
+ Type *SubType = NewGEPType;
+
+ // The new type will be in it's array version so match accordingly
+ Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType);
+
+ while (SubType != GEPArrType) {
+ MissingDims++;
+
+ ArrayType *ArrType = dyn_cast<ArrayType>(SubType);
+ if (!ArrType) {
+ assert(SubType == GEPArrType && "GEP uses a strange sub-type of alloca/global variable");
+ break;
}
+
+ SubType = ArrType->getElementType();
}
+
+ bool NeedsTransform = OrigOperand != PtrOperand ||
+ OrigGEPType != NewGEPType || MissingDims != 0;
+
if (!NeedsTransform)
return false;
- // Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later
- if (!isa<ArrayType>(GOp->getSourceElementType()))
- NewGEPType = GOp->getSourceElementType();
-
IRBuilder<> Builder(&GEPI);
- SmallVector<Value *, MaxVecSize> Indices(GOp->indices());
+ SmallVector<Value *, MaxVecSize> Indices;
+
+ for (uint32_t I = 0; I < MissingDims; I++)
+ Indices.push_back(Builder.getInt32(0));
+ llvm::append_range(Indices, GOp->indices());
+
Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
GOp->getName(), GOp->getNoWrapFlags());
diff --git a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
index a8557e47b0ea6..475935d2eb135 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-alloca.ll
@@ -42,3 +42,68 @@ define void @alloca_2d_gep_test() {
%3 = getelementptr inbounds nuw [2 x <2 x i32>], ptr %1, i32 0, i32 %2
ret void
}
+
+; CHECK-LABEL: subtype_array_test
+define void @subtype_array_test() {
+ ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
+ ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
+ ; CHECK: ret void
+ %arr = alloca [8 x [4 x i32]], align 4
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw [4 x i32], ptr %arr, i32 %i
+ ret void
+}
+
+; CHECK-LABEL: subtype_vector_test
+define void @subtype_vector_test() {
+ ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
+ ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
+ ; CHECK: ret void
+ %arr = alloca [8 x <4 x i32>], align 4
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw <4 x i32>, ptr %arr, i32 %i
+ ret void
+}
+
+; CHECK-LABEL: subtype_scalar_test
+define void @subtype_scalar_test() {
+ ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
+ ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
+ ; CHECK: ret void
+ %arr = alloca [8 x [4 x i32]], align 4
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw i32, ptr %arr, i32 %i
+ ret void
+}
+
+; CHECK-LABEL: subtype_i8_test
+define void @subtype_i8_test() {
+ ; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
+ ; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i8, ptr [[alloca_val]], i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
+ ; FCHECK: [[flatidx_lshr:%.*]] = lshr i32 [[flatidx_mul]], 2
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_lshr]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
+ ; CHECK: ret void
+ %arr = alloca [8 x [4 x i32]], align 4
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw i8, ptr %arr, i32 %i
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/scalarize-global.ll b/llvm/test/CodeGen/DirectX/scalarize-global.ll
new file mode 100644
index 0000000000000..ca10f6ece5a85
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarize-global.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK
+; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK
+
+@"arrayofVecData" = local_unnamed_addr addrspace(3) global [8 x <4 x i32>] zeroinitializer, align 16
+@"vecData" = external addrspace(3) global <4 x i32>, align 4
+
+; SCHECK: [[arrayofVecData:@arrayofVecData.*]] = local_unnamed_addr addrspace(3) global [8 x [4 x i32]] zeroinitializer, align 16
+; FCHECK: [[arrayofVecData:@arrayofVecData.*]] = local_unnamed_addr addrspace(3) global [32 x i32] zeroinitializer, align 16
+; CHECK: [[vecData:@vecData.*]] = external addrspace(3) global [4 x i32], align 4
+
+; CHECK-LABEL: subtype_array_test
+define <4 x i32> @subtype_array_test() {
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
+ ; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
+ ; CHECK: ret <4 x i32> [[x]]
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @"arrayofVecData", i32 %i
+ %x = load <4 x i32>, ptr addrspace(3) %gep, align 4
+ ret <4 x i32> %x
+}
+
+; CHECK-LABEL: subtype_vector_test
+define <4 x i32> @subtype_vector_test() {
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
+ ; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
+ ; CHECK: ret <4 x i32> [[x]]
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw <4 x i32>, ptr addrspace(3) @"arrayofVecData", i32 %i
+ %x = load <4 x i32>, ptr addrspace(3) %gep, align 4
+ ret <4 x i32> %x
+}
+
+; CHECK-LABEL: subtype_scalar_test
+define <4 x i32> @subtype_scalar_test() {
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 0, i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
+ ; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
+ ; CHECK: ret <4 x i32> [[x]]
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw i32, ptr addrspace(3) @"arrayofVecData", i32 %i
+ %x = load <4 x i32>, ptr addrspace(3) %gep, align 4
+ ret <4 x i32> %x
+}
+
+; CHECK-LABEL: subtype_i8_test
+define <4 x i32> @subtype_i8_test() {
+ ; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+ ; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
+ ; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
+ ; FCHECK: [[flatidx_lshr:%.*]] = lshr i32 [[flatidx_mul]], 2
+ ; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_lshr]]
+ ; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
+ ; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
+ ; CHECK: ret <4 x i32> [[x]]
+ %i = tail call i32 @llvm.dx.thread.id(i32 0)
+ %gep = getelementptr inbounds nuw i8, ptr addrspace(3) @"arrayofVecData", i32 %i
+ %x = load <4 x i32>, ptr addrspace(3) %gep, align 4
+ ret <4 x i32> %x
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
| @@ -42,3 +42,68 @@ define void @alloca_2d_gep_test() { | |||
| %3 = getelementptr inbounds nuw [2 x <2 x i32>], ptr %1, i32 0, i32 %2 | |||
| ret void | |||
| } | |||
|
|
|||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FYI: SCHECK denotes just running the dxil-data-scalarization pass and FCHECK denotes running both that and the dxil-flatten-arrays pass
Icohedron
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
Co-authored-by: Deric C. <cheung.deric@gmail.com>
…g GEP type (llvm#166200) This pr lets the `dxil-data-scalarization` account for a GEP with a source type that is a sub-type of the pointer operand type. The pass is updated so that the replaced GEP introduces zero indices such that the result type remains the same (with the vector -> array transform). Please see resolved issue for an annotated example. Resolves: llvm#165473
This pr lets the
dxil-data-scalarizationaccount for a GEP with a source type that is a sub-type of the pointer operand type.The pass is updated so that the replaced GEP introduces zero indices such that the result type remains the same (with the vector -> array transform).
Please see resolved issue for an annotated example.
Resolves: #165473