Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 52 additions & 16 deletions llvm/lib/Target/DirectX/DXILDataScalarization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,40 +304,76 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
GEPOperator *GOp = cast<GEPOperator>(&GEPI);
Value *PtrOperand = GOp->getPointerOperand();
Type *NewGEPType = GOp->getSourceElementType();
bool NeedsTransform = false;

// Unwrap GEP ConstantExprs to find the base operand and element type
while (auto *CE = dyn_cast<ConstantExpr>(PtrOperand)) {
if (auto *GEPCE = dyn_cast<GEPOperator>(CE)) {
GOp = GEPCE;
PtrOperand = GEPCE->getPointerOperand();
NewGEPType = GEPCE->getSourceElementType();
} else
break;
while (auto *GEPCE = dyn_cast_or_null<GEPOperator>(
dyn_cast<ConstantExpr>(PtrOperand))) {
GOp = GEPCE;
PtrOperand = GEPCE->getPointerOperand();
NewGEPType = GEPCE->getSourceElementType();
}

Type *const OrigGEPType = NewGEPType;
Value *const OrigOperand = PtrOperand;

if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
NewGEPType = NewGlobal->getValueType();
PtrOperand = NewGlobal;
NeedsTransform = true;
} else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
Type *AllocatedType = Alloca->getAllocatedType();
if (isa<ArrayType>(AllocatedType) &&
AllocatedType != GOp->getResultElementType()) {
AllocatedType != GOp->getResultElementType())
NewGEPType = AllocatedType;
NeedsTransform = true;
} else
return false; // Only GEPs into an alloca or global variable are considered

// Defer changing i8 GEP types until dxil-flatten-arrays
if (OrigGEPType->isIntegerTy(8))
NewGEPType = OrigGEPType;

// If the original type is a "sub-type" of the new type, then ensure the gep
// correctly zero-indexes the extra dimensions to keep the offset calculation
// correct.
// Eg:
// i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc.
//
// So then:
// gep [4 x i32] %idx
// -> gep [8 x [4 x i32]], i32 0, i32 %idx
// gep i32 %idx
// -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx
uint32_t MissingDims = 0;
Type *SubType = NewGEPType;

// The new type will be in its array version; so match accordingly.
Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType);

while (SubType != GEPArrType) {
MissingDims++;

ArrayType *ArrType = dyn_cast<ArrayType>(SubType);
if (!ArrType) {
assert(SubType == GEPArrType &&
"GEP uses an DXIL invalid sub-type of alloca/global variable");
break;
}

SubType = ArrType->getElementType();
}

bool NeedsTransform = OrigOperand != PtrOperand ||
OrigGEPType != NewGEPType || MissingDims != 0;

if (!NeedsTransform)
return false;

// Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later
if (!isa<ArrayType>(GOp->getSourceElementType()))
NewGEPType = GOp->getSourceElementType();

IRBuilder<> Builder(&GEPI);
SmallVector<Value *, MaxVecSize> Indices(GOp->indices());
SmallVector<Value *, MaxVecSize> Indices;

for (uint32_t I = 0; I < MissingDims; I++)
Indices.push_back(Builder.getInt32(0));
llvm::append_range(Indices, GOp->indices());

Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
GOp->getName(), GOp->getNoWrapFlags());

Expand Down
65 changes: 65 additions & 0 deletions llvm/test/CodeGen/DirectX/scalarize-alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,68 @@ define void @alloca_2d_gep_test() {
%3 = getelementptr inbounds nuw [2 x <2 x i32>], ptr %1, i32 0, i32 %2
ret void
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: SCHECK lines are checked when only the dxil-data-scalarization pass is run; FCHECK lines are checked when both dxil-data-scalarization and dxil-flatten-arrays are run.

; GEP whose source element type ([4 x i32]) is an inner dimension of the
; [8 x [4 x i32]] alloca it indexes, using a single dynamic index.
; Scalarization-only expectation: the GEP is retyped to the full alloca type
; and gains one leading zero index ahead of the thread-id index.
; Scalarization+flattening expectation: the alloca becomes [32 x i32] and the
; thread-id index is multiplied by 4 to form the flat index.
; CHECK-LABEL: subtype_array_test
define void @subtype_array_test() {
; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
; CHECK: ret void
%arr = alloca [8 x [4 x i32]], align 4
%i = tail call i32 @llvm.dx.thread.id(i32 0)
%gep = getelementptr inbounds nuw [4 x i32], ptr %arr, i32 %i
ret void
}

; GEP whose source element type is the vector element (<4 x i32>) of an
; [8 x <4 x i32>] alloca, using a single dynamic index.
; Scalarization-only expectation: the alloca is rewritten to [8 x [4 x i32]]
; and the GEP is retyped to that array type with one leading zero index.
; Scalarization+flattening expectation: the alloca becomes [32 x i32] and the
; thread-id index is multiplied by 4 to form the flat index.
; CHECK-LABEL: subtype_vector_test
define void @subtype_vector_test() {
; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
; CHECK: ret void
%arr = alloca [8 x <4 x i32>], align 4
%i = tail call i32 @llvm.dx.thread.id(i32 0)
%gep = getelementptr inbounds nuw <4 x i32>, ptr %arr, i32 %i
ret void
}

; GEP whose source element type is the scalar i32 at the innermost level of an
; [8 x [4 x i32]] alloca, using a single dynamic index.
; Scalarization-only expectation: the GEP is retyped to the full alloca type
; and gains two leading zero indices ahead of the thread-id index.
; Scalarization+flattening expectation: the alloca becomes [32 x i32] and the
; thread-id index is multiplied by 1 to form the flat index.
; CHECK-LABEL: subtype_scalar_test
define void @subtype_scalar_test() {
; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 0, i32 [[tid]]
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
; CHECK: ret void
%arr = alloca [8 x [4 x i32]], align 4
%i = tail call i32 @llvm.dx.thread.id(i32 0)
%gep = getelementptr inbounds nuw i32, ptr %arr, i32 %i
ret void
}

; Byte-addressed (i8) GEP into an [8 x [4 x i32]] alloca, using a single
; dynamic index.
; Scalarization-only expectation: the GEP keeps its i8 source element type and
; its single index.
; Scalarization+flattening expectation: the alloca becomes [32 x i32] and the
; byte index is multiplied by 1, then shifted right by 2, to form the flat
; i32 element index.
; CHECK-LABEL: subtype_i8_test
define void @subtype_i8_test() {
; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i8, ptr [[alloca_val]], i32 [[tid]]
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
; FCHECK: [[flatidx_lshr:%.*]] = lshr i32 [[flatidx_mul]], 2
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_lshr]]
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
; CHECK: ret void
%arr = alloca [8 x [4 x i32]], align 4
%i = tail call i32 @llvm.dx.thread.id(i32 0)
%gep = getelementptr inbounds nuw i8, ptr %arr, i32 %i
ret void
}
70 changes: 70 additions & 0 deletions llvm/test/CodeGen/DirectX/scalarize-global.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
; Tests GEPs into addrspace(3) globals whose GEP source element type is a
; sub-type of the global's value type.
; The file is checked under two pipelines: dxil-data-scalarization alone
; (SCHECK plus the shared CHECK lines) and dxil-data-scalarization followed by
; dxil-flatten-arrays (FCHECK plus the shared CHECK lines).
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK
; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK

; Input globals: an array of vectors and a single vector, both in addrspace(3).
@"arrayofVecData" = local_unnamed_addr addrspace(3) global [8 x <4 x i32>] zeroinitializer, align 16
@"vecData" = external addrspace(3) global <4 x i32>, align 4

; Expected replacements: the array of vectors becomes [8 x [4 x i32]] after
; scalarization and [32 x i32] after flattening; the single vector becomes
; [4 x i32] under both pipelines.
; SCHECK: [[arrayofVecData:@arrayofVecData.*]] = local_unnamed_addr addrspace(3) global [8 x [4 x i32]] zeroinitializer, align 16
; FCHECK: [[arrayofVecData:@arrayofVecData.*]] = local_unnamed_addr addrspace(3) global [32 x i32] zeroinitializer, align 16
; CHECK: [[vecData:@vecData.*]] = external addrspace(3) global [4 x i32], align 4

; GEP with source element type [4 x i32] into the @arrayofVecData global
; (declared as [8 x <4 x i32>]), followed by a <4 x i32> load of the result.
; Scalarization-only expectation: the GEP targets the replacement global with
; type [8 x [4 x i32]] and gains one leading zero index.
; Scalarization+flattening expectation: the GEP targets the [32 x i32] global
; with the thread-id index multiplied by 4.
; Under both pipelines the <4 x i32> load and return are expected to remain.
; CHECK-LABEL: subtype_array_test
define <4 x i32> @subtype_array_test() {
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
; CHECK: ret <4 x i32> [[x]]
%i = tail call i32 @llvm.dx.thread.id(i32 0)
%gep = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @"arrayofVecData", i32 %i
%x = load <4 x i32>, ptr addrspace(3) %gep, align 4
ret <4 x i32> %x
}

; GEP with source element type <4 x i32> into the @arrayofVecData global
; (declared as [8 x <4 x i32>]), followed by a <4 x i32> load of the result.
; Scalarization-only expectation: the GEP targets the replacement global with
; type [8 x [4 x i32]] and gains one leading zero index.
; Scalarization+flattening expectation: the GEP targets the [32 x i32] global
; with the thread-id index multiplied by 4.
; Under both pipelines the <4 x i32> load and return are expected to remain.
; CHECK-LABEL: subtype_vector_test
define <4 x i32> @subtype_vector_test() {
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
; CHECK: ret <4 x i32> [[x]]
%i = tail call i32 @llvm.dx.thread.id(i32 0)
%gep = getelementptr inbounds nuw <4 x i32>, ptr addrspace(3) @"arrayofVecData", i32 %i
%x = load <4 x i32>, ptr addrspace(3) %gep, align 4
ret <4 x i32> %x
}

; GEP with scalar source element type i32 into the @arrayofVecData global
; (declared as [8 x <4 x i32>]), followed by a <4 x i32> load of the result.
; Scalarization-only expectation: the GEP targets the replacement global with
; type [8 x [4 x i32]] and gains two leading zero indices.
; Scalarization+flattening expectation: the GEP targets the [32 x i32] global
; with the thread-id index multiplied by 1.
; Under both pipelines the <4 x i32> load and return are expected to remain.
; CHECK-LABEL: subtype_scalar_test
define <4 x i32> @subtype_scalar_test() {
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 0, i32 [[tid]]
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
; CHECK: ret <4 x i32> [[x]]
%i = tail call i32 @llvm.dx.thread.id(i32 0)
%gep = getelementptr inbounds nuw i32, ptr addrspace(3) @"arrayofVecData", i32 %i
%x = load <4 x i32>, ptr addrspace(3) %gep, align 4
ret <4 x i32> %x
}

; Byte-addressed (i8) GEP into the @arrayofVecData global (declared as
; [8 x <4 x i32>]), followed by a <4 x i32> load of the result.
; Scalarization-only expectation: the GEP keeps its i8 source element type and
; single index, but targets the replacement global.
; Scalarization+flattening expectation: the byte index is multiplied by 1,
; shifted right by 2, and used as the flat index into the [32 x i32] global.
; Under both pipelines the <4 x i32> load and return are expected to remain.
; CHECK-LABEL: subtype_i8_test
define <4 x i32> @subtype_i8_test() {
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
; FCHECK: [[flatidx_lshr:%.*]] = lshr i32 [[flatidx_mul]], 2
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_lshr]]
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
; CHECK: [[x:%.*]] = load <4 x i32>, ptr addrspace(3) [[gep]], align 4
; CHECK: ret <4 x i32> [[x]]
%i = tail call i32 @llvm.dx.thread.id(i32 0)
%gep = getelementptr inbounds nuw i8, ptr addrspace(3) @"arrayofVecData", i32 %i
%x = load <4 x i32>, ptr addrspace(3) %gep, align 4
ret <4 x i32> %x
}