From 3f072c75a7271d44a089a115fb09752d80cc858b Mon Sep 17 00:00:00 2001 From: Brendon Cahoon Date: Wed, 12 Nov 2025 17:12:30 -0600 Subject: [PATCH] Remove local changes to AttributorAttributes.cpp Updating lib/Transforms/IPO/AttributorAttributes.cpp to match the upstream version. A change to indirect calls was dropped due to failing tests. --- .../Transforms/IPO/AttributorAttributes.cpp | 14 +-- .../CodeGen/AMDGPU/simple-indirect-call-2.ll | 50 ++++++--- llvm/test/Transforms/Attributor/callgraph.ll | 21 +++- .../Transforms/Attributor/value-simplify.ll | 26 +++-- .../Transforms/OpenMP/spmdization_indirect.ll | 104 +++++++++++++++--- 5 files changed, 157 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 8883a527c2226..a6ac7610a2c7a 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -10686,24 +10686,14 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { } return Change; } -#ifndef AAIndirectCallInfo_nolonger_breaks_snap_miteams - // Process callee metadata if available. - if (auto *MD = getCtxI()->getMetadata(LLVMContext::MD_callees)) { - for (const auto &Op : MD->operands()) { - Function *Callee = mdconst::dyn_extract_or_null<Function>(Op); - if (Callee) - addCalledFunction(Callee, Change); - } - return Change; - } -#else + if (CB->isIndirectCall()) if (auto *IndirectCallAA = A.getAAFor<AAIndirectCallInfo>( *this, getIRPosition(), DepClassTy::OPTIONAL)) if (IndirectCallAA->foreachCallee( [&](Function *Fn) { return VisitValue(*Fn, CB); })) return Change; -#endif + // The most simple case. 
ProcessCalledOperand(CB->getCalledOperand(), CB); diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll index 8d4657571b258..3b59ff401f612 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll @@ -53,14 +53,32 @@ entry: } define amdgpu_kernel void @foo(ptr noundef %fp) { -; CHECK-LABEL: define {{[^@]+}}@foo -; CHECK-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; CHECK-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8 -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8 -; CHECK-NEXT: call void [[LOAD]]() -; CHECK-NEXT: ret void +; OW-LABEL: define {{[^@]+}}@foo +; OW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] { +; OW-NEXT: entry: +; OW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +; OW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8 +; OW-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8 +; OW-NEXT: call void [[LOAD]]() +; OW-NEXT: ret void +; +; CW-LABEL: define {{[^@]+}}@foo +; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR0]] { +; CW-NEXT: entry: +; CW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +; CW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8 +; CW-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8 +; CW-NEXT: call void [[LOAD]](), !callees [[META0:![0-9]+]] +; CW-NEXT: ret void +; +; NO-LABEL: define {{[^@]+}}@foo +; NO-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR0]] { +; NO-NEXT: entry: +; NO-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +; NO-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8 +; NO-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8 +; NO-NEXT: call void [[LOAD]](), !callees [[META0:![0-9]+]] +; NO-NEXT: ret void ; entry: %fp.addr = alloca ptr, 
addrspace(5) @@ -71,10 +89,14 @@ entry: } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } +; CW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +;. +; NO: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +;. 
+; OW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } +;. +; CW: [[META0]] = !{ptr @bar1, ptr @bar2} +;. +; NO: [[META0]] = !{ptr @bar1, ptr @bar2} ;. -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CW: {{.*}} -; NO: {{.*}} -; OW: {{.*}} diff --git a/llvm/test/Transforms/Attributor/callgraph.ll b/llvm/test/Transforms/Attributor/callgraph.ll index 98b1a661960bd..84e2c54bd832d 100644 --- a/llvm/test/Transforms/Attributor/callgraph.ll +++ b/llvm/test/Transforms/Attributor/callgraph.ll @@ -576,9 +576,20 @@ define void @func7(ptr %unknown) { ; Check there's no crash if something that isn't a function appears in !callees define void @undef_in_callees() { -; CHECK-LABEL: @undef_in_callees( -; CHECK-NEXT: cond.end.i: -; CHECK-NEXT: unreachable +; UNLIM-LABEL: @undef_in_callees( +; UNLIM-NEXT: cond.end.i: +; UNLIM-NEXT: call void undef(ptr undef, i32 undef, ptr undef), !callees [[META2:![0-9]+]] +; UNLIM-NEXT: ret void +; +; LIMI2-LABEL: @undef_in_callees( +; LIMI2-NEXT: cond.end.i: +; LIMI2-NEXT: call void undef(ptr undef, i32 undef, ptr undef), !callees [[META4:![0-9]+]] +; LIMI2-NEXT: ret void +; +; LIMI0-LABEL: @undef_in_callees( +; LIMI0-NEXT: cond.end.i: +; LIMI0-NEXT: call void undef(ptr undef, i32 undef, ptr undef), !callees [[META6:![0-9]+]] +; LIMI0-NEXT: ret void ; cond.end.i: call void undef(ptr undef, i32 undef, 
ptr undef), !callees !3 @@ -688,11 +699,13 @@ define void @as_cast(ptr %arg) { ;. ; OUNLM: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]} ; OUNLM: [[META1]] = !{i64 0, i1 false} +; OUNLM: [[META2]] = distinct !{ptr undef, ptr null} ;. ; LIMI2: [[META0]] = !{ptr @void, ptr @retFloatTakeFloat} ; LIMI2: [[META1]] = !{ptr @void} ; LIMI2: [[META2:![0-9]+]] = !{[[META3:![0-9]+]]} ; LIMI2: [[META3]] = !{i64 0, i1 false} +; LIMI2: [[META4]] = distinct !{ptr undef, ptr null} ;. ; LIMI0: [[META0]] = !{ptr @func4, ptr @internal_good} ; LIMI0: [[META1]] = !{ptr @func3, ptr @func4} @@ -700,9 +713,11 @@ define void @as_cast(ptr %arg) { ; LIMI0: [[META3]] = !{ptr @takeI32, ptr @retI32, ptr @void} ; LIMI0: [[META4:![0-9]+]] = !{[[META5:![0-9]+]]} ; LIMI0: [[META5]] = !{i64 0, i1 false} +; LIMI0: [[META6]] = distinct !{ptr undef, ptr null} ;. ; CWRLD: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]} ; CWRLD: [[META1]] = !{i64 0, i1 false} +; CWRLD: [[META2]] = distinct !{ptr undef, ptr null} ;. ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; DOT: {{.*}} diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 463ecb3003a83..981964ec1efed 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -626,7 +626,7 @@ define internal ptr @test_byval2(ptr byval(%struct.X) %a) { ; CHECK-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]], align 8 ; CHECK-NEXT: store ptr [[TMP0]], ptr [[A_PRIV]], align 8 ; CHECK-NEXT: call void @sync() -; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[A_PRIV]], align 8 +; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[A_PRIV]], align 8, !invariant.load [[META0:![0-9]+]] ; CHECK-NEXT: ret ptr [[L]] ; call void @sync() @@ -975,11 +975,11 @@ define i1 @icmp() { define void @test_callee_is_undef(ptr %fn) { ; TUNIT-LABEL: define {{[^@]+}}@test_callee_is_undef ; TUNIT-SAME: (ptr nofree captures(none) [[FN:%.*]]) { -; TUNIT-NEXT: unreachable +; TUNIT-NEXT: call void @unknown_calle_arg_is_undef(ptr nofree noundef captures(none) [[FN]]) +; TUNIT-NEXT: ret void ; ; CGSCC-LABEL: define {{[^@]+}}@test_callee_is_undef -; CGSCC-SAME: (ptr nofree captures(none) [[FN:%.*]]) { -; CGSCC-NEXT: call void @callee_is_undef() +; CGSCC-SAME: (ptr nofree noundef nonnull captures(none) [[FN:%.*]]) { ; CGSCC-NEXT: call void @unknown_calle_arg_is_undef(ptr nofree noundef nonnull captures(none) [[FN]]) ; CGSCC-NEXT: ret void ; @@ -989,9 +989,9 @@ define void @test_callee_is_undef(ptr %fn) { } define internal void @callee_is_undef(ptr %fn) { ; -; CGSCC: Function Attrs: memory(readwrite, argmem: none) +; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callee_is_undef -; CGSCC-SAME: () #[[ATTR2]] { +; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: unreachable ; call void %fn() @@ -999,10 +999,10 @@ define internal void @callee_is_undef(ptr %fn) { } define internal void 
@unknown_calle_arg_is_undef(ptr %fn, i32 %arg) { ; -; CGSCC-LABEL: define {{[^@]+}}@unknown_calle_arg_is_undef -; CGSCC-SAME: (ptr nofree noundef nonnull captures(none) [[FN:%.*]]) { -; CGSCC-NEXT: call void [[FN]](i32 undef) -; CGSCC-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unknown_calle_arg_is_undef +; CHECK-SAME: (ptr nofree noundef nonnull captures(none) [[FN:%.*]]) { +; CHECK-NEXT: call void [[FN]](i32 undef) +; CHECK-NEXT: ret void ; call void %fn(i32 %arg) ret void @@ -1359,7 +1359,7 @@ define internal i32 @ret_speculatable_expr(ptr %mem, i32 %a2) { ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[MEM_PRIV:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 [[TMP0]], ptr [[MEM_PRIV]], align 4 -; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[MEM_PRIV]], align 4 +; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[MEM_PRIV]], align 4, !invariant.load [[META0]] ; CGSCC-NEXT: [[MUL:%.*]] = mul i32 [[L]], 13 ; CGSCC-NEXT: [[ADD:%.*]] = add i32 [[MUL]], 7 ; CGSCC-NEXT: ret i32 [[ADD]] @@ -1709,3 +1709,7 @@ define i32 @readExtInitZeroInit() { ; CGSCC: attributes #[[ATTR17]] = { nosync } ; CGSCC: attributes #[[ATTR18]] = { nounwind } ;. +; TUNIT: [[META0]] = !{} +;. +; CGSCC: [[META0]] = !{} +;. 
diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll index 0bcf6d9f0d9f5..9f057ca44f603 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll @@ -18,14 +18,14 @@ ; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; AMDGPU: @spmd_callees_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } -; AMDGPU: @spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; AMDGPU: @spmd_and_non_spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; AMDGPU: @spmd_and_non_spmd_callee_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ;. 
; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; NVPTX: @spmd_callees_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } -; NVPTX: @spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; NVPTX: @spmd_and_non_spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ; NVPTX: @spmd_and_non_spmd_callee_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null } ;. 
@@ -62,7 +62,19 @@ define internal void @spmd_callees__debug(i1 %c) { ; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2 -; AMDGPU-NEXT: call void [[FP]](ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2 +; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; AMDGPU: [[BB3]]: +; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; AMDGPU-NEXT: br label %[[BB7:.*]] +; AMDGPU: [[BB4]]: +; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; AMDGPU: [[BB5]]: +; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; AMDGPU-NEXT: br label %[[BB7]] +; AMDGPU: [[BB6]]: +; AMDGPU-NEXT: unreachable +; AMDGPU: [[BB7]]: ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label %[[COMMON_RET]] ; @@ -83,7 +95,19 @@ define internal void @spmd_callees__debug(i1 %c) { ; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12:![0-9]+]] ; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2 -; NVPTX-NEXT: call void [[FP]](ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2 +; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; NVPTX: [[BB3]]: +; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) 
#[[ATTR10]] +; NVPTX-NEXT: br label %[[BB7:.*]] +; NVPTX: [[BB4]]: +; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; NVPTX: [[BB5]]: +; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; NVPTX-NEXT: br label %[[BB7]] +; NVPTX: [[BB6]]: +; NVPTX-NEXT: unreachable +; NVPTX: [[BB7]]: ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label %[[COMMON_RET]] ; @@ -376,7 +400,19 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 { ; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable -; AMDGPU-NEXT: call void [[FP]](ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable +; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; AMDGPU: [[BB3]]: +; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; AMDGPU-NEXT: br label %[[BB7:.*]] +; AMDGPU: [[BB4]]: +; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; AMDGPU: [[BB5]]: +; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; AMDGPU-NEXT: br label %[[BB7]] +; AMDGPU: [[BB6]]: +; AMDGPU-NEXT: unreachable +; AMDGPU: [[BB7]]: ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label %[[COMMON_RET]] ; @@ -397,7 +433,19 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 { ; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: 
[[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable -; NVPTX-NEXT: call void [[FP]](ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable +; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; NVPTX: [[BB3]]: +; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; NVPTX-NEXT: br label %[[BB7:.*]] +; NVPTX: [[BB4]]: +; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; NVPTX: [[BB5]]: +; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]] +; NVPTX-NEXT: br label %[[BB7]] +; NVPTX: [[BB6]]: +; NVPTX-NEXT: unreachable +; NVPTX: [[BB7]]: ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label %[[COMMON_RET]] ; @@ -585,7 +633,7 @@ define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 { ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] -; AMDGPU-NEXT: call void [[FP]](ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]), !callees [[META23:![0-9]+]] +; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label %[[COMMON_RET]] ; @@ -605,7 +653,7 @@ define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 { ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa 
[[INT_TBAA12]] -; NVPTX-NEXT: call void [[FP]](ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]), !callees [[META23:![0-9]+]] +; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label %[[COMMON_RET]] ; @@ -648,7 +696,19 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 ; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] -; AMDGPU-NEXT: call void [[FP]](ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]), !callees [[META24:![0-9]+]] +; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external +; AMDGPU-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; AMDGPU: [[BB3]]: +; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) +; AMDGPU-NEXT: br label %[[BB7:.*]] +; AMDGPU: [[BB4]]: +; AMDGPU-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; AMDGPU: [[BB5]]: +; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) +; AMDGPU-NEXT: br label %[[BB7]] +; AMDGPU: [[BB6]]: +; AMDGPU-NEXT: unreachable +; AMDGPU: [[BB7]]: ; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: br label %[[COMMON_RET]] ; @@ -668,7 +728,19 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]] ; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4 ; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[INT_TBAA12]] -; NVPTX-NEXT: call void 
[[FP]](ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]), !callees [[META24:![0-9]+]] +; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external +; NVPTX-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB4:.*]] +; NVPTX: [[BB3]]: +; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) +; NVPTX-NEXT: br label %[[BB7:.*]] +; NVPTX: [[BB4]]: +; NVPTX-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB6:.*]] +; NVPTX: [[BB5]]: +; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) +; NVPTX-NEXT: br label %[[BB7]] +; NVPTX: [[BB6]]: +; NVPTX-NEXT: unreachable +; NVPTX: [[BB7]]: ; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: br label %[[COMMON_RET]] ; @@ -709,7 +781,7 @@ define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., pt ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; AMDGPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; ; NVPTX-LABEL: define void @__omp_outlined_spmd_amenable_external( ; NVPTX-SAME: ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { @@ -726,7 +798,7 @@ define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., pt ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[INT_TBAA12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 -; NVPTX-NEXT: br label 
%[[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; NVPTX-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; entry: br label %for.cond @@ -969,9 +1041,7 @@ attributes #8 = { nounwind } ; AMDGPU: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; AMDGPU: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; AMDGPU: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} -; AMDGPU: [[META23]] = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable} -; AMDGPU: [[META24]] = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable_external} -; AMDGPU: [[LOOP25]] = distinct !{[[LOOP25]], [[META17]], [[META18]]} +; AMDGPU: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;. ; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"", i32 74, i32 5} ; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} @@ -996,7 +1066,5 @@ attributes #8 = { nounwind } ; NVPTX: [[ANYPTR_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0} ; NVPTX: [[META21]] = !{!"any pointer", [[META14]], i64 0} ; NVPTX: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META18]]} -; NVPTX: [[META23]] = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable} -; NVPTX: [[META24]] = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable_external} -; NVPTX: [[LOOP25]] = distinct !{[[LOOP25]], [[META17]], [[META18]]} +; NVPTX: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META18]]} ;.