From 064e4369e2844b3a7cb7be5a70030eb3ab137be8 Mon Sep 17 00:00:00 2001 From: Thor Preimesberger Date: Tue, 25 Feb 2025 05:32:18 -0600 Subject: [PATCH 001/123] [asan][x86] Abort instrumenting memintrinsics that target fs, gs to prevent miscompilation (#124238) --- .../Instrumentation/AddressSanitizer.cpp | 12 ++++ .../AddressSanitizer/X86/bug_124238.ll | 60 +++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 llvm/test/Instrumentation/AddressSanitizer/X86/bug_124238.ll diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8d8d56035a48f..85edcb1276efe 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -797,6 +797,7 @@ struct AddressSanitizer { bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument, uint32_t Exp, RuntimeCallInserter &RTCI); + bool maybeIgnoreMemIntrinsic(MemIntrinsic *MI, const Triple &TargetTriple); void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool suppressInstrumentationSiteForDebug(int &Instrumented); @@ -1340,10 +1341,21 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { return IRB.CreateAdd(Shadow, ShadowBase); } +bool AddressSanitizer::maybeIgnoreMemIntrinsic(MemIntrinsic *MI, + const Triple &TargetTriple) { + // Ignore FS and GS registers to prevent miscompilation + if (MI->getDestAddressSpace() >= 256 && + TargetTriple.getArch() == Triple::x86_64) + return true; + return false; +} + // Instrument memset/memmove/memcpy void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI) { InstrumentationIRBuilder IRB(MI); + if (maybeIgnoreMemIntrinsic(MI, TargetTriple)) + return; if (isa(MI)) { RTCI.createRuntimeCall( IRB, isa(MI) ? 
AsanMemmove : AsanMemcpy, diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/bug_124238.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_124238.ll new file mode 100644 index 0000000000000..ce82bc48563a0 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_124238.ll @@ -0,0 +1,60 @@ +; RUN: opt -passes=asan %s -S | FileCheck %s + +;; Punt AddressSanitizer::instrumentMemIntrinsics out for MemIntrinsics +;; that need write to unsupported registers on X86 +;; PR124238: https://www.github.com/llvm/llvm-project/issues/124238 + +target triple = "x86_64-unknown-linux-gnu" + +$.str.658906a285b7a0f82dabd9915e07848c = comdat any +@.str = internal constant { [2 x i8], [30 x i8] } { [2 x i8] c"x\00", [30 x i8] zeroinitializer }, comdat($.str.658906a285b7a0f82dabd9915e07848c), align 32 +@0 = private alias { [2 x i8], [30 x i8] }, ptr @.str + +define void @test_memcpy(i64 noundef %addr) sanitize_address #0 { +entry: + %addr.addr = alloca i64, align 8 + store i64 %addr, ptr %addr.addr, align 8 + %0 = load i64, ptr %addr.addr, align 8 + %1 = inttoptr i64 %0 to ptr addrspace(257) + call void @llvm.memcpy.p257.p0.i64(ptr addrspace(257) align 1 %1, ptr align 1 @.str, i64 1, i1 false) +; CHECK: llvm.memcpy + %2 = load i64, ptr %addr.addr, align 8 + %3 = inttoptr i64 %2 to ptr addrspace(256) + call void @llvm.memcpy.p256.p0.i64(ptr addrspace(256) align 1 %3, ptr align 1 @.str, i64 1, i1 false) +; CHECK: llvm.memcpy + ret void +} + +define void @test_memset(i64 noundef %addr) sanitize_address #0 { +entry: + %addr.addr = alloca i64, align 8 + store i64 %addr, ptr %addr.addr, align 8 + %0 = load i64, ptr %addr.addr, align 8 + %1 = inttoptr i64 %0 to ptr addrspace(257) + call void @llvm.memset.p257.i64(ptr addrspace(257) align 1 %1, i8 0, i64 1, i1 false) +; CHECK: llvm.memset + %2 = load i64, ptr %addr.addr, align 8 + %3 = inttoptr i64 %2 to ptr addrspace(256) + call void @llvm.memset.p256.i64(ptr addrspace(256) align 1 %3, i8 0, i64 1, i1 false) +; CHECK: 
llvm.memset + ret void +} + +define void @test_memmove(i64 noundef %addr) sanitize_address #0 { +entry: + %addr.addr = alloca i64, align 8 + store i64 %addr, ptr %addr.addr, align 8 + %0 = load i64, ptr %addr.addr, align 8 + %1 = inttoptr i64 %0 to ptr addrspace(257) + %2 = load i64, ptr %addr.addr, align 8 + %3 = inttoptr i64 %2 to ptr + call void @llvm.memmove.p257.p0.i64(ptr addrspace(257) align 1 %1, ptr align 1 %3, i64 1, i1 false) +; CHECK: llvm.memmove + %4 = load i64, ptr %addr.addr, align 8 + %5 = inttoptr i64 %4 to ptr addrspace(256) + %6 = load i64, ptr %addr.addr, align 8 + %7 = inttoptr i64 %6 to ptr + call void @llvm.memmove.p256.p0.i64(ptr addrspace(256) align 1 %5, ptr align 1 %7, i64 1, i1 false) +; CHECK: llvm.memmove + ret void +} From 8dfeb5b660378d6a555a143f9a26bce7cbc3d225 Mon Sep 17 00:00:00 2001 From: vporpo Date: Thu, 27 Feb 2025 14:15:04 -0800 Subject: [PATCH 002/123] [SandboxVec] Add option -sbvec-allow-file for bisection debugging (#129127) This new option lets you specify an allow-list of source files and disables vectorization if the IR is not in the list. This can be used for debugging miscompiles. --- .../SandboxVectorizer/SandboxVectorizer.h | 3 ++ .../SandboxVectorizer/SandboxVectorizer.cpp | 42 +++++++++++++++++++ .../SandboxVectorizer/allow_files.ll | 39 +++++++++++++++++ 3 files changed, 84 insertions(+) create mode 100644 llvm/test/Transforms/SandboxVectorizer/allow_files.ll diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h index 7ea9386f08bee..fea53329719b9 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h @@ -37,6 +37,9 @@ class SandboxVectorizerPass : public PassInfoMixin { // within FPM may register/unregister callbacks, so they need access to // Context. 
sandboxir::FunctionPassManager FPM; + /// \Returns true if we should attempt to vectorize \p SrcFilePath based on + /// `AllowFiles` option. + bool allowFile(const std::string &SrcFilePath); bool runImpl(Function &F); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp index 5837cc16fcbac..bffb9f187e882 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp @@ -8,9 +8,11 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Module.h" #include "llvm/SandboxIR/Constant.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h" +#include using namespace llvm; @@ -29,6 +31,22 @@ static cl::opt UserDefinedPassPipeline( cl::desc("Comma-separated list of vectorizer passes. If not set " "we run the predefined pipeline.")); +// This option is useful for bisection debugging. +// For example you may use it to figure out which filename is the one causing a +// miscompile. You can specify a regex for the filename like: "/[a-m][^/]*" +// which will enable any file name starting with 'a' to 'm' and disable the +// rest. If the miscompile goes away, then we try "/[n-z][^/]*" for the other +// half of the range, from 'n' to 'z'. If we can reproduce the miscompile then +// we can keep looking in [n-r] and [s-z] and so on, in a binary-search fashion. +// +// Please note that we are using [^/]* and not .* to make sure that we are +// matching the actual filename and not some other directory in the path. 
+cl::opt AllowFiles( + "sbvec-allow-files", cl::init(".*"), cl::Hidden, + cl::desc("Run the vectorizer only on file paths that match any in the " + "list of comma-separated regex's.")); +static constexpr const char AllowFilesDelim = ','; + SandboxVectorizerPass::SandboxVectorizerPass() : FPM("fpm") { if (UserDefinedPassPipeline == DefaultPipelineMagicStr) { // TODO: Add passes to the default pipeline. It currently contains: @@ -66,6 +84,23 @@ PreservedAnalyses SandboxVectorizerPass::run(Function &F, return PA; } +bool SandboxVectorizerPass::allowFile(const std::string &SrcFilePath) { + // Iterate over all files in AllowFiles separated by `AllowFilesDelim`. + size_t DelimPos = 0; + do { + size_t LastPos = DelimPos != 0 ? DelimPos + 1 : DelimPos; + DelimPos = AllowFiles.find(AllowFilesDelim, LastPos); + auto FileNameToMatch = AllowFiles.substr(LastPos, DelimPos - LastPos); + if (FileNameToMatch.empty()) + return false; + // Note: This only runs when debugging so its OK not to reuse the regex. + std::regex FileNameRegex(std::string(".*") + FileNameToMatch); + if (std::regex_match(SrcFilePath, FileNameRegex)) + return true; + } while (DelimPos != std::string::npos); + return false; +} + bool SandboxVectorizerPass::runImpl(Function &LLVMF) { if (Ctx == nullptr) Ctx = std::make_unique(LLVMF.getContext()); @@ -75,6 +110,13 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) { return false; } + // This is used for debugging. + if (LLVM_UNLIKELY(AllowFiles != ".*")) { + const auto &SrcFilePath = LLVMF.getParent()->getSourceFileName(); + if (!allowFile(SrcFilePath)) + return false; + } + // If the target claims to have no vector registers early return. 
if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) { LLVM_DEBUG(dbgs() << "SBVec: Target has no vector registers, return.\n"); diff --git a/llvm/test/Transforms/SandboxVectorizer/allow_files.ll b/llvm/test/Transforms/SandboxVectorizer/allow_files.ll new file mode 100644 index 0000000000000..0929eca6a1047 --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/allow_files.ll @@ -0,0 +1,39 @@ +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="some_other_file" %s -S | FileCheck %s --check-prefix=ALLOW_OTHER +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="allow_files.ll" %s -S | FileCheck %s --check-prefix=ALLOW_THIS +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="al.*_files.ll" %s -S | FileCheck %s --check-prefix=ALLOW_REGEX +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="some_file,.*_files.ll,some_other_file" %s -S | FileCheck %s --check-prefix=ALLOW_REGEX_CSV +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="allow" %s -S | FileCheck %s --check-prefix=ALLOW_BAD_REGEX +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="some_file,some_other_file1,some_other_file2" %s -S | FileCheck %s --check-prefix=ALLOW_OTHER_CSV +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" -sbvec-allow-files="" %s -S | FileCheck %s --check-prefix=ALLOW_EMPTY +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | 
FileCheck %s --check-prefix=DEFAULT + +; Checks the command-line option `-sbvec-allow-files`. +define void @widen(ptr %ptr) { +; ALLOW_OTHER: store float {{%.*}}, ptr {{%.*}}, align 4 +; ALLOW_OTHER: store float {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_THIS: store <2 x float> {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_REGEX: store <2 x float> {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_REGEX_CSV: store <2 x float> {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_BAD_REGEX: store float {{%.*}}, ptr {{%.*}}, align 4 +; ALLOW_BAD_REGEX: store float {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_OTHER_CSV: store float {{%.*}}, ptr {{%.*}}, align 4 +; ALLOW_OTHER_CSV: store float {{%.*}}, ptr {{%.*}}, align 4 +; +; ALLOW_EMPTY: store float {{%.*}}, ptr {{%.*}}, align 4 +; ALLOW_EMPTY: store float {{%.*}}, ptr {{%.*}}, align 4 +; +; DEFAULT: store <2 x float> {{%.*}}, ptr {{%.*}}, align 4 +; + %ptr0 = getelementptr float, ptr %ptr, i32 0 + %ptr1 = getelementptr float, ptr %ptr, i32 1 + %ld0 = load float, ptr %ptr0 + %ld1 = load float, ptr %ptr1 + store float %ld0, ptr %ptr0 + store float %ld1, ptr %ptr1 + ret void +} From 178a3390e7104b252194193960ad9b8caa00266b Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 27 Feb 2025 16:16:30 -0600 Subject: [PATCH 003/123] [SystemZ] Handle scalar to vector bitcasts. (#128628) CSmith found a case where SROA produces bitcasts from scalar to vector. This was previously asserted against in SystemZTTI, but now the BaseT implementation takes care of it. 
--- .../SystemZ/SystemZTargetTransformInfo.cpp | 3 +- .../Analysis/CostModel/SystemZ/bitcast.ll | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Analysis/CostModel/SystemZ/bitcast.ll diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 2b94832939419..06a0a3a631654 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -887,7 +887,8 @@ InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, unsigned SrcScalarBits = Src->getScalarSizeInBits(); if (!Src->isVectorTy()) { - assert (!Dst->isVectorTy()); + if (Dst->isVectorTy()) + return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) { if (Src->isIntegerTy(128)) diff --git a/llvm/test/Analysis/CostModel/SystemZ/bitcast.ll b/llvm/test/Analysis/CostModel/SystemZ/bitcast.ll new file mode 100644 index 0000000000000..7927588623c52 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/bitcast.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -mtriple=systemz-unknown -mcpu=z15 -passes="print" \ +; RUN: -disable-output 2>&1 | FileCheck %s + +; Check bitcast from scalar to vector. 
+ +@Glob = dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local void @fun() { +entry: + %d.sroa.0 = alloca i64, align 8 + store i64 0, ptr %d.sroa.0, align 8 + store i32 2, ptr @Glob, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.cond1, %entry + %L = load i64, ptr %d.sroa.0, align 8 + %A0 = and i64 %L, 4294967295 + store i64 %A0, ptr %d.sroa.0, align 8 + %BC = bitcast i64 %A0 to <2 x i32> + %0 = and <2 x i32> %BC, splat (i32 10) + store <2 x i32> %0, ptr %d.sroa.0, align 8 + br label %for.cond1 + +; CHECK: Printing analysis 'Cost Model Analysis' for function 'fun': +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d.sroa.0 = alloca i64, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 0, ptr %d.sroa.0, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 2, ptr @Glob, align 4 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %for.cond1 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %L = load i64, ptr %d.sroa.0, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = and i64 %L, 4294967295 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %A0, ptr %d.sroa.0, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BC = bitcast i64 %A0 to <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %0 = and <2 x i32> %BC, splat (i32 10) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %0, ptr %d.sroa.0, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %for.cond1 +} From 5f457712d37a843eb8d1ade0b8af7a1c455f4d0b Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Thu, 27 Feb 2025 14:22:26 -0800 Subject: [PATCH 004/123] [CIR] Upstream basic alloca and load support (#128792) This change 
implements basic support in ClangIR for local variables using the cir.alloca and cir.load operations. --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 42 ++++++ .../include/clang/CIR/Dialect/IR/CIRAttrs.td | 15 ++ clang/include/clang/CIR/Dialect/IR/CIROps.td | 113 +++++++++++++++ clang/include/clang/CIR/MissingFeatures.h | 23 ++++ clang/lib/CIR/CodeGen/Address.h | 76 ++++++++++ clang/lib/CIR/CodeGen/CIRGenDecl.cpp | 113 +++++++++++++++ clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 130 ++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 63 +++++++++ clang/lib/CIR/CodeGen/CIRGenFunction.cpp | 33 +++++ clang/lib/CIR/CodeGen/CIRGenFunction.h | 72 +++++++++- clang/lib/CIR/CodeGen/CIRGenModule.h | 5 + clang/lib/CIR/CodeGen/CIRGenStmt.cpp | 11 ++ clang/lib/CIR/CodeGen/CIRGenValue.h | 125 +++++++++++++++++ clang/lib/CIR/CodeGen/CMakeLists.txt | 2 + clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 18 +++ clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp | 77 +++++++++++ clang/lib/CIR/Dialect/IR/CMakeLists.txt | 1 + clang/test/CIR/CodeGen/basic.cpp | 27 ++++ 18 files changed, 945 insertions(+), 1 deletion(-) create mode 100644 clang/lib/CIR/CodeGen/Address.h create mode 100644 clang/lib/CIR/CodeGen/CIRGenDecl.cpp create mode 100644 clang/lib/CIR/CodeGen/CIRGenExpr.cpp create mode 100644 clang/lib/CIR/CodeGen/CIRGenValue.h create mode 100644 clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp create mode 100644 clang/test/CIR/CodeGen/basic.cpp diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index f03241a875845..14afdfc2758ea 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -9,6 +9,7 @@ #ifndef LLVM_CLANG_CIR_DIALECT_BUILDER_CIRBASEBUILDER_H #define LLVM_CLANG_CIR_DIALECT_BUILDER_CIRBASEBUILDER_H +#include "clang/AST/CharUnits.h" #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include 
"clang/CIR/Dialect/IR/CIRTypes.h" @@ -51,6 +52,47 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { return cir::ConstPtrAttr::get( getContext(), mlir::cast(type), valueAttr); } + + mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, + mlir::Type type, llvm::StringRef name, + mlir::IntegerAttr alignment) { + return create(loc, addrType, type, name, alignment); + } + + cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, + bool isVolatile = false, uint64_t alignment = 0) { + mlir::IntegerAttr intAttr; + if (alignment) + intAttr = mlir::IntegerAttr::get( + mlir::IntegerType::get(ptr.getContext(), 64), alignment); + + return create(loc, ptr); + } + + // + // Block handling helpers + // ---------------------- + // + static OpBuilder::InsertPoint getBestAllocaInsertPoint(mlir::Block *block) { + auto last = + std::find_if(block->rbegin(), block->rend(), [](mlir::Operation &op) { + // TODO: Add LabelOp missing feature here + return mlir::isa(&op); + }); + + if (last != block->rend()) + return OpBuilder::InsertPoint(block, ++mlir::Block::iterator(&*last)); + return OpBuilder::InsertPoint(block, block->begin()); + }; + + mlir::IntegerAttr getSizeFromCharUnits(mlir::MLIRContext *ctx, + clang::CharUnits size) { + // Note that mlir::IntegerType is used instead of cir::IntType here + // because we don't need sign information for this to be useful, so keep + // it simple. 
+ return mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 64), + size.getQuantity()); + } }; } // namespace cir diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td index 097616ba06749..ece04c225e322 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -54,6 +54,21 @@ def CIR_BoolAttr : CIR_Attr<"Bool", "bool", [TypedAttrInterface]> { }]; } +//===----------------------------------------------------------------------===// +// UndefAttr +//===----------------------------------------------------------------------===// + +def UndefAttr : CIR_Attr<"Undef", "undef", [TypedAttrInterface]> { + let summary = "Represent an undef constant"; + let description = [{ + The UndefAttr represents an undef constant, corresponding to LLVM's notion + of undef. + }]; + + let parameters = (ins AttributeSelfTypeParameter<"">:$type); + let assemblyFormat = [{}]; +} + //===----------------------------------------------------------------------===// // IntegerAttr //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index f9ce38588e436..083cf46a93ae6 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -115,6 +115,119 @@ def ConstantOp : CIR_Op<"const", let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// AllocaOp +//===----------------------------------------------------------------------===// + +class AllocaTypesMatchWith + : PredOpTrait> { + string lhs = lhsArg; + string rhs = rhsArg; + string transformer = transform; +} + +def AllocaOp : CIR_Op<"alloca", [ + AllocaTypesMatchWith<"'allocaType' matches pointee type of 'addr'", + "addr", "allocaType", + "cast($_self).getPointee()">, + DeclareOpInterfaceMethods]> { + let summary = 
"Defines a scope-local variable"; + let description = [{ + The `cir.alloca` operation defines a scope-local variable. + + The presence of the `const` attribute indicates that the local variable is + declared with C/C++ `const` keyword. + + The result type is a pointer to the input's type. + + Example: + + ```mlir + // int count; + %0 = cir.alloca i32, !cir.ptr, ["count"] {alignment = 4 : i64} + + // int *ptr; + %1 = cir.alloca !cir.ptr, !cir.ptr>, ["ptr"] {alignment = 8 : i64} + ... + ``` + }]; + + let arguments = (ins + TypeAttr:$allocaType, + StrAttr:$name, + UnitAttr:$init, + UnitAttr:$constant, + ConfinedAttr, [IntMinValue<0>]>:$alignment, + OptionalAttr:$annotations + ); + + let results = (outs Res]>:$addr); + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins "mlir::Type":$addr, + "mlir::Type":$allocaType, + "llvm::StringRef":$name, + "mlir::IntegerAttr":$alignment)> + ]; + + let extraClassDeclaration = [{ + // Whether the alloca input type is a pointer. + bool isPointerType() { return ::mlir::isa<::cir::PointerType>(getAllocaType()); } + }]; + + let assemblyFormat = [{ + $allocaType `,` qualified(type($addr)) `,` + `[` $name + (`,` `init` $init^)? + (`,` `const` $constant^)? + `]` + ($annotations^)? attr-dict + }]; + + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// LoadOp +//===----------------------------------------------------------------------===// + +def LoadOp : CIR_Op<"load", [ + TypesMatchWith<"type of 'result' matches pointee type of 'addr'", + "addr", "result", + "cast($_self).getPointee()">, + DeclareOpInterfaceMethods]> { + + let summary = "Load value from memory adddress"; + let description = [{ + `cir.load` reads a value (lvalue to rvalue conversion) given an address + backed up by a `cir.ptr` type. + + Example: + + ```mlir + + // Read from local variable, address in %0. 
+ %1 = cir.load %0 : !cir.ptr, i32 + ``` + }]; + + let arguments = (ins Arg:$addr); + let results = (outs CIR_AnyType:$result); + + let assemblyFormat = [{ + $addr `:` qualified(type($addr)) `,` type($result) attr-dict + }]; + + // FIXME: add verifier. +} + //===----------------------------------------------------------------------===// // ReturnOp //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index d4fcd52e7e6e3..5c7e10d018809 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -30,12 +30,35 @@ struct MissingFeatures { // This isn't needed until we add support for bools. static bool convertTypeForMemory() { return false; } + // CIRGenFunction implementation details + static bool cgfSymbolTable() { return false; } + // Unhandled global/linkage information. static bool opGlobalDSOLocal() { return false; } static bool opGlobalThreadLocal() { return false; } static bool opGlobalConstant() { return false; } static bool opGlobalAlignment() { return false; } static bool opGlobalLinkage() { return false; } + + // Load attributes + static bool opLoadThreadLocal() { return false; } + static bool opLoadEmitScalarRangeCheck() { return false; } + static bool opLoadBooleanRepresentation() { return false; } + + // AllocaOp handling + static bool opAllocaVarDeclContext() { return false; } + static bool opAllocaStaticLocal() { return false; } + static bool opAllocaNonGC() { return false; } + static bool opAllocaImpreciseLifetime() { return false; } + static bool opAllocaPreciseLifetime() { return false; } + static bool opAllocaTLS() { return false; } + static bool opAllocaOpenMPThreadPrivate() { return false; } + static bool opAllocaEscapeByReference() { return false; } + static bool opAllocaReference() { return false; } + + // Misc + static bool scalarConversionOpts() { return false; } + static 
bool tryEmitAsConstant() { return false; } }; } // namespace cir diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h new file mode 100644 index 0000000000000..72e7e1dcf1560 --- /dev/null +++ b/clang/lib/CIR/CodeGen/Address.h @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class provides a simple wrapper for a pair of a pointer and an +// alignment. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LIB_CIR_ADDRESS_H +#define CLANG_LIB_CIR_ADDRESS_H + +#include "mlir/IR/Value.h" +#include "clang/AST/CharUnits.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/ADT/PointerIntPair.h" + +namespace clang::CIRGen { + +class Address { + + // The boolean flag indicates whether the pointer is known to be non-null. + llvm::PointerIntPair pointerAndKnownNonNull; + + /// The expected CIR type of the pointer. Carrying accurate element type + /// information in Address makes it more convenient to work with Address + /// values and allows frontend assertions to catch simple mistakes. 
+ mlir::Type elementType; + + clang::CharUnits alignment; + +protected: + Address(std::nullptr_t) : elementType(nullptr) {} + +public: + Address(mlir::Value pointer, mlir::Type elementType, + clang::CharUnits alignment) + : pointerAndKnownNonNull(pointer, false), elementType(elementType), + alignment(alignment) { + assert(mlir::isa(pointer.getType()) && + "Expected cir.ptr type"); + + assert(pointer && "Pointer cannot be null"); + assert(elementType && "Element type cannot be null"); + assert(!alignment.isZero() && "Alignment cannot be zero"); + + assert(mlir::cast(pointer.getType()).getPointee() == + elementType); + } + + static Address invalid() { return Address(nullptr); } + bool isValid() const { + return pointerAndKnownNonNull.getPointer() != nullptr; + } + + mlir::Value getPointer() const { + assert(isValid()); + return pointerAndKnownNonNull.getPointer(); + } + + mlir::Type getElementType() const { + assert(isValid()); + assert(mlir::cast( + pointerAndKnownNonNull.getPointer().getType()) + .getPointee() == elementType); + return elementType; + } +}; + +} // namespace clang::CIRGen + +#endif // CLANG_LIB_CIR_ADDRESS_H diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp new file mode 100644 index 0000000000000..e44cad559d509 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -0,0 +1,113 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Decl nodes as CIR code. 
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace clang; +using namespace clang::CIRGen; + +void CIRGenFunction::emitAutoVarAlloca(const VarDecl &d) { + QualType ty = d.getType(); + if (ty.getAddressSpace() != LangAS::Default) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: address space"); + + auto loc = getLoc(d.getSourceRange()); + + if (d.isEscapingByref()) + cgm.errorNYI(d.getSourceRange(), + "emitAutoVarDecl: decl escaping by reference"); + + CharUnits alignment = getContext().getDeclAlign(&d); + + // If the type is variably-modified, emit all the VLA sizes for it. + if (ty->isVariablyModifiedType()) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarDecl: variably modified type"); + + Address address = Address::invalid(); + if (!ty->isConstantSizeType()) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarDecl: non-constant size type"); + + // A normal fixed sized variable becomes an alloca in the entry block, + mlir::Type allocaTy = convertTypeForMem(ty); + // Create the temp alloca and declare variable using it. + address = createTempAlloca(allocaTy, alignment, loc, d.getName()); + declare(address, &d, ty, getLoc(d.getSourceRange()), alignment); + + setAddrOfLocalVar(&d, address); +} + +void CIRGenFunction::emitAutoVarInit(const clang::VarDecl &d) { + QualType type = d.getType(); + + // If this local has an initializer, emit it now. + const Expr *init = d.getInit(); + + if (init || !type.isPODType(getContext())) { + cgm.errorNYI(d.getSourceRange(), "emitAutoVarInit"); + } +} + +void CIRGenFunction::emitAutoVarCleanups(const clang::VarDecl &d) { + // Check the type for a cleanup. 
+ if (QualType::DestructionKind dtorKind = d.needsDestruction(getContext())) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarCleanups: type cleanup"); + + assert(!cir::MissingFeatures::opAllocaPreciseLifetime()); + + // Handle the cleanup attribute. + if (d.hasAttr()) + cgm.errorNYI(d.getSourceRange(), "emitAutoVarCleanups: CleanupAttr"); +} + +/// Emit code and set up symbol table for a variable declaration with auto, +/// register, or no storage class specifier. These turn into simple stack +/// objects, globals depending on target. +void CIRGenFunction::emitAutoVarDecl(const VarDecl &d) { + emitAutoVarAlloca(d); + emitAutoVarInit(d); + emitAutoVarCleanups(d); +} + +void CIRGenFunction::emitVarDecl(const VarDecl &d) { + // If the declaration has external storage, don't emit it now, allow it to be + // emitted lazily on its first use. + if (d.hasExternalStorage()) + return; + + if (d.getStorageDuration() != SD_Automatic) + cgm.errorNYI(d.getSourceRange(), "emitVarDecl automatic storage duration"); + if (d.getType().getAddressSpace() == LangAS::opencl_local) + cgm.errorNYI(d.getSourceRange(), "emitVarDecl openCL address space"); + + assert(d.hasLocalStorage()); + + assert(!cir::MissingFeatures::opAllocaVarDeclContext()); + return emitAutoVarDecl(d); +} + +void CIRGenFunction::emitDecl(const Decl &d) { + switch (d.getKind()) { + case Decl::Var: { + const VarDecl &vd = cast(d); + assert(vd.isLocalVarDecl() && + "Should not see file-scope variables inside a function!"); + emitVarDecl(vd); + return; + } + default: + cgm.errorNYI(d.getSourceRange(), "emitDecl: unhandled decl type"); + } +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp new file mode 100644 index 0000000000000..ccc3e20875263 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -0,0 +1,130 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Expr nodes as CIR code. +// +//===----------------------------------------------------------------------===// + +#include "Address.h" +#include "CIRGenFunction.h" +#include "CIRGenValue.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "clang/AST/Attr.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace clang; +using namespace clang::CIRGen; +using namespace cir; + +mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue, + SourceLocation loc) { + assert(!cir::MissingFeatures::opLoadThreadLocal()); + assert(!cir::MissingFeatures::opLoadEmitScalarRangeCheck()); + assert(!cir::MissingFeatures::opLoadBooleanRepresentation()); + + Address addr = lvalue.getAddress(); + mlir::Type eltTy = addr.getElementType(); + + mlir::Value ptr = addr.getPointer(); + if (mlir::isa(eltTy)) + cgm.errorNYI(loc, "emitLoadOfScalar: void type"); + + mlir::Value loadOp = builder.CIRBaseBuilderTy::createLoad( + getLoc(loc), ptr, false /*isVolatile*/); + + return loadOp; +} + +/// Given an expression that represents a value lvalue, this +/// method emits the address of the lvalue, then loads the result as an rvalue, +/// returning the rvalue. 
+RValue CIRGenFunction::emitLoadOfLValue(LValue lv, SourceLocation loc) { + assert(!lv.getType()->isFunctionType()); + assert(!(lv.getType()->isConstantMatrixType()) && "not implemented"); + + if (lv.isSimple()) + return RValue::get(emitLoadOfScalar(lv, loc)); + + cgm.errorNYI(loc, "emitLoadOfLValue"); + return RValue::get(nullptr); +} + +LValue CIRGenFunction::emitDeclRefLValue(const DeclRefExpr *e) { + const NamedDecl *nd = e->getDecl(); + QualType ty = e->getType(); + + assert(e->isNonOdrUse() != NOUR_Unevaluated && + "should not emit an unevaluated operand"); + + if (const auto *vd = dyn_cast(nd)) { + // Checks for omitted feature handling + assert(!cir::MissingFeatures::opAllocaStaticLocal()); + assert(!cir::MissingFeatures::opAllocaNonGC()); + assert(!cir::MissingFeatures::opAllocaImpreciseLifetime()); + assert(!cir::MissingFeatures::opAllocaTLS()); + assert(!cir::MissingFeatures::opAllocaOpenMPThreadPrivate()); + assert(!cir::MissingFeatures::opAllocaEscapeByReference()); + + // Check if this is a global variable + if (vd->hasLinkage() || vd->isStaticDataMember()) + cgm.errorNYI(vd->getSourceRange(), "emitDeclRefLValue: global variable"); + + Address addr = Address::invalid(); + + // The variable should generally be present in the local decl map. + auto iter = LocalDeclMap.find(vd); + if (iter != LocalDeclMap.end()) { + addr = iter->second; + } else { + // Otherwise, it might be static local we haven't emitted yet for some + // reason; most likely, because it's in an outer function. 
+ cgm.errorNYI(vd->getSourceRange(), "emitDeclRefLValue: static local"); + } + + return LValue::makeAddr(addr, ty); + } + + cgm.errorNYI(e->getSourceRange(), "emitDeclRefLValue: unhandled decl type"); + return LValue(); +} + +mlir::Value CIRGenFunction::emitAlloca(StringRef name, mlir::Type ty, + mlir::Location loc, + CharUnits alignment) { + mlir::Block *entryBlock = getCurFunctionEntryBlock(); + + // CIR uses its own alloca address space rather than follow the target data + // layout like original CodeGen. The data layout awareness should be done in + // the lowering pass instead. + assert(!cir::MissingFeatures::addressSpace()); + cir::PointerType localVarPtrTy = builder.getPointerTo(ty); + mlir::IntegerAttr alignIntAttr = cgm.getSize(alignment); + + mlir::Value addr; + { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.restoreInsertionPoint(builder.getBestAllocaInsertPoint(entryBlock)); + addr = builder.createAlloca(loc, /*addr type*/ localVarPtrTy, + /*var type*/ ty, name, alignIntAttr); + assert(!cir::MissingFeatures::opAllocaVarDeclContext()); + } + return addr; +} + +/// This creates an alloca and inserts it at the current insertion point of the +/// builder. 
+Address CIRGenFunction::createTempAlloca(mlir::Type ty, CharUnits align, + mlir::Location loc, + const Twine &name) { + mlir::Value alloca = emitAlloca(name.str(), ty, loc, align); + return Address(alloca, ty, align); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 24a959108f73b..90a2fd2a5d806 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -11,9 +11,11 @@ //===----------------------------------------------------------------------===// #include "CIRGenFunction.h" +#include "CIRGenValue.h" #include "clang/AST/Expr.h" #include "clang/AST/StmtVisitor.h" +#include "clang/CIR/MissingFeatures.h" #include "mlir/IR/Value.h" @@ -52,6 +54,19 @@ class ScalarExprEmitter : public StmtVisitor { return {}; } + /// Emits the address of the l-value, then loads and returns the result. + mlir::Value emitLoadOfLValue(const Expr *e) { + LValue lv = cgf.emitLValue(e); + // FIXME: add some akin to EmitLValueAlignmentAssumption(E, V); + return cgf.emitLoadOfLValue(lv, e->getExprLoc()).getScalarVal(); + } + + // l-values + mlir::Value VisitDeclRefExpr(DeclRefExpr *e) { + assert(!cir::MissingFeatures::tryEmitAsConstant()); + return emitLoadOfLValue(e); + } + mlir::Value VisitIntegerLiteral(const IntegerLiteral *e) { mlir::Type type = cgf.convertType(e->getType()); return builder.create( @@ -65,7 +80,27 @@ class ScalarExprEmitter : public StmtVisitor { cgf.getLoc(e->getExprLoc()), type, builder.getCIRBoolAttr(e->getValue())); } + + mlir::Value VisitCastExpr(CastExpr *E); + + /// Emit a conversion from the specified type to the specified destination + /// type, both of which are CIR scalar types. + /// TODO: do we need ScalarConversionOpts here? Should be done in another + /// pass. 
+ mlir::Value emitScalarConversion(mlir::Value src, QualType srcType, + QualType dstType, SourceLocation loc) { + // No sort of type conversion is implemented yet, but the path for implicit + // paths goes through here even if the type isn't being changed. + srcType = srcType.getCanonicalType(); + dstType = dstType.getCanonicalType(); + if (srcType == dstType) + return src; + + cgf.getCIRGenModule().errorNYI(loc, + "emitScalarConversion for unequal types"); + } }; + } // namespace /// Emit the computation of the specified expression of scalar type. @@ -75,3 +110,31 @@ mlir::Value CIRGenFunction::emitScalarExpr(const Expr *e) { return ScalarExprEmitter(*this, builder).Visit(const_cast(e)); } + +// Emit code for an explicit or implicit cast. Implicit +// casts have to handle a more broad range of conversions than explicit +// casts, as they handle things like function to ptr-to-function decay +// etc. +mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) { + Expr *e = ce->getSubExpr(); + QualType destTy = ce->getType(); + CastKind kind = ce->getCastKind(); + + switch (kind) { + case CK_LValueToRValue: + assert(cgf.getContext().hasSameUnqualifiedType(e->getType(), destTy)); + assert(e->isGLValue() && "lvalue-to-rvalue applied to r-value!"); + return Visit(const_cast(e)); + + case CK_IntegralCast: { + assert(!cir::MissingFeatures::scalarConversionOpts()); + return emitScalarConversion(Visit(e), e->getType(), destTy, + ce->getExprLoc()); + } + + default: + cgf.getCIRGenModule().errorNYI(e->getSourceRange(), + "CastExpr: ", ce->getCastKindName()); + } + return {}; +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index bba2f71a87627..86986b5847e98 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -13,6 +13,7 @@ #include "CIRGenFunction.h" #include "clang/AST/GlobalDecl.h" +#include "clang/CIR/MissingFeatures.h" #include @@ -131,6 +132,21 @@ mlir::Location 
CIRGenFunction::getLoc(mlir::Location lhs, mlir::Location rhs) { return mlir::FusedLoc::get(locs, metadata, &getMLIRContext()); } +mlir::LogicalResult CIRGenFunction::declare(Address addr, const Decl *var, + QualType ty, mlir::Location loc, + CharUnits alignment) { + const auto *namedVar = dyn_cast_or_null(var); + assert(namedVar && "Needs a named decl"); + assert(!cir::MissingFeatures::cgfSymbolTable()); + + mlir::Value addrVal = addr.getPointer(); + auto allocaOp = cast(addrVal.getDefiningOp()); + if (ty->isReferenceType() || ty.isConstQualified()) + allocaOp.setConstantAttr(mlir::UnitAttr::get(&getMLIRContext())); + + return mlir::success(); +} + void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, cir::FuncOp fn, cir::FuncType funcType, SourceLocation loc, @@ -153,6 +169,7 @@ mlir::LogicalResult CIRGenFunction::emitFunctionBody(const clang::Stmt *body) { emitCompoundStmtWithoutScope(*block); else result = emitStmt(body, /*useCurrentScope=*/true); + return result; } @@ -217,4 +234,20 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn, return fn; } +/// Emit code to compute a designator that specifies the location +/// of the expression. +/// FIXME: document this function better. 
+LValue CIRGenFunction::emitLValue(const Expr *e) { + // FIXME: ApplyDebugLocation DL(*this, e); + switch (e->getStmtClass()) { + default: + getCIRGenModule().errorNYI(e->getSourceRange(), + std::string("l-value not implemented for '") + + e->getStmtClassName() + "'"); + break; + case Expr::DeclRefExprClass: + return emitDeclRefLValue(cast(e)); + } +} + } // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 92fbea16d3aa1..e0888acdc3dce 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -16,8 +16,12 @@ #include "CIRGenBuilder.h" #include "CIRGenModule.h" #include "CIRGenTypeCache.h" +#include "CIRGenValue.h" + +#include "Address.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/Type.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" @@ -49,6 +53,11 @@ class CIRGenFunction : public CIRGenTypeCache { /// for. mlir::Operation *curFn = nullptr; + using DeclMapTy = llvm::DenseMap; + /// This keeps track of the CIR allocas or globals for local C + /// declarations. + DeclMapTy LocalDeclMap; + clang::ASTContext &getContext() const { return cgm.getASTContext(); } CIRGenBuilderTy &getBuilder() { return builder; } @@ -56,6 +65,12 @@ class CIRGenFunction : public CIRGenTypeCache { CIRGenModule &getCIRGenModule() { return cgm; } const CIRGenModule &getCIRGenModule() const { return cgm; } + mlir::Block *getCurFunctionEntryBlock() { + auto fn = mlir::dyn_cast(curFn); + assert(fn && "other callables NYI"); + return &fn.getRegion().front(); + } + mlir::Type convertTypeForMem(QualType T); mlir::Type convertType(clang::QualType T); @@ -78,6 +93,17 @@ class CIRGenFunction : public CIRGenTypeCache { mlir::MLIRContext &getMLIRContext() { return cgm.getMLIRContext(); } +private: + /// Declare a variable in the current scope, return success if the variable + /// wasn't declared yet. 
+ mlir::LogicalResult declare(Address addr, const clang::Decl *var, + clang::QualType ty, mlir::Location loc, + clang::CharUnits alignment); + +public: + mlir::Value emitAlloca(llvm::StringRef name, mlir::Type ty, + mlir::Location loc, clang::CharUnits alignment); + /// Use to track source locations across nested visitor traversals. /// Always use a `SourceLocRAIIObject` to change currSrcLoc. std::optional currSrcLoc; @@ -121,8 +147,50 @@ class CIRGenFunction : public CIRGenTypeCache { void emitCompoundStmtWithoutScope(const clang::CompoundStmt &s); + mlir::LogicalResult emitDeclStmt(const clang::DeclStmt &s); + mlir::LogicalResult emitReturnStmt(const clang::ReturnStmt &s); + /// Given an expression that represents a value lvalue, this method emits + /// the address of the lvalue, then loads the result as an rvalue, + /// returning the rvalue. + RValue emitLoadOfLValue(LValue lv, SourceLocation loc); + + /// EmitLoadOfScalar - Load a scalar value from an address, taking + /// care to appropriately convert from the memory representation to + /// the LLVM value representation. The l-value must be a simple + /// l-value. + mlir::Value emitLoadOfScalar(LValue lvalue, SourceLocation loc); + + /// Emit code to compute a designator that specifies the location + /// of the expression. + /// FIXME: document this function better. + LValue emitLValue(const clang::Expr *e); + + void emitDecl(const clang::Decl &d); + + LValue emitDeclRefLValue(const clang::DeclRefExpr *e); + + /// Emit code and set up symbol table for a variable declaration with auto, + /// register, or no storage class specifier. These turn into simple stack + /// objects, globals depending on target. 
+ void emitAutoVarDecl(const clang::VarDecl &d); + + void emitAutoVarAlloca(const clang::VarDecl &d); + void emitAutoVarInit(const clang::VarDecl &d); + void emitAutoVarCleanups(const clang::VarDecl &d); + + /// This method handles emission of any variable declaration + /// inside a function, including static vars etc. + void emitVarDecl(const clang::VarDecl &d); + + /// Set the address of a local variable. + void setAddrOfLocalVar(const clang::VarDecl *vd, Address addr) { + assert(!LocalDeclMap.count(vd) && "Decl already exists in LocalDeclMap!"); + LocalDeclMap.insert({vd, addr}); + // TODO: Add symbol table support + } + /// Emit the computation of the specified expression of scalar type. mlir::Value emitScalarExpr(const clang::Expr *e); cir::FuncOp generateCode(clang::GlobalDecl gd, cir::FuncOp fn, @@ -134,8 +202,10 @@ class CIRGenFunction : public CIRGenTypeCache { void startFunction(clang::GlobalDecl gd, clang::QualType retTy, cir::FuncOp fn, cir::FuncType funcType, clang::SourceLocation loc, clang::SourceLocation startLoc); -}; + Address createTempAlloca(mlir::Type ty, CharUnits align, mlir::Location loc, + const Twine &name = "tmp"); +}; } // namespace clang::CIRGen #endif diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index bf3a4d1130f15..71a37b8c9a2ea 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -17,6 +17,7 @@ #include "CIRGenTypeCache.h" #include "CIRGenTypes.h" +#include "clang/AST/CharUnits.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "mlir/IR/Builders.h" @@ -116,6 +117,10 @@ class CIRGenModule : public CIRGenTypeCache { cir::FuncType funcType, const clang::FunctionDecl *funcDecl); + mlir::IntegerAttr getSize(CharUnits size) { + return builder.getSizeFromCharUnits(&getMLIRContext(), size); + } + const llvm::Triple &getTriple() const { return target.getTriple(); } /// Helpers to emit "not yet implemented" error diagnostics diff --git 
a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index f42f30cc5a433..ed5d87a39704a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -68,6 +68,8 @@ mlir::LogicalResult CIRGenFunction::emitSimpleStmt(const Stmt *s, default: // Only compound and return statements are supported right now. return mlir::failure(); + case Stmt::DeclStmtClass: + return emitDeclStmt(cast(*s)); case Stmt::CompoundStmtClass: if (useCurrentScope) emitCompoundStmtWithoutScope(cast(*s)); @@ -81,6 +83,15 @@ mlir::LogicalResult CIRGenFunction::emitSimpleStmt(const Stmt *s, return mlir::success(); } +mlir::LogicalResult CIRGenFunction::emitDeclStmt(const DeclStmt &s) { + assert(builder.getInsertionBlock() && "expected valid insertion point"); + + for (const Decl *I : s.decls()) + emitDecl(*I); + + return mlir::success(); +} + mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) { mlir::Location loc = getLoc(s.getSourceRange()); const Expr *rv = s.getRetValue(); diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h new file mode 100644 index 0000000000000..d29646983fd30 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -0,0 +1,125 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes implement wrappers around mlir::Value in order to fully +// represent the range of values for C L- and R- values. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LIB_CIR_CIRGENVALUE_H +#define CLANG_LIB_CIR_CIRGENVALUE_H + +#include "Address.h" + +#include "clang/AST/CharUnits.h" +#include "clang/AST/Type.h" + +#include "llvm/ADT/PointerIntPair.h" + +#include "mlir/IR/Value.h" + +namespace clang::CIRGen { + +/// This trivial value class is used to represent the result of an +/// expression that is evaluated. It can be one of three things: either a +/// simple MLIR SSA value, a pair of SSA values for complex numbers, or the +/// address of an aggregate value in memory. +class RValue { + enum Flavor { Scalar, Complex, Aggregate }; + + // Stores first value and flavor. + llvm::PointerIntPair v1; + // Stores second value and volatility. + llvm::PointerIntPair, 1, bool> v2; + // Stores element type for aggregate values. + mlir::Type elementType; + +public: + bool isScalar() const { return v1.getInt() == Scalar; } + + /// Return the mlir::Value of this scalar value. + mlir::Value getScalarVal() const { + assert(isScalar() && "Not a scalar!"); + return v1.getPointer(); + } + + static RValue get(mlir::Value v) { + RValue er; + er.v1.setPointer(v); + er.v1.setInt(Scalar); + er.v2.setInt(false); + return er; + } +}; + +/// The source of the alignment of an l-value; an expression of +/// confidence in the alignment actually matching the estimate. +enum class AlignmentSource { + /// The l-value was an access to a declared entity or something + /// equivalently strong, like the address of an array allocated by a + /// language runtime. + Decl, + + /// The l-value was considered opaque, so the alignment was + /// determined from a type, but that type was an explicitly-aligned + /// typedef. + AttributedType, + + /// The l-value was considered opaque, so the alignment was + /// determined from a type. + Type +}; + +class LValue { + enum { + Simple, // This is a normal l-value, use getAddress(). 
+ VectorElt, // This is a vector element l-value (V[i]), use getVector* + BitField, // This is a bitfield l-value, use getBitfield*. + ExtVectorElt, // This is an extended vector subset, use getExtVectorComp + GlobalReg, // This is a register l-value, use getGlobalReg() + MatrixElt // This is a matrix element, use getVector* + } lvType; + clang::QualType type; + + mlir::Value v; + mlir::Type elementType; + + void initialize(clang::QualType type) { this->type = type; } + +public: + bool isSimple() const { return lvType == Simple; } + + // TODO: Add support for volatile + bool isVolatile() const { return false; } + + clang::QualType getType() const { return type; } + + mlir::Value getPointer() const { return v; } + + clang::CharUnits getAlignment() const { + // TODO: Handle alignment + return clang::CharUnits::One(); + } + + Address getAddress() const { + return Address(getPointer(), elementType, getAlignment()); + } + + static LValue makeAddr(Address address, clang::QualType t) { + LValue r; + r.lvType = Simple; + r.v = address.getPointer(); + r.elementType = address.getElementType(); + r.initialize(t); + return r; + } +}; + +} // namespace clang::CIRGen + +#endif // CLANG_LIB_CIR_CIRGENVALUE_H diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index 5602efae1ba41..dbb6d9e7b3807 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -8,6 +8,8 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_clang_library(clangCIR CIRGenerator.cpp + CIRGenDecl.cpp + CIRGenExpr.cpp CIRGenExprScalar.cpp CIRGenFunction.cpp CIRGenModule.cpp diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 3f1be930d71e5..aa21edcb5e99d 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -117,6 +117,24 @@ static void printOmittedTerminatorRegion(mlir::OpAsmPrinter &printer, 
/*printBlockTerminators=*/!omitRegionTerm(region)); } +//===----------------------------------------------------------------------===// +// AllocaOp +//===----------------------------------------------------------------------===// + +void cir::AllocaOp::build(mlir::OpBuilder &odsBuilder, + mlir::OperationState &odsState, mlir::Type addr, + mlir::Type allocaType, llvm::StringRef name, + mlir::IntegerAttr alignment) { + odsState.addAttribute(getAllocaTypeAttrName(odsState.name), + mlir::TypeAttr::get(allocaType)); + odsState.addAttribute(getNameAttrName(odsState.name), + odsBuilder.getStringAttr(name)); + if (alignment) { + odsState.addAttribute(getAlignmentAttrName(odsState.name), alignment); + } + odsState.addTypes(addr); +} + //===----------------------------------------------------------------------===// // ConstantOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp new file mode 100644 index 0000000000000..af6b5e4fbd9f6 --- /dev/null +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements MemorySlot-related interfaces for CIR dialect +// operations. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +using namespace mlir; + +//===----------------------------------------------------------------------===// +// Interfaces for AllocaOp +//===----------------------------------------------------------------------===// + +llvm::SmallVector cir::AllocaOp::getPromotableSlots() { + return {MemorySlot{getResult(), getAllocaType()}}; +} + +Value cir::AllocaOp::getDefaultValue(const MemorySlot &slot, + OpBuilder &builder) { + return builder.create( + getLoc(), slot.elemType, builder.getAttr(slot.elemType)); +} + +void cir::AllocaOp::handleBlockArgument(const MemorySlot &slot, + BlockArgument argument, + OpBuilder &builder) {} + +std::optional +cir::AllocaOp::handlePromotionComplete(const MemorySlot &slot, + Value defaultValue, OpBuilder &builder) { + if (defaultValue && defaultValue.use_empty()) + defaultValue.getDefiningOp()->erase(); + this->erase(); + return std::nullopt; +} + +//===----------------------------------------------------------------------===// +// Interfaces for LoadOp +//===----------------------------------------------------------------------===// + +bool cir::LoadOp::loadsFrom(const MemorySlot &slot) { + return getAddr() == slot.ptr; +} + +bool cir::LoadOp::storesTo(const MemorySlot &slot) { return false; } + +Value cir::LoadOp::getStored(const MemorySlot &slot, OpBuilder &builder, + Value reachingDef, const DataLayout &dataLayout) { + llvm_unreachable("getStored should not be called on LoadOp"); +} + +bool cir::LoadOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + if (blockingUses.size() != 1) + return false; + Value blockingUse = (*blockingUses.begin())->get(); + return blockingUse == slot.ptr && getAddr() == slot.ptr && + getResult().getType() == slot.elemType; +} + +DeletionKind cir::LoadOp::removeBlockingUses( 
+ const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + OpBuilder &builder, Value reachingDefinition, + const DataLayout &dataLayout) { + getResult().replaceAllUsesWith(reachingDefinition); + return DeletionKind::Delete; +} diff --git a/clang/lib/CIR/Dialect/IR/CMakeLists.txt b/clang/lib/CIR/Dialect/IR/CMakeLists.txt index baf8bff185221..925af0d61c984 100644 --- a/clang/lib/CIR/Dialect/IR/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/IR/CMakeLists.txt @@ -1,6 +1,7 @@ add_clang_library(MLIRCIR CIRAttrs.cpp CIRDialect.cpp + CIRMemorySlot.cpp CIRTypes.cpp DEPENDS diff --git a/clang/test/CIR/CodeGen/basic.cpp b/clang/test/CIR/CodeGen/basic.cpp new file mode 100644 index 0000000000000..210afcd541159 --- /dev/null +++ b/clang/test/CIR/CodeGen/basic.cpp @@ -0,0 +1,27 @@ +// RUN: not %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s + +// This error is caused by the "const int i = 2" line in f2(). When +// initaliziers are implemented, the checks there should be updated +// and the "not" should be removed from the run line. 
+// CHECK: error: ClangIR code gen Not Yet Implemented: emitAutoVarInit + +int f1() { + int i; + return i; +} + +// CHECK: module +// CHECK: cir.func @f1() -> !cir.int +// CHECK: %[[I_PTR:.*]] = cir.alloca !cir.int, !cir.ptr>, ["i"] {alignment = 4 : i64} +// CHECK: %[[I:.*]] = cir.load %[[I_PTR]] : !cir.ptr>, !cir.int +// CHECK: cir.return %[[I]] : !cir.int + +int f2() { + const int i = 2; + return i; +} + +// CHECK: cir.func @f2() -> !cir.int +// CHECK: %[[I_PTR:.*]] = cir.alloca !cir.int, !cir.ptr>, ["i", const] {alignment = 4 : i64} +// CHECK: %[[I:.*]] = cir.load %[[I_PTR]] : !cir.ptr>, !cir.int +// CHECK: cir.return %[[I]] : !cir.int From b73a57f8296d119ae28460f924d54a05f36a1c7a Mon Sep 17 00:00:00 2001 From: vporpo Date: Thu, 27 Feb 2025 14:25:06 -0800 Subject: [PATCH 005/123] [SandboxIR][Region][NFC] Fix windows build issue (#129082) This should fix the issue reported here: https://discourse.llvm.org/t/second-stage-of-build-on-windows-fails-in-sandboxir/84841 --- llvm/lib/SandboxIR/Region.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/SandboxIR/Region.cpp b/llvm/lib/SandboxIR/Region.cpp index 086993e6dc872..2eb84bd72ed00 100644 --- a/llvm/lib/SandboxIR/Region.cpp +++ b/llvm/lib/SandboxIR/Region.cpp @@ -64,7 +64,7 @@ void Region::setAux(ArrayRef Aux) { auto &LLVMCtx = Ctx.LLVMCtx; for (auto [Idx, I] : enumerate(Aux)) { llvm::ConstantInt *IdxC = - llvm::ConstantInt::get(LLVMCtx, llvm::APInt(32, Idx, false)); + llvm::ConstantInt::get(llvm::Type::getInt32Ty(LLVMCtx), Idx, false); assert(cast(I->Val)->getMetadata(AuxMDKind) == nullptr && "Instruction already in Aux!"); cast(I->Val)->setMetadata( From 0b80ddaf4d36c2c395ce60eb77cd8b374171ce23 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:27:19 -0800 Subject: [PATCH 006/123] =?UTF-8?q?[flang]=20Refine=20handling=20of=20NULL?= =?UTF-8?q?()=20actual=20to=20non-optional=20allocatable=20=E2=80=A6=20(#1?= =?UTF-8?q?16126)?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …dummy We presently allow a NULL() actual argument to associate with a non-optional dummy allocatable argument only under INTENT(IN). This is too strict, as it precludes the case of a dummy argument with default intent. Continue to require that the actual argument be definable under INTENT(OUT) and INTENT(IN OUT), and (contra XLF) interpret NULL() as being an expression, not a definable variable, even when it is given an allocatable MOLD. Fixes https://github.com/llvm/llvm-project/issues/115984. --- .../include/flang/Support/Fortran-features.h | 3 +- flang/lib/Semantics/check-call.cpp | 51 ++++++++++--------- flang/lib/Support/Fortran-features.cpp | 2 + flang/test/Semantics/call27.f90 | 16 +++++- 4 files changed, 47 insertions(+), 25 deletions(-) diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h index 44ba6428e6c93..356623c643e46 100644 --- a/flang/include/flang/Support/Fortran-features.h +++ b/flang/include/flang/Support/Fortran-features.h @@ -74,7 +74,8 @@ ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable, IndexVarRedefinition, IncompatibleImplicitInterfaces, VectorSubscriptFinalization, UndefinedFunctionResult, UselessIomsg, MismatchingDummyProcedure, SubscriptedEmptyArray, UnsignedLiteralTruncation, - CompatibleDeclarationsFromDistinctModules) + CompatibleDeclarationsFromDistinctModules, + NullActualForDefaultIntentAllocatable) using LanguageFeatures = EnumSet; using UsageWarnings = EnumSet; diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index e396ece303103..93ae05e2902f0 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -793,21 +793,21 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } } else if (actualIsNull) { if (dummyIsOptional) { - } else if (dummy.intent == common::Intent::In) { - // Extension (Intel, NAG, XLF): a 
NULL() pointer is an acceptable - // actual argument for an INTENT(IN) allocatable dummy, and it - // is treated as an unassociated allocatable. - if (context.ShouldWarn( - common::LanguageFeature::NullActualForAllocatable)) { - messages.Say(common::LanguageFeature::NullActualForAllocatable, - "Allocatable %s is associated with a null pointer"_port_en_US, - dummyName); - } - } else { + } else if (dummy.intent == common::Intent::Default && + context.ShouldWarn( + common::UsageWarning::NullActualForDefaultIntentAllocatable)) { messages.Say( - "A null pointer may not be associated with allocatable %s without INTENT(IN)"_err_en_US, + "A null pointer should not be associated with allocatable %s without INTENT(IN)"_warn_en_US, + dummyName); + } else if (dummy.intent == common::Intent::In && + context.ShouldWarn( + common::LanguageFeature::NullActualForAllocatable)) { + messages.Say(common::LanguageFeature::NullActualForAllocatable, + "Allocatable %s is associated with a null pointer"_port_en_US, dummyName); } + // INTENT(OUT) and INTENT(IN OUT) cases are caught elsewhere as being + // undefinable actual arguments. } else { messages.Say( "ALLOCATABLE %s must be associated with an ALLOCATABLE actual argument"_err_en_US, @@ -1292,19 +1292,24 @@ static void CheckExplicitInterfaceArg(evaluate::ActualArgument &arg, } else if (object.attrs.test(characteristics::DummyDataObject:: Attr::Allocatable) && evaluate::IsNullPointer(*expr)) { - if (object.intent == common::Intent::In) { - // Extension (Intel, NAG, XLF); see CheckExplicitDataArg. 
- if (context.ShouldWarn(common::LanguageFeature:: - NullActualForAllocatable)) { - messages.Say( - common::LanguageFeature::NullActualForAllocatable, - "Allocatable %s is associated with NULL()"_port_en_US, - dummyName); - } - } else { + if (object.intent == common::Intent::Out || + object.intent == common::Intent::InOut) { messages.Say( - "NULL() actual argument '%s' may not be associated with allocatable %s without INTENT(IN)"_err_en_US, + "NULL() actual argument '%s' may not be associated with allocatable dummy argument %s that is INTENT(OUT) or INTENT(IN OUT)"_err_en_US, expr->AsFortran(), dummyName); + } else if (object.intent == common::Intent::Default && + context.ShouldWarn(common::UsageWarning:: + NullActualForDefaultIntentAllocatable)) { + messages.Say(common::UsageWarning:: + NullActualForDefaultIntentAllocatable, + "NULL() actual argument '%s' should not be associated with allocatable dummy argument %s without INTENT(IN)"_warn_en_US, + expr->AsFortran(), dummyName); + } else if (context.ShouldWarn(common::LanguageFeature:: + NullActualForAllocatable)) { + messages.Say( + common::LanguageFeature::NullActualForAllocatable, + "Allocatable %s is associated with %s"_port_en_US, + dummyName, expr->AsFortran()); } } else { messages.Say( diff --git a/flang/lib/Support/Fortran-features.cpp b/flang/lib/Support/Fortran-features.cpp index bbeb4b15a0486..4bc92f3924ef6 100644 --- a/flang/lib/Support/Fortran-features.cpp +++ b/flang/lib/Support/Fortran-features.cpp @@ -84,8 +84,10 @@ LanguageFeatureControl::LanguageFeatureControl() { warnUsage_.set(UsageWarning::UndefinedFunctionResult); warnUsage_.set(UsageWarning::UselessIomsg); warnUsage_.set(UsageWarning::UnsignedLiteralTruncation); + warnUsage_.set(UsageWarning::NullActualForDefaultIntentAllocatable); // New warnings, on by default warnLanguage_.set(LanguageFeature::SavedLocalInSpecExpr); + warnLanguage_.set(LanguageFeature::NullActualForAllocatable); } // Ignore case and any inserted punctuation (like '-'/'_') 
diff --git a/flang/test/Semantics/call27.f90 b/flang/test/Semantics/call27.f90 index 062df6e45da89..135d6c06dcb4a 100644 --- a/flang/test/Semantics/call27.f90 +++ b/flang/test/Semantics/call27.f90 @@ -1,12 +1,26 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic ! Catch NULL() actual argument association with allocatable dummy argument program test - !ERROR: NULL() actual argument 'NULL()' may not be associated with allocatable dummy argument 'a=' without INTENT(IN) + real, allocatable :: a + !ERROR: NULL() actual argument 'NULL()' may not be associated with allocatable dummy argument dummy argument 'a=' that is INTENT(OUT) or INTENT(IN OUT) + call foo0(null()) + !WARNING: NULL() actual argument 'NULL()' should not be associated with allocatable dummy argument dummy argument 'a=' without INTENT(IN) call foo1(null()) !PORTABILITY: Allocatable dummy argument 'a=' is associated with NULL() call foo2(null()) call foo3(null()) ! ok + !ERROR: Actual argument associated with INTENT(IN OUT) dummy argument 'a=' is not definable + !BECAUSE: 'null(mold=a)' is a null pointer + call foo0(null(mold=a)) + !WARNING: A null pointer should not be associated with allocatable dummy argument 'a=' without INTENT(IN) + call foo1(null(mold=a)) + !PORTABILITY: Allocatable dummy argument 'a=' is associated with a null pointer + call foo2(null(mold=a)) + call foo3(null(mold=a)) ! ok contains + subroutine foo0(a) + real, allocatable, intent(in out) :: a + end subroutine subroutine foo1(a) real, allocatable :: a end subroutine From ad644dfc27eddbaa0817b52362dfaf72758b6f23 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:27:46 -0800 Subject: [PATCH 007/123] [flang] Support COSHAPE() intrinsic function (#125286) Enable COSHAPE in the intrinsics table and enable its test. 
--- flang/lib/Evaluate/intrinsics.cpp | 4 ++-- flang/test/Semantics/coshape.f90 | 31 +++++++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 6d8f19388d8b7..e55a22dce8e99 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -421,6 +421,8 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"cos", {{"x", SameFloating}}, SameFloating}, {"cosd", {{"x", SameFloating}}, SameFloating}, {"cosh", {{"x", SameFloating}}, SameFloating}, + {"coshape", {{"coarray", AnyData, Rank::coarray}, SizeDefaultKIND}, KINDInt, + Rank::vector, IntrinsicClass::inquiryFunction}, {"count", {{"mask", AnyLogical, Rank::array}, OptionalDIM, DefaultingKIND}, KINDInt, Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"cshift", @@ -1054,8 +1056,6 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"__builtin_numeric_storage_size", {}, DefaultInt}, }; -// TODO: Coarray intrinsic functions -// COSHAPE // TODO: Non-standard intrinsic functions // SHIFT, // COMPL, EQV, NEQV, INT8, JINT, JNINT, KNINT, diff --git a/flang/test/Semantics/coshape.f90 b/flang/test/Semantics/coshape.f90 index 476000b56411c..d4fb45df6600c 100644 --- a/flang/test/Semantics/coshape.f90 +++ b/flang/test/Semantics/coshape.f90 @@ -1,5 +1,4 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 -! XFAIL: * ! Check for semantic errors in coshape() function, ! as defined in section 16.9.55 of the Fortran ! 
2018 standard @@ -8,18 +7,21 @@ program coshape_tests use iso_c_binding, only : c_int32_t, c_int64_t implicit none + type t + real x + end type integer array(1), non_coarray(1), scalar_coarray[*], array_coarray(1)[*], non_constant, scalar_result real real_coarray[*] complex complex_coarray[*] character char_array(1) logical non_integer, logical_coarray[*] + type(t) derived_scalar_coarray[*], derived_array_coarray(1)[*] integer, allocatable :: codimensions(:) !___ standard-conforming statement with no optional arguments present ___ codimensions = coshape(scalar_coarray) codimensions = coshape(array_coarray) codimensions = coshape(array_coarray(1)) - codimensions = coshape(scalar_coarray[1]) codimensions = coshape(real_coarray) codimensions = coshape(logical_coarray) codimensions = coshape(complex_coarray) @@ -33,54 +35,79 @@ program coshape_tests !___ non-conforming statements ___ ! coarray argument must be a coarray + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' codimensions = coshape(non_coarray) + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' + codimensions = coshape(derived_scalar_coarray[1]%x) + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' + codimensions = coshape(derived_array_coarray[1]%x) + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' + codimensions = coshape(array_coarray[1]) + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' + codimensions = coshape(scalar_coarray[1]) ! kind argument must be an integer + !ERROR: Actual argument for 'kind=' has bad type 'LOGICAL(4)' codimensions = coshape(scalar_coarray, non_integer) ! kind argument must be a constant expression + !ERROR: 'kind=' argument must be a constant scalar integer whose value is a supported kind for the intrinsic result type codimensions = coshape(real_coarray, non_constant) ! 
kind argument must be an integer scalar + !ERROR: 'kind=' argument has unacceptable rank 1 codimensions = coshape(complex_coarray, array) ! missing all arguments + !ERROR: missing mandatory 'coarray=' argument codimensions = coshape() ! missing mandatory argument + !ERROR: missing mandatory 'coarray=' argument codimensions = coshape(kind=c_int32_t) ! incorrect typing for mandatory argument + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' codimensions = coshape(3.4) ! incorrect typing for coarray argument + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' codimensions = coshape(coarray=3.4) ! too many arguments + !ERROR: too many actual arguments for intrinsic 'coshape' codimensions = coshape(scalar_coarray, c_int32_t, 0) ! incorrect typing with correct keyword for coarray argument + !ERROR: 'coarray=' argument must have corank > 0 for intrinsic 'coshape' codimensions = coshape(coarray=non_coarray) ! correct typing with incorrect keyword for coarray argument + !ERROR: unknown keyword argument to intrinsic 'coshape' codimensions = coshape(c=real_coarray) ! incorrect typing with correct keyword for kind argument + !ERROR: Actual argument for 'kind=' has bad type 'LOGICAL(4)' codimensions = coshape(complex_coarray, kind=non_integer) ! correct typing with incorrect keyword for kind argument + !ERROR: unknown keyword argument to intrinsic 'coshape' codimensions = coshape(logical_coarray, kinds=c_int32_t) ! repeated keyword for coarray argument + !ERROR: repeated keyword argument to intrinsic 'coshape' codimensions = coshape(coarray=scalar_coarray, coarray=real_coarray) ! repeated keyword for kind argument + !ERROR: repeated keyword argument to intrinsic 'coshape' codimensions = coshape(real_coarray, kind=c_int32_t, kind=c_int64_t) ! result must be a rank 1 array + !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches scalar INTEGER(4) and rank 1 array of INTEGER(4) scalar_result = coshape(scalar_coarray) ! 
result must be an integer array + !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches operand types CHARACTER(KIND=1) and INTEGER(4) char_array = coshape(real_coarray) end program coshape_tests From efa86103e78abd089ac5a8baa20b0f8d5e5263f2 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:28:08 -0800 Subject: [PATCH 008/123] [flang] Catch more semantic errors with coarrays (#125536) Detect and report a bunch of uncaught semantic errors with coarray declarations. Add more tests, and clean up bad usage in existing tests. --- flang/include/flang/Semantics/tools.h | 2 + flang/lib/Evaluate/tools.cpp | 6 +- flang/lib/Semantics/check-declarations.cpp | 98 ++++++++++++++++++---- flang/lib/Semantics/resolve-names.cpp | 41 ++++----- flang/lib/Semantics/tools.cpp | 7 ++ flang/test/Lower/pre-fir-tree04.f90 | 1 + flang/test/Semantics/allocate11.f90 | 18 +++- flang/test/Semantics/assign02.f90 | 6 +- flang/test/Semantics/associated.f90 | 2 +- flang/test/Semantics/bind-c09.f90 | 2 +- flang/test/Semantics/call10.f90 | 5 +- flang/test/Semantics/call12.f90 | 6 +- flang/test/Semantics/change_team01.f90 | 1 + flang/test/Semantics/coarrays01.f90 | 6 +- flang/test/Semantics/coarrays02.f90 | 50 +++++++++++ flang/test/Semantics/critical02.f90 | 2 +- flang/test/Semantics/doconcurrent01.f90 | 7 +- flang/test/Semantics/doconcurrent08.f90 | 4 +- flang/test/Semantics/form_team01.f90 | 3 +- flang/test/Semantics/init01.f90 | 1 + flang/test/Semantics/resolve07.f90 | 1 + flang/test/Semantics/resolve50.f90 | 3 +- flang/test/Semantics/resolve55.f90 | 2 +- flang/test/Semantics/resolve88.f90 | 6 +- flang/test/Semantics/resolve94.f90 | 1 + flang/test/Semantics/this_image01.f90 | 2 +- 26 files changed, 207 insertions(+), 76 deletions(-) create mode 100644 flang/test/Semantics/coarrays02.f90 diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 56dcfa88ad92d..16fd8d158b0e0 100644 --- a/flang/include/flang/Semantics/tools.h +++ 
b/flang/include/flang/Semantics/tools.h @@ -631,6 +631,8 @@ using PotentialAndPointerComponentIterator = // dereferenced. PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent( const DerivedTypeSpec &, bool ignoreCoarrays = false); +PotentialComponentIterator::const_iterator FindCoarrayPotentialComponent( + const DerivedTypeSpec &); UltimateComponentIterator::const_iterator FindCoarrayUltimateComponent( const DerivedTypeSpec &); UltimateComponentIterator::const_iterator FindPointerUltimateComponent( diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 16b0260719097..7181265b862fb 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1812,7 +1812,11 @@ bool IsSaved(const Symbol &original) { } else if (scopeKind == Scope::Kind::DerivedType) { return false; // this is a component } else if (symbol.attrs().test(Attr::SAVE)) { - return true; // explicit SAVE attribute + // explicit or implied SAVE attribute + // N.B.: semantics sets implied SAVE for main program + // local variables whose derived types have coarray + // potential subobject components. + return true; } else if (IsDummy(symbol) || IsFunctionResult(symbol) || IsAutomatic(symbol) || IsNamedConstant(symbol)) { return false; diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index bf4dc16a15b4a..40a529b37e7dc 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -683,20 +683,10 @@ void CheckHelper::CheckObjectEntity( const DeclTypeSpec *type{details.type()}; const DerivedTypeSpec *derived{type ? 
type->AsDerived() : nullptr}; bool isComponent{symbol.owner().IsDerivedType()}; - if (details.coshape().empty()) { // not a coarray - if (!isComponent && !IsPointer(symbol) && derived) { - if (IsEventTypeOrLockType(derived)) { - messages_.Say( - "Variable '%s' with EVENT_TYPE or LOCK_TYPE must be a coarray"_err_en_US, - symbol.name()); - } else if (auto component{FindEventOrLockPotentialComponent( - *derived, /*ignoreCoarrays=*/true)}) { - messages_.Say( - "Variable '%s' with EVENT_TYPE or LOCK_TYPE potential component '%s' must be a coarray"_err_en_US, - symbol.name(), component.BuildResultDesignatorName()); - } - } - } else { // it's a coarray + const Symbol *commonBlock{FindCommonBlockContaining(symbol)}; + bool isLocalVariable{!commonBlock && !isComponent && !details.isDummy() && + symbol.owner().kind() != Scope::Kind::OtherConstruct}; + if (int corank{evaluate::GetCorank(symbol)}; corank > 0) { // it's a coarray bool isDeferredCoshape{details.coshape().CanBeDeferredShape()}; if (IsAllocatable(symbol)) { if (!isDeferredCoshape) { // C827 @@ -726,6 +716,46 @@ void CheckHelper::CheckObjectEntity( messages_.Say("Coarray '%s' may not be an assumed-rank array"_err_en_US, symbol.name()); } + if (IsNamedConstant(symbol)) { + messages_.Say( + "Coarray '%s' may not be a named constant"_err_en_US, symbol.name()); + } + if (IsFunctionResult(symbol)) { + messages_.Say("Function result may not be a coarray"_err_en_US); + } else if (commonBlock) { + messages_.Say("Coarray '%s' may not be in COMMON block '/%s/'"_err_en_US, + symbol.name(), commonBlock->name()); + } else if (isLocalVariable && !IsAllocatableOrPointer(symbol) && + !IsSaved(symbol)) { + messages_.Say("Local coarray must have the SAVE attribute"_err_en_US); + } + for (int j{0}; j < corank; ++j) { + if (auto lcbv{evaluate::ToInt64(evaluate::Fold( + context().foldingContext(), evaluate::GetLCOBOUND(symbol, j)))}) { + if (auto ucbv{ + evaluate::ToInt64(evaluate::Fold(context().foldingContext(), + 
evaluate::GetUCOBOUND(symbol, j)))}) { + if (ucbv < lcbv) { + messages_.Say( + "Cobounds %jd:%jd of codimension %d produce an empty coarray"_err_en_US, + std::intmax_t{*lcbv}, std::intmax_t{*ucbv}, j + 1); + } + } + } + } + } else { // not a coarray + if (!isComponent && !IsPointer(symbol) && derived) { + if (IsEventTypeOrLockType(derived)) { + messages_.Say( + "Variable '%s' with EVENT_TYPE or LOCK_TYPE must be a coarray"_err_en_US, + symbol.name()); + } else if (auto component{FindEventOrLockPotentialComponent( + *derived, /*ignoreCoarrays=*/true)}) { + messages_.Say( + "Variable '%s' with EVENT_TYPE or LOCK_TYPE potential component '%s' must be a coarray"_err_en_US, + symbol.name(), component.BuildResultDesignatorName()); + } + } } if (details.isDummy()) { if (IsIntentOut(symbol)) { @@ -926,6 +956,42 @@ void CheckHelper::CheckObjectEntity( symbol.name()); } + if (derived) { + bool isUnsavedLocal{ + isLocalVariable && !IsAllocatable(symbol) && !IsSaved(symbol)}; + if (IsFunctionResult(symbol) || IsPointer(symbol) || + evaluate::IsCoarray(symbol) || isUnsavedLocal) { + if (auto badPotential{FindCoarrayPotentialComponent(*derived)}) { + if (IsFunctionResult(symbol)) { // F'2023 C825 + SayWithDeclaration(*badPotential, + "Function result '%s' may not have a coarray potential component '%s'"_err_en_US, + symbol.name(), badPotential.BuildResultDesignatorName()); + } else if (IsPointer(symbol)) { // F'2023 C825 + SayWithDeclaration(*badPotential, + "Pointer '%s' may not have a coarray potential component '%s'"_err_en_US, + symbol.name(), badPotential.BuildResultDesignatorName()); + } else if (evaluate::IsCoarray(symbol)) { // F'2023 C825 + SayWithDeclaration(*badPotential, + "Coarray '%s' may not have a coarray potential component '%s'"_err_en_US, + symbol.name(), badPotential.BuildResultDesignatorName()); + } else if (isUnsavedLocal) { // F'2023 C826 + SayWithDeclaration(*badPotential, + "Local variable '%s' without the SAVE attribute may not have a coarray potential 
subobject component '%s'"_err_en_US, + symbol.name(), badPotential.BuildResultDesignatorName()); + } else { + DIE("caught unexpected bad coarray potential component"); + } + } + } else if (isComponent && (IsAllocatable(symbol) || symbol.Rank() > 0)) { + if (auto badUltimate{FindCoarrayUltimateComponent(*derived)}) { + // TODO: still an error in F'2023? + SayWithDeclaration(*badUltimate, + "Allocatable or array component '%s' may not have a coarray ultimate component '%s'"_err_en_US, + symbol.name(), badUltimate.BuildResultDesignatorName()); + } + } + } + // Check CUDA attributes and special circumstances of being in device // subprograms const Scope &progUnit{GetProgramUnitContaining(symbol)}; @@ -3161,10 +3227,6 @@ parser::Messages CheckHelper::WhyNotInteroperableFunctionResult( msgs.Say(symbol.name(), "Interoperable function result must be scalar"_err_en_US); } - if (symbol.Corank()) { - msgs.Say(symbol.name(), - "Interoperable function result may not be a coarray"_err_en_US); - } return msgs; } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 17a6665dfb6a5..3fc18d1b7e219 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -6127,32 +6127,6 @@ void DeclarationVisitor::Post(const parser::ComponentDecl &x) { "POINTER or ALLOCATABLE"_err_en_US); } } - // TODO: This would be more appropriate in CheckDerivedType() - if (auto it{FindCoarrayUltimateComponent(*derived)}) { // C748 - std::string ultimateName{it.BuildResultDesignatorName()}; - // Strip off the leading "%" - if (ultimateName.length() > 1) { - ultimateName.erase(0, 1); - if (attrs.HasAny({Attr::POINTER, Attr::ALLOCATABLE})) { - evaluate::AttachDeclaration( - Say(name.source, - "A component with a POINTER or ALLOCATABLE attribute may " - "not " - "be of a type with a coarray ultimate component (named " - "'%s')"_err_en_US, - ultimateName), - derived->typeSymbol()); - } - if (!arraySpec().empty() || 
!coarraySpec().empty()) { - evaluate::AttachDeclaration( - Say(name.source, - "An array or coarray component may not be of a type with a " - "coarray ultimate component (named '%s')"_err_en_US, - ultimateName), - derived->typeSymbol()); - } - } - } } } if (OkToAddComponent(name)) { @@ -9889,6 +9863,21 @@ void ResolveNamesVisitor::ResolveSpecificationParts(ProgramTree &node) { object->set_cudaDataAttr(common::CUDADataAttr::Device); } } + // Main program local objects usually don't have an implied SAVE attribute, + // as one might think, but in the exceptional case of a derived type + // local object that contains a coarray, we have to mark it as an + // implied SAVE so that evaluate::IsSaved() will return true. + if (node.scope()->kind() == Scope::Kind::MainProgram) { + if (const auto *object{symbol.detailsIf()}) { + if (const DeclTypeSpec * type{object->type()}) { + if (const DerivedTypeSpec * derived{type->AsDerived()}) { + if (!IsSaved(symbol) && FindCoarrayPotentialComponent(*derived)) { + SetImplicitAttr(symbol, Attr::SAVE); + } + } + } + } + } } } diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 7544731a682ec..5bb8bae83a787 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -1386,6 +1386,13 @@ template class ComponentIterator; template class ComponentIterator; template class ComponentIterator; +PotentialComponentIterator::const_iterator FindCoarrayPotentialComponent( + const DerivedTypeSpec &derived) { + PotentialComponentIterator potentials{derived}; + return std::find_if(potentials.begin(), potentials.end(), + [](const Symbol &symbol) { return evaluate::IsCoarray(symbol); }); +} + UltimateComponentIterator::const_iterator FindCoarrayUltimateComponent( const DerivedTypeSpec &derived) { UltimateComponentIterator ultimates{derived}; diff --git a/flang/test/Lower/pre-fir-tree04.f90 b/flang/test/Lower/pre-fir-tree04.f90 index e5f8042458542..07077ff0473dd 100644 --- 
a/flang/test/Lower/pre-fir-tree04.f90 +++ b/flang/test/Lower/pre-fir-tree04.f90 @@ -5,6 +5,7 @@ ! CHECK: Subroutine test_coarray Subroutine test_coarray use iso_fortran_env, only: team_type, event_type, lock_type + save type(team_type) :: t type(event_type) :: done[*] type(lock_type) :: alock[*] diff --git a/flang/test/Semantics/allocate11.f90 b/flang/test/Semantics/allocate11.f90 index 6440248b6f4a9..1b7495e9fc07d 100644 --- a/flang/test/Semantics/allocate11.f90 +++ b/flang/test/Semantics/allocate11.f90 @@ -38,7 +38,14 @@ subroutine C937(var) type B type(A) y - !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'y%x') + !ERROR: Allocatable or array component 'forward' may not have a coarray ultimate component '%y%x' + type(B), allocatable :: forward + real :: u + end type + + type B2 + type(A) y + !ERROR: Pointer 'forward' may not have a coarray potential component '%y%x' type(B), pointer :: forward real :: u end type @@ -48,11 +55,14 @@ subroutine C937(var) end type type D - !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'x') - type(A), pointer :: potential + !ERROR: Allocatable or array component 'potential' may not have a coarray ultimate component '%x' + type(A), allocatable :: potential end type - + type D2 + !ERROR: Pointer 'potential' may not have a coarray potential component '%x' + type(A), pointer :: potential + end type class(*), allocatable :: var ! unlimited polymorphic is the ONLY way to get an allocatable/pointer 'var' that can be diff --git a/flang/test/Semantics/assign02.f90 b/flang/test/Semantics/assign02.f90 index 707d5ed3cfaa5..a40d204982b2f 100644 --- a/flang/test/Semantics/assign02.f90 +++ b/flang/test/Semantics/assign02.f90 @@ -74,8 +74,8 @@ subroutine s4(x) ! 
C1020 subroutine s5 - real, target :: x[*] - real, target, volatile :: y[*] + real, target, save :: x[*] + real, target, volatile, save :: y[*] real, pointer :: p real, pointer, volatile :: q p => x @@ -148,7 +148,7 @@ function f2() ! C1026 (R1037) A data-target shall not be a coindexed object. subroutine s10 - real, target :: a[*] + real, target, save :: a[*] real, pointer :: b !ERROR: A coindexed object may not be a pointer target b => a[1] diff --git a/flang/test/Semantics/associated.f90 b/flang/test/Semantics/associated.f90 index 1432744806599..c814980377b9f 100644 --- a/flang/test/Semantics/associated.f90 +++ b/flang/test/Semantics/associated.f90 @@ -90,7 +90,7 @@ subroutine test(assumedRank) type(t2) :: t2x type(t2), target :: t2xtarget integer, target :: targetIntArr(2) - integer, target :: targetIntCoarray[*] + integer, target, save :: targetIntCoarray[*] integer, pointer :: intPointerArr(:) procedure(objPtrFunc), pointer :: objPtrFuncPointer diff --git a/flang/test/Semantics/bind-c09.f90 b/flang/test/Semantics/bind-c09.f90 index 953f2d751234f..e08e4f001c696 100644 --- a/flang/test/Semantics/bind-c09.f90 +++ b/flang/test/Semantics/bind-c09.f90 @@ -44,6 +44,6 @@ function func8() result(res) bind(c) end function func9() result(res) bind(c) - ! ERROR: Interoperable function result may not be a coarray + ! ERROR: Function result may not be a coarray integer :: res[10, *] end diff --git a/flang/test/Semantics/call10.f90 b/flang/test/Semantics/call10.f90 index 2d2f57934cd8a..81c28082a843f 100644 --- a/flang/test/Semantics/call10.f90 +++ b/flang/test/Semantics/call10.f90 @@ -200,8 +200,9 @@ pure subroutine s13 !ERROR: An image control statement may not appear in a pure subprogram sync all ! C1599 end subroutine - pure subroutine s14 - integer :: img, nimgs, i[*], tmp + pure subroutine s14(i) + integer :: img, nimgs, tmp + integer, intent(in out) :: i[*] ! 
implicit sync all img = this_image() nimgs = num_images() diff --git a/flang/test/Semantics/call12.f90 b/flang/test/Semantics/call12.f90 index 2e5591ad927da..cd4006a53b3e7 100644 --- a/flang/test/Semantics/call12.f90 +++ b/flang/test/Semantics/call12.f90 @@ -40,7 +40,9 @@ pure function test(ptr, in, hpd, hhpd) type(hasHiddenPtr), intent(in) :: hhpd type(hasPtr), allocatable :: alloc type(hasHiddenPtr), allocatable :: hpAlloc + !ERROR: Pointer 'hcp' may not have a coarray potential component '%co' type(hasCoarray), pointer :: hcp + type(hasCoarray), allocatable :: hca integer :: n common /block/ y external :: extfunc @@ -60,8 +62,8 @@ pure function test(ptr, in, hpd, hhpd) !BECAUSE: 'in' is an INTENT(IN) dummy argument in%a = 0. ! C1594(1) !ERROR: Left-hand side of assignment is not definable - !BECAUSE: A pure subprogram may not define the coindexed object 'hcp%co[1_8]' - hcp%co[1] = 0. ! C1594(1) + !BECAUSE: A pure subprogram may not define the coindexed object 'hca%co[1_8]' + hca%co[1] = 0. ! C1594(1) !ERROR: The left-hand side of a pointer assignment is not definable !BECAUSE: 'ptr' may not be defined in pure subprogram 'test' because it is a POINTER dummy argument of a pure function ptr => z ! C1594(2) diff --git a/flang/test/Semantics/change_team01.f90 b/flang/test/Semantics/change_team01.f90 index 43be1c10fb842..a5e53e98fc986 100644 --- a/flang/test/Semantics/change_team01.f90 +++ b/flang/test/Semantics/change_team01.f90 @@ -4,6 +4,7 @@ subroutine test use, intrinsic :: iso_fortran_env, only: team_type + save type(team_type) :: team integer, codimension[*] :: selector integer, codimension[2,*] :: selector2d diff --git a/flang/test/Semantics/coarrays01.f90 b/flang/test/Semantics/coarrays01.f90 index 0a6f88a7e748c..0dfcd1a41c95d 100644 --- a/flang/test/Semantics/coarrays01.f90 +++ b/flang/test/Semantics/coarrays01.f90 @@ -2,7 +2,7 @@ ! Test selector and team-value in CHANGE TEAM statement ! 
OK -subroutine s1 +subroutine s1(y) use iso_fortran_env, only: team_type type(team_type) :: t real :: y[10,*] @@ -11,7 +11,7 @@ subroutine s1 form team(1, t) end -subroutine s2 +subroutine s2(y,y2,x) use iso_fortran_env type(team_type) :: t real :: y[10,*], y2[*], x[*] @@ -27,7 +27,7 @@ subroutine s2 end team end -subroutine s3 +subroutine s3(y) type :: team_type end type type :: foo diff --git a/flang/test/Semantics/coarrays02.f90 b/flang/test/Semantics/coarrays02.f90 new file mode 100644 index 0000000000000..e52f3e3ef3a40 --- /dev/null +++ b/flang/test/Semantics/coarrays02.f90 @@ -0,0 +1,50 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! More coarray error tests. +module m + integer :: local[*] ! ok in module +end +program main + use iso_fortran_env + !ERROR: Coarray 'namedconst' may not be a named constant + !ERROR: Local coarray must have the SAVE attribute + integer, parameter :: namedConst = 123 + codimension namedConst[*] + !ERROR: Coarray 'coarr1' may not be in COMMON block '//' + real :: coarr1[*] + common//coarr1 + !ERROR: Variable 'event' with EVENT_TYPE or LOCK_TYPE must be a coarray + type(event_type) event + !ERROR: Variable 'lock' with EVENT_TYPE or LOCK_TYPE must be a coarray + type(lock_type) lock + integer :: local[*] ! ok in main +end + +function func1() + !ERROR: Function result may not be a coarray + integer :: func1[*] + !ERROR: Local coarray must have the SAVE attribute + integer :: local[*] + integer, save :: saved[*] ! ok + integer :: inited[*] = 1 ! 
ok + func = 1 +end + +function func2() + type t + real, allocatable :: comp[:] + end type + type t2 + !ERROR: Allocatable or array component 'allo' may not have a coarray ultimate component '%comp' + type(t), allocatable :: allo + !ERROR: Allocatable or array component 'arr' may not have a coarray ultimate component '%comp' + type(t) :: arr(1) + end type + !ERROR: Function result 'func2' may not have a coarray potential component '%comp' + type(t) func2 + !ERROR: Pointer 'ptr' may not have a coarray potential component '%comp' + type(t), pointer :: ptr + !ERROR: Coarray 'coarr' may not have a coarray potential component '%comp' + type(t), save :: coarr[*] + !ERROR: Local variable 'local' without the SAVE attribute may not have a coarray potential subobject component '%comp' + type(t) :: local +end diff --git a/flang/test/Semantics/critical02.f90 b/flang/test/Semantics/critical02.f90 index 692b06b025861..9c957d1e859c5 100644 --- a/flang/test/Semantics/critical02.f90 +++ b/flang/test/Semantics/critical02.f90 @@ -61,7 +61,7 @@ end subroutine test6 subroutine test7() use iso_fortran_env - type(event_type) :: x[*], y[*] + type(event_type), save :: x[*], y[*] critical !ERROR: An image control statement is not allowed in a CRITICAL construct event post (x) diff --git a/flang/test/Semantics/doconcurrent01.f90 b/flang/test/Semantics/doconcurrent01.f90 index 9d2c9e1ab3115..ab14d970b8501 100644 --- a/flang/test/Semantics/doconcurrent01.f90 +++ b/flang/test/Semantics/doconcurrent01.f90 @@ -69,7 +69,7 @@ end subroutine do_concurrent_test2 subroutine s1() use iso_fortran_env - type(event_type) :: x[*] + type(event_type), save :: x[*] do concurrent (i = 1:n) !ERROR: An image control statement is not allowed in DO CONCURRENT event post (x) @@ -78,7 +78,7 @@ end subroutine s1 subroutine s2() use iso_fortran_env - type(event_type) :: x[*] + type(event_type), save :: x[*] do concurrent (i = 1:n) !ERROR: An image control statement is not allowed in DO CONCURRENT event wait (x) @@ 
-124,8 +124,7 @@ subroutine s6() type(type0) :: type1_field end type - type(type1) :: pvar; - type(type1) :: qvar; + type(type1), save :: pvar, qvar integer, allocatable, dimension(:) :: array1 integer, allocatable, dimension(:) :: array2 integer, allocatable, codimension[:] :: ca, cb diff --git a/flang/test/Semantics/doconcurrent08.f90 b/flang/test/Semantics/doconcurrent08.f90 index 52b382741d073..e09d1ab32acb2 100644 --- a/flang/test/Semantics/doconcurrent08.f90 +++ b/flang/test/Semantics/doconcurrent08.f90 @@ -85,13 +85,13 @@ subroutine s1() type(HasAllocPolyType) :: nonAllocatableWithAllocPoly ! OK because the declared variable is not allocatable - type(HasAllocPolyCoarrayType) :: nonAllocatableWithAllocPolyCoarray + type(HasAllocPolyCoarrayType), save :: nonAllocatableWithAllocPolyCoarray ! Bad because even though the declared the allocatable component is a coarray type(HasAllocPolyCoarrayType), allocatable :: allocWithAllocPolyCoarray ! OK since it has no polymorphic component - type(HasAllocCoarrayType) :: nonAllocWithAllocCoarray + type(HasAllocCoarrayType), save :: nonAllocWithAllocCoarray ! 
OK since it has no component that's polymorphic, oops type(HasPointerPolyType), allocatable :: allocatableWithPointerPoly diff --git a/flang/test/Semantics/form_team01.f90 b/flang/test/Semantics/form_team01.f90 index 3b82e5b41666e..1510a8bb98f74 100644 --- a/flang/test/Semantics/form_team01.f90 +++ b/flang/test/Semantics/form_team01.f90 @@ -8,8 +8,7 @@ subroutine test integer :: team_index integer :: statvar character(len=50) :: errvar - integer, codimension[*] :: co_team_number - integer, codimension[*] :: co_team_index + integer, codimension[*], save :: co_team_number, co_team_index type(team_type), dimension(1) :: array_team integer, dimension(1) :: array_team_number integer, dimension(1) :: array_team_index diff --git a/flang/test/Semantics/init01.f90 b/flang/test/Semantics/init01.f90 index 65d524b16a23a..a1313e7c234d5 100644 --- a/flang/test/Semantics/init01.f90 +++ b/flang/test/Semantics/init01.f90 @@ -18,6 +18,7 @@ subroutine objectpointers(j) end type type(t1), target, save :: o1 type(t1), save :: o2 +!ERROR: Local variable 'o3' without the SAVE attribute may not have a coarray potential subobject component '%c2' type(t1), target :: o3 !ERROR: An initial data target may not be a reference to an ALLOCATABLE 'x1' real, pointer :: p1 => x1 diff --git a/flang/test/Semantics/resolve07.f90 b/flang/test/Semantics/resolve07.f90 index 481094a51335f..a280769ac2525 100644 --- a/flang/test/Semantics/resolve07.f90 +++ b/flang/test/Semantics/resolve07.f90 @@ -18,6 +18,7 @@ subroutine s2 end subroutine s3 + save dimension :: x(4), x2(8) !ERROR: The dimensions of 'x' have already been declared allocatable :: x(:) diff --git a/flang/test/Semantics/resolve50.f90 b/flang/test/Semantics/resolve50.f90 index cc4dc030a9905..5650fff32e16a 100644 --- a/flang/test/Semantics/resolve50.f90 +++ b/flang/test/Semantics/resolve50.f90 @@ -3,6 +3,7 @@ subroutine s1 use iso_fortran_env + save type(team_type) :: t complex :: x[*] real :: y[*] @@ -22,7 +23,7 @@ subroutine s1 subroutine s2 use 
iso_fortran_env type(team_type) :: t - real :: y[10,*], y2[*], x[*] + real, save :: y[10,*], y2[*], x[*] ! C1113 !ERROR: The codimensions of 'x' have already been declared change team(t, x[10,*] => y, x[*] => y2) diff --git a/flang/test/Semantics/resolve55.f90 b/flang/test/Semantics/resolve55.f90 index 0a40a19435748..5f7a3044e834c 100644 --- a/flang/test/Semantics/resolve55.f90 +++ b/flang/test/Semantics/resolve55.f90 @@ -81,7 +81,7 @@ end subroutine s6 subroutine s7() ! Cannot have a coarray - integer, codimension[*] :: coarray_var + integer, codimension[*], save :: coarray_var !ERROR: Coarray 'coarray_var' not allowed in a LOCAL locality-spec do concurrent(i=1:5) local(coarray_var) end do diff --git a/flang/test/Semantics/resolve88.f90 b/flang/test/Semantics/resolve88.f90 index 3794e9b28a6d3..34eb192347d02 100644 --- a/flang/test/Semantics/resolve88.f90 +++ b/flang/test/Semantics/resolve88.f90 @@ -64,11 +64,11 @@ module m type testType type(coarrayType) :: goodField - !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + !ERROR: Pointer 'pointerfield' may not have a coarray potential component '%goodcoarrayfield' type(coarrayType), pointer :: pointerField - !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + !ERROR: Allocatable or array component 'allocatablefield' may not have a coarray ultimate component '%goodcoarrayfield' type(coarrayType), allocatable :: allocatableField - !ERROR: An array or coarray component may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + !ERROR: Allocatable or array component 'arrayfield' may not have a coarray ultimate component '%goodcoarrayfield' type(coarrayType), dimension(3) :: arrayField end type testType diff --git a/flang/test/Semantics/resolve94.f90 b/flang/test/Semantics/resolve94.f90 index 
19c06ad0d1622..75755fb2b2038 100644 --- a/flang/test/Semantics/resolve94.f90 +++ b/flang/test/Semantics/resolve94.f90 @@ -6,6 +6,7 @@ ! C931 A stat-variable in an image-selector shall not be a coindexed object. subroutine s1() use ISO_FORTRAN_ENV + save type(team_type) :: team1, team2 real :: rCoarray[10,20,*] real :: rVar1, rVar2 diff --git a/flang/test/Semantics/this_image01.f90 b/flang/test/Semantics/this_image01.f90 index fdcccdaeed0e3..eb25cd4e5a7ef 100644 --- a/flang/test/Semantics/this_image01.f90 +++ b/flang/test/Semantics/this_image01.f90 @@ -8,7 +8,7 @@ subroutine test type(team_type) :: coteam[*] integer :: coscalar[*], coarray(3)[*] save :: coteam, coscalar, coarray - real coarray1[*], coarray2[2,*], coarray3[2,3,*] + real, save :: coarray1[*], coarray2[2,*], coarray3[2,3,*] integer indices(3) ! correct calls, should produce no errors From 7fbe89089241ecb5a1addced944c03bce7663967 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:28:34 -0800 Subject: [PATCH 009/123] [flang] Don't flag CLASS(*) ASSOCIATED() pointer or target as error (#125890) As I read the standard, an unlimited polymorphic pointer or target should be viewed as compatible with any data target or data pointer when used in the two-argument form of the intrinsic function ASSOCIATED(). Fixes https://github.com/llvm/llvm-project/issues/125774. 
--- flang/lib/Semantics/check-call.cpp | 5 ++++- flang/test/Semantics/bug125774.f90 | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 flang/test/Semantics/bug125774.f90 diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 93ae05e2902f0..5287c4f27005c 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -1483,6 +1483,8 @@ static void CheckAssociated(evaluate::ActualArguments &arguments, } if (const auto &targetArg{arguments[1]}) { // The standard requires that the TARGET= argument, when present, + // be type compatible with the POINTER= for a data pointer. In + // the case of procedure pointers, the standard requires that it // be a valid RHS for a pointer assignment that has the POINTER= // argument as its LHS. Some popular compilers misinterpret this // requirement more strongly than necessary, and actually validate @@ -1589,7 +1591,8 @@ static void CheckAssociated(evaluate::ActualArguments &arguments, } if (const auto pointerType{pointerArg->GetType()}) { if (const auto targetType{targetArg->GetType()}) { - ok = pointerType->IsTkCompatibleWith(*targetType); + ok = pointerType->IsTkCompatibleWith(*targetType) || + targetType->IsTkCompatibleWith(*pointerType); } } } else { diff --git a/flang/test/Semantics/bug125774.f90 b/flang/test/Semantics/bug125774.f90 new file mode 100644 index 0000000000000..9844f1ec5eb1e --- /dev/null +++ b/flang/test/Semantics/bug125774.f90 @@ -0,0 +1,15 @@ +! 
RUN: %python %S/test_errors.py %s %flang_fc1 +type t +end type +real, pointer :: rptr +type(t), pointer :: tptr +class(*), pointer :: ulpp +print *, associated(rptr, ulpp) +print *, associated(ulpp, rptr) +print *, associated(tptr, ulpp) +print *, associated(ulpp, tptr) +!ERROR: Arguments of ASSOCIATED() must be a pointer and an optional valid target +print *, associated(rptr, tptr) +!ERROR: Arguments of ASSOCIATED() must be a pointer and an optional valid target +print *, associated(tptr, rptr) +end From b275887b72019d4e08b63c3e106b5f00a987e04b Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:29:00 -0800 Subject: [PATCH 010/123] [flang] Fix bogus error on defined I/O procedure. (#125898) The check that "v_list" be deferred shape is just wrong; there are no deferred shape non-pointer non-allocatable dummy arguments in Fortran. Correct to check for an assumed shape dummy argument. And de-split the error messages that were split across multiple source lines, making them much harder to find with grep. Fixes https://github.com/llvm/llvm-project/issues/125878. 
--- flang/lib/Semantics/check-declarations.cpp | 30 ++++++++-------------- flang/test/Semantics/io11.f90 | 2 +- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 40a529b37e7dc..c1eb78f9fbc3d 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -3430,8 +3430,7 @@ void CheckHelper::CheckDioDummyIsDerived(const Symbol &subp, const Symbol &arg, } } else { messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure must have a" - " derived type"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must have a derived type"_err_en_US, arg.name()); } } @@ -3447,16 +3446,14 @@ void CheckHelper::CheckDioDummyIsDefaultInteger( } } messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure" - " must be an INTEGER of default KIND"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must be an INTEGER of default KIND"_err_en_US, arg.name()); } void CheckHelper::CheckDioDummyIsScalar(const Symbol &subp, const Symbol &arg) { if (arg.Rank() > 0 || arg.Corank() > 0) { messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure" - " must be a scalar"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must be a scalar"_err_en_US, arg.name()); } } @@ -3533,8 +3530,7 @@ void CheckHelper::CheckDioAssumedLenCharacterArg(const Symbol &subp, context_.defaultKinds().GetDefaultKind( TypeCategory::Character))) { messages_.Say(arg->name(), - "Dummy argument '%s' of a defined input/output procedure" - " must be assumed-length CHARACTER of default kind"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must be assumed-length CHARACTER of default kind"_err_en_US, arg->name()); } } @@ -3547,10 +3543,9 @@ void CheckHelper::CheckDioVlistArg( CheckDioDummyIsDefaultInteger(subp, *arg); 
CheckDioDummyAttrs(subp, *arg, Attr::INTENT_IN); const auto *objectDetails{arg->detailsIf()}; - if (!objectDetails || !objectDetails->shape().CanBeDeferredShape()) { + if (!objectDetails || !objectDetails->shape().CanBeAssumedShape()) { messages_.Say(arg->name(), - "Dummy argument '%s' of a defined input/output procedure must be" - " deferred shape"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must be assumed shape"_err_en_US, arg->name()); } } @@ -3565,8 +3560,7 @@ void CheckHelper::CheckDioArgCount( : 4)}; if (argCount != requiredArgCount) { SayWithDeclaration(subp, - "Defined input/output procedure '%s' must have" - " %d dummy arguments rather than %d"_err_en_US, + "Defined input/output procedure '%s' must have %d dummy arguments rather than %d"_err_en_US, subp.name(), requiredArgCount, argCount); context_.SetError(subp); } @@ -3578,15 +3572,13 @@ void CheckHelper::CheckDioDummyAttrs( Attrs attrs{arg.attrs()}; if (!attrs.test(goodIntent)) { messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure" - " must have intent '%s'"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure must have intent '%s'"_err_en_US, arg.name(), AttrToString(goodIntent)); } attrs = attrs - Attr::INTENT_IN - Attr::INTENT_OUT - Attr::INTENT_INOUT; if (!attrs.empty()) { messages_.Say(arg.name(), - "Dummy argument '%s' of a defined input/output procedure may not have" - " any attributes"_err_en_US, + "Dummy argument '%s' of a defined input/output procedure may not have any attributes"_err_en_US, arg.name()); } } @@ -3599,8 +3591,8 @@ void CheckHelper::CheckDefinedIoProc(const Symbol &symbol, const auto *binding{ultimate.detailsIf()}; const Symbol &specific{*(binding ? 
&binding->symbol() : &ultimate)}; if (ultimate.attrs().test(Attr::NOPASS)) { // C774 - messages_.Say("Defined input/output procedure '%s' may not have NOPASS " - "attribute"_err_en_US, + messages_.Say( + "Defined input/output procedure '%s' may not have NOPASS attribute"_err_en_US, ultimate.name()); context_.SetError(ultimate); } diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90 index 9b5ad1b8427d9..23f0081f4b9fa 100644 --- a/flang/test/Semantics/io11.f90 +++ b/flang/test/Semantics/io11.f90 @@ -355,7 +355,7 @@ subroutine formattedReadProc(dtv, unit, iotype, vlist, iostat, iomsg) class(t), intent(inout) :: dtv integer, intent(in) :: unit character(len=*), intent(in) :: iotype - !ERROR: Dummy argument 'vlist' of a defined input/output procedure must be deferred shape + !ERROR: Dummy argument 'vlist' of a defined input/output procedure must be assumed shape integer, intent(in) :: vlist(5) integer, intent(out) :: iostat character(len=*), intent(inout) :: iomsg From 13cc44e74c72f72547ec630f7a10892c3ee33429 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:29:35 -0800 Subject: [PATCH 011/123] [flang] Silence warnings from hermetic module files (#128763) Modules read from module files must have their symbols tagged with the ModFile flag to suppress all warnings messages that might be emitted for their contents. (Actionable warnings will have been emitted when the modules were originally compiled, so we don't want to repeat them later when the modules are USE'd.) The module symbols of the additional modules in hermetic module files were not being tagged with that flag; fix. 
--- flang/lib/Semantics/mod-file.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 82c43d96bea44..1dfd9c35b3f43 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -1546,6 +1546,10 @@ Scope *ModFileReader::Read(SourceName name, std::optional isIntrinsic, Scope &hermeticScope{topScope.MakeScope(Scope::Kind::Global)}; context_.set_currentHermeticModuleFileScope(&hermeticScope); ResolveNames(context_, hermeticModules, hermeticScope); + for (auto &[_, ref] : hermeticScope) { + CHECK(ref->has()); + ref->set(Symbol::Flag::ModFile); + } } GetModuleDependences(context_.moduleDependences(), sourceFile->content()); ResolveNames(context_, parseTree, topScope); From 6c9681cf2ca4a48434efad5aac4e78a308c68f59 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:30:55 -0800 Subject: [PATCH 012/123] [flang] Account for accessibility in extensibility check (#128765) A derived type with a component of the same name as the type is not extensible... unless the extension occurs in another module where the conflicting component is inaccessible. Fixes https://github.com/llvm/llvm-project/issues/126114. 
--- flang/lib/Semantics/resolve-names.cpp | 13 ++++++++----- flang/test/Semantics/resolve34.f90 | 5 +++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 3fc18d1b7e219..514c0b88d350a 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -7268,17 +7268,20 @@ bool DeclarationVisitor::OkToAddComponent( std::optional msg; std::optional warning; if (context().HasError(*prev)) { // don't pile on - } else if (extends) { - msg = "Type cannot be extended as it has a component named" - " '%s'"_err_en_US; } else if (CheckAccessibleSymbol(currScope(), *prev)) { // inaccessible component -- redeclaration is ok - if (context().ShouldWarn( - common::UsageWarning::RedeclaredInaccessibleComponent)) { + if (extends) { + // The parent type has a component of same name, but it remains + // extensible outside its module since that component is PRIVATE. + } else if (context().ShouldWarn( + common::UsageWarning::RedeclaredInaccessibleComponent)) { msg = "Component '%s' is inaccessibly declared in or as a parent of this derived type"_warn_en_US; warning = common::UsageWarning::RedeclaredInaccessibleComponent; } + } else if (extends) { + msg = + "Type cannot be extended as it has a component named '%s'"_err_en_US; } else if (prev->test(Symbol::Flag::ParentComp)) { msg = "'%s' is a parent type of this type and so cannot be a component"_err_en_US; diff --git a/flang/test/Semantics/resolve34.f90 b/flang/test/Semantics/resolve34.f90 index 4ddb8fd0b0eb8..39709a362b363 100644 --- a/flang/test/Semantics/resolve34.f90 +++ b/flang/test/Semantics/resolve34.f90 @@ -45,6 +45,11 @@ module m4 type, extends(t1) :: t2 end type end +module m4a + use m4 + type, extends(t1) :: t3 ! 
ok, inaccessible component + end type +end module m5 type :: t1 From 8e135f2c74c8c4df4cf4327e1e85f41f2d1dbf89 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:31:24 -0800 Subject: [PATCH 013/123] =?UTF-8?q?[flang]=20Accept=20proc=20ptr=20functio?= =?UTF-8?q?n=20result=20as=20actual=20argument=20without=20IN=E2=80=A6=20(?= =?UTF-8?q?#128771)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …TENT A dummy procedure pointer with no INTENT attribute may associate with an actual argument that is the result of a reference to a function that returns a procedure pointer, we think. Fixes https://github.com/llvm/llvm-project/issues/126950. --- flang/lib/Semantics/check-call.cpp | 40 +++++++++++++++++----------- flang/test/Semantics/call09.f90 | 17 ++++++------ flang/test/Semantics/call24.f90 | 2 +- flang/test/Semantics/definable01.f90 | 3 ++- 4 files changed, 35 insertions(+), 27 deletions(-) diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 5287c4f27005c..8485a7a1f5bc8 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -1049,8 +1049,8 @@ static void CheckProcedureArg(evaluate::ActualArgument &arg, SemanticsContext &context, bool ignoreImplicitVsExplicit) { evaluate::FoldingContext &foldingContext{context.foldingContext()}; parser::ContextualMessages &messages{foldingContext.messages()}; - auto restorer{ - messages.SetLocation(arg.sourceLocation().value_or(messages.at()))}; + parser::CharBlock location{arg.sourceLocation().value_or(messages.at())}; + auto restorer{messages.SetLocation(location)}; const characteristics::Procedure &interface { dummy.procedure.value() }; if (const auto *expr{arg.UnwrapExpr()}) { bool dummyIsPointer{ @@ -1175,22 +1175,30 @@ static void CheckProcedureArg(evaluate::ActualArgument &arg, dummyName); } } - if (dummyIsPointer && dummy.intent != common::Intent::In) { - const Symbol 
*last{GetLastSymbol(*expr)}; - if (last && IsProcedurePointer(*last)) { - if (dummy.intent != common::Intent::Default && - IsIntentIn(last->GetUltimate())) { // 19.6.8 - messages.Say( - "Actual argument associated with procedure pointer %s may not be INTENT(IN)"_err_en_US, - dummyName); - } - } else if (!(dummy.intent == common::Intent::Default && - IsNullProcedurePointer(*expr))) { - // 15.5.2.9(5) -- dummy procedure POINTER - // Interface compatibility has already been checked above + if (dummyIsPointer) { + if (dummy.intent == common::Intent::In) { + // need not be definable, can be a target + } else if (!IsProcedurePointer(*expr)) { messages.Say( - "Actual argument associated with procedure pointer %s must be a pointer unless INTENT(IN)"_err_en_US, + "Actual argument associated with procedure pointer %s is not a procedure pointer"_err_en_US, dummyName); + } else if (dummy.intent == common::Intent::Default) { + // ok, needs to be definable only if defined at run time + } else { + DefinabilityFlags flags{DefinabilityFlag::PointerDefinition}; + if (dummy.intent != common::Intent::Out) { + flags.set(DefinabilityFlag::DoNotNoteDefinition); + } + if (auto whyNot{WhyNotDefinable( + location, context.FindScope(location), flags, *expr)}) { + if (auto *msg{messages.Say( + "Actual argument associated with INTENT(%s) procedure pointer %s is not definable"_err_en_US, + dummy.intent == common::Intent::Out ? "OUT" : "IN OUT", + dummyName)}) { + msg->Attach( + std::move(whyNot->set_severity(parser::Severity::Because))); + } + } } } } else { diff --git a/flang/test/Semantics/call09.f90 b/flang/test/Semantics/call09.f90 index b8583ba4a4907..58b2382f600ef 100644 --- a/flang/test/Semantics/call09.f90 +++ b/flang/test/Semantics/call09.f90 @@ -82,27 +82,26 @@ subroutine test1 ! 
15.5.2.9(5) call s01(null(intPtr)) !ERROR: Actual argument associated with procedure dummy argument 'p=' is typeless call s01(B"0101") - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' is not a procedure pointer call s02(realfunc) call s02(p) ! ok !ERROR: Actual procedure argument has interface incompatible with dummy argument 'p=': function results have distinct types: REAL(4) vs INTEGER(4) call s02(ip) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) - call s02(procptr()) + call s02(procptr()) ! believed to be ok call s02(null()) ! ok - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with INTENT(IN OUT) procedure pointer dummy argument 'p=' is not definable + !BECAUSE: 'NULL()' is a null pointer call s05(null()) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' is not a procedure pointer call s02(sin) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' is not a procedure pointer call s02b(realfunc) call s02b(p) ! ok !ERROR: Actual argument function associated with procedure dummy argument 'p=' is not compatible: function results have distinct types: REAL(4) vs INTEGER(4) call s02b(ip) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) - call s02b(procptr()) + call s02b(procptr()) ! 
believed to be ok call s02b(null()) - !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' must be a pointer unless INTENT(IN) + !ERROR: Actual argument associated with procedure pointer dummy argument 'p=' is not a procedure pointer call s02b(sin) end subroutine diff --git a/flang/test/Semantics/call24.f90 b/flang/test/Semantics/call24.f90 index 78ee17b488676..c1053db93648f 100644 --- a/flang/test/Semantics/call24.f90 +++ b/flang/test/Semantics/call24.f90 @@ -39,7 +39,7 @@ subroutine test() !ERROR: References to the procedure 'bar' require an explicit interface !BECAUSE: a dummy procedure is optional or a pointer !WARNING: If the procedure's interface were explicit, this reference would be in error - !BECAUSE: Actual argument associated with procedure pointer dummy argument 'a_pointer=' must be a pointer unless INTENT(IN) + !BECAUSE: Actual argument associated with procedure pointer dummy argument 'a_pointer=' is not a procedure pointer call bar(sin) !ERROR: References to the procedure 'baz' require an explicit interface diff --git a/flang/test/Semantics/definable01.f90 b/flang/test/Semantics/definable01.f90 index d3b31ee38b2a3..5af7e954e4171 100644 --- a/flang/test/Semantics/definable01.f90 +++ b/flang/test/Semantics/definable01.f90 @@ -77,7 +77,8 @@ subroutine test3(objp, procp) !CHECK: error: Actual argument associated with INTENT(IN OUT) dummy argument 'op=' is not definable !CHECK: because: 'objp' is an INTENT(IN) dummy argument call test3a(objp) - !CHECK: error: Actual argument associated with procedure pointer dummy argument 'pp=' may not be INTENT(IN) + !CHECK: error: Actual argument associated with INTENT(IN OUT) procedure pointer dummy argument 'pp=' is not definable + !CHECK: because: 'procp' is an INTENT(IN) dummy argument call test3b(procp) end subroutine subroutine test3a(op) From c82fdf116699bbe6c5378f114e05c4fb9888526d Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:31:50 -0800 Subject: [PATCH 
014/123] [flang] Silence spurious error (#128777) When checking for conflicts between type-bound generic defined I/O procedures and non-type-bound defined I/O generic interfaces, don't worry about conflicts where the type-bound generic interface is inaccessible in the scope around the non-type-bound interface. Fixes https://github.com/llvm/llvm-project/issues/126797. --- flang/lib/Semantics/check-declarations.cpp | 7 ++--- flang/test/Semantics/io11.f90 | 31 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index c1eb78f9fbc3d..bf4bb447c9526 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -3398,11 +3398,12 @@ void CheckHelper::CheckAlreadySeenDefinedIo(const DerivedTypeSpec &derivedType, return; } if (const Scope * dtScope{derivedType.scope()}) { - if (auto iter{dtScope->find(generic.name())}; iter != dtScope->end()) { + if (auto iter{dtScope->find(generic.name())}; iter != dtScope->end() && + IsAccessible(*iter->second, generic.owner())) { for (auto specRef : iter->second->get().specificProcs()) { const Symbol &specific{specRef->get().symbol()}; - if (specific == proc) { // unambiguous, accept - continue; + if (specific == proc) { + continue; // unambiguous, accept } if (const auto *specDT{GetDtvArgDerivedType(specific)}; specDT && evaluate::AreSameDerivedType(derivedType, *specDT)) { diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90 index 23f0081f4b9fa..5d3d90271c0a8 100644 --- a/flang/test/Semantics/io11.f90 +++ b/flang/test/Semantics/io11.f90 @@ -689,3 +689,34 @@ module m26b procedure unformattedRead end interface end + +module m27a + type t + integer c + contains + procedure ur1 + generic, private :: read(unformatted) => ur1 + end type + contains + subroutine ur1(dtv,unit,iostat,iomsg) + class(t),intent(inout) :: dtv + integer,intent(in) :: unit + 
integer,intent(out) :: iostat + character(*),intent(inout) :: iomsg + read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c + end +end +module m27b + use m27a + interface read(unformatted) + module procedure ur2 ! ok, t's generic is inaccessible + end interface + contains + subroutine ur2(dtv,unit,iostat,iomsg) + class(t),intent(inout) :: dtv + integer,intent(in) :: unit + integer,intent(out) :: iostat + character(*),intent(inout) :: iomsg + read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c + end +end From e4962962928ca98e408bb2d20f1d8bf75adf4988 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:32:12 -0800 Subject: [PATCH 015/123] [flang] Refine handling of SELECT TYPE associations in analyses (#128935) A few bits of semantic checking need a variant of the ResolveAssociations utility function that stops when hitting a construct entity for a type or class guard. This is necessary for cases like the bug below where the analysis is concerned with the type of the name in context, rather than its shape or storage or whatever. So add a flag to ResolveAssociations and GetAssociationRoot to make this happen, and use it at the appropriate call sites. Fixes https://github.com/llvm/llvm-project/issues/128608. 
--- flang/include/flang/Evaluate/tools.h | 4 ++-- flang/include/flang/Semantics/symbol.h | 3 +++ flang/lib/Evaluate/tools.cpp | 10 ++++++---- flang/lib/Semantics/check-call.cpp | 6 +++--- flang/lib/Semantics/check-do-forall.cpp | 14 +++++++------- flang/lib/Semantics/expression.cpp | 2 +- flang/lib/Semantics/resolve-names.cpp | 1 + flang/lib/Semantics/symbol.cpp | 1 + flang/lib/Semantics/tools.cpp | 8 ++++---- flang/test/Semantics/doconcurrent08.f90 | 12 ++++++++++++ 10 files changed, 40 insertions(+), 21 deletions(-) diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 352f6b36458ce..f94981011b6e5 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1417,8 +1417,8 @@ inline bool IsAssumedSizeArray(const Symbol &symbol) { // In a SELECT RANK construct, ResolveAssociations() stops at a // RANK(n) or RANK(*) case symbol, but traverses the selector for // RANK DEFAULT. -const Symbol &ResolveAssociations(const Symbol &); -const Symbol &GetAssociationRoot(const Symbol &); +const Symbol &ResolveAssociations(const Symbol &, bool stopAtTypeGuard = false); +const Symbol &GetAssociationRoot(const Symbol &, bool stopAtTypeGuard = false); const Symbol *FindCommonBlockContaining(const Symbol &); int CountLenParameters(const DerivedTypeSpec &); diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 4ae2775c0f849..715811885c219 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -329,9 +329,11 @@ class AssocEntityDetails : public EntityDetails { } bool IsAssumedSize() const { return rank_.value_or(0) == isAssumedSize; } bool IsAssumedRank() const { return rank_.value_or(0) == isAssumedRank; } + bool isTypeGuard() const { return isTypeGuard_; } void set_rank(int rank); void set_IsAssumedSize(); void set_IsAssumedRank(); + void set_isTypeGuard(bool yes = true); private: MaybeExpr expr_; @@ -340,6 +342,7 @@ 
class AssocEntityDetails : public EntityDetails { static constexpr int isAssumedSize{-1}; // RANK(*) static constexpr int isAssumedRank{-2}; // RANK DEFAULT std::optional rank_; + bool isTypeGuard_{false}; // TYPE IS or CLASS IS, but not CLASS(DEFAULT) }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const AssocEntityDetails &); diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 7181265b862fb..36b7d0a69d2ba 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1540,10 +1540,12 @@ bool CheckForCoindexedObject(parser::ContextualMessages &messages, namespace Fortran::semantics { -const Symbol &ResolveAssociations(const Symbol &original) { +const Symbol &ResolveAssociations( + const Symbol &original, bool stopAtTypeGuard) { const Symbol &symbol{original.GetUltimate()}; if (const auto *details{symbol.detailsIf()}) { - if (!details->rank()) { // Not RANK(n) or RANK(*) + if (!details->rank() /* not RANK(n) or RANK(*) */ && + !(stopAtTypeGuard && details->isTypeGuard())) { if (const Symbol * nested{UnwrapWholeSymbolDataRef(details->expr())}) { return ResolveAssociations(*nested); } @@ -1567,8 +1569,8 @@ static const Symbol *GetAssociatedVariable(const AssocEntityDetails &details) { return nullptr; } -const Symbol &GetAssociationRoot(const Symbol &original) { - const Symbol &symbol{ResolveAssociations(original)}; +const Symbol &GetAssociationRoot(const Symbol &original, bool stopAtTypeGuard) { + const Symbol &symbol{ResolveAssociations(original, stopAtTypeGuard)}; if (const auto *details{symbol.detailsIf()}) { if (const Symbol * root{GetAssociatedVariable(*details)}) { return *root; diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 8485a7a1f5bc8..4042d7504396c 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -535,9 +535,6 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, if (actualLastSymbol) { 
actualLastSymbol = &ResolveAssociations(*actualLastSymbol); } - const ObjectEntityDetails *actualLastObject{actualLastSymbol - ? actualLastSymbol->detailsIf() - : nullptr}; int actualRank{actualType.Rank()}; if (dummy.type.attrs().test( characteristics::TypeAndShape::Attr::AssumedShape)) { @@ -689,6 +686,9 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } } } + const ObjectEntityDetails *actualLastObject{actualLastSymbol + ? actualLastSymbol->detailsIf() + : nullptr}; if (actualLastObject && actualLastObject->IsCoarray() && dummy.attrs.test(characteristics::DummyDataObject::Attr::Allocatable) && dummy.intent == common::Intent::Out && diff --git a/flang/lib/Semantics/check-do-forall.cpp b/flang/lib/Semantics/check-do-forall.cpp index 84e6b6455cc61..cc1d4bf58745a 100644 --- a/flang/lib/Semantics/check-do-forall.cpp +++ b/flang/lib/Semantics/check-do-forall.cpp @@ -154,7 +154,8 @@ class DoConcurrentBodyEnforce { // of its components? static bool MightDeallocatePolymorphic(const Symbol &original, const std::function &WillDeallocate) { - const Symbol &symbol{ResolveAssociations(original)}; + const Symbol &symbol{ + ResolveAssociations(original, /*stopAtTypeGuard=*/true)}; // Check the entity itself, no coarray exception here if (IsPolymorphicAllocatable(symbol)) { return true; @@ -182,11 +183,10 @@ class DoConcurrentBodyEnforce { impure.name(), reason); } - void SayDeallocateOfPolymorph( + void SayDeallocateOfPolymorphic( parser::CharBlock location, const Symbol &entity, const char *reason) { context_.SayWithDecl(entity, location, - "Deallocation of a polymorphic entity caused by %s" - " not allowed in DO CONCURRENT"_err_en_US, + "Deallocation of a polymorphic entity caused by %s not allowed in DO CONCURRENT"_err_en_US, reason); } @@ -206,7 +206,7 @@ class DoConcurrentBodyEnforce { const Symbol &entity{*pair.second}; if (IsAllocatable(entity) && !IsSaved(entity) && MightDeallocatePolymorphic(entity, DeallocateAll)) { - 
SayDeallocateOfPolymorph(endBlockStmt.source, entity, reason); + SayDeallocateOfPolymorphic(endBlockStmt.source, entity, reason); } if (const Symbol * impure{HasImpureFinal(entity)}) { SayDeallocateWithImpureFinal(entity, reason, *impure); @@ -222,7 +222,7 @@ class DoConcurrentBodyEnforce { if (const Symbol * entity{GetLastName(variable).symbol}) { const char *reason{"assignment"}; if (MightDeallocatePolymorphic(*entity, DeallocateNonCoarray)) { - SayDeallocateOfPolymorph(variable.GetSource(), *entity, reason); + SayDeallocateOfPolymorphic(variable.GetSource(), *entity, reason); } if (const auto *assignment{GetAssignment(stmt)}) { const auto &lhs{assignment->lhs}; @@ -257,7 +257,7 @@ class DoConcurrentBodyEnforce { const DeclTypeSpec *entityType{entity.GetType()}; if ((entityType && entityType->IsPolymorphic()) || // POINTER case MightDeallocatePolymorphic(entity, DeallocateAll)) { - SayDeallocateOfPolymorph( + SayDeallocateOfPolymorphic( currentStatementSourcePosition_, entity, reason); } if (const Symbol * impure{HasImpureFinal(entity)}) { diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 6949e5693d08f..82e346bb4b6d6 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -3289,7 +3289,7 @@ const Assignment *ExpressionAnalyzer::Analyze(const parser::AssignmentStmt &x) { dyType && dyType->IsPolymorphic()) { // 10.2.1.2p1(1) const Symbol *lastWhole0{UnwrapWholeSymbolOrComponentDataRef(lhs)}; const Symbol *lastWhole{ - lastWhole0 ? &lastWhole0->GetUltimate() : nullptr}; + lastWhole0 ? 
&ResolveAssociations(*lastWhole0) : nullptr}; if (!lastWhole || !IsAllocatable(*lastWhole)) { Say("Left-hand side of assignment may not be polymorphic unless assignment is to an entire allocatable"_err_en_US); } else if (evaluate::IsCoarray(*lastWhole)) { diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 514c0b88d350a..1514c01a49528 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -7748,6 +7748,7 @@ void ConstructVisitor::Post(const parser::TypeGuardStmt::Guard &x) { SetTypeFromAssociation(*symbol); } else if (const auto *type{GetDeclTypeSpec()}) { symbol->SetType(*type); + symbol->get().set_isTypeGuard(); } SetAttrsFromAssociation(*symbol); } diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index 61982295f323a..32eb6c2c5a188 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -155,6 +155,7 @@ void EntityDetails::set_type(const DeclTypeSpec &type) { void AssocEntityDetails::set_rank(int rank) { rank_ = rank; } void AssocEntityDetails::set_IsAssumedSize() { rank_ = isAssumedSize; } void AssocEntityDetails::set_IsAssumedRank() { rank_ = isAssumedRank; } +void AssocEntityDetails::set_isTypeGuard(bool yes) { isTypeGuard_ = yes; } void EntityDetails::ReplaceType(const DeclTypeSpec &type) { type_ = &type; } ObjectEntityDetails::ObjectEntityDetails(EntityDetails &&d) diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 5bb8bae83a787..5e58a0c75c77b 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -633,9 +633,9 @@ const EquivalenceSet *FindEquivalenceSet(const Symbol &symbol) { } bool IsOrContainsEventOrLockComponent(const Symbol &original) { - const Symbol &symbol{ResolveAssociations(original)}; - if (const auto *details{symbol.detailsIf()}) { - if (const DeclTypeSpec * type{details->type()}) { + const Symbol &symbol{ResolveAssociations(original, 
/*stopAtTypeGuard=*/true)}; + if (evaluate::IsVariable(symbol)) { + if (const DeclTypeSpec * type{symbol.GetType()}) { if (const DerivedTypeSpec * derived{type->AsDerived()}) { return IsEventTypeOrLockType(derived) || FindEventOrLockPotentialComponent(*derived); @@ -849,7 +849,7 @@ static const Symbol *HasImpureFinal( } const Symbol *HasImpureFinal(const Symbol &original, std::optional rank) { - const Symbol &symbol{ResolveAssociations(original)}; + const Symbol &symbol{ResolveAssociations(original, /*stopAtTypeGuard=*/true)}; if (symbol.has()) { if (const DeclTypeSpec * symType{symbol.GetType()}) { if (const DerivedTypeSpec * derived{symType->AsDerived()}) { diff --git a/flang/test/Semantics/doconcurrent08.f90 b/flang/test/Semantics/doconcurrent08.f90 index e09d1ab32acb2..48d653fc65896 100644 --- a/flang/test/Semantics/doconcurrent08.f90 +++ b/flang/test/Semantics/doconcurrent08.f90 @@ -125,6 +125,8 @@ subroutine s2() class(Base), allocatable, codimension[:] :: allocPolyComponentVar class(Base), allocatable, codimension[:] :: allocPolyComponentVar1 + class(*), allocatable :: unlimitedPoly + allocate(ChildType :: localVar) allocate(ChildType :: localVar1) allocate(Base :: localVar2) @@ -162,6 +164,16 @@ subroutine s2() !ERROR: Deallocation of a polymorphic entity caused by assignment not allowed in DO CONCURRENT allocPolyCoarray = allocPolyCoarray1 +!ERROR: Deallocation of a polymorphic entity caused by assignment not allowed in DO CONCURRENT + unlimitedPoly = 1 + select type (unlimitedPoly) + type is (integer) + unlimitedPoly = 1 ! 
ok + class default +!ERROR: Deallocation of a polymorphic entity caused by assignment not allowed in DO CONCURRENT + unlimitedPoly = 1 + end select + end do end subroutine s2 From 58308ef62d7d20d936e69174edc0f6d79399affe Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:32:30 -0800 Subject: [PATCH 016/123] [flang] Enforce C1503 (#128962) Enforce an obscure constraint from the standard: an abstract interface is not allowed to have the same name as an intrinsic type keyword. I suspect this is meant to prevent a declaration like "PROCEDURE(REAL), POINTER :: P" from being ambiguous. Fixes https://github.com/llvm/llvm-project/issues/128744. --- flang/lib/Semantics/check-declarations.cpp | 8 ++++++++ flang/test/Semantics/abstract02.f90 | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index bf4bb447c9526..c47f3d8aac99e 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -1562,6 +1562,14 @@ void CheckHelper::CheckSubprogram( messages_.Say(details.result().name(), "A function interface may not declare an assumed-length CHARACTER(*) result"_err_en_US); } + if (symbol.attrs().test(Attr::ABSTRACT) && + (symbol.name() == "integer" || symbol.name() == "unsigned" || + symbol.name() == "real" || symbol.name() == "complex" || + symbol.name() == "character" || + symbol.name() == "logical")) { // F'2023 C1503 + messages_.Say( + "An ABSTRACT interface may not have the same name as an intrinsic type"_err_en_US); + } } CheckExternal(symbol); CheckModuleProcedureDef(symbol); diff --git a/flang/test/Semantics/abstract02.f90 b/flang/test/Semantics/abstract02.f90 index 29aad7b03e537..22183e445d5c6 100644 --- a/flang/test/Semantics/abstract02.f90 +++ b/flang/test/Semantics/abstract02.f90 @@ -4,6 +4,12 @@ program test abstract interface subroutine abstract end subroutine + !ERROR: An ABSTRACT interface may not have the 
same name as an intrinsic type + function integer() + end + !ERROR: An ABSTRACT interface may not have the same name as an intrinsic type + subroutine logical + end end interface procedure(abstract), pointer :: p !ERROR: Abstract procedure interface 'abstract' may not be referenced From 3b033dee95ac9dd8d612ac0529486a1838af3ef0 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:32:50 -0800 Subject: [PATCH 017/123] [flang] Catch usage of : and * lengths in array c'tors (#128974) The definition of an array constructor doesn't preclude the use of [character(:)::] or [character(*)::] directly, but there is language elsewhere in the standard that restricts their use to specific contexts, neither of which include explicitly typed array constructors. Fixes https://github.com/llvm/llvm-project/issues/128755. --- flang/lib/Semantics/expression.cpp | 6 ++++-- flang/test/Semantics/array-constr-len.f90 | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 82e346bb4b6d6..3efdfb3fa49b8 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -78,8 +78,10 @@ static std::optional AnalyzeTypeSpec( const semantics::CharacterTypeSpec &cts{ typeSpec->characterTypeSpec()}; const semantics::ParamValue &len{cts.length()}; - // N.B. CHARACTER(LEN=*) is allowed in type-specs in ALLOCATE() & - // type guards, but not in array constructors. 
+ if (len.isAssumed() || len.isDeferred()) { + context.messages().Say( + "A length specifier of '*' or ':' may not appear in the type of an array constructor"_err_en_US); + } DynamicTypeWithLength type{DynamicType{kind, len}}; if (auto lenExpr{type.LEN()}) { type.length = Fold(context, diff --git a/flang/test/Semantics/array-constr-len.f90 b/flang/test/Semantics/array-constr-len.f90 index 4de9c76c7041c..9b23026a16012 100644 --- a/flang/test/Semantics/array-constr-len.f90 +++ b/flang/test/Semantics/array-constr-len.f90 @@ -11,4 +11,8 @@ subroutine subr(s,n) print *, [(s(1:1),j=1,0)] ! ok print *, [character(2)::(s(1:n),j=1,0)] ! ok print *, [character(n)::(s(1:n),j=1,0)] + !ERROR: A length specifier of '*' or ':' may not appear in the type of an array constructor + print *, [ character(:) :: ] + !ERROR: A length specifier of '*' or ':' may not appear in the type of an array constructor + print *, [ character(*) :: ] end From 6e42f8810d8ee54087d7c6375519f672647e5f3e Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 14:33:11 -0800 Subject: [PATCH 018/123] =?UTF-8?q?[flang]=20Catch=20type-bound=20generic?= =?UTF-8?q?=20with=20inherited=20indistinguishable=20spe=E2=80=A6=20(#1289?= =?UTF-8?q?80)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …cific When checking generic procedures for indistinguishable specific procedures, don't neglect to include specific procedures from any accessible instance of the generic procedure inherited from its parent type.. Fixes https://github.com/llvm/llvm-project/issues/128760. 
--- flang/lib/Semantics/check-declarations.cpp | 43 +++++++++++++++++++--- flang/test/Semantics/generic07.f90 | 2 +- flang/test/Semantics/resolve117.f90 | 23 +++++++----- 3 files changed, 53 insertions(+), 15 deletions(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index c47f3d8aac99e..7732cbff4faef 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -33,6 +33,8 @@ using characteristics::DummyProcedure; using characteristics::FunctionResult; using characteristics::Procedure; +class DistinguishabilityHelper; + class CheckHelper { public: explicit CheckHelper(SemanticsContext &c) : context_{c} {} @@ -89,6 +91,8 @@ class CheckHelper { const SourceName &, const Symbol &, const Procedure &, std::size_t); bool CheckDefinedAssignment(const Symbol &, const Procedure &); bool CheckDefinedAssignmentArg(const Symbol &, const DummyArgument &, int); + void CollectSpecifics( + DistinguishabilityHelper &, const Symbol &, const GenericDetails &); void CheckSpecifics(const Symbol &, const GenericDetails &); void CheckEquivalenceSet(const EquivalenceSet &); void CheckEquivalenceObject(const EquivalenceObject &); @@ -1931,10 +1935,9 @@ void CheckHelper::CheckGeneric( } // Check that the specifics of this generic are distinguishable from each other -void CheckHelper::CheckSpecifics( +void CheckHelper::CollectSpecifics(DistinguishabilityHelper &helper, const Symbol &generic, const GenericDetails &details) { GenericKind kind{details.kind()}; - DistinguishabilityHelper helper{context_}; for (const Symbol &specific : details.specificProcs()) { if (specific.attrs().test(Attr::ABSTRACT)) { if (auto *msg{messages_.Say(generic.name(), @@ -1989,6 +1992,23 @@ void CheckHelper::CheckSpecifics( } } } + if (const Scope * parent{generic.owner().GetDerivedTypeParent()}) { + if (const Symbol * inherited{parent->FindComponent(generic.name())}) { + if (IsAccessible(*inherited, 
generic.owner().parent())) { + if (const auto *details{inherited->detailsIf()}) { + // Include specifics of inherited generic of the same name, too + CollectSpecifics(helper, *inherited, *details); + } + } + } + } +} + +void CheckHelper::CheckSpecifics( + const Symbol &generic, const GenericDetails &details) { + GenericKind kind{details.kind()}; + DistinguishabilityHelper helper{context_}; + CollectSpecifics(helper, generic, details); helper.Check(generic.owner()); } @@ -3947,10 +3967,11 @@ evaluate::Shape SubprogramMatchHelper::FoldShape(const evaluate::Shape &shape) { } void DistinguishabilityHelper::Add(const Symbol &generic, GenericKind kind, - const Symbol &ultimateSpecific, const Procedure &procedure) { - if (!context_.HasError(ultimateSpecific)) { + const Symbol &specific, const Procedure &procedure) { + const Symbol &ultimate{specific.GetUltimate()}; + if (!context_.HasError(ultimate)) { nameToSpecifics_[generic.name()].emplace( - &ultimateSpecific, ProcedureInfo{kind, procedure}); + &ultimate, ProcedureInfo{kind, procedure}); } } @@ -3965,6 +3986,18 @@ void DistinguishabilityHelper::Check(const Scope &scope) { const auto &[ultimate, procInfo]{*iter1}; const auto &[kind, proc]{procInfo}; for (auto iter2{iter1}; ++iter2 != info.end();) { + if (&*ultimate == &*iter2->first) { + continue; // ok, actually the same procedure + } else if (const auto *binding1{ + ultimate->detailsIf()}) { + if (const auto *binding2{ + iter2->first->detailsIf()}) { + if (&binding1->symbol().GetUltimate() == + &binding2->symbol().GetUltimate()) { + continue; // ok, bindings resolve identically + } + } + } auto distinguishable{kind.IsName() ? 
evaluate::characteristics::Distinguishable : evaluate::characteristics::DistinguishableOpOrAssign}; diff --git a/flang/test/Semantics/generic07.f90 b/flang/test/Semantics/generic07.f90 index e7486c02a7d2b..5566c0f82633d 100644 --- a/flang/test/Semantics/generic07.f90 +++ b/flang/test/Semantics/generic07.f90 @@ -74,7 +74,7 @@ program test interface distinguishable3 procedure :: s1a, s1b end interface - !ERROR: Generic 'indistinguishable' may not have specific procedures 's2b' and 's2a' as their interfaces are not distinguishable + !ERROR: Generic 'indistinguishable' may not have specific procedures 's2a' and 's2b' as their interfaces are not distinguishable interface indistinguishable procedure :: s2a, s2b end interface diff --git a/flang/test/Semantics/resolve117.f90 b/flang/test/Semantics/resolve117.f90 index 3e3a813c0921b..b7b0ce7db6b0e 100644 --- a/flang/test/Semantics/resolve117.f90 +++ b/flang/test/Semantics/resolve117.f90 @@ -5,23 +5,28 @@ module m integer, kind :: k = 4 real x contains - procedure, nopass :: tbp => sub - generic :: gen => tbp + procedure, nopass :: tbp => sub1 + generic :: gen1 => tbp + generic :: gen2 => tbp end type type, extends(base1) :: ext1 contains - procedure, nopass :: sub + procedure, nopass :: sub1, sub2 !ERROR: Type parameter, component, or procedure binding 'base1' already defined in this type - generic :: base1 => sub + generic :: base1 => sub1 !ERROR: Type bound generic procedure 'k' may not have the same name as a non-generic symbol inherited from an ancestor type - generic :: k => sub + generic :: k => sub1 !ERROR: Type bound generic procedure 'x' may not have the same name as a non-generic symbol inherited from an ancestor type - generic :: x => sub + generic :: x => sub1 !ERROR: Type bound generic procedure 'tbp' may not have the same name as a non-generic symbol inherited from an ancestor type - generic :: tbp => sub - generic :: gen => sub ! ok + generic :: tbp => sub1 + generic :: gen1 => sub1 ! 
ok + !ERROR: Generic 'gen2' may not have specific procedures 'tbp' and 'sub2' as their interfaces are not distinguishable + generic :: gen2 => sub2 end type contains - subroutine sub + subroutine sub1 + end + subroutine sub2 end end From d2ca6556a3e669d494c370e32f7eb248b2c419e7 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 27 Feb 2025 14:48:13 -0800 Subject: [PATCH 019/123] [flang] Fix a warning This patch fixes: flang/lib/Semantics/check-declarations.cpp:2009:15: error: unused variable 'kind' [-Werror,-Wunused-variable] --- flang/lib/Semantics/check-declarations.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 7732cbff4faef..914d891cd9aa9 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -2006,7 +2006,6 @@ void CheckHelper::CollectSpecifics(DistinguishabilityHelper &helper, void CheckHelper::CheckSpecifics( const Symbol &generic, const GenericDetails &details) { - GenericKind kind{details.kind()}; DistinguishabilityHelper helper{context_}; CollectSpecifics(helper, generic, details); helper.Check(generic.owner()); From 201d3a6368a952529af8d6f32f7012bd60231794 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Feb 2025 14:56:54 -0800 Subject: [PATCH 020/123] [RISCV] Consolidate some DecoderNamespaces for standard extensions. (#128954) First thing to know is that the subtarget feature checks used to block accessing a decoder table are only a performance optimization and not required for functionality. The tables have their own predicate checks. I've removed them from all the standard extension tables. -RV32 Zacas decoder namespace has been renamed to RV32GPRPair, I think Zilsd(rv32 load/store pair) can go in here too. -The RV32 Zdinx table has been renamed to also use RV32GPRPair. -The Zfinx table has been renamed to remove superflous "RV" prefix. 
-Zcmp and Zcmt tables have been combined into a ZcOverlap table. I think Zclsd(rv32 compressed load/store pair) can go in here too. -All the extra standard extension tables are checked after the main standard extension table. This makes the common case of the main table matching occur earlier. -Zicfiss is the exception to this as it needs to be checked before the main table since it overrides some encodings from Zcmop. This can't be handled by a predicate based priority as Zicfiss only overrides a subset of Zcmop encodings. --- .../RISCV/Disassembler/RISCVDisassembler.cpp | 32 ++++++++----------- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 4 +-- llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoZa.td | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoZc.td | 12 +++---- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 12 +++---- 6 files changed, 29 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index e99df34908d6e..61deaa827a6df 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -657,16 +657,6 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, uint32_t Insn = support::endian::read32le(Bytes.data()); - TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZdinx) && - !STI.hasFeature(RISCV::Feature64Bit), - DecoderTableRV32Zdinx32, - "RV32Zdinx (Double in Integer and rv32)"); - TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZacas) && - !STI.hasFeature(RISCV::Feature64Bit), - DecoderTableRV32Zacas32, - "RV32Zacas (Compare-And-Swap and rv32)"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32, - "RVZfinx (Float in Integer)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps, DecoderTableXVentana32, "XVentanaCondOps"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableXTHeadBa32, @@ 
-721,6 +711,11 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_FEATURE_ANY(XRivosFeatureGroup, DecoderTableXRivos32, "Rivos"); TRY_TO_DECODE(true, DecoderTable32, "RISCV32"); + TRY_TO_DECODE(true, DecoderTableRV32GPRPair32, + "RV32GPRPair (rv32 and GPR pairs)"); + TRY_TO_DECODE(true, DecoderTableZfinx32, "Zfinx (Float in Integer)"); + TRY_TO_DECODE(true, DecoderTableZdinxRV32GPRPair32, + "ZdinxRV32GPRPair (rv32 and Double in Integer)"); return MCDisassembler::Fail; } @@ -736,15 +731,6 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size, Size = 2; uint32_t Insn = support::endian::read16le(Bytes.data()); - TRY_TO_DECODE_AND_ADD_SP(!STI.hasFeature(RISCV::Feature64Bit), - DecoderTableRISCV32Only_16, - "RISCV32Only_16 (16-bit Instruction)"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZicfiss, DecoderTableZicfiss16, - "RVZicfiss (Shadow Stack)"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZcmt, DecoderTableRVZcmt16, - "Zcmt (16-bit Table Jump Instructions)"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZcmp, DecoderTableRVZcmp16, - "Zcmp (16-bit Push/Pop & Double Move Instructions)"); TRY_TO_DECODE_FEATURE_ANY(XqciFeatureGroup, DecoderTableXqci16, "Qualcomm uC 16bit"); @@ -753,8 +739,16 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size, "Xqccmp (Qualcomm 16-bit Push/Pop & Double Move Instructions)"); TRY_TO_DECODE_AND_ADD_SP(STI.hasFeature(RISCV::FeatureVendorXwchc), DecoderTableXwchc16, "WCH QingKe XW"); + + // DecoderTableZicfiss16 must be checked before DecoderTable16. + TRY_TO_DECODE(true, DecoderTableZicfiss16, "RVZicfiss (Shadow Stack)"); TRY_TO_DECODE_AND_ADD_SP(true, DecoderTable16, "RISCV_C (16-bit Instruction)"); + TRY_TO_DECODE_AND_ADD_SP(true, DecoderTableRISCV32Only_16, + "RISCV32Only_16 (16-bit Instruction)"); + // Zc* instructions incompatible with Zcf or Zcd. 
+ TRY_TO_DECODE(true, DecoderTableZcOverlap16, + "ZcOverlap (16-bit Instructions overlapping with Zcf/Zcd)"); return MCDisassembler::Fail; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 349bc361c90fe..89254940a87f4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -60,9 +60,9 @@ def FPR64IN32X : RegisterOperand { def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>; -def ZdinxExt : ExtInfo<"_INX", "RVZfinx", [HasStdExtZdinx, IsRV64], +def ZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZdinx, IsRV64], f64, FPR64INX, FPR32INX, FPR64INX, ?>; -def Zdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx", [HasStdExtZdinx, IsRV32], +def Zdinx32Ext : ExtInfo<"_IN32X", "ZdinxRV32GPRPair", [HasStdExtZdinx, IsRV32], f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?>; defvar DExts = [DExt, ZdinxExt, Zdinx32Ext]; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 37ac48db06862..04328151adf8e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -116,7 +116,7 @@ class ExtInfo predicates, def FExt : ExtInfo<"", "", [HasStdExtF], f32, FPR32, FPR32, ?, ?>; -def ZfinxExt : ExtInfo<"_INX", "RVZfinx", [HasStdExtZfinx], f32, FPR32INX, FPR32INX, ?, ?>; +def ZfinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZfinx], f32, FPR32INX, FPR32INX, ?, ?>; defvar FExts = [FExt, ZfinxExt]; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index 1ee78359bc4a5..e903df4d91933 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -59,7 +59,7 @@ let Predicates = [HasStdExtZacas], IsSignExtendingOpW = 1 in { defm AMOCAS_W : AMO_cas_aq_rl<0b00101, 0b010, "amocas.w", GPR>; } // Predicates = [HasStdExtZacas] -let Predicates = [HasStdExtZacas, IsRV32], DecoderNamespace = "RV32Zacas" in { +let Predicates = 
[HasStdExtZacas, IsRV32], DecoderNamespace = "RV32GPRPair" in { defm AMOCAS_D_RV32 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPRPairRV32>; } // Predicates = [HasStdExtZacas, IsRV32] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td index b5e3e6a3a8bbf..efed74ca8c870 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -216,7 +216,7 @@ def C_SH_INX : CStoreH_rri<0b100011, 0b0, "c.sh", GPRF16C>, } // Predicates = [HasStdExtZcb] // Zcmp -let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp], +let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { let Defs = [X10, X11] in def CM_MVA01S : RVInst16CA<0b101011, 0b11, 0b10, (outs), @@ -227,9 +227,9 @@ let Uses = [X10, X11] in def CM_MVSA01 : RVInst16CA<0b101011, 0b01, 0b10, (outs SR07:$rs1, SR07:$rs2), (ins), "cm.mvsa01", "$rs1, $rs2">, Sched<[WriteIALU, WriteIALU, ReadIALU, ReadIALU]>; -} // DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp]... +} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp]... -let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp] in { +let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Uses = [X2], Defs = [X2] in def CM_PUSH : RVInstZcCPPP<0b11000, "cm.push", negstackadj>, Sched<[WriteIALU, ReadIALU, ReadStoreData, ReadStoreData, @@ -258,9 +258,9 @@ def CM_POP : RVInstZcCPPP<0b11010, "cm.pop">, Sched<[WriteIALU, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, WriteLDW, ReadIALU]>; -} // DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp]... +} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmp]... 
-let DecoderNamespace = "RVZcmt", Predicates = [HasStdExtZcmt], +let DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmt], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { def CM_JT : RVInst16CJ<0b101, 0b10, (outs), (ins uimm5:$index), "cm.jt", "$index">{ @@ -278,7 +278,7 @@ def CM_JALT : RVInst16CJ<0b101, 0b10, (outs), (ins uimm8ge32:$index), let Inst{12-10} = 0b000; let Inst{9-2} = index; } -} // DecoderNamespace = "RVZcmt", Predicates = [HasStdExtZcmt]... +} // DecoderNamespace = "ZcOverlap", Predicates = [HasStdExtZcmt]... let Predicates = [HasStdExtZcb, HasStdExtZmmul] in{ diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 625011c3b9f7c..ea0b814ac7ba5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -52,22 +52,22 @@ def ZfhDExt : ExtInfo<"", "", [HasStdExtZfh, HasStdExtD], def ZfhminDExt : ExtInfo<"", "", [HasStdExtZfhmin, HasStdExtD], ?, ?, FPR32, FPR64, FPR16>; -def ZhinxExt : ExtInfo<"_INX", "RVZfinx", +def ZhinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinx], f16, FPR16INX, FPR32INX, ?, FPR16INX>; -def ZhinxminExt : ExtInfo<"_INX", "RVZfinx", +def ZhinxminExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinxmin], f16, FPR16INX, FPR32INX, ?, FPR16INX>; -def ZhinxZdinxExt : ExtInfo<"_INX", "RVZfinx", +def ZhinxZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinx, HasStdExtZdinx, IsRV64], ?, ?, FPR32INX, FPR64INX, FPR16INX>; -def ZhinxminZdinxExt : ExtInfo<"_INX", "RVZfinx", +def ZhinxminZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinxmin, HasStdExtZdinx, IsRV64], ?, ?, FPR32INX, FPR64INX, FPR16INX>; -def ZhinxZdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx", +def ZhinxZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32", [HasStdExtZhinx, HasStdExtZdinx, IsRV32], ?, ?, FPR32INX, FPR64IN32X, FPR16INX >; -def ZhinxminZdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx", +def ZhinxminZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32", [HasStdExtZhinxmin, 
HasStdExtZdinx, IsRV32], ?, ?, FPR32INX, FPR64IN32X, FPR16INX>; From f210eb065188584ec270168bea90fb6c068052a8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Feb 2025 14:59:17 -0800 Subject: [PATCH 021/123] [RISCV] Reduce dynamic relocations for RISCVOpcodesList table. NFC Inline the strings directly into the table instead of storing a pointer. Similar to what was done for other searchable tables in the last couple months. --- llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 80ff18d914dca..5cc20954fb95b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -484,8 +484,8 @@ struct SysReg { namespace RISCVInsnOpcode { struct RISCVOpcode { - const char *Name; - unsigned Value; + char Name[10]; + uint8_t Value; }; #define GET_RISCVOpcodesList_DECL From 8f34a0fd48f7ba3b20a92c508efe9d919ab609c2 Mon Sep 17 00:00:00 2001 From: weiguozhi <57237827+weiguozhi@users.noreply.github.com> Date: Thu, 27 Feb 2025 15:40:21 -0800 Subject: [PATCH 022/123] [JumpThreading] Remove deleted BB from Unreachable (#126984) Although an unreachable BB is skipped by processBlock, its successor can still be handled by processBlock, and maybeMergeBasicBlockIntoOnlyPred may merge the two BBs and delete the unreachable BB. Then the garbage pointer is left in Unreachable set. This patch avoids merging a BB into unreachable predecessor. 
--- llvm/include/llvm/Transforms/Scalar/JumpThreading.h | 4 ++++ llvm/lib/Transforms/Scalar/JumpThreading.cpp | 10 +++++++--- llvm/test/Transforms/JumpThreading/pr62908.ll | 13 ++++++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 7d11fc0ad6938..84292c716a0a9 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -94,6 +94,10 @@ class JumpThreadingPass : public PassInfoMixin { SmallPtrSet LoopHeaders; #endif + // JumpThreading must not processes blocks unreachable from entry. It's a + // waste of compute time and can potentially lead to hangs. + SmallPtrSet Unreachable; + unsigned BBDupThreshold; unsigned DefaultBBDupThreshold; diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 7b221a814aabd..9cae65bbdcfbc 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -307,12 +307,11 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_, else BBDupThreshold = DefaultBBDupThreshold; - // JumpThreading must not processes blocks unreachable from entry. It's a - // waste of compute time and can potentially lead to hangs. - SmallPtrSet Unreachable; assert(DTU && "DTU isn't passed into JumpThreading before using it."); assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed."); DominatorTree &DT = DTU->getDomTree(); + + Unreachable.clear(); for (auto &BB : *F) if (!DT.isReachableFromEntry(&BB)) Unreachable.insert(&BB); @@ -1895,6 +1894,11 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) { SinglePred == BB || hasAddressTakenAndUsed(BB)) return false; + // MergeBasicBlockIntoOnlyPred may delete SinglePred, we need to avoid + // deleting a BB pointer from Unreachable. 
+ if (Unreachable.count(SinglePred)) + return false; + // If SinglePred was a loop header, BB becomes one. if (LoopHeaders.erase(SinglePred)) LoopHeaders.insert(BB); diff --git a/llvm/test/Transforms/JumpThreading/pr62908.ll b/llvm/test/Transforms/JumpThreading/pr62908.ll index 4c389ee040b90..cfb647c509f8e 100644 --- a/llvm/test/Transforms/JumpThreading/pr62908.ll +++ b/llvm/test/Transforms/JumpThreading/pr62908.ll @@ -5,7 +5,18 @@ define i32 @test() { ; CHECK-LABEL: define i32 @test() { -; CHECK-NEXT: end: +; CHECK-NEXT: join.thread: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: unreachable: +; CHECK-NEXT: [[SH_PROM:%.*]] = zext i32 -1 to i64 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i64 -1, [[SH_PROM]] +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SHL]] to i32 +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[CONV]], [[UNREACHABLE:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[END]], label [[END]] +; CHECK: end: ; CHECK-NEXT: ret i32 0 ; entry: From 47ec2a391ad2207be0dd9c38b9c1ed53a9e12a6e Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 27 Feb 2025 15:45:55 -0800 Subject: [PATCH 023/123] IR, CodeGen: Add command line flags for dumping instruction addresses and debug locations. As previously discussed [1], it is sometimes useful to be able to see instruction addresses and debug locations as part of IR dumps. The same applies to MachineInstrs which already dump debug locations but not addresses. Therefore add some flags that can be used to enable dumping of this information. 
[1] https://discourse.llvm.org/t/small-improvement-to-llvm-debugging-experience/79914 Reviewers: rnk Reviewed By: rnk Pull Request: https://github.com/llvm/llvm-project/pull/127944 --- llvm/lib/CodeGen/MachineInstr.cpp | 7 +++++++ llvm/lib/IR/AsmWriter.cpp | 20 ++++++++++++++++++++ llvm/test/Other/print-inst-addrs.ll | 6 ++++++ llvm/test/Other/print-inst-debug-locs.ll | 20 ++++++++++++++++++++ llvm/test/Other/print-mi-addrs.ll | 11 +++++++++++ 5 files changed, 64 insertions(+) create mode 100644 llvm/test/Other/print-inst-addrs.ll create mode 100644 llvm/test/Other/print-inst-debug-locs.ll create mode 100644 llvm/test/Other/print-mi-addrs.ll diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 5860a76c66bff..471666568e79a 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -63,6 +63,10 @@ using namespace llvm; +static cl::opt + PrintMIAddrs("print-mi-addrs", cl::Hidden, + cl::desc("Print addresses of MachineInstrs when dumping")); + static const MachineFunction *getMFIfAvailable(const MachineInstr &MI) { if (const MachineBasicBlock *MBB = MI.getParent()) if (const MachineFunction *MF = MBB->getParent()) @@ -2076,6 +2080,9 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } // TODO: DBG_LABEL + if (PrintMIAddrs) + OS << " ; " << this; + if (AddNewLine) OS << '\n'; } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 11e5a9cd33260..a52c4d88ac836 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -88,6 +88,14 @@ using namespace llvm; +static cl::opt + PrintInstAddrs("print-inst-addrs", cl::Hidden, + cl::desc("Print addresses of instructions when dumping")); + +static cl::opt PrintInstDebugLocs( + "print-inst-debug-locs", cl::Hidden, + cl::desc("Pretty print debug locations of instructions when dumping")); + // Make virtual table appear in this compilation unit. 
AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default; @@ -4256,6 +4264,18 @@ void AssemblyWriter::printInfoComment(const Value &V) { if (AnnotationWriter) { AnnotationWriter->printInfoComment(V, Out); } + + if (PrintInstDebugLocs) { + if (auto *I = dyn_cast(&V)) { + if (I->getDebugLoc()) { + Out << " ; "; + I->getDebugLoc().print(Out); + } + } + } + + if (PrintInstAddrs) + Out << " ; " << &V; } static void maybePrintCallAddrSpace(const Value *Operand, const Instruction *I, diff --git a/llvm/test/Other/print-inst-addrs.ll b/llvm/test/Other/print-inst-addrs.ll new file mode 100644 index 0000000000000..5907b30f0f12c --- /dev/null +++ b/llvm/test/Other/print-inst-addrs.ll @@ -0,0 +1,6 @@ +; RUN: opt -S -print-inst-addrs %s | FileCheck %s + +define void @foo() { + ; CHECK: ret void ; 0x + ret void +} diff --git a/llvm/test/Other/print-inst-debug-locs.ll b/llvm/test/Other/print-inst-debug-locs.ll new file mode 100644 index 0000000000000..93210527e27a7 --- /dev/null +++ b/llvm/test/Other/print-inst-debug-locs.ll @@ -0,0 +1,20 @@ +; RUN: opt -S -print-inst-debug-locs < %s | FileCheck %s + +define weak i32 @foo(i32 %a, i32 %b) !dbg !3 { +entry: + ; CHECK: call {{.*}} ; foo.c:52 + %sum = call i32 @fastadd(i32 %a, i32 %b), !dbg !DILocation(line: 52, scope: !3) + ; CHECK: ret {{.*}} ; foo.c:53 + ret i32 %sum, !dbg !DILocation(line: 53, scope: !3) +} + +declare i32 @fastadd(i32, i32) + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} + +!llvm.dbg.cu = !{!1} +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, emissionKind: FullDebug) +!2 = !DIFile(filename: "foo.c", directory: "/path/to/dir") +!3 = distinct !DISubprogram(file: !2, scope: !2, line: 51, name: "foo", type: !4, unit: !1) +!4 = !DISubroutineType(types: !{}) diff --git a/llvm/test/Other/print-mi-addrs.ll b/llvm/test/Other/print-mi-addrs.ll new file mode 100644 index 0000000000000..5be006d9df282 --- /dev/null +++ b/llvm/test/Other/print-mi-addrs.ll @@ -0,0 +1,11 @@ +; RUN: 
llc -print-after=slotindexes -print-mi-addrs < %s 2>&1 | FileCheck %s +; REQUIRES: default_triple + +; CHECK: IR Dump {{.*}} +; CHECK: # Machine code for function foo{{.*}} + +define void @foo() { + ; CHECK: ; 0x + ret void +} + From 1f4303ea22dc2c5b127494b6b15866e54d6eaca0 Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Thu, 27 Feb 2025 16:05:30 -0800 Subject: [PATCH 024/123] [NVPTX] Combine addressing-mode variants of ld, st, wmma (#129102) This change fold together the _ari, _ari64, and _asi variants of these instructions into a single instruction capable of holding any address. This allows for the removal of a lot of unnecessary code and moves us towards a standard way of representing an address in NVPTX. --- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 569 +++++------------- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 12 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 190 ++---- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 206 +++---- .../Target/NVPTX/NVPTXReplaceImageHandles.cpp | 2 +- .../NVPTX/expected-floating-point-literal.mir | 2 +- .../floating-point-immediate-operands.mir | 8 +- .../floating-point-invalid-type-error.mir | 2 +- 8 files changed, 296 insertions(+), 695 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 9bf346b916f8f..8a5cdd7412bf3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1078,8 +1078,6 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) { return tryLDGLDU(N); } - unsigned int PointerSize = - CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace()); SDLoc DL(N); SDValue Chain = N->getOperand(0); @@ -1112,37 +1110,24 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { FromType = getLdStRegType(ScalarVT); // Create the machine instruction DAG - SDValue N1 = N->getOperand(1); SDValue Offset, Base; - std::optional Opcode; - MVT::SimpleValueType 
TargetVT = LD->getSimpleValueType(0).SimpleTy; - - SmallVector Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL), - getI32Imm(CodeAddrSpace, DL), - getI32Imm(VecType, DL), getI32Imm(FromType, DL), - getI32Imm(FromTypeWidth, DL)}); - - if (SelectADDRsi(N1.getNode(), N1, Base, Offset)) { - Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi, - NVPTX::LD_i32_asi, NVPTX::LD_i64_asi, - NVPTX::LD_f32_asi, NVPTX::LD_f64_asi); - } else { - if (PointerSize == 64) { - SelectADDRri64(N1.getNode(), N1, Base, Offset); - Opcode = - pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64, - NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, - NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64); - } else { - SelectADDRri(N1.getNode(), N1, Base, Offset); - Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, - NVPTX::LD_i32_ari, NVPTX::LD_i64_ari, - NVPTX::LD_f32_ari, NVPTX::LD_f64_ari); - } - } + SelectADDR(N->getOperand(1), Base, Offset); + SDValue Ops[] = {getI32Imm(Ordering, DL), + getI32Imm(Scope, DL), + getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), + getI32Imm(FromType, DL), + getI32Imm(FromTypeWidth, DL), + Base, + Offset, + Chain}; + + const MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; + const std::optional Opcode = + pickOpcodeForVT(TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32, + NVPTX::LD_i64, NVPTX::LD_f32, NVPTX::LD_f64); if (!Opcode) return false; - Ops.append({Base, Offset, Chain}); SDNode *NVPTXLD = CurDAG->getMachineNode(*Opcode, DL, TargetVT, MVT::Other, Ops); @@ -1178,8 +1163,6 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { return tryLDGLDU(N); } - unsigned int PointerSize = - CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); SDLoc DL(N); SDValue Chain = N->getOperand(0); @@ -1227,77 +1210,38 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { FromTypeWidth = 32; } - SDValue Op1 = N->getOperand(1); 
SDValue Offset, Base; - std::optional Opcode; - SDNode *LD; + SelectADDR(N->getOperand(1), Base, Offset); + SDValue Ops[] = {getI32Imm(Ordering, DL), + getI32Imm(Scope, DL), + getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), + getI32Imm(FromType, DL), + getI32Imm(FromTypeWidth, DL), + Base, + Offset, + Chain}; - SmallVector Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL), - getI32Imm(CodeAddrSpace, DL), - getI32Imm(VecType, DL), getI32Imm(FromType, DL), - getI32Imm(FromTypeWidth, DL)}); - - if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi, - NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi, - NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi); - break; - case NVPTXISD::LoadV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi, - NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, - std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt); - break; - } - } else { - if (PointerSize == 64) { - SelectADDRri64(Op1.getNode(), Op1, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::LoadV2: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64, - NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64, - NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64); - break; - case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64, - NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt, - NVPTX::LDV_f32_v4_ari_64, std::nullopt); - break; - } - } else { - SelectADDRri(Op1.getNode(), Op1, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari, - NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari, - 
NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari); - break; - case NVPTXISD::LoadV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari, - NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, - std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt); - break; - } - } + std::optional Opcode; + switch (N->getOpcode()) { + default: + return false; + case NVPTXISD::LoadV2: + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2, + NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2, + NVPTX::LDV_f32_v2, NVPTX::LDV_f64_v2); + break; + case NVPTXISD::LoadV4: + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4, + NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, std::nullopt, + NVPTX::LDV_f32_v4, std::nullopt); + break; } if (!Opcode) return false; - Ops.append({Base, Offset, Chain}); - LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops); + + SDNode *LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops); MachineMemOperand *MemRef = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(LD), {MemRef}); @@ -1344,177 +1288,60 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { SDVTList InstVTList = CurDAG->getVTList(InstVTs); SDValue Chain = N->getOperand(0); - std::optional Opcode; - SDLoc DL(N); - SDNode *LD; SDValue Base, Offset; + SelectADDR(Op1, Base, Offset); + SDValue Ops[] = {Base, Offset, Chain}; - if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { - switch (N->getOpcode()) { - default: - return false; - case ISD::LOAD: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8asi, - NVPTX::INT_PTX_LDG_GLOBAL_i16asi, NVPTX::INT_PTX_LDG_GLOBAL_i32asi, - NVPTX::INT_PTX_LDG_GLOBAL_i64asi, NVPTX::INT_PTX_LDG_GLOBAL_f32asi, - NVPTX::INT_PTX_LDG_GLOBAL_f64asi); - break; - case ISD::INTRINSIC_W_CHAIN: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8asi, - NVPTX::INT_PTX_LDU_GLOBAL_i16asi, NVPTX::INT_PTX_LDU_GLOBAL_i32asi, - 
NVPTX::INT_PTX_LDU_GLOBAL_i64asi, NVPTX::INT_PTX_LDU_GLOBAL_f32asi, - NVPTX::INT_PTX_LDU_GLOBAL_f64asi); - break; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v2i8_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2i16_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2i32_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2i64_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2f32_ELE_asi, - NVPTX::INT_PTX_LDG_G_v2f64_ELE_asi); - break; - case NVPTXISD::LDUV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v2i8_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2i16_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2i32_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2i64_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2f32_ELE_asi, - NVPTX::INT_PTX_LDU_G_v2f64_ELE_asi); - break; - case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_asi, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_asi, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_asi, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_asi, std::nullopt); - break; - case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_asi, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_asi, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_asi, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_asi, std::nullopt); - break; - } - } else { - if (TM.is64Bit()) { - SelectADDRri64(Op1.getNode(), Op1, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case ISD::LOAD: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, - NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, - NVPTX::INT_PTX_LDG_GLOBAL_f64ari64); - break; - case ISD::INTRINSIC_W_CHAIN: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, - 
NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, - NVPTX::INT_PTX_LDU_GLOBAL_f64ari64); - break; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64); - break; - case NVPTXISD::LDUV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64); - break; - case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt); - break; - case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt); - break; - } - } else { - SelectADDRri(Op1.getNode(), Op1, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case ISD::LOAD: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8ari, - NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari, - NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari, - NVPTX::INT_PTX_LDG_GLOBAL_f64ari); - break; - case ISD::INTRINSIC_W_CHAIN: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8ari, - NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari, - NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari, - 
NVPTX::INT_PTX_LDU_GLOBAL_f64ari); - break; - case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32); - break; - case NVPTXISD::LDUV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32); - break; - case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt); - break; - case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt); - break; - } - } + std::optional Opcode; + switch (N->getOpcode()) { + default: + return false; + case ISD::LOAD: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8, + NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32, + NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32, + NVPTX::INT_PTX_LDG_GLOBAL_f64); + break; + case ISD::INTRINSIC_W_CHAIN: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8, + NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32, + NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32, + NVPTX::INT_PTX_LDU_GLOBAL_f64); + break; + case NVPTXISD::LoadV2: + Opcode = pickOpcodeForVT( + 
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE, + NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE, + NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE, + NVPTX::INT_PTX_LDG_G_v2f64_ELE); + break; + case NVPTXISD::LDUV2: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE, + NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE, + NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE, + NVPTX::INT_PTX_LDU_G_v2f64_ELE); + break; + case NVPTXISD::LoadV4: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE, + NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE, + std::nullopt, NVPTX::INT_PTX_LDG_G_v4f32_ELE, std::nullopt); + break; + case NVPTXISD::LDUV4: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE, + NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE, + std::nullopt, NVPTX::INT_PTX_LDU_G_v4f32_ELE, std::nullopt); + break; } if (!Opcode) return false; - SDValue Ops[] = {Base, Offset, Chain}; - LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops); + + SDLoc DL(N); + SDNode *LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops); // For automatic generation of LDG (through SelectLoad[Vector], not the // intrinsics), we may have an extending load like: @@ -1572,8 +1399,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(ST); - unsigned int PointerSize = - CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace()); SDLoc DL(N); SDValue Chain = ST->getChain(); @@ -1598,38 +1423,28 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { // Create the machine instruction DAG SDValue Value = PlainStore ? 
PlainStore->getValue() : AtomicStore->getVal(); - SDValue BasePtr = ST->getBasePtr(); + SDValue Offset, Base; - std::optional Opcode; - MVT::SimpleValueType SourceVT = + SelectADDR(ST->getBasePtr(), Base, Offset); + + SDValue Ops[] = {Value, + getI32Imm(Ordering, DL), + getI32Imm(Scope, DL), + getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), + getI32Imm(ToType, DL), + getI32Imm(ToTypeWidth, DL), + Base, + Offset, + Chain}; + + const MVT::SimpleValueType SourceVT = Value.getNode()->getSimpleValueType(0).SimpleTy; - - SmallVector Ops( - {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL), - getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), - getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)}); - - if (SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) { - Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi, - NVPTX::ST_i32_asi, NVPTX::ST_i64_asi, - NVPTX::ST_f32_asi, NVPTX::ST_f64_asi); - } else { - if (PointerSize == 64) { - SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset); - Opcode = - pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64, - NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, - NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64); - } else { - SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset); - Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari, - NVPTX::ST_i32_ari, NVPTX::ST_i64_ari, - NVPTX::ST_f32_ari, NVPTX::ST_f64_ari); - } - } + const std::optional Opcode = + pickOpcodeForVT(SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32, + NVPTX::ST_i64, NVPTX::ST_f32, NVPTX::ST_f64); if (!Opcode) return false; - Ops.append({Base, Offset, Chain}); SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops); @@ -1644,9 +1459,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { SDValue Op1 = N->getOperand(1); - SDValue Offset, Base; - std::optional Opcode; - SDNode *ST; EVT EltVT = Op1.getValueType(); MemSDNode *MemSD = 
cast(N); EVT StoreVT = MemSD->getMemoryVT(); @@ -1657,8 +1469,6 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { report_fatal_error("Cannot store to pointer that points to constant " "memory space"); } - unsigned int PointerSize = - CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); SDLoc DL(N); SDValue Chain = N->getOperand(0); @@ -1697,72 +1507,35 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { ToTypeWidth = 32; } + SDValue Offset, Base; + SelectADDR(N2, Base, Offset); + Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), - getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)}); + getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL), Base, Offset, + Chain}); - if (SelectADDRsi(N2.getNode(), N2, Base, Offset)) { - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::StoreV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi, - NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi, - NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi); - break; - case NVPTXISD::StoreV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi, - NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, - std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt); - break; - } - } else { - if (PointerSize == 64) { - SelectADDRri64(N2.getNode(), N2, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::StoreV2: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64, - NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64, - NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64); - break; - case NVPTXISD::StoreV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64, - NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt, - NVPTX::STV_f32_v4_ari_64, std::nullopt); - break; - } - } else { - SelectADDRri(N2.getNode(), 
N2, Base, Offset); - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::StoreV2: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari, - NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari, - NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari); - break; - case NVPTXISD::StoreV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari, - NVPTX::STV_i32_v4_ari, std::nullopt, - NVPTX::STV_f32_v4_ari, std::nullopt); - break; - } - } + std::optional Opcode; + switch (N->getOpcode()) { + default: + return false; + case NVPTXISD::StoreV2: + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2, + NVPTX::STV_i16_v2, NVPTX::STV_i32_v2, NVPTX::STV_i64_v2, + NVPTX::STV_f32_v2, NVPTX::STV_f64_v2); + break; + case NVPTXISD::StoreV4: + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4, + NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, std::nullopt, + NVPTX::STV_f32_v4, std::nullopt); + break; } + if (!Opcode) return false; - Ops.append({Base, Offset, Chain}); - ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops); + SDNode *ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops); MachineMemOperand *MemRef = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(ST), {MemRef}); @@ -2413,27 +2186,28 @@ static inline bool isAddLike(const SDValue V) { (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint()); } -// SelectDirectAddr - Match a direct address for DAG. -// A direct address could be a globaladdress or externalsymbol. -bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { +// selectBaseADDR - Match a dag node which will serve as the base address for an +// ADDR operand pair. +static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) { // Return true if TGA or ES. 
if (N.getOpcode() == ISD::TargetGlobalAddress || - N.getOpcode() == ISD::TargetExternalSymbol) { - Address = N; - return true; - } - if (N.getOpcode() == NVPTXISD::Wrapper) { - Address = N.getOperand(0); - return true; - } + N.getOpcode() == ISD::TargetExternalSymbol) + return N; + + if (N.getOpcode() == NVPTXISD::Wrapper) + return N.getOperand(0); + // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol - if (AddrSpaceCastSDNode *CastN = dyn_cast(N)) { + if (AddrSpaceCastSDNode *CastN = dyn_cast(N)) if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC && CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM && CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam) - return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address); - } - return false; + return selectBaseADDR(CastN->getOperand(0).getOperand(0), DAG); + + if (auto *FIN = dyn_cast(N)) + return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0)); + + return N; } static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) { @@ -2454,37 +2228,17 @@ static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) { MVT::i32); } -// symbol+offset -bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset) { - Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG); - return SelectDirectAddr(Addr, Base); -} - -// register+offset -void NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT VT) { - - Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG); - if (auto *FIN = dyn_cast(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); - return; - } - Base = Addr; -} - -// register+offset -bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset) { - SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); - return true; -} - -// register+offset -bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, 
SDValue Addr, - SDValue &Base, SDValue &Offset) { - SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); +// Select a pair of operands which represent a valid PTX address, this could be +// one of the following things: +// - [var] - Offset is simply set to 0 +// - [reg] - Offset is simply set to 0 +// - [reg+immOff] +// - [var+immOff] +// Note that immOff must fit into a 32-bit signed integer. +bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base, + SDValue &Offset) { + Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG); + Base = selectBaseADDR(Addr, CurDAG); return true; } @@ -2513,12 +2267,7 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( default: return true; case InlineAsm::ConstraintCode::m: // memory - if (SelectDirectAddr(Op, Op0)) { - OutOps.push_back(Op0); - OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); - return false; - } - if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { + if (SelectADDR(Op, Op0, Op1)) { OutOps.push_back(Op0); OutOps.push_back(Op1); return false; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index eb0c6fe982688..42891b8ca8d8d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -106,17 +106,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { return CurDAG->getTargetConstant(Imm, DL, MVT::i32); } - // Match direct address complex pattern. 
- bool SelectDirectAddr(SDValue N, SDValue &Address); - - void SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base, - SDValue &Offset, MVT VT); - bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base, - SDValue &Offset); - bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base, - SDValue &Offset); - bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base, - SDValue &Offset); + bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset); bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 6a0f708021a16..36a0a06bdb8aa 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1917,27 +1917,15 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>; // Data Movement (Load / Store, Move) //----------------------------------- -let WantsRoot = true in { - def ADDRri : ComplexPattern; - def ADDRri64 : ComplexPattern; -} -def ADDRvar : ComplexPattern; +def addr : ComplexPattern; -def MEMri : Operand { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops Int32Regs, i32imm); -} -def MEMri64 : Operand { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops Int64Regs, i64imm); -} - -def imem : Operand { +def ADDR_base : Operand { let PrintMethod = "printOperand"; } -def imemAny : Operand { - let PrintMethod = "printOperand"; +def ADDR : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops ADDR_base, i32imm); } def LdStCode : Operand { @@ -1956,10 +1944,10 @@ def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; // Load a memory address into a u32 or u64 register. 
-def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), +def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a), "mov.u32 \t$dst, $a;", [(set i32:$dst, (Wrapper tglobaladdr:$a))]>; -def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), +def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a), "mov.u64 \t$dst, $a;", [(set i64:$dst, (Wrapper tglobaladdr:$a))]>; @@ -2021,12 +2009,17 @@ def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>; //---- Copy Frame Index ---- -def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), - "add.u32 \t$dst, ${addr:add};", - [(set i32:$dst, ADDRri:$addr)]>; -def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), - "add.u64 \t$dst, ${addr:add};", - [(set i64:$dst, ADDRri64:$addr)]>; +def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr), + "add.u32 \t$dst, ${addr:add};", []>; +def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR:$addr), + "add.u64 \t$dst, ${addr:add};", []>; + +def to_tframeindex : SDNodeXFormgetTargetFrameIndex(N->getIndex(), N->getValueType(0)); +}]>; + +def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>; +def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>; //----------------------------------- // Comparison and Selection @@ -2660,7 +2653,7 @@ def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", [(LastCallArg (i32 1), (i32 imm:$a))]>; -def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ", +def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ", [(CallVoid (Wrapper tglobaladdr:$addr))]>; def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", [(CallVoid i32:$addr)]>; @@ -2753,109 +2746,56 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { // // Load / Store Handling // 
-multiclass LD { - def _ari : NVPTXInst< +class LD + : NVPTXInst< (outs regclass:$dst), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr$offset];", []>; - def _ari_64 : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr$offset];", []>; - def _asi : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), + i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr$offset];", []>; -} + "\t$dst, [$addr];", []>; let mayLoad=1, hasSideEffects=0 in { - defm LD_i8 : LD; - defm LD_i16 : LD; - defm LD_i32 : LD; - defm LD_i64 : LD; - defm LD_f32 : LD; - defm LD_f64 : LD; + def LD_i8 : LD; + def LD_i16 : LD; + def LD_i32 : LD; + def LD_i64 : LD; + def LD_f32 : LD; + def LD_f64 : LD; } -multiclass ST { - def _ari : NVPTXInst< +class ST + : NVPTXInst< (outs), (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, - Offseti32imm:$offset), + LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, ADDR:$addr), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr$offset], $src;", []>; - def _ari_64 : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, - Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " 
\t[$addr$offset], $src;", []>; - def _asi : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr, - Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr$offset], $src;", []>; -} + " \t[$addr], $src;", []>; let mayStore=1, hasSideEffects=0 in { - defm ST_i8 : ST; - defm ST_i16 : ST; - defm ST_i32 : ST; - defm ST_i64 : ST; - defm ST_f32 : ST; - defm ST_f64 : ST; + def ST_i8 : ST; + def ST_i16 : ST; + def ST_i32 : ST; + def ST_i64 : ST; + def ST_f32 : ST; + def ST_f64 : ST; } // The following is used only in and after vector elementizations. Vector // elementization happens at the machine instruction level, so the following // instructions never appear in the DAG. multiclass LD_VEC { - def _v2_ari : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr$offset];", []>; - def _v2_ari_64 : NVPTXInst< + def _v2 : NVPTXInst< (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr$offset];", []>; - def _v2_asi : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr$offset];", []>; - def _v4_ari : NVPTXInst< + "\t{{$dst1, $dst2}}, [$addr];", 
[]>; + def _v4 : NVPTXInst< (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; - def _v4_ari_64 : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; - def _v4_asi : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), - "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>; + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; } let mayLoad=1, hasSideEffects=0 in { defm LDV_i8 : LD_VEC; @@ -2867,48 +2807,20 @@ let mayLoad=1, hasSideEffects=0 in { } multiclass ST_VEC { - def _v2_ari : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, - LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - Int32Regs:$addr, Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2}};", []>; - def _v2_ari_64 : NVPTXInst< + def _v2 : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - Int64Regs:$addr, Offseti32imm:$offset), + ADDR:$addr), 
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2}};", []>; - def _v2_asi : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope, - LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, - imem:$addr, Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2}};", []>; - def _v4_ari : NVPTXInst< + "\t[$addr], {{$src1, $src2}};", []>; + def _v4 : NVPTXInst< (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset), + LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_ari_64 : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_asi : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset), - "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}" - "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>; + "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; } let mayStore=1, hasSideEffects=0 in { diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index bcb3f05f3f8a8..7d7e69adafcd0 100644 --- 
a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2728,65 +2728,46 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">; // Scalar -multiclass LDU_G { - def asi: NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset), - "ldu.global." # TyStr # " \t$result, [$src$offset];", - []>, Requires<[hasLDU]>; - def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), +class LDU_G + : NVPTXInst<(outs regclass:$result), (ins ADDR:$src), "ldu.global." # TyStr # " \t$result, [$src];", []>, Requires<[hasLDU]>; - def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), - "ldu.global." # TyStr # " \t$result, [$src];", - []>, Requires<[hasLDU]>; -} -defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>; -defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>; -defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>; -defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>; -defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>; -defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>; +def INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>; +def INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>; +def INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>; +def INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>; +def INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>; +def INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>; // vector // Elementized vector ldu -multiclass VLDU_G_ELE_V2 { - def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins MEMri:$src), - "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; - def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins MEMri64:$src), +class VLDU_G_ELE_V2 + : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins ADDR:$src), "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; - def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins imemAny:$src, Offseti32imm:$offset), - "ldu.global.v2." 
# TyStr # " \t{{$dst1, $dst2}}, [$src$offset];", []>; -} -multiclass VLDU_G_ELE_V4 { - def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri:$src), - "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; - def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri64:$src), + +class VLDU_G_ELE_V4 + : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins ADDR:$src), "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; - def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset), - "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>; -} -defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>; -defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>; -defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>; -defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>; -defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>; -defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>; -defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>; -defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>; -defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>; -defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>; -defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>; -defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>; +def INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>; +def INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>; +def INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>; +def INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>; +def INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>; +def INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>; 
+ +def INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>; +def INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>; +def INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>; +def INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>; +def INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>; +def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>; //----------------------------------- @@ -2797,64 +2778,44 @@ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>; // non-coherent texture cache, and therefore the values read must be read-only // during the lifetime of the kernel. -multiclass LDG_G { - def asi: NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset), - "ld.global.nc." # TyStr # " \t$result, [$src$offset];", - []>, Requires<[hasLDG]>; - def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), - "ld.global.nc." # TyStr # " \t$result, [$src];", - []>, Requires<[hasLDG]>; - def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), +class LDG_G + : NVPTXInst<(outs regclass:$result), (ins ADDR:$src), "ld.global.nc." 
# TyStr # " \t$result, [$src];", []>, Requires<[hasLDG]>; -} -defm INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>; -defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>; -defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>; -defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>; -defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>; -defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>; +def INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>; +def INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>; +def INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>; +def INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>; +def INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>; +def INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>; // vector // Elementized vector ldg -multiclass VLDG_G_ELE_V2 { - def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins MEMri:$src), - "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; - def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins MEMri64:$src), - "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; - def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins imemAny:$src, Offseti32imm:$offset), - "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];", []>; -} - -multiclass VLDG_G_ELE_V4 { - def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri:$src), - "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; - def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri64:$src), - "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; - def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset), - "ld.global.nc.v4." 
# TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>; -} +class VLDG_G_ELE_V2 : + NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins ADDR:$src), + "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>; + + +class VLDG_G_ELE_V4 : + NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), + (ins ADDR:$src), + "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. -defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>; -defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>; -defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>; -defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>; -defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>; -defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>; +def INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>; +def INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>; +def INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>; +def INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>; +def INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>; +def INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>; -defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>; -defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>; -defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>; -defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>; +def INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>; +def INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>; +def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>; +def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>; multiclass NG_TO_G { @@ -2929,17 +2890,17 @@ def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), // @TODO: Are these actually needed, or will we always just 
see symbols // copied to registers first? -/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s), +/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins ADDR_base:$s), "mov.u32 \t$r, $s;", [(set Int32Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>; -def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), +def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins ADDR_base:$s), "mov.u64 \t$r, $s;", [(set Int64Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>;*/ def texsurf_handles - : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), + : NVPTXInst<(outs Int64Regs:$result), (ins ADDR_base:$src), "mov.u64 \t$result, $src;", []>; //----------------------------------- @@ -7223,20 +7184,16 @@ class WMMA_REGINFO class BuildPatternI { // Build a dag pattern that matches the intrinsic call. dag ret = !foreach(tmp, Ins, - !subst(imem, ADDRvar, - !subst(MEMri64, ADDRri64, - !subst(MEMri, ADDRri, - !subst(ins, Intr, tmp))))); + !subst(ADDR, addr, + !subst(ins, Intr, tmp))); } // Same as above, but uses PatFrag instead of an Intrinsic. class BuildPatternPF { // Build a dag pattern that matches the intrinsic call. dag ret = !foreach(tmp, Ins, - !subst(imem, ADDRvar, - !subst(MEMri64, ADDRri64, - !subst(MEMri, ADDRri, - !subst(ins, Intr, tmp))))); + !subst(ADDR, addr, + !subst(ins, Intr, tmp))); } // Common WMMA-related fields used for building patterns for all MMA instructions. @@ -7253,10 +7210,9 @@ class WMMA_INSTR _Args> // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] // -class WMMA_LOAD +class WMMA_LOAD : WMMA_INSTR.record, - [!con((ins SrcOp:$src), + [!con((ins ADDR:$src), !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, Requires { // Load/store intrinsics are overloaded on pointer's address space. 
@@ -7293,9 +7249,9 @@ class WMMA_LOAD + bit WithStride> : WMMA_INSTR.record, - [!con((ins DstOp:$dst), + [!con((ins ADDR:$dst), Frag.Ins, !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, Requires { @@ -7334,14 +7290,12 @@ defset list MMA_LDSTs = { foreach layout = ["row", "col"] in { foreach stride = [false, true] in { foreach space = [".global", ".shared", ""] in { - foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { - foreach frag = NVVM_MMA_OPS.all_ld_ops in - if NVVM_WMMA_LDST_SUPPORTED.ret then - def : WMMA_LOAD, layout, space, stride, addr>; - foreach frag = NVVM_MMA_OPS.all_st_ops in - if NVVM_WMMA_LDST_SUPPORTED.ret then - def : WMMA_STORE_D, layout, space, stride, addr>; - } // addr + foreach frag = NVVM_MMA_OPS.all_ld_ops in + if NVVM_WMMA_LDST_SUPPORTED.ret then + def : WMMA_LOAD, layout, space, stride>; + foreach frag = NVVM_MMA_OPS.all_st_ops in + if NVVM_WMMA_LDST_SUPPORTED.ret then + def : WMMA_STORE_D, layout, space, stride>; } // space } // stride } // layout @@ -7468,9 +7422,8 @@ defset list MMAs = { // // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16 // -class LDMATRIX - : WMMA_INSTR.record, [(ins SrcOp:$src)]>, +class LDMATRIX + : WMMA_INSTR.record, [(ins ADDR:$src)]>, Requires { // Build PatFrag that only matches particular address space. 
PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src), @@ -7494,12 +7447,9 @@ class LDMATRIX LDMATRIXs = { foreach transposed = [false, true] in { foreach space = [".shared", ""] in { - foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { - foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in - if NVVM_LDMATRIX_SUPPORTED.ret then - def : LDMATRIX, transposed, space, - addr>; - } // addr + foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in + if NVVM_LDMATRIX_SUPPORTED.ret then + def : LDMATRIX, transposed, space>; } // space } // transposed } // defset diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index 4971d31691c54..46e4a905aa09a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -1800,7 +1800,7 @@ bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op, MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg()); switch (TexHandleDef.getOpcode()) { - case NVPTX::LD_i64_asi: { + case NVPTX::LD_i64: { // The handle is a parameter value being loaded, replace with the // parameter symbol const auto &TM = static_cast(MF.getTarget()); diff --git a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir index 62ede3b9eef3b..400bff47c8f2e 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir @@ -16,7 +16,7 @@ registers: - { id: 1, class: float32regs } body: | bb.0.entry: - %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0 + %0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0 ; CHECK: [[@LINE+1]]:33: expected a floating point literal %1 = FADD_rnf32ri %0, float 3 StoreRetvalF32 %1, 0 diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir index 
69c1e25a06024..486c6ca16a531 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir @@ -40,9 +40,9 @@ registers: - { id: 7, class: float32regs } body: | bb.0.entry: - %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test_param_0, 0 + %0 = LD_f32 0, 0, 4, 1, 2, 32, &test_param_0, 0 %1 = CVT_f64_f32 %0, 0 - %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test_param_1, 0 + %2 = LD_i32 0, 0, 4, 1, 0, 32, &test_param_1, 0 ; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00 %3 = FADD_rnf64ri %1, double 3.250000e+00 %4 = CVT_f32_f64 %3, 5 @@ -66,9 +66,9 @@ registers: - { id: 7, class: float32regs } body: | bb.0.entry: - %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test2_param_0, 0 + %0 = LD_f32 0, 0, 4, 1, 2, 32, &test2_param_0, 0 %1 = CVT_f64_f32 %0, 0 - %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test2_param_1, 0 + %2 = LD_i32 0, 0, 4, 1, 0, 32, &test2_param_1, 0 ; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000 %3 = FADD_rnf64ri %1, double 0x7FF8000000000000 %4 = CVT_f32_f64 %3, 5 diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir index cc9a36509db33..114b0f9702033 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir @@ -16,7 +16,7 @@ registers: - { id: 1, class: float32regs } body: | bb.0.entry: - %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0 + %0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0 ; CHECK: [[@LINE+1]]:33: floating point constant does not have type 'float' %1 = FADD_rnf32ri %0, float 0xH3C00 StoreRetvalF32 %1, 0 From 77b96fe9b6874e834f40241abc40d8682a0d337a Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Thu, 27 Feb 2025 16:06:19 -0800 Subject: [PATCH 025/123] [MCA][RISCV] Mark one of the internal CustomBehavior functions static. NFC This function is only used in the same file. 
--- llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp index 0881de90700ab..289f9aa51195d 100644 --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -205,7 +205,7 @@ getEEWAndEMUL(unsigned Opcode, RISCVVType::VLMUL LMUL, uint8_t SEW) { return std::make_pair(EEW, *EMUL); } -bool opcodeHasEEWAndEMULInfo(unsigned short Opcode) { +static bool opcodeHasEEWAndEMULInfo(unsigned short Opcode) { return Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V || Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V || Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V || From f9af10aaa109f70f201ea3fd876215f6bef11f2a Mon Sep 17 00:00:00 2001 From: YongKang Zhu Date: Thu, 27 Feb 2025 16:13:57 -0800 Subject: [PATCH 026/123] [BOLT][instr] Avoid WX segment (#128982) BOLT instrumented binary today has a readable (R), writeable (W) and also executable (X) segment, which Android system won't load due to its WX attribute. Such RWX segment was produced because BOLT has a two step linking, first for everything in the updated or rewritten input binary and next for runtime library. Each linking will layout sections in the order of RX sections followed by RO sections and then followed by RW sections. So we could end up having a RW section `.bolt.instr.counters` surrounded by a number of RO and RX sections, and a new text segment was then formed by including all RX sections which includes the RW section in the middle, and hence the RWX segment. One way to fix this is to separate the RW `.bolt.instr.counters` section into its own segment by a). assigning the starting addresses for section `.bolt.instr.counters` and its following section with regular page aligned addresses and b). creating two extra program headers accordingly. 
--- bolt/include/bolt/Rewrite/RewriteInstance.h | 3 + bolt/lib/Passes/Instrumentation.cpp | 2 +- bolt/lib/Rewrite/RewriteInstance.cpp | 108 +++++++++++++++++--- bolt/test/avoid-wx-segment.c | 15 +++ 4 files changed, 110 insertions(+), 18 deletions(-) create mode 100644 bolt/test/avoid-wx-segment.c diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 42094cb732107..fdd65bbd535f7 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -505,6 +505,9 @@ class RewriteInstance { /// Number of local symbols in newly written symbol table. uint64_t NumLocalSymbols{0}; + /// Flag indicating runtime library linking just started. + bool StartLinkingRuntimeLib{false}; + /// Information on special Procedure Linkage Table sections. There are /// multiple variants generated by different linkers. struct PLTSectionInfo { diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp index 76766b05b9176..fbf889279f1c0 100644 --- a/bolt/lib/Passes/Instrumentation.cpp +++ b/bolt/lib/Passes/Instrumentation.cpp @@ -604,7 +604,7 @@ Error Instrumentation::runOnFunctions(BinaryContext &BC) { /*IsText=*/false, /*IsAllocatable=*/true); BC.registerOrUpdateSection(".bolt.instr.counters", ELF::SHT_PROGBITS, Flags, - nullptr, 0, 1); + nullptr, 0, BC.RegularPageSize); BC.registerOrUpdateNoteSection(".bolt.instr.tables", nullptr, 0, /*Alignment=*/1, diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 70a9f084f009b..a97762063eb1e 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -628,6 +628,11 @@ Error RewriteInstance::discoverStorage() { unsigned Phnum = Obj.getHeader().e_phnum; Phnum += 3; + // Reserve two more pheaders to avoid having writeable and executable + // segment in instrumented binary. 
+ if (opts::Instrument) + Phnum += 2; + NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy); NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy); } @@ -2083,6 +2088,13 @@ void RewriteInstance::adjustCommandLineOptions() { opts::HotText = false; } + if (opts::Instrument && opts::UseGnuStack) { + BC->errs() << "BOLT-ERROR: cannot avoid having writeable and executable " + "segment in instrumented binary if program headers will be " + "updated in place\n"; + exit(1); + } + if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { opts::HotTextMoveSections.addValue(".stub"); opts::HotTextMoveSections.addValue(".mover"); @@ -3612,11 +3624,13 @@ void RewriteInstance::emitAndLink() { static_cast(*Streamer).getAssembler()); } - if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) + if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) { + StartLinkingRuntimeLib = true; RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) { // Map newly registered sections. this->mapAllocatableSections(MapSection); }); + } // Once the code is emitted, we can rename function sections to actual // output sections and de-register sections used for emission. 
@@ -4011,12 +4025,17 @@ void RewriteInstance::mapAllocatableSections( Section.setOutputFileOffset(Section.getInputFileOffset()); MapSection(Section, Section.getAddress()); } else { - NextAvailableAddress = - alignTo(NextAvailableAddress, Section.getAlignment()); + uint64_t Alignment = Section.getAlignment(); + if (opts::Instrument && StartLinkingRuntimeLib) { + Alignment = BC->RegularPageSize; + StartLinkingRuntimeLib = false; + } + NextAvailableAddress = alignTo(NextAvailableAddress, Alignment); + LLVM_DEBUG({ - dbgs() << "BOLT: mapping section " << Section.getName() << " (0x" - << Twine::utohexstr(Section.getAllocAddress()) << ") to 0x" - << Twine::utohexstr(NextAvailableAddress) << ":0x" + dbgs() << "BOLT-DEBUG: mapping section " << Section.getName() + << " (0x" << Twine::utohexstr(Section.getAllocAddress()) + << ") to 0x" << Twine::utohexstr(NextAvailableAddress) << ":0x" << Twine::utohexstr(NextAvailableAddress + Section.getOutputSize()) << '\n'; @@ -4079,6 +4098,9 @@ void RewriteInstance::patchELFPHDRTable() { } } + if (opts::Instrument) + Phnum += 2; + // NOTE Currently .eh_frame_hdr appends to the last segment, recalculate // last segments size based on the NextAvailableAddress variable. if (!NewWritableSegmentSize) { @@ -4093,7 +4115,8 @@ void RewriteInstance::patchELFPHDRTable() { const uint64_t SavedPos = OS.tell(); OS.seek(PHDRTableOffset); - auto createNewTextPhdr = [&]() { + auto createNewPhdrs = [&]() { + SmallVector NewPhdrs; ELF64LEPhdrTy NewPhdr; NewPhdr.p_type = ELF::PT_LOAD; if (PHDRTableAddress) { @@ -4108,20 +4131,67 @@ void RewriteInstance::patchELFPHDRTable() { NewPhdr.p_filesz = NewTextSegmentSize; NewPhdr.p_memsz = NewTextSegmentSize; NewPhdr.p_flags = ELF::PF_X | ELF::PF_R; - if (opts::Instrument) { - // FIXME: Currently instrumentation is experimental and the runtime data - // is emitted with code, thus everything needs to be writable. 
- NewPhdr.p_flags |= ELF::PF_W; - } NewPhdr.p_align = BC->PageAlign; - return NewPhdr; + if (!opts::Instrument) { + NewPhdrs.push_back(NewPhdr); + } else { + ErrorOr Sec = + BC->getUniqueSectionByName(".bolt.instr.counters"); + assert(Sec && "expected one and only one `.bolt.instr.counters` section"); + const uint64_t Addr = Sec->getOutputAddress(); + const uint64_t Offset = Sec->getOutputFileOffset(); + const uint64_t Size = Sec->getOutputSize(); + assert(Addr > NewPhdr.p_vaddr && + Addr + Size < NewPhdr.p_vaddr + NewPhdr.p_memsz && + "`.bolt.instr.counters` section is expected to be included in the " + "new text sgement"); + + // Set correct size for the previous header since we are breaking the + // new text segment into three segments. + uint64_t Delta = Addr - NewPhdr.p_vaddr; + NewPhdr.p_filesz = Delta; + NewPhdr.p_memsz = Delta; + NewPhdrs.push_back(NewPhdr); + + // Create a program header for a RW segment that includes the + // `.bolt.instr.counters` section only. + ELF64LEPhdrTy NewPhdrRWSegment; + NewPhdrRWSegment.p_type = ELF::PT_LOAD; + NewPhdrRWSegment.p_offset = Offset; + NewPhdrRWSegment.p_vaddr = Addr; + NewPhdrRWSegment.p_paddr = Addr; + NewPhdrRWSegment.p_filesz = Size; + NewPhdrRWSegment.p_memsz = Size; + NewPhdrRWSegment.p_flags = ELF::PF_R | ELF::PF_W; + NewPhdrRWSegment.p_align = BC->RegularPageSize; + NewPhdrs.push_back(NewPhdrRWSegment); + + // Create a program header for a RX segment that includes all the RX + // sections from runtime library. 
+ ELF64LEPhdrTy NewPhdrRXSegment; + NewPhdrRXSegment.p_type = ELF::PT_LOAD; + const uint64_t AddrRX = alignTo(Addr + Size, BC->RegularPageSize); + const uint64_t OffsetRX = alignTo(Offset + Size, BC->RegularPageSize); + const uint64_t SizeRX = NewTextSegmentSize - (AddrRX - NewPhdr.p_paddr); + NewPhdrRXSegment.p_offset = OffsetRX; + NewPhdrRXSegment.p_vaddr = AddrRX; + NewPhdrRXSegment.p_paddr = AddrRX; + NewPhdrRXSegment.p_filesz = SizeRX; + NewPhdrRXSegment.p_memsz = SizeRX; + NewPhdrRXSegment.p_flags = ELF::PF_X | ELF::PF_R; + NewPhdrRXSegment.p_align = BC->RegularPageSize; + NewPhdrs.push_back(NewPhdrRXSegment); + } + + return NewPhdrs; }; auto writeNewSegmentPhdrs = [&]() { if (PHDRTableAddress || NewTextSegmentSize) { - ELF64LE::Phdr NewPhdr = createNewTextPhdr(); - OS.write(reinterpret_cast(&NewPhdr), sizeof(NewPhdr)); + SmallVector NewPhdrs = createNewPhdrs(); + OS.write(reinterpret_cast(NewPhdrs.data()), + sizeof(ELF64LE::Phdr) * NewPhdrs.size()); } if (NewWritableSegmentSize) { @@ -4169,8 +4239,12 @@ void RewriteInstance::patchELFPHDRTable() { } case ELF::PT_GNU_STACK: if (opts::UseGnuStack) { - // Overwrite the header with the new text segment header. - NewPhdr = createNewTextPhdr(); + // Overwrite the header with the new segment header. + assert(!opts::Instrument); + SmallVector NewPhdrs = createNewPhdrs(); + assert(NewPhdrs.size() == 1 && + "expect exactly one program header was created"); + NewPhdr = NewPhdrs[0]; ModdedGnuStack = true; } break; diff --git a/bolt/test/avoid-wx-segment.c b/bolt/test/avoid-wx-segment.c new file mode 100644 index 0000000000000..fcc3eb6e4c640 --- /dev/null +++ b/bolt/test/avoid-wx-segment.c @@ -0,0 +1,15 @@ +// Test bolt instrumentation won't generate a binary with any segment that +// is writable and executable. Basically we want to put `.bolt.instr.counters` +// section into its own segment, separated from its surrounding RX sections. 
+ +// REQUIRES: system-linux + +void foo() {} +void bar() { foo(); } + +// RUN: %clang %cflags -c %s -o %t.o +// RUN: ld.lld -q -o %t.so %t.o -shared --init=foo --fini=foo +// RUN: llvm-bolt --instrument %t.so -o %tt.so +// RUN: llvm-readelf -l %tt.so | FileCheck %s +// CHECK-NOT: RWE +// CHECK: {{[0-9]*}} .bolt.instr.counters {{$}} From f27dec35b9e65d7029fb9e422947a9fd59bdd026 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 16:16:15 -0800 Subject: [PATCH 027/123] [flang][runtime] Detect byte order reversal problems (#129093) When reading an unformatted sequential file with variable-length records, detect byte order reversal problems with the first record's header and footer words, and emit a more detailed error message. --- flang-rt/lib/runtime/unit.cpp | 48 ++++++++++++++++++++++++++--------- flang-rt/lib/runtime/unit.h | 2 +- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/flang-rt/lib/runtime/unit.cpp b/flang-rt/lib/runtime/unit.cpp index 1d4d54ae01956..43501aeb48458 100644 --- a/flang-rt/lib/runtime/unit.cpp +++ b/flang-rt/lib/runtime/unit.cpp @@ -511,7 +511,7 @@ void ExternalFileUnit::EndIoStatement() { void ExternalFileUnit::BeginSequentialVariableUnformattedInputRecord( IoErrorHandler &handler) { RUNTIME_CHECK(handler, access == Access::Sequential); - std::int32_t header{0}, footer{0}; + std::uint32_t header{0}, footer{0}; std::size_t need{recordOffsetInFrame_ + sizeof header}; std::size_t got{ReadFrame(frameOffsetInFile_, need, handler)}; // Try to emit informative errors to help debug corrupted files. 
@@ -528,17 +528,41 @@ void ExternalFileUnit::BeginSequentialVariableUnformattedInputRecord( recordLength = sizeof header + header; // does not include footer need = recordOffsetInFrame_ + *recordLength + sizeof footer; got = ReadFrame(frameOffsetInFile_, need, handler); - if (got < need) { + if (got >= need) { + footer = ReadHeaderOrFooter(recordOffsetInFrame_ + *recordLength); + } + if (frameOffsetInFile_ == 0 && recordOffsetInFrame_ == 0 && + (got < need || footer != header)) { + // Maybe an omitted or incorrect byte swap flag setting? + // Try it the other way, since this is the first record. + // (N.B. Won't work on files starting with empty records, but there's + // no good way to know later if all preceding records were empty.) + swapEndianness_ = !swapEndianness_; + std::uint32_t header2{ReadHeaderOrFooter(0)}; + std::size_t recordLength2{sizeof header2 + header2}; + std::size_t need2{recordLength2 + sizeof footer}; + std::size_t got2{ReadFrame(0, need2, handler)}; + if (got2 >= need2) { + std::uint32_t footer2{ReadHeaderOrFooter(recordLength2)}; + if (footer2 == header2) { + error = "Unformatted variable-length sequential file input " + "failed on the first record, probably due to a need " + "for byte order data conversion; consider adding " + "CONVERT='SWAP' to the OPEN statement or adding " + "FORT_CONVERT=SWAP to the execution environment"; + } + } + swapEndianness_ = !swapEndianness_; + } + if (error) { + } else if (got < need) { error = "Unformatted variable-length sequential file input failed at " "record #%jd (file offset %jd): hit EOF reading record with " "length %jd bytes"; - } else { - footer = ReadHeaderOrFooter(recordOffsetInFrame_ + *recordLength); - if (footer != header) { - error = "Unformatted variable-length sequential file input failed at " - "record #%jd (file offset %jd): record header has length %jd " - "that does not match record footer (%jd)"; - } + } else if (footer != header) { + error = "Unformatted variable-length sequential file 
input failed at " + "record #%jd (file offset %jd): record header has length %jd " + "that does not match record footer (%jd)"; } } if (error) { @@ -590,7 +614,7 @@ void ExternalFileUnit::BackspaceFixedRecord(IoErrorHandler &handler) { void ExternalFileUnit::BackspaceVariableUnformattedRecord( IoErrorHandler &handler) { - std::int32_t header{0}; + std::uint32_t header{0}; auto headerBytes{static_cast(sizeof header)}; frameOffsetInFile_ += recordOffsetInFrame_; recordOffsetInFrame_ = 0; @@ -775,8 +799,8 @@ void ExternalFileUnit::PopChildIo(ChildIo &child) { child_.reset(child.AcquirePrevious().release()); // deletes top child } -std::int32_t ExternalFileUnit::ReadHeaderOrFooter(std::int64_t frameOffset) { - std::int32_t word; +std::uint32_t ExternalFileUnit::ReadHeaderOrFooter(std::int64_t frameOffset) { + std::uint32_t word; char *wordPtr{reinterpret_cast(&word)}; std::memcpy(wordPtr, Frame() + frameOffset, sizeof word); if (swapEndianness_) { diff --git a/flang-rt/lib/runtime/unit.h b/flang-rt/lib/runtime/unit.h index eb762a2d3b235..bb3d3650da34b 100644 --- a/flang-rt/lib/runtime/unit.h +++ b/flang-rt/lib/runtime/unit.h @@ -210,7 +210,7 @@ class ExternalFileUnit : public ConnectionState, RT_API_ATTRS void CommitWrites(); RT_API_ATTRS bool CheckDirectAccess(IoErrorHandler &); RT_API_ATTRS void HitEndOnRead(IoErrorHandler &); - RT_API_ATTRS std::int32_t ReadHeaderOrFooter(std::int64_t frameOffset); + RT_API_ATTRS std::uint32_t ReadHeaderOrFooter(std::int64_t frameOffset); Lock lock_; From f0da1cb3d31f05f516fae9b36e759299409579d5 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Thu, 27 Feb 2025 16:16:34 -0800 Subject: [PATCH 028/123] [flang] Catch more defined I/O conflicts (#129115) The code that checks for conflicts between type-bound defined I/O generic procedures and non-type-bound defined I/O interfaces only works when then procedures are defined in the same module as subroutines. 
It doesn't catch conflicts when either are external procedures, procedure pointers, dummy procedures, &c. Extend the checking to cover those cases as well. Fixes https://github.com/llvm/llvm-project/issues/128752. --- flang/lib/Semantics/check-declarations.cpp | 55 +++++++++++++--------- flang/test/Lower/io-derived-type.f90 | 6 +-- flang/test/Semantics/io11.f90 | 24 ++++++++++ 3 files changed, 61 insertions(+), 24 deletions(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 914d891cd9aa9..c30c15a290b84 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -165,8 +165,8 @@ class CheckHelper { void CheckDioDummyIsDefaultInteger(const Symbol &, const Symbol &); void CheckDioDummyIsScalar(const Symbol &, const Symbol &); void CheckDioDummyAttrs(const Symbol &, const Symbol &, Attr); - void CheckDioDtvArg( - const Symbol &, const Symbol *, common::DefinedIo, const Symbol &); + void CheckDioDtvArg(const Symbol &proc, const Symbol &subp, const Symbol *arg, + common::DefinedIo, const Symbol &generic); void CheckGenericVsIntrinsic(const Symbol &, const GenericDetails &); void CheckDefaultIntegerArg(const Symbol &, const Symbol *, Attr); void CheckDioAssumedLenCharacterArg( @@ -3428,11 +3428,17 @@ void CheckHelper::CheckAlreadySeenDefinedIo(const DerivedTypeSpec &derivedType, if (auto iter{dtScope->find(generic.name())}; iter != dtScope->end() && IsAccessible(*iter->second, generic.owner())) { for (auto specRef : iter->second->get().specificProcs()) { - const Symbol &specific{specRef->get().symbol()}; - if (specific == proc) { + const Symbol *specific{&specRef->get().symbol()}; + if (specific == &proc) { continue; // unambiguous, accept } - if (const auto *specDT{GetDtvArgDerivedType(specific)}; + if (const auto *peDetails{specific->detailsIf()}) { + specific = peDetails->procInterface(); + if (!specific) { + continue; + } + } + if (const auto 
*specDT{GetDtvArgDerivedType(*specific)}; specDT && evaluate::AreSameDerivedType(derivedType, *specDT)) { SayWithDeclaration(*specRef, proc.name(), "Derived type '%s' has conflicting type-bound input/output procedure '%s'"_err_en_US, @@ -3444,11 +3450,11 @@ void CheckHelper::CheckAlreadySeenDefinedIo(const DerivedTypeSpec &derivedType, } } -void CheckHelper::CheckDioDummyIsDerived(const Symbol &subp, const Symbol &arg, +void CheckHelper::CheckDioDummyIsDerived(const Symbol &proc, const Symbol &arg, common::DefinedIo ioKind, const Symbol &generic) { if (const DeclTypeSpec *type{arg.GetType()}) { if (const DerivedTypeSpec *derivedType{type->AsDerived()}) { - CheckAlreadySeenDefinedIo(*derivedType, ioKind, subp, generic); + CheckAlreadySeenDefinedIo(*derivedType, ioKind, proc, generic); bool isPolymorphic{type->IsPolymorphic()}; if (isPolymorphic != IsExtensibleType(derivedType)) { messages_.Say(arg.name(), @@ -3486,11 +3492,11 @@ void CheckHelper::CheckDioDummyIsScalar(const Symbol &subp, const Symbol &arg) { } } -void CheckHelper::CheckDioDtvArg(const Symbol &subp, const Symbol *arg, - common::DefinedIo ioKind, const Symbol &generic) { +void CheckHelper::CheckDioDtvArg(const Symbol &proc, const Symbol &subp, + const Symbol *arg, common::DefinedIo ioKind, const Symbol &generic) { // Dtv argument looks like: dtv-type-spec, INTENT(INOUT) :: dtv if (CheckDioDummyIsData(subp, arg, 0)) { - CheckDioDummyIsDerived(subp, *arg, ioKind, generic); + CheckDioDummyIsDerived(proc, *arg, ioKind, generic); CheckDioDummyAttrs(subp, *arg, ioKind == common::DefinedIo::ReadFormatted || ioKind == common::DefinedIo::ReadUnformatted @@ -3617,57 +3623,64 @@ void CheckHelper::CheckDefinedIoProc(const Symbol &symbol, for (auto ref : details.specificProcs()) { const Symbol &ultimate{ref->GetUltimate()}; const auto *binding{ultimate.detailsIf()}; - const Symbol &specific{*(binding ? 
&binding->symbol() : &ultimate)}; if (ultimate.attrs().test(Attr::NOPASS)) { // C774 messages_.Say( "Defined input/output procedure '%s' may not have NOPASS attribute"_err_en_US, ultimate.name()); context_.SetError(ultimate); } - if (const auto *subpDetails{specific.detailsIf()}) { + const Symbol *specificProc{binding ? &binding->symbol() : &ultimate}; + const Symbol *specificSubp{specificProc}; + if (const auto *peDetails{specificSubp->detailsIf()}) { + specificSubp = peDetails->procInterface(); + if (!specificSubp) { + continue; + } + } + if (const auto *subpDetails{specificSubp->detailsIf()}) { const std::vector &dummyArgs{subpDetails->dummyArgs()}; - CheckDioArgCount(specific, ioKind, dummyArgs.size()); + CheckDioArgCount(*specificSubp, ioKind, dummyArgs.size()); int argCount{0}; for (auto *arg : dummyArgs) { switch (argCount++) { case 0: // dtv-type-spec, INTENT(INOUT) :: dtv - CheckDioDtvArg(specific, arg, ioKind, symbol); + CheckDioDtvArg(*specificProc, *specificSubp, arg, ioKind, symbol); break; case 1: // INTEGER, INTENT(IN) :: unit - CheckDefaultIntegerArg(specific, arg, Attr::INTENT_IN); + CheckDefaultIntegerArg(*specificSubp, arg, Attr::INTENT_IN); break; case 2: if (ioKind == common::DefinedIo::ReadFormatted || ioKind == common::DefinedIo::WriteFormatted) { // CHARACTER (LEN=*), INTENT(IN) :: iotype CheckDioAssumedLenCharacterArg( - specific, arg, argCount, Attr::INTENT_IN); + *specificSubp, arg, argCount, Attr::INTENT_IN); } else { // INTEGER, INTENT(OUT) :: iostat - CheckDefaultIntegerArg(specific, arg, Attr::INTENT_OUT); + CheckDefaultIntegerArg(*specificSubp, arg, Attr::INTENT_OUT); } break; case 3: if (ioKind == common::DefinedIo::ReadFormatted || ioKind == common::DefinedIo::WriteFormatted) { // INTEGER, INTENT(IN) :: v_list(:) - CheckDioVlistArg(specific, arg, argCount); + CheckDioVlistArg(*specificSubp, arg, argCount); } else { // CHARACTER (LEN=*), INTENT(INOUT) :: iomsg CheckDioAssumedLenCharacterArg( - specific, arg, argCount, 
Attr::INTENT_INOUT); + *specificSubp, arg, argCount, Attr::INTENT_INOUT); } break; case 4: // INTEGER, INTENT(OUT) :: iostat - CheckDefaultIntegerArg(specific, arg, Attr::INTENT_OUT); + CheckDefaultIntegerArg(*specificSubp, arg, Attr::INTENT_OUT); break; case 5: // CHARACTER (LEN=*), INTENT(INOUT) :: iomsg CheckDioAssumedLenCharacterArg( - specific, arg, argCount, Attr::INTENT_INOUT); + *specificSubp, arg, argCount, Attr::INTENT_INOUT); break; default:; } diff --git a/flang/test/Lower/io-derived-type.f90 b/flang/test/Lower/io-derived-type.f90 index 8ac995739afd7..f96feca77c485 100644 --- a/flang/test/Lower/io-derived-type.f90 +++ b/flang/test/Lower/io-derived-type.f90 @@ -22,7 +22,7 @@ subroutine wft(dtv, unit, iotype, v_list, iostat, iomsg) ! CHECK-LABEL: @_QMmPwftd subroutine wftd(dtv, unit, iotype, v_list, iostat, iomsg) - type(t), intent(in) :: dtv + class(t), intent(in) :: dtv integer, intent(in) :: unit character(*), intent(in) :: iotype integer, intent(in) :: v_list(:) @@ -91,13 +91,13 @@ subroutine test3(p, x) ! CHECK: %[[V_10:[0-9]+]] = fir.box_addr %arg0 : (!fir.boxproc<() -> ()>) -> !fir.ref ! CHECK: %[[V_11:[0-9]+]] = fir.insert_value %[[V_9]], %[[V_10]], [0 : index, 1 : index] : (!fir.array<1xtuple, !fir.ref, i32, i1>>, !fir.ref) -> !fir.array<1xtuple, !fir.ref, i32, i1>> ! CHECK: %[[V_12:[0-9]+]] = fir.insert_value %[[V_11]], %c2{{.*}}, [0 : index, 2 : index] : (!fir.array<1xtuple, !fir.ref, i32, i1>>, i32) -> !fir.array<1xtuple, !fir.ref, i32, i1>> - ! CHECK: %[[V_13:[0-9]+]] = fir.insert_value %[[V_12]], %false, [0 : index, 3 : index] : (!fir.array<1xtuple, !fir.ref, i32, i1>>, i1) -> !fir.array<1xtuple, !fir.ref, i32, i1>> + ! CHECK: %[[V_13:[0-9]+]] = fir.insert_value %[[V_12]], %true, [0 : index, 3 : index] : (!fir.array<1xtuple, !fir.ref, i32, i1>>, i1) -> !fir.array<1xtuple, !fir.ref, i32, i1>> ! CHECK: fir.store %[[V_13]] to %[[V_5]] : !fir.ref, !fir.ref, i32, i1>>> ! CHECK: %[[V_14:[0-9]+]] = fir.alloca tuple, !fir.ref, i32, i1>>>, i1> ! 
CHECK: %[[V_15:[0-9]+]] = fir.undefined tuple, !fir.ref, i32, i1>>>, i1> ! CHECK: %[[V_16:[0-9]+]] = fir.insert_value %[[V_15]], %c1{{.*}}, [0 : index] : (tuple, !fir.ref, i32, i1>>>, i1>, i64) -> tuple, !fir.ref, i32, i1>>>, i1> ! CHECK: %[[V_17:[0-9]+]] = fir.insert_value %[[V_16]], %[[V_5]], [1 : index] : (tuple, !fir.ref, i32, i1>>>, i1>, !fir.ref, !fir.ref, i32, i1>>>) -> tuple, !fir.ref, i32, i1>>>, i1> - ! CHECK: %[[V_18:[0-9]+]] = fir.insert_value %[[V_17]], %true, [2 : index] : (tuple, !fir.ref, i32, i1>>>, i1>, i1) -> tuple, !fir.ref, i32, i1>>>, i1> + ! CHECK: %[[V_18:[0-9]+]] = fir.insert_value %[[V_17]], %true_0, [2 : index] : (tuple, !fir.ref, i32, i1>>>, i1>, i1) -> tuple, !fir.ref, i32, i1>>>, i1> ! CHECK: fir.store %[[V_18]] to %[[V_14]] : !fir.ref, !fir.ref, i32, i1>>>, i1>> ! CHECK: %[[V_19:[0-9]+]] = fir.convert %[[V_14]] : (!fir.ref, !fir.ref, i32, i1>>>, i1>>) -> !fir.ref ! CHECK: %[[V_20:[0-9]+]] = fir.call @_FortranAioOutputDerivedType(%{{.*}}, %[[V_4]], %[[V_19]]) fastmath : (!fir.ref, !fir.box, !fir.ref) -> i1 diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90 index 5d3d90271c0a8..7565d35aeb407 100644 --- a/flang/test/Semantics/io11.f90 +++ b/flang/test/Semantics/io11.f90 @@ -720,3 +720,27 @@ subroutine ur2(dtv,unit,iostat,iomsg) read(unit,iotype,iostat=iostat,iomsg=iomsg) dtv%c end end + +module m28 + type t + contains + procedure, private :: write1 + generic :: write(formatted) => write1 + end type + abstract interface + subroutine absWrite(dtv, unit, iotype, v_list, iostat, iomsg) + import t + class(t), intent(in) :: dtv + integer, intent(in) :: unit + character(*), intent(in) :: iotype + integer, intent(in) :: v_list(:) + integer, intent(out) :: iostat + character(*), intent(inout) :: iomsg + end + end interface + !ERROR: Derived type 't' has conflicting type-bound input/output procedure 'write(formatted)' + procedure(absWrite) write1, write2 + interface write(formatted) + procedure write2 + end interface +end 
From d907c9f2a38140ab84fdea4db6657d1d04526cc8 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 27 Feb 2025 16:23:18 -0800 Subject: [PATCH 029/123] [WebAssembly] Generate __clang_call_terminate for Emscripten EH (#129020) When an exception thrown ends up calling `std::terminate`, for example, because an exception is thrown within a `noexcept` function or an exception is thrown from `__cxa_end_catch` during handling the previous exception, the libc++abi spec says we are supposed to call `__cxa_begin_catch` before `std::terminate`: https://libcxxabi.llvm.org/spec.html > When the personality routine encounters a termination condition, it will call `__cxa_begin_catch()` to mark the exception as handled and then call `terminate()`, which shall not return to its caller. The default Itanium ABI generates a call to `__clang_call_terminate()`, which is a function that calls `__cxa_begin_catch` and then `std::terminate`: ```ll define void @__clang_call_terminate(ptr noundef %0) { %2 = call ptr @__cxa_begin_catch(ptr %0) call void @_ZSt9terminatev() unreachable } ``` But we replaced this with just a call to `std::terminate` in https://github.com/llvm/llvm-project/commit/561abd83ffecc8d4ba8fcbbbcadb31efc55985c2 because this caused some tricky transformation problems for Wasm EH. The detailed explanation why is in the commit description, but the summary is for Wasm EH it needed a `try` with both `catch` and `catch_all` and it was tricky to deal with. But that commit replaced `__clang_call_terminate` with `std::terminate` for all Wasm programs and not only the ones that use Wasm EH. So Emscripten EH was also affected by that commit. Emscripten EH is not able to catch foreign exceptions anyway, so this is unnecessary compromise. This makes we use `__clang_call_terminate` as in the default Itanium EH for Emscripten EH. We may later fix Wasm EH too but that requires more efforts in the backend. 
Related issue: https://github.com/emscripten-core/emscripten/issues/23720 --- clang/lib/CodeGen/ItaniumCXXABI.cpp | 9 +++++++-- clang/test/CodeGenCXX/wasm-eh.cpp | 12 ++++++++++++ clang/test/CodeGenCXX/wasm-em-eh.cpp | 13 +++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGenCXX/wasm-em-eh.cpp diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index bcd171724c41d..a84412bd5c045 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -5150,9 +5150,14 @@ WebAssemblyCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF, // Itanium ABI calls __clang_call_terminate(), which __cxa_begin_catch() on // the violating exception to mark it handled, but it is currently hard to do // with wasm EH instruction structure with catch/catch_all, we just call - // std::terminate and ignore the violating exception as in CGCXXABI. + // std::terminate and ignore the violating exception as in CGCXXABI in Wasm EH + // and call __clang_call_terminate only in Emscripten EH. // TODO Consider code transformation that makes calling __clang_call_terminate - // possible. + // in Wasm EH possible. 
+ if (Exn && !EHPersonality::get(CGF).isWasmPersonality()) { + assert(CGF.CGM.getLangOpts().CPlusPlus); + return CGF.EmitNounwindRuntimeCall(getClangCallTerminateFn(CGF.CGM), Exn); + } return CGCXXABI::emitTerminateForUnexpectedException(CGF, Exn); } diff --git a/clang/test/CodeGenCXX/wasm-eh.cpp b/clang/test/CodeGenCXX/wasm-eh.cpp index 9dc15633bfed9..e8797794e7c1e 100644 --- a/clang/test/CodeGenCXX/wasm-eh.cpp +++ b/clang/test/CodeGenCXX/wasm-eh.cpp @@ -6,6 +6,9 @@ // RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -mllvm -wasm-enable-eh -exception-model=wasm -target-feature +exception-handling -emit-llvm -o - -std=c++11 | FileCheck %s // RUN: %clang_cc1 %s -triple wasm64-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -mllvm -wasm-enable-eh -exception-model=wasm -target-feature +exception-handling -emit-llvm -o - -std=c++11 | FileCheck %s +// Test code generation for Wasm EH using WebAssembly EH proposal. +// (https://github.com/WebAssembly/exception-handling/blob/main/proposals/exception-handling/Exceptions.md) + void may_throw(); void dont_throw() noexcept; @@ -381,6 +384,15 @@ void test8() { // CHECK: unreachable +void noexcept_throw() noexcept { + throw 3; +} + +// CATCH-LABEL: define void @_Z14noexcept_throwv() +// CHECK: %{{.*}} = cleanuppad within none [] +// CHECK-NEXT: call void @_ZSt9terminatev() + + // RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -exception-model=wasm -target-feature +exception-handling -emit-llvm -o - -std=c++11 2>&1 | FileCheck %s --check-prefix=WARNING-DEFAULT // RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions -exception-model=wasm -target-feature +exception-handling -Wwasm-exception-spec -emit-llvm -o - -std=c++11 2>&1 | FileCheck %s --check-prefix=WARNING-ON // RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -fms-extensions -fexceptions -fcxx-exceptions 
-exception-model=wasm -target-feature +exception-handling -Wno-wasm-exception-spec -emit-llvm -o - -std=c++11 2>&1 | FileCheck %s --check-prefix=WARNING-OFF diff --git a/clang/test/CodeGenCXX/wasm-em-eh.cpp b/clang/test/CodeGenCXX/wasm-em-eh.cpp new file mode 100644 index 0000000000000..fc96fa96b5140 --- /dev/null +++ b/clang/test/CodeGenCXX/wasm-em-eh.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -triple wasm32-unknown-emscripten -fexceptions -fcxx-exceptions -emit-llvm -o - -std=c++11 2>&1 | FileCheck %s + +// Test code generation for Wasm's Emscripten (JavaScript-style) EH. + +void noexcept_throw() noexcept { + throw 3; +} + +// CATCH-LABEL: define void @_Z14noexcept_throwv() +// CHECK: %[[LPAD:.*]] = landingpad { ptr, i32 } +// CHECK-NEXT: catch ptr null +// CHECK-NEXT: %[[EXN:.*]] = extractvalue { ptr, i32 } %[[LPAD]], 0 +// CHECK-NEXT: call void @__clang_call_terminate(ptr %[[EXN]]) From 4fbe56e5e656089c4c53e89bcb739682e542d7f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Pir=C3=B3g?= Date: Fri, 28 Feb 2025 01:29:20 +0100 Subject: [PATCH 030/123] [X86][AVX10.2] Add comments for the avx10_2convertintrin.h file (#120766) As in title. I will create a sibling pr with comments to the 512 variant. --- clang/lib/Headers/avx10_2convertintrin.h | 2953 +++++++++++++++++++++- 1 file changed, 2878 insertions(+), 75 deletions(-) diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h index 07722090c30ee..b425aa59251fa 100644 --- a/clang/lib/Headers/avx10_2convertintrin.h +++ b/clang/lib/Headers/avx10_2convertintrin.h @@ -24,24 +24,157 @@ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ __min_vector_width__(256))) +// clang-format off + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower 4 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1)); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. 
If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. Zeroing mask \a __U is used to determine if given +/// element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then zero is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( @@ -49,6 +182,44 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A, _MM_FROUND_CUR_DIRECTION); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. 
Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( @@ -56,6 +227,42 @@ _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) { _MM_FROUND_CUR_DIRECTION); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Zeroing mask \a __U is used to determine if given +/// element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then zero is taken instead. 
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( @@ -63,32 +270,240 @@ _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm256_cvtx_round2ps_ph(A, B, R) \ +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Rounding mode \a __R needs to be provided. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \param __R +/// Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or +/// result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following: +/// _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF, +/// _MM_FROUND_TO_ZERO. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. 
+#define _mm256_cvtx_round2ps_ph(__A, __B, __R) \ ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(), \ - (__mmask16)(-1), (const int)(R))) - -#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R) \ + (__v8sf)(__A), (__v8sf)(__B), (__v16hf)_mm256_undefined_ph(), \ + (__mmask16)(-1), (const int)(__R))) + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. Rounding mode \a __R needs to +/// be provided. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \param __R +/// Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or +/// result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following: +/// _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF, +/// _MM_FROUND_TO_ZERO. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
+#define _mm256_mask_cvtx_round2ps_ph(__W, __U, __A, __B, __R) \ ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R))) - -#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R) \ + (__v8sf)(__A), (__v8sf)(__B), (__v16hf)(__W), (__mmask16)(__U), (const int)(__R))) + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Zeroing mask \a __U is used to determine if given +/// element should be zeroed instead. Rounding mode \a __R needs to be provided. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \param __R +/// Rounding mode. Valid inputs are: _MM_FROUND_CUR_DIRECTION or +/// result of bitwise or of _MM_FROUND_NO_EXC with at most one of the following: +/// _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF, +/// _MM_FROUND_TO_ZERO. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then zero is taken instead. 
+#define _mm256_maskz_cvtx_round2ps_ph(__U, __A, __B, __R) \ ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()), \ - (__mmask16)(U), (const int)(R))) - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A, - __m128h __B) { + (__v8sf)(__A), (__v8sf)(__B), (__v16hf)(_mm256_setzero_ph()), \ + (__mmask16)(__U), (const int)(__R))) + +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtbiasph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( @@ -96,6 +511,29 @@ _mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( @@ -103,12 +541,76 @@ _mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. 
If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( @@ -116,18 +618,107 @@ _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiassph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { - return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( - (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); -} - +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiassph_bf8(__m128i + __W, __mmask8 __U, __m128i __A, __m128h __B) { return + (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, + (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } + +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( @@ -135,6 +726,30 @@ _mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } + +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( @@ -142,12 +757,76 @@ _mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( @@ -155,18 +834,108 @@ _mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A, - __m128h __B) { +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtbiasph_hf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( @@ -174,6 +943,29 @@ _mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( @@ -181,12 +973,76 @@ _mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __A. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. 
If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( @@ -194,18 +1050,108 @@ _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S`instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiassph_hf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( @@ -213,6 +1159,29 @@ _mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( @@ -220,12 +1189,76 @@ _mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( @@ -233,17 +1266,119 @@ _mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B)); -} - + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), + (__v8hf)(__B)); +} + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -251,18 +1386,119 @@ _mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A, - __m256h __B) { - return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), - (__v16hf)(__B)); -} - +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +_mm256_cvt2ph_bf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), + (__v16hf)(__B)); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. 
+/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x bf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvt2ph_bf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. 
+/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( @@ -270,17 +1506,120 @@ _mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B)); -} - +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. 
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvts2ph_bf8(__m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), + (__v8hf)(__B)); +} + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -288,18 +1627,120 @@ _mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. 
Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvts2ph_bf8(__m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A), - (__v16hf)(__B)); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + (__v16hf)(__B)); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x bf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts2ph_bf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( @@ -307,17 +1748,119 @@ _mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B)); -} - + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), + (__v8hf)(__B)); +} + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -325,18 +1868,119 @@ _mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A, - __m256h __B) { - return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), - (__v16hf)(__B)); -} - +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +_mm256_cvt2ph_hf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), + (__v16hf)(__B)); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x hf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvt2ph_hf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. 
+/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( @@ -344,17 +1988,120 @@ _mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B)); -} - +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvts2ph_hf8(__m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), + (__v8hf)(__B)); +} + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. 
+/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -362,18 +2109,120 @@ _mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. 
Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvts2ph_hf8(__m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A), - (__v16hf)(__B)); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + (__v16hf)(__B)); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x hf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts2ph_hf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( @@ -381,207 +2230,1161 @@ _mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is exact. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvthf8_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. 
Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvthf8_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U); } +/// Convert 256-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __A +/// A 256-bit vector of [32 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8_ph(__m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [32 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvthf8_ph(__m256h __W, __mmask16 __U, __m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [32 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. 
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvthf8_ph(__mmask16 __U, __m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. 
If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. 
Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_bf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. 
+/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. 
+/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtsph_bf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated. 
+/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. 
Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_hf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtsph_hf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. 
+/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) { return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8)); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
static __inline__ __m128h __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) { +_mm_mask_cvtbf8_ph(__m128h __W, __mmask8 __U, __m128i __A) { return _mm_castsi128_ph( - _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8)); -} - + _mm_mask_slli_epi16((__m128i)__W, __U, _mm_cvtepi8_epi16(__A), 8)); +} + +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) { return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8)); } +/// Convert 256-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __A +/// A 256-bit vector of [32 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. 
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) { return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8)); } +/// Convert 256-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [32 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) { +_mm256_mask_cvtbf8_ph(__m256h __W, __mmask16 __U, __m128i __A) { return _mm256_castsi256_ph( - _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8)); -} - + _mm256_mask_slli_epi16((__m256i)__W, __U, _mm256_cvtepi8_epi16(__A), 8)); +} + +/// Convert 256-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. 
+/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [32 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) { return _mm256_castsi256_ph( _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8)); } +// clang-format on + #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 From 16a81e16674a1360e3522131278f5a222c107df2 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Fri, 28 Feb 2025 10:18:37 +0900 Subject: [PATCH 031/123] [flang][docs][NFC] Fix Markdown `/*comments*/` (#129018) `*` in `/*comments*/` were interpreted as emphasis marks and were not displayed in https://flang.llvm.org/docs/Extensions.html. --- flang/docs/Extensions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index e70f40306c4e1..d781dee75e07e 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -218,7 +218,7 @@ end the length parameter of the implicit type, not the first. * Outside a character literal, a comment after a continuation marker (&) need not begin with a comment marker (!). -* Classic C-style /*comments*/ are skipped, so multi-language header +* Classic C-style `/*comments*/` are skipped, so multi-language header files are easier to write and use. * $ and \ edit descriptors are supported in FORMAT to suppress newline output on user prompts. From dbab13b1e4114f449773979bc415ffe95648d118 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Feb 2025 17:21:17 -0800 Subject: [PATCH 032/123] [RISCV] Move RISCVVInversePseudosTable from RISCVMCTargetDesc.cpp to RISCVBaseInfo.cpp. NFC RISCVMCTargetDesc contains the instruction, register, etc. descriptions from TableGen. 
Other searchable tables in MCTargetDesc live in RISCVBaseInfo.cpp --- .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp | 6 ++++++ llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 12 ++++++++++++ .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 9 --------- .../Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h | 14 -------------- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 3b2c0cba66d12..9c1c364c18549 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -33,6 +33,12 @@ namespace RISCVInsnOpcode { #include "RISCVGenSearchableTables.inc" } // namespace RISCVInsnOpcode +namespace RISCVVInversePseudosTable { +using namespace RISCV; +#define GET_RISCVVInversePseudosTable_IMPL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCVVInversePseudosTable + namespace RISCVABI { ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits, StringRef ABIName) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 5cc20954fb95b..88435b2b52ca5 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -654,6 +654,18 @@ inline static bool getSpimm(unsigned RlistVal, unsigned &SpimmVal, void printRlist(unsigned SlistEncode, raw_ostream &OS); } // namespace RISCVZC +namespace RISCVVInversePseudosTable { +struct PseudoInfo { + uint16_t Pseudo; + uint16_t BaseInstr; + uint8_t VLMul; + uint8_t SEW; +}; + +#define GET_RISCVVInversePseudosTable_DECL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCVVInversePseudosTable + } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 868614cbdad6d..5f1d7b03f3218 100644 --- 
a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -41,15 +41,6 @@ #define GET_SUBTARGETINFO_MC_DESC #include "RISCVGenSubtargetInfo.inc" -namespace llvm::RISCVVInversePseudosTable { - -using namespace RISCV; - -#define GET_RISCVVInversePseudosTable_IMPL -#include "RISCVGenSearchableTables.inc" - -} // namespace llvm::RISCVVInversePseudosTable - using namespace llvm; static MCInstrInfo *createRISCVMCInstrInfo() { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index 6cc22af601fdb..bdee7ed4f011e 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -36,20 +36,6 @@ MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCSubtargetInfo &STI, std::unique_ptr createRISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit); - -namespace RISCVVInversePseudosTable { - -struct PseudoInfo { - uint16_t Pseudo; - uint16_t BaseInstr; - uint8_t VLMul; - uint8_t SEW; -}; - -#define GET_RISCVVInversePseudosTable_DECL -#include "RISCVGenSearchableTables.inc" - -} // namespace RISCVVInversePseudosTable } // namespace llvm // Defines symbolic names for RISC-V registers. 
From d51470199f2b94ced62a358d7c5863805a08ef83 Mon Sep 17 00:00:00 2001 From: GkvJwa Date: Fri, 28 Feb 2025 09:38:08 +0800 Subject: [PATCH 033/123] [asan][win] Fix CreateThread leak (#126738) Fix #126541 Since ```t->Destroy``` cannot be called after ```start_routine```(When calling standard thread_start in crt) Intercept `ExitThread` and free the memory created by `VirtualAlloc' --- compiler-rt/lib/asan/asan_win.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/asan/asan_win.cpp b/compiler-rt/lib/asan/asan_win.cpp index 09a13b11cff1f..027340280e068 100644 --- a/compiler-rt/lib/asan/asan_win.cpp +++ b/compiler-rt/lib/asan/asan_win.cpp @@ -145,7 +145,6 @@ static thread_return_t THREAD_CALLING_CONV asan_thread_start(void *arg) { t->GetStartData(params); auto res = (*params.start_routine)(params.arg); - t->Destroy(); // POSIX calls this from TSD destructor. return res; } @@ -166,6 +165,13 @@ INTERCEPTOR_WINAPI(HANDLE, CreateThread, LPSECURITY_ATTRIBUTES security, thr_flags, tid); } +INTERCEPTOR_WINAPI(void, ExitThread, DWORD dwExitCode) { + AsanThread *t = (AsanThread *)__asan::GetCurrentThread(); + if (t) + t->Destroy(); + REAL(ExitThread)(dwExitCode); +} + // }}} namespace __asan { @@ -181,6 +187,7 @@ void InitializePlatformInterceptors() { (LPCWSTR)&InitializePlatformInterceptors, &pinned)); ASAN_INTERCEPT_FUNC(CreateThread); + ASAN_INTERCEPT_FUNC(ExitThread); ASAN_INTERCEPT_FUNC(SetUnhandledExceptionFilter); #ifdef _WIN64 From 6e039aaa66f5b7a61c300abfccc15a53b8a615c9 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 27 Feb 2025 19:56:56 -0600 Subject: [PATCH 034/123] [lldb-dap] Adaptor -> Adapter (NFC) (#129110) Both spellings are considered correct and acceptable, with adapter being more common in American English. Given that DAP stands for Debug Adapter Protocol (with an e) let's go with that as the canonical spelling. 
--- .../Python/lldbsuite/test/test_categories.py | 2 +- .../test/tools/lldb-dap/dap_server.py | 34 +++++++++---------- .../test/tools/lldb-dap/lldbdap_testcase.py | 18 +++++----- .../tools/lldb-dap/attach/TestDAP_attach.py | 10 +++--- .../attach/TestDAP_attachByPortNum.py | 8 ++--- .../TestDAP_breakpointEvents.py | 2 +- .../breakpoint/TestDAP_setBreakpoints.py | 8 ++--- .../TestDAP_setExceptionBreakpoints.py | 4 +-- .../TestDAP_setFunctionBreakpoints.py | 4 +-- .../lldb-dap/commands/TestDAP_commands.py | 2 +- .../lldb-dap/coreFile/TestDAP_coreFile.py | 6 ++-- .../lldb-dap/disconnect/TestDAP_disconnect.py | 2 +- .../tools/lldb-dap/launch/TestDAP_launch.py | 10 +++--- .../runInTerminal/TestDAP_runInTerminal.py | 4 +-- .../tools/lldb-dap/server/TestDAP_server.py | 6 ++-- .../lldb-dap/variables/TestDAP_variables.py | 2 +- lldb/tools/lldb-dap/DAP.cpp | 2 +- lldb/tools/lldb-dap/DAP.h | 2 +- .../tools/lldb-dap/Handler/RequestHandler.cpp | 2 +- lldb/tools/lldb-dap/JSONUtils.cpp | 4 +-- lldb/tools/lldb-dap/JSONUtils.h | 16 ++++----- lldb/tools/lldb-dap/Options.td | 2 +- lldb/tools/lldb-dap/RunInTerminal.cpp | 4 +-- lldb/tools/lldb-dap/RunInTerminal.h | 10 +++--- lldb/tools/lldb-dap/lldb-dap.cpp | 12 +++---- 25 files changed, 88 insertions(+), 88 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/test_categories.py b/lldb/packages/Python/lldbsuite/test/test_categories.py index 036bda9c957d1..b585f695adeab 100644 --- a/lldb/packages/Python/lldbsuite/test/test_categories.py +++ b/lldb/packages/Python/lldbsuite/test/test_categories.py @@ -31,7 +31,7 @@ "libc++": "Test for libc++ data formatters", "libstdcxx": "Test for libstdcxx data formatters", "lldb-server": "Tests related to lldb-server", - "lldb-dap": "Tests for the Debug Adaptor Protocol with lldb-dap", + "lldb-dap": "Tests for the Debug Adapter Protocol with lldb-dap", "llgs": "Tests for the gdb-server functionality of lldb-server", "pexpect": "Tests requiring the pexpect library to be available", "objc": 
"Tests related to the Objective-C programming language support", diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 391378cf027bc..9471594b66012 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -76,7 +76,7 @@ def read_packet(f, verbose=False, trace_file=None): if verbose: print('json: "%s"' % (json_str)) if trace_file: - trace_file.write("from adaptor:\n%s\n" % (json_str)) + trace_file.write("from adapter:\n%s\n" % (json_str)) # Decode the JSON bytes into a python dictionary return json.loads(json_str) @@ -259,7 +259,7 @@ def handle_recv_packet(self, packet): def send_packet(self, command_dict, set_sequence=True): """Take the "command_dict" python dictionary and encode it as a JSON string and send the contents as a packet to the VSCode debug - adaptor""" + adapter""" # Set the sequence ID for this command automatically if set_sequence: command_dict["seq"] = self.sequence @@ -267,7 +267,7 @@ def send_packet(self, command_dict, set_sequence=True): # Encode our command dictionary as a JSON string json_str = json.dumps(command_dict, separators=(",", ":")) if self.trace_file: - self.trace_file.write("to adaptor:\n%s\n" % (json_str)) + self.trace_file.write("to adapter:\n%s\n" % (json_str)) length = len(json_str) if length > 0: # Send the encoded JSON packet and flush the 'send' file @@ -275,7 +275,7 @@ def send_packet(self, command_dict, set_sequence=True): self.send.flush() def recv_packet(self, filter_type=None, filter_event=None, timeout=None): - """Get a JSON packet from the VSCode debug adaptor. This function + """Get a JSON packet from the VSCode debug adapter. This function assumes a thread that reads packets is running and will deliver any received packets by calling handle_recv_packet(...). 
This function will wait for the packet to arrive and return it when @@ -1184,7 +1184,7 @@ def request_setInstructionBreakpoints(self, memory_reference=[]): return self.send_recv(command_dict) -class DebugAdaptorServer(DebugCommunication): +class DebugAdapterServer(DebugCommunication): def __init__( self, executable=None, @@ -1196,7 +1196,7 @@ def __init__( self.process = None self.connection = None if executable is not None: - process, connection = DebugAdaptorServer.launch( + process, connection = DebugAdapterServer.launch( executable=executable, connection=connection, env=env, log_file=log_file ) self.process = process @@ -1224,12 +1224,12 @@ def __init__( @classmethod def launch(cls, /, executable, env=None, log_file=None, connection=None): - adaptor_env = os.environ.copy() + adapter_env = os.environ.copy() if env is not None: - adaptor_env.update(env) + adapter_env.update(env) if log_file: - adaptor_env["LLDBDAP_LOG"] = log_file + adapter_env["LLDBDAP_LOG"] = log_file args = [executable] if connection is not None: @@ -1241,7 +1241,7 @@ def launch(cls, /, executable, env=None, log_file=None, connection=None): stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=adaptor_env, + env=adapter_env, ) if connection is None: @@ -1271,7 +1271,7 @@ def get_pid(self): return -1 def terminate(self): - super(DebugAdaptorServer, self).terminate() + super(DebugAdapterServer, self).terminate() if self.process is not None: self.process.terminate() self.process.wait() @@ -1347,7 +1347,7 @@ def run_vscode(dbg, args, options): def main(): parser = optparse.OptionParser( description=( - "A testing framework for the Visual Studio Code Debug Adaptor protocol" + "A testing framework for the Visual Studio Code Debug Adapter protocol" ) ) @@ -1357,7 +1357,7 @@ def main(): dest="vscode_path", help=( "The path to the command line program that implements the " - "Visual Studio Code Debug Adaptor protocol." + "Visual Studio Code Debug Adapter protocol." 
), default=None, ) @@ -1407,7 +1407,7 @@ def main(): dest="replay", help=( "Specify a file containing a packet log to replay with the " - "current Visual Studio Code Debug Adaptor executable." + "current Visual Studio Code Debug Adapter executable." ), default=None, ) @@ -1418,7 +1418,7 @@ def main(): action="store_true", dest="debug", default=False, - help="Pause waiting for a debugger to attach to the debug adaptor", + help="Pause waiting for a debugger to attach to the debug adapter", ) parser.add_option( @@ -1581,11 +1581,11 @@ def main(): if options.vscode_path is None and options.connection is None: print( "error: must either specify a path to a Visual Studio Code " - "Debug Adaptor vscode executable path using the --vscode " + "Debug Adapter vscode executable path using the --vscode " "option, or using the --connection option" ) return - dbg = DebugAdaptorServer( + dbg = DebugAdapterServer( executable=options.vscode_path, connection=options.connection ) if options.debug: diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 8b0f74ba389c3..70b04b051e0ec 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -14,13 +14,13 @@ class DAPTestCaseBase(TestBase): timeoutval = 10 * (10 if ("ASAN_OPTIONS" in os.environ) else 1) NO_DEBUG_INFO_TESTCASE = True - def create_debug_adaptor(self, lldbDAPEnv=None, connection=None): - """Create the Visual Studio Code debug adaptor""" + def create_debug_adapter(self, lldbDAPEnv=None, connection=None): + """Create the Visual Studio Code debug adapter""" self.assertTrue( is_exe(self.lldbDAPExec), "lldb-dap must exist and be executable" ) log_file_path = self.getBuildArtifact("dap.txt") - self.dap_server = dap_server.DebugAdaptorServer( + self.dap_server = dap_server.DebugAdapterServer( executable=self.lldbDAPExec, 
connection=connection, init_commands=self.setUpCommands(), @@ -28,9 +28,9 @@ def create_debug_adaptor(self, lldbDAPEnv=None, connection=None): env=lldbDAPEnv, ) - def build_and_create_debug_adaptor(self, lldbDAPEnv=None): + def build_and_create_debug_adapter(self, lldbDAPEnv=None): self.build() - self.create_debug_adaptor(lldbDAPEnv) + self.create_debug_adapter(lldbDAPEnv) def set_source_breakpoints(self, source_path, lines, data=None): """Sets source breakpoints and returns an array of strings containing @@ -324,11 +324,11 @@ def attach( gdbRemotePort=None, gdbRemoteHostname=None, ): - """Build the default Makefile target, create the DAP debug adaptor, + """Build the default Makefile target, create the DAP debug adapter, and attach to the process. """ - # Make sure we disconnect and terminate the DAP debug adaptor even + # Make sure we disconnect and terminate the DAP debug adapter even # if we throw an exception during the test case. def cleanup(): if disconnectAutomatically: @@ -479,10 +479,10 @@ def build_and_launch( launchCommands=None, expectFailure=False, ): - """Build the default Makefile target, create the DAP debug adaptor, + """Build the default Makefile target, create the DAP debug adapter, and launch the process. """ - self.build_and_create_debug_adaptor(lldbDAPEnv) + self.build_and_create_debug_adapter(lldbDAPEnv) self.assertTrue(os.path.exists(program), "executable must exist") return self.launch( diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py index e143c2798b209..9df44cc454d5d 100644 --- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py +++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py @@ -44,7 +44,7 @@ def test_by_pid(self): """ Tests attaching to a process by process ID. 
""" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") self.process = subprocess.Popen( [program], @@ -60,7 +60,7 @@ def test_by_name(self): """ Tests attaching to a process by process name. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() orig_program = self.getBuildArtifact("a.out") # Since we are going to attach by process name, we need a unique # process name that has minimal chance to match a process that is @@ -101,7 +101,7 @@ def test_by_name_waitFor(self): next instance of a process to be launched, ingoring all current ones. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") self.spawn_thread = threading.Thread( target=spawn_and_wait, @@ -137,7 +137,7 @@ def test_commands(self): "terminateCommands" are a list of LLDB commands that get executed when the debugger session terminates. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # Here we just create a target and launch the process as a way to test # if we are able to use attach commands to create any kind of a target @@ -211,7 +211,7 @@ def test_terminate_commands(self): Tests that the "terminateCommands", that can be passed during attach, are run when the debugger is disconnected. 
""" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # Here we just create a target and launch the process as a way to test # if we are able to use attach commands to create any kind of a target diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py index fbabc857bd0e0..9024120c868fd 100644 --- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py +++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py @@ -60,7 +60,7 @@ def test_by_port(self): """ Tests attaching to a process by port. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") debug_server_tool = self.getBuiltinDebugServerTool() @@ -92,7 +92,7 @@ def test_by_port_and_pid(self): """ Tests attaching to a process by process ID and port number. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # It is not necessary to launch "lldb-server" to obtain the actual port and pid for attaching. @@ -120,7 +120,7 @@ def test_by_invalid_port(self): """ Tests attaching to a process by invalid port number 0. 
""" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") port = 0 @@ -139,7 +139,7 @@ def test_by_illegal_port(self): """ Tests attaching to a process by illegal/greater port number 65536 """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") port = 65536 diff --git a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py index a20384b75f5c0..11573eba06907 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py @@ -41,7 +41,7 @@ def test_breakpoint_events(self): foo_bp1_line = line_number("foo.cpp", "foo breakpoint 1") foo_bp2_line = line_number("foo.cpp", "foo breakpoint 2") - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py index c62feda64a125..26df2573555df 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py @@ -27,7 +27,7 @@ def test_source_map(self): with the corresponding source maps to have breakpoints and frames working. 
""" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() other_basename = "other-copy.c" other_path = self.getBuildArtifact(other_basename) @@ -100,7 +100,7 @@ def test_source_map(self): @skipIfWindows def test_set_and_clear(self): """Tests setting and clearing source file and line breakpoints. - This packet is a bit tricky on the debug adaptor side since there + This packet is a bit tricky on the debug adapter side since there is no "clearBreakpoints" packet. Source file and line breakpoints are set by sending a "setBreakpoints" packet with a source file specified and zero or more source lines. If breakpoints have been @@ -116,7 +116,7 @@ def test_set_and_clear(self): third_line = line_number("main.cpp", "break 14") lines = [first_line, third_line, second_line] - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. program = self.getBuildArtifact("a.out") @@ -257,7 +257,7 @@ def test_clear_breakpoints_unset_breakpoints(self): line_number("main.cpp", "break 13"), ] - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. 
program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py index b2ab12e51bf68..92ac66cd44c5d 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py @@ -14,7 +14,7 @@ class TestDAP_setExceptionBreakpoints(lldbdap_testcase.DAPTestCaseBase): @skipIfWindows def test_functionality(self): """Tests setting and clearing exception breakpoints. - This packet is a bit tricky on the debug adaptor side since there + This packet is a bit tricky on the debug adapter side since there is no "clear exception breakpoints" packet. Exception breakpoints are set by sending a "setExceptionBreakpoints" packet with zero or more exception filters. If exception breakpoints have been set @@ -26,7 +26,7 @@ def test_functionality(self): and the functionality of each breakpoint, like 'conditions' and x'hitCondition' settings. """ - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py index 8f00f42574b56..946595f639edc 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py @@ -14,7 +14,7 @@ class TestDAP_setFunctionBreakpoints(lldbdap_testcase.DAPTestCaseBase): @skipIfWindows def test_set_and_clear(self): """Tests setting and clearing function breakpoints. 
- This packet is a bit tricky on the debug adaptor side since there + This packet is a bit tricky on the debug adapter side since there is no "clearFunction Breakpoints" packet. Function breakpoints are set by sending a "setFunctionBreakpoints" packet with zero or more function names. If function breakpoints have been set before, @@ -25,7 +25,7 @@ def test_set_and_clear(self): correctly. It doesn't test hitting breakpoints and the functionality of each breakpoint, like 'conditions' and 'hitCondition' settings. """ - # Visual Studio Code Debug Adaptors have no way to specify the file + # Visual Studio Code Debug Adapters have no way to specify the file # without launching or attaching to a process, so we must start a # process in order to be able to set breakpoints. program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py b/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py index e4cf903fc0d11..25ecbb5cf106b 100644 --- a/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py +++ b/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py @@ -75,7 +75,7 @@ def test_command_directive_abort_on_error_attach_commands(self): "settings set target.show-hex-variable-values-with-leading-zeroes false" ) command_abort_on_error = "settings set foo bar" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() self.attach( program, attachCommands=["?!" + command_quiet, "!" 
+ command_abort_on_error], diff --git a/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py index 5189435185607..1896acea15a99 100644 --- a/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py +++ b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py @@ -18,7 +18,7 @@ def test_core_file(self): exe_file = os.path.join(current_dir, "linux-x86_64.out") core_file = os.path.join(current_dir, "linux-x86_64.core") - self.create_debug_adaptor() + self.create_debug_adapter() self.attach(exe_file, coreFile=core_file) expected_frames = [ @@ -64,7 +64,7 @@ def test_core_file_source_mapping_array(self): exe_file = os.path.join(current_dir, "linux-x86_64.out") core_file = os.path.join(current_dir, "linux-x86_64.core") - self.create_debug_adaptor() + self.create_debug_adapter() source_map = [["/home/labath/test", current_dir]] self.attach(exe_file, coreFile=core_file, sourceMap=source_map) @@ -78,7 +78,7 @@ def test_core_file_source_mapping_object(self): exe_file = os.path.join(current_dir, "linux-x86_64.out") core_file = os.path.join(current_dir, "linux-x86_64.core") - self.create_debug_adaptor() + self.create_debug_adapter() source_map = {"/home/labath/test": current_dir} self.attach(exe_file, coreFile=core_file, sourceMap=source_map) diff --git a/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py b/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py index f9e461adecb15..0cb792d662a80 100644 --- a/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py +++ b/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py @@ -52,7 +52,7 @@ def test_attach(self): before the file is created, and as the process is not terminated upon disconnection, the file is created anyway. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # Use a file as a synchronization point between test and inferior. 
diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py index 7898d01457afc..0c92e5bff07c6 100644 --- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py +++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py @@ -32,7 +32,7 @@ def test_termination(self): Tests the correct termination of lldb-dap upon a 'disconnect' request. """ - self.create_debug_adaptor() + self.create_debug_adapter() # The underlying lldb-dap process must be alive self.assertEqual(self.dap_server.process.poll(), None) @@ -92,7 +92,7 @@ def test_cwd(self): def test_debuggerRoot(self): """ Tests the "debuggerRoot" will change the working directory of - the lldb-dap debug adaptor. + the lldb-dap debug adapter. """ program = self.getBuildArtifact("a.out") program_parent_dir = os.path.realpath(os.path.dirname(os.path.dirname(program))) @@ -376,7 +376,7 @@ def test_extra_launch_commands(self): """ Tests the "launchCommands" with extra launching settings """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") source = "main.c" @@ -440,7 +440,7 @@ def test_failing_launch_commands(self): """ Tests "launchCommands" failures prevents a launch. """ - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") # Run an invalid launch command, in this case a bad path. @@ -483,7 +483,7 @@ def test_terminate_commands(self): Tests that the "terminateCommands", that can be passed during launch, are run when the debugger is disconnected. 
""" - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() program = self.getBuildArtifact("a.out") terminateCommands = ["expr 4+2"] diff --git a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py index ac96bcc1364a2..9141565ac1b9b 100644 --- a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py +++ b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py @@ -118,7 +118,7 @@ def test_runInTerminalWithObjectEnv(self): def test_runInTerminalInvalidTarget(self): if not self.isTestSupported(): return - self.build_and_create_debug_adaptor() + self.build_and_create_debug_adapter() response = self.launch( "INVALIDPROGRAM", runInTerminal=True, @@ -247,4 +247,4 @@ def test_NonAttachedRunInTerminalLauncher(self): self.readPidMessage(comm_file) _, stderr = proc.communicate() - self.assertIn("Timed out trying to get messages from the debug adaptor", stderr) + self.assertIn("Timed out trying to get messages from the debug adapter", stderr) diff --git a/lldb/test/API/tools/lldb-dap/server/TestDAP_server.py b/lldb/test/API/tools/lldb-dap/server/TestDAP_server.py index 1f562e989533a..7a9a4f434e04b 100644 --- a/lldb/test/API/tools/lldb-dap/server/TestDAP_server.py +++ b/lldb/test/API/tools/lldb-dap/server/TestDAP_server.py @@ -15,7 +15,7 @@ class TestDAP_server(lldbdap_testcase.DAPTestCaseBase): def start_server(self, connection): log_file_path = self.getBuildArtifact("dap.txt") - (process, connection) = dap_server.DebugAdaptorServer.launch( + (process, connection) = dap_server.DebugAdapterServer.launch( executable=self.lldbDAPExec, connection=connection, log_file=log_file_path, @@ -29,7 +29,7 @@ def cleanup(): return (process, connection) def run_debug_session(self, connection, name): - self.dap_server = dap_server.DebugAdaptorServer( + self.dap_server = dap_server.DebugAdapterServer( connection=connection, ) program = 
self.getBuildArtifact("a.out") @@ -83,7 +83,7 @@ def test_server_interrupt(self): """ self.build() (process, connection) = self.start_server(connection="tcp://localhost:0") - self.dap_server = dap_server.DebugAdaptorServer( + self.dap_server = dap_server.DebugAdapterServer( connection=connection, ) program = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py index 580ad38ab51c1..fde66a28382c7 100644 --- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py +++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py @@ -113,7 +113,7 @@ def darwin_dwarf_missing_obj(self, initCommands): # error when we run to main and try to get variables os.unlink(main_obj) - self.create_debug_adaptor() + self.create_debug_adapter() self.assertTrue(os.path.exists(program), "executable must exist") self.launch(program=program, initCommands=initCommands) diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index cd53e2aca3fb6..53c514b790f38 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -64,7 +64,7 @@ namespace lldb_dap { DAP::DAP(std::string name, llvm::StringRef path, std::ofstream *log, lldb::IOObjectSP input, lldb::IOObjectSP output, ReplMode repl_mode, std::vector pre_init_commands) - : name(std::move(name)), debug_adaptor_path(path), log(log), + : name(std::move(name)), debug_adapter_path(path), log(log), input(std::move(input)), output(std::move(output)), broadcaster("lldb-dap"), exception_breakpoints(), pre_init_commands(std::move(pre_init_commands)), diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h index a7c7e5d9bbc19..8b2e498a28c95 100644 --- a/lldb/tools/lldb-dap/DAP.h +++ b/lldb/tools/lldb-dap/DAP.h @@ -146,7 +146,7 @@ struct SendEventRequestHandler : public lldb::SBCommandPluginInterface { struct DAP { std::string name; - llvm::StringRef debug_adaptor_path; + llvm::StringRef 
debug_adapter_path; std::ofstream *log; InputStream input; OutputStream output; diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp index 0a32e39ea3aff..606ada90ce2e5 100644 --- a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp @@ -100,7 +100,7 @@ static llvm::Error RunInTerminal(DAP &dap, debugger_pid = getpid(); #endif llvm::json::Object reverse_request = CreateRunInTerminalReverseRequest( - launch_request, dap.debug_adaptor_path, comm_file.m_path, debugger_pid); + launch_request, dap.debug_adapter_path, comm_file.m_path, debugger_pid); dap.SendReverseRequest("runInTerminal", std::move(reverse_request)); diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index 9f08efb2a3ac1..9dec4ca1df49a 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -1436,7 +1436,7 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit) { /// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal llvm::json::Object CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, - llvm::StringRef debug_adaptor_path, + llvm::StringRef debug_adapter_path, llvm::StringRef comm_file, lldb::pid_t debugger_pid) { llvm::json::Object run_in_terminal_args; @@ -1446,7 +1446,7 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, const auto *launch_request_arguments = launch_request.getObject("arguments"); // The program path must be the first entry in the "args" field - std::vector args = {debug_adaptor_path.str(), "--comm-file", + std::vector args = {debug_adapter_path.str(), "--comm-file", comm_file.str()}; if (debugger_pid != LLDB_INVALID_PROCESS_ID) { args.push_back("--debugger-pid"); diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h index db56d98777347..55d2360e0a224 100644 --- 
a/lldb/tools/lldb-dap/JSONUtils.h +++ b/lldb/tools/lldb-dap/JSONUtils.h @@ -233,7 +233,7 @@ void AppendBreakpoint( std::optional request_path = std::nullopt, std::optional request_line = std::nullopt); -/// Converts breakpoint location to a debug adaptor protocol "Breakpoint". +/// Converts breakpoint location to a debug adapter protocol "Breakpoint". /// /// \param[in] bp /// A LLDB breakpoint object to convert into a JSON value @@ -290,7 +290,7 @@ llvm::json::Value CreateModule(lldb::SBTarget &target, lldb::SBModule &module); llvm::json::Object CreateEventObject(const llvm::StringRef event_name); /// Create a "ExceptionBreakpointsFilter" JSON object as described in -/// the debug adaptor definition. +/// the debug adapter definition. /// /// \param[in] bp /// The exception breakpoint object to use @@ -301,7 +301,7 @@ llvm::json::Object CreateEventObject(const llvm::StringRef event_name); llvm::json::Value CreateExceptionBreakpointFilter(const ExceptionBreakpoint &bp); -/// Create a "Scope" JSON object as described in the debug adaptor definition. +/// Create a "Scope" JSON object as described in the debug adapter definition. /// /// \param[in] name /// The value to place into the "name" key @@ -322,7 +322,7 @@ llvm::json::Value CreateScope(const llvm::StringRef name, int64_t variablesReference, int64_t namedVariables, bool expensive); -/// Create a "Source" JSON object as described in the debug adaptor definition. +/// Create a "Source" JSON object as described in the debug adapter definition. /// /// \param[in] file /// The SBFileSpec to use when populating out the "Source" object @@ -332,7 +332,7 @@ llvm::json::Value CreateScope(const llvm::StringRef name, /// definition outlined by Microsoft. llvm::json::Value CreateSource(const lldb::SBFileSpec &file); -/// Create a "Source" JSON object as described in the debug adaptor definition. +/// Create a "Source" JSON object as described in the debug adapter definition. 
/// /// \param[in] line_entry /// The LLDB line table to use when populating out the "Source" @@ -573,8 +573,8 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit); /// The original launch_request object whose fields are used to construct /// the reverse request object. /// -/// \param[in] debug_adaptor_path -/// Path to the current debug adaptor. It will be used to delegate the +/// \param[in] debug_adapter_path +/// Path to the current debug adapter. It will be used to delegate the /// launch of the target. /// /// \param[in] comm_file @@ -590,7 +590,7 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit); /// Microsoft. llvm::json::Object CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, - llvm::StringRef debug_adaptor_path, + llvm::StringRef debug_adapter_path, llvm::StringRef comm_file, lldb::pid_t debugger_pid); diff --git a/lldb/tools/lldb-dap/Options.td b/lldb/tools/lldb-dap/Options.td index 97a6ec118c47b..a1baf2f0370bd 100644 --- a/lldb/tools/lldb-dap/Options.td +++ b/lldb/tools/lldb-dap/Options.td @@ -33,7 +33,7 @@ def launch_target: S<"launch-target">, def comm_file: S<"comm-file">, MetaVarName<"">, - HelpText<"The fifo file used to communicate the with the debug adaptor " + HelpText<"The fifo file used to communicate the with the debug adapter " "when using --launch-target.">; def debugger_pid: S<"debugger-pid">, diff --git a/lldb/tools/lldb-dap/RunInTerminal.cpp b/lldb/tools/lldb-dap/RunInTerminal.cpp index 4fe09e2885a8e..9f309dd78221a 100644 --- a/lldb/tools/lldb-dap/RunInTerminal.cpp +++ b/lldb/tools/lldb-dap/RunInTerminal.cpp @@ -97,9 +97,9 @@ static Error ToError(const RunInTerminalMessage &message) { RunInTerminalLauncherCommChannel::RunInTerminalLauncherCommChannel( StringRef comm_file) - : m_io(comm_file, "debug adaptor") {} + : m_io(comm_file, "debug adapter") {} -Error RunInTerminalLauncherCommChannel::WaitUntilDebugAdaptorAttaches( +Error 
RunInTerminalLauncherCommChannel::WaitUntilDebugAdapterAttaches( std::chrono::milliseconds timeout) { if (Expected message = GetNextMessage(m_io, timeout)) { diff --git a/lldb/tools/lldb-dap/RunInTerminal.h b/lldb/tools/lldb-dap/RunInTerminal.h index b20f8beb6071d..457850c8ea538 100644 --- a/lldb/tools/lldb-dap/RunInTerminal.h +++ b/lldb/tools/lldb-dap/RunInTerminal.h @@ -72,7 +72,7 @@ class RunInTerminalLauncherCommChannel { public: RunInTerminalLauncherCommChannel(llvm::StringRef comm_file); - /// Wait until the debug adaptor attaches. + /// Wait until the debug adapter attaches. /// /// \param[in] timeout /// How long to wait to be attached. @@ -80,16 +80,16 @@ class RunInTerminalLauncherCommChannel { /// \return /// An \a llvm::Error object in case of errors or if this operation times /// out. - llvm::Error WaitUntilDebugAdaptorAttaches(std::chrono::milliseconds timeout); + llvm::Error WaitUntilDebugAdapterAttaches(std::chrono::milliseconds timeout); - /// Notify the debug adaptor this process' pid. + /// Notify the debug adapter this process' pid. /// /// \return /// An \a llvm::Error object in case of errors or if this operation times /// out. llvm::Error NotifyPid(); - /// Notify the debug adaptor that there's been an error. + /// Notify the debug adapter that there's been an error. void NotifyError(llvm::StringRef error); private: @@ -122,7 +122,7 @@ class RunInTerminalDebugAdapterCommChannel { FifoFileIO m_io; }; -/// Create a fifo file used to communicate the debug adaptor with +/// Create a fifo file used to communicate the debug adapter with /// the runInTerminal launcher. 
llvm::Expected> CreateRunInTerminalCommFile(); diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 6dff960daede9..d005eccfae903 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -175,22 +175,22 @@ static void PrintHelp(LLDBDAPOptTable &table, llvm::StringRef tool_name) { // If --launch-target is provided, this instance of lldb-dap becomes a // runInTerminal launcher. It will ultimately launch the program specified in // the --launch-target argument, which is the original program the user wanted -// to debug. This is done in such a way that the actual debug adaptor can +// to debug. This is done in such a way that the actual debug adapter can // place breakpoints at the beginning of the program. // -// The launcher will communicate with the debug adaptor using a fifo file in the +// The launcher will communicate with the debug adapter using a fifo file in the // directory specified in the --comm-file argument. // -// Regarding the actual flow, this launcher will first notify the debug adaptor +// Regarding the actual flow, this launcher will first notify the debug adapter // of its pid. Then, the launcher will be in a pending state waiting to be -// attached by the adaptor. +// attached by the adapter. // // Once attached and resumed, the launcher will exec and become the program // specified by --launch-target, which is the original target the // user wanted to run. // // In case of errors launching the target, a suitable error message will be -// emitted to the debug adaptor. +// emitted to the debug adapter. static llvm::Error LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg, llvm::StringRef comm_file, lldb::pid_t debugger_pid, @@ -219,7 +219,7 @@ static llvm::Error LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg, const char *timeout_env_var = getenv("LLDB_DAP_RIT_TIMEOUT_IN_MS"); int timeout_in_ms = timeout_env_var != nullptr ? 
atoi(timeout_env_var) : 20000; - if (llvm::Error err = comm_channel.WaitUntilDebugAdaptorAttaches( + if (llvm::Error err = comm_channel.WaitUntilDebugAdapterAttaches( std::chrono::milliseconds(timeout_in_ms))) { return err; } From a17091de69ed8996aca48acf6b71575bba3f0567 Mon Sep 17 00:00:00 2001 From: Han-Chung Wang Date: Thu, 27 Feb 2025 17:59:27 -0800 Subject: [PATCH 035/123] [mlir] Add two clone methods about encoding to RankedTensorType. (#127709) There are clone methods for shape and element type, but not for encodings. The revision adds two clone method to RankedTensorType: - dropEncoding(): Return a clone of this type without the encoding. - cloneWithEncoding(Attribute encoding): Return a clone of this type with the given new encoding and the same shape and element type as this type. Signed-off-by: hanhanW --- mlir/include/mlir/IR/BuiltinTypes.td | 11 +++++++++++ mlir/unittests/IR/ShapedTypeTest.cpp | 14 ++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td index e5a2ae81da0c9..af474b3e3ec47 100644 --- a/mlir/include/mlir/IR/BuiltinTypes.td +++ b/mlir/include/mlir/IR/BuiltinTypes.td @@ -1035,6 +1035,17 @@ def Builtin_RankedTensor : Builtin_Type<"RankedTensor", "tensor", [ RankedTensorType clone(::mlir::Type elementType) { return ::llvm::cast(cloneWith(getShape(), elementType)); } + + /// Return a clone of this type without the encoding. + RankedTensorType dropEncoding() { + return RankedTensorType::get(getShape(), getElementType()); + } + + /// Return a clone of this type with the given new encoding and the same + /// shape and element type as this type. 
+ RankedTensorType cloneWithEncoding(::mlir::Attribute encoding) { + return RankedTensorType::get(getShape(), getElementType(), encoding); + } }]; let skipDefaultBuilders = 1; let genVerifyDecl = 1; diff --git a/mlir/unittests/IR/ShapedTypeTest.cpp b/mlir/unittests/IR/ShapedTypeTest.cpp index c2900b5aaeeeb..bc4066ed210e8 100644 --- a/mlir/unittests/IR/ShapedTypeTest.cpp +++ b/mlir/unittests/IR/ShapedTypeTest.cpp @@ -282,6 +282,20 @@ TEST(ShapedTypeTest, RankedTensorTypeView) { ASSERT_TRUE(mlir::isa(viewCreated)); view = mlir::cast(viewCreated); EXPECT_EQ(view.getName(), "bob"); + + // Verify encoding clone methods. + EXPECT_EQ(unitEncodingRankedTensorType, + cast(noEncodingRankedTensorType) + .cloneWithEncoding(unitAttr)); + EXPECT_EQ(stringEncodingRankedTensorType, + cast(noEncodingRankedTensorType) + .cloneWithEncoding(stringAttr)); + EXPECT_EQ( + noEncodingRankedTensorType, + cast(unitEncodingRankedTensorType).dropEncoding()); + EXPECT_EQ( + noEncodingRankedTensorType, + cast(stringEncodingRankedTensorType).dropEncoding()); } } // namespace From b623106e2f323147c3ea75eabd699f593747e1f0 Mon Sep 17 00:00:00 2001 From: sstipano Date: Fri, 28 Feb 2025 03:14:51 +0100 Subject: [PATCH 036/123] [AMDGPU][NFC] Move isXDL and isDGEMM to SIInstrInfo. 
(#129103) --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 59 +++++++------------ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 14 +++++ llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 ++ 3 files changed, 39 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 1ff75095b220a..582da42a0dc4e 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -111,25 +111,6 @@ static bool isSMovRel(unsigned Opcode) { } } -static bool isDGEMM(unsigned Opcode) { - return AMDGPU::getMAIIsDGEMM(Opcode); -} - -static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) { - unsigned Opcode = MI.getOpcode(); - - if (!SIInstrInfo::isMAI(MI) || - isDGEMM(Opcode) || - Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 || - Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) - return false; - - if (!ST.hasGFX940Insts()) - return true; - - return AMDGPU::getMAIIsGFX940XDL(Opcode); -} - static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, const MachineInstr &MI) { if (TII.isAlwaysGDS(MI.getOpcode())) @@ -2375,7 +2356,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { unsigned Opc1 = MI1->getOpcode(); int NeedWaitStates = 0; if (OpNo == SrcCIdx) { - if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) { + if (!SIInstrInfo::isDGEMM(Opc) && + (!ST.hasGFX940Insts() && SIInstrInfo::isDGEMM(Opc1))) { NeedWaitStates = 0; } else if (FullReg) { if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || @@ -2392,7 +2374,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: - if (!isXDL(ST, *MI)) + if (!TII.isXDL(*MI)) NeedWaitStates = ST.hasGFX950Insts() ? 
GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates @@ -2400,18 +2382,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { break; case AMDGPU::V_MFMA_F64_4X4X4F64_e64: case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: - if (!isXDL(ST, *MI)) + if (!TII.isXDL(*MI)) NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates; break; default: int NumPasses = TSchedModel.computeInstrLatency(MI1); if (ST.hasGFX940Insts()) { - if (isXDL(ST, *MI) && !isXDL(ST, *MI1)) + if (TII.isXDL(*MI) && !TII.isXDL(*MI1)) break; NeedWaitStates = - isXDL(ST, *MI1) - ? (isXDL(ST, *MI) + TII.isXDL(*MI1) + ? (TII.isXDL(*MI) ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( NumPasses, ST.hasGFX950Insts()) : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( @@ -2424,18 +2406,19 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { switch (NumPasses) { case 2: NeedWaitStates = - isDGEMM(Opc) ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; + SIInstrInfo::isDGEMM(Opc) + ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; break; case 8: NeedWaitStates = - isDGEMM(Opc) + SIInstrInfo::isDGEMM(Opc) ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; break; case 16: NeedWaitStates = - isDGEMM(Opc) + SIInstrInfo::isDGEMM(Opc) ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; break; @@ -2464,7 +2447,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { if (ST.hasGFX940Insts()) { NeedWaitStates = - isXDL(ST, *MI1) + TII.isXDL(*MI1) ? 
GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates( NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates( @@ -2631,7 +2614,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { return 0; auto IsDGEMMFn = [](const MachineInstr &MI) -> bool { - return isDGEMM(MI.getOpcode()); + return SIInstrInfo::isDGEMM(MI.getOpcode()); }; // This is checked in checkMAIHazards90A() @@ -2670,7 +2653,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { bool DGEMMAfterVALUWrite = false; auto IsDGEMMHazard = [&DGEMMAfterVALUWrite, this](const MachineInstr &MI) { // Found DGEMM on reverse traversal to def. - if (isDGEMM(MI.getOpcode())) + if (SIInstrInfo::isDGEMM(MI.getOpcode())) DGEMMAfterVALUWrite = true; // Only hazard if register is defined by a VALU and a DGEMM is found after @@ -2745,7 +2728,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { int NumPasses = HazardDefLatency; int NeedWaitStates = MaxWaitStates; - if (isDGEMM(MFMA->getOpcode())) { + if (SIInstrInfo::isDGEMM(MFMA->getOpcode())) { switch (HazardDefLatency) { case 4: NeedWaitStates = IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates @@ -2765,7 +2748,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } } else if (ST.hasGFX940Insts()) { NeedWaitStates = - isXDL(ST, *MFMA) + TII.isXDL(*MFMA) ? 
GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates( NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( @@ -2838,7 +2821,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { int NeedWaitStates = MaxWaitStates; int NumPasses = TSchedModel.computeInstrLatency(MFMA); - if (isDGEMM(MFMA->getOpcode())) { + if (SIInstrInfo::isDGEMM(MFMA->getOpcode())) { switch (NumPasses) { case 4: NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates; @@ -2852,7 +2835,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } } else if (ST.hasGFX940Insts()) { NeedWaitStates = - isXDL(ST, *MFMA) + TII.isXDL(*MFMA) ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates( NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); @@ -2880,11 +2863,11 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } auto IsSMFMAReadAsCFn = [&Reg, &MFMA, this](const MachineInstr &MI) { - if (!SIInstrInfo::isMFMA(MI) || isDGEMM(MI.getOpcode()) || + if (!SIInstrInfo::isMFMA(MI) || SIInstrInfo::isDGEMM(MI.getOpcode()) || !MI.readsRegister(Reg, &TRI)) return false; - if (ST.hasGFX940Insts() && !isXDL(ST, MI)) + if (ST.hasGFX940Insts() && !TII.isXDL(MI)) return false; const MachineOperand *SrcC = diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9aec2bef0c18a..0face8108d249 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -10251,3 +10251,17 @@ bool SIInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const { return TargetInstrInfo::isGlobalMemoryObject(MI); } + +bool SIInstrInfo::isXDL(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + + if (!SIInstrInfo::isMAI(MI) || isDGEMM(Opcode) || + Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 || + Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) + return false; + + if (!ST.hasGFX940Insts()) + return true; + + return AMDGPU::getMAIIsGFX940XDL(Opcode); +} 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 428322a5a2f04..88b1e477f13e4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -847,6 +847,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return get(Opcode).TSFlags & SIInstrFlags::IsDOT; } + bool isXDL(const MachineInstr &MI) const; + + static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opcode); } + static bool isLDSDIR(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR; } From 2abd830258b449d6bea44181e53c153447e19bbb Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 27 Feb 2025 20:50:40 +1100 Subject: [PATCH 037/123] [ORC] Make callWrapperAsync forwards explicit in ExecutionSession. NFCI. This change is intended to make the overloads of callWrapperAsync clearer for clients that only look at the ExecutionSession API. Previously we forwarded calls to the three callWrapperAsync overloads in ExecutorProcessControl using one variadic template, but this obscures the API for clients who only look at ExecutionSession. --- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 30 ++++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index cecb4094c9a57..fbbd0d3d74b15 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -1580,18 +1580,36 @@ class ExecutionSession { return EPC->getBootstrapSymbols(Pairs); } - /// Run a wrapper function in the executor. + /// Run a wrapper function in the executor. The given WFRHandler will be + /// called on the result when it is returned. /// /// The wrapper function should be callable as: /// /// \code{.cpp} /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); /// \endcode{.cpp} - /// - /// The given OnComplete function will be called to return the result. 
- template - void callWrapperAsync(ArgTs &&... Args) { - EPC->callWrapperAsync(std::forward(Args)...); + void callWrapperAsync(ExecutorAddr WrapperFnAddr, + ExecutorProcessControl::IncomingWFRHandler OnComplete, + ArrayRef ArgBuffer) { + EPC->callWrapperAsync(WrapperFnAddr, std::move(OnComplete), ArgBuffer); + } + + /// Run a wrapper function in the executor using the given Runner to dispatch + /// OnComplete when the result is ready. + template + void callWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr, + FnT &&OnComplete, ArrayRef ArgBuffer) { + EPC->callWrapperAsync(std::forward(Runner), WrapperFnAddr, + std::forward(OnComplete), ArgBuffer); + } + + /// Run a wrapper function in the executor. OnComplete will be dispatched + /// as a GenericNamedTask using this instance's TaskDispatch object. + template + void callWrapperAsync(ExecutorAddr WrapperFnAddr, FnT &&OnComplete, + ArrayRef ArgBuffer) { + EPC->callWrapperAsync(WrapperFnAddr, std::forward(OnComplete), + ArgBuffer); } /// Run a wrapper function in the executor. The wrapper function should be From f0a2b1cfbc6c1f645059e95333f3256bf0dea47f Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 27 Feb 2025 18:53:49 -0800 Subject: [PATCH 038/123] [RISCV][TTI] Fix a misuse of the getShuffleCost API [NFC] (#129137) The getShuffleCost api, in concept, expects to only deal with non-length changing shuffles. We were failing to extend the mask appropriately before invoking it. This came up in https://github.com/llvm/llvm-project/pull/128537 in discussion of a potential invariant, but is otherwise unrelated. 
--- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index d19023b19ccdd..cde643a250be1 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -482,7 +482,6 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, ArrayRef Args, const Instruction *CxtI) { Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp); - std::pair LT = getTypeLegalizationCost(Tp); // First, handle cases where having a fixed length vector enables us to @@ -890,11 +889,12 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost( if (Opcode == Instruction::Load) { InstructionCost Cost = MemCost; for (unsigned Index : Indices) { - FixedVectorType *SubVecTy = + FixedVectorType *VecTy = FixedVectorType::get(FVTy->getElementType(), VF * Factor); auto Mask = createStrideMask(Index, Factor, VF); + Mask.resize(VF * Factor, -1); InstructionCost ShuffleCost = - getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, SubVecTy, Mask, + getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, VecTy, Mask, CostKind, 0, nullptr, {}); Cost += ShuffleCost; } From 22c407e0eeffb6a21ecfbc1d3bdbf1b5bdf7bcca Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 27 Feb 2025 18:58:42 -0800 Subject: [PATCH 039/123] [RISCV][TTI] Add shuffle costing for masked slide lowering (#128537) This change adds the TTI costing corresponding to the recently added isMaskedSlidePair lowering for vector shuffles. However, since the existing costing code hadn't covered either slideup, slidedown, or the (now removed) isElementRotate, the impact is larger in scope than just that new lowering. 
--------- Co-authored-by: Alexey Bataev Co-authored-by: Luke Lau --- llvm/include/llvm/Analysis/VectorUtils.h | 9 + llvm/lib/Analysis/VectorUtils.cpp | 30 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 41 +- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 72 +- .../Target/RISCV/RISCVTargetTransformInfo.h | 6 + .../CostModel/RISCV/shuffle-exact-vlen.ll | 12 +- .../RISCV/shuffle-extract_subvector.ll | 2 +- .../CostModel/RISCV/shuffle-transpose.ll | 64 +- .../SLPVectorizer/RISCV/complex-loads.ll | 744 +++--------------- .../SLPVectorizer/RISCV/reductions.ll | 24 +- 10 files changed, 303 insertions(+), 701 deletions(-) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index f21594c557e0e..4390b45f1f730 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -203,6 +203,15 @@ bool getShuffleDemandedElts(int SrcWidth, ArrayRef Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts = false); +/// Does this shuffle mask represent either one slide shuffle or a pair of +/// two slide shuffles, combined with a select on some constant vector mask? +/// A slide is a shuffle mask which shifts some set of elements up or down +/// the vector, with all other elements being undefined. An identity shuffle +/// will be matched a slide by 0. The output parameter provides the source +/// (-1 means no source), and slide direction for each slide. +bool isMaskedSlidePair(ArrayRef Mask, int NumElts, + std::array, 2> &SrcInfo); + /// Replace each shuffle mask index with the scaled sequential indices for an /// equivalent mask of narrowed elements. Mask elements that are less than 0 /// (sentinel values) are repeated in the output mask. 
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index dcfd3d5a8bd6e..160913841a6e2 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -415,6 +415,36 @@ bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef Mask, return true; } +bool llvm::isMaskedSlidePair(ArrayRef Mask, int NumElts, + std::array, 2> &SrcInfo) { + const int SignalValue = NumElts * 2; + SrcInfo[0] = {-1, SignalValue}; + SrcInfo[1] = {-1, SignalValue}; + for (auto [i, M] : enumerate(Mask)) { + if (M < 0) + continue; + int Src = M >= (int)NumElts; + int Diff = (int)i - (M % NumElts); + bool Match = false; + for (int j = 0; j < 2; j++) { + auto &[SrcE, DiffE] = SrcInfo[j]; + if (SrcE == -1) { + assert(DiffE == SignalValue); + SrcE = Src; + DiffE = Diff; + } + if (SrcE == Src && DiffE == Diff) { + Match = true; + break; + } + } + if (!Match) + return false; + } + assert(SrcInfo[0].first != -1 && "Must find one slide"); + return true; +} + void llvm::narrowShuffleMaskElts(int Scale, ArrayRef Mask, SmallVectorImpl &ScaledMask) { assert(Scale > 0 && "Unexpected scaling factor"); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6076fe56416ad..71fd3ab28b273 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4562,32 +4562,9 @@ static bool isInterleaveShuffle(ArrayRef Mask, MVT VT, int &EvenSrc, /// Is this mask representing a masked combination of two slides? 
static bool isMaskedSlidePair(ArrayRef<int> Mask, - std::pair<int, int> SrcInfo[2]) { - int NumElts = Mask.size(); - int SignalValue = NumElts * 2; - SrcInfo[0] = {-1, SignalValue}; - SrcInfo[1] = {-1, SignalValue}; - for (unsigned i = 0; i != Mask.size(); ++i) { - int M = Mask[i]; - if (M < 0) - continue; - int Src = M >= (int)NumElts; - int Diff = (int)i - (M % NumElts); - bool Match = false; - for (int j = 0; j < 2; j++) { - if (SrcInfo[j].first == -1) { - assert(SrcInfo[j].second == SignalValue); - SrcInfo[j].first = Src; - SrcInfo[j].second = Diff; - } - if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) { - Match = true; - break; - } - } - if (!Match) - return false; - } + std::array<std::pair<int, int>, 2> &SrcInfo) { + if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo)) + return false; // Avoid matching vselect idioms if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0) @@ -4603,7 +4580,8 @@ static bool isMaskedSlidePair(ArrayRef<int> Mask, // Exactly matches the semantics of a previously existing custom matcher // to allow migration to new matcher without changing output. -static bool isElementRotate(std::pair<int, int> SrcInfo[2], unsigned NumElts) { +static bool isElementRotate(std::array<std::pair<int, int>, 2> &SrcInfo, + unsigned NumElts) { if (SrcInfo[1].first == -1) return true; return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 && @@ -5604,10 +5582,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // without masking. Avoid matching bit rotates (which are not also element // rotates) as slide pairs. This is a performance heuristic, not a // functional check. 
- std::pair SrcInfo[2]; + std::array, 2> SrcInfo; unsigned RotateAmt; MVT RotateVT; - if (isMaskedSlidePair(Mask, SrcInfo) && + if (::isMaskedSlidePair(Mask, SrcInfo) && (isElementRotate(SrcInfo, NumElts) || !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) { SDValue Sources[2]; @@ -5964,10 +5942,11 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (SVT.getScalarType() == MVT::i1) return false; - std::pair SrcInfo[2]; + std::array, 2> SrcInfo; int Dummy1, Dummy2; return ShuffleVectorInst::isReverseMask(M, NumElts) || - (isMaskedSlidePair(M, SrcInfo) && isElementRotate(SrcInfo, NumElts)) || + (::isMaskedSlidePair(M, SrcInfo) && + isElementRotate(SrcInfo, NumElts)) || isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index cde643a250be1..6005c067428eb 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -475,6 +475,64 @@ costShuffleViaVRegSplitting(RISCVTTIImpl &TTI, MVT LegalVT, return InstructionCost::getInvalid(); } +InstructionCost RISCVTTIImpl::getSlideCost(FixedVectorType *Tp, + ArrayRef Mask, + TTI::TargetCostKind CostKind) { + // Avoid missing masks and length changing shuffles + if (Mask.size() <= 2 || Mask.size() != Tp->getNumElements()) + return InstructionCost::getInvalid(); + + int NumElts = Tp->getNumElements(); + std::pair LT = getTypeLegalizationCost(Tp); + // Avoid scalarization cases + if (!LT.second.isFixedLengthVector()) + return InstructionCost::getInvalid(); + + // Requires moving elements between parts, which requires additional + // unmodeled instructions. + if (LT.first != 1) + return InstructionCost::getInvalid(); + + auto GetSlideOpcode = [&](int SlideAmt) { + assert(SlideAmt != 0); + bool IsVI = isUInt<5>(std::abs(SlideAmt)); + if (SlideAmt < 0) + return IsVI ? 
RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX; + return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX; + }; + + std::array, 2> SrcInfo; + if (!isMaskedSlidePair(Mask, NumElts, SrcInfo)) + return InstructionCost::getInvalid(); + + if (SrcInfo[1].second == 0) + std::swap(SrcInfo[0], SrcInfo[1]); + + InstructionCost FirstSlideCost = 0; + if (SrcInfo[0].second != 0) { + unsigned Opcode = GetSlideOpcode(SrcInfo[0].second); + FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind); + } + + if (SrcInfo[1].first == -1) + return FirstSlideCost; + + InstructionCost SecondSlideCost = 0; + if (SrcInfo[1].second != 0) { + unsigned Opcode = GetSlideOpcode(SrcInfo[1].second); + SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind); + } else { + SecondSlideCost = + getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind); + } + + auto EC = Tp->getElementCount(); + VectorType *MaskTy = + VectorType::get(IntegerType::getInt1Ty(Tp->getContext()), EC); + InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); + return FirstSlideCost + SecondSlideCost + MaskCost; +} + InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, TTI::TargetCostKind CostKind, @@ -487,8 +545,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // First, handle cases where having a fixed length vector enables us to // give a more accurate cost than falling back to generic scalable codegen. // TODO: Each of these cases hints at a modeling gap around scalable vectors. 
- if (ST->hasVInstructions() && isa(Tp) && - LT.second.isFixedLengthVector()) { + if (auto *FVTp = dyn_cast(Tp); + FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) { InstructionCost VRegSplittingCost = costShuffleViaVRegSplitting( *this, LT.second, ST->getRealVLen(), Tp, Mask, CostKind); if (VRegSplittingCost.isValid()) @@ -544,6 +602,11 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return Cost; } } + + if (InstructionCost SlideCost = getSlideCost(FVTp, Mask, CostKind); + SlideCost.isValid()) + return SlideCost; + // vrgather + cost of generating the mask constant. // We model this for an unknown mask with a single vrgather. if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 || @@ -558,6 +621,11 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, } case TTI::SK_Transpose: case TTI::SK_PermuteTwoSrc: { + + if (InstructionCost SlideCost = getSlideCost(FVTp, Mask, CostKind); + SlideCost.isValid()) + return SlideCost; + // 2 x (vrgather + cost of generating the mask constant) + cost of mask // register for the second vrgather. We model this for an unknown // (shuffle) mask. diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 134a7333b9b06..3f57560d3c127 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -63,6 +63,12 @@ class RISCVTTIImpl : public BasicTTIImplBase { /// type. InstructionCost getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind); + + /// If this shuffle can be lowered as a masked slide pair (at worst), + /// return a cost for it. 
+ InstructionCost getSlideCost(FixedVectorType *Tp, ArrayRef Mask, + TTI::TargetCostKind CostKind); + public: explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F) : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll index c951184a31731..06c709e4cc879 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll @@ -186,7 +186,7 @@ define void @insert_subvec() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_1 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_3 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'insert_subvec' @@ -225,7 +225,7 @@ define void @insert_subvec() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_1 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_3 = shufflevector <16 x i32> poison, <16 x i32> poison, 
<16 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v4i8_2_0 = shufflevector <4 x i8> poison, <4 x i8> poison, <4 x i32> @@ -737,8 +737,8 @@ define void @multipart() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32idrev = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> @@ 
-757,8 +757,8 @@ define void @multipart() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32idrev = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll index e8dd30345cc76..d2bfb61a11b00 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll @@ -19,7 +19,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; CHECK-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; VLEN128-LABEL: 'test_vXf64' diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll index 8f784a07d3124..ef069fee8526e 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll @@ -10,11 +10,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn1.v8i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0 ; ; SIZE-LABEL: 'trn1.v8i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret <8 x i8> %tmp0 ; %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -23,11 +23,11 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn2.v8i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0 ; ; SIZE-LABEL: 'trn2.v8i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %tmp0 ; %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -36,11 +36,11 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn1.v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0 ; ; SIZE-LABEL: 'trn1.v16i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x 
i8> %tmp0 ; %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -49,11 +49,11 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn2.v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0 ; ; SIZE-LABEL: 'trn2.v16i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %tmp0 ; %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -62,11 +62,11 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn1.v4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0 ; ; SIZE-LABEL: 'trn1.v4i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %tmp0 ; %tmp0 = 
shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -75,11 +75,11 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn2.v4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0 ; ; SIZE-LABEL: 'trn2.v4i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %tmp0 ; %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -88,11 +88,11 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn1.v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0 ; ; SIZE-LABEL: 'trn1.v8i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %tmp0 ; %tmp0 = shufflevector <8 x i16> 
%v0, <8 x i16> %v1, <8 x i32> @@ -101,11 +101,11 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn2.v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0 ; ; SIZE-LABEL: 'trn2.v8i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %tmp0 ; %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> @@ -140,11 +140,11 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn1.v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0 ; ; SIZE-LABEL: 'trn1.v4i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %tmp0 ; %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 
x i32> @@ -153,11 +153,11 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn2.v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0 ; ; SIZE-LABEL: 'trn2.v4i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %tmp0 ; %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> @@ -218,11 +218,11 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn1.v4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0 ; ; SIZE-LABEL: 'trn1.v4f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %tmp0 ; %tmp0 = shufflevector <4 x float> %v0, <4 x 
float> %v1, <4 x i32> @@ -231,11 +231,11 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn2.v4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0 ; ; SIZE-LABEL: 'trn2.v4f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %tmp0 ; %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> @@ -270,11 +270,11 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn1.v4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0 ; ; SIZE-LABEL: 'trn1.v4f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %tmp0 ; %tmp0 
= shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -283,11 +283,11 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn2.v4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0 ; ; SIZE-LABEL: 'trn2.v4f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %tmp0 ; %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -296,11 +296,11 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn1.v8f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0 ; ; SIZE-LABEL: 'trn1.v8f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> 
%tmp0 ; %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> @@ -309,11 +309,11 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn2.v8f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0 ; ; SIZE-LABEL: 'trn2.v8f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> %tmp0 ; %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 11fa3337544a1..18acae5835724 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -6,663 +6,175 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 -; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, 
ptr [[PIX1]], i64 1 -; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 -; CHECK-NEXT: [[CONV33:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 -; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 -; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 -; CHECK-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 -; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 -; CHECK-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP21]], [[TMP31]] -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: 
[[TMP26:%.*]] = zext <2 x i8> [[TMP49]] to <2 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP50:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = sub <2 x i32> [[TMP26]], [[TMP50]] -; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], splat (i32 16) -; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]] -; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32> -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]] -; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP40]] to <2 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = sub <2 x i32> [[TMP39]], [[TMP61]] -; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], splat (i32 16) -; CHECK-NEXT: [[TMP42:%.*]] = add <2 x i32> [[TMP37]], [[TMP35]] -; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]] -; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]] -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0 -; CHECK-NEXT: [[CONV_2:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1 -; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV_2]], [[TMP43]] -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0 -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1 -; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]] ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr 
i8, ptr null, i64 4 -; CHECK-NEXT: [[TMP53:%.*]] = load <2 x i8>, ptr null, align 1 ; CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr null, align 1 -; CHECK-NEXT: [[TMP62:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> -; CHECK-NEXT: [[TMP77:%.*]] = zext i8 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = load <2 x i8>, ptr null, align 1 -; CHECK-NEXT: [[TMP55:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32> -; CHECK-NEXT: [[TMP59:%.*]] = sub <2 x i32> [[TMP62]], [[TMP55]] -; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) -; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP41]] to <2 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i32> [[TMP58]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32> -; CHECK-NEXT: [[TMP81:%.*]] = sub <2 x i32> [[TMP48]], [[TMP76]] -; CHECK-NEXT: [[TMP167:%.*]] = shl <2 x i32> [[TMP81]], splat (i32 16) -; CHECK-NEXT: [[TMP75:%.*]] = add <2 x i32> [[TMP167]], [[TMP59]] -; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 -; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 -; CHECK-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 -; CHECK-NEXT: [[TMP64:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 -; CHECK-NEXT: [[TMP79:%.*]] = zext <2 x i8> [[TMP64]] to <2 x i32> -; CHECK-NEXT: [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 -; CHECK-NEXT: [[TMP91:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = sub <2 x i32> [[TMP79]], [[TMP91]] -; CHECK-NEXT: [[TMP170:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) -; CHECK-NEXT: [[TMP171:%.*]] = zext <2 x i8> [[TMP170]] to <2 x i32> -; CHECK-NEXT: [[TMP172:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 -; 
CHECK-NEXT: [[TMP173:%.*]] = zext <2 x i8> [[TMP172]] to <2 x i32> -; CHECK-NEXT: [[TMP66:%.*]] = sub <2 x i32> [[TMP171]], [[TMP173]] -; CHECK-NEXT: [[TMP67:%.*]] = shl <2 x i32> [[TMP66]], splat (i32 16) -; CHECK-NEXT: [[TMP69:%.*]] = add <2 x i32> [[TMP67]], [[TMP65]] -; CHECK-NEXT: [[TMP176:%.*]] = extractelement <2 x i32> [[TMP75]], i32 0 -; CHECK-NEXT: [[TMP197:%.*]] = extractelement <2 x i32> [[TMP75]], i32 1 -; CHECK-NEXT: [[SUB59:%.*]] = add i32 [[TMP197]], [[TMP176]] -; CHECK-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP176]], [[TMP197]] -; CHECK-NEXT: [[ADD112_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 0 -; CHECK-NEXT: [[XOR_I63_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 1 -; CHECK-NEXT: [[SUB59_1:%.*]] = add i32 [[XOR_I63_2]], [[ADD112_2]] -; CHECK-NEXT: [[SUB47_3:%.*]] = sub i32 [[ADD112_2]], [[XOR_I63_2]] -; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[SUB59_1]], [[SUB59]] -; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP70]], i32 [[SUB59]], i32 0 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i32> [[TMP34]], i32 [[SUB59_1]], i32 0 -; CHECK-NEXT: [[TMP222:%.*]] = sub <2 x i32> [[TMP71]], [[TMP72]] -; CHECK-NEXT: [[ADD55_3:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] -; CHECK-NEXT: [[TMP74:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP78:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[SUB45_3]], i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP44]], i32 [[SUB47_3]], i32 0 -; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]] -; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]] -; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]] -; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP77]], 15 -; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 -; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 -; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 
15 -; CHECK-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 -; CHECK-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 -; CHECK-NEXT: [[TMP86:%.*]] = extractelement <2 x i32> [[TMP222]], i32 0 -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <2 x i32> [[TMP222]], i32 1 -; CHECK-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP86]], [[TMP87]] -; CHECK-NEXT: [[ADD112_1:%.*]] = sub i32 [[TMP87]], [[TMP86]] -; CHECK-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 -; CHECK-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 -; CHECK-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 -; CHECK-NEXT: [[ADD94_4:%.*]] = add i32 [[TMP88]], [[TMP89]] -; CHECK-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP89]], [[TMP88]] -; CHECK-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV1]], 15 -; CHECK-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 -; CHECK-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 -; CHECK-NEXT: [[TMP90:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 -; CHECK-NEXT: [[TMP102:%.*]] = zext <2 x i8> [[TMP90]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr null, align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 ; CHECK-NEXT: [[TMP92:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 -; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP94:%.*]] = zext <2 x i8> [[TMP93]] to <2 x i32> ; CHECK-NEXT: [[TMP95:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> ; CHECK-NEXT: [[TMP98:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = zext <2 x i8> [[TMP99]] to <2 x 
i32> -; CHECK-NEXT: [[TMP101:%.*]] = sub <2 x i32> [[TMP97]], [[TMP100]] -; CHECK-NEXT: [[TMP224:%.*]] = shl <2 x i32> [[TMP101]], splat (i32 16) -; CHECK-NEXT: [[TMP103:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP104:%.*]] = zext <2 x i8> [[TMP103]] to <2 x i32> -; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP106:%.*]] = zext <2 x i8> [[TMP105]] to <2 x i32> -; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP108:%.*]] = zext <2 x i8> [[TMP107]] to <2 x i32> -; CHECK-NEXT: [[TMP109:%.*]] = sub <2 x i32> [[TMP106]], [[TMP108]] -; CHECK-NEXT: [[TMP110:%.*]] = shl <2 x i32> [[TMP109]], splat (i32 16) -; CHECK-NEXT: [[TMP111:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1 -; CHECK-NEXT: [[TMP112:%.*]] = sub <2 x i32> [[TMP111]], [[TMP104]] -; CHECK-NEXT: [[TMP113:%.*]] = add <2 x i32> [[TMP110]], [[TMP112]] -; CHECK-NEXT: [[TMP114:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV1]], i32 0 -; CHECK-NEXT: [[TMP115:%.*]] = sub <2 x i32> [[TMP114]], [[TMP94]] -; CHECK-NEXT: [[TMP116:%.*]] = add <2 x i32> [[TMP224]], [[TMP115]] -; CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x i32> [[TMP113]], <2 x i32> [[TMP116]], <2 x i32> -; CHECK-NEXT: [[TMP126:%.*]] = add <2 x i32> [[TMP113]], [[TMP116]] -; CHECK-NEXT: [[TMP119:%.*]] = sub <2 x i32> [[TMP116]], [[TMP113]] -; CHECK-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP126]], i32 0 -; CHECK-NEXT: [[TMP127:%.*]] = extractelement <2 x i32> [[TMP126]], i32 1 -; CHECK-NEXT: [[ADD48:%.*]] = add i32 [[TMP127]], [[TMP120]] -; CHECK-NEXT: [[TMP166:%.*]] = sub i32 [[TMP120]], [[TMP127]] -; CHECK-NEXT: [[TMP128:%.*]] = extractelement <2 x i32> [[TMP119]], i32 0 -; CHECK-NEXT: [[TMP129:%.*]] = extractelement <2 x i32> [[TMP119]], i32 1 -; CHECK-NEXT: [[ADD55:%.*]] = add i32 [[TMP129]], [[TMP128]] -; CHECK-NEXT: [[SUB60:%.*]] = sub i32 
[[TMP128]], [[TMP129]] -; CHECK-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP127]], 15 -; CHECK-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 -; CHECK-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 -; CHECK-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP129]], 15 -; CHECK-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 -; CHECK-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 -; CHECK-NEXT: [[TMP130:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 -; CHECK-NEXT: [[TMP131:%.*]] = zext <2 x i8> [[TMP130]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 ; CHECK-NEXT: [[TMP132:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 -; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> ; CHECK-NEXT: [[TMP135:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[TMP136:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP137:%.*]] = zext <2 x i8> [[TMP136]] to <2 x i32> ; CHECK-NEXT: [[TMP138:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP139:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP140:%.*]] = zext <2 x i8> [[TMP139]] to <2 x i32> -; CHECK-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP137]], [[TMP140]] -; CHECK-NEXT: [[TMP142:%.*]] = shl <2 x i32> [[TMP141]], splat (i32 16) -; CHECK-NEXT: [[TMP143:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP144:%.*]] = zext <2 x i8> [[TMP143]] to <2 x i32> -; CHECK-NEXT: [[TMP145:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP146:%.*]] = zext <2 x i8> [[TMP145]] to <2 x i32> -; CHECK-NEXT: [[TMP147:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP148:%.*]] = zext <2 x i8> [[TMP147]] to <2 x i32> -; CHECK-NEXT: [[TMP149:%.*]] = sub <2 x i32> 
[[TMP146]], [[TMP148]] -; CHECK-NEXT: [[TMP150:%.*]] = shl <2 x i32> [[TMP149]], splat (i32 16) -; CHECK-NEXT: [[TMP151:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV33_1]], i32 1 -; CHECK-NEXT: [[TMP225:%.*]] = sub <2 x i32> [[TMP151]], [[TMP144]] -; CHECK-NEXT: [[TMP153:%.*]] = add <2 x i32> [[TMP150]], [[TMP225]] -; CHECK-NEXT: [[TMP154:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV_1]], i32 0 -; CHECK-NEXT: [[TMP155:%.*]] = sub <2 x i32> [[TMP154]], [[TMP134]] -; CHECK-NEXT: [[TMP156:%.*]] = add <2 x i32> [[TMP142]], [[TMP155]] -; CHECK-NEXT: [[TMP157:%.*]] = add <2 x i32> [[TMP153]], [[TMP156]] -; CHECK-NEXT: [[TMP158:%.*]] = sub <2 x i32> [[TMP156]], [[TMP153]] -; CHECK-NEXT: [[TMP159:%.*]] = extractelement <2 x i32> [[TMP157]], i32 0 -; CHECK-NEXT: [[TMP160:%.*]] = extractelement <2 x i32> [[TMP157]], i32 1 -; CHECK-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP160]], [[TMP159]] -; CHECK-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP159]], [[TMP160]] -; CHECK-NEXT: [[TMP161:%.*]] = extractelement <2 x i32> [[TMP158]], i32 0 -; CHECK-NEXT: [[TMP162:%.*]] = extractelement <2 x i32> [[TMP158]], i32 1 -; CHECK-NEXT: [[ADD55_1:%.*]] = add i32 [[TMP162]], [[TMP161]] -; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 [[TMP161]], [[TMP162]] -; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP160]], 15 -; CHECK-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 -; CHECK-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 -; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP162]], 15 -; CHECK-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 -; CHECK-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; CHECK-NEXT: [[TMP163:%.*]] = lshr <2 x i32> [[TMP131]], splat (i32 15) -; CHECK-NEXT: [[TMP164:%.*]] = and <2 x i32> [[TMP163]], splat (i32 65537) -; CHECK-NEXT: [[TMP165:%.*]] = mul <2 x i32> [[TMP164]], splat (i32 65535) -; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]] -; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]] -; CHECK-NEXT: [[ADD103:%.*]] = add 
i32 [[ADD95]], [[ADD78]] -; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]] -; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]] -; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]] -; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I_1]], [[ADD103]] -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]] -; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51_1]], [[ADD105]] -; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV_2]] -; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] -; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]] -; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] -; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP127]] -; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] -; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] -; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]] -; CHECK-NEXT: [[TMP169:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[TMP181:%.*]] = zext <2 x i8> [[TMP169]] to <2 x i32> -; CHECK-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0 -; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP183:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0 -; CHECK-NEXT: [[TMP184:%.*]] = shufflevector <2 x i32> [[TMP183]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP191:%.*]] = sub <2 x i32> [[TMP182]], [[TMP184]] -; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP182]], [[TMP184]] -; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x i32> [[TMP191]], <2 x i32> [[TMP192]], <2 x i32> -; CHECK-NEXT: [[TMP195:%.*]] = lshr <2 x i32> [[TMP181]], splat (i32 15) -; CHECK-NEXT: [[TMP196:%.*]] = and <2 x i32> [[TMP195]], splat (i32 65537) -; CHECK-NEXT: [[TMP198:%.*]] = mul <2 x i32> [[TMP196]], splat (i32 65535) -; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> poison, i32 
[[ADD55]], i32 0 -; CHECK-NEXT: [[TMP203:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP205:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0 -; CHECK-NEXT: [[TMP206:%.*]] = shufflevector <2 x i32> [[TMP205]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP207:%.*]] = sub <2 x i32> [[TMP203]], [[TMP206]] -; CHECK-NEXT: [[TMP210:%.*]] = add <2 x i32> [[TMP203]], [[TMP206]] -; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP210]], <2 x i32> -; CHECK-NEXT: [[ADD94_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1 -; CHECK-NEXT: [[ADD78_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 1 -; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] -; CHECK-NEXT: [[TMP220:%.*]] = add <2 x i32> [[TMP194]], [[TMP168]] -; CHECK-NEXT: [[SUB102_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0 -; CHECK-NEXT: [[SUB86_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 0 -; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x i32> [[TMP168]], <2 x i32> [[TMP194]], <2 x i32> -; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] -; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP198]], [[TMP220]] -; CHECK-NEXT: [[TMP221:%.*]] = xor <2 x i32> [[TMP175]], [[TMP181]] -; CHECK-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] -; CHECK-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP162]] -; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] -; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP129]] -; CHECK-NEXT: [[XOR_I53_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 0 -; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD105_3]] -; CHECK-NEXT: [[XOR_I_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 1 -; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] -; CHECK-NEXT: [[ADD112_5:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] -; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 
[[ADD112_5]], [[XOR_I63_1]] -; CHECK-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[TMP166]] -; CHECK-NEXT: [[TMP204:%.*]] = sub i32 [[TMP166]], [[SUB51_1]] -; CHECK-NEXT: [[TMP177:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 -; CHECK-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP177]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP179:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 -; CHECK-NEXT: [[TMP180:%.*]] = shufflevector <2 x i32> [[TMP179]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP199:%.*]] = add <2 x i32> [[TMP178]], [[TMP180]] -; CHECK-NEXT: [[TMP200:%.*]] = sub <2 x i32> [[TMP178]], [[TMP180]] -; CHECK-NEXT: [[TMP201:%.*]] = shufflevector <2 x i32> [[TMP199]], <2 x i32> [[TMP200]], <2 x i32> -; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[TMP204]] -; CHECK-NEXT: [[SUB106_2:%.*]] = sub i32 [[TMP204]], [[ADD112_1]] -; CHECK-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD113_1]] -; CHECK-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] -; CHECK-NEXT: [[TMP208:%.*]] = add <2 x i32> [[TMP165]], [[TMP201]] -; CHECK-NEXT: [[TMP209:%.*]] = xor <2 x i32> [[TMP208]], [[TMP131]] -; CHECK-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP120]], 15 -; CHECK-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 -; CHECK-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 -; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] -; CHECK-NEXT: [[XOR_I63_4:%.*]] = xor i32 [[ADD_I62_2]], [[TMP120]] -; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_2]] -; CHECK-NEXT: [[TMP211:%.*]] = extractelement <2 x i32> [[TMP209]], i32 0 -; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP211]] -; CHECK-NEXT: [[TMP212:%.*]] = extractelement <2 x i32> [[TMP209]], i32 1 -; CHECK-NEXT: [[ADD112_4:%.*]] = add i32 [[ADD110_2]], [[TMP212]] -; CHECK-NEXT: [[ADD113_4:%.*]] = add i32 [[ADD112_4]], [[XOR_I63_4]] -; CHECK-NEXT: [[ADD78_4:%.*]] = add i32 
[[SUB59_2]], [[SUB60]] -; CHECK-NEXT: [[SUB86_4:%.*]] = sub i32 [[SUB60]], [[SUB59_2]] -; CHECK-NEXT: [[TMP213:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_4]], i32 0 -; CHECK-NEXT: [[TMP214:%.*]] = shufflevector <2 x i32> [[TMP213]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP215:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_4]], i32 0 -; CHECK-NEXT: [[TMP216:%.*]] = shufflevector <2 x i32> [[TMP215]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP217:%.*]] = add <2 x i32> [[TMP214]], [[TMP216]] -; CHECK-NEXT: [[TMP218:%.*]] = sub <2 x i32> [[TMP214]], [[TMP216]] -; CHECK-NEXT: [[TMP219:%.*]] = shufflevector <2 x i32> [[TMP217]], <2 x i32> [[TMP218]], <2 x i32> -; CHECK-NEXT: [[ADD105_4:%.*]] = add i32 [[SUB102_3]], [[SUB86_4]] -; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_4]], [[SUB102_3]] -; CHECK-NEXT: [[ADD_I52_4:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_4]] -; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_4]], [[CONV1]] -; CHECK-NEXT: [[TMP185:%.*]] = lshr <2 x i32> [[TMP102]], splat (i32 15) -; CHECK-NEXT: [[TMP193:%.*]] = and <2 x i32> [[TMP185]], splat (i32 65537) -; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP193]], splat (i32 65535) -; CHECK-NEXT: [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP219]] -; CHECK-NEXT: [[TMP188:%.*]] = xor <2 x i32> [[TMP187]], [[TMP102]] -; CHECK-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 -; CHECK-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 -; CHECK-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 -; CHECK-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] -; CHECK-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] -; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_4]] -; CHECK-NEXT: [[TMP189:%.*]] = extractelement <2 x i32> [[TMP188]], i32 0 -; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP189]] -; CHECK-NEXT: [[TMP190:%.*]] = extractelement <2 x i32> [[TMP188]], i32 1 -; CHECK-NEXT: 
[[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP190]] -; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1 +; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4) +; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8) +; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12) +; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1 +; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0) +; CHECK-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4) +; CHECK-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP92]], i64 8) +; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP132]], i64 12) +; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) +; 
CHECK-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP3]], i32 5 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP52]], i32 9 +; CHECK-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32> +; CHECK-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0) +; CHECK-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4) +; CHECK-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP98]], i64 8) +; CHECK-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP138]], i64 12) +; CHECK-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16) +; CHECK-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]] +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP48]] +; 
CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP70]] +; CHECK-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP70]] +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = add <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15) +; CHECK-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) +; CHECK-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535) +; CHECK-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]] +; CHECK-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]] +; CHECK-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; CHECK-NEXT: ret i32 [[ADD113_3]] ; ; THR15-LABEL: define i32 @test( ; THR15-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; THR15-NEXT: entry: -; THR15-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 -; THR15-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] 
to i32 ; THR15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 ; THR15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 -; THR15-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1 -; THR15-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 -; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 -; THR15-NEXT: [[CONV33:%.*]] = zext i8 [[TMP1]] to i32 ; THR15-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; THR15-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] -; THR15-NEXT: [[TMP2:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 -; THR15-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP2]] to i32 ; THR15-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 ; THR15-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 -; THR15-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 -; THR15-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 -; THR15-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 -; THR15-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP5]] to i32 ; THR15-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; THR15-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] ; THR15-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 ; THR15-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 -; THR15-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1 -; THR15-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; THR15-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 -; THR15-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 -; THR15-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP20:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> -; THR15-NEXT: [[TMP87:%.*]] = zext i8 [[TMP6]] to i32 -; 
THR15-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP22:%.*]] = zext <2 x i8> [[TMP21]] to <2 x i32> -; THR15-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP20]], [[TMP22]] -; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; THR15-NEXT: [[TMP24:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP25:%.*]] = zext <2 x i8> [[TMP24]] to <2 x i32> -; THR15-NEXT: [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; THR15-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP27:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32> -; THR15-NEXT: [[TMP28:%.*]] = sub <2 x i32> [[TMP25]], [[TMP27]] -; THR15-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], splat (i32 16) -; THR15-NEXT: [[TMP59:%.*]] = add <2 x i32> [[TMP29]], [[TMP23]] -; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP32:%.*]] = zext <2 x i8> [[TMP31]] to <2 x i32> -; THR15-NEXT: [[TMP86:%.*]] = zext i8 [[TMP7]] to i32 -; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP34:%.*]] = zext <2 x i8> [[TMP33]] to <2 x i32> -; THR15-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP32]], [[TMP34]] -; THR15-NEXT: [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP37:%.*]] = zext <2 x i8> [[TMP36]] to <2 x i32> -; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> -; THR15-NEXT: [[TMP40:%.*]] = sub <2 x i32> [[TMP37]], [[TMP39]] -; THR15-NEXT: [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], splat (i32 16) -; THR15-NEXT: [[TMP76:%.*]] = add <2 x i32> [[TMP41]], [[TMP35]] -; THR15-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP76]], [[TMP59]] -; 
THR15-NEXT: [[TMP42:%.*]] = sub <2 x i32> [[TMP59]], [[TMP76]] -; THR15-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP30]], i32 0 -; THR15-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP30]], i32 1 -; THR15-NEXT: [[ADD44_2:%.*]] = add i32 [[TMP44]], [[TMP43]] -; THR15-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP42]], i32 0 -; THR15-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP42]], i32 1 -; THR15-NEXT: [[ADD46_2:%.*]] = add i32 [[TMP46]], [[TMP45]] ; THR15-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4 -; THR15-NEXT: [[TMP47:%.*]] = load <2 x i8>, ptr null, align 1 ; THR15-NEXT: [[TMP48:%.*]] = load i8, ptr null, align 1 -; THR15-NEXT: [[TMP49:%.*]] = zext <2 x i8> [[TMP47]] to <2 x i32> -; THR15-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP48]] to i32 -; THR15-NEXT: [[TMP50:%.*]] = load <2 x i8>, ptr null, align 1 -; THR15-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP50]] to <2 x i32> -; THR15-NEXT: [[TMP52:%.*]] = sub <2 x i32> [[TMP49]], [[TMP51]] -; THR15-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) -; THR15-NEXT: [[TMP54:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> -; THR15-NEXT: [[TMP77:%.*]] = shufflevector <2 x i32> [[TMP54]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP55:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; THR15-NEXT: [[TMP56:%.*]] = zext <2 x i8> [[TMP55]] to <2 x i32> -; THR15-NEXT: [[TMP57:%.*]] = sub <2 x i32> [[TMP77]], [[TMP56]] -; THR15-NEXT: [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], splat (i32 16) -; THR15-NEXT: [[TMP72:%.*]] = add <2 x i32> [[TMP58]], [[TMP52]] -; THR15-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 -; THR15-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 -; THR15-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 -; THR15-NEXT: [[TMP60:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 -; THR15-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP60]] 
to <2 x i32> -; THR15-NEXT: [[TMP62:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 -; THR15-NEXT: [[TMP63:%.*]] = zext <2 x i8> [[TMP62]] to <2 x i32> -; THR15-NEXT: [[TMP64:%.*]] = sub <2 x i32> [[TMP61]], [[TMP63]] -; THR15-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) -; THR15-NEXT: [[TMP66:%.*]] = zext <2 x i8> [[TMP65]] to <2 x i32> -; THR15-NEXT: [[TMP67:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 -; THR15-NEXT: [[TMP68:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32> -; THR15-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP66]], [[TMP68]] -; THR15-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) -; THR15-NEXT: [[TMP73:%.*]] = add <2 x i32> [[TMP70]], [[TMP64]] -; THR15-NEXT: [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0 -; THR15-NEXT: [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1 -; THR15-NEXT: [[ADD48_3:%.*]] = add i32 [[TMP75]], [[TMP74]] -; THR15-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP74]], [[TMP75]] -; THR15-NEXT: [[TMP80:%.*]] = extractelement <2 x i32> [[TMP73]], i32 0 -; THR15-NEXT: [[TMP81:%.*]] = extractelement <2 x i32> [[TMP73]], i32 1 -; THR15-NEXT: [[ADD55_3:%.*]] = add i32 [[TMP81]], [[TMP80]] -; THR15-NEXT: [[SUB47_3:%.*]] = sub i32 [[TMP80]], [[TMP81]] -; THR15-NEXT: [[ADD48_4:%.*]] = add i32 [[ADD55_3]], [[ADD48_3]] -; THR15-NEXT: [[TMP78:%.*]] = shufflevector <2 x i32> [[TMP30]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP78]], i32 [[ADD48_3]], i32 0 -; THR15-NEXT: [[TMP83:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[ADD55_3]], i32 0 -; THR15-NEXT: [[TMP79:%.*]] = sub <2 x i32> [[TMP71]], [[TMP83]] -; THR15-NEXT: [[ADD55_4:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] -; THR15-NEXT: [[TMP137:%.*]] = shufflevector <2 x i32> [[TMP42]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP82:%.*]] = insertelement <2 x i32> [[TMP137]], i32 [[SUB45_3]], i32 0 -; THR15-NEXT: 
[[TMP84:%.*]] = insertelement <2 x i32> [[TMP42]], i32 [[SUB47_3]], i32 0 -; THR15-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP82]], [[TMP84]] -; THR15-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_4]], [[ADD44_2]] -; THR15-NEXT: [[SUB102:%.*]] = sub i32 [[ADD44_2]], [[ADD48_4]] -; THR15-NEXT: [[SHR_I:%.*]] = lshr i32 [[CONV_3]], 15 -; THR15-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 -; THR15-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535 -; THR15-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP44]], 15 -; THR15-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537 -; THR15-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535 -; THR15-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_4]], [[ADD46_2]] -; THR15-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD46_2]], [[ADD55_4]] -; THR15-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP86]], 15 -; THR15-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 -; THR15-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 -; THR15-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[TMP87]], 15 -; THR15-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 -; THR15-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 -; THR15-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP79]], i32 0 -; THR15-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP79]], i32 1 -; THR15-NEXT: [[ADD94_2:%.*]] = add i32 [[TMP88]], [[TMP89]] -; THR15-NEXT: [[SUB102_2:%.*]] = sub i32 [[TMP89]], [[TMP88]] -; THR15-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 -; THR15-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 -; THR15-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 -; THR15-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 -; THR15-NEXT: [[TMP91:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 -; THR15-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP90]], [[TMP91]] -; THR15-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP91]], [[TMP90]] -; THR15-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV]], 15 -; THR15-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 -; THR15-NEXT: [[MUL_I51_3:%.*]] = mul i32 
[[AND_I50_3]], 65535 -; THR15-NEXT: [[TMP92:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 -; THR15-NEXT: [[TMP93:%.*]] = zext <2 x i8> [[TMP92]] to <2 x i32> +; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr null, align 1 +; THR15-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 ; THR15-NEXT: [[TMP143:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 -; THR15-NEXT: [[TMP94:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP95:%.*]] = zext <2 x i8> [[TMP94]] to <2 x i32> ; THR15-NEXT: [[TMP146:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 -; THR15-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> ; THR15-NEXT: [[TMP147:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP99:%.*]] = zext <2 x i8> [[TMP98]] to <2 x i32> -; THR15-NEXT: [[TMP100:%.*]] = sub <2 x i32> [[TMP97]], [[TMP99]] -; THR15-NEXT: [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], splat (i32 16) -; THR15-NEXT: [[TMP102:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP102]] to <2 x i32> -; THR15-NEXT: [[TMP104:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP105:%.*]] = zext <2 x i8> [[TMP104]] to <2 x i32> -; THR15-NEXT: [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP107:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32> -; THR15-NEXT: [[TMP108:%.*]] = sub <2 x i32> [[TMP105]], [[TMP107]] -; THR15-NEXT: [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], splat (i32 16) -; THR15-NEXT: [[TMP110:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV33]], i32 1 -; THR15-NEXT: [[TMP111:%.*]] = sub <2 x i32> [[TMP110]], [[TMP103]] -; THR15-NEXT: [[TMP112:%.*]] = add <2 x i32> [[TMP109]], [[TMP111]] -; THR15-NEXT: 
[[TMP113:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV]], i32 0 -; THR15-NEXT: [[TMP114:%.*]] = sub <2 x i32> [[TMP113]], [[TMP95]] -; THR15-NEXT: [[TMP115:%.*]] = add <2 x i32> [[TMP101]], [[TMP114]] -; THR15-NEXT: [[TMP116:%.*]] = shufflevector <2 x i32> [[TMP112]], <2 x i32> [[TMP115]], <2 x i32> -; THR15-NEXT: [[TMP117:%.*]] = add <2 x i32> [[TMP112]], [[TMP115]] -; THR15-NEXT: [[TMP118:%.*]] = sub <2 x i32> [[TMP115]], [[TMP112]] -; THR15-NEXT: [[TMP119:%.*]] = extractelement <2 x i32> [[TMP117]], i32 0 -; THR15-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP117]], i32 1 -; THR15-NEXT: [[ADD48:%.*]] = add i32 [[TMP120]], [[TMP119]] -; THR15-NEXT: [[SUB51:%.*]] = sub i32 [[TMP119]], [[TMP120]] -; THR15-NEXT: [[TMP121:%.*]] = extractelement <2 x i32> [[TMP118]], i32 0 -; THR15-NEXT: [[TMP122:%.*]] = extractelement <2 x i32> [[TMP118]], i32 1 -; THR15-NEXT: [[ADD55:%.*]] = add i32 [[TMP122]], [[TMP121]] -; THR15-NEXT: [[SUB59:%.*]] = sub i32 [[TMP121]], [[TMP122]] -; THR15-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP120]], 15 -; THR15-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 -; THR15-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 -; THR15-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP122]], 15 -; THR15-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 -; THR15-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 -; THR15-NEXT: [[TMP123:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 -; THR15-NEXT: [[TMP124:%.*]] = zext <2 x i8> [[TMP123]] to <2 x i32> +; THR15-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 ; THR15-NEXT: [[TMP148:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 -; THR15-NEXT: [[TMP125:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP126:%.*]] = zext <2 x i8> [[TMP125]] to <2 x i32> ; THR15-NEXT: [[TMP152:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; THR15-NEXT: [[TMP127:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: 
[[TMP128:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32> ; THR15-NEXT: [[TMP153:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; THR15-NEXT: [[TMP129:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP130:%.*]] = zext <2 x i8> [[TMP129]] to <2 x i32> -; THR15-NEXT: [[TMP131:%.*]] = sub <2 x i32> [[TMP128]], [[TMP130]] -; THR15-NEXT: [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], splat (i32 16) -; THR15-NEXT: [[TMP138:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP139:%.*]] = zext <2 x i8> [[TMP138]] to <2 x i32> -; THR15-NEXT: [[TMP154:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP155:%.*]] = zext <2 x i8> [[TMP154]] to <2 x i32> -; THR15-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> -; THR15-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP155]], [[TMP134]] -; THR15-NEXT: [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) -; THR15-NEXT: [[TMP140:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV33_1]], i32 1 -; THR15-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP140]], [[TMP139]] -; THR15-NEXT: [[TMP171:%.*]] = add <2 x i32> [[TMP170]], [[TMP141]] -; THR15-NEXT: [[TMP186:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV_1]], i32 0 -; THR15-NEXT: [[TMP187:%.*]] = sub <2 x i32> [[TMP186]], [[TMP126]] -; THR15-NEXT: [[TMP142:%.*]] = add <2 x i32> [[TMP132]], [[TMP187]] -; THR15-NEXT: [[TMP136:%.*]] = add <2 x i32> [[TMP171]], [[TMP142]] -; THR15-NEXT: [[TMP149:%.*]] = sub <2 x i32> [[TMP142]], [[TMP171]] -; THR15-NEXT: [[TMP144:%.*]] = extractelement <2 x i32> [[TMP136]], i32 0 -; THR15-NEXT: [[TMP145:%.*]] = extractelement <2 x i32> [[TMP136]], i32 1 -; THR15-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP145]], [[TMP144]] -; THR15-NEXT: [[SUB45_1:%.*]] = sub i32 [[TMP144]], [[TMP145]] -; THR15-NEXT: [[TMP150:%.*]] = extractelement <2 x 
i32> [[TMP149]], i32 0 -; THR15-NEXT: [[TMP151:%.*]] = extractelement <2 x i32> [[TMP149]], i32 1 -; THR15-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP151]], [[TMP150]] -; THR15-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP150]], [[TMP151]] -; THR15-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP145]], 15 -; THR15-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 -; THR15-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 -; THR15-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP151]], 15 -; THR15-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 -; THR15-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; THR15-NEXT: [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], splat (i32 15) -; THR15-NEXT: [[TMP157:%.*]] = and <2 x i32> [[TMP156]], splat (i32 65537) -; THR15-NEXT: [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], splat (i32 65535) -; THR15-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_2]], [[ADD48]] -; THR15-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_2]] -; THR15-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] -; THR15-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] -; THR15-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]] -; THR15-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]] -; THR15-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] -; THR15-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[CONV_3]] -; THR15-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]] -; THR15-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP44]] -; THR15-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] -; THR15-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP145]] -; THR15-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] -; THR15-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP120]] -; THR15-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] -; THR15-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] -; THR15-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]] -; THR15-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD48_1]], [[ADD55]] -; THR15-NEXT: [[SUB86_1:%.*]] = sub i32 
[[ADD55]], [[ADD48_1]] -; THR15-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]] -; THR15-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] -; THR15-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]] -; THR15-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] -; THR15-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]] -; THR15-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[TMP86]] -; THR15-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]] -; THR15-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[TMP87]] -; THR15-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] -; THR15-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP151]] -; THR15-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] -; THR15-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP122]] -; THR15-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD113]] -; THR15-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] -; THR15-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] -; THR15-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]] -; THR15-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB45_1]], [[SUB51]] -; THR15-NEXT: [[SUB86_2:%.*]] = sub i32 [[SUB51]], [[SUB45_1]] -; THR15-NEXT: [[TMP159:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0 -; THR15-NEXT: [[TMP160:%.*]] = shufflevector <2 x i32> [[TMP159]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP161:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0 -; THR15-NEXT: [[TMP162:%.*]] = shufflevector <2 x i32> [[TMP161]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP163:%.*]] = add <2 x i32> [[TMP160]], [[TMP162]] -; THR15-NEXT: [[TMP164:%.*]] = sub <2 x i32> [[TMP160]], [[TMP162]] -; THR15-NEXT: [[TMP165:%.*]] = shufflevector <2 x i32> [[TMP163]], <2 x i32> [[TMP164]], <2 x i32> -; THR15-NEXT: [[ADD105_2:%.*]] = add i32 [[SUB102_2]], [[SUB86_2]] -; THR15-NEXT: [[SUB106_2:%.*]] = sub i32 
[[SUB86_2]], [[SUB102_2]] -; THR15-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]] -; THR15-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] -; THR15-NEXT: [[TMP166:%.*]] = add <2 x i32> [[TMP158]], [[TMP165]] -; THR15-NEXT: [[TMP167:%.*]] = xor <2 x i32> [[TMP166]], [[TMP124]] -; THR15-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP119]], 15 -; THR15-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 -; THR15-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 -; THR15-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] -; THR15-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[TMP119]] -; THR15-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]] -; THR15-NEXT: [[TMP168:%.*]] = extractelement <2 x i32> [[TMP167]], i32 0 -; THR15-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP168]] -; THR15-NEXT: [[TMP169:%.*]] = extractelement <2 x i32> [[TMP167]], i32 1 -; THR15-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP169]] -; THR15-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] -; THR15-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[SUB59]] -; THR15-NEXT: [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB51_1]] -; THR15-NEXT: [[TMP172:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 -; THR15-NEXT: [[TMP173:%.*]] = shufflevector <2 x i32> [[TMP172]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP174:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 -; THR15-NEXT: [[TMP175:%.*]] = shufflevector <2 x i32> [[TMP174]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP176:%.*]] = add <2 x i32> [[TMP173]], [[TMP175]] -; THR15-NEXT: [[TMP177:%.*]] = sub <2 x i32> [[TMP173]], [[TMP175]] -; THR15-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP176]], <2 x i32> [[TMP177]], <2 x i32> -; THR15-NEXT: [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]] -; THR15-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] -; THR15-NEXT: [[ADD_I52_3:%.*]] = add i32 
[[MUL_I51_3]], [[ADD105_3]] -; THR15-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]] -; THR15-NEXT: [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], splat (i32 15) -; THR15-NEXT: [[TMP180:%.*]] = and <2 x i32> [[TMP179]], splat (i32 65537) -; THR15-NEXT: [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], splat (i32 65535) -; THR15-NEXT: [[TMP182:%.*]] = add <2 x i32> [[TMP181]], [[TMP178]] -; THR15-NEXT: [[TMP183:%.*]] = xor <2 x i32> [[TMP182]], [[TMP93]] -; THR15-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 -; THR15-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 -; THR15-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 -; THR15-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] -; THR15-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] -; THR15-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]] -; THR15-NEXT: [[TMP184:%.*]] = extractelement <2 x i32> [[TMP183]], i32 0 -; THR15-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP184]] -; THR15-NEXT: [[TMP185:%.*]] = extractelement <2 x i32> [[TMP183]], i32 1 -; THR15-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP185]] -; THR15-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] +; THR15-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; THR15-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; THR15-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; THR15-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0) +; THR15-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4) +; THR15-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8) +; THR15-NEXT: [[TMP18:%.*]] = call <16 x i8> 
@llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12) +; THR15-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> +; THR15-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0) +; THR15-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4) +; THR15-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP143]], i64 8) +; THR15-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP148]], i64 12) +; THR15-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> +; THR15-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]] +; THR15-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) +; THR15-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> +; THR15-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> +; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <16 x i32> +; THR15-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> +; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <16 x i32> +; THR15-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> +; THR15-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP1]], i32 5 +; THR15-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP48]], i32 9 +; THR15-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32> +; THR15-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; THR15-NEXT: 
[[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0) +; THR15-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4) +; THR15-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP147]], i64 8) +; THR15-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP153]], i64 12) +; THR15-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32> +; THR15-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]] +; THR15-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16) +; THR15-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]] +; THR15-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP70]] +; THR15-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP70]] +; THR15-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> +; THR15-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP52]] +; THR15-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP52]] +; THR15-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; THR15-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]] +; THR15-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]] +; THR15-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> +; THR15-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP61:%.*]] = add <16 x i32> 
[[TMP59]], [[TMP60]] +; THR15-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]] +; THR15-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> +; THR15-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> +; THR15-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15) +; THR15-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) +; THR15-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535) +; THR15-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]] +; THR15-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]] +; THR15-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; THR15-NEXT: ret i32 [[ADD113_3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 5b0f4a69de4c3..7723746dda301 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -342,8 +342,8 @@ define void @reduce_or_2() { ; ZVFHMIN-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer ; ZVFHMIN-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; ZVFHMIN-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVFHMIN-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] +; ZVFHMIN-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVFHMIN-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] ; ZVFHMIN: 7: ; ZVFHMIN-NEXT: ret void ; ZVFHMIN: 8: @@ -356,8 +356,8 @@ define void @reduce_or_2() { ; ZVL128-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 ; ZVL128-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer ; ZVL128-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; 
ZVL128-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVL128-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] +; ZVL128-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVL128-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] ; ZVL128: 7: ; ZVL128-NEXT: ret void ; ZVL128: 8: @@ -365,16 +365,14 @@ define void @reduce_or_2() { ; ; ZVL256-LABEL: @reduce_or_2( ; ZVL256-NEXT: [[TMP1:%.*]] = shl i64 0, 0 -; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 15 -; ZVL256-NEXT: [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer -; ZVL256-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 -; ZVL256-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer -; ZVL256-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; ZVL256-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVL256-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] -; ZVL256: 7: +; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <32 x i64> , i64 [[TMP1]], i32 15 +; ZVL256-NEXT: [[TMP3:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> +; ZVL256-NEXT: [[TMP4:%.*]] = icmp ult <32 x i64> [[TMP3]], zeroinitializer +; ZVL256-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP4]]) +; ZVL256-NEXT: br i1 [[TMP5]], label [[TMP7:%.*]], label [[TMP6:%.*]] +; ZVL256: 6: ; ZVL256-NEXT: ret void -; ZVL256: 8: +; ZVL256: 7: ; ZVL256-NEXT: ret void ; ; ZVL512-LABEL: @reduce_or_2( From f159ee736db7f73d825497e5f8d01f1432c05939 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Thu, 27 Feb 2025 19:04:18 -0800 Subject: [PATCH 040/123] [HLSL] Add HLSLResourceBindingAttr to default constant buffer numeric declarations ($Globals) (#128981) Translates `register(c#`) annotations on numeric constants in the global scope to `HLSLResourceBindingAttr`. Applies to scalar, vector and array constants. 
Fixes #128964 --- clang/lib/Sema/SemaHLSL.cpp | 10 +-- .../test/AST/HLSL/resource_binding_attr.hlsl | 67 ++++++++++++------- 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 283a9801fc707..ffc3ac1b65854 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1446,18 +1446,20 @@ static bool DiagnoseLocalRegisterBinding(Sema &S, SourceLocation &ArgLoc, Ty = Ty->getArrayElementTypeNoTypeQual(); // Basic types - if (Ty->isArithmeticType()) { + if (Ty->isArithmeticType() || Ty->isVectorType()) { bool DeclaredInCOrTBuffer = isa(D->getDeclContext()); if (SpecifiedSpace && !DeclaredInCOrTBuffer) S.Diag(ArgLoc, diag::err_hlsl_space_on_global_constant); - if (!DeclaredInCOrTBuffer && - (Ty->isIntegralType(S.getASTContext()) || Ty->isFloatingType())) { - // Default Globals + if (!DeclaredInCOrTBuffer && (Ty->isIntegralType(S.getASTContext()) || + Ty->isFloatingType() || Ty->isVectorType())) { + // Register annotation on default constant buffer declaration ($Globals) if (RegType == RegisterType::CBuffer) S.Diag(ArgLoc, diag::warn_hlsl_deprecated_register_type_b); else if (RegType != RegisterType::C) S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum; + else + return true; } else { if (RegType == RegisterType::C) S.Diag(ArgLoc, diag::warn_hlsl_register_type_c_packoffset); diff --git a/clang/test/AST/HLSL/resource_binding_attr.hlsl b/clang/test/AST/HLSL/resource_binding_attr.hlsl index 6fac903f75e18..26ab85b7d18a5 100644 --- a/clang/test/AST/HLSL/resource_binding_attr.hlsl +++ b/clang/test/AST/HLSL/resource_binding_attr.hlsl @@ -1,41 +1,56 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -ast-dump -o - %s | FileCheck %s -// CHECK:HLSLBufferDecl 0x[[CB:[0-9a-f]+]] {{.*}} line:8:9 cbuffer CB -// CHECK-NEXT:HLSLResourceClassAttr 
0x[[CB:[0-9a-f]+]] {{.*}} Implicit CBuffer -// CHECK-NEXT:HLSLResourceAttr 0x[[CB:[0-9a-f]+]] {{.*}} Implicit CBuffer -// CHECK-NEXT:HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "b3" "space2" -// CHECK-NEXT:VarDecl 0x[[A:[0-9a-f]+]] {{.*}} col:9 used a 'hlsl_constant float' +// CHECK: HLSLBufferDecl {{.*}} line:[[# @LINE + 5]]:9 cbuffer CB +// CHECK-NEXT: HLSLResourceClassAttr {{.*}} Implicit CBuffer +// CHECK-NEXT: HLSLResourceAttr {{.*}} Implicit CBuffer +// CHECK-NEXT: HLSLResourceBindingAttr {{.*}} "b3" "space2" +// CHECK-NEXT: VarDecl {{.*}} used a 'hlsl_constant float' cbuffer CB : register(b3, space2) { float a; } -// CHECK:HLSLBufferDecl 0x[[TB:[0-9a-f]+]] {{.*}} line:17:9 tbuffer TB -// CHECK-NEXT:HLSLResourceClassAttr 0x[[CB:[0-9a-f]+]] {{.*}} Implicit SRV -// CHECK-NEXT:HLSLResourceAttr 0x[[CB:[0-9a-f]+]] {{.*}} Implicit TBuffer -// CHECK-NEXT:HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "t2" "space1" -// CHECK-NEXT:VarDecl 0x[[B:[0-9a-f]+]] {{.*}} col:9 used b 'hlsl_constant float' +// CHECK: HLSLBufferDecl {{.*}} line:[[# @LINE + 5]]:9 tbuffer TB +// CHECK-NEXT: HLSLResourceClassAttr {{.*}} Implicit SRV +// CHECK-NEXT: HLSLResourceAttr {{.*}} Implicit TBuffer +// CHECK-NEXT: HLSLResourceBindingAttr {{.*}} "t2" "space1" +// CHECK-NEXT: VarDecl {{.*}} used b 'hlsl_constant float' tbuffer TB : register(t2, space1) { float b; } -float foo() { -// CHECK: BinaryOperator 0x{{[0-9a-f]+}} 'float' '+' -// CHECK-NEXT: ImplicitCastExpr 0x{{[0-9a-f]+}} 'float' -// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} 'hlsl_constant float' lvalue Var 0x[[A]] 'a' 'hlsl_constant float' -// CHECK-NEXT: ImplicitCastExpr 0x{{[0-9a-f]+}} 'float' -// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-f]+}} 'hlsl_constant float' lvalue Var 0x[[B]] 'b' 'hlsl_constant float' +export float foo() { return a + b; } -// CHECK: VarDecl 0x{{[0-9a-f]+}} <{{.*}}> col:17 UAV 'RWBuffer':'hlsl::RWBuffer' callinit -// CHECK-NEXT:-CXXConstructExpr 0x{{[0-9a-f]+}} 'RWBuffer':'hlsl::RWBuffer' 'void ()' -// 
CHECK-NEXT:-HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "u3" "space0" +// CHECK: VarDecl {{.*}} UAV 'RWBuffer':'hlsl::RWBuffer' +// CHECK: HLSLResourceBindingAttr {{.*}} "u3" "space0" RWBuffer UAV : register(u3); -// CHECK: -VarDecl 0x{{[0-9a-f]+}} <{{.*}}> col:17 UAV1 'RWBuffer':'hlsl::RWBuffer' callinit -// CHECK-NEXT:-CXXConstructExpr 0x{{[0-9a-f]+}} 'RWBuffer':'hlsl::RWBuffer' 'void ()' -// CHECK-NEXT:-HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "u2" "space0" -// CHECK-NEXT:-VarDecl 0x{{[0-9a-f]+}} col:38 UAV2 'RWBuffer':'hlsl::RWBuffer' callinit -// CHECK-NEXT:-CXXConstructExpr 0x{{[0-9a-f]+}} 'RWBuffer':'hlsl::RWBuffer' 'void ()' -// CHECK-NEXT:-HLSLResourceBindingAttr 0x{{[0-9a-f]+}} "u4" "space0" +// CHECK: VarDecl {{.*}} UAV1 'RWBuffer':'hlsl::RWBuffer' +// CHECK: HLSLResourceBindingAttr {{.*}} "u2" "space0" +// CHECK: VarDecl {{.*}} UAV2 'RWBuffer':'hlsl::RWBuffer' +// CHECK: HLSLResourceBindingAttr {{.*}} "u4" "space0" RWBuffer UAV1 : register(u2), UAV2 : register(u4); + +// +// Default constants ($Globals) layout annotations + +// CHECK: VarDecl {{.*}} f 'hlsl_constant float' +// CHECK: HLSLResourceBindingAttr {{.*}} "c5" "space0" +float f : register(c5); + +// CHECK: VarDecl {{.*}} intv 'hlsl_constant int4':'vector' +// CHECK: HLSLResourceBindingAttr {{.*}} "c2" "space0" +int4 intv : register(c2); + +// CHECK: VarDecl {{.*}} dar 'hlsl_constant double[5]' +// CHECK: HLSLResourceBindingAttr {{.*}} "c3" "space0" +double dar[5] : register(c3); + +struct S { + int a; +}; + +// CHECK: VarDecl {{.*}} s 'hlsl_constant S' +// CHECK: HLSLResourceBindingAttr {{.*}} "c10" "space0 +S s : register(c10); From f64daaac8135007051199a4a3ad8b4049b0f56ca Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Feb 2025 19:25:14 -0800 Subject: [PATCH 041/123] [RISCV] Add VL and VTYPE to implicit uses on MC vector instructions that also use FRM (#129130) We accidentally overwote the VL, VTYPE uses from the base class on any instruction that also uses FRM. 
Not sure why the llvm-mca test changed cycle time. --- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 34 +++++++++---------- llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 4 +-- .../tools/llvm-mca/RISCV/SiFiveP600/div.s | 2 +- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index fdb2334b131da..9bd1b0de82757 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1358,7 +1358,7 @@ defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111>; let Predicates = [HasVInstructionsAnyF] in { // Vector Single-Width Floating-Point Add/Subtract Instructions -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFADD_V : VALU_FV_V_F<"vfadd", 0b000000>; defm VFSUB_V : VALU_FV_V_F<"vfsub", 0b000010>; defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; @@ -1366,7 +1366,7 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; // Vector Widening Floating-Point Add/Subtract Instructions let Constraints = "@earlyclobber $vd", - Uses = [FRM], + Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { let RVVConstraint = WidenV in { @@ -1381,10 +1381,10 @@ let RVVConstraint = WidenW in { defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">; defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">; } // RVVConstraint = WidenW -} // Constraints = "@earlyclobber $vd", Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 +} // Constraints = "@earlyclobber $vd", Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Single-Width Floating-Point Multiply/Divide Instructions -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>; defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>; defm VFRDIV_V : 
VDIV_FV_F<"vfrdiv", 0b100001>; @@ -1392,12 +1392,12 @@ defm VFRDIV_V : VDIV_FV_F<"vfrdiv", 0b100001>; // Vector Widening Floating-Point Multiply let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, - Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { + Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>; defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>; defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>; @@ -1409,15 +1409,15 @@ defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>; } // Vector Widening Floating-Point Fused Multiply-Add Instructions -let Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>; defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>; defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>; defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, DestEEW = EEWSEWx2 +} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 // Vector Floating-Point Square-Root Instruction -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; defm 
VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; } @@ -1482,13 +1482,13 @@ def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd), // Single-Width Floating-Point/Integer Type-Convert Instructions let mayRaiseFPException = true in { -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; defm VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; } defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; } @@ -1497,7 +1497,7 @@ defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; // Widening Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt, mayRaiseFPException = true, DestEEW = EEWSEWx2 in { -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; } @@ -1510,13 +1510,13 @@ defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; // Narrowing Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", mayRaiseFPException = true in { -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFNCVT_XU_F_W : VNCVTI_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; defm VFNCVT_X_F_W : VNCVTI_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; } defm VFNCVT_RTZ_XU_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; defm VFNCVT_RTZ_X_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; -let Uses = [FRM] in { +let Uses = [FRM, VL, VTYPE] in { defm VFNCVT_F_XU_W : VNCVTF_IV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; defm VFNCVT_F_X_W : 
VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; @@ -1554,7 +1554,7 @@ defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>; let Predicates = [HasVInstructionsAnyF] in { // Vector Single-Width Floating-Point Reduction Instructions let RVVConstraint = NoConstraint, ElementsDependOn = EltDepsVLMask in { -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>; defm VFREDUSUM : VRED_FV_V<"vfredusum", 0b000001>; } @@ -1573,7 +1573,7 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, ElementsDep // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. -let Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM, VL, VTYPE], mayRaiseFPException = true in { defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>; defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 7a79d438fb596..cd13433847c54 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -223,7 +223,7 @@ let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvector", } let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvector", - Uses = [FRM] in { + Uses = [FRM, VL, VTYPE] in { def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">; def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td index a79f757753325..cbeec9ba75f16 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td @@ 
-21,13 +21,13 @@ let Predicates = [HasStdExtZvfbfmin], Constraints = "@earlyclobber $vd", mayRaiseFPException = true in { let RVVConstraint = WidenCvt, DestEEW = EEWSEWx2 in defm VFWCVTBF16_F_F_V : VWCVTF_FV_VS2<"vfwcvtbf16.f.f.v", 0b010010, 0b01101>; -let Uses = [FRM] in +let Uses = [FRM, VL, VTYPE] in defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>; } let Predicates = [HasStdExtZvfbfwma], Constraints = "@earlyclobber $vd_wb, $vd = $vd_wb", - RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true, + RVVConstraint = WidenV, Uses = [FRM, VL, VTYPE], mayRaiseFPException = true, DestEEW = EEWSEWx2 in { defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>; } diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s index 0d14a0f734bdc..c0b702ef0449b 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s @@ -328,7 +328,7 @@ vfsqrt.v v8, v16 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 320 -# CHECK-NEXT: Total Cycles: 14397 +# CHECK-NEXT: Total Cycles: 14435 # CHECK-NEXT: Total uOps: 320 # CHECK: Dispatch Width: 4 From ade49b8d1aa372a1ef87bcc2086aa84d70692e7d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Feb 2025 19:30:31 -0800 Subject: [PATCH 042/123] [RISCV] Move RISCV vector load/store searchable tables from RISCVISelDAGToDAG.cpp to RISCVBaseInfo.cpp. NFC (#129172) llvm-mca needs some of them for #128978. I'm relying on -ffunction-sections and -fdata-sections allowing these to be stripped from tools that don't need them like llvm-mc. 
--- .../RISCV/MCTargetDesc/RISCVBaseInfo.cpp | 12 +++ .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 77 +++++++++++++++++++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 12 --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 77 ------------------- 4 files changed, 89 insertions(+), 89 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 9c1c364c18549..1829291cd0348 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -39,6 +39,18 @@ using namespace RISCV; #include "RISCVGenSearchableTables.inc" } // namespace RISCVVInversePseudosTable +namespace RISCV { +#define GET_RISCVVSSEGTable_IMPL +#define GET_RISCVVLSEGTable_IMPL +#define GET_RISCVVLXSEGTable_IMPL +#define GET_RISCVVSXSEGTable_IMPL +#define GET_RISCVVLETable_IMPL +#define GET_RISCVVSETable_IMPL +#define GET_RISCVVLXTable_IMPL +#define GET_RISCVVSXTable_IMPL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCV + namespace RISCVABI { ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits, StringRef ABIName) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 88435b2b52ca5..433be1f1e87d4 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -666,6 +666,83 @@ struct PseudoInfo { #include "RISCVGenSearchableTables.inc" } // namespace RISCVVInversePseudosTable +namespace RISCV { +struct VLSEGPseudo { + uint16_t NF : 4; + uint16_t Masked : 1; + uint16_t Strided : 1; + uint16_t FF : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t Pseudo; +}; + +struct VLXSEGPseudo { + uint16_t NF : 4; + uint16_t Masked : 1; + uint16_t Ordered : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t IndexLMUL : 3; + uint16_t Pseudo; +}; + +struct VSSEGPseudo { + uint16_t NF : 4; + uint16_t Masked 
: 1; + uint16_t Strided : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t Pseudo; +}; + +struct VSXSEGPseudo { + uint16_t NF : 4; + uint16_t Masked : 1; + uint16_t Ordered : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t IndexLMUL : 3; + uint16_t Pseudo; +}; + +struct VLEPseudo { + uint16_t Masked : 1; + uint16_t Strided : 1; + uint16_t FF : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t Pseudo; +}; + +struct VSEPseudo { + uint16_t Masked : 1; + uint16_t Strided : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t Pseudo; +}; + +struct VLX_VSXPseudo { + uint16_t Masked : 1; + uint16_t Ordered : 1; + uint16_t Log2SEW : 3; + uint16_t LMUL : 3; + uint16_t IndexLMUL : 3; + uint16_t Pseudo; +}; + +#define GET_RISCVVSSEGTable_DECL +#define GET_RISCVVLSEGTable_DECL +#define GET_RISCVVLXSEGTable_DECL +#define GET_RISCVVSXSEGTable_DECL +#define GET_RISCVVLETable_DECL +#define GET_RISCVVSETable_DECL +#define GET_RISCVVLXTable_DECL +#define GET_RISCVVSXTable_DECL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCV + } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 7ea4bd94c0065..f3cce950ed7b5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -34,18 +34,6 @@ static cl::opt UsePseudoMovImm( "constant materialization"), cl::init(false)); -namespace llvm::RISCV { -#define GET_RISCVVSSEGTable_IMPL -#define GET_RISCVVLSEGTable_IMPL -#define GET_RISCVVLXSEGTable_IMPL -#define GET_RISCVVSXSEGTable_IMPL -#define GET_RISCVVLETable_IMPL -#define GET_RISCVVSETable_IMPL -#define GET_RISCVVLXTable_IMPL -#define GET_RISCVVSXTable_IMPL -#include "RISCVGenSearchableTables.inc" -} // namespace llvm::RISCV - void RISCVDAGToDAGISel::PreprocessISelDAG() { SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h 
b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index bb786e4b2bb40..5048a80fdd18f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -204,83 +204,6 @@ class RISCVDAGToDAGISelLegacy : public SelectionDAGISelLegacy { CodeGenOptLevel OptLevel); }; -namespace RISCV { -struct VLSEGPseudo { - uint16_t NF : 4; - uint16_t Masked : 1; - uint16_t Strided : 1; - uint16_t FF : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t Pseudo; -}; - -struct VLXSEGPseudo { - uint16_t NF : 4; - uint16_t Masked : 1; - uint16_t Ordered : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t IndexLMUL : 3; - uint16_t Pseudo; -}; - -struct VSSEGPseudo { - uint16_t NF : 4; - uint16_t Masked : 1; - uint16_t Strided : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t Pseudo; -}; - -struct VSXSEGPseudo { - uint16_t NF : 4; - uint16_t Masked : 1; - uint16_t Ordered : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t IndexLMUL : 3; - uint16_t Pseudo; -}; - -struct VLEPseudo { - uint16_t Masked : 1; - uint16_t Strided : 1; - uint16_t FF : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t Pseudo; -}; - -struct VSEPseudo { - uint16_t Masked :1; - uint16_t Strided : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t Pseudo; -}; - -struct VLX_VSXPseudo { - uint16_t Masked : 1; - uint16_t Ordered : 1; - uint16_t Log2SEW : 3; - uint16_t LMUL : 3; - uint16_t IndexLMUL : 3; - uint16_t Pseudo; -}; - -#define GET_RISCVVSSEGTable_DECL -#define GET_RISCVVLSEGTable_DECL -#define GET_RISCVVLXSEGTable_DECL -#define GET_RISCVVSXSEGTable_DECL -#define GET_RISCVVLETable_DECL -#define GET_RISCVVSETable_DECL -#define GET_RISCVVLXTable_DECL -#define GET_RISCVVSXTable_DECL -#include "RISCVGenSearchableTables.inc" -} // namespace RISCV - } // namespace llvm #endif From 0fb6a4050dafbd991262aa0b0bc750294b4cadcc Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Thu, 27 Feb 2025 21:49:19 -0600 Subject: [PATCH 043/123] [libc++] 
Fix the locale base API on Linux with musl (#128936) Since `363bfd6090b0 ([libc++] Use the new locale base API on Linux (#128007), 2025-02-24)`, musl targets will fail to build with errors due to missing strtoll_l functions. Co-authored-by: Pirama Arumuga Nainar --- libcxx/include/__locale_dir/support/linux.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h index f1662c0112603..fa0b03c646a2a 100644 --- a/libcxx/include/__locale_dir/support/linux.h +++ b/libcxx/include/__locale_dir/support/linux.h @@ -95,12 +95,22 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ } inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { +#if !_LIBCPP_HAS_MUSL_LIBC return ::strtoll_l(__nptr, __endptr, __base, __loc); +#else + (void)__loc; + return ::strtoll(__nptr, __endptr, __base); +#endif } inline _LIBCPP_HIDE_FROM_ABI unsigned long long __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { +#if !_LIBCPP_HAS_MUSL_LIBC return ::strtoull_l(__nptr, __endptr, __base, __loc); +#else + (void)__loc; + return ::strtoull(__nptr, __endptr, __base); +#endif } // From ddacd05b32df2ab3278099681f94414172f6ca7e Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 27 Feb 2025 20:07:04 -0800 Subject: [PATCH 044/123] [libc][bazel] Add py_binary rule to build hdrgen. (#129161) --- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index b45145acc2cfe..cc0c7c65b736b 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -5,6 +5,7 @@ # LLVM libc project. 
load("@bazel_skylib//lib:selects.bzl", "selects") load("@bazel_skylib//rules:common_settings.bzl", "string_flag") +load("@rules_python//python:defs.bzl", "py_binary") load( ":libc_build_rules.bzl", "libc_function", @@ -51,6 +52,14 @@ config_setting( flag_values = {":mpfr": "system"}, ) +########################### Header Generation ################################## + +py_binary( + name = "hdrgen", + srcs = glob(["utils/hdrgen/hdrgen/**/*.py"]), + imports = ["utils/hdrgen"], +) + ################################## Base Config ################################# libc_support_library( From abe6e5b09beb96b839d77c9c4238b6fb96b985fb Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Thu, 27 Feb 2025 20:18:02 -0800 Subject: [PATCH 045/123] [clang-format] Change BracedInitializerIndentWidth to int (#128988) Fixes #108526 --- clang/docs/ClangFormatStyleOptions.rst | 4 ++-- clang/include/clang/Format/Format.h | 4 ++-- clang/lib/Format/ContinuationIndenter.cpp | 6 +++--- clang/lib/Format/Format.cpp | 2 +- clang/unittests/Format/ConfigParseTest.cpp | 10 +++++++--- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index d157c07c9cef8..9ecac68ae72bf 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2582,9 +2582,9 @@ the configuration (without a prefix: ``Auto``). .. _BracedInitializerIndentWidth: -**BracedInitializerIndentWidth** (``Unsigned``) :versionbadge:`clang-format 17` :ref:`¶ ` +**BracedInitializerIndentWidth** (``Integer``) :versionbadge:`clang-format 17` :ref:`¶ ` The number of columns to use to indent the contents of braced init lists. - If unset, ``ContinuationIndentWidth`` is used. + If unset or negative, ``ContinuationIndentWidth`` is used. .. 
code-block:: c++ diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 46fb1d52701b3..fec47a248abb4 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -1289,7 +1289,7 @@ struct FormatStyle { BitFieldColonSpacingStyle BitFieldColonSpacing; /// The number of columns to use to indent the contents of braced init lists. - /// If unset, ``ContinuationIndentWidth`` is used. + /// If unset or negative, ``ContinuationIndentWidth`` is used. /// \code /// AlignAfterOpenBracket: AlwaysBreak /// BracedInitializerIndentWidth: 2 @@ -1319,7 +1319,7 @@ struct FormatStyle { /// } /// \endcode /// \version 17 - std::optional BracedInitializerIndentWidth; + int BracedInitializerIndentWidth; /// Different ways to wrap braces after control statements. enum BraceWrappingAfterControlStatementStyle : int8_t { diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index d49128c2b40f8..972dceb697a8b 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1921,9 +1921,9 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, NewIndent = Style.IndentWidth + std::min(State.Column, CurrentState.NestedBlockIndent); } else if (Current.is(tok::l_brace)) { - NewIndent = - CurrentState.LastSpace + Style.BracedInitializerIndentWidth.value_or( - Style.ContinuationIndentWidth); + const auto Width = Style.BracedInitializerIndentWidth; + NewIndent = CurrentState.LastSpace + + (Width < 0 ? 
Style.ContinuationIndentWidth : Width); } else { NewIndent = CurrentState.LastSpace + Style.ContinuationIndentWidth; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 68ef119fb4d65..92678a031178a 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1512,7 +1512,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.BinPackLongBracedList = true; LLVMStyle.BinPackParameters = FormatStyle::BPPS_BinPack; LLVMStyle.BitFieldColonSpacing = FormatStyle::BFCS_Both; - LLVMStyle.BracedInitializerIndentWidth = std::nullopt; + LLVMStyle.BracedInitializerIndentWidth = -1; LLVMStyle.BraceWrapping = {/*AfterCaseLabel=*/false, /*AfterClass=*/false, /*AfterControlStatement=*/FormatStyle::BWACS_Never, diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index 9cd262960b724..273bab87b1ee1 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -265,9 +265,9 @@ TEST(ConfigParseTest, ParsesConfigurationIntegers) { Style.Language = FormatStyle::LK_Cpp; CHECK_PARSE_INT(AccessModifierOffset); + CHECK_PARSE_INT(BracedInitializerIndentWidth); CHECK_PARSE_INT(PPIndentWidth); - CHECK_PARSE_UNSIGNED(BracedInitializerIndentWidth); CHECK_PARSE_UNSIGNED(ColumnLimit); CHECK_PARSE_UNSIGNED(ConstructorInitializerIndentWidth); CHECK_PARSE_UNSIGNED(ContinuationIndentWidth); @@ -1441,8 +1441,10 @@ TEST(ConfigParseTest, GetStyleOfFile) { ASSERT_EQ(*Style9, SubSubStyle); // Test 9.8: use inheritance from a file without BasedOnStyle - ASSERT_TRUE(FS.addFile("/e/withoutbase/.clang-format", 0, - llvm::MemoryBuffer::getMemBuffer("ColumnLimit: 123"))); + ASSERT_TRUE(FS.addFile( + "/e/withoutbase/.clang-format", 0, + llvm::MemoryBuffer::getMemBuffer("BracedInitializerIndentWidth: 2\n" + "ColumnLimit: 123"))); ASSERT_TRUE( FS.addFile("/e/withoutbase/sub/.clang-format", 0, llvm::MemoryBuffer::getMemBuffer( @@ -1452,6 +1454,7 @@ 
TEST(ConfigParseTest, GetStyleOfFile) { ASSERT_TRUE(static_cast(Style9)); ASSERT_EQ(*Style9, [] { auto Style = getLLVMStyle(); + Style.BracedInitializerIndentWidth = 2; Style.ColumnLimit = 123; return Style; }()); @@ -1460,6 +1463,7 @@ TEST(ConfigParseTest, GetStyleOfFile) { ASSERT_TRUE(static_cast(Style9)); ASSERT_EQ(*Style9, [] { auto Style = getLLVMStyle(); + Style.BracedInitializerIndentWidth = 2; Style.ColumnLimit = 123; Style.IndentWidth = 7; return Style; From 9d9ac2162281fdf2f19b30ad30a0d01dde2d069c Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 28 Feb 2025 15:08:55 +1100 Subject: [PATCH 046/123] [JITLink][AArch64] Ensure that nulls remain null during ptrauth signing. Signing a null pointer value can, and usually will, result in some high bits being set, causing null checks to fail. E.g. in extern void __attribute__((weak_import)) f(void); void (*p) = &f; if f is undefined then p should be null (left unsigned). This patch updates lowerPointer64AuthEdgesToSigningFunction to check for Pointer64Authenticated edges to null targets. Where found, these edges are turned into plain Pointer64 edges (which we know from context will write a null value to the fixup location), and signing instructions for these locations are omitted from the signing function. 
--- llvm/lib/ExecutionEngine/JITLink/aarch64.cpp | 9 ++++- .../AArch64/MachO_ptrauth-null-global.s | 34 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-null-global.s diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp index 8067e2a173a00..8ce7e74d67cde 100644 --- a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp @@ -324,11 +324,18 @@ Error lowerPointer64AuthEdgesToSigningFunction(LinkGraph &G) { uint64_t EncodedInfo = E.getAddend(); int32_t RealAddend = (uint32_t)(EncodedInfo & 0xffffffff); + auto ValueToSign = E.getTarget().getAddress() + RealAddend; + if (!ValueToSign) { + LLVM_DEBUG(dbgs() << " " << B->getFixupAddress(E) << " <- null\n"); + E.setAddend(RealAddend); + E.setKind(aarch64::Pointer64); + continue; + } + uint32_t InitialDiscriminator = (EncodedInfo >> 32) & 0xffff; bool AddressDiversify = (EncodedInfo >> 48) & 0x1; uint32_t Key = (EncodedInfo >> 49) & 0x3; uint32_t HighBits = EncodedInfo >> 51; - auto ValueToSign = E.getTarget().getAddress() + RealAddend; if (HighBits != 0x1000) return make_error( diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-null-global.s b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-null-global.s new file mode 100644 index 0000000000000..22ccc1630c63d --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-null-global.s @@ -0,0 +1,34 @@ +# RUN: llvm-mc -triple=arm64e-apple-macosx -filetype=obj -o %t.o %s +# RUN: llvm-jitlink %t.o +# +# REQUIRES: system-darwin && host=arm64{{.*}} +# +# Check that arm64e ptrauth pass preserves nulls. +# +# Testcase derived from: +# extern void __attribute__((weak_import)) f(void); +# void (*p) = &f; +# +# int main(int argc, char *argv[]) { +# return p ? 
1 : 0; +# } + + .section __TEXT,__text,regular,pure_instructions + .globl _main + .p2align 2 +_main: + adrp x8, _p@PAGE + ldr x8, [x8, _p@PAGEOFF] + cmp x8, #0 + cset w0, ne + ret + + .section __DATA,__data + .globl _p + .p2align 3, 0x0 +_p: + .quad _f@AUTH(ia,0) + + .weak_reference _f + .weak_reference l_f.ptrauth +.subsections_via_symbols From 4af9b19e6d412baa3a1b494034562f04229dbdfe Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 27 Feb 2025 23:28:48 -0500 Subject: [PATCH 047/123] [Clang][AMDGPU] Use 32-bit index for SWMMAC builtins (#129101) Currently, the index of SWMMAC builtins is of type `short`, likely based on the assumption that K can only be up to 32, meaning there are only 16 non-zero elements. However, this is not future-proof. This patch updates all of them to `int`. The intrinsics themselves don't need to be updated since they accept any integer type, and in the backend, they are already extended to 32-bit. Additionally, the tests already use various kinds of integers. Partially fixes SWDEV-518183. 
--- clang/include/clang/Basic/BuiltinsAMDGPU.def | 46 +++++++++---------- .../builtins-amdgcn-swmmac-w32.cl | 44 +++++++++--------- .../builtins-amdgcn-swmmac-w64.cl | 44 +++++++++--------- .../amdgpu/builtins-amdgcn-swmmac-w32.cl | 22 ++++----- 4 files changed, 78 insertions(+), 78 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 39e295aced96b..6d00862dde5ed 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -544,29 +544,29 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12, "V4fiiV4f", TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64") TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, "V8fV8hV16hV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, "V8fV8sV16sV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, "V8hV8hV16hV8hs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, "V8sV8sV16sV8ss", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, "V8iIbV2iIbV4iV8isIb", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, "V8iIbiIbV2iV8isIb", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, "V8iIbV2iIbV4iV8isIb", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32") 
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, "V8fV2iV4iV8fs", "nc", "gfx12-insts,wavefrontsize32") - -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, "V4fV4hV8hV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, "V4fV4sV8sV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, "V4hV4hV8hV4hs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, "V4sV4sV8sV4ss", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, "V4iIbiIbV2iV4isIb", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, "V4iIbiIbiV4isIb", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, "V4iIbiIbV2iV4isIb", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") -TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, "V8fV8hV16hV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, "V8fV8sV16sV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, "V8hV8hV16hV8hi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, "V8sV8sV16sV8si", "nc", 
"gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, "V8iIbV2iIbV4iV8iiIb", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, "V8iIbiIbV2iV8iiIb", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, "V8iIbV2iIbV4iV8iiIb", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32") + +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, "V4fV4hV8hV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, "V4fV4sV8sV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, "V4hV4hV8hV4hi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, "V4sV4sV8sV4si", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, "V4iIbiIbV2iV4iiIb", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, "V4iIbiIbiV4iiIb", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, "V4iIbiIbV2iV4iiIb", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64") 
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64") TARGET_BUILTIN(__builtin_amdgcn_prng_b32, "UiUi", "nc", "prng-inst") TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_fp6_f16, "V6UiV32hf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl index 56495c85bf1fd..9927bb334c486 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32.cl @@ -15,121 +15,121 @@ typedef short v16s __attribute__((ext_vector_type(16))); // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_f16_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i32(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf16_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i16(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x float> 
[[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i32(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f16_16x16x32_f16_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i32(<8 x half> [[A:%.*]], <16 x half> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, short index) +void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, int index) { *out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_bf16_16x16x32_bf16_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i16(<8 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i32(<8 x i16> 
[[A:%.*]], <16 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, short index) +void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, int index) { *out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu4_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // 
CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x64_iu4_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i16(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <4 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, 
!tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i 
b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i32(<2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a, b, c, index); } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl index 89b26edb2f02b..eaa6b14d2a792 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64.cl @@ -14,121 +14,121 @@ typedef short v8s __attribute__((ext_vector_type(8))); // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_f16_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i16(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i32(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x float> [[C:%.*]], i32 
[[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_f16_w64(global v4f* out, v4h a, v8h b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf16_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i16(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i32(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf16_w64(global v4f* out, v4s a, v8s b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f16_16x16x32_f16_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i16(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x half> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i32(<4 x half> [[A:%.*]], <8 x half> [[B:%.*]], <4 x half> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void 
test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h c, short index) +void test_amdgcn_swmmac_f16_16x16x32_f16_w64(global v4h* out, v4h a, v8h b, v4h c, int index) { *out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_bf16_16x16x32_bf16_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i16(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i32(<4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 8, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4s c, short index) +void test_amdgcn_swmmac_bf16_16x16x32_bf16_w64(global v4s* out, v4s a, v8s b, v4s c, int index) { *out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i16(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* out, int a, v2i b, v4i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu8_w64(global v4i* 
out, int a, v2i b, v4i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x32_iu4_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu4_w64(global v4i* out, int a, int b, v4i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64(true, a, true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_i32_16x16x64_iu4_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i16 [[INDEX:%.*]], i1 true) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i32(i1 true, i32 [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 true) // CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i c, short index) +void test_amdgcn_swmmac_i32_16x16x64_iu4_w64(global v4i* out, int a, v2i b, v4i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64(true, a, 
true, b, c, index, true); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i16(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i16(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: 
[[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i16(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(a, b, c, index); } // CHECK-GFX1200-LABEL: @test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64( // CHECK-GFX1200-NEXT: entry: -// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i16(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i16 [[INDEX:%.*]]) +// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i32(i32 [[A:%.*]], <2 x i32> [[B:%.*]], <4 x float> [[C:%.*]], i32 [[INDEX:%.*]]) // CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] // CHECK-GFX1200-NEXT: ret void // -void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(global v4f* out, int a, v2i b, v4f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(global v4f* out, int a, v2i b, v4f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(a, b, c, index); } diff --git a/cross-project-tests/amdgpu/builtins-amdgcn-swmmac-w32.cl b/cross-project-tests/amdgpu/builtins-amdgcn-swmmac-w32.cl index 317d9a1102ccf..e6adc7bea525c 100644 --- a/cross-project-tests/amdgpu/builtins-amdgcn-swmmac-w32.cl +++ 
b/cross-project-tests/amdgpu/builtins-amdgcn-swmmac-w32.cl @@ -15,7 +15,7 @@ typedef short v16s __attribute__((ext_vector_type(16))); // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_f16_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_f16 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a, b, c, index); } @@ -24,7 +24,7 @@ void test_amdgcn_swmmac_f32_16x16x32_f16_w32(global v8f* out, v8h a, v16h b, v8f // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_bf16_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_bf16 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a, b, c, index); } @@ -33,7 +33,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf16_w32(global v8f* out, v8s a, v16s b, v8 // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f16_16x16x32_f16_w32: // CHECK-GFX1200: v_swmmac_f16_16x16x32_f16 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, short index) +void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h c, int index) { *out = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a, b, c, index); } @@ -42,7 +42,7 @@ void test_amdgcn_swmmac_f16_16x16x32_f16_w32(global v8h* out, v8h a, v16h b, v8h // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_bf16_16x16x32_bf16_w32: // CHECK-GFX1200: v_swmmac_bf16_16x16x32_bf16 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} 
// -void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, short index) +void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v8s c, int index) { *out = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a, b, c, index); } @@ -51,7 +51,7 @@ void test_amdgcn_swmmac_bf16_16x16x32_bf16_w32(global v8s* out, v8s a, v16s b, v // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_i32_16x16x32_iu8_w32: // CHECK-GFX1200: v_swmmac_i32_16x16x32_iu8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} neg_lo:[1,1,0] clamp // -void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a, true, b, c, index, true); } @@ -60,7 +60,7 @@ void test_amdgcn_swmmac_i32_16x16x32_iu8_w32(global v8i* out, v2i a, v4i b, v8i // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_i32_16x16x32_iu4_w32: // CHECK-GFX1200: v_swmmac_i32_16x16x32_iu4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} neg_lo:[1,1,0] clamp // -void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i c, int index) { *out = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, a, true, b, c, index, true); } @@ -69,7 +69,7 @@ void test_amdgcn_swmmac_i32_16x16x32_iu4_w32(global v8i* out, int a, v2i b, v8i // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_i32_16x16x64_iu4_w32: // CHECK-GFX1200: v_swmmac_i32_16x16x64_iu4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} neg_lo:[1,1,0] clamp // -void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, short index) +void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i c, int index) { *out = 
__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a, true, b, c, index, true); } @@ -78,7 +78,7 @@ void test_amdgcn_swmmac_i32_16x16x64_iu4_w32(global v8i* out, v2i a, v4i b, v8i // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_fp8_fp8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a, b, c, index); } @@ -87,7 +87,7 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_fp8_bf8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a, b, c, index); } @@ -96,7 +96,7 @@ void test_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32: // CHECK-GFX1200: v_swmmac_f32_16x16x32_bf8_fp8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a, b, c, index); } @@ -104,7 +104,7 @@ void test_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(global v8f* out, v2i a, v4i b, // CHECK-GFX1200-LABEL: test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32: // CHECK-GFX1200: 
v_swmmac_f32_16x16x32_bf8_bf8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} // -void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, short index) +void test_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(global v8f* out, v2i a, v4i b, v8f c, int index) { *out = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a, b, c, index); } From 686f4cb413410022b68a5da2f88a947934628a83 Mon Sep 17 00:00:00 2001 From: Kai Sasaki Date: Fri, 28 Feb 2025 13:37:06 +0900 Subject: [PATCH 048/123] [mlir][math] Rsqrt math expand pass expects static shaped operand (#129006) Similar to the issue reported in https://github.com/llvm/llvm-project/pull/128299#pullrequestreview-2636142506, ExpandMath pattern for rsqrt expects the static shaped operands. Otherwise, it crashes due to the assertion violation. See: https://github.com/llvm/llvm-project/pull/128299 --- .../Math/Transforms/ExpandPatterns.cpp | 5 ++++ mlir/test/Dialect/Math/expand-math.mlir | 26 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp b/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp index bb592c667549c..7b5350ca26b60 100644 --- a/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp +++ b/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp @@ -646,6 +646,11 @@ static LogicalResult convertRsqrtOp(math::RsqrtOp op, auto operand = op.getOperand(); auto operandTy = operand.getType(); + // Operand type must be shatic shaped type to create const float. 
+ auto shapedOperandType = dyn_cast(operandTy); + if (shapedOperandType && !shapedOperandType.hasStaticShape()) + return failure(); + auto eTy = getElementTypeOrSelf(operandTy); if (!isa(eTy)) return failure(); diff --git a/mlir/test/Dialect/Math/expand-math.mlir b/mlir/test/Dialect/Math/expand-math.mlir index 946a411e4cc4b..1420acaa40d35 100644 --- a/mlir/test/Dialect/Math/expand-math.mlir +++ b/mlir/test/Dialect/Math/expand-math.mlir @@ -787,3 +787,29 @@ func.func @unranked_ceil_op(%arg: tensor<*xf32>) -> tensor<*xf32>{ %a = math.ceil %arg : tensor<*xf32> return %a: tensor<*xf32> } + +// ----- + +// CHECK-LABEL: func.func @non_static_shape_rsqrt_op +// CHECK-SAME: (%[[ARG:.*]]: tensor) +// CHECK-SAME: -> tensor +// CHECK: %[[RSQRT:.*]] = math.rsqrt %[[ARG]] : tensor +// CHECK: return %[[RSQRT]] : tensor + +func.func @non_static_shape_rsqrt_op(%arg: tensor) -> tensor{ + %a = math.rsqrt %arg : tensor + return %a: tensor +} + +// ----- + +// CHECK-LABEL: func.func @unranked_rsqrt_op +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +// CHECK-SAME: -> tensor<*xf32> +// CHECK: %[[RSQRT:.*]] = math.rsqrt %[[ARG]] : tensor<*xf32> +// CHECK: return %[[RSQRT]] : tensor<*xf32> + +func.func @unranked_rsqrt_op(%arg: tensor<*xf32>) -> tensor<*xf32>{ + %a = math.rsqrt %arg : tensor<*xf32> + return %a: tensor<*xf32> +} From f17066e1bec2e87ba7a5e7bddadee74de85ff255 Mon Sep 17 00:00:00 2001 From: Kareem Ergawy Date: Fri, 28 Feb 2025 05:46:03 +0100 Subject: [PATCH 049/123] [flang][OpenMP] Add `reduction` clause support to `loop` directive (#128849) Extends `loop` directive transformation by adding support for the `reduction` clause. 
--- .../OpenMP/GenericLoopConversion.cpp | 65 +++++++++++++++---- flang/test/Lower/OpenMP/loop-directive.f90 | 45 ++++++++++++- .../generic-loop-rewriting-todo.mlir | 16 +---- 3 files changed, 98 insertions(+), 28 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp index bf94166edc079..b0014a3aced6b 100644 --- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp @@ -15,6 +15,8 @@ #include "mlir/Transforms/DialectConversion.h" #include +#include +#include namespace flangomp { #define GEN_PASS_DEF_GENERICLOOPCONVERSIONPASS @@ -58,7 +60,7 @@ class GenericLoopConversionPattern if (teamsLoopCanBeParallelFor(loopOp)) rewriteToDistributeParallelDo(loopOp, rewriter); else - rewriteToDistrbute(loopOp, rewriter); + rewriteToDistribute(loopOp, rewriter); break; } @@ -77,9 +79,6 @@ class GenericLoopConversionPattern if (loopOp.getOrder()) return todo("order"); - if (!loopOp.getReductionVars().empty()) - return todo("reduction"); - return mlir::success(); } @@ -168,7 +167,7 @@ class GenericLoopConversionPattern case ClauseBindKind::Parallel: return rewriteToWsloop(loopOp, rewriter); case ClauseBindKind::Teams: - return rewriteToDistrbute(loopOp, rewriter); + return rewriteToDistribute(loopOp, rewriter); case ClauseBindKind::Thread: return rewriteToSimdLoop(loopOp, rewriter); } @@ -211,8 +210,9 @@ class GenericLoopConversionPattern loopOp, rewriter); } - void rewriteToDistrbute(mlir::omp::LoopOp loopOp, - mlir::ConversionPatternRewriter &rewriter) const { + void rewriteToDistribute(mlir::omp::LoopOp loopOp, + mlir::ConversionPatternRewriter &rewriter) const { + assert(loopOp.getReductionVars().empty()); rewriteToSingleWrapperOp(loopOp, rewriter); } @@ -246,6 +246,12 @@ class GenericLoopConversionPattern Fortran::common::openmp::EntryBlockArgs args; args.priv.vars = clauseOps.privateVars; + if constexpr (!std::is_same_v) { + 
populateReductionClauseOps(loopOp, clauseOps); + args.reduction.vars = clauseOps.reductionVars; + } + auto wrapperOp = rewriter.create(loopOp.getLoc(), clauseOps); mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion()); @@ -275,8 +281,7 @@ class GenericLoopConversionPattern auto parallelOp = rewriter.create(loopOp.getLoc(), parallelClauseOps); - mlir::Block *parallelBlock = - genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion()); + genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion()); parallelOp.setComposite(true); rewriter.setInsertionPoint( rewriter.create(loopOp.getLoc())); @@ -288,20 +293,54 @@ class GenericLoopConversionPattern rewriter.createBlock(&distributeOp.getRegion()); mlir::omp::WsloopOperands wsloopClauseOps; + populateReductionClauseOps(loopOp, wsloopClauseOps); + Fortran::common::openmp::EntryBlockArgs wsloopArgs; + wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; + auto wsloopOp = rewriter.create(loopOp.getLoc(), wsloopClauseOps); wsloopOp.setComposite(true); - rewriter.createBlock(&wsloopOp.getRegion()); + genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion()); mlir::IRMapping mapper; - mlir::Block &loopBlock = *loopOp.getRegion().begin(); - for (auto [loopOpArg, parallelOpArg] : llvm::zip_equal( - loopBlock.getArguments(), parallelBlock->getArguments())) + auto loopBlockInterface = + llvm::cast(*loopOp); + auto parallelBlockInterface = + llvm::cast(*parallelOp); + auto wsloopBlockInterface = + llvm::cast(*wsloopOp); + + for (auto [loopOpArg, parallelOpArg] : + llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(), + parallelBlockInterface.getPrivateBlockArgs())) mapper.map(loopOpArg, parallelOpArg); + for (auto [loopOpArg, wsloopOpArg] : + llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(), + wsloopBlockInterface.getReductionBlockArgs())) + mapper.map(loopOpArg, wsloopOpArg); + rewriter.clone(*loopOp.begin(), mapper); } + + void + populateReductionClauseOps(mlir::omp::LoopOp loopOp, + 
mlir::omp::ReductionClauseOps &clauseOps) const { + clauseOps.reductionMod = loopOp.getReductionModAttr(); + clauseOps.reductionVars = loopOp.getReductionVars(); + + std::optional reductionSyms = loopOp.getReductionSyms(); + if (reductionSyms) + clauseOps.reductionSyms.assign(reductionSyms->begin(), + reductionSyms->end()); + + std::optional> reductionByref = + loopOp.getReductionByref(); + if (reductionByref) + clauseOps.reductionByref.assign(reductionByref->begin(), + reductionByref->end()); + } }; class GenericLoopConversionPass diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90 index ffa4a6ff24f24..795f2a440fd0d 100644 --- a/flang/test/Lower/OpenMP/loop-directive.f90 +++ b/flang/test/Lower/OpenMP/loop-directive.f90 @@ -75,7 +75,7 @@ subroutine test_order() subroutine test_reduction() integer :: i, dummy = 1 - ! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction + ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction ! CHECK-SAME: (@[[RED]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]] : !{{.*}}) { ! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} { ! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_reductionEdummy"} @@ -294,3 +294,46 @@ subroutine teams_loop_cannot_be_parallel_for_4 !$omp end parallel END DO end subroutine + +! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction +subroutine loop_parallel_bind_reduction + implicit none + integer :: x, i + + ! CHECK: omp.wsloop + ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref) + ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref) { + ! CHECK-NEXT: omp.loop_nest {{.*}} { + ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"} + ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"} + ! CHECK: } + ! 
CHECK: } + !$omp loop bind(parallel) reduction(+: x) + do i = 0, 10 + x = x + i + end do +end subroutine + +! CHECK-LABEL: func.func @_QPloop_teams_loop_reduction +subroutine loop_teams_loop_reduction + implicit none + integer :: x, i + ! CHECK: omp.teams { + ! CHECK: omp.parallel + ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref) { + ! CHECK: omp.distribute { + ! CHECK: omp.wsloop + ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref) { + ! CHECK-NEXT: omp.loop_nest {{.*}} { + ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"} + ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"} + ! CHECK: } + ! CHECK: } + ! CHECK: } + ! CHECK: } + ! CHECK: } + !$omp teams loop reduction(+: x) + do i = 0, 10 + x = x + i + end do +end subroutine diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir index e992296c9a837..64094d61eb9a3 100644 --- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir +++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir @@ -1,24 +1,12 @@ // RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s - -omp.declare_reduction @add_reduction_i32 : i32 init { - ^bb0(%arg0: i32): - %c0_i32 = arith.constant 0 : i32 - omp.yield(%c0_i32 : i32) - } combiner { - ^bb0(%arg0: i32, %arg1: i32): - %0 = arith.addi %arg0, %arg1 : i32 - omp.yield(%0 : i32) - } - func.func @_QPloop_order() { omp.teams { %c0 = arith.constant 0 : i32 %c10 = arith.constant 10 : i32 %c1 = arith.constant 1 : i32 - %sum = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_orderEi"} - // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.loop operation}} - omp.loop reduction(@add_reduction_i32 %sum -> %arg2 : !fir.ref) { + // expected-error@below {{not yet implemented: Unhandled clause order in omp.loop operation}} + omp.loop order(reproducible:concurrent) 
{ omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) { omp.yield } From f327c41e3dcd82e656195226ecdbd340015d91cb Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 27 Feb 2025 20:59:08 -0800 Subject: [PATCH 050/123] [Attributor][NFC] Clang format (#129163) --- llvm/include/llvm/Transforms/IPO/Attributor.h | 3 ++- llvm/lib/Transforms/IPO/Attributor.cpp | 3 ++- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 11 +++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 995ee54a73ce4..05951f87b5062 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1208,7 +1208,8 @@ struct InformationCache { TargetTriple(M.getTargetTriple()) { if (UseExplorer) Explorer = new (Allocator) MustBeExecutedContextExplorer( - /* ExploreInterBlock */ true, /* ExploreCFGForward */ true, + /* ExploreInterBlock */ + true, /* ExploreCFGForward */ true, /* ExploreCFGBackward */ true, /* LIGetter */ [&](const Function &F) { return AG.getAnalysis(F); }, diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index a1e1a51b201b0..48725a5505514 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1478,7 +1478,8 @@ bool Attributor::getAssumedSimplifiedValues( // AAPotentialValues. 
const auto *PotentialValuesAA = getOrCreateAAFor(IRP, AA, DepClassTy::OPTIONAL); - if (PotentialValuesAA && PotentialValuesAA->getAssumedSimplifiedValues(*this, Values, S)) { + if (PotentialValuesAA && + PotentialValuesAA->getAssumedSimplifiedValues(*this, Values, S)) { UsedAssumedInformation |= !PotentialValuesAA->isAtFixpoint(); } else if (IRP.getPositionKind() != IRPosition::IRP_RETURNED) { Values.push_back({IRP.getAssociatedValue(), IRP.getCtxI()}); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index c1dd8bc393f33..60aa758917b0b 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -129,10 +129,7 @@ STATISTIC(NumIndirectCallsPromoted, "Number of indirect calls promoted"); STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG); #define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE)); #define STATS_DECLTRACK(NAME, TYPE, MSG) \ - { \ - STATS_DECL(NAME, TYPE, MSG) \ - STATS_TRACK(NAME, TYPE) \ - } + {STATS_DECL(NAME, TYPE, MSG) STATS_TRACK(NAME, TYPE)} #define STATS_DECLTRACK_ARG_ATTR(NAME) \ STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME)) #define STATS_DECLTRACK_CSARG_ATTR(NAME) \ @@ -2420,7 +2417,7 @@ struct AANoFreeCallSiteArgument final : AANoFreeFloating { } /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nofree)}; + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nofree) }; }; /// NoFree attribute for function return value. @@ -6078,7 +6075,9 @@ struct AANoCaptureCallSiteArgument final : AANoCaptureImpl { } /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)}; + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(nocapture) + }; }; /// NoCapture attribute for floating values. 
From b8512846dd9804434ace953d27425e5342b314dd Mon Sep 17 00:00:00 2001 From: Arnab Dutta <85476402+arnab-polymage@users.noreply.github.com> Date: Fri, 28 Feb 2025 10:45:38 +0530 Subject: [PATCH 051/123] [MLIR][Tensor] Enhance bufferization of tensor.expand_shape op (#128871) Instead of inferring the output shape argument of memref.expand_shape op, use output_shape argument of tensor.expand_shape op by adding dynamic dimension support for bufferization of tensor.expand_shape when there are more than one dynamic dim within a reassociation set. --- .../BufferizableOpInterfaceImpl.cpp | 14 ++++----- mlir/test/Dialect/Tensor/bufferize.mlir | 31 ++++++++++++------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index 81404fa664cd4..a9ba662348a52 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -337,14 +337,12 @@ struct ExpandShapeOpInterface if (failed(buffer)) return failure(); - // Memref result type is inferred by the builder based on reassociation - // indices and result shape. - // TODO: Instead of inferring the output shape argument of - // memref.expand_shape op, use output_shape argument of tensor.expand_shape - // op. 
- replaceOpWithNewBufferizedOp( - rewriter, op, tensorResultType.getShape(), *buffer, - expandShapeOp.getReassociationIndices()); + auto memrefExpandShape = rewriter.create( + op->getLoc(), tensorResultType.getShape(), *buffer, + expandShapeOp.getReassociationIndices(), + expandShapeOp.getMixedOutputShape()); + replaceOpWithBufferizedValues(rewriter, op, + memrefExpandShape->getResults()); return success(); } }; diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir index 9ea0a15f31185..c1beed95f2006 100644 --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -366,14 +366,10 @@ func.func @tensor.insert(%t1: tensor<5xf32>, %idx1: index, %f: f32) -> tensor<5x // ----- // CHECK-LABEL: func @tensor.expand_shape( -// CHECK-SAME: %[[t1:.*]]: tensor +// CHECK-SAME: %[[t1:.*]]: tensor, %[[sz0:.*]]: index func.func @tensor.expand_shape(%t1: tensor, %sz0: index) -> tensor<2x?x10xf32> { // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] - // CHECK: %[[C0:.*]] = arith.constant 0 : index - // CHECK: %[[DIM:.*]] = memref.dim %[[m1]], %[[C0]] : memref - // CHECK: %[[C2:.*]] = arith.constant 2 : index - // CHECK: %[[VAL_1:.*]] = arith.divsi %[[DIM]], %[[C2]] : index - // CHECK: %[[expanded:.*]] = memref.expand_shape %[[m1]] {{\[\[}}0, 1], [2]] output_shape [2, %[[VAL_1]], 10] : memref into memref<2x?x10xf32> + // CHECK: %[[expanded:.*]] = memref.expand_shape %[[m1]] {{\[\[}}0, 1], [2]] output_shape [2, %[[sz0]], 10] : memref into memref<2x?x10xf32> %0 = tensor.expand_shape %t1 [[0, 1], [2]] output_shape [2, %sz0, 10] : tensor into tensor<2x?x10xf32> @@ -385,23 +381,20 @@ func.func @tensor.expand_shape(%t1: tensor, %sz0: index) -> tensor<2x? 
// ----- // CHECK-LABEL: func @tensor.expand_shape_of_slice( -// CHECK-SAME: %[[t1:.*]]: tensor +// CHECK-SAME: %[[t1:.*]]: tensor, %{{.*}}: index, %{{.*}}: index, %[[sz0:.*]]: index func.func @tensor.expand_shape_of_slice( %t1: tensor, %o1: index, %s1: index, %sz0: index) -> tensor { // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : // CHECK: %[[subview:.*]] = memref.subview %[[m1]][%{{.*}}, 5] [%{{.*}}, 10] [1, 1] : memref to memref> %0 = tensor.extract_slice %t1[%o1, 5][%s1, 10][1, 1] : tensor to tensor - // CHECK: %[[C7:.*]] = arith.constant 7 : index - // CHECK: %[[VAL_1:.*]] = arith.divsi %{{.*}}, %[[C7]] : index - // CHECK: %[[expanded:.*]] = memref.expand_shape %[[subview]] {{\[\[}}0, 1], [2, 3]] output_shape [%[[VAL_1]], 7, 2, 5] : memref> into memref> + // CHECK: %[[expanded:.*]] = memref.expand_shape %[[subview]] {{\[\[}}0, 1], [2, 3]] output_shape [%[[sz0]], 7, 2, 5] : memref> into memref> %1 = tensor.expand_shape %0 [[0, 1], [2, 3]] output_shape [%sz0, 7, 2, 5] : tensor into tensor // CHECK: %[[r:.*]] = bufferization.to_tensor %[[expanded]] // CHECK: return %[[r]] return %1 : tensor } - // ----- // CHECK-LABEL: func @tensor.expand_shape_of_scalar_slice( @@ -417,7 +410,20 @@ func.func @tensor.expand_shape_of_scalar_slice( // CHECK: return %[[r]] return %1 : tensor<1xf32> } +// ----- +// CHECK-LABEL: func @tensor.expand_shape_multiple_dynamic_indices( +// CHECK-SAME: %[[t1:.*]]: tensor, %[[sz0:.*]]: index, %[[sz1:.*]]: index, %[[sz2:.*]]: index +func.func @tensor.expand_shape_multiple_dynamic_indices(%t1: tensor, %sz0: index, %sz1: index, %sz2: index) -> tensor { + // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] + // CHECK: %[[expanded:.*]] = memref.expand_shape %[[m1]] {{\[\[}}0, 1, 2], [3]] output_shape [%[[sz0]], %[[sz1]], %[[sz2]], 256] : memref into memref + %0 = tensor.expand_shape %t1 [[0, 1, 2], [3]] output_shape [%sz0, %sz1, %sz2, 256] + : tensor into tensor + + // CHECK: %[[r:.*]] = bufferization.to_tensor %[[expanded]] + // 
CHECK: return %[[r]] + return %0 : tensor +} // ----- // CHECK-LABEL: func @tensor.collapse_shape( @@ -646,3 +652,6 @@ func.func @parallel_insert_slice_copy_before_write(%in: tensor<4xf32>, %out: ten // CHECK: } return } + +// ----- + From 16a11e6ab7afa8c124e824316171232edcb64312 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Fri, 28 Feb 2025 13:33:31 +0800 Subject: [PATCH 052/123] [Driver] [C++20] [Modules] Warning for the surprising useless case for reduced BMI Found in downstream. I didn't realize the output file for precompile and reduced BMI refers to the same location. Then the generating process of reduced BMI is basically a waste of time. --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 5 +++++ clang/lib/Driver/ToolChains/Clang.cpp | 10 +++++++++- clang/test/Driver/module-fgen-reduced-bmi.cppm | 10 ++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 8f532a63f9e04..058fecd4e91ef 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -563,6 +563,11 @@ def err_test_module_file_extension_format : Error< def err_drv_module_output_with_multiple_arch : Error< "option '-fmodule-output' cannot be used with multiple arch options">; +def err_drv_reduced_module_output_overrided : Warning< + "the implicit output of reduced BMI may be overrided by the output file specified by '--precompile'. 
" + "please consider use '-fmodule-output=' to specify the output file for reduced BMI explicitly">, + InGroup>; + def warn_drv_delayed_template_parsing_after_cxx20 : Warning< "-fdelayed-template-parsing is deprecated after C++20">, InGroup>; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 86db3f7678436..4ebbd241d2f0b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4250,10 +4250,18 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D, if (Args.hasArg(options::OPT_fmodule_output_EQ)) Args.AddLastArg(CmdArgs, options::OPT_fmodule_output_EQ); - else + else { + if (Args.hasArg(options::OPT__precompile) && + (!Args.hasArg(options::OPT_o) || + Args.getLastArg(options::OPT_o)->getValue() == + getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))) { + D.Diag(diag::err_drv_reduced_module_output_overrided); + } + CmdArgs.push_back(Args.MakeArgString( "-fmodule-output=" + getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))); + } } // Noop if we see '-fmodules-reduced-bmi' with other translation diff --git a/clang/test/Driver/module-fgen-reduced-bmi.cppm b/clang/test/Driver/module-fgen-reduced-bmi.cppm index 7329c12941d73..9bdd4c9f6682f 100644 --- a/clang/test/Driver/module-fgen-reduced-bmi.cppm +++ b/clang/test/Driver/module-fgen-reduced-bmi.cppm @@ -48,6 +48,14 @@ // RUN: %clang -std=c++20 Hello.cppm --precompile -fmodules-reduced-bmi \ // RUN: -o Hello.full.pcm -### 2>&1 | FileCheck Hello.cppm \ // RUN: --check-prefix=CHECK-EMIT-MODULE-INTERFACE + +// RUN: %clang -std=c++20 Hello.cppm --precompile -fmodules-reduced-bmi \ +// RUN: -### 2>&1 | FileCheck Hello.cppm \ +// RUN: --check-prefix=CHECK-OVERRIDE-WARN + +// RUN: %clang -std=c++20 Hello.cppm --precompile -fmodules-reduced-bmi \ +// RUN: -o Hello.pcm -### 2>&1 | FileCheck Hello.cppm \ +// RUN: --check-prefix=CHECK-OVERRIDE-WARN // // RUN: %clang -std=c++20 Hello.cc -fmodules-reduced-bmi -Wall 
-Werror \ // RUN: -c -o Hello.o -### 2>&1 | FileCheck Hello.cc @@ -74,6 +82,8 @@ export module Hello; // flag. // CHECK-EMIT-MODULE-INTERFACE: -emit-module-interface +// CHECK-OVERRIDE-WARN: warning: the implicit output of reduced BMI may be overrided by the output file specified by '--precompile'. {{.*}}-Wreduced-bmi-output-overrided + // NO_WARN-NOT: warning //--- Hello.cc From 7d625cbcf97087c2e22f0764d96d00ef53314e42 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 28 Feb 2025 12:56:58 +0700 Subject: [PATCH 053/123] AMDGPU: Add baseline tests for simplify elts of readfirstlane (#128645) --- ...fy-demanded-vector-elts-lane-intrinsics.ll | 444 ++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll new file mode 100644 index 0000000000000..83d9d0d032ed1 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll @@ -0,0 +1,444 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < %s | FileCheck %s + +define i16 @extract_elt0_v2i16_readfirstlane(<2 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt0_v2i16_readfirstlane( +; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 0 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src) + %elt = extractelement <2 x i16> %vec, i32 0 + ret i16 %elt +} + +define i16 @extract_elt0_v1i16_readfirstlane(<1 x i16> %src) { +; CHECK-LABEL: 
define i16 @extract_elt0_v1i16_readfirstlane( +; CHECK-SAME: <1 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <1 x i16> [[VEC]], i64 0 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> %src) + %elt = extractelement <1 x i16> %vec, i32 0 + ret i16 %elt +} + +define i16 @extract_elt1_v2i16_readfirstlane(<2 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt1_v2i16_readfirstlane( +; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 1 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src) + %elt = extractelement <2 x i16> %vec, i32 1 + ret i16 %elt +} + +define i16 @extract_elt0_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt0_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 0 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %elt = extractelement <4 x i16> %vec, i32 0 + ret i16 %elt +} + +define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define i16 @extract_elt2_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 2 +; CHECK-NEXT: ret i16 [[ELT]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %elt = extractelement <4 x i16> %vec, i32 2 + ret i16 %elt +} + +define <2 x i16> 
@extract_elt01_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = 
call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> + ret <2 x i16> %shuffle +} + +define <2 x i16> @extract_elt30_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt30_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> + ret <2 x i16> %shuffle +} + +define half @extract_elt0_v2f16_readfirstlane(<2 x half> %src) { +; CHECK-LABEL: define half @extract_elt0_v2f16_readfirstlane( +; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 0 +; CHECK-NEXT: ret half [[ELT]] +; + %vec = call <2 x half> 
@llvm.amdgcn.readfirstlane.v2i16(<2 x half> %src) + %elt = extractelement <2 x half> %vec, i32 0 + ret half %elt +} + +define half @extract_elt1_v2f16_readfirstlane(<2 x half> %src) { +; CHECK-LABEL: define half @extract_elt1_v2f16_readfirstlane( +; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 1 +; CHECK-NEXT: ret half [[ELT]] +; + %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2i16(<2 x half> %src) + %elt = extractelement <2 x half> %vec, i32 1 + ret half %elt +} + +; Don't break on illegal types +define i8 @extract_elt0_v4i8_readfirstlane(<4 x i8> %src) { +; CHECK-LABEL: define i8 @extract_elt0_v4i8_readfirstlane( +; CHECK-SAME: <4 x i8> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i8> @llvm.amdgcn.readfirstlane.v4i8(<4 x i8> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i8> [[VEC]], i64 0 +; CHECK-NEXT: ret i8 [[ELT]] +; + %vec = call <4 x i8> @llvm.amdgcn.readfirstlane.v4ii8(<4 x i8> %src) + %elt = extractelement <4 x i8> %vec, i32 0 + ret i8 %elt +} + +; Don't break on illegal types +define i32 @extract_elt0_nxv4i32_readfirstlane( %src) { +; CHECK-LABEL: define i32 @extract_elt0_nxv4i32_readfirstlane( +; CHECK-SAME: [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call @llvm.amdgcn.readfirstlane.nxv2i32( [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement [[VEC]], i64 0 +; CHECK-NEXT: ret i32 [[ELT]] +; + %vec = call @llvm.amdgcn.readfirstlane.nxv2i32( %src) + %elt = extractelement %vec, i32 0 + ret i32 %elt +} + +define i32 @extract_elt0_v2i32_readfirstlane(<2 x i32> %src) { +; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane( +; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0 +; 
CHECK-NEXT: ret i32 [[ELT]] +; + %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) + %elt = extractelement <2 x i32> %vec, i32 0 + ret i32 %elt +} + +define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(<2 x ptr addrspace(3)> %src) { +; CHECK-LABEL: define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane( +; CHECK-SAME: <2 x ptr addrspace(3)> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x ptr addrspace(3)> [[VEC]], i64 0 +; CHECK-NEXT: ret ptr addrspace(3) [[ELT]] +; + %vec = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> %src) + %elt = extractelement <2 x ptr addrspace(3)> %vec, i32 0 + ret ptr addrspace(3) %elt +} + +define i64 @extract_elt0_v2i64_readfirstlane(<2 x i64> %src) { +; CHECK-LABEL: define i64 @extract_elt0_v2i64_readfirstlane( +; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 0 +; CHECK-NEXT: ret i64 [[ELT]] +; + %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src) + %elt = extractelement <2 x i64> %vec, i32 0 + ret i64 %elt +} + +define i64 @extract_elt1_v2i64_readfirstlane(<2 x i64> %src) { +; CHECK-LABEL: define i64 @extract_elt1_v2i64_readfirstlane( +; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 1 +; CHECK-NEXT: ret i64 [[ELT]] +; + %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src) + %elt = extractelement <2 x i64> %vec, i32 1 + ret i64 %elt +} + +define <3 x i16> @extract_elt012_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <3 x i16> 
@extract_elt012_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> + ret <3 x i16> %shuffle +} + +define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <3 x i16> @extract_elt123_v4i16_readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> + ret <3 x i16> %shuffle +} + +define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) { +; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane( +; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] +; + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> + ret <3 x i32> %shuffle +} + +define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) { +; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane( +; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; 
CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] +; + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> + ret <3 x i32> %shuffle +} + +define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane( +; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) { +; CHECK-LABEL: define <2 x i16> @extract_elt13_v4i16readfirstlane( +; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] +; + %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) + %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> + ret <2 x i16> %shuffle +} + +define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0( +; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 +; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> 
@llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0 + %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 1 + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0, i32 %src2) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1( +; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 +; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1 + %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3 + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2( +; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 +; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = 
shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0 + %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2 + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(<2 x i32> %src) convergent { +; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken( +; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) [ "convergencectrl"(token [[T]]) ] +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0 +; CHECK-NEXT: ret i32 [[ELT]] +; + %t = call token @llvm.experimental.convergence.entry() + %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ] + %elt = extractelement <2 x i32> %vec, i32 0 + ret i32 %elt +} + +define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) convergent { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken( +; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 +; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %t = call token 
@llvm.experimental.convergence.entry() + %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1 + %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3 + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) [ "convergencectrl"(token %t) ] + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define i1 @extract_elt0_v2i1_readfirstlane(<2 x i1> %src) { +; CHECK-LABEL: define i1 @extract_elt0_v2i1_readfirstlane( +; CHECK-SAME: <2 x i1> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <2 x i1> @llvm.amdgcn.readfirstlane.v2i1(<2 x i1> [[SRC]]) +; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i1> [[VEC]], i64 0 +; CHECK-NEXT: ret i1 [[ELT]] +; + %vec = call <2 x i1> @llvm.amdgcn.readfirstlane.v2i1(<2 x i1> %src) + %elt = extractelement <2 x i1> %vec, i32 0 + ret i1 %elt +} + +define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) { +; CHECK-LABEL: define <2 x i1> @extract_elt01_v4i1_readfirstlane( +; CHECK-SAME: <4 x i1> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i1> @llvm.amdgcn.readfirstlane.v4i1(<4 x i1> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i1> [[VEC]], <4 x i1> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i1> [[SHUFFLE]] +; + %vec = call <4 x i1> @llvm.amdgcn.readfirstlane.v4i1(<4 x i1> %src) + %shuffle = shufflevector <4 x i1> %vec, <4 x i1> poison, <2 x i32> + ret <2 x i1> %shuffle +} + +define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) { +; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane( +; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src) + %shuffle = shufflevector <8 x i32> %vec, <8 x i32> 
poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) { +; CHECK-LABEL: define <2 x i32> @extract_elt03_v4i32_readfirstlane( +; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; + %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) + %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> + ret <2 x i32> %shuffle +} + +define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) { +; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane( +; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> +; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] +; + %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src) + %shuffle = shufflevector <8 x i32> %vec, <8 x i32> poison, <3 x i32> + ret <3 x i32> %shuffle +} From 60224c35bd66be1a789f165fd1a7e08f2719ba99 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 28 Feb 2025 13:01:10 +0700 Subject: [PATCH 054/123] AMDGPU: Simplify demanded vector elts of readfirstlane sources (#128646) Stub implementation of simplifyDemandedVectorEltsIntrinsic for readfirstlane. 
--- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 4 ++++ .../simplify-demanded-vector-elts-lane-intrinsics.ll | 11 +++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index cb918e16f0f3b..d69cfbbe4088e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1569,6 +1569,10 @@ std::optional GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic( std::function SimplifyAndSetOp) const { switch (II.getIntrinsicID()) { + case Intrinsic::amdgcn_readfirstlane: + // TODO: For a vector extract, should reduce the intrinsic call type. + SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts); + return std::nullopt; case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_ptr_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll index 83d9d0d032ed1..836c739048411 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll @@ -306,10 +306,9 @@ define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) { define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) { ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0( ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 -; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 1 ; CHECK-NEXT: [[VEC:%.*]] = call <4 
x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] ; %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0 @@ -338,11 +337,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0, define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) { ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2( ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 -; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] +; CHECK-NEXT: ret <2 x i32> poison ; %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0 %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2 From a798d7dd681e632749e62efcd5672e903346ab48 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 27 Feb 2025 21:53:10 -0800 Subject: [PATCH 055/123] Revert "[RISCV][TTI] Add shuffle costing for masked slide lowering (#128537)" This reverts commit 4904728cab8596320a77a895cb712fba07ea7bb1. Downstream test failed, reverting during investigation. 
--- llvm/include/llvm/Analysis/VectorUtils.h | 9 - llvm/lib/Analysis/VectorUtils.cpp | 30 - llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 41 +- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 72 +- .../Target/RISCV/RISCVTargetTransformInfo.h | 6 - .../CostModel/RISCV/shuffle-exact-vlen.ll | 12 +- .../RISCV/shuffle-extract_subvector.ll | 2 +- .../CostModel/RISCV/shuffle-transpose.ll | 64 +- .../SLPVectorizer/RISCV/complex-loads.ll | 744 +++++++++++++++--- .../SLPVectorizer/RISCV/reductions.ll | 24 +- 10 files changed, 701 insertions(+), 303 deletions(-) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 4390b45f1f730..f21594c557e0e 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -203,15 +203,6 @@ bool getShuffleDemandedElts(int SrcWidth, ArrayRef Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts = false); -/// Does this shuffle mask represent either one slide shuffle or a pair of -/// two slide shuffles, combined with a select on some constant vector mask? -/// A slide is a shuffle mask which shifts some set of elements up or down -/// the vector, with all other elements being undefined. An identity shuffle -/// will be matched a slide by 0. The output parameter provides the source -/// (-1 means no source), and slide direction for each slide. -bool isMaskedSlidePair(ArrayRef Mask, int NumElts, - std::array, 2> &SrcInfo); - /// Replace each shuffle mask index with the scaled sequential indices for an /// equivalent mask of narrowed elements. Mask elements that are less than 0 /// (sentinel values) are repeated in the output mask. 
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 160913841a6e2..dcfd3d5a8bd6e 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -415,36 +415,6 @@ bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef Mask, return true; } -bool llvm::isMaskedSlidePair(ArrayRef Mask, int NumElts, - std::array, 2> &SrcInfo) { - const int SignalValue = NumElts * 2; - SrcInfo[0] = {-1, SignalValue}; - SrcInfo[1] = {-1, SignalValue}; - for (auto [i, M] : enumerate(Mask)) { - if (M < 0) - continue; - int Src = M >= (int)NumElts; - int Diff = (int)i - (M % NumElts); - bool Match = false; - for (int j = 0; j < 2; j++) { - auto &[SrcE, DiffE] = SrcInfo[j]; - if (SrcE == -1) { - assert(DiffE == SignalValue); - SrcE = Src; - DiffE = Diff; - } - if (SrcE == Src && DiffE == Diff) { - Match = true; - break; - } - } - if (!Match) - return false; - } - assert(SrcInfo[0].first != -1 && "Must find one slide"); - return true; -} - void llvm::narrowShuffleMaskElts(int Scale, ArrayRef Mask, SmallVectorImpl &ScaledMask) { assert(Scale > 0 && "Unexpected scaling factor"); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 71fd3ab28b273..6076fe56416ad 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4562,9 +4562,32 @@ static bool isInterleaveShuffle(ArrayRef Mask, MVT VT, int &EvenSrc, /// Is this mask representing a masked combination of two slides? 
static bool isMaskedSlidePair(ArrayRef Mask, - std::array, 2> &SrcInfo) { - if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo)) - return false; + std::pair SrcInfo[2]) { + int NumElts = Mask.size(); + int SignalValue = NumElts * 2; + SrcInfo[0] = {-1, SignalValue}; + SrcInfo[1] = {-1, SignalValue}; + for (unsigned i = 0; i != Mask.size(); ++i) { + int M = Mask[i]; + if (M < 0) + continue; + int Src = M >= (int)NumElts; + int Diff = (int)i - (M % NumElts); + bool Match = false; + for (int j = 0; j < 2; j++) { + if (SrcInfo[j].first == -1) { + assert(SrcInfo[j].second == SignalValue); + SrcInfo[j].first = Src; + SrcInfo[j].second = Diff; + } + if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) { + Match = true; + break; + } + } + if (!Match) + return false; + } // Avoid matching vselect idioms if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0) @@ -4580,8 +4603,7 @@ static bool isMaskedSlidePair(ArrayRef Mask, // Exactly matches the semantics of a previously existing custom matcher // to allow migration to new matcher without changing output. -static bool isElementRotate(std::array, 2> &SrcInfo, - unsigned NumElts) { +static bool isElementRotate(std::pair SrcInfo[2], unsigned NumElts) { if (SrcInfo[1].first == -1) return true; return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 && @@ -5582,10 +5604,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // without masking. Avoid matching bit rotates (which are not also element // rotates) as slide pairs. This is a performance heuristic, not a // functional check. 
- std::array, 2> SrcInfo; + std::pair SrcInfo[2]; unsigned RotateAmt; MVT RotateVT; - if (::isMaskedSlidePair(Mask, SrcInfo) && + if (isMaskedSlidePair(Mask, SrcInfo) && (isElementRotate(SrcInfo, NumElts) || !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) { SDValue Sources[2]; @@ -5942,11 +5964,10 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (SVT.getScalarType() == MVT::i1) return false; - std::array, 2> SrcInfo; + std::pair SrcInfo[2]; int Dummy1, Dummy2; return ShuffleVectorInst::isReverseMask(M, NumElts) || - (::isMaskedSlidePair(M, SrcInfo) && - isElementRotate(SrcInfo, NumElts)) || + (isMaskedSlidePair(M, SrcInfo) && isElementRotate(SrcInfo, NumElts)) || isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 6005c067428eb..cde643a250be1 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -475,64 +475,6 @@ costShuffleViaVRegSplitting(RISCVTTIImpl &TTI, MVT LegalVT, return InstructionCost::getInvalid(); } -InstructionCost RISCVTTIImpl::getSlideCost(FixedVectorType *Tp, - ArrayRef Mask, - TTI::TargetCostKind CostKind) { - // Avoid missing masks and length changing shuffles - if (Mask.size() <= 2 || Mask.size() != Tp->getNumElements()) - return InstructionCost::getInvalid(); - - int NumElts = Tp->getNumElements(); - std::pair LT = getTypeLegalizationCost(Tp); - // Avoid scalarization cases - if (!LT.second.isFixedLengthVector()) - return InstructionCost::getInvalid(); - - // Requires moving elements between parts, which requires additional - // unmodeled instructions. - if (LT.first != 1) - return InstructionCost::getInvalid(); - - auto GetSlideOpcode = [&](int SlideAmt) { - assert(SlideAmt != 0); - bool IsVI = isUInt<5>(std::abs(SlideAmt)); - if (SlideAmt < 0) - return IsVI ? 
RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX; - return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX; - }; - - std::array, 2> SrcInfo; - if (!isMaskedSlidePair(Mask, NumElts, SrcInfo)) - return InstructionCost::getInvalid(); - - if (SrcInfo[1].second == 0) - std::swap(SrcInfo[0], SrcInfo[1]); - - InstructionCost FirstSlideCost = 0; - if (SrcInfo[0].second != 0) { - unsigned Opcode = GetSlideOpcode(SrcInfo[0].second); - FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind); - } - - if (SrcInfo[1].first == -1) - return FirstSlideCost; - - InstructionCost SecondSlideCost = 0; - if (SrcInfo[1].second != 0) { - unsigned Opcode = GetSlideOpcode(SrcInfo[1].second); - SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind); - } else { - SecondSlideCost = - getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind); - } - - auto EC = Tp->getElementCount(); - VectorType *MaskTy = - VectorType::get(IntegerType::getInt1Ty(Tp->getContext()), EC); - InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); - return FirstSlideCost + SecondSlideCost + MaskCost; -} - InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, TTI::TargetCostKind CostKind, @@ -545,8 +487,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // First, handle cases where having a fixed length vector enables us to // give a more accurate cost than falling back to generic scalable codegen. // TODO: Each of these cases hints at a modeling gap around scalable vectors. 
- if (auto *FVTp = dyn_cast(Tp); - FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) { + if (ST->hasVInstructions() && isa(Tp) && + LT.second.isFixedLengthVector()) { InstructionCost VRegSplittingCost = costShuffleViaVRegSplitting( *this, LT.second, ST->getRealVLen(), Tp, Mask, CostKind); if (VRegSplittingCost.isValid()) @@ -602,11 +544,6 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return Cost; } } - - if (InstructionCost SlideCost = getSlideCost(FVTp, Mask, CostKind); - SlideCost.isValid()) - return SlideCost; - // vrgather + cost of generating the mask constant. // We model this for an unknown mask with a single vrgather. if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 || @@ -621,11 +558,6 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, } case TTI::SK_Transpose: case TTI::SK_PermuteTwoSrc: { - - if (InstructionCost SlideCost = getSlideCost(FVTp, Mask, CostKind); - SlideCost.isValid()) - return SlideCost; - // 2 x (vrgather + cost of generating the mask constant) + cost of mask // register for the second vrgather. We model this for an unknown // (shuffle) mask. diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 3f57560d3c127..134a7333b9b06 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -63,12 +63,6 @@ class RISCVTTIImpl : public BasicTTIImplBase { /// type. InstructionCost getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind); - - /// If this shuffle can be lowered as a masked slide pair (at worst), - /// return a cost for it. 
- InstructionCost getSlideCost(FixedVectorType *Tp, ArrayRef Mask, - TTI::TargetCostKind CostKind); - public: explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F) : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll index 06c709e4cc879..c951184a31731 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll @@ -186,7 +186,7 @@ define void @insert_subvec() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_1 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_3 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'insert_subvec' @@ -225,7 +225,7 @@ define void @insert_subvec() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_1 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_3 = shufflevector <16 x i32> poison, <16 x i32> poison, 
<16 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v4i8_2_0 = shufflevector <4 x i8> poison, <4 x i8> poison, <4 x i32> @@ -737,8 +737,8 @@ define void @multipart() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32idrev = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> @@ 
-757,8 +757,8 @@ define void @multipart() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32idrev = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll index d2bfb61a11b00..e8dd30345cc76 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll @@ -19,7 +19,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; CHECK-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; VLEN128-LABEL: 'test_vXf64' diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll index ef069fee8526e..8f784a07d3124 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll @@ -10,11 +10,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn1.v8i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0 ; ; SIZE-LABEL: 'trn1.v8i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret <8 x i8> %tmp0 ; %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -23,11 +23,11 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn2.v8i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0 ; ; SIZE-LABEL: 'trn2.v8i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %tmp0 ; %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -36,11 +36,11 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn1.v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0 ; ; SIZE-LABEL: 'trn1.v16i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x 
i8> %tmp0 ; %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -49,11 +49,11 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn2.v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0 ; ; SIZE-LABEL: 'trn2.v16i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %tmp0 ; %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -62,11 +62,11 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn1.v4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0 ; ; SIZE-LABEL: 'trn1.v4i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %tmp0 ; %tmp0 = 
shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -75,11 +75,11 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn2.v4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0 ; ; SIZE-LABEL: 'trn2.v4i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %tmp0 ; %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -88,11 +88,11 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn1.v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0 ; ; SIZE-LABEL: 'trn1.v8i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %tmp0 ; %tmp0 = shufflevector <8 x i16> 
%v0, <8 x i16> %v1, <8 x i32> @@ -101,11 +101,11 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn2.v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0 ; ; SIZE-LABEL: 'trn2.v8i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %tmp0 ; %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> @@ -140,11 +140,11 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn1.v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0 ; ; SIZE-LABEL: 'trn1.v4i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %tmp0 ; %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 
x i32> @@ -153,11 +153,11 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn2.v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0 ; ; SIZE-LABEL: 'trn2.v4i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %tmp0 ; %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> @@ -218,11 +218,11 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn1.v4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0 ; ; SIZE-LABEL: 'trn1.v4f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %tmp0 ; %tmp0 = shufflevector <4 x float> %v0, <4 x 
float> %v1, <4 x i32> @@ -231,11 +231,11 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn2.v4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0 ; ; SIZE-LABEL: 'trn2.v4f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %tmp0 ; %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> @@ -270,11 +270,11 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn1.v4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0 ; ; SIZE-LABEL: 'trn1.v4f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %tmp0 ; %tmp0 
= shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -283,11 +283,11 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn2.v4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0 ; ; SIZE-LABEL: 'trn2.v4f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %tmp0 ; %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -296,11 +296,11 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn1.v8f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0 ; ; SIZE-LABEL: 'trn1.v8f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> 
%tmp0 ; %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> @@ -309,11 +309,11 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn2.v8f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0 ; ; SIZE-LABEL: 'trn2.v8f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> %tmp0 ; %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 18acae5835724..11fa3337544a1 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -6,175 +6,663 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 +; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, 
ptr [[PIX1]], i64 1 +; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 +; CHECK-NEXT: [[CONV33:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 +; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 +; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 +; CHECK-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 +; CHECK-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP21]], [[TMP31]] +; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: 
[[TMP26:%.*]] = zext <2 x i8> [[TMP49]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP50:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = sub <2 x i32> [[TMP26]], [[TMP50]] +; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], splat (i32 16) +; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]] +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]] +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP40]] to <2 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = sub <2 x i32> [[TMP39]], [[TMP61]] +; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], splat (i32 16) +; CHECK-NEXT: [[TMP42:%.*]] = add <2 x i32> [[TMP37]], [[TMP35]] +; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]] +; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0 +; CHECK-NEXT: [[CONV_2:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1 +; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV_2]], [[TMP43]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0 +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1 +; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]] ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr 
i8, ptr null, i64 4 +; CHECK-NEXT: [[TMP53:%.*]] = load <2 x i8>, ptr null, align 1 ; CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr null, align 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr null, align 1 -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 +; CHECK-NEXT: [[TMP62:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> +; CHECK-NEXT: [[TMP77:%.*]] = zext i8 [[TMP52]] to i32 +; CHECK-NEXT: [[TMP54:%.*]] = load <2 x i8>, ptr null, align 1 +; CHECK-NEXT: [[TMP55:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = sub <2 x i32> [[TMP62]], [[TMP55]] +; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP41]] to <2 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i32> [[TMP58]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP63:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32> +; CHECK-NEXT: [[TMP81:%.*]] = sub <2 x i32> [[TMP48]], [[TMP76]] +; CHECK-NEXT: [[TMP167:%.*]] = shl <2 x i32> [[TMP81]], splat (i32 16) +; CHECK-NEXT: [[TMP75:%.*]] = add <2 x i32> [[TMP167]], [[TMP59]] +; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 +; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 +; CHECK-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 +; CHECK-NEXT: [[TMP64:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 +; CHECK-NEXT: [[TMP79:%.*]] = zext <2 x i8> [[TMP64]] to <2 x i32> +; CHECK-NEXT: [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 +; CHECK-NEXT: [[TMP91:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = sub <2 x i32> [[TMP79]], [[TMP91]] +; CHECK-NEXT: [[TMP170:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) +; CHECK-NEXT: [[TMP171:%.*]] 
= zext <2 x i8> [[TMP170]] to <2 x i32> +; CHECK-NEXT: [[TMP172:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 +; CHECK-NEXT: [[TMP173:%.*]] = zext <2 x i8> [[TMP172]] to <2 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = sub <2 x i32> [[TMP171]], [[TMP173]] +; CHECK-NEXT: [[TMP67:%.*]] = shl <2 x i32> [[TMP66]], splat (i32 16) +; CHECK-NEXT: [[TMP69:%.*]] = add <2 x i32> [[TMP67]], [[TMP65]] +; CHECK-NEXT: [[TMP176:%.*]] = extractelement <2 x i32> [[TMP75]], i32 0 +; CHECK-NEXT: [[TMP197:%.*]] = extractelement <2 x i32> [[TMP75]], i32 1 +; CHECK-NEXT: [[SUB59:%.*]] = add i32 [[TMP197]], [[TMP176]] +; CHECK-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP176]], [[TMP197]] +; CHECK-NEXT: [[ADD112_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 0 +; CHECK-NEXT: [[XOR_I63_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 1 +; CHECK-NEXT: [[SUB59_1:%.*]] = add i32 [[XOR_I63_2]], [[ADD112_2]] +; CHECK-NEXT: [[SUB47_3:%.*]] = sub i32 [[ADD112_2]], [[XOR_I63_2]] +; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[SUB59_1]], [[SUB59]] +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP70]], i32 [[SUB59]], i32 0 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i32> [[TMP34]], i32 [[SUB59_1]], i32 0 +; CHECK-NEXT: [[TMP222:%.*]] = sub <2 x i32> [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[ADD55_3:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] +; CHECK-NEXT: [[TMP74:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[SUB45_3]], i32 0 +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP44]], i32 [[SUB47_3]], i32 0 +; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]] +; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]] +; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]] +; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP77]], 15 +; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 
65537 +; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 +; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 15 +; CHECK-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 +; CHECK-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <2 x i32> [[TMP222]], i32 0 +; CHECK-NEXT: [[TMP87:%.*]] = extractelement <2 x i32> [[TMP222]], i32 1 +; CHECK-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[ADD112_1:%.*]] = sub i32 [[TMP87]], [[TMP86]] +; CHECK-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 +; CHECK-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 +; CHECK-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 +; CHECK-NEXT: [[ADD94_4:%.*]] = add i32 [[TMP88]], [[TMP89]] +; CHECK-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP89]], [[TMP88]] +; CHECK-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV1]], 15 +; CHECK-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 +; CHECK-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 +; CHECK-NEXT: [[TMP90:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 +; CHECK-NEXT: [[TMP102:%.*]] = zext <2 x i8> [[TMP90]] to <2 x i32> ; CHECK-NEXT: [[TMP92:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 +; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP94:%.*]] = zext <2 x i8> [[TMP93]] to <2 x i32> ; CHECK-NEXT: [[TMP95:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> ; CHECK-NEXT: [[TMP98:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 +; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x 
i32> +; CHECK-NEXT: [[TMP100:%.*]] = zext <2 x i8> [[TMP99]] to <2 x i32> +; CHECK-NEXT: [[TMP101:%.*]] = sub <2 x i32> [[TMP97]], [[TMP100]] +; CHECK-NEXT: [[TMP224:%.*]] = shl <2 x i32> [[TMP101]], splat (i32 16) +; CHECK-NEXT: [[TMP103:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP104:%.*]] = zext <2 x i8> [[TMP103]] to <2 x i32> +; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP106:%.*]] = zext <2 x i8> [[TMP105]] to <2 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP108:%.*]] = zext <2 x i8> [[TMP107]] to <2 x i32> +; CHECK-NEXT: [[TMP109:%.*]] = sub <2 x i32> [[TMP106]], [[TMP108]] +; CHECK-NEXT: [[TMP110:%.*]] = shl <2 x i32> [[TMP109]], splat (i32 16) +; CHECK-NEXT: [[TMP111:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1 +; CHECK-NEXT: [[TMP112:%.*]] = sub <2 x i32> [[TMP111]], [[TMP104]] +; CHECK-NEXT: [[TMP113:%.*]] = add <2 x i32> [[TMP110]], [[TMP112]] +; CHECK-NEXT: [[TMP114:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV1]], i32 0 +; CHECK-NEXT: [[TMP115:%.*]] = sub <2 x i32> [[TMP114]], [[TMP94]] +; CHECK-NEXT: [[TMP116:%.*]] = add <2 x i32> [[TMP224]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x i32> [[TMP113]], <2 x i32> [[TMP116]], <2 x i32> +; CHECK-NEXT: [[TMP126:%.*]] = add <2 x i32> [[TMP113]], [[TMP116]] +; CHECK-NEXT: [[TMP119:%.*]] = sub <2 x i32> [[TMP116]], [[TMP113]] +; CHECK-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP126]], i32 0 +; CHECK-NEXT: [[TMP127:%.*]] = extractelement <2 x i32> [[TMP126]], i32 1 +; CHECK-NEXT: [[ADD48:%.*]] = add i32 [[TMP127]], [[TMP120]] +; CHECK-NEXT: [[TMP166:%.*]] = sub i32 [[TMP120]], [[TMP127]] +; CHECK-NEXT: [[TMP128:%.*]] = extractelement <2 x i32> [[TMP119]], i32 0 +; CHECK-NEXT: [[TMP129:%.*]] = extractelement <2 x i32> [[TMP119]], i32 1 +; CHECK-NEXT: [[ADD55:%.*]] = add 
i32 [[TMP129]], [[TMP128]] +; CHECK-NEXT: [[SUB60:%.*]] = sub i32 [[TMP128]], [[TMP129]] +; CHECK-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP127]], 15 +; CHECK-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 +; CHECK-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 +; CHECK-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP129]], 15 +; CHECK-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 +; CHECK-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 +; CHECK-NEXT: [[TMP130:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 +; CHECK-NEXT: [[TMP131:%.*]] = zext <2 x i8> [[TMP130]] to <2 x i32> ; CHECK-NEXT: [[TMP132:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 +; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> ; CHECK-NEXT: [[TMP135:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP136:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP137:%.*]] = zext <2 x i8> [[TMP136]] to <2 x i32> ; CHECK-NEXT: [[TMP138:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1 -; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0) -; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4) -; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8) -; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> 
[[TMP6]], i64 12) -; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1 -; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0) -; CHECK-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4) -; CHECK-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP92]], i64 8) -; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP132]], i64 12) -; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) -; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> -; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP3]], i32 5 -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP52]], i32 9 -; CHECK-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32> -; CHECK-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> 
@llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0) -; CHECK-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4) -; CHECK-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP98]], i64 8) -; CHECK-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP138]], i64 12) -; CHECK-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32> -; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16) -; CHECK-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]] -; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP48]] -; CHECK-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP48]] -; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> -; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP70]] -; CHECK-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP70]] -; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]] -; CHECK-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]] -; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = add <16 x i32> [[TMP59]], [[TMP60]] -; CHECK-NEXT: 
[[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15) -; CHECK-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) -; CHECK-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535) -; CHECK-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]] -; CHECK-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]] -; CHECK-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) +; CHECK-NEXT: [[TMP139:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP140:%.*]] = zext <2 x i8> [[TMP139]] to <2 x i32> +; CHECK-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP137]], [[TMP140]] +; CHECK-NEXT: [[TMP142:%.*]] = shl <2 x i32> [[TMP141]], splat (i32 16) +; CHECK-NEXT: [[TMP143:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP144:%.*]] = zext <2 x i8> [[TMP143]] to <2 x i32> +; CHECK-NEXT: [[TMP145:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP146:%.*]] = zext <2 x i8> [[TMP145]] to <2 x i32> +; CHECK-NEXT: [[TMP147:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP148:%.*]] = zext <2 x i8> [[TMP147]] to <2 x i32> +; CHECK-NEXT: [[TMP149:%.*]] = sub <2 x i32> [[TMP146]], [[TMP148]] +; CHECK-NEXT: [[TMP150:%.*]] = shl <2 x i32> [[TMP149]], splat (i32 16) +; CHECK-NEXT: [[TMP151:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV33_1]], i32 1 +; CHECK-NEXT: [[TMP225:%.*]] = sub <2 x i32> [[TMP151]], [[TMP144]] +; CHECK-NEXT: [[TMP153:%.*]] = add <2 x i32> [[TMP150]], [[TMP225]] +; CHECK-NEXT: [[TMP154:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV_1]], i32 0 +; CHECK-NEXT: [[TMP155:%.*]] = 
sub <2 x i32> [[TMP154]], [[TMP134]] +; CHECK-NEXT: [[TMP156:%.*]] = add <2 x i32> [[TMP142]], [[TMP155]] +; CHECK-NEXT: [[TMP157:%.*]] = add <2 x i32> [[TMP153]], [[TMP156]] +; CHECK-NEXT: [[TMP158:%.*]] = sub <2 x i32> [[TMP156]], [[TMP153]] +; CHECK-NEXT: [[TMP159:%.*]] = extractelement <2 x i32> [[TMP157]], i32 0 +; CHECK-NEXT: [[TMP160:%.*]] = extractelement <2 x i32> [[TMP157]], i32 1 +; CHECK-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP160]], [[TMP159]] +; CHECK-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP159]], [[TMP160]] +; CHECK-NEXT: [[TMP161:%.*]] = extractelement <2 x i32> [[TMP158]], i32 0 +; CHECK-NEXT: [[TMP162:%.*]] = extractelement <2 x i32> [[TMP158]], i32 1 +; CHECK-NEXT: [[ADD55_1:%.*]] = add i32 [[TMP162]], [[TMP161]] +; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 [[TMP161]], [[TMP162]] +; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP160]], 15 +; CHECK-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 +; CHECK-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 +; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP162]], 15 +; CHECK-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 +; CHECK-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 +; CHECK-NEXT: [[TMP163:%.*]] = lshr <2 x i32> [[TMP131]], splat (i32 15) +; CHECK-NEXT: [[TMP164:%.*]] = and <2 x i32> [[TMP163]], splat (i32 65537) +; CHECK-NEXT: [[TMP165:%.*]] = mul <2 x i32> [[TMP164]], splat (i32 65535) +; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]] +; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]] +; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD95]], [[ADD78]] +; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]] +; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]] +; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]] +; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I_1]], [[ADD103]] +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]] +; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51_1]], [[ADD105]] +; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 
[[ADD_I52]], [[CONV_2]] +; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] +; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]] +; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] +; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP127]] +; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] +; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] +; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]] +; CHECK-NEXT: [[TMP169:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP181:%.*]] = zext <2 x i8> [[TMP169]] to <2 x i32> +; CHECK-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0 +; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP183:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0 +; CHECK-NEXT: [[TMP184:%.*]] = shufflevector <2 x i32> [[TMP183]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP191:%.*]] = sub <2 x i32> [[TMP182]], [[TMP184]] +; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP182]], [[TMP184]] +; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x i32> [[TMP191]], <2 x i32> [[TMP192]], <2 x i32> +; CHECK-NEXT: [[TMP195:%.*]] = lshr <2 x i32> [[TMP181]], splat (i32 15) +; CHECK-NEXT: [[TMP196:%.*]] = and <2 x i32> [[TMP195]], splat (i32 65537) +; CHECK-NEXT: [[TMP198:%.*]] = mul <2 x i32> [[TMP196]], splat (i32 65535) +; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55]], i32 0 +; CHECK-NEXT: [[TMP203:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP205:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0 +; CHECK-NEXT: [[TMP206:%.*]] = shufflevector <2 x i32> [[TMP205]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP207:%.*]] = sub <2 x i32> [[TMP203]], [[TMP206]] +; CHECK-NEXT: [[TMP210:%.*]] = add <2 x i32> 
[[TMP203]], [[TMP206]] +; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP210]], <2 x i32> +; CHECK-NEXT: [[ADD94_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1 +; CHECK-NEXT: [[ADD78_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 1 +; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] +; CHECK-NEXT: [[TMP220:%.*]] = add <2 x i32> [[TMP194]], [[TMP168]] +; CHECK-NEXT: [[SUB102_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0 +; CHECK-NEXT: [[SUB86_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 0 +; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x i32> [[TMP168]], <2 x i32> [[TMP194]], <2 x i32> +; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] +; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP198]], [[TMP220]] +; CHECK-NEXT: [[TMP221:%.*]] = xor <2 x i32> [[TMP175]], [[TMP181]] +; CHECK-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] +; CHECK-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP162]] +; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] +; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP129]] +; CHECK-NEXT: [[XOR_I53_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 0 +; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD105_3]] +; CHECK-NEXT: [[XOR_I_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 1 +; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] +; CHECK-NEXT: [[ADD112_5:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] +; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_5]], [[XOR_I63_1]] +; CHECK-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[TMP166]] +; CHECK-NEXT: [[TMP204:%.*]] = sub i32 [[TMP166]], [[SUB51_1]] +; CHECK-NEXT: [[TMP177:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 +; CHECK-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP177]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP179:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 +; 
CHECK-NEXT: [[TMP180:%.*]] = shufflevector <2 x i32> [[TMP179]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP199:%.*]] = add <2 x i32> [[TMP178]], [[TMP180]] +; CHECK-NEXT: [[TMP200:%.*]] = sub <2 x i32> [[TMP178]], [[TMP180]] +; CHECK-NEXT: [[TMP201:%.*]] = shufflevector <2 x i32> [[TMP199]], <2 x i32> [[TMP200]], <2 x i32> +; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[TMP204]] +; CHECK-NEXT: [[SUB106_2:%.*]] = sub i32 [[TMP204]], [[ADD112_1]] +; CHECK-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD113_1]] +; CHECK-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] +; CHECK-NEXT: [[TMP208:%.*]] = add <2 x i32> [[TMP165]], [[TMP201]] +; CHECK-NEXT: [[TMP209:%.*]] = xor <2 x i32> [[TMP208]], [[TMP131]] +; CHECK-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP120]], 15 +; CHECK-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 +; CHECK-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 +; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] +; CHECK-NEXT: [[XOR_I63_4:%.*]] = xor i32 [[ADD_I62_2]], [[TMP120]] +; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_2]] +; CHECK-NEXT: [[TMP211:%.*]] = extractelement <2 x i32> [[TMP209]], i32 0 +; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP211]] +; CHECK-NEXT: [[TMP212:%.*]] = extractelement <2 x i32> [[TMP209]], i32 1 +; CHECK-NEXT: [[ADD112_4:%.*]] = add i32 [[ADD110_2]], [[TMP212]] +; CHECK-NEXT: [[ADD113_4:%.*]] = add i32 [[ADD112_4]], [[XOR_I63_4]] +; CHECK-NEXT: [[ADD78_4:%.*]] = add i32 [[SUB59_2]], [[SUB60]] +; CHECK-NEXT: [[SUB86_4:%.*]] = sub i32 [[SUB60]], [[SUB59_2]] +; CHECK-NEXT: [[TMP213:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_4]], i32 0 +; CHECK-NEXT: [[TMP214:%.*]] = shufflevector <2 x i32> [[TMP213]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP215:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_4]], i32 0 +; CHECK-NEXT: [[TMP216:%.*]] = shufflevector <2 x i32> [[TMP215]], <2 x 
i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP217:%.*]] = add <2 x i32> [[TMP214]], [[TMP216]] +; CHECK-NEXT: [[TMP218:%.*]] = sub <2 x i32> [[TMP214]], [[TMP216]] +; CHECK-NEXT: [[TMP219:%.*]] = shufflevector <2 x i32> [[TMP217]], <2 x i32> [[TMP218]], <2 x i32> +; CHECK-NEXT: [[ADD105_4:%.*]] = add i32 [[SUB102_3]], [[SUB86_4]] +; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_4]], [[SUB102_3]] +; CHECK-NEXT: [[ADD_I52_4:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_4]] +; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_4]], [[CONV1]] +; CHECK-NEXT: [[TMP185:%.*]] = lshr <2 x i32> [[TMP102]], splat (i32 15) +; CHECK-NEXT: [[TMP193:%.*]] = and <2 x i32> [[TMP185]], splat (i32 65537) +; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP193]], splat (i32 65535) +; CHECK-NEXT: [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP219]] +; CHECK-NEXT: [[TMP188:%.*]] = xor <2 x i32> [[TMP187]], [[TMP102]] +; CHECK-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 +; CHECK-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 +; CHECK-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 +; CHECK-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] +; CHECK-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] +; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_4]] +; CHECK-NEXT: [[TMP189:%.*]] = extractelement <2 x i32> [[TMP188]], i32 0 +; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP189]] +; CHECK-NEXT: [[TMP190:%.*]] = extractelement <2 x i32> [[TMP188]], i32 1 +; CHECK-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP190]] +; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] ; CHECK-NEXT: ret i32 [[ADD113_3]] ; ; THR15-LABEL: define i32 @test( ; THR15-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; THR15-NEXT: entry: +; THR15-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 +; THR15-NEXT: 
[[CONV:%.*]] = zext i8 [[TMP0]] to i32 ; THR15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 ; THR15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 +; THR15-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1 +; THR15-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 +; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 +; THR15-NEXT: [[CONV33:%.*]] = zext i8 [[TMP1]] to i32 ; THR15-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; THR15-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] +; THR15-NEXT: [[TMP2:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 +; THR15-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP2]] to i32 ; THR15-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 ; THR15-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 +; THR15-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 +; THR15-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 +; THR15-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 +; THR15-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP5]] to i32 ; THR15-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; THR15-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] ; THR15-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 ; THR15-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 +; THR15-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1 +; THR15-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; THR15-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 +; THR15-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 +; THR15-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP20:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> +; THR15-NEXT: [[TMP87:%.*]] = 
zext i8 [[TMP6]] to i32 +; THR15-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP22:%.*]] = zext <2 x i8> [[TMP21]] to <2 x i32> +; THR15-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP20]], [[TMP22]] +; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; THR15-NEXT: [[TMP24:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP25:%.*]] = zext <2 x i8> [[TMP24]] to <2 x i32> +; THR15-NEXT: [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; THR15-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP27:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32> +; THR15-NEXT: [[TMP28:%.*]] = sub <2 x i32> [[TMP25]], [[TMP27]] +; THR15-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], splat (i32 16) +; THR15-NEXT: [[TMP59:%.*]] = add <2 x i32> [[TMP29]], [[TMP23]] +; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP32:%.*]] = zext <2 x i8> [[TMP31]] to <2 x i32> +; THR15-NEXT: [[TMP86:%.*]] = zext i8 [[TMP7]] to i32 +; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP34:%.*]] = zext <2 x i8> [[TMP33]] to <2 x i32> +; THR15-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP32]], [[TMP34]] +; THR15-NEXT: [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP37:%.*]] = zext <2 x i8> [[TMP36]] to <2 x i32> +; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> +; THR15-NEXT: [[TMP40:%.*]] = sub <2 x i32> [[TMP37]], [[TMP39]] +; THR15-NEXT: [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], splat (i32 16) +; THR15-NEXT: [[TMP76:%.*]] = add <2 x i32> [[TMP41]], [[TMP35]] +; THR15-NEXT: [[TMP30:%.*]] = add <2 x 
i32> [[TMP76]], [[TMP59]] +; THR15-NEXT: [[TMP42:%.*]] = sub <2 x i32> [[TMP59]], [[TMP76]] +; THR15-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP30]], i32 0 +; THR15-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP30]], i32 1 +; THR15-NEXT: [[ADD44_2:%.*]] = add i32 [[TMP44]], [[TMP43]] +; THR15-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP42]], i32 0 +; THR15-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP42]], i32 1 +; THR15-NEXT: [[ADD46_2:%.*]] = add i32 [[TMP46]], [[TMP45]] ; THR15-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4 +; THR15-NEXT: [[TMP47:%.*]] = load <2 x i8>, ptr null, align 1 ; THR15-NEXT: [[TMP48:%.*]] = load i8, ptr null, align 1 -; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr null, align 1 -; THR15-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 +; THR15-NEXT: [[TMP49:%.*]] = zext <2 x i8> [[TMP47]] to <2 x i32> +; THR15-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP48]] to i32 +; THR15-NEXT: [[TMP50:%.*]] = load <2 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP50]] to <2 x i32> +; THR15-NEXT: [[TMP52:%.*]] = sub <2 x i32> [[TMP49]], [[TMP51]] +; THR15-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) +; THR15-NEXT: [[TMP54:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> +; THR15-NEXT: [[TMP77:%.*]] = shufflevector <2 x i32> [[TMP54]], <2 x i32> poison, <2 x i32> +; THR15-NEXT: [[TMP55:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; THR15-NEXT: [[TMP56:%.*]] = zext <2 x i8> [[TMP55]] to <2 x i32> +; THR15-NEXT: [[TMP57:%.*]] = sub <2 x i32> [[TMP77]], [[TMP56]] +; THR15-NEXT: [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], splat (i32 16) +; THR15-NEXT: [[TMP72:%.*]] = add <2 x i32> [[TMP58]], [[TMP52]] +; THR15-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 +; THR15-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 +; THR15-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr 
i8, ptr null, i64 6 +; THR15-NEXT: [[TMP60:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 +; THR15-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP60]] to <2 x i32> +; THR15-NEXT: [[TMP62:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 +; THR15-NEXT: [[TMP63:%.*]] = zext <2 x i8> [[TMP62]] to <2 x i32> +; THR15-NEXT: [[TMP64:%.*]] = sub <2 x i32> [[TMP61]], [[TMP63]] +; THR15-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) +; THR15-NEXT: [[TMP66:%.*]] = zext <2 x i8> [[TMP65]] to <2 x i32> +; THR15-NEXT: [[TMP67:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 +; THR15-NEXT: [[TMP68:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32> +; THR15-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP66]], [[TMP68]] +; THR15-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) +; THR15-NEXT: [[TMP73:%.*]] = add <2 x i32> [[TMP70]], [[TMP64]] +; THR15-NEXT: [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0 +; THR15-NEXT: [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1 +; THR15-NEXT: [[ADD48_3:%.*]] = add i32 [[TMP75]], [[TMP74]] +; THR15-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP74]], [[TMP75]] +; THR15-NEXT: [[TMP80:%.*]] = extractelement <2 x i32> [[TMP73]], i32 0 +; THR15-NEXT: [[TMP81:%.*]] = extractelement <2 x i32> [[TMP73]], i32 1 +; THR15-NEXT: [[ADD55_3:%.*]] = add i32 [[TMP81]], [[TMP80]] +; THR15-NEXT: [[SUB47_3:%.*]] = sub i32 [[TMP80]], [[TMP81]] +; THR15-NEXT: [[ADD48_4:%.*]] = add i32 [[ADD55_3]], [[ADD48_3]] +; THR15-NEXT: [[TMP78:%.*]] = shufflevector <2 x i32> [[TMP30]], <2 x i32> poison, <2 x i32> +; THR15-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP78]], i32 [[ADD48_3]], i32 0 +; THR15-NEXT: [[TMP83:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[ADD55_3]], i32 0 +; THR15-NEXT: [[TMP79:%.*]] = sub <2 x i32> [[TMP71]], [[TMP83]] +; THR15-NEXT: [[ADD55_4:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] +; THR15-NEXT: [[TMP137:%.*]] = shufflevector <2 
x i32> [[TMP42]], <2 x i32> poison, <2 x i32> +; THR15-NEXT: [[TMP82:%.*]] = insertelement <2 x i32> [[TMP137]], i32 [[SUB45_3]], i32 0 +; THR15-NEXT: [[TMP84:%.*]] = insertelement <2 x i32> [[TMP42]], i32 [[SUB47_3]], i32 0 +; THR15-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP82]], [[TMP84]] +; THR15-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_4]], [[ADD44_2]] +; THR15-NEXT: [[SUB102:%.*]] = sub i32 [[ADD44_2]], [[ADD48_4]] +; THR15-NEXT: [[SHR_I:%.*]] = lshr i32 [[CONV_3]], 15 +; THR15-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 +; THR15-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535 +; THR15-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP44]], 15 +; THR15-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537 +; THR15-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535 +; THR15-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_4]], [[ADD46_2]] +; THR15-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD46_2]], [[ADD55_4]] +; THR15-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP86]], 15 +; THR15-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 +; THR15-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 +; THR15-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[TMP87]], 15 +; THR15-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 +; THR15-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 +; THR15-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP79]], i32 0 +; THR15-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP79]], i32 1 +; THR15-NEXT: [[ADD94_2:%.*]] = add i32 [[TMP88]], [[TMP89]] +; THR15-NEXT: [[SUB102_2:%.*]] = sub i32 [[TMP89]], [[TMP88]] +; THR15-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 +; THR15-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 +; THR15-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 +; THR15-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 +; THR15-NEXT: [[TMP91:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 +; THR15-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP90]], [[TMP91]] +; THR15-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP91]], [[TMP90]] +; 
THR15-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV]], 15 +; THR15-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 +; THR15-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 +; THR15-NEXT: [[TMP92:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 +; THR15-NEXT: [[TMP93:%.*]] = zext <2 x i8> [[TMP92]] to <2 x i32> ; THR15-NEXT: [[TMP143:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 +; THR15-NEXT: [[TMP94:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP95:%.*]] = zext <2 x i8> [[TMP94]] to <2 x i32> ; THR15-NEXT: [[TMP146:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 +; THR15-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> ; THR15-NEXT: [[TMP147:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; THR15-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 +; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP99:%.*]] = zext <2 x i8> [[TMP98]] to <2 x i32> +; THR15-NEXT: [[TMP100:%.*]] = sub <2 x i32> [[TMP97]], [[TMP99]] +; THR15-NEXT: [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], splat (i32 16) +; THR15-NEXT: [[TMP102:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP102]] to <2 x i32> +; THR15-NEXT: [[TMP104:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP105:%.*]] = zext <2 x i8> [[TMP104]] to <2 x i32> +; THR15-NEXT: [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP107:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32> +; THR15-NEXT: [[TMP108:%.*]] = sub <2 x i32> [[TMP105]], [[TMP107]] +; THR15-NEXT: [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], splat (i32 16) +; THR15-NEXT: [[TMP110:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV33]], i32 1 +; THR15-NEXT: [[TMP111:%.*]] = sub <2 x i32> 
[[TMP110]], [[TMP103]] +; THR15-NEXT: [[TMP112:%.*]] = add <2 x i32> [[TMP109]], [[TMP111]] +; THR15-NEXT: [[TMP113:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV]], i32 0 +; THR15-NEXT: [[TMP114:%.*]] = sub <2 x i32> [[TMP113]], [[TMP95]] +; THR15-NEXT: [[TMP115:%.*]] = add <2 x i32> [[TMP101]], [[TMP114]] +; THR15-NEXT: [[TMP116:%.*]] = shufflevector <2 x i32> [[TMP112]], <2 x i32> [[TMP115]], <2 x i32> +; THR15-NEXT: [[TMP117:%.*]] = add <2 x i32> [[TMP112]], [[TMP115]] +; THR15-NEXT: [[TMP118:%.*]] = sub <2 x i32> [[TMP115]], [[TMP112]] +; THR15-NEXT: [[TMP119:%.*]] = extractelement <2 x i32> [[TMP117]], i32 0 +; THR15-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP117]], i32 1 +; THR15-NEXT: [[ADD48:%.*]] = add i32 [[TMP120]], [[TMP119]] +; THR15-NEXT: [[SUB51:%.*]] = sub i32 [[TMP119]], [[TMP120]] +; THR15-NEXT: [[TMP121:%.*]] = extractelement <2 x i32> [[TMP118]], i32 0 +; THR15-NEXT: [[TMP122:%.*]] = extractelement <2 x i32> [[TMP118]], i32 1 +; THR15-NEXT: [[ADD55:%.*]] = add i32 [[TMP122]], [[TMP121]] +; THR15-NEXT: [[SUB59:%.*]] = sub i32 [[TMP121]], [[TMP122]] +; THR15-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP120]], 15 +; THR15-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 +; THR15-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 +; THR15-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP122]], 15 +; THR15-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 +; THR15-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 +; THR15-NEXT: [[TMP123:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 +; THR15-NEXT: [[TMP124:%.*]] = zext <2 x i8> [[TMP123]] to <2 x i32> ; THR15-NEXT: [[TMP148:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 +; THR15-NEXT: [[TMP125:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP126:%.*]] = zext <2 x i8> [[TMP125]] to <2 x i32> ; THR15-NEXT: [[TMP152:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; THR15-NEXT: [[TMP127:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x 
i8> poison, <2 x i32> +; THR15-NEXT: [[TMP128:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32> ; THR15-NEXT: [[TMP153:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; THR15-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; THR15-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; THR15-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; THR15-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1 -; THR15-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0) -; THR15-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4) -; THR15-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8) -; THR15-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12) -; THR15-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> -; THR15-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1 -; THR15-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0) -; THR15-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4) -; THR15-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP143]], i64 8) -; THR15-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP148]], i64 12) -; THR15-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> -; THR15-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]] -; THR15-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> -; THR15-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 
null, i64 4, <2 x i1> splat (i1 true), i32 2) -; THR15-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> -; THR15-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> -; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <16 x i32> -; THR15-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> -; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <16 x i32> -; THR15-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> -; THR15-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP1]], i32 5 -; THR15-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP48]], i32 9 -; THR15-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32> -; THR15-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; THR15-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0) -; THR15-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4) -; THR15-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP147]], i64 8) -; THR15-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP153]], i64 12) -; THR15-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32> -; THR15-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> -; THR15-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]] -; THR15-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16) -; THR15-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]] -; THR15-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> -; THR15-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP70]] -; THR15-NEXT: [[TMP50:%.*]] 
= sub <16 x i32> [[TMP47]], [[TMP70]] -; THR15-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> -; THR15-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> -; THR15-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP52]] -; THR15-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP52]] -; THR15-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> -; THR15-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> -; THR15-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]] -; THR15-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]] -; THR15-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> -; THR15-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> -; THR15-NEXT: [[TMP61:%.*]] = add <16 x i32> [[TMP59]], [[TMP60]] -; THR15-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]] -; THR15-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> -; THR15-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> -; THR15-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15) -; THR15-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) -; THR15-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535) -; THR15-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]] -; THR15-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]] -; THR15-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) +; THR15-NEXT: [[TMP129:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP130:%.*]] = zext <2 x i8> [[TMP129]] to <2 x i32> +; THR15-NEXT: [[TMP131:%.*]] = sub <2 x i32> [[TMP128]], [[TMP130]] +; THR15-NEXT: [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], splat (i32 16) +; 
THR15-NEXT: [[TMP138:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP139:%.*]] = zext <2 x i8> [[TMP138]] to <2 x i32> +; THR15-NEXT: [[TMP154:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP155:%.*]] = zext <2 x i8> [[TMP154]] to <2 x i32> +; THR15-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> +; THR15-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> +; THR15-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP155]], [[TMP134]] +; THR15-NEXT: [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) +; THR15-NEXT: [[TMP140:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV33_1]], i32 1 +; THR15-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP140]], [[TMP139]] +; THR15-NEXT: [[TMP171:%.*]] = add <2 x i32> [[TMP170]], [[TMP141]] +; THR15-NEXT: [[TMP186:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV_1]], i32 0 +; THR15-NEXT: [[TMP187:%.*]] = sub <2 x i32> [[TMP186]], [[TMP126]] +; THR15-NEXT: [[TMP142:%.*]] = add <2 x i32> [[TMP132]], [[TMP187]] +; THR15-NEXT: [[TMP136:%.*]] = add <2 x i32> [[TMP171]], [[TMP142]] +; THR15-NEXT: [[TMP149:%.*]] = sub <2 x i32> [[TMP142]], [[TMP171]] +; THR15-NEXT: [[TMP144:%.*]] = extractelement <2 x i32> [[TMP136]], i32 0 +; THR15-NEXT: [[TMP145:%.*]] = extractelement <2 x i32> [[TMP136]], i32 1 +; THR15-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP145]], [[TMP144]] +; THR15-NEXT: [[SUB45_1:%.*]] = sub i32 [[TMP144]], [[TMP145]] +; THR15-NEXT: [[TMP150:%.*]] = extractelement <2 x i32> [[TMP149]], i32 0 +; THR15-NEXT: [[TMP151:%.*]] = extractelement <2 x i32> [[TMP149]], i32 1 +; THR15-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP151]], [[TMP150]] +; THR15-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP150]], [[TMP151]] +; THR15-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP145]], 15 +; THR15-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 +; THR15-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 +; THR15-NEXT: [[SHR_I54_1:%.*]] = lshr 
i32 [[TMP151]], 15 +; THR15-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 +; THR15-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 +; THR15-NEXT: [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], splat (i32 15) +; THR15-NEXT: [[TMP157:%.*]] = and <2 x i32> [[TMP156]], splat (i32 65537) +; THR15-NEXT: [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], splat (i32 65535) +; THR15-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_2]], [[ADD48]] +; THR15-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_2]] +; THR15-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] +; THR15-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] +; THR15-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]] +; THR15-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]] +; THR15-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] +; THR15-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[CONV_3]] +; THR15-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]] +; THR15-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP44]] +; THR15-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] +; THR15-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP145]] +; THR15-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] +; THR15-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP120]] +; THR15-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] +; THR15-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] +; THR15-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]] +; THR15-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD48_1]], [[ADD55]] +; THR15-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD48_1]] +; THR15-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]] +; THR15-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] +; THR15-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]] +; THR15-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] +; THR15-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]] +; THR15-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[TMP86]] +; THR15-NEXT: 
[[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]] +; THR15-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[TMP87]] +; THR15-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] +; THR15-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP151]] +; THR15-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] +; THR15-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP122]] +; THR15-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD113]] +; THR15-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] +; THR15-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] +; THR15-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]] +; THR15-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB45_1]], [[SUB51]] +; THR15-NEXT: [[SUB86_2:%.*]] = sub i32 [[SUB51]], [[SUB45_1]] +; THR15-NEXT: [[TMP159:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0 +; THR15-NEXT: [[TMP160:%.*]] = shufflevector <2 x i32> [[TMP159]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP161:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0 +; THR15-NEXT: [[TMP162:%.*]] = shufflevector <2 x i32> [[TMP161]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP163:%.*]] = add <2 x i32> [[TMP160]], [[TMP162]] +; THR15-NEXT: [[TMP164:%.*]] = sub <2 x i32> [[TMP160]], [[TMP162]] +; THR15-NEXT: [[TMP165:%.*]] = shufflevector <2 x i32> [[TMP163]], <2 x i32> [[TMP164]], <2 x i32> +; THR15-NEXT: [[ADD105_2:%.*]] = add i32 [[SUB102_2]], [[SUB86_2]] +; THR15-NEXT: [[SUB106_2:%.*]] = sub i32 [[SUB86_2]], [[SUB102_2]] +; THR15-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]] +; THR15-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] +; THR15-NEXT: [[TMP166:%.*]] = add <2 x i32> [[TMP158]], [[TMP165]] +; THR15-NEXT: [[TMP167:%.*]] = xor <2 x i32> [[TMP166]], [[TMP124]] +; THR15-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP119]], 15 +; THR15-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 +; THR15-NEXT: 
[[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 +; THR15-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] +; THR15-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[TMP119]] +; THR15-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]] +; THR15-NEXT: [[TMP168:%.*]] = extractelement <2 x i32> [[TMP167]], i32 0 +; THR15-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP168]] +; THR15-NEXT: [[TMP169:%.*]] = extractelement <2 x i32> [[TMP167]], i32 1 +; THR15-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP169]] +; THR15-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] +; THR15-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[SUB59]] +; THR15-NEXT: [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB51_1]] +; THR15-NEXT: [[TMP172:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 +; THR15-NEXT: [[TMP173:%.*]] = shufflevector <2 x i32> [[TMP172]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP174:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 +; THR15-NEXT: [[TMP175:%.*]] = shufflevector <2 x i32> [[TMP174]], <2 x i32> poison, <2 x i32> zeroinitializer +; THR15-NEXT: [[TMP176:%.*]] = add <2 x i32> [[TMP173]], [[TMP175]] +; THR15-NEXT: [[TMP177:%.*]] = sub <2 x i32> [[TMP173]], [[TMP175]] +; THR15-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP176]], <2 x i32> [[TMP177]], <2 x i32> +; THR15-NEXT: [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]] +; THR15-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] +; THR15-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]] +; THR15-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]] +; THR15-NEXT: [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], splat (i32 15) +; THR15-NEXT: [[TMP180:%.*]] = and <2 x i32> [[TMP179]], splat (i32 65537) +; THR15-NEXT: [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], splat (i32 65535) +; THR15-NEXT: [[TMP182:%.*]] = add <2 x i32> [[TMP181]], [[TMP178]] +; THR15-NEXT: [[TMP183:%.*]] = xor <2 x i32> 
[[TMP182]], [[TMP93]] +; THR15-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 +; THR15-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 +; THR15-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 +; THR15-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] +; THR15-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] +; THR15-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]] +; THR15-NEXT: [[TMP184:%.*]] = extractelement <2 x i32> [[TMP183]], i32 0 +; THR15-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP184]] +; THR15-NEXT: [[TMP185:%.*]] = extractelement <2 x i32> [[TMP183]], i32 1 +; THR15-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP185]] +; THR15-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] ; THR15-NEXT: ret i32 [[ADD113_3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 7723746dda301..5b0f4a69de4c3 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -342,8 +342,8 @@ define void @reduce_or_2() { ; ZVFHMIN-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer ; ZVFHMIN-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; ZVFHMIN-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVFHMIN-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] +; ZVFHMIN-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVFHMIN-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] ; ZVFHMIN: 7: ; ZVFHMIN-NEXT: ret void ; ZVFHMIN: 8: @@ -356,8 +356,8 @@ define void @reduce_or_2() { ; ZVL128-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 ; ZVL128-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer ; ZVL128-NEXT: [[RDX_OP:%.*]] = or <16 
x i1> [[TMP3]], [[TMP5]] -; ZVL128-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVL128-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] +; ZVL128-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVL128-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] ; ZVL128: 7: ; ZVL128-NEXT: ret void ; ZVL128: 8: @@ -365,15 +365,17 @@ define void @reduce_or_2() { ; ; ZVL256-LABEL: @reduce_or_2( ; ZVL256-NEXT: [[TMP1:%.*]] = shl i64 0, 0 -; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <32 x i64> , i64 [[TMP1]], i32 15 -; ZVL256-NEXT: [[TMP3:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> -; ZVL256-NEXT: [[TMP4:%.*]] = icmp ult <32 x i64> [[TMP3]], zeroinitializer -; ZVL256-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP4]]) -; ZVL256-NEXT: br i1 [[TMP5]], label [[TMP7:%.*]], label [[TMP6:%.*]] -; ZVL256: 6: -; ZVL256-NEXT: ret void +; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 15 +; ZVL256-NEXT: [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer +; ZVL256-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 +; ZVL256-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer +; ZVL256-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] +; ZVL256-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVL256-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] ; ZVL256: 7: ; ZVL256-NEXT: ret void +; ZVL256: 8: +; ZVL256-NEXT: ret void ; ; ZVL512-LABEL: @reduce_or_2( ; ZVL512-NEXT: [[TMP1:%.*]] = shl i64 0, 0 From a6fe5ec5aaa0e0621013e4c0217e66f588f3a232 Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Fri, 28 Feb 2025 11:43:45 +0530 Subject: [PATCH 056/123] [GVN/PRE] Remove triple from GVN/PRE tests (#129073) The tests in GVN/PRE need not to depend on target triple. 
Removing the triple dependence from all the tests in this directory. --- llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll | 1 - .../Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll | 1 - llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll | 1 - llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll | 1 - .../GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll | 1 - llvm/test/Transforms/GVN/PRE/atomic.ll | 3 +-- llvm/test/Transforms/GVN/PRE/load-pre-licm.ll | 1 - llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll | 1 - llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll | 1 - llvm/test/Transforms/GVN/PRE/nonintegral.ll | 1 - llvm/test/Transforms/GVN/PRE/pre-gep-load.ll | 1 - llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll | 1 - llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll | 3 +-- 13 files changed, 2 insertions(+), 15 deletions(-) diff --git a/llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll b/llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll index 506ad7ce6cd35..95a3b5cbfcd10 100644 --- a/llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll +++ b/llvm/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll @@ -4,7 +4,6 @@ ; This is invalid as it bypasses the check for %m.0.ph==null in bb4. 
; ModuleID = 'mbuf.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin9.6" %struct.mbuf = type { ptr, ptr, i32, ptr, i16, i16, i32 } define void @m_adj(ptr %mp, i32 %req_len) nounwind optsize { diff --git a/llvm/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll b/llvm/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll index 3f0475dc79ca2..05d505c603c75 100644 --- a/llvm/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll +++ b/llvm/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll @@ -4,7 +4,6 @@ ; rdar://9429882 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.7.0" define i1 @rb_intern(ptr %foo) nounwind ssp { ; CHECK-LABEL: @rb_intern( diff --git a/llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll b/llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll index b2b0216ed8f72..92c01002975de 100644 --- a/llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll +++ b/llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll @@ -26,7 +26,6 @@ ; ^ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-gnu" %struct.desc = type { ptr } %struct.node = type { ptr, ptr } diff --git a/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll b/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll index 04de10a5cc1dc..0c172dcfa565e 100644 --- a/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll +++ b/llvm/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll @@ -1,7 +1,6 @@ ; RUN: opt -S -passes=gvn -enable-load-pre < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" %ArrayImpl = type { i64, ptr addrspace(100), 
[1 x i64], [1 x i64], [1 x i64], i64, i64, ptr addrspace(100), ptr addrspace(100), i8, i64 } diff --git a/llvm/test/Transforms/GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll b/llvm/test/Transforms/GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll index 2f63ed0016c2b..c238fe880cd5a 100644 --- a/llvm/test/Transforms/GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll +++ b/llvm/test/Transforms/GVN/PRE/2018-06-08-pre-load-dbgloc-no-null-opt.ll @@ -26,7 +26,6 @@ ; ^ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-gnu" %struct.desc = type { ptr } %struct.node = type { ptr, ptr } diff --git a/llvm/test/Transforms/GVN/PRE/atomic.ll b/llvm/test/Transforms/GVN/PRE/atomic.ll index e8bf25548ba89..ed530bec22e84 100644 --- a/llvm/test/Transforms/GVN/PRE/atomic.ll +++ b/llvm/test/Transforms/GVN/PRE/atomic.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -passes=gvn -S < %s | FileCheck %s +; RUN: opt -S -passes=gvn < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.7.0" @x = common global i32 0, align 4 @y = common global i32 0, align 4 diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll index 7028edb4732bd..efbc71b06256d 100644 --- a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll +++ b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=gvn < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin11.0.0" @sortlist = external global [5001 x i32], align 4 diff --git 
a/llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll b/llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll index 177b8a080bb0a..dde5c225e4ff6 100644 --- a/llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll +++ b/llvm/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll @@ -10,7 +10,6 @@ ; outbuf[outcnt] = bi_buf; ; } target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" @outcnt = common global i32 0 ; [#uses=3] define void @bi_windup(ptr %outbuf, i8 zeroext %bi_buf) nounwind { diff --git a/llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll b/llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll index 1f6a5c7a11a89..06a7f11aff14b 100644 --- a/llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll +++ b/llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll @@ -15,7 +15,6 @@ ; void testfunction(A& iter) { A const end; while (iter != end) ++iter; } ; target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" %struct.A = type { i32, i32 } define void @_Z12testfunctionR1A(ptr %iter) { diff --git a/llvm/test/Transforms/GVN/PRE/nonintegral.ll b/llvm/test/Transforms/GVN/PRE/nonintegral.ll index 240c985b23580..d989e81b8e76c 100644 --- a/llvm/test/Transforms/GVN/PRE/nonintegral.ll +++ b/llvm/test/Transforms/GVN/PRE/nonintegral.ll @@ -2,7 +2,6 @@ ; RUN: opt -passes=gvn -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" -target triple = "x86_64-unknown-linux-gnu" define void @nipre(ptr noalias %p, ptr noalias %p2, i8 %jmp) { diff --git a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll index 6b5211ebc00cc..edadcbead2223 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll @@ -4,7 +4,6 @@ ; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn" 
-enable-load-pre=false -S | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-gnu" define double @foo(i32 %stat, i32 %i, ptr %p) { ; CHECK-LABEL: @foo( diff --git a/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll b/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll index 331344b767436..0585781e7985f 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll @@ -1,7 +1,6 @@ ; RUN: opt -S -passes=gvn -enable-load-pre < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" ; These tests exercise situations when instructions that were first instructions ; with implicit control flow get removed. We make sure that after that we don't diff --git a/llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll b/llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll index 519e0ca29a971..2d63344fa79b0 100644 --- a/llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll +++ b/llvm/test/Transforms/GVN/PRE/rle-phi-translate.ll @@ -1,7 +1,6 @@ -; RUN: opt < %s -passes=gvn -S | FileCheck %s +; RUN: opt < %s -S -passes=gvn | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin7" define i32 @test1(ptr %b, ptr %c) nounwind { ; CHECK-LABEL: @test1( From 47f63a4fbaffd0be6e67968c3bc189f6a042ce18 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 27 Feb 2025 22:26:07 -0800 Subject: [PATCH 057/123] [PowerPC] Simplify ELFStreamer and XCOFFStreamer --- .../PowerPC/MCTargetDesc/PPCELFStreamer.cpp | 11 ++++++----- .../PowerPC/MCTargetDesc/PPCELFStreamer.h | 8 ++++---- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 18 ------------------ .../PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp | 12 ++++++------ .../PowerPC/MCTargetDesc/PPCXCOFFStreamer.h | 8 ++++---- 
5 files changed, 20 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp index b9e03b6cb6d21..addac6f41a715 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -217,10 +217,11 @@ std::optional llvm::isPartOfGOTToPCRelPair(const MCInst &Inst, return (Inst.getOpcode() == PPC::PLDpc); } -MCELFStreamer *llvm::createPPCELFStreamer( - MCContext &Context, std::unique_ptr MAB, - std::unique_ptr OW, - std::unique_ptr Emitter) { - return new PPCELFStreamer(Context, std::move(MAB), std::move(OW), +MCStreamer * +llvm::createPPCELFStreamer(const Triple &T, MCContext &C, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter) { + return new PPCELFStreamer(C, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h index 10204b184a49f..2b803950073f9 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h @@ -50,10 +50,10 @@ class PPCELFStreamer : public MCELFStreamer { std::optional isPartOfGOTToPCRelPair(const MCInst &Inst, const MCSubtargetInfo &STI); -MCELFStreamer *createPPCELFStreamer(MCContext &Context, - std::unique_ptr MAB, - std::unique_ptr OW, - std::unique_ptr Emitter); +MCStreamer *createPPCELFStreamer(const Triple &, MCContext &, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter); } // end namespace llvm #endif // LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 0a0facb10e48a..5dde48fba5605 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ 
b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -199,24 +199,6 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCStreamer * -createPPCELFStreamer(const Triple &T, MCContext &Context, - std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter) { - return createPPCELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter)); -} - -static MCStreamer * -createPPCXCOFFStreamer(const Triple &T, MCContext &Context, - std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter) { - return createPPCXCOFFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter)); -} - namespace { class PPCTargetAsmStreamer : public PPCTargetStreamer { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp index 72e3cff615662..2a6da4c097fc1 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp @@ -64,11 +64,11 @@ void PPCXCOFFStreamer::emitInstruction(const MCInst &Inst, emitPrefixedInstruction(Inst, STI); } -MCXCOFFStreamer * -llvm::createPPCXCOFFStreamer(MCContext &Context, - std::unique_ptr MAB, - std::unique_ptr OW, - std::unique_ptr Emitter) { - return new PPCXCOFFStreamer(Context, std::move(MAB), std::move(OW), +MCStreamer * +llvm::createPPCXCOFFStreamer(const Triple &, MCContext &C, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter) { + return new PPCXCOFFStreamer(C, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h index 5fa35127b70b4..1e3671c817eb8 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h @@ -29,10 +29,10 @@ class PPCXCOFFStreamer : public MCXCOFFStreamer { void 
emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); }; -MCXCOFFStreamer *createPPCXCOFFStreamer(MCContext &Context, - std::unique_ptr MAB, - std::unique_ptr OW, - std::unique_ptr Emitter); +MCStreamer *createPPCXCOFFStreamer(const Triple &, MCContext &, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter); } // end namespace llvm #endif // LLVM_LIB_TARGET_PPC_MCXCOFFSTREAMER_PPCXCOFFSTREAMER_H From 6dea54524462e623c9fe43ed70a2f91d71565e9b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 27 Feb 2025 22:41:35 -0800 Subject: [PATCH 058/123] [AMDGPU] Avoid repeated hash lookups (NFC) (#129189) --- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index f293b3aba7b79..33018ae9677a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -314,18 +314,20 @@ RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const { Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS || Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) { unsigned IntrID = cast(MI).getIntrinsicID(); - if (!IRulesAlias.contains(IntrID)) { + auto IRAIt = IRulesAlias.find(IntrID); + if (IRAIt == IRulesAlias.end()) { LLVM_DEBUG(dbgs() << "MI: "; MI.dump();); llvm_unreachable("No rules defined for intrinsic opcode"); } - return IRules.at(IRulesAlias.at(IntrID)); + return IRules.at(IRAIt->second); } - if (!GRulesAlias.contains(Opc)) { + auto GRAIt = GRulesAlias.find(Opc); + if (GRAIt == GRulesAlias.end()) { LLVM_DEBUG(dbgs() << "MI: "; MI.dump();); llvm_unreachable("No rules defined for generic opcode"); } - return GRules.at(GRulesAlias.at(Opc)); + return GRules.at(GRAIt->second); } // Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'. 
From e1ca5f1308a8721c983c194c3547fdc486a79162 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 27 Feb 2025 22:41:46 -0800 Subject: [PATCH 059/123] [ProfileData] Avoid repeated hash lookups (NFC) (#129194) --- llvm/lib/ProfileData/InstrProfWriter.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index f112ea2efcaa9..18aa76c865bc8 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -230,7 +230,8 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other, auto Name = Other.Name; auto Hash = Other.Hash; Other.accumulateCounts(FuncLevelOverlap.Test); - if (!FunctionData.contains(Name)) { + auto It = FunctionData.find(Name); + if (It == FunctionData.end()) { Overlap.addOneUnique(FuncLevelOverlap.Test); return; } @@ -238,7 +239,7 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other, Overlap.Overlap.NumEntries += 1; return; } - auto &ProfileDataMap = FunctionData[Name]; + auto &ProfileDataMap = It->second; bool NewFunc; ProfilingData::iterator Where; std::tie(Where, NewFunc) = From 81529e34cefa821f2948d470dba4238ec4e56f83 Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Fri, 28 Feb 2025 12:16:21 +0530 Subject: [PATCH 060/123] [MLIR][Affine] Fix affine data copy generate for zero-ranked memrefs (#129186) Fix affine data copy generate for zero-ranked memrefs. Fixes: https://github.com/llvm/llvm-project/issues/122210 and https://github.com/llvm/llvm-project/issues/61167 Test cases borrowed from https://reviews.llvm.org/D147298, authored by Lewuathe . 
Co-authored-by: Kai Sasaki --- mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp | 50 ++++++++------ .../test/Dialect/Affine/affine-data-copy.mlir | 65 +++++++++++++++++++ 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index 6833d6583c27a..a8c24e1423425 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -1830,14 +1830,14 @@ static void getMultiLevelStrides(const MemRefRegion ®ion, } } -/// Generates a point-wise copy from/to `memref' to/from `fastMemRef' and -/// returns the outermost AffineForOp of the copy loop nest. `lbMaps` and -/// `ubMaps` along with `lbOperands` and `ubOperands` hold the lower and upper -/// bound information for the copy loop nest. `fastBufOffsets` contain the -/// expressions to be subtracted out from the respective copy loop iterators in -/// order to index the fast buffer. If `copyOut' is true, generates a copy-out; -/// otherwise a copy-in. Builder `b` should be set to the point the copy nest is -/// inserted. +/// Generates a point-wise copy from/to a non-zero ranked `memref' to/from +/// `fastMemRef' and returns the outermost AffineForOp of the copy loop nest. +/// `lbMaps` and `ubMaps` along with `lbOperands` and `ubOperands` hold the +/// lower and upper bound information for the copy loop nest. `fastBufOffsets` +/// contain the expressions to be subtracted out from the respective copy loop +/// iterators in order to index the fast buffer. If `copyOut' is true, generates +/// a copy-out; otherwise a copy-in. Builder `b` should be set to the point the +/// copy nest is inserted. // /// The copy-in nest is generated as follows as an example for a 2-d region: /// for x = ... @@ -1858,6 +1858,8 @@ generatePointWiseCopy(Location loc, Value memref, Value fastMemRef, })); unsigned rank = cast(memref.getType()).getRank(); + // A copy nest can't be generated for 0-ranked memrefs. 
+ assert(rank != 0 && "non-zero rank memref expected"); assert(lbMaps.size() == rank && "wrong number of lb maps"); assert(ubMaps.size() == rank && "wrong number of ub maps"); @@ -1921,19 +1923,20 @@ emitRemarkForBlock(Block &block) { return block.getParentOp()->emitRemark(); } -/// Creates a buffer in the faster memory space for the specified memref region; -/// generates a copy from the lower memory space to this one, and replaces all -/// loads/stores in the block range [`begin', `end') of `block' to load/store -/// from that buffer. Returns failure if copies could not be generated due to -/// yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart` -/// in copyPlacementBlock specify the insertion points where the incoming copies -/// and outgoing copies, respectively, should be inserted (the insertion happens -/// right before the insertion point). Since `begin` can itself be invalidated -/// due to the memref rewriting done from this method, the output argument -/// `nBegin` is set to its replacement (set to `begin` if no invalidation -/// happens). Since outgoing copies could have been inserted at `end`, the -/// output argument `nEnd` is set to the new end. `sizeInBytes` is set to the -/// size of the fast buffer allocated. +/// Creates a buffer in the faster memory space for the specified memref region +/// (memref has to be non-zero ranked); generates a copy from the lower memory +/// space to this one, and replaces all loads/stores in the block range +/// [`begin', `end') of `block' to load/store from that buffer. Returns failure +/// if copies could not be generated due to yet unimplemented cases. +/// `copyInPlacementStart` and `copyOutPlacementStart` in copyPlacementBlock +/// specify the insertion points where the incoming copies and outgoing copies, +/// respectively, should be inserted (the insertion happens right before the +/// insertion point). 
Since `begin` can itself be invalidated due to the memref +/// rewriting done from this method, the output argument `nBegin` is set to its +/// replacement (set to `begin` if no invalidation happens). Since outgoing +/// copies could have been inserted at `end`, the output argument `nEnd` is set +/// to the new end. `sizeInBytes` is set to the size of the fast buffer +/// allocated. static LogicalResult generateCopy( const MemRefRegion ®ion, Block *block, Block::iterator begin, Block::iterator end, Block *copyPlacementBlock, @@ -1984,6 +1987,11 @@ static LogicalResult generateCopy( SmallVector bufIndices; unsigned rank = memRefType.getRank(); + if (rank == 0) { + LLVM_DEBUG(llvm::dbgs() << "Non-zero ranked memrefs supported\n"); + return failure(); + } + SmallVector fastBufferShape; // Compute the extents of the buffer. diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir index 5615acae5ecc4..26eef0a7925a7 100644 --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -354,3 +354,68 @@ func.func @arbitrary_memory_space() { } return } + +// CHECK-LABEL: zero_ranked +func.func @zero_ranked(%3:memref<480xi1>) { + %false = arith.constant false + %4 = memref.alloc() {alignment = 128 : i64} : memref + affine.store %false, %4[] : memref + %5 = memref.alloc() {alignment = 128 : i64} : memref + memref.copy %4, %5 : memref to memref + affine.for %arg0 = 0 to 480 { + %11 = affine.load %3[%arg0] : memref<480xi1> + %12 = affine.load %5[] : memref + %13 = arith.cmpi slt, %11, %12 : i1 + %14 = arith.select %13, %11, %12 : i1 + affine.store %14, %5[] : memref + } + return +} + +// CHECK-LABEL: func @scalar_memref_copy_without_dma +func.func @scalar_memref_copy_without_dma() { + %false = arith.constant false + %4 = memref.alloc() {alignment = 128 : i64} : memref + affine.store %false, %4[] : memref + + // CHECK: %[[FALSE:.*]] = arith.constant false + // CHECK: %[[MEMREF:.*]] 
= memref.alloc() {alignment = 128 : i64} : memref + // CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref + return +} + +// CHECK-LABEL: func @scalar_memref_copy_in_loop +func.func @scalar_memref_copy_in_loop(%3:memref<480xi1>) { + %false = arith.constant false + %4 = memref.alloc() {alignment = 128 : i64} : memref + affine.store %false, %4[] : memref + %5 = memref.alloc() {alignment = 128 : i64} : memref + memref.copy %4, %5 : memref to memref + affine.for %arg0 = 0 to 480 { + %11 = affine.load %3[%arg0] : memref<480xi1> + %12 = affine.load %5[] : memref + %13 = arith.cmpi slt, %11, %12 : i1 + %14 = arith.select %13, %11, %12 : i1 + affine.store %14, %5[] : memref + } + + // CHECK: %[[FALSE:.*]] = arith.constant false + // CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref + // CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref + // CHECK: %[[TARGET:.*]] = memref.alloc() {alignment = 128 : i64} : memref + // CHECK: memref.copy %alloc, %[[TARGET]] : memref to memref + // CHECK: %[[FAST_MEMREF:.*]] = memref.alloc() : memref<480xi1> + // CHECK: affine.for %{{.*}} = 0 to 480 { + // CHECK: %{{.*}} = affine.load %arg0[%{{.*}}] : memref<480xi1> + // CHECK: affine.store %{{.*}}, %[[FAST_MEMREF]][%{{.*}}] : memref<480xi1> + // CHECK: } + // CHECK: affine.for %arg1 = 0 to 480 { + // CHECK: %[[L0:.*]] = affine.load %[[FAST_MEMREF]][%arg1] : memref<480xi1> + // CHECK: %[[L1:.*]] = affine.load %[[TARGET]][] : memref + // CHECK: %[[CMPI:.*]] = arith.cmpi slt, %[[L0]], %[[L1]] : i1 + // CHECK: %[[SELECT:.*]] = arith.select %[[CMPI]], %[[L0]], %[[L1]] : i1 + // CHECK: affine.store %[[SELECT]], %[[TARGET]][] : memref + // CHECK: } + // CHECK: memref.dealloc %[[FAST_MEMREF]] : memref<480xi1> + return +} From f9e3c295d51eafa287ae9faa7e485b02db1909fa Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 27 Feb 2025 22:57:20 -0800 Subject: [PATCH 061/123] [SPIRV] Remove unused variable. 
NFC --- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index b2c12411ab782..c013e122a85dc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -319,7 +319,6 @@ void SPIRVModuleAnalysis::visitDecl( std::map &GlobalToGReg, const MachineFunction *MF, const MachineInstr &MI) { unsigned Opcode = MI.getOpcode(); - DenseSet Deps; // Process each operand of the instruction to resolve dependencies for (const MachineOperand &MO : MI.operands()) { From 0116feeea2aa69e8d6bf5364a8217198be70f2f1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 27 Feb 2025 23:01:19 -0800 Subject: [PATCH 062/123] [PowerPC] Avoid repeated hash lookups (NFC) (#129193) --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 9b526066fe75b..f12400490832b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -5427,8 +5427,8 @@ void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg, --Iter; MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter); for (unsigned i = 1; i < MI->getNumOperands(); i++) { - if (PromoteRegs.find(i) != PromoteRegs.end()) - MIBuilder.addReg(PromoteRegs[i], RegState::Kill); + if (auto It = PromoteRegs.find(i); It != PromoteRegs.end()) + MIBuilder.addReg(It->second, RegState::Kill); else Iter->addOperand(MI->getOperand(i)); } From c95175898395d54c2be26e022e627e086d0d4843 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 27 Feb 2025 23:01:35 -0800 Subject: [PATCH 063/123] [CodeGen] Avoid repeated hash lookups (NFC) (#129190) --- llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index dbc724629d3be..8d91e7119d0ba 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -600,12 +600,12 @@ class MemLocFragmentFill { break; } - auto CurrentLiveInEntry = LiveIn.find(&BB); // If there's no LiveIn entry for the block yet, add it. - if (CurrentLiveInEntry == LiveIn.end()) { + auto [CurrentLiveInEntry, Inserted] = LiveIn.try_emplace(&BB); + if (Inserted) { LLVM_DEBUG(dbgs() << "change=true (first) on meet on " << BB.getName() << "\n"); - LiveIn[&BB] = std::move(BBLiveIn); + CurrentLiveInEntry->second = std::move(BBLiveIn); return /*Changed=*/true; } From 71fb66c87d405dee1273b63b7c75249dbe3a5731 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Fri, 28 Feb 2025 15:59:05 +0800 Subject: [PATCH 064/123] [Coroutines] [CodeGen] Don't change AST in CodeGen/Coroutines The root source of other odd bugs. We performed a hack in CodeGen/Coroutines. But we didn't recognize that the CodeGen is a consumer of AST. The CodeGen shouldn't change AST in any ways. It'll break the assumption about the ASTConsumer in Clang's framework, which may break any other clang-based tools which depends on multiple consumers to work together. The fix here is simple. But I am not super happy about the test. It is too specific and verbose. We can remove this if we can get the signature of the AST in ASTContext. 
--- clang/lib/CodeGen/CGCoroutine.cpp | 9 +- clang/unittests/Frontend/CMakeLists.txt | 2 + .../Frontend/NoAlterCodeGenActionTest.cpp | 198 ++++++++++++++++++ 3 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 9abf2e8c9190d..058ec01f8ce0e 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -942,9 +942,16 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { if (Stmt *Ret = S.getReturnStmt()) { // Since we already emitted the return value above, so we shouldn't // emit it again here. - if (GroManager.DirectEmit) + Expr *PreviousRetValue = nullptr; + if (GroManager.DirectEmit) { + PreviousRetValue = cast(Ret)->getRetValue(); cast(Ret)->setRetValue(nullptr); + } EmitStmt(Ret); + // Set the return value back. The code generator, as the AST **Consumer**, + // shouldn't change the AST. + if (PreviousRetValue) + cast(Ret)->setRetValue(PreviousRetValue); } // LLVM require the frontend to mark the coroutine. 
diff --git a/clang/unittests/Frontend/CMakeLists.txt b/clang/unittests/Frontend/CMakeLists.txt index 0f05813338f2a..3c94846243870 100644 --- a/clang/unittests/Frontend/CMakeLists.txt +++ b/clang/unittests/Frontend/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_unittest(FrontendTests FixedPointString.cpp FrontendActionTest.cpp CodeGenActionTest.cpp + NoAlterCodeGenActionTest.cpp ParsedSourceLocationTest.cpp PCHPreambleTest.cpp ReparseWorkingDirTest.cpp @@ -27,4 +28,5 @@ clang_target_link_libraries(FrontendTests clangCodeGen clangFrontendTool clangSerialization + clangTooling ) diff --git a/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp b/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp new file mode 100644 index 0000000000000..e7a3bf5a7f87a --- /dev/null +++ b/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp @@ -0,0 +1,198 @@ +//===- unittests/Frontend/NoAlterCodeGenActionTest.cpp --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Unit tests for CodeGenAction may not alter the AST. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/LangStandard.h" +#include "clang/CodeGen/BackendUtil.h" +#include "clang/CodeGen/CodeGenAction.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/MultiplexConsumer.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; +using namespace clang::frontend; +using namespace clang::tooling; + +namespace { + +class ASTChecker : public RecursiveASTVisitor { +public: + ASTContext &Ctx; + ASTChecker(ASTContext &Ctx) : Ctx(Ctx) {} + bool VisitReturnStmt(ReturnStmt *RS) { + EXPECT_TRUE(RS->getRetValue()); + return true; + } + + bool VisitCoroutineBodyStmt(CoroutineBodyStmt *CS) { + return VisitReturnStmt(cast(CS->getReturnStmt())); + } +}; + +class ASTCheckerConsumer : public ASTConsumer { +public: + void HandleTranslationUnit(ASTContext &Ctx) override { + ASTChecker Checker(Ctx); + Checker.TraverseAST(Ctx); + } +}; + +class TestCodeGenAction : public EmitLLVMAction { +public: + using Base = EmitLLVMAction; + TestCodeGenAction(llvm::LLVMContext *_VMContext = nullptr) + : EmitLLVMAction(_VMContext) {} + + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) override { + std::vector> Consumers; + Consumers.push_back(std::make_unique()); + Consumers.push_back(Base::CreateASTConsumer(CI, InFile)); + return std::make_unique(std::move(Consumers)); + } +}; + +const char *test_contents = R"cpp( + +namespace std { + +template struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template struct coroutine_handle; + +template <> struct coroutine_handle { + static coroutine_handle from_address(void *addr) noexcept; + void operator()() { 
resume(); } + void *address() const noexcept; + void resume() const { __builtin_coro_resume(ptr); } + void destroy() const { __builtin_coro_destroy(ptr); } + bool done() const; + coroutine_handle &operator=(decltype(nullptr)); + coroutine_handle(decltype(nullptr)) : ptr(nullptr) {} + coroutine_handle() : ptr(nullptr) {} +// void reset() { ptr = nullptr; } // add to P0057? + explicit operator bool() const; + +protected: + void *ptr; +}; + +template struct coroutine_handle : coroutine_handle<> { + using coroutine_handle<>::operator=; + + static coroutine_handle from_address(void *addr) noexcept; + + Promise &promise() const; + static coroutine_handle from_promise(Promise &promise); +}; + +template +bool operator==(coroutine_handle<_PromiseT> const &_Left, + coroutine_handle<_PromiseT> const &_Right) noexcept { + return _Left.address() == _Right.address(); +} + +template +bool operator!=(coroutine_handle<_PromiseT> const &_Left, + coroutine_handle<_PromiseT> const &_Right) noexcept { + return !(_Left == _Right); +} + +struct noop_coroutine_promise {}; + +template <> +struct coroutine_handle { + operator coroutine_handle<>() const noexcept; + + constexpr explicit operator bool() const noexcept { return true; } + constexpr bool done() const noexcept { return false; } + + constexpr void operator()() const noexcept {} + constexpr void resume() const noexcept {} + constexpr void destroy() const noexcept {} + + noop_coroutine_promise &promise() const noexcept { + return *static_cast( + __builtin_coro_promise(this->__handle_, alignof(noop_coroutine_promise), false)); + } + + constexpr void *address() const noexcept { return __handle_; } + +private: + friend coroutine_handle noop_coroutine() noexcept; + + coroutine_handle() noexcept { + this->__handle_ = __builtin_coro_noop(); + } + + void *__handle_ = nullptr; +}; + +using noop_coroutine_handle = coroutine_handle; + +inline noop_coroutine_handle noop_coroutine() noexcept { return noop_coroutine_handle(); } + +struct 
suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; +struct suspend_never { + bool await_ready() noexcept { return true; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +} // namespace std + +using namespace std; + +class invoker { +public: + class invoker_promise { + public: + invoker get_return_object() { return invoker{}; } + auto initial_suspend() { return suspend_always{}; } + auto final_suspend() noexcept { return suspend_always{}; } + void return_void() {} + void unhandled_exception() {} + }; + using promise_type = invoker_promise; + invoker() {} + invoker(const invoker &) = delete; + invoker &operator=(const invoker &) = delete; + invoker(invoker &&) = delete; + invoker &operator=(invoker &&) = delete; +}; + +invoker g() { + co_return; +} + +)cpp"; + +TEST(CodeGenTest, TestNonAlterTest) { + EXPECT_TRUE(runToolOnCodeWithArgs(std::make_unique(), + test_contents, + { + "-std=c++20", + })); +} +} // namespace From 08994357df807b08200e6b39719a4d571c82ab1e Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Fri, 28 Feb 2025 09:35:59 +0100 Subject: [PATCH 065/123] [clang] Fix issues with #embed and intializer lists/template arguments (#128890) Sometimes number of expressions in InitListExpr is used for template argument deduction. So, in these cases we need to pay attention to real number of expressions including expanded #embed data. 
Fixes https://github.com/llvm/llvm-project/issues/122306 --- clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/AST/Expr.h | 10 ++++ clang/lib/Sema/SemaInit.cpp | 2 +- clang/lib/Sema/SemaOverload.cpp | 12 ++-- clang/lib/Sema/SemaTemplateDeduction.cpp | 3 +- clang/test/SemaCXX/embed-init-list.cpp | 71 ++++++++++++++++++++++++ 6 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 clang/test/SemaCXX/embed-init-list.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2b72143482943..7873c2048e53c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -225,6 +225,8 @@ Bug Fixes in This Version - Clang now outputs correct values when #embed data contains bytes with negative signed char values (#GH102798). +- Fixed rejects-valid problem when #embed appears in std::initializer_list or + when it can affect template argument deduction (#GH122306). Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 0f98d237dcbcd..cfe49acf20b77 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -5189,6 +5189,16 @@ class InitListExpr : public Expr { unsigned getNumInits() const { return InitExprs.size(); } + /// getNumInits but if the list has an EmbedExpr inside includes full length + /// of embedded data. + unsigned getNumInitsWithEmbedExpanded() const { + unsigned Sum = InitExprs.size(); + for (auto *IE : InitExprs) + if (auto *EE = dyn_cast(IE)) + Sum += EE->getDataElementCount() - 1; + return Sum; + } + /// Retrieve the set of initializers. 
Expr **getInits() { return reinterpret_cast(InitExprs.data()); } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 925af06894f72..86f5a5c1d4434 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -4261,7 +4261,7 @@ static bool TryInitializerListConstruction(Sema &S, QualType ArrayType = S.Context.getConstantArrayType( E.withConst(), llvm::APInt(S.Context.getTypeSize(S.Context.getSizeType()), - List->getNumInits()), + List->getNumInitsWithEmbedExpanded()), nullptr, clang::ArraySizeModifier::Normal, 0); InitializedEntity HiddenArray = InitializedEntity::InitializeTemporary(ArrayType); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 08586b4908dd4..c344b6fff40c6 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -5710,12 +5710,14 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType, // - if the initializer list has one element that is not itself an // initializer list, the implicit conversion sequence is the one // required to convert the element to the parameter type. + // Bail out on EmbedExpr as well since we never create EmbedExpr for a + // single integer. unsigned NumInits = From->getNumInits(); - if (NumInits == 1 && !isa(From->getInit(0))) - Result = TryCopyInitialization(S, From->getInit(0), ToType, - SuppressUserConversions, - InOverloadResolution, - AllowObjCWritebackConversion); + if (NumInits == 1 && !isa(From->getInit(0)) && + !isa(From->getInit(0))) + Result = TryCopyInitialization( + S, From->getInit(0), ToType, SuppressUserConversions, + InOverloadResolution, AllowObjCWritebackConversion); // - if the initializer list has no elements, the implicit conversion // sequence is the identity conversion. 
else if (NumInits == 0) { diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 627cd82ed1c77..dbd73ead8a63f 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -4506,7 +4506,8 @@ static TemplateDeductionResult DeduceFromInitializerList( // C++ [temp.deduct.type]p13: // The type of N in the type T[N] is std::size_t. QualType T = S.Context.getSizeType(); - llvm::APInt Size(S.Context.getIntWidth(T), ILE->getNumInits()); + llvm::APInt Size(S.Context.getIntWidth(T), + ILE->getNumInitsWithEmbedExpanded()); if (auto Result = DeduceNonTypeTemplateArgument( S, TemplateParams, NTTP, llvm::APSInt(Size), T, /*ArrayBound=*/true, Info, /*PartialOrdering=*/false, Deduced, diff --git a/clang/test/SemaCXX/embed-init-list.cpp b/clang/test/SemaCXX/embed-init-list.cpp new file mode 100644 index 0000000000000..c511ca707a537 --- /dev/null +++ b/clang/test/SemaCXX/embed-init-list.cpp @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Wno-c23-extensions %s +// expected-no-diagnostics + +namespace std { +typedef decltype(sizeof(int)) size_t; + +template class initializer_list { + const _E *__begin_; + size_t __size_; + + constexpr initializer_list(const _E *__b, size_t __s) + : __begin_(__b), __size_(__s) {} + +public: + constexpr initializer_list() : __begin_(nullptr), __size_(0) {} +}; +} // namespace std + +template struct S { + S(std::initializer_list); +}; + +template <> struct S { + S(std::initializer_list); +}; + +struct S1 { + S data; + int a; +}; + +template void to_array(_Tp (&&__a)[_Nm]) {} + + +template +void tfn(T) {} + +void tests() { + + S{{ +#embed __FILE__ + }}; + + S1 ss{std::initializer_list{ +#embed __FILE__ + }}; + + S sss = { +#embed __FILE__ + }; + + std::initializer_list il{ +#embed __FILE__ + }; + + static constexpr auto initializer_list = std::initializer_list{ +#embed __FILE__ + , '\0'}; + + static constexpr auto intinitializer_list = 
std::initializer_list{ +#embed __FILE__ + , '\0'}; + + to_array({ +#embed __FILE__ + }); + + tfn>({ +#embed __FILE__ + }); +} From 961b5ddfff78d53d8e0c808dd1cccf15091faa7e Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Fri, 28 Feb 2025 09:50:05 +0100 Subject: [PATCH 066/123] [flang] update fir.coordinate_of to carry the fields (#127231) This patch updates fir.coordinate_op to carry the field index as attributes instead of relying on getting it from the fir.field_index operations defining its operands. The rational is that FIR currently has a few operations that require DAGs to be preserved in order to be able to do code generation. This is the case of fir.coordinate_op, which requires its fir.field operand producer to be visible. This makes IR transformation harder/brittle, so I want to update FIR to get rid if this. Codegen/printer/parser of fir.coordinate_of and many tests need to be updated after this change. --- .../include/flang/Optimizer/Dialect/FIROps.h | 86 ++++++ .../include/flang/Optimizer/Dialect/FIROps.td | 19 +- flang/lib/Lower/OpenMP/Utils.cpp | 10 +- .../lib/Optimizer/CodeGen/BoxedProcedure.cpp | 5 +- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 253 +++++++++--------- flang/lib/Optimizer/Dialect/FIROps.cpp | 130 ++++++++- .../Optimizer/OpenMP/MapInfoFinalization.cpp | 9 +- flang/test/Fir/Todo/coordinate_of_2.fir | 3 +- flang/test/Fir/Todo/coordinate_of_3.fir | 3 +- flang/test/Fir/abstract-results-bindc.fir | 3 +- flang/test/Fir/abstract-results.fir | 28 +- flang/test/Fir/array-value-copy.fir | 9 +- .../Fir/convert-to-llvm-openmp-and-fir.fir | 37 ++- flang/test/Fir/convert-to-llvm.fir | 26 +- flang/test/Fir/dispatch.f90 | 63 ++--- flang/test/Fir/field-index.fir | 9 +- flang/test/Fir/pdt.fir | 6 +- flang/test/HLFIR/assign-codegen-derived.fir | 4 +- flang/test/HLFIR/c_ptr_byvalue.f90 | 6 +- .../designate-codegen-component-refs.fir | 12 +- .../OpenMP/map-types-and-sizes.f90 | 11 +- flang/test/Lower/CUDA/cuda-cdevloc.cuf | 6 +- 
flang/test/Lower/CUDA/cuda-devptr.cuf | 18 +- .../Lower/HLFIR/assumed-rank-inquiries.f90 | 6 +- .../test/Lower/HLFIR/c_ptr-constant-init.f90 | 2 - .../HLFIR/intrinsic-module-procedures.f90 | 3 +- flang/test/Lower/Intrinsics/c_associated.f90 | 30 +-- flang/test/Lower/Intrinsics/c_f_pointer.f90 | 12 +- .../test/Lower/Intrinsics/c_f_procpointer.f90 | 6 +- .../Intrinsics/c_funloc-proc-pointers.f90 | 6 +- flang/test/Lower/Intrinsics/c_funloc.f90 | 3 +- flang/test/Lower/Intrinsics/c_loc.f90 | 39 +-- flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 | 12 +- flang/test/Lower/Intrinsics/ieee_class.f90 | 21 +- flang/test/Lower/Intrinsics/ieee_flag.f90 | 57 ++-- flang/test/Lower/Intrinsics/ieee_logb.f90 | 5 +- flang/test/Lower/Intrinsics/ieee_max_min.f90 | 33 ++- .../Lower/Intrinsics/ieee_operator_eq.f90 | 18 +- flang/test/Lower/Intrinsics/ieee_rint_int.f90 | 21 +- flang/test/Lower/Intrinsics/ieee_rounding.f90 | 9 +- .../test/Lower/Intrinsics/ieee_unordered.f90 | 12 +- flang/test/Lower/Intrinsics/storage_size.f90 | 3 +- flang/test/Lower/Intrinsics/transfer.f90 | 3 +- flang/test/Lower/OpenMP/declare-mapper.f90 | 6 +- .../OpenMP/derived-type-allocatable-map.f90 | 27 +- flang/test/Lower/OpenMP/target.f90 | 2 +- flang/test/Lower/array-elemental-calls-2.f90 | 6 +- .../Lower/c-interoperability-c-pointer.f90 | 24 +- flang/test/Lower/c_ptr-constant-init.f90 | 2 - flang/test/Lower/call-by-value.f90 | 3 +- flang/test/Lower/call-copy-in-out.f90 | 9 +- .../Lower/derived-allocatable-components.f90 | 150 ++++------- .../test/Lower/derived-pointer-components.f90 | 216 +++++---------- .../test/Lower/derived-type-finalization.f90 | 3 +- flang/test/Lower/derived-types.f90 | 19 +- flang/test/Lower/equivalence-1.f90 | 3 +- flang/test/Lower/forall/array-pointer.f90 | 15 +- .../Lower/forall/forall-allocatable-2.f90 | 3 +- flang/test/Lower/forall/forall-where.f90 | 3 +- .../Lower/identical-block-merge-disable.f90 | 16 +- flang/test/Lower/io-derived-type.f90 | 16 +- flang/test/Lower/parent-component.f90 | 
158 ++++------- flang/test/Lower/pointer-assignments.f90 | 2 +- flang/test/Lower/polymorphic-temp.f90 | 6 +- flang/test/Lower/polymorphic.f90 | 18 +- flang/test/Lower/select-type.f90 | 18 +- flang/test/Lower/structure-constructors.f90 | 78 ++---- ...p-map-info-finalization-implicit-field.fir | 2 +- 68 files changed, 835 insertions(+), 1027 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h index a21f8bbe17685..ed301016ad01c 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.h +++ b/flang/include/flang/Optimizer/Dialect/FIROps.h @@ -50,9 +50,95 @@ struct DebuggingResource mlir::StringRef getName() final { return "DebuggingResource"; } }; +class CoordinateIndicesAdaptor; +using IntOrValue = llvm::PointerUnion; + } // namespace fir #define GET_OP_CLASSES #include "flang/Optimizer/Dialect/FIROps.h.inc" +namespace fir { +class CoordinateIndicesAdaptor { +public: + using value_type = IntOrValue; + + CoordinateIndicesAdaptor(mlir::DenseI32ArrayAttr fieldIndices, + mlir::ValueRange values) + : fieldIndices(fieldIndices), values(values) {} + + value_type operator[](size_t index) const { + assert(index < size() && "index out of bounds"); + return *std::next(begin(), index); + } + + size_t size() const { + return fieldIndices ? 
fieldIndices.size() : values.size(); + } + + bool empty() const { + return values.empty() && (!fieldIndices || fieldIndices.empty()); + } + + class iterator + : public llvm::iterator_facade_base { + public: + iterator(const CoordinateIndicesAdaptor *base, + std::optional::iterator> fieldIter, + llvm::detail::IterOfRange valuesIter) + : base(base), fieldIter(fieldIter), valuesIter(valuesIter) {} + + value_type operator*() const { + if (fieldIter && **fieldIter != fir::CoordinateOp::kDynamicIndex) { + return mlir::IntegerAttr::get(base->fieldIndices.getElementType(), + **fieldIter); + } + return *valuesIter; + } + + iterator &operator++() { + if (fieldIter) { + if (**fieldIter == fir::CoordinateOp::kDynamicIndex) + valuesIter++; + (*fieldIter)++; + } else { + valuesIter++; + } + return *this; + } + + bool operator==(const iterator &rhs) const { + return base == rhs.base && fieldIter == rhs.fieldIter && + valuesIter == rhs.valuesIter; + } + + private: + const CoordinateIndicesAdaptor *base; + std::optional::const_iterator> fieldIter; + llvm::detail::IterOfRange valuesIter; + }; + + iterator begin() const { + std::optional::const_iterator> fieldIter; + if (fieldIndices) + fieldIter = fieldIndices.asArrayRef().begin(); + return iterator(this, fieldIter, values.begin()); + } + + iterator end() const { + std::optional::const_iterator> fieldIter; + if (fieldIndices) + fieldIter = fieldIndices.asArrayRef().end(); + return iterator(this, fieldIter, values.end()); + } + +private: + mlir::DenseI32ArrayAttr fieldIndices; + mlir::ValueRange values; +}; + +} // namespace fir + #endif // FORTRAN_OPTIMIZER_DIALECT_FIROPS_H diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 8dbc9df9f553d..c83c57186b46d 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -1748,10 +1748,16 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoMemoryEffect]> { Unlike LLVM's 
GEP instruction, one cannot stride over the outermost reference; therefore, the leading 0 index must be omitted. + This operation can be used to index derived type fields, in which case + the operand is the name of the index field. + ``` %i = ... : index %h = ... : !fir.heap> %p = fir.coordinate_of %h, %i : (!fir.heap>, index) -> !fir.ref + + %d = ... : !fir.ref> + %f = fir.coordinate_of %d, field2 : (!fir.ref>) -> !fir.ref ``` In the example, `%p` will be a pointer to the `%i`-th f32 value in the @@ -1761,7 +1767,8 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoMemoryEffect]> { let arguments = (ins AnyRefOrBox:$ref, Variadic:$coor, - TypeAttr:$baseType + TypeAttr:$baseType, + OptionalAttr:$field_indices ); let results = (outs RefOrLLVMPtr); @@ -1771,10 +1778,14 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoMemoryEffect]> { let builders = [ OpBuilder<(ins "mlir::Type":$resultType, - "mlir::Value":$ref, "mlir::ValueRange":$coor), - [{ return build($_builder, $_state, resultType, ref, coor, - mlir::TypeAttr::get(ref.getType())); }]>, + "mlir::Value":$ref, "mlir::ValueRange":$coor)>, + OpBuilder<(ins "mlir::Type":$resultType, + "mlir::Value":$ref, "llvm::ArrayRef":$coor)> ]; + let extraClassDeclaration = [{ + constexpr static int32_t kDynamicIndex = std::numeric_limits::min(); + CoordinateIndicesAdaptor getIndices(); + }]; } def fir_ExtractValueOp : fir_OneResultOp<"extract_value", [NoMemoryEffect]> { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index fa1975dac789b..48bcf492fd368 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -354,14 +354,12 @@ mlir::Value createParentSymAndGenIntermediateMaps( // type. 
if (fir::RecordType recordType = mlir::dyn_cast( fir::unwrapPassByRefType(curValue.getType()))) { - mlir::Value idxConst = firOpBuilder.createIntegerConstant( - clauseLocation, firOpBuilder.getIndexType(), - indices[currentIndicesIdx]); - mlir::Type memberTy = - recordType.getTypeList().at(indices[currentIndicesIdx]).second; + fir::IntOrValue idxConst = mlir::IntegerAttr::get( + firOpBuilder.getI32Type(), indices[currentIndicesIdx]); + mlir::Type memberTy = recordType.getType(indices[currentIndicesIdx]); curValue = firOpBuilder.create( clauseLocation, firOpBuilder.getRefType(memberTy), curValue, - idxConst); + llvm::SmallVector{idxConst}); // Skip mapping and the subsequent load if we're the final member or not // a type with a descriptor such as a pointer/allocatable. If we're a diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index 26f4aee21d8bd..82b11ad7db32a 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -348,8 +348,9 @@ class BoxedProcedurePass rewriter.setInsertionPoint(coor); auto toTy = typeConverter.convertType(ty); auto toBaseTy = typeConverter.convertType(baseTy); - rewriter.replaceOpWithNewOp(coor, toTy, coor.getRef(), - coor.getCoor(), toBaseTy); + rewriter.replaceOpWithNewOp( + coor, toTy, coor.getRef(), coor.getCoor(), toBaseTy, + coor.getFieldIndicesAttr()); opIsValid = false; } } else if (auto index = mlir::dyn_cast(op)) { diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index aaefe675730e1..a2743edd7844a 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -2653,57 +2653,78 @@ struct CoordinateOpConversion return mlir::isa(type); } - /// Check whether this form of `!fir.coordinate_of` is supported. These - /// additional checks are required, because we are not yet able to convert - /// all valid forms of `!fir.coordinate_of`. 
- /// TODO: Either implement the unsupported cases or extend the verifier - /// in FIROps.cpp instead. - static bool supportedCoordinate(mlir::Type type, mlir::ValueRange coors) { - const std::size_t numOfCoors = coors.size(); - std::size_t i = 0; - bool subEle = false; - bool ptrEle = false; - for (; i < numOfCoors; ++i) { - mlir::Value nxtOpnd = coors[i]; - if (auto arrTy = mlir::dyn_cast(type)) { - subEle = true; - i += arrTy.getDimension() - 1; - type = arrTy.getEleTy(); - } else if (auto recTy = mlir::dyn_cast(type)) { - subEle = true; - type = recTy.getType(getFieldNumber(recTy, nxtOpnd)); - } else if (auto tupTy = mlir::dyn_cast(type)) { - subEle = true; - type = tupTy.getType(getConstantIntValue(nxtOpnd)); - } else { - ptrEle = true; - } - } - if (ptrEle) - return (!subEle) && (numOfCoors == 1); - return subEle && (i >= numOfCoors); - } + // Helper structure to analyze the CoordinateOp path and decide if and how + // the GEP should be generated for it. + struct ShapeAnalysis { + bool hasKnownShape; + bool columnIsDeferred; + }; /// Walk the abstract memory layout and determine if the path traverses any /// array types with unknown shape. Return true iff all the array types have a /// constant shape along the path. - static bool arraysHaveKnownShape(mlir::Type type, mlir::ValueRange coors) { - for (std::size_t i = 0, sz = coors.size(); i < sz; ++i) { - mlir::Value nxtOpnd = coors[i]; + /// TODO: move the verification logic into the verifier. 
+ static std::optional
+ arraysHaveKnownShape(mlir::Type type, fir::CoordinateOp coor) { + fir::CoordinateIndicesAdaptor indices = coor.getIndices(); + auto begin = indices.begin(); + bool hasKnownShape = true; + bool columnIsDeferred = false; + for (auto it = begin, end = indices.end(); it != end;) { if (auto arrTy = mlir::dyn_cast(type)) { - if (fir::sequenceWithNonConstantShape(arrTy)) - return false; - i += arrTy.getDimension() - 1; + bool addressingStart = (it == begin); + unsigned arrayDim = arrTy.getDimension(); + for (auto dimExtent : llvm::enumerate(arrTy.getShape())) { + if (dimExtent.value() == fir::SequenceType::getUnknownExtent()) { + hasKnownShape = false; + if (addressingStart && dimExtent.index() + 1 == arrayDim) { + // If this point was reached, the rows of the first array have + // constant extents. + columnIsDeferred = true; + } else { + // One of the array dimensions that is not the column of the first + // array has dynamic extent. It will not be possible to do + // code generation for the CoordinateOp if the base is not a + // fir.box containing the value of that extent. + return ShapeAnalysis{false, false}; + } + } + // There may be fewer operands than the array size if the + // fir.coordinate_of result is not an element but a sub-array. 
+ if (it != end) + ++it; + } type = arrTy.getEleTy(); - } else if (auto strTy = mlir::dyn_cast(type)) { - type = strTy.getType(getFieldNumber(strTy, nxtOpnd)); + continue; + } + if (auto strTy = mlir::dyn_cast(type)) { + auto intAttr = llvm::dyn_cast(*it); + if (!intAttr) { + mlir::emitError(coor.getLoc(), + "expected field name in fir.coordinate_of"); + return std::nullopt; + } + type = strTy.getType(intAttr.getInt()); } else if (auto strTy = mlir::dyn_cast(type)) { - type = strTy.getType(getConstantIntValue(nxtOpnd)); - } else { - return true; + auto value = llvm::dyn_cast(*it); + if (!value) { + mlir::emitError( + coor.getLoc(), + "expected constant value to address tuple in fir.coordinate_of"); + return std::nullopt; + } + type = strTy.getType(getConstantIntValue(value)); + } else if (auto charType = mlir::dyn_cast(type)) { + // Addressing character in string. Fortran strings degenerate to arrays + // in LLVM, so they are handled like arrays of characters here. + if (charType.getLen() == fir::CharacterType::unknownLen()) + return ShapeAnalysis{false, true}; + type = fir::CharacterType::getSingleton(charType.getContext(), + charType.getFKind()); } + ++it; } - return true; + return ShapeAnalysis{hasKnownShape, columnIsDeferred}; } private: @@ -2754,9 +2775,11 @@ struct CoordinateOpConversion mlir::LLVM::IntegerOverflowFlags nsw = mlir::LLVM::IntegerOverflowFlags::nsw; - for (unsigned i = 1, last = operands.size(); i < last; ++i) { + int nextIndexValue = 1; + fir::CoordinateIndicesAdaptor indices = coor.getIndices(); + for (auto it = indices.begin(), end = indices.end(); it != end;) { if (auto arrTy = mlir::dyn_cast(cpnTy)) { - if (i != 1) + if (it != indices.begin()) TODO(loc, "fir.array nested inside other array and/or derived type"); // Applies byte strides from the box. Ignore lower bound from box // since fir.coordinate_of indexes are zero based. Lowering takes care @@ -2764,26 +2787,31 @@ struct CoordinateOpConversion // types and non contiguous arrays. 
auto idxTy = lowerTy().indexType(); mlir::Value off = genConstantIndex(loc, idxTy, rewriter, 0); - for (unsigned index = i, lastIndex = i + arrTy.getDimension(); - index < lastIndex; ++index) { - mlir::Value stride = getStrideFromBox(loc, boxTyPair, operands[0], - index - i, rewriter); + unsigned arrayDim = arrTy.getDimension(); + for (unsigned dim = 0; dim < arrayDim && it != end; ++dim, ++it) { + mlir::Value stride = + getStrideFromBox(loc, boxTyPair, operands[0], dim, rewriter); auto sc = rewriter.create( - loc, idxTy, operands[index], stride, nsw); + loc, idxTy, operands[nextIndexValue + dim], stride, nsw); off = rewriter.create(loc, idxTy, sc, off, nsw); } + nextIndexValue += arrayDim; resultAddr = rewriter.create( loc, llvmPtrTy, byteTy, resultAddr, llvm::ArrayRef{off}); - i += arrTy.getDimension() - 1; cpnTy = arrTy.getEleTy(); } else if (auto recTy = mlir::dyn_cast(cpnTy)) { - mlir::Value nxtOpnd = operands[i]; - cpnTy = recTy.getType(getFieldNumber(recTy, nxtOpnd)); + auto intAttr = llvm::dyn_cast(*it); + if (!intAttr) + return mlir::emitError(loc, + "expected field name in fir.coordinate_of"); + int fieldIndex = intAttr.getInt(); + ++it; + cpnTy = recTy.getType(fieldIndex); auto llvmRecTy = lowerTy().convertType(recTy); resultAddr = rewriter.create( loc, llvmPtrTy, llvmRecTy, resultAddr, - llvm::ArrayRef{0, nxtOpnd}); + llvm::ArrayRef{0, fieldIndex}); } else { fir::emitFatalError(loc, "unexpected type in coordinate_of"); } @@ -2801,92 +2829,71 @@ struct CoordinateOpConversion // Component Type mlir::Type cpnTy = fir::dyn_cast_ptrOrBoxEleTy(baseObjectTy); - bool hasSubdimension = hasSubDimensions(cpnTy); - bool columnIsDeferred = !hasSubdimension; - - if (!supportedCoordinate(cpnTy, operands.drop_front(1))) - TODO(loc, "unsupported combination of coordinate operands"); - - const bool hasKnownShape = - arraysHaveKnownShape(cpnTy, operands.drop_front(1)); - - // If only the column is `?`, then we can simply place the column value in - // the 0-th GEP 
position. - if (auto arrTy = mlir::dyn_cast(cpnTy)) { - if (!hasKnownShape) { - const unsigned sz = arrTy.getDimension(); - if (arraysHaveKnownShape(arrTy.getEleTy(), - operands.drop_front(1 + sz))) { - fir::SequenceType::ShapeRef shape = arrTy.getShape(); - bool allConst = true; - for (unsigned i = 0; i < sz - 1; ++i) { - if (shape[i] < 0) { - allConst = false; - break; - } - } - if (allConst) - columnIsDeferred = true; - } - } - } + + const std::optional shapeAnalysis = + arraysHaveKnownShape(cpnTy, coor); + if (!shapeAnalysis) + return mlir::failure(); if (fir::hasDynamicSize(fir::unwrapSequenceType(cpnTy))) return mlir::emitError( loc, "fir.coordinate_of with a dynamic element size is unsupported"); - if (hasKnownShape || columnIsDeferred) { + if (shapeAnalysis->hasKnownShape || shapeAnalysis->columnIsDeferred) { llvm::SmallVector offs; - if (hasKnownShape && hasSubdimension) { + if (shapeAnalysis->hasKnownShape) { offs.push_back(0); } + // Else, only the column is `?` and we can simply place the column value + // in the 0-th GEP position. + std::optional dims; llvm::SmallVector arrIdx; - for (std::size_t i = 1, sz = operands.size(); i < sz; ++i) { - mlir::Value nxtOpnd = operands[i]; - - if (!cpnTy) - return mlir::emitError(loc, "invalid coordinate/check failed"); - - // check if the i-th coordinate relates to an array - if (dims) { - arrIdx.push_back(nxtOpnd); - int dimsLeft = *dims; - if (dimsLeft > 1) { - dims = dimsLeft - 1; - continue; - } - cpnTy = mlir::cast(cpnTy).getElementType(); - // append array range in reverse (FIR arrays are column-major) - offs.append(arrIdx.rbegin(), arrIdx.rend()); - arrIdx.clear(); - dims.reset(); + int nextIndexValue = 1; + for (auto index : coor.getIndices()) { + if (auto intAttr = llvm::dyn_cast(index)) { + // Addressing derived type component. 
+ auto recordType = llvm::dyn_cast(cpnTy); + if (!recordType) + return mlir::emitError( + loc, + "fir.coordinate base type is not consistent with operands"); + int fieldId = intAttr.getInt(); + cpnTy = recordType.getType(fieldId); + offs.push_back(fieldId); continue; } - if (auto arrTy = mlir::dyn_cast(cpnTy)) { - int d = arrTy.getDimension() - 1; - if (d > 0) { - dims = d; - arrIdx.push_back(nxtOpnd); - continue; + // Value index (addressing array, tuple, or complex part). + mlir::Value indexValue = operands[nextIndexValue++]; + if (auto tupTy = mlir::dyn_cast(cpnTy)) { + cpnTy = tupTy.getType(getConstantIntValue(indexValue)); + offs.push_back(indexValue); + } else { + if (!dims) { + if (auto arrayType = llvm::dyn_cast(cpnTy)) { + // Starting addressing array or array component. + dims = arrayType.getDimension(); + cpnTy = arrayType.getElementType(); + } + } + if (dims) { + arrIdx.push_back(indexValue); + if (--(*dims) == 0) { + // Append array range in reverse (FIR arrays are column-major). + offs.append(arrIdx.rbegin(), arrIdx.rend()); + arrIdx.clear(); + dims.reset(); + } + } else { + offs.push_back(indexValue); } - cpnTy = mlir::cast(cpnTy).getElementType(); - offs.push_back(nxtOpnd); - continue; } - - // check if the i-th coordinate relates to a field - if (auto recTy = mlir::dyn_cast(cpnTy)) - cpnTy = recTy.getType(getFieldNumber(recTy, nxtOpnd)); - else if (auto tupTy = mlir::dyn_cast(cpnTy)) - cpnTy = tupTy.getType(getConstantIntValue(nxtOpnd)); - else - cpnTy = nullptr; - - offs.push_back(nxtOpnd); } - if (dims) + // It is possible the fir.coordinate_of result is a sub-array, in which + // case there may be some "unfinished" array indices to reverse and push. 
+ if (!arrIdx.empty()) offs.append(arrIdx.rbegin(), arrIdx.rend()); + mlir::Value base = operands[0]; mlir::Value retval = genGEP(loc, llvmObjectTy, rewriter, base, offs); rewriter.replaceOp(coor, retval); diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 7e50622db08c9..7efb733eb565c 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -1460,9 +1460,89 @@ llvm::LogicalResult fir::ConvertOp::verify() { // CoordinateOp //===----------------------------------------------------------------------===// +void fir::CoordinateOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, + mlir::Type resultType, mlir::Value ref, + mlir::ValueRange coor) { + llvm::SmallVector fieldIndices; + llvm::SmallVector dynamicIndices; + bool anyField = false; + for (mlir::Value index : coor) { + if (auto field = index.getDefiningOp()) { + auto recTy = mlir::cast(field.getOnType()); + fieldIndices.push_back(recTy.getFieldIndex(field.getFieldId())); + anyField = true; + } else { + fieldIndices.push_back(fir::CoordinateOp::kDynamicIndex); + dynamicIndices.push_back(index); + } + } + auto typeAttr = mlir::TypeAttr::get(ref.getType()); + if (anyField) { + build(builder, result, resultType, ref, dynamicIndices, typeAttr, + builder.getDenseI32ArrayAttr(fieldIndices)); + } else { + build(builder, result, resultType, ref, dynamicIndices, typeAttr, nullptr); + } +} + +void fir::CoordinateOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, + mlir::Type resultType, mlir::Value ref, + llvm::ArrayRef coor) { + llvm::SmallVector fieldIndices; + llvm::SmallVector dynamicIndices; + bool anyField = false; + for (fir::IntOrValue index : coor) { + llvm::TypeSwitch(index) + .Case([&](mlir::IntegerAttr intAttr) { + fieldIndices.push_back(intAttr.getInt()); + anyField = true; + }) + .Case([&](mlir::Value value) { + dynamicIndices.push_back(value); + 
fieldIndices.push_back(fir::CoordinateOp::kDynamicIndex); + }); + } + auto typeAttr = mlir::TypeAttr::get(ref.getType()); + if (anyField) { + build(builder, result, resultType, ref, dynamicIndices, typeAttr, + builder.getDenseI32ArrayAttr(fieldIndices)); + } else { + build(builder, result, resultType, ref, dynamicIndices, typeAttr, nullptr); + } +} + void fir::CoordinateOp::print(mlir::OpAsmPrinter &p) { - p << ' ' << getRef() << ", " << getCoor(); - p.printOptionalAttrDict((*this)->getAttrs(), /*elideAttrs=*/{"baseType"}); + p << ' ' << getRef(); + if (!getFieldIndicesAttr()) { + p << ", " << getCoor(); + } else { + mlir::Type eleTy = fir::getFortranElementType(getRef().getType()); + for (auto index : getIndices()) { + p << ", "; + llvm::TypeSwitch(index) + .Case([&](mlir::IntegerAttr intAttr) { + if (auto recordType = llvm::dyn_cast(eleTy)) { + int fieldId = intAttr.getInt(); + if (fieldId < static_cast(recordType.getNumFields())) { + auto nameAndType = recordType.getTypeList()[fieldId]; + p << std::get(nameAndType); + eleTy = fir::getFortranElementType( + std::get(nameAndType)); + return; + } + } + // Invalid index, still print it so that invalid IR can be + // investigated. 
+ p << intAttr; + }) + .Case([&](mlir::Value value) { p << value; }); + } + } + p.printOptionalAttrDict( + (*this)->getAttrs(), + /*elideAttrs=*/{getBaseTypeAttrName(), getFieldIndicesAttrName()}); p << " : "; p.printFunctionalType(getOperandTypes(), (*this)->getResultTypes()); } @@ -1473,8 +1553,24 @@ mlir::ParseResult fir::CoordinateOp::parse(mlir::OpAsmParser &parser, if (parser.parseOperand(memref) || parser.parseComma()) return mlir::failure(); llvm::SmallVector coorOperands; - if (parser.parseOperandList(coorOperands)) - return mlir::failure(); + llvm::SmallVector> fieldNames; + llvm::SmallVector fieldIndices; + while (true) { + llvm::StringRef fieldName; + if (mlir::succeeded(parser.parseOptionalKeyword(&fieldName))) { + fieldNames.push_back({fieldName, static_cast(fieldIndices.size())}); + // Actual value will be computed later when base type has been parsed. + fieldIndices.push_back(0); + } else { + mlir::OpAsmParser::UnresolvedOperand index; + if (parser.parseOperand(index)) + return mlir::failure(); + fieldIndices.push_back(fir::CoordinateOp::kDynamicIndex); + coorOperands.push_back(index); + } + if (mlir::failed(parser.parseOptionalComma())) + break; + } llvm::SmallVector allOperands; allOperands.push_back(memref); allOperands.append(coorOperands.begin(), coorOperands.end()); @@ -1486,7 +1582,27 @@ mlir::ParseResult fir::CoordinateOp::parse(mlir::OpAsmParser &parser, result.operands) || parser.addTypesToList(funcTy.getResults(), result.types)) return mlir::failure(); - result.addAttribute("baseType", mlir::TypeAttr::get(funcTy.getInput(0))); + result.addAttribute(getBaseTypeAttrName(result.name), + mlir::TypeAttr::get(funcTy.getInput(0))); + if (!fieldNames.empty()) { + mlir::Type eleTy = fir::getFortranElementType(funcTy.getInput(0)); + for (auto [fieldName, operandPosition] : fieldNames) { + auto recTy = llvm::dyn_cast(eleTy); + if (!recTy) + return parser.emitError( + loc, "base must be a derived type when field name appears"); + unsigned fieldNum = 
recTy.getFieldIndex(fieldName); + if (fieldNum > recTy.getNumFields()) + return parser.emitError(loc) + << "field '" << fieldName + << "' is not a component or subcomponent of the base type"; + fieldIndices[operandPosition] = fieldNum; + eleTy = fir::getFortranElementType( + std::get(recTy.getTypeList()[fieldNum])); + } + result.addAttribute(getFieldIndicesAttrName(result.name), + parser.getBuilder().getDenseI32ArrayAttr(fieldIndices)); + } return mlir::success(); } @@ -1567,6 +1683,10 @@ llvm::LogicalResult fir::CoordinateOp::verify() { return mlir::success(); } +fir::CoordinateIndicesAdaptor fir::CoordinateOp::getIndices() { + return CoordinateIndicesAdaptor(getFieldIndicesAttr(), getCoor()); +} + //===----------------------------------------------------------------------===// // DispatchOp //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index beea7543e54b3..ab4dc582d5804 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -579,7 +579,7 @@ class MapInfoFinalizationPass if (!shouldMapField) continue; - int64_t fieldIdx = recordType.getFieldIndex(field); + int32_t fieldIdx = recordType.getFieldIndex(field); bool alreadyMapped = [&]() { if (op.getMembersIndexAttr()) for (auto indexList : op.getMembersIndexAttr()) { @@ -597,12 +597,11 @@ class MapInfoFinalizationPass continue; builder.setInsertionPoint(op); - mlir::Value fieldIdxVal = builder.createIntegerConstant( - op.getLoc(), mlir::IndexType::get(builder.getContext()), - fieldIdx); + fir::IntOrValue idxConst = + mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx); auto fieldCoord = builder.create( op.getLoc(), builder.getRefType(memTy), op.getVarPtr(), - fieldIdxVal); + llvm::SmallVector{idxConst}); fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( builder, 
fieldCoord, /*isOptional=*/false, op.getLoc()); diff --git a/flang/test/Fir/Todo/coordinate_of_2.fir b/flang/test/Fir/Todo/coordinate_of_2.fir index 7ceead8de5279..759f2eab097e9 100644 --- a/flang/test/Fir/Todo/coordinate_of_2.fir +++ b/flang/test/Fir/Todo/coordinate_of_2.fir @@ -4,7 +4,6 @@ // `!fir.coordinate_of` - `!fir.array` inside "boxed" `!fir.type` func.func @coordinate_box_array_inside_derived(%arg0: !fir.box, field_2:i32}>>, %arg1 : index) { - %idx0 = arith.constant 0 : i32 - %q = fir.coordinate_of %arg0, %idx0, %arg1 : (!fir.box, field_2:i32}>>, i32, index) -> !fir.ref + %q = fir.coordinate_of %arg0, field_1, %arg1 : (!fir.box, field_2:i32}>>, index) -> !fir.ref return } diff --git a/flang/test/Fir/Todo/coordinate_of_3.fir b/flang/test/Fir/Todo/coordinate_of_3.fir index 305422052be27..aff936d0e1a41 100644 --- a/flang/test/Fir/Todo/coordinate_of_3.fir +++ b/flang/test/Fir/Todo/coordinate_of_3.fir @@ -4,7 +4,6 @@ // `fir.coordinate_of` - `fir.array` inside "boxed" `!fir.type}` (i.e. 
nested `!fir.type`) func.func @coordinate_box_array_inside_derived(%arg0: !fir.box}>}>>, %arg1 : index) { - %idx0 = arith.constant 0 : i32 - %q = fir.coordinate_of %arg0, %idx0, %idx0, %arg1 : (!fir.box}>}>>, i32, i32, index) -> !fir.ref + %q = fir.coordinate_of %arg0, field_1, field_2, %arg1 : (!fir.box}>}>>, index) -> !fir.ref return } diff --git a/flang/test/Fir/abstract-results-bindc.fir b/flang/test/Fir/abstract-results-bindc.fir index b2efffea31fb1..695098a82098e 100644 --- a/flang/test/Fir/abstract-results-bindc.fir +++ b/flang/test/Fir/abstract-results-bindc.fir @@ -54,7 +54,6 @@ func.func @test_return_cptr(%x: !fir.ref) { // CHECK-LABEL: func.func @test_return_cptr( // CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>) { // CHECK: %[[VAL_1:.*]] = fir.call @return_cptr() : () -> !fir.ref -// CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref // CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_1]] : (!fir.ref) -> i64 // CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref diff --git a/flang/test/Fir/abstract-results.fir b/flang/test/Fir/abstract-results.fir index 93e63dc657f0c..d112ec66f434d 100644 --- a/flang/test/Fir/abstract-results.fir +++ b/flang/test/Fir/abstract-results.fir @@ -54,18 +54,17 @@ func.func private @arrayfunc_callee(%n : index) -> !fir.array { // FUNC-BOX-SAME: %[[box:.*]]: !fir.box>, %[[v:.*]]: f32) { func.func @derivedfunc_callee(%v: f32) -> !fir.type { %buffer = fir.alloca !fir.type - %0 = fir.field_index x, !fir.type - %1 = fir.coordinate_of %buffer, %0 : (!fir.ref>, !fir.field) -> !fir.ref + %1 = fir.coordinate_of %buffer, x : (!fir.ref>) -> !fir.ref fir.store %v to %1 : !fir.ref %res = fir.load %buffer : !fir.ref> return %res : !fir.type - // FUNC-REF: %[[coor:.*]] = fir.coordinate_of 
%[[buffer]], %{{.*}} : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-REF: %[[coor:.*]] = fir.coordinate_of %[[buffer]], x : (!fir.ref>) -> !fir.ref // FUNC-REF: fir.store %[[v]] to %[[coor]] : !fir.ref // FUNC-REF: return // FUNC-BOX: %[[buffer:.*]] = fir.box_addr %[[box]] : (!fir.box>) -> !fir.ref> - // FUNC-BOX: %[[coor:.*]] = fir.coordinate_of %[[buffer]], %{{.*}} : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-BOX: %[[coor:.*]] = fir.coordinate_of %[[buffer]], x : (!fir.ref>) -> !fir.ref // FUNC-BOX: fir.store %[[v]] to %[[coor]] : !fir.ref // FUNC-BOX: return } @@ -95,14 +94,12 @@ func.func @retcptr() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__addres return %1 : !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> // FUNC-REF: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "rec", uniq_name = "_QFrecErec"} - // FUNC-REF: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], __address : (!fir.ref>) -> !fir.ref // FUNC-REF: %[[VAL:.*]] = fir.load %[[ADDR]] : !fir.ref // FUNC-REF: %[[CAST:.*]] = fir.convert %[[VAL]] : (i64) -> !fir.ref // FUNC-REF: return %[[CAST]] : !fir.ref // FUNC-BOX: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "rec", uniq_name = "_QFrecErec"} - // FUNC-BOX: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], __address : (!fir.ref>) -> !fir.ref // FUNC-BOX: %[[VAL:.*]] = fir.load %[[ADDR]] : !fir.ref // FUNC-BOX: %[[CAST:.*]] = fir.convert %[[VAL]] : (i64) -> !fir.ref 
// FUNC-BOX: return %[[CAST]] : !fir.ref @@ -256,20 +253,17 @@ func.func @call_chararrayfunc() { func.func @_QPtest_return_cptr() { %0 = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"} %1 = fir.call @retcptr() : () -> i64 - %2 = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - %3 = fir.coordinate_of %0, %2 : (!fir.ref>, !fir.field) -> !fir.ref + %3 = fir.coordinate_of %0, __address : (!fir.ref>) -> !fir.ref fir.store %1 to %3 : !fir.ref return // FUNC-REF: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"} // FUNC-REF: %[[VAL:.*]] = fir.call @retcptr() : () -> i64 - // FUNC-REF: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], __address : (!fir.ref>) -> !fir.ref // FUNC-REF: fir.store %[[VAL]] to %[[ADDR]] : !fir.ref // FUNC-BOX: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"} // FUNC-BOX: %[[VAL:.*]] = fir.call @retcptr() : () -> i64 - // FUNC-BOX: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], __address : (!fir.ref>) -> !fir.ref // FUNC-BOX: fir.store %[[VAL]] to %[[ADDR]] : !fir.ref } @@ -384,16 +378,14 @@ func.func @test_indirect_calls_return_cptr(%arg0: () -> ()) { // FUNC-REF: %[[VAL_1:.*]] = fir.convert %[[ARG0]] : (() -> ()) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) // FUNC-REF: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> 
!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> !fir.ref) // FUNC-REF: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> !fir.ref - // FUNC-REF: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-REF: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-REF: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref // FUNC-REF: %[[CAST:.*]] = fir.convert %[[VAL_3]] : (!fir.ref) -> i64 // FUNC-REF: fir.store %[[CAST]] to %[[VAL_5]] : !fir.ref // FUNC-BOX: %[[VAL_0:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"} // FUNC-BOX: %[[VAL_1:.*]] = fir.convert %[[ARG0]] : (() -> ()) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) // FUNC-BOX: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> !fir.ref) // FUNC-BOX: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> !fir.ref - // FUNC-BOX: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> - // FUNC-BOX: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref + // FUNC-BOX: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref // FUNC-BOX: %[[CAST:.*]] = fir.convert %[[VAL_3]] : (!fir.ref) -> i64 // FUNC-BOX: fir.store %[[CAST]] to %[[VAL_5]] : !fir.ref } diff --git a/flang/test/Fir/array-value-copy.fir b/flang/test/Fir/array-value-copy.fir index 58db8b3ae4cd2..3d44407b5fcf8 100644 --- a/flang/test/Fir/array-value-copy.fir +++ b/flang/test/Fir/array-value-copy.fir @@ -333,8 +333,7 @@ func.func @array_of_types() { %c1_i64 = arith.constant 1 : i64 %9 = arith.subi %8, %c1_i64 : i64 %10 = fir.coordinate_of %1, %9 : (!fir.ref}>>>, i64) -> !fir.ref}>> - %11 = fir.field_index i, 
!fir.type<_QTd{i:!fir.array<10xi32>}> - %12 = fir.coordinate_of %10, %11 : (!fir.ref}>>, !fir.field) -> !fir.ref> + %12 = fir.coordinate_of %10, i : (!fir.ref}>>) -> !fir.ref> %c10 = arith.constant 10 : index %13 = arith.addi %c1_0, %c10 : index %14 = arith.subi %13, %c1_0 : index @@ -363,7 +362,7 @@ func.func @array_of_types() { // CHECK-LABEL: func @array_of_types() { // CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} -> index { -// CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg2 = %17) -> (!fir.array<10xi32>) { +// CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%{{.*}} = %{{.*}}) -> (!fir.array<10xi32>) { // CHECK-NOT: %{{.*}} = fir.array_update // CHECK: %[[COOR0:.*]] = fir.array_coor %{{.*}}(%{{.*}}) [%{{.*}}] %{{.*}} : (!fir.ref>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref // CHECK: fir.store %{{.*}} to %[[COOR0]] : !fir.ref @@ -482,11 +481,9 @@ func.func @array_fetch_derived_type(%0 : !fir.ref}>>>) { // CHECK: %{{.*}} = fir.do_loop -// CHECK: %[[FIELD_MT:.*]] = fir.field_index mt, !fir.type<_QTu{mt:!fir.type<_QTt{mem:i32}>}> -// CHECK: %[[FIELD_MEM:.*]] = fir.field_index mem, !fir.type<_QTt{mem:i32}> // CHECK-NOT: %{{.*}} = fir.array_fetch // CHECK: %[[COOR0:.*]] = fir.array_coor %[[ARR0]](%{{.*}}) %{{.*}} : (!fir.ref}>>>, !fir.shape<1>, index) -> !fir.ref}>> -// CHECK: %[[COOR_OF:.*]] = fir.coordinate_of %[[COOR0]], %[[FIELD_MT]], %[[FIELD_MEM]] : (!fir.ref}>>, !fir.field, !fir.field) -> !fir.ref +// CHECK: %[[COOR_OF:.*]] = fir.coordinate_of %[[COOR0]], mt, mem : (!fir.ref}>>) -> !fir.ref // CHECK: %{{.*}} = fir.load %[[COOR_OF]] : !fir.ref // ----- diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 7cdcd2a10e975..a429a14518182 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -948,13 +948,11 @@ 
func.func @omp_map_info_descriptor_type_conversion(%arg0 : !fir.ref,int:i32}>>) { // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG_0]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)> - %0 = fir.field_index int, !fir.type<_QFderived_type{real:f32,array:!fir.array<10xi32>,int:i32}> - %1 = fir.coordinate_of %arg0, %0 : (!fir.ref,int:i32}>>, !fir.field) -> !fir.ref + %1 = fir.coordinate_of %arg0, int : (!fir.ref,int:i32}>>) -> !fir.ref // CHECK: %[[MAP_MEMBER_1:.*]] = omp.map.info var_ptr(%[[GEP]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "dtype%int"} %2 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "dtype%int"} // CHECK: %[[GEP_2:.*]] = llvm.getelementptr %[[ARG_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)> - %3 = fir.field_index real, !fir.type<_QFderived_type{real:f32,array:!fir.array<10xi32>,int:i32}> - %4 = fir.coordinate_of %arg0, %3 : (!fir.ref,int:i32}>>, !fir.field) -> !fir.ref + %4 = fir.coordinate_of %arg0, real : (!fir.ref,int:i32}>>) -> !fir.ref // CHECK: %[[MAP_MEMBER_2:.*]] = omp.map.info var_ptr(%[[GEP_2]] : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "dtype%real"} %5 = omp.map.info var_ptr(%4 : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "dtype%real"} // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [2], [0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "dtype", partial_map = true} @@ -973,16 +971,13 @@ func.func @omp_map_info_derived_type_explicit_member_conversion(%arg0 : !fir.ref func.func @omp_map_info_nested_derived_type_explicit_member_conversion(%arg0 : !fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>) { 
// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG_0]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFTtop_layer", (array<10 x i32>, struct<"_QFTbottom_layer", (array<10 x f32>, f64)>, i32)> - %0 = fir.field_index nested, !fir.type<_QFTtop_layer{array_i:!fir.array<10xi32>,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}> - %1 = fir.coordinate_of %arg0, %0 : (!fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>, !fir.field) -> !fir.ref,i2:f64}>> + %1 = fir.coordinate_of %arg0, nested : (!fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>) -> !fir.ref,i2:f64}>> // CHECK: %[[GEP_2:.*]] = llvm.getelementptr %[[GEP]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFTbottom_layer", (array<10 x f32>, f64)> - %2 = fir.field_index i2, !fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}> - %3 = fir.coordinate_of %1, %2 : (!fir.ref,i2:f64}>>, !fir.field) -> !fir.ref + %3 = fir.coordinate_of %1, i2 : (!fir.ref,i2:f64}>>) -> !fir.ref // CHECK: %[[MAP_MEMBER_1:.*]] = omp.map.info var_ptr(%[[GEP_2]] : !llvm.ptr, f64) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %4 = omp.map.info var_ptr(%3 : !fir.ref, f64) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[GEP_3:.*]] = llvm.getelementptr %[[ARG_0]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFTtop_layer", (array<10 x i32>, struct<"_QFTbottom_layer", (array<10 x f32>, f64)>, i32)> - %5 = fir.field_index k, !fir.type<_QFTtop_layer{array_i:!fir.array<10xi32>,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}> - %6 = fir.coordinate_of %arg0, %5 : (!fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>, !fir.field) -> !fir.ref + %6 = fir.coordinate_of %arg0, k : (!fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>) -> !fir.ref // CHECK: %[[MAP_MEMBER_2:.*]] = omp.map.info var_ptr(%[[GEP_3]] : !llvm.ptr, 
i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %7 = omp.map.info var_ptr(%6 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<"_QFTtop_layer", (array<10 x i32>, struct<"_QFTbottom_layer", (array<10 x f32>, f64)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [1, 1], [2] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} @@ -1131,7 +1126,7 @@ func.func @map_dtype_alloca_mem(%arg0 : !fir.ref !llvm.ptr, [[STRUCT_TY:!llvm.struct<"_QFRecTy", \(f32, struct<\(ptr, i64, i32, i8, i8, i8, i8\)>, array<10 x i32>, f32, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>]] - %1 = fir.coordinate_of %arg0, %c4 : (!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>, index) -> !fir.ref>>> + %1 = fir.coordinate_of %arg0, array_j : (!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>) -> !fir.ref>>> // CHECK: %[[BADDR_GEP:.*]] = llvm.getelementptr %[[GEP]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[STRUCT_TY2:!llvm.struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>]] %2 = fir.box_offset %1 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_MEMBER_BADDR:.*]] = omp.map.info var_ptr(%[[GEP]] : !llvm.ptr, i32) var_ptr_ptr(%[[BADDR_GEP]] : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr @@ -1165,7 +1160,7 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref !llvm.ptr, [[DESC_TY]] // CHECK: %[[LOAD_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[GEP_DTYPE_MEMBER:.*]] = llvm.getelementptr %[[LOAD_DTYPE_BADDR]][0, 4] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY:!llvm.struct<"_QFRecTy", \(f32, struct<\(ptr, i64, i32, i8, i8, i8, i8\)>, array<10 x i32>, f32, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>]] - %2 = fir.coordinate_of %1, %c4 : 
(!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>, index) -> !fir.ref>>> + %2 = fir.coordinate_of %1, array_j : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>) -> !fir.ref>>> // CHECK: %[[DTYPE_MEMBER_BADDR:.*]] = llvm.getelementptr %[[GEP_DTYPE_MEMBER]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY2:!llvm.struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>]] %3 = fir.box_offset %2 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_MEMBER_BADDR:.*]] = omp.map.info var_ptr(%[[GEP_DTYPE_MEMBER]] : !llvm.ptr, i32) var_ptr_ptr(%[[DTYPE_MEMBER_BADDR]] : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr @@ -1177,7 +1172,7 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref !llvm.ptr, [[DESC_TY]] // CHECK: %[[LOAD_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[GEP_DTYPE_REGULAR_MEMBER:.*]] = llvm.getelementptr %[[LOAD_DTYPE_BADDR]][0, 5] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY]] - %7 = fir.coordinate_of %6, %c5 : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>, index) -> !fir.ref + %7 = fir.coordinate_of %6, k : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>) -> !fir.ref // CHECK: %[[MAP_REGULAR_MEMBER:.*]] = omp.map.info var_ptr(%[[GEP_DTYPE_REGULAR_MEMBER]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %8 = omp.map.info var_ptr(%7 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[GEP_DTYPE_BADDR:.*]] = llvm.getelementptr %[[ARG_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]] @@ -1213,9 +1208,9 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref !llvm.ptr, [[DESC_TY]] // CHECK: %[[LOAD_GEP_DTYPE_BADDR:.*]] = llvm.load %[[GEP_DTYPE_BADDR]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[LOAD_NESTED_DTYPE:.*]] = llvm.getelementptr %[[LOAD_GEP_DTYPE_BADDR]][0, 6] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY:!llvm.struct<"_QFRecTy", \(f32, 
struct<\(ptr, i64, i32, i8, i8, i8, i8\)>, array<10 x i32>, f32, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32, struct<"_QFRecTy2", \(f32, array<10 x i32>, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>\)>]] - %2 = fir.coordinate_of %1, %c6 : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}>> + %2 = fir.coordinate_of %1, nest : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>) -> !fir.ref,array_k:!fir.box>>,k:i32}>> // CHECK: %[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER:.*]] = llvm.getelementptr %[[LOAD_NESTED_DTYPE]][0, 2] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY2:!llvm.struct<"_QFRecTy2", \(f32, array<10 x i32>, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>]] - %3 = fir.coordinate_of %2, %c2 : (!fir.ref,array_k:!fir.box>>,k:i32}>>, index) -> !fir.ref>>> + %3 = fir.coordinate_of %2, array_k : (!fir.ref,array_k:!fir.box>>,k:i32}>>) -> !fir.ref>>> // CHECK: %[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER_BADDR:.*]] = llvm.getelementptr %[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY2:!llvm.struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>]] %4 = fir.box_offset %3 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_NESTED_MEMBER_BADDR:.*]] = omp.map.info var_ptr(%[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER]] : !llvm.ptr, i32) var_ptr_ptr(%[[GEP_NESTED_DTYPE_ALLOCATABLE_MEMBER_BADDR]] : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr @@ -1227,9 +1222,9 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref !llvm.ptr %7 = fir.load %arg0 : 
!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>> // CHECK: %[[LOAD_NESTED_DTYPE:.*]] = llvm.getelementptr %[[LOAD_GEP_DTYPE_BADDR]][0, 6] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY]] - %8 = fir.coordinate_of %7, %c6 : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}>> + %8 = fir.coordinate_of %7, nest : (!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>) -> !fir.ref,array_k:!fir.box>>,k:i32}>> // CHECK: %[[NESTED_DTYPE_REGULAR_MEMBER_GEP:.*]] = llvm.getelementptr %[[LOAD_NESTED_DTYPE]][0, 3] : (!llvm.ptr) -> !llvm.ptr, [[REC_TY2]] - %9 = fir.coordinate_of %8, %c3 : (!fir.ref,array_k:!fir.box>>,k:i32}>>, index) -> !fir.ref + %9 = fir.coordinate_of %8, k : (!fir.ref,array_k:!fir.box>>,k:i32}>>) -> !fir.ref // CHECK: %[[MAP_REGULAR_NESTED_MEMBER:.*]] = omp.map.info var_ptr(%[[NESTED_DTYPE_REGULAR_MEMBER_GEP]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %10 = omp.map.info var_ptr(%9 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[DTYPE_BADDR_GEP:.*]] = llvm.getelementptr %[[ARG_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY]] @@ -1258,9 +1253,9 @@ func.func @map_nested_dtype_alloca_mem2(%arg0 : !fir.ref !llvm.ptr, [[REC_TY:!llvm.struct<"_QFRecTy", \(f32, struct<\(ptr, i64, i32, i8, i8, i8, i8\)>, array<10 x i32>, f32, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32, struct<"_QFRecTy2", \(f32, array<10 x i32>, struct<\(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>\)>, i32\)>\)>]] - %1 = fir.coordinate_of %arg0, %c6 : 
(!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}>> + %1 = fir.coordinate_of %arg0, nest : (!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>) -> !fir.ref,array_k:!fir.box>>,k:i32}>> // CHECK: %[[NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = llvm.getelementptr %[[NESTED_DTYPE_MEMBER_GEP]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFRecTy2", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)> - %2 = fir.coordinate_of %1, %c2 : (!fir.ref,array_k:!fir.box>>,k:i32}>>, index) -> !fir.ref>>> + %2 = fir.coordinate_of %1, array_k : (!fir.ref,array_k:!fir.box>>,k:i32}>>) -> !fir.ref>>> // CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = llvm.getelementptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]][0, 0] : (!llvm.ptr) -> !llvm.ptr, [[DESC_TY2]] %3 = fir.box_offset %2 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_NESTED_ALLOCATABLE_MEMBER_BADDR:.*]] = omp.map.info var_ptr(%[[NESTED_ALLOCATABLE_MEMBER_GEP]] : !llvm.ptr, i32) var_ptr_ptr(%[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]] : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr @@ -1282,10 +1277,8 @@ func.func @map_nested_dtype_alloca_mem2(%arg0 : !fir.ref { // CHECK: ^bb0(%[[VAL_0:.*]]: !llvm.ptr): ^bb0(%0: !fir.ref>): -// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(0 : i32) : i32 - %1 = fir.field_index data, !fir.type<_QFdeclare_mapperTmy_type{data:i32}> // CHECK: %[[VAL_2:.*]] = llvm.getelementptr %[[VAL_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFdeclare_mapperTmy_type", (i32)> - %2 = fir.coordinate_of %0, %1 : (!fir.ref>, !fir.field) -> !fir.ref + %2 = fir.coordinate_of %0, data : (!fir.ref>) -> !fir.ref // CHECK: %[[VAL_3:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !llvm.ptr, 
i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var%[[VAL_4:.*]]"} %3 = omp.map.info var_ptr(%2 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "var%data"} // CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !llvm.ptr, !llvm.struct<"_QFdeclare_mapperTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) members(%[[VAL_3]] : [0] : !llvm.ptr) -> !llvm.ptr {name = "var", partial_map = true} diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 8727c0ab08e70..c7037019ee701 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -2575,13 +2575,11 @@ func.func @coordinate_box_complex(%arg0: !fir.box>) { // 2. BOX TYPE (objects wrapped in `fir.box`) // Derived type - basic case (1 index) func.func @coordinate_box_derived_1(%arg0: !fir.box>) { - %idx = fir.field_index field_2, !fir.type - %q = fir.coordinate_of %arg0, %idx : (!fir.box>, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, field_2 : (!fir.box>) -> !fir.ref return } // CHECK-LABEL: llvm.func @coordinate_box_derived_1 // CHECK-SAME: %[[BOX:.*]]: !llvm.ptr) -// CHECK: %[[COORDINATE:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[DERIVED_ADDR:.*]] = llvm.getelementptr %[[BOX]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i64>)> // CHECK: %[[DERIVED_VAL:.*]] = llvm.load %[[DERIVED_ADDR]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[SUBOBJECT_ADDR:.*]] = llvm.getelementptr %[[DERIVED_VAL]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"derived_1", (i32, i32)> @@ -2589,16 +2587,12 @@ func.func @coordinate_box_derived_1(%arg0: !fir.box, field_2:i32}>>) { - %idx0 = fir.field_index field_1, !fir.type, field_2:i32}> - %idx1 = fir.field_index inner2, !fir.type - %q = fir.coordinate_of %arg0, %idx0, %idx1 : (!fir.box, field_2:i32}>>, !fir.field, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, field_1, inner2 : 
(!fir.box, field_2:i32}>>) -> !fir.ref return } // CHECK-LABEL: llvm.func @coordinate_box_derived_2 // CHECK-SAME: (%[[BOX:.*]]: !llvm.ptr) -// CHECK-NEXT: %[[C0_0:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK-NEXT: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[DERIVED_ADDR:.*]] = llvm.getelementptr %[[BOX]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i{{.*}}, i{{.*}}32, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i64>)> // CHECK-NEXT: %[[DERIVED_VAL:.*]] = llvm.load %[[DERIVED_ADDR]] : !llvm.ptr -> !llvm.ptr // CHECK-NEXT: %[[ANOTHER_DERIVED_ADDR:.*]] = llvm.getelementptr %[[DERIVED_VAL]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"derived_2", (struct<"another_derived", (i32, f32)>, i32)> @@ -2683,8 +2677,7 @@ func.func @coordinate_box_array_2d(%arg0: !fir.box>, % // 4. BOX TYPE - `fir.derived` inside `fir.array` func.func @coordinate_box_derived_inside_array(%arg0: !fir.box>>, %arg1 : index) { - %idx0 = fir.field_index field_2, !fir.type - %q = fir.coordinate_of %arg0, %arg1, %idx0 : (!fir.box>>, index, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, %arg1, field_2 : (!fir.box>>, index) -> !fir.ref return } // CHECK-LABEL: llvm.func @coordinate_box_derived_inside_array( @@ -2761,8 +2754,7 @@ func.func @coordinate_array_known_size_2d_get_array(%arg0: !fir.ref>) { - %idx = fir.field_index field_2, !fir.type - %q = fir.coordinate_of %arg0, %idx : (!fir.ref>, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, field_2 : (!fir.ref>) -> !fir.ref return } // CHECK-LABEL: llvm.func @coordinate_ref_derived( @@ -2774,9 +2766,7 @@ func.func @coordinate_ref_derived(%arg0: !fir.ref, field_2:i32}>>) { - %idx0 = fir.field_index field_1, !fir.type, field_2:i32}> - %idx1 = fir.field_index inner2, !fir.type - %q = fir.coordinate_of %arg0, %idx0, %idx1 : (!fir.ref, field_2:i32}>>, !fir.field, !fir.field) -> !fir.ref + %q = fir.coordinate_of %arg0, field_1, inner2 : (!fir.ref, field_2:i32}>>) -> !fir.ref return } // CHECK-LABEL: 
llvm.func @coordinate_ref_derived_nested( @@ -2788,15 +2778,15 @@ func.func @coordinate_ref_derived_nested(%arg0: !fir.ref>) { +func.func @test_coordinate_of_char(%arr : !fir.ref>) { %1 = arith.constant 10 : i32 - %2 = fir.coordinate_of %arr, %1 : (!fir.ref>, i32) -> !fir.ref> + %2 = fir.coordinate_of %arr, %1 : (!fir.ref>, i32) -> !fir.ref> return } // CHECK-LABEL: llvm.func @test_coordinate_of_char( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) { // CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(10 : i32) : i32 -// CHECK: %[[VAL_2:.*]] = llvm.getelementptr %[[VAL_0]]{{\[}}%[[VAL_1]]] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<2 x i80> +// CHECK: %[[VAL_2:.*]] = llvm.getelementptr %[[VAL_0]]{{\[}}0, %[[VAL_1]]] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i16> // CHECK: llvm.return // CHECK: } diff --git a/flang/test/Fir/dispatch.f90 b/flang/test/Fir/dispatch.f90 index 2ffdcd5b1884d..2b1ae225986ca 100644 --- a/flang/test/Fir/dispatch.f90 +++ b/flang/test/Fir/dispatch.f90 @@ -200,15 +200,12 @@ program test_type_to_class ! Check dynamic dispatch equal to `call p%display2()` with binding index = 2. ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c2{{.*}} : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0) : (!fir.class>) -> () @@ -216,15 +213,12 @@ program test_type_to_class ! Check dynamic dispatch equal to `call p%display1()` with binding index = 1. ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c1{{.*}} : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0) : (!fir.class>) -> () @@ -232,15 +226,12 @@ program test_type_to_class ! Check dynamic dispatch equal to `call p%aproc()` with binding index = 0. ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c0{{.*}}: (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0) : (!fir.class>) -> () @@ -248,15 +239,12 @@ program test_type_to_class ! Check dynamic dispatch of a function with result. ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c3 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> i32) ! CHECK: %[[RES:.*]] = fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0) : (!fir.class>) -> i32 @@ -264,15 +252,12 @@ program test_type_to_class ! Check dynamic dispatch of call with passed-object and additional argument ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c6{{.*}} : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]], !fir.ref) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%[[ARG_DECL]]#0, %{{.*}}) : (!fir.class>, !fir.ref) -> () @@ -280,30 +265,24 @@ program test_type_to_class ! Check dynamic dispatch of a call with NOPASS ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#1 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref>>>> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : (!fir.box>> ! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c4{{.*}} : (!fir.ptr>>, index) -> !fir.ref> -! 
CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (() -> ()) ! CHECK: fir.call %[[FUNC_PTR]]() : () -> () ! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG_DECL]]#0 : ([[CLASS]]) -> !fir.tdesc ! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> -! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] -! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], binding : (!fir.ref<[[TYPEINFO]]>) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> ! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> ! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> ! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c5{{.*}} : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> -! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> -! 
CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] -! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], proc : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], __address ! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref ! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> ((!fir.ref, [[CLASS]]) -> ()) ! CHECK: fir.call %[[FUNC_PTR]](%{{.*}}, %[[ARG_DECL]]#0) : (!fir.ref, [[CLASS]]) -> () diff --git a/flang/test/Fir/field-index.fir b/flang/test/Fir/field-index.fir index 4f2551b380d55..55d173201f29a 100644 --- a/flang/test/Fir/field-index.fir +++ b/flang/test/Fir/field-index.fir @@ -1,4 +1,4 @@ -// Test fir.field_index llvm code generation +// Test llvm code generation of fir.coordinate_of with field names // RUN: fir-opt %s | tco | FileCheck %s @@ -9,9 +9,8 @@ // CHECK-LABEL: @simple_field // CHECK-SAME: (ptr captures(none) %[[arg0:.*]]) func.func @simple_field(%arg0: !fir.ref>) -> i32 { - %1 = fir.field_index i, !fir.type // CHECK: %[[GEP:.*]] = getelementptr %a, ptr %[[arg0]], i32 0, i32 1 - %2 = fir.coordinate_of %arg0, %1 : (!fir.ref>, !fir.field) -> !fir.ref + %2 = fir.coordinate_of %arg0, i : (!fir.ref>) -> !fir.ref // CHECK: load i32, ptr %[[GEP]] %3 = fir.load %2 : !fir.ref return %3 : i32 @@ -20,10 +19,8 @@ func.func @simple_field(%arg0: !fir.ref>) -> i32 { // CHECK-LABEL: @derived_field // CHECK-SAME: (ptr captures(none) %[[arg0:.*]]) func.func @derived_field(%arg0: !fir.ref}>>) -> i32 { - %1 = fir.field_index some_b, !fir.type}> - %2 = fir.field_index i, !fir.type // CHECK: %[[GEP:.*]] = getelementptr %c, ptr %[[arg0]], i32 0, i32 1, i32 1 - %3 = fir.coordinate_of %arg0, %1, %2 : (!fir.ref}>>, !fir.field, !fir.field) -> !fir.ref + %3 = fir.coordinate_of %arg0, some_b, i : (!fir.ref}>>) -> !fir.ref // CHECK: load 
i32, ptr %[[GEP]] %4 = fir.load %3 : !fir.ref return %4 : i32 diff --git a/flang/test/Fir/pdt.fir b/flang/test/Fir/pdt.fir index ce1fb7a379b8b..a200cd7e7cc03 100644 --- a/flang/test/Fir/pdt.fir +++ b/flang/test/Fir/pdt.fir @@ -49,8 +49,7 @@ func.func @_QQmain(%arg0 : i32, %arg1 : i16) { // CHECK: %[[size:.*]] = call i64 @_QTtP.mem.size(i32 %0, i16 %1) // CHECK: %[[alloc:.*]] = alloca i8, i64 %[[size]] %0 = fir.alloca !fir.type<_QTt(p1:i32,p2:i16){f1:i32,f2:f32}>(%arg0, %arg1 : i32, i16) {name = "_QEvar"} - %1 = fir.field_index f1, !fir.type<_QTt(p1:i32,p2:i16){f1:i32,f2:f32}>(%arg0, %arg1 : i32, i16) - %2 = fir.coordinate_of %0, %1 : (!fir.ref>, !fir.field) -> !fir.ref + %2 = fir.coordinate_of %0, f1 : (!fir.ref>) -> !fir.ref %c4_i32 = arith.constant 4 : i32 fir.store %c4_i32 to %2 : !fir.ref return @@ -102,8 +101,7 @@ func.func @_QPfoo(%arg0 : i32, %arg1 : i32) { // CHECK: %[[size:.*]] = call i64 @_QTt1P.mem.size(i32 %0, i32 %1) // CHECK: %[[alloc:.*]] = alloca i8, i64 %[[size]] %0 = fir.alloca !fir.type<_QTt1(p1:i32,p2:i32){f1:!fir.char<1,?>,f2:!fir.char<1,?>}>(%arg0, %arg1 : i32, i32) - %1 = fir.field_index f2, !fir.type<_QTt1>(%arg0, %arg1 : i32, i32) - //%2 = fir.coordinate_of %0, %1 : (!fir.ref>, !fir.field) -> !fir.ref> + //%2 = fir.coordinate_of %0, f2 : (!fir.ref>) -> !fir.ref> %2 = fir.zero_bits !fir.ref> fir.call @bar(%2) : (!fir.ref>) -> () return diff --git a/flang/test/HLFIR/assign-codegen-derived.fir b/flang/test/HLFIR/assign-codegen-derived.fir index c45c118ed46c5..9bba0d31a6ea6 100644 --- a/flang/test/HLFIR/assign-codegen-derived.fir +++ b/flang/test/HLFIR/assign-codegen-derived.fir @@ -12,8 +12,8 @@ func.func @test_simple(%a: !fir.ref, %b: !fir.ref) { } // CHECK-LABEL: func.func @test_simple( // CHECK-NOT: Destroy -// CHECK: %[[VAL_1:.*]] = fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref>, !fir.field) -> !fir.ref -// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref>, !fir.field) -> !fir.ref +// CHECK: %[[VAL_1:.*]] = 
fir.coordinate_of %{{.*}}, i : (!fir.ref>) -> !fir.ref +// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %{{.*}}, i : (!fir.ref>) -> !fir.ref // CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_1]] : !fir.ref // CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref diff --git a/flang/test/HLFIR/c_ptr_byvalue.f90 b/flang/test/HLFIR/c_ptr_byvalue.f90 index b2c8da5e22579..f39059a8cfa8d 100644 --- a/flang/test/HLFIR/c_ptr_byvalue.f90 +++ b/flang/test/HLFIR/c_ptr_byvalue.f90 @@ -2,8 +2,7 @@ ! CHECK-LABEL: func.func @_QPtest1() { ! CHECK: %[[VAL_110:.*]]:3 = hlfir.associate %{{.*}} {uniq_name = "adapt.cptrbyval"} : (!hlfir.expr>) -> (!fir.ref>, !fir.ref>, i1) -! CHECK: %[[VAL_111:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_112:.*]] = fir.coordinate_of %[[VAL_110]]#1, %[[VAL_111]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_112:.*]] = fir.coordinate_of %[[VAL_110]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_113:.*]] = fir.load %[[VAL_112]] : !fir.ref ! CHECK: %[[VAL_114:.*]] = fir.convert %[[VAL_113]] : (i64) -> !fir.ref ! CHECK: hlfir.end_associate %[[VAL_110]]#1, %[[VAL_110]]#2 : !fir.ref>, i1 @@ -24,8 +23,7 @@ end subroutine get_expected_f ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cptr"}) { ! CHECK: %[[DSCOPE:.*]] = fir.dummy_scope : !fir.dscope ! CHECK: %[[VAL_97:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[DSCOPE]] {uniq_name = "_QFtest2Ecptr"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_98:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_99:.*]] = fir.coordinate_of %[[VAL_97]]#0, %[[VAL_98]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_99:.*]] = fir.coordinate_of %[[VAL_97]]#0, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_100:.*]] = fir.load %[[VAL_99]] : !fir.ref ! CHECK: %[[VAL_101:.*]] = fir.convert %[[VAL_100]] : (i64) -> !fir.ref ! 
CHECK: fir.call @get_expected_f(%[[VAL_101]]) proc_attrs fastmath : (!fir.ref) -> () diff --git a/flang/test/HLFIR/designate-codegen-component-refs.fir b/flang/test/HLFIR/designate-codegen-component-refs.fir index 0e9d81f5cff8b..278a7be0e2da1 100644 --- a/flang/test/HLFIR/designate-codegen-component-refs.fir +++ b/flang/test/HLFIR/designate-codegen-component-refs.fir @@ -10,8 +10,7 @@ func.func @test_scalar(%arg0: !fir.ref> // CHECK-LABEL: func.func @test_scalar( // CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>) { // CHECK: %[[VAL_1:.*]] = fir.declare %[[VAL_0]] {uniq_name = "a"} -// CHECK: %[[VAL_2:.*]] = fir.field_index scalar_x, !fir.type -// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +// CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], scalar_x : (!fir.ref>) -> !fir.ref func.func @test_array_char_comp_1(%arg0: !fir.ref>}>>) { %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.ref>}>>) -> (!fir.ref>}>>, !fir.ref>}>>) @@ -29,8 +28,7 @@ func.func @test_array_char_comp_1(%arg0: !fir.ref !fir.shape<2> // CHECK: %[[VAL_5:.*]] = arith.constant 5 : index -// CHECK: %[[VAL_6:.*]] = fir.field_index array_char_comp, !fir.type>}> -// CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_6]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> +// CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], array_char_comp : (!fir.ref>}>>) -> !fir.ref>> func.func @test_array(%arg0: !fir.box>>) { %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) @@ -189,8 +187,7 @@ func.func @test_array_comp_slice(%arg0: !fir.ref !fir.shape<2> -// CHECK: %[[VAL_9:.*]] = fir.field_index array_comp, !fir.type}> -// CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_9]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +// CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_1]], array_comp : (!fir.ref}>>) -> !fir.ref> // CHECK: %[[VAL_11:.*]] = fir.array_coor %[[VAL_10]](%[[VAL_4]]) %[[VAL_5]], %[[VAL_6]] : 
(!fir.ref>, !fir.shape<2>, index, index) -> !fir.ref // CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.ref) -> !fir.ref> @@ -219,8 +216,7 @@ func.func @test_array_comp_non_contiguous_slice(%arg0: !fir.ref !fir.shape<2> -// CHECK: %[[VAL_10:.*]] = fir.field_index array_comp, !fir.type}> -// CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_10]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +// CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_1]], array_comp : (!fir.ref}>>) -> !fir.ref> // CHECK: %[[VAL_12:.*]] = fir.undefined index // CHECK: %[[VAL_13:.*]] = fir.slice %[[VAL_5]], %[[VAL_6]], %[[VAL_5]], %[[VAL_7]], %[[VAL_3]], %[[VAL_5]] : (index, index, index, index, index, index) -> !fir.slice<2> // CHECK: %[[VAL_14:.*]] = fir.embox %[[VAL_11]](%[[VAL_4]]) {{\[}}%[[VAL_13]]] : (!fir.ref>, !fir.shape<2>, !fir.slice<2>) -> !fir.box> diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90 index e0221ef254192..70ae353ced214 100644 --- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 +++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90 @@ -504,10 +504,10 @@ end subroutine mapType_common_block_members !CHECK-LABEL: define {{.*}} @{{.*}}maptype_derived_type_alloca_{{.*}} !CHECK: %[[ALLOCATABLE_DESC_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 !CHECK: %[[ALLOCA:.*]] = alloca %_QFmaptype_derived_type_allocaTone_layer, i64 1, align 8 +!CHECK: %[[MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_derived_type_allocaTone_layer, ptr %[[ALLOCA]], i32 0, i32 4 !CHECK: %[[DESC_BOUND_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCATABLE_DESC_ALLOCA]], i32 0, i32 7, i64 0, i32 1 !CHECK: %[[DESC_BOUND_ACCESS_LOAD:.*]] = load i64, ptr %[[DESC_BOUND_ACCESS]], align 8 !CHECK: %[[OFFSET_UB:.*]] = sub i64 %[[DESC_BOUND_ACCESS_LOAD]], 1 -!CHECK: %[[MEMBER_ACCESS:.*]] = getelementptr 
%_QFmaptype_derived_type_allocaTone_layer, ptr %[[ALLOCA]], i32 0, i32 4 !CHECK: %[[MEMBER_DESCRIPTOR_BASE_ADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[MEMBER_ACCESS]], i32 0, i32 0 !CHECK: %[[CALCULATE_DIM_SIZE:.*]] = sub i64 %[[OFFSET_UB]], 0 !CHECK: %[[RESTORE_OFFSET:.*]] = add i64 %[[CALCULATE_DIM_SIZE]], 1 @@ -549,12 +549,12 @@ end subroutine mapType_common_block_members !CHECK: %[[DTYPE_ARRAY_MEMBER_DESC_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 !CHECK: %[[DTYPE_DESC_ALLOCA_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 !CHECK: %[[DTYPE_DESC_ALLOCA_3:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1, align 8 -!CHECK: %[[ACCESS_DESC_MEMBER_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ARRAY_MEMBER_DESC_ALLOCA]], i32 0, i32 7, i64 0, i32 1 -!CHECK: %[[LOAD_DESC_MEMBER_UB:.*]] = load i64, ptr %[[ACCESS_DESC_MEMBER_UB]], align 8 -!CHECK: %[[OFFSET_MEMBER_UB:.*]] = sub i64 %[[LOAD_DESC_MEMBER_UB]], 1 !CHECK: %[[DTYPE_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_2]], i32 0, i32 0 !CHECK: %[[DTYPE_BASE_ADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS]], align 8 !CHECK: %[[DTYPE_ALLOCA_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %[[DTYPE_BASE_ADDR_LOAD]], i32 0, i32 4 +!CHECK: %[[ACCESS_DESC_MEMBER_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ARRAY_MEMBER_DESC_ALLOCA]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[LOAD_DESC_MEMBER_UB:.*]] = load i64, ptr %[[ACCESS_DESC_MEMBER_UB]], align 8 +!CHECK: %[[OFFSET_MEMBER_UB:.*]] = sub i64 %[[LOAD_DESC_MEMBER_UB]], 1 !CHECK: %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], i32 0, i32 0 !CHECK: 
%[[DTYPE_BASE_ADDR_ACCESS_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA]], i32 0, i32 0 !CHECK: %[[DTYPE_BASE_ADDR_LOAD_2:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_2]], align 8 @@ -729,13 +729,12 @@ end subroutine mapType_common_block_members !CHECK: %[[ALLOCA_1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 !CHECK: %[[ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, align 8 !CHECK: %[[BASE_PTR_1:.*]] = alloca %_QFmaptype_nested_derived_type_member_idxTdtype, i64 1, align 8 -!CHECK: %{{.*}} = getelementptr %_QFmaptype_nested_derived_type_member_idxTdtype, ptr %[[BASE_PTR_1]], i32 0, i32 1 +!CHECK: %[[OFF_PTR_1:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTdtype, ptr %[[BASE_PTR_1]], i32 0, i32 1 !CHECK: %[[BOUNDS_ACC:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA]], i32 0, i32 7, i64 0, i32 1 !CHECK: %[[BOUNDS_LD:.*]] = load i64, ptr %[[BOUNDS_ACC]], align 8 !CHECK: %[[BOUNDS_ACC_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCA_1]], i32 0, i32 7, i64 0, i32 1 !CHECK: %[[BOUNDS_LD_2:.*]] = load i64, ptr %[[BOUNDS_ACC_2]], align 8 !CHECK: %[[BOUNDS_CALC:.*]] = sub i64 %[[BOUNDS_LD_2]], 1 -!CHECK: %[[OFF_PTR_1:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTdtype, ptr %[[BASE_PTR_1]], i32 0, i32 1 !CHECK: %[[OFF_PTR_CALC_0:.*]] = sub i64 %[[BOUNDS_LD]], 1 !CHECK: %[[OFF_PTR_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[OFF_PTR_1]], i32 0, i32 0 !CHECK: %[[GEP_DESC_PTR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_0]], i32 0, i32 0 diff --git a/flang/test/Lower/CUDA/cuda-cdevloc.cuf b/flang/test/Lower/CUDA/cuda-cdevloc.cuf index a71490207909a..d663e6eda478b 100644 --- a/flang/test/Lower/CUDA/cuda-cdevloc.cuf +++ 
b/flang/test/Lower/CUDA/cuda-cdevloc.cuf @@ -12,10 +12,8 @@ end ! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref>, index) -> !fir.ref ! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref) -> !fir.box ! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> -! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> -! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], cptr : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box) -> !fir.ref ! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref diff --git a/flang/test/Lower/CUDA/cuda-devptr.cuf b/flang/test/Lower/CUDA/cuda-devptr.cuf index 0a9087cf6c133..2d6af2a9693a4 100644 --- a/flang/test/Lower/CUDA/cuda-devptr.cuf +++ b/flang/test/Lower/CUDA/cuda-devptr.cuf @@ -48,10 +48,8 @@ end ! CHECK-LABEL: func.func @_QPsub2() ! CHECK: %[[X:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub2Ex"} : (!fir.ref>>>) -> !fir.ref>>> -! CHECK: %[[CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{{[<]?}}{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}{{[>]?}}> -! 
CHECK: %[[CPTR_COORD:.*]] = fir.coordinate_of %{{.*}}, %[[CPTR]] : (!fir.ref}{{[>]?}}>>, !fir.field) -> !fir.ref> -! CHECK: %[[ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[ADDRESS_COORD:.*]] = fir.coordinate_of %[[CPTR_COORD]], %[[ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[CPTR_COORD:.*]] = fir.coordinate_of %{{.*}}, cptr : (!fir.ref}{{[>]?}}>>) -> !fir.ref> +! CHECK: %[[ADDRESS_COORD:.*]] = fir.coordinate_of %[[CPTR_COORD]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS_LOADED:.*]] = fir.load %[[ADDRESS_COORD]] : !fir.ref ! CHECK: %[[ADDRESS_IDX:.*]] = fir.convert %[[ADDRESS_LOADED]] : (i64) -> !fir.ptr> ! CHECK: %[[EMBOX:.*]] = fir.embox %[[ADDRESS_IDX]](%{{.*}}) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> @@ -68,14 +66,10 @@ end subroutine ! CHECK-LABEL: func.func @_QPassign_c_devptr ! CHECK: %[[P:.*]] = fir.declare %arg0 dummy_scope %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFassign_c_devptrEp"} ! CHECK: %[[C_DEVLOC_RES:.*]] = fir.declare %15 {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref}>>) -> !fir.ref}>> -! CHECK: %[[CPTR_FIELD:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> -! CHECK: %[[RES_CPTR_COORD:.*]] = fir.coordinate_of %[[C_DEVLOC_RES]], %[[CPTR_FIELD]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[CPTR_FIELD:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> -! CHECK: %[[P_CPTR_COORD:.*]] = fir.coordinate_of %[[P]], %[[CPTR_FIELD]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[RES_ADDR_COORD:.*]] = fir.coordinate_of %[[RES_CPTR_COORD]], %[[ADDRESS_FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref -! 
CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[P_ADDR_COORD:.*]] = fir.coordinate_of %[[P_CPTR_COORD]], %[[ADDRESS_FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[RES_CPTR_COORD:.*]] = fir.coordinate_of %[[C_DEVLOC_RES]], cptr : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[P_CPTR_COORD:.*]] = fir.coordinate_of %[[P]], cptr : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[RES_ADDR_COORD:.*]] = fir.coordinate_of %[[RES_CPTR_COORD]], __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[P_ADDR_COORD:.*]] = fir.coordinate_of %[[P_CPTR_COORD]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDR:.*]] = fir.load %[[RES_ADDR_COORD]] : !fir.ref ! CHECK: fir.store %[[ADDR]] to %[[P_ADDR_COORD]] : !fir.ref diff --git a/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 b/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 index 6a44cbd86e80d..d55ebaaad99eb 100644 --- a/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 +++ b/flang/test/Lower/HLFIR/assumed-rank-inquiries.f90 @@ -346,8 +346,7 @@ subroutine c_loc_2(x) ! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFc_loc_1Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_2]]#0 : (!fir.box>) -> !fir.ref> ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>) -> i64 ! 
CHECK: fir.store %[[VAL_7]] to %[[VAL_5]] : !fir.ref @@ -367,8 +366,7 @@ subroutine c_loc_2(x) ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFc_loc_2Ex"} : (!fir.ref>>>, !fir.dscope) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref>>> ! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_6:.*]] = fir.coordinate_of %[[VAL_4]], %[[VAL_5]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_6:.*]] = fir.coordinate_of %[[VAL_4]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_3]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.ptr>) -> i64 ! CHECK: fir.store %[[VAL_8]] to %[[VAL_6]] : !fir.ref diff --git a/flang/test/Lower/HLFIR/c_ptr-constant-init.f90 b/flang/test/Lower/HLFIR/c_ptr-constant-init.f90 index fcf2d1e31475d..1797d473fda0d 100644 --- a/flang/test/Lower/HLFIR/c_ptr-constant-init.f90 +++ b/flang/test/Lower/HLFIR/c_ptr-constant-init.f90 @@ -15,7 +15,6 @@ end subroutine test ! CHECK: %[[VAL_1:.*]] = fir.field_index d, !fir.type<_QFtestTt1{d:!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>}> ! CHECK: %[[VAL_2:.*]] = fir.undefined !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> ! CHECK: %[[VAL_3:.*]] = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i64 ! 
CHECK: %[[VAL_6:.*]] = fir.insert_value %[[VAL_3]], %[[VAL_5]], ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_2]], %[[VAL_6]], [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> @@ -39,7 +38,6 @@ end subroutine test2 ! CHECK: %[[VAL_1:.*]] = fir.field_index d, !fir.type<_QFtest2Tt1{d:!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>>}> ! CHECK: %[[VAL_2:.*]] = fir.undefined !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> ! CHECK: %[[VAL_3:.*]] = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_6:.*]] = fir.insert_value %[[VAL_3]], %[[VAL_5]], ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! 
CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_2]], %[[VAL_6]], [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>>, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>) -> !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> diff --git a/flang/test/Lower/HLFIR/intrinsic-module-procedures.f90 b/flang/test/Lower/HLFIR/intrinsic-module-procedures.f90 index 6406d00bebb59..8a5a52be68019 100644 --- a/flang/test/Lower/HLFIR/intrinsic-module-procedures.f90 +++ b/flang/test/Lower/HLFIR/intrinsic-module-procedures.f90 @@ -16,8 +16,7 @@ subroutine foo(cptr, x) ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare {{.*}}Ex" ! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1 : (!fir.ref) -> !fir.box ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_8:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box) -> !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_9]] to %[[VAL_7]] : !fir.ref diff --git a/flang/test/Lower/Intrinsics/c_associated.f90 b/flang/test/Lower/Intrinsics/c_associated.f90 index ba2d7f130f760..3956957853372 100644 --- a/flang/test/Lower/Intrinsics/c_associated.f90 +++ b/flang/test/Lower/Intrinsics/c_associated.f90 @@ -8,15 +8,13 @@ ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "cptr2"}) { ! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z1", uniq_name = "_QFtest_c_ptrEz1"} ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z2", uniq_name = "_QFtest_c_ptrEz2"} -! 
CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_8:.*]] = arith.cmpi ne, %[[VAL_6]], %[[VAL_7]] : i64 ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_9]] to %[[VAL_2]] : !fir.ref> -! CHECK: %[[VAL_10:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_10]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_13]] : i64 @@ -26,8 +24,7 @@ ! CHECK: %[[VAL_18:.*]] = fir.if %[[VAL_17]] -> (i1) { ! CHECK: fir.result %[[VAL_14]] : i1 ! CHECK: } else { -! CHECK: %[[VAL_19:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_19]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref ! CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_12]], %[[VAL_21]] : i64 ! CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_14]], %[[VAL_22]] : i1 @@ -53,15 +50,13 @@ subroutine test_c_ptr(cptr1, cptr2) ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "cptr2"}) { ! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z1", uniq_name = "_QFtest_c_funptrEz1"} ! 
CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z2", uniq_name = "_QFtest_c_funptrEz2"} -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_8:.*]] = arith.cmpi ne, %[[VAL_6]], %[[VAL_7]] : i64 ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_9]] to %[[VAL_2]] : !fir.ref> -! CHECK: %[[VAL_10:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_10]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_13]] : i64 @@ -71,8 +66,7 @@ subroutine test_c_ptr(cptr1, cptr2) ! CHECK: %[[VAL_18:.*]] = fir.if %[[VAL_17]] -> (i1) { ! CHECK: fir.result %[[VAL_14]] : i1 ! CHECK: } else { -! CHECK: %[[VAL_19:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_19]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref ! CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_12]], %[[VAL_21]] : i64 ! CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_14]], %[[VAL_22]] : i1 @@ -100,8 +94,7 @@ subroutine test_c_funptr(cptr1, cptr2) ! 
CHECK-SAME: %[[VAL_3:.*]]: !fir.ref> {fir.bindc_name = "cfunptr2", fir.optional}) { ! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z1", uniq_name = "_QFtest_optional_argumentEz1"} ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z2", uniq_name = "_QFtest_optional_argumentEz2"} -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_8]], %[[VAL_9]] : i64 @@ -111,8 +104,7 @@ subroutine test_c_funptr(cptr1, cptr2) ! CHECK: %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (i1) { ! CHECK: fir.result %[[VAL_10]] : i1 ! CHECK: } else { -! CHECK: %[[VAL_15:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_15]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref ! CHECK: %[[VAL_18:.*]] = arith.cmpi eq, %[[VAL_8]], %[[VAL_17]] : i64 ! CHECK: %[[VAL_19:.*]] = arith.andi %[[VAL_10]], %[[VAL_18]] : i1 @@ -120,8 +112,7 @@ subroutine test_c_funptr(cptr1, cptr2) ! CHECK: } ! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_21:.*]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_20]] to %[[VAL_4]] : !fir.ref> -! CHECK: %[[VAL_22:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_23:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_22]] : (!fir.ref>, !fir.field) -> !fir.ref +! 
CHECK: %[[VAL_23:.*]] = fir.coordinate_of %[[VAL_2]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref ! CHECK: %[[VAL_25:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_26:.*]] = arith.cmpi ne, %[[VAL_24]], %[[VAL_25]] : i64 @@ -131,8 +122,7 @@ subroutine test_c_funptr(cptr1, cptr2) ! CHECK: %[[VAL_30:.*]] = fir.if %[[VAL_29]] -> (i1) { ! CHECK: fir.result %[[VAL_26]] : i1 ! CHECK: } else { -! CHECK: %[[VAL_31:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_32:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_31]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref ! CHECK: %[[VAL_34:.*]] = arith.cmpi eq, %[[VAL_24]], %[[VAL_33]] : i64 ! CHECK: %[[VAL_35:.*]] = arith.andi %[[VAL_26]], %[[VAL_34]] : i1 diff --git a/flang/test/Lower/Intrinsics/c_f_pointer.f90 b/flang/test/Lower/Intrinsics/c_f_pointer.f90 index 8e8680777275d..67817e39d5c2b 100644 --- a/flang/test/Lower/Intrinsics/c_f_pointer.f90 +++ b/flang/test/Lower/Intrinsics/c_f_pointer.f90 @@ -6,8 +6,7 @@ ! CHECK-LABEL: func.func @_QPtest_scalar( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cptr"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>> {fir.bindc_name = "fptr"}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> !fir.ptr ! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]] : (!fir.ptr) -> !fir.box> @@ -26,8 +25,7 @@ subroutine test_scalar(cptr, fptr) ! CHECK-LABEL: func.func @_QPtest_array( ! 
CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cptr"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> {fir.bindc_name = "fptr"}) { -! CHECK: %[[VAL_65:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_66:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_65]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_66:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_67:.*]] = fir.load %[[VAL_66]] : !fir.ref ! CHECK: %[[VAL_68:.*]] = fir.convert %[[VAL_67]] : (i64) -> !fir.ptr> ! CHECK: %[[VAL_69:.*]] = arith.constant 0 : index @@ -56,8 +54,7 @@ subroutine test_array(cptr, fptr) ! CHECK-LABEL: func.func @_QPtest_char( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "cptr"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>>> {fir.bindc_name = "fptr"}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> !fir.ptr> ! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]] : (!fir.ptr>) -> !fir.box>> @@ -81,8 +78,7 @@ subroutine test_char(cptr, fptr) ! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_7]], %[[VAL_8]] : i32 ! CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_7]], %[[VAL_8]] : i32 -! CHECK: %[[VAL_70:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_71:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_70]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_71:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_72:.*]] = fir.load %[[VAL_71]] : !fir.ref ! 
CHECK: %[[VAL_73:.*]] = fir.convert %[[VAL_72]] : (i64) -> !fir.ptr>> ! CHECK: %[[VAL_74:.*]] = arith.constant 0 : index diff --git a/flang/test/Lower/Intrinsics/c_f_procpointer.f90 b/flang/test/Lower/Intrinsics/c_f_procpointer.f90 index f8792e4c1be0f..69f3f398cb12e 100644 --- a/flang/test/Lower/Intrinsics/c_f_procpointer.f90 +++ b/flang/test/Lower/Intrinsics/c_f_procpointer.f90 @@ -12,8 +12,7 @@ subroutine test_c_funloc(fptr, cptr) ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "cptr"}) { ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFtest_c_funlocEcptr"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_funlocEfptr"} : (!fir.ref ()>>, !fir.dscope) -> (!fir.ref ()>>, !fir.ref ()>>) -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_2]]#1, %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_2]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i64) -> (() -> ()) ! CHECK: %[[VAL_8:.*]] = fir.emboxproc %[[VAL_7]] : (() -> ()) -> !fir.boxproc<() -> ()> @@ -34,8 +33,7 @@ character(10) function char_func() ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "cptr"}) { ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFtest_c_funloc_charEcptr"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_funloc_charEfptr"} : (!fir.ref ()>>, !fir.dscope) -> (!fir.ref ()>>, !fir.ref ()>>) -! 
CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_2]]#1, %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_2]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i64) -> (() -> ()) ! CHECK: %[[VAL_8:.*]] = fir.emboxproc %[[VAL_7]] : (() -> ()) -> !fir.boxproc<() -> ()> diff --git a/flang/test/Lower/Intrinsics/c_funloc-proc-pointers.f90 b/flang/test/Lower/Intrinsics/c_funloc-proc-pointers.f90 index 0f398a346d459..fbd196832ba65 100644 --- a/flang/test/Lower/Intrinsics/c_funloc-proc-pointers.f90 +++ b/flang/test/Lower/Intrinsics/c_funloc-proc-pointers.f90 @@ -11,8 +11,7 @@ subroutine test_c_funloc(p) ! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_funlocEp"} : (!fir.ref ()>>, !fir.dscope) -> (!fir.ref ()>>, !fir.ref ()>>) ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref ()>> ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_2]] : (!fir.boxproc<() -> ()>) -> (() -> ()) ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (() -> ()) -> i64 ! CHECK: fir.store %[[VAL_7]] to %[[VAL_5]] : !fir.ref @@ -31,8 +30,7 @@ character(10) function char_func() ! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_funloc_charEp"} : (!fir.ref ()>>, !fir.dscope) -> (!fir.ref ()>>, !fir.ref ()>>) ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref ()>> ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_2]] : (!fir.boxproc<() -> ()>) -> (() -> ()) ! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (() -> ()) -> i64 ! CHECK: fir.store %[[VAL_7]] to %[[VAL_5]] : !fir.ref diff --git a/flang/test/Lower/Intrinsics/c_funloc.f90 b/flang/test/Lower/Intrinsics/c_funloc.f90 index 29a0e10e2b94f..93be2215ffef4 100644 --- a/flang/test/Lower/Intrinsics/c_funloc.f90 +++ b/flang/test/Lower/Intrinsics/c_funloc.f90 @@ -9,8 +9,7 @@ ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! CHECK-DAG: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.boxproc<(!fir.ref) -> ()>) -> ((!fir.ref) -> ()) ! CHECK-DAG: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : ((!fir.ref) -> ()) -> i64 -! CHECK-DAG: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! 
CHECK: fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref subroutine test() diff --git a/flang/test/Lower/Intrinsics/c_loc.f90 b/flang/test/Lower/Intrinsics/c_loc.f90 index f46b80fd9b980..ecd5ce590fd5d 100644 --- a/flang/test/Lower/Intrinsics/c_loc.f90 +++ b/flang/test/Lower/Intrinsics/c_loc.f90 @@ -10,8 +10,7 @@ ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box) -> !fir.ref ! CHECK-DAG: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ref) -> i64 -! CHECK-DAG: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref ! CHECK: } @@ -29,8 +28,7 @@ subroutine c_loc_scalar() ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ref>) -> i64 -! CHECK-DAG: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_3]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref ! CHECK: } @@ -62,8 +60,7 @@ subroutine c_loc_char() ! CHECK: %[[VAL_17:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_18:.*]] = fir.box_addr %[[VAL_16]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (!fir.ref>) -> i64 -! 
CHECK-DAG: %[[VAL_20:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_17]], %[[VAL_20]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_17]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_19]] to %[[VAL_21]] : !fir.ref ! CHECK: } @@ -83,8 +80,7 @@ subroutine c_loc_substring() ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_6:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>) -> i64 -! CHECK-DAG: %[[VAL_8:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_8]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_7]] to %[[VAL_9]] : !fir.ref ! CHECK: } @@ -104,8 +100,7 @@ subroutine c_loc_array ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_6:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box>>) -> !fir.ref>> ! CHECK-DAG: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>>) -> i64 -! CHECK-DAG: %[[VAL_8:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_8]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_7]] to %[[VAL_9]] : !fir.ref ! CHECK: } @@ -127,8 +122,7 @@ subroutine c_loc_chararray() ! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! 
CHECK-DAG: %[[VAL_8:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box) -> !fir.ref ! CHECK-DAG: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (!fir.ref) -> i64 -! CHECK-DAG: %[[VAL_10:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_7]], %[[VAL_10]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_7]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_9]] to %[[VAL_11]] : !fir.ref ! CHECK: } @@ -158,8 +152,7 @@ subroutine c_loc_arrayelement() ! CHECK: %[[VAL_15:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_16:.*]] = fir.box_addr %[[VAL_14]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (!fir.ref>) -> i64 -! CHECK-DAG: %[[VAL_18:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_15]], %[[VAL_18]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_15]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_17]] to %[[VAL_19]] : !fir.ref ! CHECK: } @@ -196,15 +189,12 @@ subroutine c_loc_arraysection() ! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_1:.*]] : !fir.ref> ! CHECK: %[[VAL_15:.*]] = fir.embox %[[VAL_14:.*]] : (!fir.ptr) -> !fir.box ! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_17:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_16:.*]], %[[VAL_17:.*]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_16:.*]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.box_addr %[[VAL_15:.*]] : (!fir.box) -> !fir.ref ! 
CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19:.*]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_20:.*]] to %[[VAL_18:.*]] : !fir.ref -! CHECK: %[[VAL_21:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_16:.*]], %[[VAL_21:.*]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[VAL_23:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_3:.*]], %[[VAL_23:.*]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_16:.*]], __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_3:.*]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_22:.*]] : !fir.ref ! CHECK: fir.store %[[VAL_25:.*]] to %[[VAL_24:.*]] : !fir.ref ! CHECK: return @@ -227,8 +217,7 @@ subroutine c_loc_non_save_pointer_scalar() ! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_11:.*]] = fir.box_addr %[[VAL_9]] : (!fir.box) -> !fir.ref ! CHECK-DAG: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.ref) -> i64 -! CHECK-DAG: %[[VAL_13:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_10]], %[[VAL_13]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_10]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_12]] to %[[VAL_14]] : !fir.ref ! CHECK: } @@ -247,8 +236,7 @@ subroutine c_loc_save_pointer_scalar() ! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_10:.*]] = fir.box_addr %[[VAL_8:.*]] : (!fir.box>) -> !fir.ref> ! CHECK-DAG: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (!fir.ref>) -> i64 -! 
CHECK-DAG: %[[VAL_12:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_13:.*]] = fir.coordinate_of %[[VAL_9]], %[[VAL_12]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_13:.*]] = fir.coordinate_of %[[VAL_9]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_11]] to %[[VAL_13]] : !fir.ref ! CHECK: } @@ -268,8 +256,7 @@ subroutine c_loc_derived_type ! CHECK: %[[VAL_31:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK-DAG: %[[VAL_32:.*]] = fir.box_addr %[[VAL_30:.*]] : (!fir.box>>) -> !fir.ptr> ! CHECK-DAG: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.ptr>) -> i64 -! CHECK-DAG: %[[VAL_34:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK-DAG: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_31]], %[[VAL_34]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK-DAG: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_31]], __address : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[VAL_33]] to %[[VAL_35]] : !fir.ref ! CHECK: } diff --git a/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 b/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 index c6a2f186e4c12..b304ee924ec57 100644 --- a/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 +++ b/flang/test/Lower/Intrinsics/c_ptr_eq_ne.f90 @@ -14,11 +14,9 @@ function test_c_ptr_eq(ptr1, ptr2) ! CHECK: %[[DECL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_ptr_eqEptr2"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test_c_ptr_eq", uniq_name = "_QFtest_c_ptr_eqEtest_c_ptr_eq"} ! CHECK: %[[DECL_RET:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFtest_c_ptr_eqEtest_c_ptr_eq"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -! 
CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS0:.*]] = fir.load %[[COORD_ADDRESS0]] : !fir.ref -! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS1:.*]] = fir.load %[[COORD_ADDRESS1]] : !fir.ref ! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[ADDRESS0]], %[[ADDRESS1]] : i64 ! CHECK: %[[RES:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4> @@ -41,11 +39,9 @@ function test_c_ptr_ne(ptr1, ptr2) ! CHECK: %[[DECL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_c_ptr_neEptr2"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> {bindc_name = "test_c_ptr_ne", uniq_name = "_QFtest_c_ptr_neEtest_c_ptr_ne"} ! CHECK: %[[DECL_RET:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFtest_c_ptr_neEtest_c_ptr_ne"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_ADDRESS0:.*]] = fir.coordinate_of %[[DECL_ARG0]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS0:.*]] = fir.load %[[COORD_ADDRESS0]] : !fir.ref -! 
CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, %[[FIELD_ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_ADDRESS1:.*]] = fir.coordinate_of %[[DECL_ARG1]]#1, __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[ADDRESS1:.*]] = fir.load %[[COORD_ADDRESS1]] : !fir.ref ! CHECK: %[[CMP:.*]] = arith.cmpi ne, %[[ADDRESS0]], %[[ADDRESS1]] : i64 ! CHECK: %[[RES:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4> diff --git a/flang/test/Lower/Intrinsics/ieee_class.f90 b/flang/test/Lower/Intrinsics/ieee_class.f90 index 2c1cdf95275c9..acef959656539 100644 --- a/flang/test/Lower/Intrinsics/ieee_class.f90 +++ b/flang/test/Lower/Intrinsics/ieee_class.f90 @@ -65,23 +65,18 @@ subroutine classify(x) ! CHECK: %[[V_25:[0-9]+]] = arith.ori %[[V_22]], %[[V_24]] : i64 ! CHECK: %[[V_26:[0-9]+]] = fir.address_of(@_FortranAIeeeClassTable) : !fir.ref> ! CHECK: %[[V_27:[0-9]+]] = fir.coordinate_of %[[V_26]], %[[V_25]] : (!fir.ref>, i64) -> !fir.ref> - ! CHECK: %[[V_28:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_29:[0-9]+]] = fir.coordinate_of %[[V_27]], %[[V_28]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: %[[V_30:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_31:[0-9]+]] = fir.coordinate_of %[[V_2]], %[[V_30]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_29:[0-9]+]] = fir.coordinate_of %[[V_27]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_31:[0-9]+]] = fir.coordinate_of %[[V_2]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_32:[0-9]+]] = fir.load %[[V_29]] : !fir.ref ! 
CHECK: fir.store %[[V_32]] to %[[V_31]] : !fir.ref r = ieee_class(x) ! if (r==ieee_signaling_nan) call out(x, 1) ! if (r==ieee_quiet_nan) call out(x, 2) - ! CHECK: %[[V_38:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_39:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_38]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_39:[0-9]+]] = fir.coordinate_of %[[V_1]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c3{{.*}} to %[[V_39]] : !fir.ref - ! CHECK: %[[V_40:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_41:[0-9]+]] = fir.coordinate_of %[[V_2]], %[[V_40]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: %[[V_42:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_43:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_42]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_41:[0-9]+]] = fir.coordinate_of %[[V_2]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_43:[0-9]+]] = fir.coordinate_of %[[V_1]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_44:[0-9]+]] = fir.load %[[V_41]] : !fir.ref ! CHECK: %[[V_45:[0-9]+]] = fir.load %[[V_43]] : !fir.ref ! CHECK: %[[V_46:[0-9]+]] = arith.cmpi eq, %[[V_44]], %[[V_45]] : i8 @@ -111,11 +106,9 @@ program p ! x(2) = ieee_value(x(1), ieee_quiet_nan) ! CHECK: %[[V_0:[0-9]+]] = fir.alloca !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> ! CHECK: %[[V_2:[0-9]+]] = fir.address_of(@_QFEx) : !fir.ref> - ! 
CHECK: %[[V_8:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_9:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_8]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_9:[0-9]+]] = fir.coordinate_of %[[V_0]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c3{{.*}} to %[[V_9]] : !fir.ref - ! CHECK: %[[V_10:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_11:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_10]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_11:[0-9]+]] = fir.coordinate_of %[[V_0]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_12:[0-9]+]] = fir.load %[[V_11]] : !fir.ref ! CHECK: %[[V_13:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_8) : !fir.ref> ! CHECK: %[[V_14:[0-9]+]] = fir.coordinate_of %[[V_13]], %[[V_12]] : (!fir.ref>, i8) -> !fir.ref diff --git a/flang/test/Lower/Intrinsics/ieee_flag.f90 b/flang/test/Lower/Intrinsics/ieee_flag.f90 index e4addc0d658dc..13ca7ba48a74c 100644 --- a/flang/test/Lower/Intrinsics/ieee_flag.f90 +++ b/flang/test/Lower/Intrinsics/ieee_flag.f90 @@ -33,8 +33,7 @@ ! CHECK: %[[V_80:[0-9]+]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0) : !fir.ref> ! CHECK: %[[V_95:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_82:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_flag_type.flag, !fir.type<_QM__fortran_builtinsT__builtin_ieee_flag_type{_QM__fortran_builtinsT__builtin_ieee_flag_type.flag:i8}> - ! CHECK: %[[V_96:[0-9]+]] = fir.coordinate_of %[[V_95]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_96:[0-9]+]] = fir.coordinate_of %[[V_95]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_97:[0-9]+]] = fir.load %[[V_96]] : !fir.ref ! CHECK: %[[V_98:[0-9]+]] = fir.convert %[[V_97]] : (i8) -> i32 ! CHECK: %[[V_99:[0-9]+]] = fir.call @_FortranAMapException(%[[V_98]]) fastmath : (i32) -> i32 @@ -46,7 +45,7 @@ call ieee_set_flag(ieee_invalid, .false.) ! CHECK: %[[V_100:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_101:[0-9]+]] = fir.coordinate_of %[[V_100]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_101:[0-9]+]] = fir.coordinate_of %[[V_100]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_102:[0-9]+]] = fir.load %[[V_101]] : !fir.ref ! CHECK: %[[V_103:[0-9]+]] = fir.convert %[[V_102]] : (i8) -> i32 ! CHECK: %[[V_104:[0-9]+]] = fir.call @_FortranAMapException(%[[V_103]]) fastmath : (i32) -> i32 @@ -60,7 +59,7 @@ print*, 'invalid[F]: ', v ! CHECK: %[[V_118:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_119:[0-9]+]] = fir.coordinate_of %[[V_118]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_119:[0-9]+]] = fir.coordinate_of %[[V_118]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_120:[0-9]+]] = fir.load %[[V_119]] : !fir.ref ! CHECK: %[[V_121:[0-9]+]] = fir.convert %[[V_120]] : (i8) -> i32 ! CHECK: %[[V_122:[0-9]+]] = fir.call @_FortranAMapException(%[[V_121]]) fastmath : (i32) -> i32 @@ -72,7 +71,7 @@ call ieee_set_flag(ieee_invalid, .true.) ! 
CHECK: %[[V_123:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_124:[0-9]+]] = fir.coordinate_of %[[V_123]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_124:[0-9]+]] = fir.coordinate_of %[[V_123]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_125:[0-9]+]] = fir.load %[[V_124]] : !fir.ref ! CHECK: %[[V_126:[0-9]+]] = fir.convert %[[V_125]] : (i8) -> i32 ! CHECK: %[[V_127:[0-9]+]] = fir.call @_FortranAMapException(%[[V_126]]) fastmath : (i32) -> i32 @@ -89,7 +88,7 @@ ! CHECK: %[[V_141:[0-9]+]] = fir.declare %[[V_140]](%[[V_59]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.2x_QM__fortran_builtinsT__builtin_ieee_flag_type.1"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_141]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -106,7 +105,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_143]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_60]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -127,7 +126,7 @@ ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_154]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_156]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref> - ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.load %[[V_313]] : !fir.ref ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_314]] : (i8) -> i32 ! CHECK: %[[V_316:[0-9]+]] = fir.call @_FortranAMapException(%[[V_315]]) fastmath : (i32) -> i32 @@ -144,7 +143,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_157]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_60]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -162,7 +161,7 @@ ! 
CHECK: %[[V_166:[0-9]+]] = fir.declare %[[V_165]](%[[V_54]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.3x_QM__fortran_builtinsT__builtin_ieee_flag_type.4"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_166]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -178,7 +177,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_167]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_64]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -199,7 +198,7 @@ ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_178]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_180]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! 
CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref> - ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.load %[[V_313]] : !fir.ref ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_314]] : (i8) -> i32 ! CHECK: %[[V_316:[0-9]+]] = fir.call @_FortranAMapException(%[[V_315]]) fastmath : (i32) -> i32 @@ -216,7 +215,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_181]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_64]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -234,7 +233,7 @@ ! CHECK: %[[V_190:[0-9]+]] = fir.declare %[[V_189]](%[[V_1]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.5x_QM__fortran_builtinsT__builtin_ieee_flag_type.6"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c5{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_190]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -250,7 +249,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c5{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_191]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_62]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.call @_FortranAMapException(%[[V_314]]) fastmath : (i32) -> i32 @@ -275,7 +274,7 @@ print*, 'support invalid: ', ieee_support_halting(ieee_invalid) ! CHECK: %[[V_222:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_223:[0-9]+]] = fir.coordinate_of %[[V_222]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_223:[0-9]+]] = fir.coordinate_of %[[V_222]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_224:[0-9]+]] = fir.load %[[V_223]] : !fir.ref ! CHECK: %[[V_225:[0-9]+]] = fir.convert %[[V_224]] : (i8) -> i32 ! CHECK: %[[V_226:[0-9]+]] = fir.call @_FortranAMapException(%[[V_225]]) fastmath : (i32) -> i32 @@ -287,7 +286,7 @@ call ieee_set_halting_mode(ieee_invalid, .false.) ! 
CHECK: %[[V_227:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_228:[0-9]+]] = fir.coordinate_of %[[V_227]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_228:[0-9]+]] = fir.coordinate_of %[[V_227]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_229:[0-9]+]] = fir.load %[[V_228]] : !fir.ref ! CHECK: %[[V_230:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_231:[0-9]+]] = fir.convert %[[V_229]] : (i8) -> i32 @@ -302,7 +301,7 @@ print*, 'invalid[F]: ', v ! CHECK: %[[V_244:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_245:[0-9]+]] = fir.coordinate_of %[[V_244]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_245:[0-9]+]] = fir.coordinate_of %[[V_244]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_246:[0-9]+]] = fir.load %[[V_245]] : !fir.ref ! CHECK: %[[V_247:[0-9]+]] = fir.convert %[[V_246]] : (i8) -> i32 ! CHECK: %[[V_248:[0-9]+]] = fir.call @_FortranAMapException(%[[V_247]]) fastmath : (i32) -> i32 @@ -314,7 +313,7 @@ call ieee_set_halting_mode(ieee_invalid, .true.) ! CHECK: %[[V_249:[0-9]+]] = fir.declare %[[V_80]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_250:[0-9]+]] = fir.coordinate_of %[[V_249]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_250:[0-9]+]] = fir.coordinate_of %[[V_249]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_251:[0-9]+]] = fir.load %[[V_250]] : !fir.ref ! CHECK: %[[V_252:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! 
CHECK: %[[V_253:[0-9]+]] = fir.convert %[[V_251]] : (i8) -> i32 @@ -331,7 +330,7 @@ ! CHECK: %[[V_266:[0-9]+]] = fir.declare %[[V_140]](%[[V_59]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.2x_QM__fortran_builtinsT__builtin_ieee_flag_type.1"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_266]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -347,7 +346,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_267]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_60]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 @@ -368,7 +367,7 @@ ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_274]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! 
CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_275]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref> - ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.load %[[V_313]] : !fir.ref ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_314]] : (i8) -> i32 ! CHECK: %[[V_316:[0-9]+]] = fir.call @_FortranAMapException(%[[V_315]]) fastmath : (i32) -> i32 @@ -385,7 +384,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c2{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_276]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_60]](%[[V_59]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 @@ -403,7 +402,7 @@ ! CHECK: %[[V_283:[0-9]+]] = fir.declare %[[V_165]](%[[V_54]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.3x_QM__fortran_builtinsT__builtin_ieee_flag_type.4"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_283]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -419,7 +418,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_284]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_64]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 @@ -440,7 +439,7 @@ ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_291]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_292]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref> - ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_313:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.load %[[V_313]] : !fir.ref ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_314]] : (i8) -> i32 ! CHECK: %[[V_316:[0-9]+]] = fir.call @_FortranAMapException(%[[V_315]]) fastmath : (i32) -> i32 @@ -457,7 +456,7 @@ ! 
CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c3{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_293]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_64]](%[[V_54]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 @@ -475,7 +474,7 @@ ! CHECK: %[[V_300:[0-9]+]] = fir.declare %[[V_189]](%[[V_1]]) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.5x_QM__fortran_builtinsT__builtin_ieee_flag_type.6"} : (!fir.ref>>, !fir.shape<1>) -> !fir.ref>> ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c5{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_300]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_311:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_312:[0-9]+]] = fir.load %[[V_311]] : !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.convert %[[V_312]] : (i8) -> i32 ! CHECK: %[[V_314:[0-9]+]] = fir.call @_FortranAMapException(%[[V_313]]) fastmath : (i32) -> i32 @@ -491,7 +490,7 @@ ! CHECK: fir.do_loop %arg0 = %c1{{.*}} to %c5{{.*}} step %c1{{.*}} { ! CHECK: %[[V_310:[0-9]+]] = fir.array_coor %[[V_301]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: %[[V_311:[0-9]+]] = fir.array_coor %[[V_62]](%[[V_1]]) %arg0 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> - ! 
CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], %[[V_82]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_312:[0-9]+]] = fir.coordinate_of %[[V_310]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_313:[0-9]+]] = fir.load %[[V_312]] : !fir.ref ! CHECK: %[[V_314:[0-9]+]] = fir.call @fegetexcept() fastmath : () -> i32 ! CHECK: %[[V_315:[0-9]+]] = fir.convert %[[V_313]] : (i8) -> i32 diff --git a/flang/test/Lower/Intrinsics/ieee_logb.f90 b/flang/test/Lower/Intrinsics/ieee_logb.f90 index bbc65e68e0b46..d9252e22a5f9f 100644 --- a/flang/test/Lower/Intrinsics/ieee_logb.f90 +++ b/flang/test/Lower/Intrinsics/ieee_logb.f90 @@ -15,8 +15,7 @@ subroutine out(x) ! CHECK: %[[V_65:[0-9]+]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0) : !fir.ref> ! CHECK: %[[V_66:[0-9]+]] = fir.declare %[[V_65]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_67:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_flag_type.flag, !fir.type<_QM__fortran_builtinsT__builtin_ieee_flag_type{_QM__fortran_builtinsT__builtin_ieee_flag_type.flag:i8}> - ! CHECK: %[[V_68:[0-9]+]] = fir.coordinate_of %[[V_66]], %[[V_67]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_68:[0-9]+]] = fir.coordinate_of %[[V_66]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_69:[0-9]+]] = fir.load %[[V_68]] : !fir.ref ! CHECK: %[[V_70:[0-9]+]] = fir.convert %[[V_69]] : (i8) -> i32 ! CHECK: %[[V_71:[0-9]+]] = fir.call @_FortranAMapException(%[[V_70]]) fastmath : (i32) -> i32 @@ -53,7 +52,7 @@ subroutine out(x) r = ieee_logb(x) ! CHECK: %[[V_76:[0-9]+]] = fir.declare %[[V_65]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.0"} : (!fir.ref>) -> !fir.ref> - ! 
CHECK: %[[V_77:[0-9]+]] = fir.coordinate_of %[[V_76]], %[[V_67]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_77:[0-9]+]] = fir.coordinate_of %[[V_76]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_78:[0-9]+]] = fir.load %[[V_77]] : !fir.ref ! CHECK: %[[V_79:[0-9]+]] = fir.convert %[[V_78]] : (i8) -> i32 ! CHECK: %[[V_80:[0-9]+]] = fir.call @_FortranAMapException(%[[V_79]]) fastmath : (i32) -> i32 diff --git a/flang/test/Lower/Intrinsics/ieee_max_min.f90 b/flang/test/Lower/Intrinsics/ieee_max_min.f90 index 69ae05b8f2f8c..581f3d6c7f52c 100644 --- a/flang/test/Lower/Intrinsics/ieee_max_min.f90 +++ b/flang/test/Lower/Intrinsics/ieee_max_min.f90 @@ -69,8 +69,7 @@ program p ! CHECK: %[[V_201:[0-9]+]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10) : !fir.ref> ! CHECK: %[[V_202:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_203:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_flag_type.flag, !fir.type<_QM__fortran_builtinsT__builtin_ieee_flag_type{_QM__fortran_builtinsT__builtin_ieee_flag_type.flag:i8}> - ! CHECK: %[[V_204:[0-9]+]] = fir.coordinate_of %[[V_202]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_204:[0-9]+]] = fir.coordinate_of %[[V_202]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_205:[0-9]+]] = fir.load %[[V_204]] : !fir.ref ! CHECK: %[[V_206:[0-9]+]] = fir.convert %[[V_205]] : (i8) -> i32 ! CHECK: %[[V_207:[0-9]+]] = fir.call @_FortranAMapException(%[[V_206]]) fastmath : (i32) -> i32 @@ -113,7 +112,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_211]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_212:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! 
CHECK: %[[V_213:[0-9]+]] = fir.coordinate_of %[[V_212]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_213:[0-9]+]] = fir.coordinate_of %[[V_212]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_214:[0-9]+]] = fir.load %[[V_213]] : !fir.ref ! CHECK: %[[V_215:[0-9]+]] = fir.convert %[[V_214]] : (i8) -> i32 ! CHECK: %[[V_216:[0-9]+]] = fir.call @_FortranAMapException(%[[V_215]]) fastmath : (i32) -> i32 @@ -127,7 +126,7 @@ program p write(*, 4) 'max ', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_268:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_269:[0-9]+]] = fir.coordinate_of %[[V_268]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_269:[0-9]+]] = fir.coordinate_of %[[V_268]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_270:[0-9]+]] = fir.load %[[V_269]] : !fir.ref ! CHECK: %[[V_271:[0-9]+]] = fir.convert %[[V_270]] : (i8) -> i32 ! CHECK: %[[V_272:[0-9]+]] = fir.call @_FortranAMapException(%[[V_271]]) fastmath : (i32) -> i32 @@ -172,7 +171,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_278]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_279:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_280:[0-9]+]] = fir.coordinate_of %[[V_279]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_280:[0-9]+]] = fir.coordinate_of %[[V_279]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_281:[0-9]+]] = fir.load %[[V_280]] : !fir.ref ! CHECK: %[[V_282:[0-9]+]] = fir.convert %[[V_281]] : (i8) -> i32 ! 
CHECK: %[[V_283:[0-9]+]] = fir.call @_FortranAMapException(%[[V_282]]) fastmath : (i32) -> i32 @@ -186,7 +185,7 @@ program p write(*, 4) 'mag ', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_329:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_330:[0-9]+]] = fir.coordinate_of %[[V_329]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_330:[0-9]+]] = fir.coordinate_of %[[V_329]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_331:[0-9]+]] = fir.load %[[V_330]] : !fir.ref ! CHECK: %[[V_332:[0-9]+]] = fir.convert %[[V_331]] : (i8) -> i32 ! CHECK: %[[V_333:[0-9]+]] = fir.call @_FortranAMapException(%[[V_332]]) fastmath : (i32) -> i32 @@ -233,7 +232,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_337]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_338:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_339:[0-9]+]] = fir.coordinate_of %[[V_338]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_339:[0-9]+]] = fir.coordinate_of %[[V_338]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_340:[0-9]+]] = fir.load %[[V_339]] : !fir.ref ! CHECK: %[[V_341:[0-9]+]] = fir.convert %[[V_340]] : (i8) -> i32 ! CHECK: %[[V_342:[0-9]+]] = fir.call @_FortranAMapException(%[[V_341]]) fastmath : (i32) -> i32 @@ -247,7 +246,7 @@ program p write(*, 4) 'max_num', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_388:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_389:[0-9]+]] = fir.coordinate_of %[[V_388]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: %[[V_389:[0-9]+]] = fir.coordinate_of %[[V_388]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_390:[0-9]+]] = fir.load %[[V_389]] : !fir.ref ! CHECK: %[[V_391:[0-9]+]] = fir.convert %[[V_390]] : (i8) -> i32 ! CHECK: %[[V_392:[0-9]+]] = fir.call @_FortranAMapException(%[[V_391]]) fastmath : (i32) -> i32 @@ -296,7 +295,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_398]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_399:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_400:[0-9]+]] = fir.coordinate_of %[[V_399]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_400:[0-9]+]] = fir.coordinate_of %[[V_399]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_401:[0-9]+]] = fir.load %[[V_400]] : !fir.ref ! CHECK: %[[V_402:[0-9]+]] = fir.convert %[[V_401]] : (i8) -> i32 ! CHECK: %[[V_403:[0-9]+]] = fir.call @_FortranAMapException(%[[V_402]]) fastmath : (i32) -> i32 @@ -310,7 +309,7 @@ program p write(*, 4) 'mag_num', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_449:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_450:[0-9]+]] = fir.coordinate_of %[[V_449]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_450:[0-9]+]] = fir.coordinate_of %[[V_449]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_451:[0-9]+]] = fir.load %[[V_450]] : !fir.ref ! CHECK: %[[V_452:[0-9]+]] = fir.convert %[[V_451]] : (i8) -> i32 ! CHECK: %[[V_453:[0-9]+]] = fir.call @_FortranAMapException(%[[V_452]]) fastmath : (i32) -> i32 @@ -353,7 +352,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_457]] to %[[V_83]] : !fir.ref ! 
CHECK: %[[V_458:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_459:[0-9]+]] = fir.coordinate_of %[[V_458]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_459:[0-9]+]] = fir.coordinate_of %[[V_458]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_460:[0-9]+]] = fir.load %[[V_459]] : !fir.ref ! CHECK: %[[V_461:[0-9]+]] = fir.convert %[[V_460]] : (i8) -> i32 ! CHECK: %[[V_462:[0-9]+]] = fir.call @_FortranAMapException(%[[V_461]]) fastmath : (i32) -> i32 @@ -367,7 +366,7 @@ program p write(*, 4) 'min ', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_508:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_509:[0-9]+]] = fir.coordinate_of %[[V_508]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_509:[0-9]+]] = fir.coordinate_of %[[V_508]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_510:[0-9]+]] = fir.load %[[V_509]] : !fir.ref ! CHECK: %[[V_511:[0-9]+]] = fir.convert %[[V_510]] : (i8) -> i32 ! CHECK: %[[V_512:[0-9]+]] = fir.call @_FortranAMapException(%[[V_511]]) fastmath : (i32) -> i32 @@ -412,7 +411,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_518]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_519:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_520:[0-9]+]] = fir.coordinate_of %[[V_519]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_520:[0-9]+]] = fir.coordinate_of %[[V_519]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_521:[0-9]+]] = fir.load %[[V_520]] : !fir.ref ! 
CHECK: %[[V_522:[0-9]+]] = fir.convert %[[V_521]] : (i8) -> i32 ! CHECK: %[[V_523:[0-9]+]] = fir.call @_FortranAMapException(%[[V_522]]) fastmath : (i32) -> i32 @@ -426,7 +425,7 @@ program p write(*, 4) 'mig ', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_569:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_570:[0-9]+]] = fir.coordinate_of %[[V_569]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_570:[0-9]+]] = fir.coordinate_of %[[V_569]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_571:[0-9]+]] = fir.load %[[V_570]] : !fir.ref ! CHECK: %[[V_572:[0-9]+]] = fir.convert %[[V_571]] : (i8) -> i32 ! CHECK: %[[V_573:[0-9]+]] = fir.call @_FortranAMapException(%[[V_572]]) fastmath : (i32) -> i32 @@ -473,7 +472,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_577]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_578:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_579:[0-9]+]] = fir.coordinate_of %[[V_578]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_579:[0-9]+]] = fir.coordinate_of %[[V_578]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_580:[0-9]+]] = fir.load %[[V_579]] : !fir.ref ! CHECK: %[[V_581:[0-9]+]] = fir.convert %[[V_580]] : (i8) -> i32 ! CHECK: %[[V_582:[0-9]+]] = fir.call @_FortranAMapException(%[[V_581]]) fastmath : (i32) -> i32 @@ -487,7 +486,7 @@ program p write(*, 4) 'min_num', a, a, b, b, r, flag_value, trim(tag(r)) ! CHECK: %[[V_628:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! 
CHECK: %[[V_629:[0-9]+]] = fir.coordinate_of %[[V_628]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_629:[0-9]+]] = fir.coordinate_of %[[V_628]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_630:[0-9]+]] = fir.load %[[V_629]] : !fir.ref ! CHECK: %[[V_631:[0-9]+]] = fir.convert %[[V_630]] : (i8) -> i32 ! CHECK: %[[V_632:[0-9]+]] = fir.call @_FortranAMapException(%[[V_631]]) fastmath : (i32) -> i32 @@ -536,7 +535,7 @@ program p ! CHECK: } ! CHECK: fir.store %[[V_638]] to %[[V_83]] : !fir.ref ! CHECK: %[[V_639:[0-9]+]] = fir.declare %[[V_201]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QM__fortran_builtinsT__builtin_ieee_flag_type.10"} : (!fir.ref>) -> !fir.ref> - ! CHECK: %[[V_640:[0-9]+]] = fir.coordinate_of %[[V_639]], %[[V_203]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_640:[0-9]+]] = fir.coordinate_of %[[V_639]], _QM__fortran_builtinsT__builtin_ieee_flag_type.flag : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_641:[0-9]+]] = fir.load %[[V_640]] : !fir.ref ! CHECK: %[[V_642:[0-9]+]] = fir.convert %[[V_641]] : (i8) -> i32 ! CHECK: %[[V_643:[0-9]+]] = fir.call @_FortranAMapException(%[[V_642]]) fastmath : (i32) -> i32 diff --git a/flang/test/Lower/Intrinsics/ieee_operator_eq.f90 b/flang/test/Lower/Intrinsics/ieee_operator_eq.f90 index d2067602babb3..8f77460a010fd 100644 --- a/flang/test/Lower/Intrinsics/ieee_operator_eq.f90 +++ b/flang/test/Lower/Intrinsics/ieee_operator_eq.f90 @@ -4,10 +4,8 @@ subroutine s(r1,r2) use ieee_arithmetic, only: ieee_round_type, operator(==) type(ieee_round_type) :: r1, r2 - ! CHECK: %[[V_3:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_4:[0-9]+]] = fir.coordinate_of %arg0, %[[V_3]] : (!fir.ref>, !fir.field) -> !fir.ref - ! 
CHECK: %[[V_5:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_6:[0-9]+]] = fir.coordinate_of %arg1, %[[V_5]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_4:[0-9]+]] = fir.coordinate_of %arg0, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref + ! CHECK: %[[V_6:[0-9]+]] = fir.coordinate_of %arg1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_7:[0-9]+]] = fir.load %[[V_4]] : !fir.ref ! CHECK: %[[V_8:[0-9]+]] = fir.load %[[V_6]] : !fir.ref ! CHECK: %[[V_9:[0-9]+]] = arith.cmpi eq, %[[V_7]], %[[V_8]] : i8 @@ -30,20 +28,16 @@ subroutine s(r1,r2) ! CHECK: %[[V_1:[0-9]+]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> ! CHECK: %[[V_2:[0-9]+]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> ! CHECK: %[[V_3:[0-9]+]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_9:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_10:[0-9]+]] = fir.coordinate_of %[[V_3]], %[[V_9]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_10:[0-9]+]] = fir.coordinate_of %[[V_3]], _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c0{{.*}} to %[[V_10]] : !fir.ref - ! CHECK: %[[V_16:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! 
CHECK: %[[V_17:[0-9]+]] = fir.coordinate_of %[[V_2]], %[[V_16]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_17:[0-9]+]] = fir.coordinate_of %[[V_2]], _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c1{{.*}} to %[[V_17]] : !fir.ref ! CHECK: fir.call @_QPs(%[[V_3]], %[[V_2]]) {{.*}} : (!fir.ref>, !fir.ref>) -> () call s(ieee_to_zero, ieee_nearest) - ! CHECK: %[[V_23:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_24:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_23]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_24:[0-9]+]] = fir.coordinate_of %[[V_1]], _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c1{{.*}} to %[[V_24]] : !fir.ref - ! CHECK: %[[V_30:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_31:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_30]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_31:[0-9]+]] = fir.coordinate_of %[[V_0]], _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c1{{.*}} to %[[V_31]] : !fir.ref ! CHECK: fir.call @_QPs(%[[V_1]], %[[V_0]]) {{.*}} : (!fir.ref>, !fir.ref>) -> () call s(ieee_nearest, ieee_nearest) diff --git a/flang/test/Lower/Intrinsics/ieee_rint_int.f90 b/flang/test/Lower/Intrinsics/ieee_rint_int.f90 index 86a4aff5005bc..be3a3b92a4584 100644 --- a/flang/test/Lower/Intrinsics/ieee_rint_int.f90 +++ b/flang/test/Lower/Intrinsics/ieee_rint_int.f90 @@ -43,8 +43,7 @@ program p ! CHECK: %[[V_35:[0-9]+]]:2 = hlfir.declare %[[V_34]] ! CHECK: %[[V_36:[0-9]+]] = fir.load %[[V_19]]#0 : !fir.ref ! 
CHECK: %[[V_37:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_38:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_39:[0-9]+]] = fir.coordinate_of %[[V_35]]#1, %[[V_38]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_39:[0-9]+]] = fir.coordinate_of %[[V_35]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_40:[0-9]+]] = fir.load %[[V_39]] : !fir.ref ! CHECK: %[[V_41:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_42:[0-9]+]] = arith.andi %[[V_40]], %[[V_41]] : i8 @@ -62,8 +61,7 @@ program p ! CHECK: %[[V_49:[0-9]+]]:2 = hlfir.declare %[[V_48]] ! CHECK: %[[V_50:[0-9]+]] = fir.load %[[V_19]]#0 : !fir.ref ! CHECK: %[[V_51:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_52:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_53:[0-9]+]] = fir.coordinate_of %[[V_49]]#1, %[[V_52]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_53:[0-9]+]] = fir.coordinate_of %[[V_49]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_54:[0-9]+]] = fir.load %[[V_53]] : !fir.ref ! CHECK: %[[V_55:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_56:[0-9]+]] = arith.andi %[[V_54]], %[[V_55]] : i8 @@ -104,8 +102,7 @@ program p ! CHECK: %[[V_69:[0-9]+]]:2 = hlfir.declare %[[V_68]] ! CHECK: %[[V_70:[0-9]+]] = fir.load %[[V_21]]#0 : !fir.ref ! CHECK: %[[V_71:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! 
CHECK: %[[V_72:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_73:[0-9]+]] = fir.coordinate_of %[[V_69]]#1, %[[V_72]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_73:[0-9]+]] = fir.coordinate_of %[[V_69]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_74:[0-9]+]] = fir.load %[[V_73]] : !fir.ref ! CHECK: %[[V_75:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_76:[0-9]+]] = arith.andi %[[V_74]], %[[V_75]] : i8 @@ -124,8 +121,7 @@ program p ! CHECK: %[[V_84:[0-9]+]]:2 = hlfir.declare %[[V_83]] ! CHECK: %[[V_85:[0-9]+]] = fir.load %[[V_21]]#0 : !fir.ref ! CHECK: %[[V_86:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_87:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_88:[0-9]+]] = fir.coordinate_of %[[V_84]]#1, %[[V_87]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_88:[0-9]+]] = fir.coordinate_of %[[V_84]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_89:[0-9]+]] = fir.load %[[V_88]] : !fir.ref ! CHECK: %[[V_90:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_91:[0-9]+]] = arith.andi %[[V_89]], %[[V_90]] : i8 @@ -180,8 +176,7 @@ program p ! CHECK: %[[V_110:[0-9]+]]:2 = hlfir.declare %[[V_109]] ! CHECK: %[[V_111:[0-9]+]] = fir.load %[[V_23]]#0 : !fir.ref ! CHECK: %[[V_112:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_113:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! 
CHECK: %[[V_114:[0-9]+]] = fir.coordinate_of %[[V_110]]#1, %[[V_113]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_114:[0-9]+]] = fir.coordinate_of %[[V_110]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_115:[0-9]+]] = fir.load %[[V_114]] : !fir.ref ! CHECK: %[[V_116:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_117:[0-9]+]] = arith.andi %[[V_115]], %[[V_116]] : i8 @@ -219,8 +214,7 @@ program p ! CHECK: %[[V_130:[0-9]+]] = fir.address_of(@_QQro._QMieee_arithmeticTieee_class_type.3) : !fir.ref> ! CHECK: %[[V_131:[0-9]+]]:2 = hlfir.declare %[[V_130]] - ! CHECK: %[[V_132:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> - ! CHECK: %[[V_133:[0-9]+]] = fir.coordinate_of %[[V_131]]#1, %[[V_132]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_133:[0-9]+]] = fir.coordinate_of %[[V_131]]#1, _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_134:[0-9]+]] = fir.load %[[V_133]] : !fir.ref ! CHECK: %[[V_135:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_4) : !fir.ref> ! CHECK: %[[V_136:[0-9]+]] = fir.coordinate_of %[[V_135]], %[[V_134]] : (!fir.ref>, i8) -> !fir.ref @@ -244,8 +238,7 @@ program p ! CHECK: %[[V_144:[0-9]+]]:2 = hlfir.declare %[[V_143]] ! CHECK: %[[V_145:[0-9]+]] = fir.load %[[V_25]]#0 : !fir.ref ! CHECK: %[[V_146:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 - ! CHECK: %[[V_147:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_148:[0-9]+]] = fir.coordinate_of %[[V_144]]#1, %[[V_147]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_148:[0-9]+]] = fir.coordinate_of %[[V_144]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! 
CHECK: %[[V_149:[0-9]+]] = fir.load %[[V_148]] : !fir.ref ! CHECK: %[[V_150:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_151:[0-9]+]] = arith.andi %[[V_149]], %[[V_150]] : i8 diff --git a/flang/test/Lower/Intrinsics/ieee_rounding.f90 b/flang/test/Lower/Intrinsics/ieee_rounding.f90 index 211e8c8178787..a0c73a3ff8bcd 100644 --- a/flang/test/Lower/Intrinsics/ieee_rounding.f90 +++ b/flang/test/Lower/Intrinsics/ieee_rounding.f90 @@ -9,8 +9,7 @@ program r ! CHECK: fir.if %true{{[_0-9]*}} { if (ieee_support_rounding(ieee_down)) then - ! CHECK: %[[V_62:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_63:[0-9]+]] = fir.coordinate_of %[[V_57]]#1, %[[V_62]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_63:[0-9]+]] = fir.coordinate_of %[[V_57]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_64:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 ! CHECK: %[[V_65:[0-9]+]] = fir.convert %[[V_64]] : (i32) -> i8 ! CHECK: fir.store %[[V_65]] to %[[V_63]] : !fir.ref @@ -18,8 +17,7 @@ program r ! CHECK: %[[V_66:[0-9]+]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_ieee_round_type.0) : !fir.ref> ! CHECK: %[[V_67:[0-9]+]]:2 = hlfir.declare %[[V_66]] - ! CHECK: %[[V_68:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_69:[0-9]+]] = fir.coordinate_of %[[V_67]]#1, %[[V_68]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_69:[0-9]+]] = fir.coordinate_of %[[V_67]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_70:[0-9]+]] = fir.load %[[V_69]] : !fir.ref ! CHECK: %[[V_71:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! 
CHECK: %[[V_72:[0-9]+]] = arith.andi %[[V_70]], %[[V_71]] : i8 @@ -30,8 +28,7 @@ program r call ieee_set_rounding_mode(ieee_down) print*, 'ok' - ! CHECK: %[[V_85:[0-9]+]] = fir.field_index _QM__fortran_builtinsT__builtin_ieee_round_type.mode, !fir.type<_QM__fortran_builtinsT__builtin_ieee_round_type{_QM__fortran_builtinsT__builtin_ieee_round_type.mode:i8}> - ! CHECK: %[[V_86:[0-9]+]] = fir.coordinate_of %[[V_57]]#1, %[[V_85]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[V_86:[0-9]+]] = fir.coordinate_of %[[V_57]]#1, _QM__fortran_builtinsT__builtin_ieee_round_type.mode : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_87:[0-9]+]] = fir.load %[[V_86]] : !fir.ref ! CHECK: %[[V_88:[0-9]+]] = arith.shli %c-1{{.*}}, %c2{{.*}} : i8 ! CHECK: %[[V_89:[0-9]+]] = arith.andi %[[V_87]], %[[V_88]] : i8 diff --git a/flang/test/Lower/Intrinsics/ieee_unordered.f90 b/flang/test/Lower/Intrinsics/ieee_unordered.f90 index b7e81d53a2d75..18bb2b0009ed9 100644 --- a/flang/test/Lower/Intrinsics/ieee_unordered.f90 +++ b/flang/test/Lower/Intrinsics/ieee_unordered.f90 @@ -12,12 +12,10 @@ x = -17.0 -! CHECK: %[[V_10:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> -! CHECK: %[[V_11:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_10]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[V_11:[0-9]+]] = fir.coordinate_of %[[V_1]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c3{{.*}} to %[[V_11]] : !fir.ref -! CHECK: %[[V_12:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> -! CHECK: %[[V_13:[0-9]+]] = fir.coordinate_of %[[V_1]], %[[V_12]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[V_13:[0-9]+]] = fir.coordinate_of %[[V_1]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! 
CHECK: %[[V_14:[0-9]+]] = fir.load %[[V_13]] : !fir.ref ! CHECK: %[[V_15:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_16) : !fir.ref> ! CHECK: %[[V_16:[0-9]+]] = fir.coordinate_of %[[V_15]], %[[V_14]] : (!fir.ref>, i8) -> !fir.ref @@ -28,11 +26,9 @@ ! CHECK: fir.store %[[V_20]] to %[[V_3]] : !fir.ref y = ieee_value(y, ieee_negative_inf) -! CHECK: %[[V_26:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> -! CHECK: %[[V_27:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_26]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[V_27:[0-9]+]] = fir.coordinate_of %[[V_0]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %c2{{.*}} to %[[V_27]] : !fir.ref -! CHECK: %[[V_28:[0-9]+]] = fir.field_index _QMieee_arithmeticTieee_class_type.which, !fir.type<_QMieee_arithmeticTieee_class_type{_QMieee_arithmeticTieee_class_type.which:i8}> -! CHECK: %[[V_29:[0-9]+]] = fir.coordinate_of %[[V_0]], %[[V_28]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[V_29:[0-9]+]] = fir.coordinate_of %[[V_0]], _QMieee_arithmeticTieee_class_type.which : (!fir.ref>) -> !fir.ref ! CHECK: %[[V_30:[0-9]+]] = fir.load %[[V_29]] : !fir.ref ! CHECK: %[[V_31:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_16) : !fir.ref> ! CHECK: %[[V_32:[0-9]+]] = fir.coordinate_of %[[V_31]], %[[V_30]] : (!fir.ref>, i8) -> !fir.ref diff --git a/flang/test/Lower/Intrinsics/storage_size.f90 b/flang/test/Lower/Intrinsics/storage_size.f90 index 3dc135bbf6fbc..d17602b4d4089 100644 --- a/flang/test/Lower/Intrinsics/storage_size.f90 +++ b/flang/test/Lower/Intrinsics/storage_size.f90 @@ -117,8 +117,7 @@ integer function polymorphic_value(t) result(size) ! CHECK-LABEL: func.func @_QMstorage_size_testPpolymorphic_value( ! CHECK-SAME: %[[T:.*]]: !fir.ref>>>}>> {fir.bindc_name = "t"}) -> i32 { ! 
CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "size", uniq_name = "_QMstorage_size_testFpolymorphic_valueEsize"} -! CHECK: %[[FIELD_P:.*]] = fir.field_index p, !fir.type<_QMstorage_size_testTp3{p:!fir.class>>>}> -! CHECK: %[[COORD_P:.*]] = fir.coordinate_of %[[T]], %[[FIELD_P]] : (!fir.ref>>>}>>, !fir.field) -> !fir.ref>>>> +! CHECK: %[[COORD_P:.*]] = fir.coordinate_of %[[T]], p : (!fir.ref>>>}>>) -> !fir.ref>>>> ! CHECK: %[[LOAD_COORD_P:.*]] = fir.load %[[COORD_P]] : !fir.ref>>>> ! CHECK: %[[C0:.*]] = arith.constant 0 : index ! CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[LOAD_COORD_P]], %[[C0]] : (!fir.class>>>, index) -> (index, index, index) diff --git a/flang/test/Lower/Intrinsics/transfer.f90 b/flang/test/Lower/Intrinsics/transfer.f90 index b75fe2e826561..2cc7e93f86f51 100644 --- a/flang/test/Lower/Intrinsics/transfer.f90 +++ b/flang/test/Lower/Intrinsics/transfer.f90 @@ -106,8 +106,7 @@ integer function trans_test3(p) ! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_19]] : (!fir.ref>) -> !fir.ref ! CHECK: fir.call @_FortranAAssign(%[[VAL_21]], %[[VAL_22]], %[[VAL_23]], %[[VAL_20]]) {{.*}}: (!fir.ref>, !fir.box, !fir.ref, i32) -> () ! CHECK: fir.freemem %[[VAL_17]] - ! CHECK: %[[VAL_25:.*]] = fir.field_index x, !fir.type<_QFtrans_test3Tobj{x:i32}> - ! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_25]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %[[VAL_3]], x : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref ! CHECK: fir.store %[[VAL_27]] to %[[VAL_4]] : !fir.ref ! 
CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_4]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/declare-mapper.f90 b/flang/test/Lower/OpenMP/declare-mapper.f90 index fa7f23c182a68..e12becbc5d9a9 100644 --- a/flang/test/Lower/OpenMP/declare-mapper.f90 +++ b/flang/test/Lower/OpenMP/declare-mapper.f90 @@ -39,8 +39,7 @@ subroutine declare_mapper_1 !CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> index !CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_6]]#0 : index !CHECK: %[[VAL_16:.*]] = omp.map.bounds lower_bound(%[[VAL_10]] : index) upper_bound(%[[VAL_15]] : index) extent(%[[VAL_6]]#1 : index) stride(%[[VAL_8]] : index) start_idx(%[[VAL_6]]#0 : index) - !CHECK: %[[VAL_17:.*]] = arith.constant 1 : index - !CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_1]]#0, %[[VAL_17]] : (!fir.ref<[[MY_TYPE]]>, index) -> !fir.ref>>> + !CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_1]]#0, values : (!fir.ref<[[MY_TYPE]]>) -> !fir.ref>>> !CHECK: %[[VAL_19:.*]] = fir.box_offset %[[VAL_18]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[VAL_20:.*]] = omp.map.info var_ptr(%[[VAL_18]] : !fir.ref>>>, i32) var_ptr_ptr(%[[VAL_19]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_16]]) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[VAL_21:.*]] = omp.map.info var_ptr(%[[VAL_18]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {name = "var%[[VAL_22:.*]](1:var%[[VAL_23:.*]])"} @@ -132,8 +131,7 @@ subroutine declare_mapper_3 !CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> index !CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_6]]#0 : index !CHECK: %[[VAL_16:.*]] = omp.map.bounds lower_bound(%[[VAL_10]] : index) upper_bound(%[[VAL_15]] : index) extent(%[[VAL_6]]#1 : index) stride(%[[VAL_8]] : index) start_idx(%[[VAL_6]]#0 : index) - !CHECK: %[[VAL_17:.*]] = arith.constant 1 : index - !CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_1]]#0, %[[VAL_17]] : (!fir.ref<[[MY_TYPE]]>, index) -> !fir.ref>>> + !CHECK: 
%[[VAL_18:.*]] = fir.coordinate_of %[[VAL_1]]#0, values : (!fir.ref<[[MY_TYPE]]>) -> !fir.ref>>> !CHECK: %[[VAL_19:.*]] = fir.box_offset %[[VAL_18]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[VAL_20:.*]] = omp.map.info var_ptr(%[[VAL_18]] : !fir.ref>>>, i32) var_ptr_ptr(%[[VAL_19]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_16]]) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[VAL_21:.*]] = omp.map.info var_ptr(%[[VAL_18]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {name = "var%[[VAL_22:.*]](1:var%[[VAL_23:.*]])"} diff --git a/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 b/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 index 28a2b9b5b967b..768d782848b53 100644 --- a/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 +++ b/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 @@ -3,8 +3,7 @@ !CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.type<[[ONE_LAYER_TY:_QFdtype_alloca_map_op_blockTone_layer{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}]]> {{.*}} !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {{{.*}}} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true} -!CHECK: %[[MEMBER_INDEX:.*]] = arith.constant 4 : index -!CHECK: %[[MEMBER_COORD:.*]] = fir.coordinate_of %[[DECLARE]]#0, %[[MEMBER_INDEX]] : (!fir.ref>, index) -> !fir.ref>>> +!CHECK: %[[MEMBER_COORD:.*]] = fir.coordinate_of %[[DECLARE]]#0, array_j : (!fir.ref>) -> !fir.ref>>> !CHECK: %[[MEMBER_BASE_ADDR:.*]] = fir.box_offset %[[MEMBER_COORD]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, i32) var_ptr_ptr(%[[MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: 
%[[MAP_MEMBER_DESCRIPTOR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {{.*}} @@ -34,14 +33,12 @@ subroutine dtype_alloca_map_op_block() !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {{{.*}}} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true} !CHECK: %[[LOAD_DTYPE:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref>>> -!CHECK: %[[MEMBER_INDEX:.*]] = arith.constant 4 : index -!CHECK: %[[MEMBER_COORD:.*]] = fir.coordinate_of %[[LOAD_DTYPE]], %[[MEMBER_INDEX]] : (!fir.box>>, index) -> !fir.ref>>> +!CHECK: %[[MEMBER_COORD:.*]] = fir.coordinate_of %[[LOAD_DTYPE]], array_j : (!fir.box>>) -> !fir.ref>>> !CHECK: %[[MEMBER_BASE_ADDR:.*]] = fir.box_offset %[[MEMBER_COORD]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, i32) var_ptr_ptr(%[[MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {{.*}} !CHECK: %[[LOAD_DTYPE:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref>>> -!CHECK: %[[MEMBER_COORD:.*]] = arith.constant 5 : index -!CHECK: %[[REGULAR_MEMBER:.*]] = fir.coordinate_of %[[LOAD_DTYPE]], %[[MEMBER_COORD]] : (!fir.box>>, index) -> !fir.ref +!CHECK: %[[REGULAR_MEMBER:.*]] = fir.coordinate_of %[[LOAD_DTYPE]], k : (!fir.box>>) -> !fir.ref !CHECK: %[[MAP_REGULAR_MEMBER:.*]] = omp.map.info var_ptr(%[[REGULAR_MEMBER]] : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {{.*}} !CHECK: %[[DTYPE_BASE_ADDR:.*]] = fir.box_offset %[[DECLARE]]#1 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_DTYPE_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : 
!fir.ref>>>, !fir.type<[[REC_TY]]>) var_ptr_ptr(%[[DTYPE_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr>> {{.*}} @@ -73,18 +70,14 @@ subroutine alloca_dtype_op_block_add() !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {{.*}} : (!fir.ref}>>>>) -> (!fir.ref}>>>>, !fir.ref}>>>>) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true} !CHECK: %[[LOAD:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref}>>>> -!CHECK: %[[NESTED_DTYPE_INDEX:.*]] = arith.constant 6 : index -!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[LOAD]], %[[NESTED_DTYPE_INDEX]] : (!fir.box}>>>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}]]>> -!CHECK: %[[NESTED_MEMBER_INDEX:.*]] = arith.constant 2 : index -!CHECK: %[[NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], %[[NESTED_MEMBER_INDEX]] : (!fir.ref>, index) -> !fir.ref>>> +!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[LOAD]], nest : (!fir.box}>>>) -> !fir.ref,array_k:!fir.box>>,k:i32}]]>> +!CHECK: %[[NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], array_k : (!fir.ref>) -> !fir.ref>>> !CHECK: %[[NESTED_MEMBER_BASE_ADDR:.*]] = fir.box_offset %[[NESTED_MEMBER_COORD]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_NESTED_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, i32) var_ptr_ptr(%[[NESTED_MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_NESTED_MEMBER_COORD:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {{.*}} !CHECK: %[[LOAD:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref}>>>> -!CHECK: %[[NESTED_DTYPE_INDEX:.*]] = arith.constant 6 : index -!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[LOAD]], %[[NESTED_DTYPE_INDEX]] : (!fir.box}>>>, 
index) -> !fir.ref> -!CHECK: %[[NESTED_MEMBER_INDEX:.*]] = arith.constant 3 : index -!CHECK: %[[REGULAR_NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], %[[NESTED_MEMBER_INDEX]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[LOAD]], nest : (!fir.box}>>>) -> !fir.ref> +!CHECK: %[[REGULAR_NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], k : (!fir.ref>) -> !fir.ref !CHECK: %[[MAP_REGULAR_NESTED_MEMBER:.*]] = omp.map.info var_ptr(%[[REGULAR_NESTED_MEMBER_COORD]] : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {{.*}} !CHECK: %[[DTYPE_BASE_ADDR:.*]] = fir.box_offset %[[DECLARE]]#1 base_addr : (!fir.ref}>>>>) -> !fir.llvm_ptr}>>> !CHECK: %[[MAP_DTYPE_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref}>>>>, !fir.type<[[REC_TY]]>}>) var_ptr_ptr(%[[DTYPE_BASE_ADDR]] : !fir.llvm_ptr}>>>) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr}>>> {{.*}} @@ -123,10 +116,8 @@ subroutine alloca_nest_dype_map_op_block_add() !CHECK: %[[ALLOCA]] = fir.alloca !fir.type<[[REC_TY:_QFnest_dtype_alloca_map_op_block_addTtop_layer{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFnest_dtype_alloca_map_op_block_addTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}]]> {{.*}} !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA:.*]] {{.*}} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) {stride_in_bytes = true} -!CHECK: %[[NESTED_DTYPE_INDEX:.*]] = arith.constant 6 : index -!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[DECLARE]]#0, %[[NESTED_DTYPE_INDEX]] : (!fir.ref>, index) -> !fir.ref,array_k:!fir.box>>,k:i32}]]>> -!CHECK: %[[NESTED_MEMBER_INDEX:.*]] = arith.constant 2 : index -!CHECK: %[[NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], 
%[[NESTED_MEMBER_INDEX]] : (!fir.ref>, index) -> !fir.ref>>> +!CHECK: %[[NESTED_DTYPE_COORD:.*]] = fir.coordinate_of %[[DECLARE]]#0, nest : (!fir.ref>) -> !fir.ref,array_k:!fir.box>>,k:i32}]]>> +!CHECK: %[[NESTED_MEMBER_COORD:.*]] = fir.coordinate_of %[[NESTED_DTYPE_COORD]], array_k : (!fir.ref>) -> !fir.ref>>> !CHECK: %[[NESTED_MEMBER_BASE_ADDR:.*]] = fir.box_offset %[[NESTED_MEMBER_COORD]] base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_NESTED_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, i32) var_ptr_ptr(%[[NESTED_MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_NESTED_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) -> !fir.ref>>> {{.*}} diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index bf801e69405b9..3e1680a294b6e 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -508,7 +508,7 @@ subroutine omp_target_device_ptr !CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {name = "a"} !CHECK: omp.target_data map_entries(%[[MAP]]{{.*}}) use_device_ptr({{.*}} -> %[[VAL_1:.*]] : !fir.ref>) !$omp target data map(tofrom: a) use_device_ptr(a) - !CHECK: {{.*}} = fir.coordinate_of %[[VAL_1:.*]], {{.*}} : (!fir.ref>, !fir.field) -> !fir.ref + !CHECK: {{.*}} = fir.coordinate_of %[[VAL_1:.*]], __address : (!fir.ref>) -> !fir.ref a = c_loc(b) !CHECK: omp.terminator !$omp end target data diff --git a/flang/test/Lower/array-elemental-calls-2.f90 b/flang/test/Lower/array-elemental-calls-2.f90 index 2243bfdd0b289..2674b07dece17 100644 --- a/flang/test/Lower/array-elemental-calls-2.f90 +++ b/flang/test/Lower/array-elemental-calls-2.f90 @@ -185,10 +185,8 @@ integer elemental function elem_func_derived(x) ! CHECK: fir.do_loop ! 
CHECK: %[[VAL_21:.*]] = fir.array_access %{{.}}, %{{.*}} ! CHECK: %[[VAL_22:.*]] = fir.no_reassoc %[[VAL_21]] : !fir.ref> -! CHECK: %[[FIELD:.*]] = fir.field_index i, !fir.type<_QMtest_opsFcheck_parentheses_derivedTt{i:i32}> -! CHECK: %[[FROM:.*]] = fir.coordinate_of %[[VAL_22]], %[[FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[FIELD2:.*]] = fir.field_index i, !fir.type<_QMtest_opsFcheck_parentheses_derivedTt{i:i32}> -! CHECK: %[[TO:.*]] = fir.coordinate_of %[[VAL_0]], %[[FIELD2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[FROM:.*]] = fir.coordinate_of %[[VAL_22]], i : (!fir.ref>) -> !fir.ref +! CHECK: %[[TO:.*]] = fir.coordinate_of %[[VAL_0]], i : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL:.*]] = fir.load %[[FROM]] : !fir.ref ! CHECK: fir.store %[[VAL]] to %[[TO]] : !fir.ref ! CHECK: %{{.*}} = fir.call @_QPelem_func_derived(%[[VAL_0]]) {{.*}}: (!fir.ref>) -> i32 diff --git a/flang/test/Lower/c-interoperability-c-pointer.f90 b/flang/test/Lower/c-interoperability-c-pointer.f90 index 9700440f6650b..c62f48fa9a1be 100644 --- a/flang/test/Lower/c-interoperability-c-pointer.f90 +++ b/flang/test/Lower/c-interoperability-c-pointer.f90 @@ -3,12 +3,10 @@ ! CHECK-LABEL: func.func @_QPtest( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "ptr1"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref> {fir.bindc_name = "ptr2"}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> !fir.ref -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! 
CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i64) -> !fir.ref ! CHECK: fir.call @c_func(%[[VAL_5]], %[[VAL_9]]) {{.*}}: (!fir.ref, !fir.ref) -> () @@ -35,14 +33,11 @@ subroutine c_func(c_t1, c_t2) bind(c, name="c_func") ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "local", uniq_name = "_QFtest_callee_c_ptrElocal"} ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[VAL_8:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_8]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: fir.store %[[VAL_10]] to %[[VAL_9]] : !fir.ref ! 
CHECK: return @@ -59,15 +54,12 @@ subroutine test_callee_c_ptr(ptr1) bind(c) ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> {bindc_name = "local", uniq_name = "_QFtest_callee_c_funptrElocal"} ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref -! CHECK: %[[VAL_6:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_6]] : (!fir.ref>, !fir.field) -> !fir.ref -! CHECK: %[[VAL_8:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], %[[VAL_8]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_5]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: fir.store %[[VAL_10]] to %[[VAL_9]] : !fir.ref ! CHECK: return diff --git a/flang/test/Lower/c_ptr-constant-init.f90 b/flang/test/Lower/c_ptr-constant-init.f90 index da56670438aa3..b75ed55f1a4f3 100644 --- a/flang/test/Lower/c_ptr-constant-init.f90 +++ b/flang/test/Lower/c_ptr-constant-init.f90 @@ -13,7 +13,6 @@ end subroutine test ! 
CHECK-LABEL: fir.global internal @_QQro.1x_QM__fortran_builtinsT__builtin_c_ptr.0 constant : !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> { ! CHECK: %[[VAL_0:.*]] = fir.undefined !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> ! CHECK: %[[VAL_1:.*]] = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_3:.*]] = arith.constant 0 : i64 ! CHECK: %[[VAL_4:.*]] = fir.insert_value %[[VAL_1]], %[[VAL_3]], ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_5:.*]] = fir.insert_value %[[VAL_0]], %[[VAL_4]], [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>> @@ -33,7 +32,6 @@ end subroutine test2 ! CHECK-LABEL: fir.global internal @_QQro.1x_QM__fortran_builtinsT__builtin_c_funptr.1 constant : !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> { ! CHECK: %[[VAL_0:.*]] = fir.undefined !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> ! CHECK: %[[VAL_1:.*]] = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! CHECK: %[[VAL_3:.*]] = arith.constant 0 : i64 ! 
CHECK: %[[VAL_4:.*]] = fir.insert_value %[[VAL_1]], %[[VAL_3]], ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! CHECK: %[[VAL_5:.*]] = fir.insert_value %[[VAL_0]], %[[VAL_4]], [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>>, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>) -> !fir.array<1x!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>> diff --git a/flang/test/Lower/call-by-value.f90 b/flang/test/Lower/call-by-value.f90 index 32b9c79e11e68..3b551014b6e32 100644 --- a/flang/test/Lower/call-by-value.f90 +++ b/flang/test/Lower/call-by-value.f90 @@ -105,8 +105,7 @@ subroutine test_char_value(x) bind(c) ! CHECK-LABEL: func.func @_QPtest_cptr_value( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], __address : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.ref) -> i64 ! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.declare %[[VAL_1]] diff --git a/flang/test/Lower/call-copy-in-out.f90 b/flang/test/Lower/call-copy-in-out.f90 index fd3b5c342a48f..1eb2c3ffc0b0e 100644 --- a/flang/test/Lower/call-copy-in-out.f90 +++ b/flang/test/Lower/call-copy-in-out.f90 @@ -283,8 +283,7 @@ subroutine whole_components() end type ! CHECK: %[[a:.*]] = fir.alloca !fir.type<_QFwhole_componentsTt{i:!fir.array<100xi32>}> type(t) :: a - ! 
CHECK: %[[field:.*]] = fir.field_index i, !fir.type<_QFwhole_componentsTt{i:!fir.array<100xi32>}> - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[a]], %[[field]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[a]], i : (!fir.ref}>>) -> !fir.ref> ! CHECK: fir.call @_QPbar_integer(%[[addr]]) {{.*}}: (!fir.ref>) -> () call bar_integer(a%i) end subroutine @@ -297,8 +296,7 @@ subroutine whole_component_contiguous_pointer() end type ! CHECK: %[[a:.*]] = fir.alloca !fir.type<_QFwhole_component_contiguous_pointerTt{i:!fir.box>>}> type(t) :: a - ! CHECK: %[[field:.*]] = fir.field_index i, !fir.type<_QFwhole_component_contiguous_pointerTt{i:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a]], %[[field]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a]], i : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[box_load:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[box_load]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.ptr>) -> !fir.ref> @@ -314,8 +312,7 @@ subroutine whole_component_contiguous_char_pointer() end type ! CHECK: %[[a:.*]] = fir.alloca !fir.type<_QFwhole_component_contiguous_char_pointerTt{i:!fir.box>>>}> type(t) :: a - ! CHECK: %[[field:.*]] = fir.field_index i, !fir.type<_QFwhole_component_contiguous_char_pointerTt{i:!fir.box>>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a]], %[[field]] : (!fir.ref>>>}>>, !fir.field) -> !fir.ref>>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a]], i : (!fir.ref>>>}>>) -> !fir.ref>>>> ! CHECK: %[[box_load:.*]] = fir.load %[[coor]] : !fir.ref>>>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[box_load]] : (!fir.box>>>) -> !fir.ptr>> ! 
CHECK: %[[len:.*]] = fir.box_elesize %[[box_load]] : (!fir.box>>>) -> index diff --git a/flang/test/Lower/derived-allocatable-components.f90 b/flang/test/Lower/derived-allocatable-components.f90 index 850a372baf3c1..1debb275d6276 100644 --- a/flang/test/Lower/derived-allocatable-components.f90 +++ b/flang/test/Lower/derived-allocatable-components.f90 @@ -79,8 +79,7 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(real_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a0{p:!fir.box>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], %[[fld]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[load]] : (!fir.box>) -> !fir.heap ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.heap) -> !fir.ref @@ -88,16 +87,14 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) call takes_real_scalar(a0_0%p) ! CHECK: %[[a0_1_coor:.*]] = fir.coordinate_of %[[arg2]], %{{.*}} : (!fir.ref>}>>>, i64) -> !fir.ref>}>> - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a0{p:!fir.box>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_1_coor]], %[[fld]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_1_coor]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[load]] : (!fir.box>) -> !fir.heap ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.heap) -> !fir.ref ! CHECK: fir.call @_QPtakes_real_scalar(%[[cast]]) {{.*}}: (!fir.ref) -> () call takes_real_scalar(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a1{p:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg1]], %[[fld]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! 
CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg1]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[box:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] : (!fir.box>>) -> !fir.heap> ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) @@ -108,8 +105,7 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) call takes_real_scalar(a1_0%p(7)) ! CHECK: %[[a1_1_coor:.*]] = fir.coordinate_of %[[arg3]], %{{.*}} : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a1{p:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_1_coor]], %[[fld]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_1_coor]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[box:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] : (!fir.box>>) -> !fir.heap> ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) @@ -122,8 +118,7 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) ! CHECK-LABEL: func @_QMacompPref_array_real_a( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>}>>{{.*}}, %[[VAL_1:.*]]: !fir.ref>>}>>>{{.*}}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a1{p:!fir.box>>}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref>>> ! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_4]], %[[VAL_5]] : (!fir.box>>, index) -> (index, index, index) @@ -143,8 +138,7 @@ subroutine ref_scalar_real_a(a0_0, a1_0, a0_1, a1_1) ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_17]], %[[VAL_18]] : i64 ! 
CHECK: %[[VAL_20:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_19]] : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> -! CHECK: %[[VAL_21:.*]] = fir.field_index p, !fir.type<_QMacompTreal_a1{p:!fir.box>>}> -! CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_20]], %[[VAL_21]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_20]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref>>> ! CHECK: %[[VAL_24:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_24]] : (!fir.box>>, index) -> (index, index, index) @@ -175,8 +169,7 @@ subroutine ref_scalar_cst_char_a(a0_0, a1_0, a0_1, a1_1) type(cst_char_a0) :: a0_0, a0_1(100) type(cst_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: %[[boxchar:.*]] = fir.emboxchar %[[addr]], %c10{{.*}} @@ -184,8 +177,7 @@ subroutine ref_scalar_cst_char_a(a0_0, a1_0, a0_1, a1_1) call takes_char_scalar(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: %[[boxchar:.*]] = fir.emboxchar %[[addr]], %c10{{.*}} @@ -193,8 +185,7 @@ subroutine ref_scalar_cst_char_a(a0_0, a1_0, a0_1, a1_1) call takes_char_scalar(a0_1(5)%p) - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[base:.*]] = fir.box_addr %[[box]] ! 
CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} @@ -207,8 +198,7 @@ subroutine ref_scalar_cst_char_a(a0_0, a1_0, a0_1, a1_1) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[base:.*]] = fir.box_addr %[[box]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} @@ -227,8 +217,7 @@ subroutine ref_scalar_def_char_a(a0_0, a1_0, a0_1, a1_1) type(def_char_a0) :: a0_0, a0_1(100) type(def_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] @@ -237,8 +226,7 @@ subroutine ref_scalar_def_char_a(a0_0, a1_0, a0_1, a1_1) call takes_char_scalar(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] @@ -247,8 +235,7 @@ subroutine ref_scalar_def_char_a(a0_0, a1_0, a0_1, a1_1) call takes_char_scalar(a0_1(5)%p) - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] @@ -267,8 +254,7 @@ subroutine ref_scalar_def_char_a(a0_0, a1_0, a0_1, a1_1) ! 
CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] @@ -293,45 +279,37 @@ subroutine ref_scalar_derived(a0_0, a1_0, a0_1, a1_1) type(derived_a0) :: a0_0, a0_1(100) type(derived_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(a0_0%p%x) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(a0_1(5)%p%x) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 ! CHECK: %[[index:.*]] = arith.subi %c7{{.*}}, %[[lb]] : i64 ! CHECK: %[[elem:.*]] = fir.coordinate_of %[[box]], %[[index]] - ! 
CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(a1_0%p(7)%x) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 ! CHECK: %[[index:.*]] = arith.subi %c7{{.*}}, %[[lb]] : i64 ! CHECK: %[[elem:.*]] = fir.coordinate_of %[[box]], %[[index]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(a1_1(5)%p(7)%x) @@ -346,25 +324,21 @@ subroutine ref_scalar_derived(a0_0, a1_0, a0_1, a1_1) subroutine pass_real_a(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(real_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.call @_QPtakes_real_scalar_pointer(%[[coor]]) call takes_real_scalar_pointer(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.call @_QPtakes_real_scalar_pointer(%[[coor]]) call takes_real_scalar_pointer(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! 
CHECK: fir.call @_QPtakes_real_array_pointer(%[[coor]]) call takes_real_array_pointer(a1_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.call @_QPtakes_real_array_pointer(%[[coor]]) call takes_real_array_pointer(a1_1(5)%p) end subroutine @@ -378,28 +352,24 @@ subroutine pass_real_a(a0_0, a1_0, a0_1, a1_1) subroutine allocated_p(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(def_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(allocated(a0_0%p)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(allocated(a0_1(5)%p)) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(allocated(a1_0%p)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! 
CHECK: fir.box_addr %[[box]] call takes_logical(allocated(a1_1(5)%p)) @@ -414,25 +384,21 @@ subroutine allocated_p(a0_0, a1_0, a0_1, a1_1) subroutine allocate_real(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(real_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a1_1(5)%p(100)) end subroutine @@ -442,25 +408,21 @@ subroutine allocate_real(a0_0, a1_0, a0_1, a1_1) subroutine allocate_cst_char(a0_0, a1_0, a0_1, a1_1) type(cst_char_a0) :: a0_0, a0_1(100) type(cst_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! 
CHECK: fir.store {{.*}} to %[[coor]] allocate(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(a1_1(5)%p(100)) end subroutine @@ -470,25 +432,21 @@ subroutine allocate_cst_char(a0_0, a1_0, a0_1, a1_1) subroutine allocate_def_char(a0_0, a1_0, a0_1, a1_1) type(def_char_a0) :: a0_0, a0_1(100) type(def_char_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::a1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! 
CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::a1_1(5)%p(100)) end subroutine @@ -502,25 +460,21 @@ subroutine allocate_def_char(a0_0, a1_0, a0_1, a1_1) subroutine deallocate_real(a0_0, a1_0, a0_1, a1_1) type(real_a0) :: a0_0, a0_1(100) type(real_a1) :: a1_0, a1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] deallocate(a0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] deallocate(a0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[a1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] deallocate(a1_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[a1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] deallocate(a1_1(5)%p) end subroutine @@ -538,17 +492,13 @@ subroutine test_recursive(x) end type type(t) :: x - ! CHECK: %[[fldNext1:.*]] = fir.field_index next - ! CHECK: %[[next1:.*]] = fir.coordinate_of %[[x]], %[[fldNext1]] + ! CHECK: %[[next1:.*]] = fir.coordinate_of %[[x]], next ! CHECK: %[[nextBox1:.*]] = fir.load %[[next1]] - ! CHECK: %[[fldNext2:.*]] = fir.field_index next - ! CHECK: %[[next2:.*]] = fir.coordinate_of %[[nextBox1]], %[[fldNext2]] + ! CHECK: %[[next2:.*]] = fir.coordinate_of %[[nextBox1]], next ! CHECK: %[[nextBox2:.*]] = fir.load %[[next2]] - ! CHECK: %[[fldNext3:.*]] = fir.field_index next - ! 
CHECK: %[[next3:.*]] = fir.coordinate_of %[[nextBox2]], %[[fldNext3]] + ! CHECK: %[[next3:.*]] = fir.coordinate_of %[[nextBox2]], next ! CHECK: %[[nextBox3:.*]] = fir.load %[[next3]] - ! CHECK: %[[fldi:.*]] = fir.field_index i - ! CHECK: %[[i:.*]] = fir.coordinate_of %[[nextBox3]], %[[fldi]] + ! CHECK: %[[i:.*]] = fir.coordinate_of %[[nextBox3]], i ! CHECK: %[[nextBox3:.*]] = fir.load %[[i]] : !fir.ref print *, x%next%next%next%i end subroutine diff --git a/flang/test/Lower/derived-pointer-components.f90 b/flang/test/Lower/derived-pointer-components.f90 index b01cb5f8deb60..a55618dc16a5f 100644 --- a/flang/test/Lower/derived-pointer-components.f90 +++ b/flang/test/Lower/derived-pointer-components.f90 @@ -79,8 +79,7 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p0{p:!fir.box>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], %[[fld]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[load]] : (!fir.box>) -> !fir.ptr ! CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.ptr) -> !fir.ref @@ -88,16 +87,14 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) call takes_real_scalar(p0_0%p) ! CHECK: %[[p0_1_coor:.*]] = fir.coordinate_of %[[arg2]], %{{.*}} : (!fir.ref>}>>>, i64) -> !fir.ref>}>> - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p0{p:!fir.box>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_1_coor]], %[[fld]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_1_coor]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>> ! CHECK: %[[addr:.*]] = fir.box_addr %[[load]] : (!fir.box>) -> !fir.ptr ! 
CHECK: %[[cast:.*]] = fir.convert %[[addr]] : (!fir.ptr) -> !fir.ref ! CHECK: fir.call @_QPtakes_real_scalar(%[[cast]]) {{.*}}: (!fir.ref) -> () call takes_real_scalar(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg1]], %[[fld]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg1]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[load]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 @@ -107,8 +104,7 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) call takes_real_scalar(p1_0%p(7)) ! CHECK: %[[p1_1_coor:.*]] = fir.coordinate_of %[[arg3]], %{{.*}} : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> - ! CHECK: %[[fld:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_1_coor]], %[[fld]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_1_coor]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[load:.*]] = fir.load %[[coor]] : !fir.ref>>> ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[load]], %c0{{.*}} : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 @@ -120,8 +116,7 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) ! CHECK-LABEL: func @_QMpcompPref_array_real_p( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>}>>{{.*}}, %[[VAL_1:.*]]: !fir.ref>>}>>>{{.*}}) { -! CHECK: %[[VAL_2:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> -! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_2]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_0]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]] : !fir.ref>>> ! 
CHECK: %[[VAL_5:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_4]], %[[VAL_5]] : (!fir.box>>, index) -> (index, index, index) @@ -140,8 +135,7 @@ subroutine ref_scalar_real_p(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_16]], %[[VAL_17]] : i64 ! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_18]] : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> -! CHECK: %[[VAL_20:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> -! CHECK: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_19]], %[[VAL_20]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_19]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref>>> ! CHECK: %[[VAL_23:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_24:.*]]:3 = fir.box_dims %[[VAL_22]], %[[VAL_23]] : (!fir.box>>, index) -> (index, index, index) @@ -171,31 +165,27 @@ subroutine ref_array_real_p(p1_0, p1_1) subroutine assign_scalar_real_p(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: fir.store {{.*}} to %[[addr]] p0_0%p = 1. ! CHECK: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: fir.store {{.*}} to %[[addr]] p0_1(5)%p = 2. - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! 
CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], {{.*}} ! CHECK: fir.store {{.*}} to %[[addr]] p1_0%p(7) = 3. ! CHECK: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], {{.*}} ! CHECK: fir.store {{.*}} to %[[addr]] @@ -208,8 +198,7 @@ subroutine ref_scalar_cst_char_p(p0_0, p1_0, p0_1, p1_1) type(cst_char_p0) :: p0_0, p0_1(100) type(cst_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: %[[boxchar:.*]] = fir.emboxchar %[[addr]], %c10{{.*}} @@ -217,8 +206,7 @@ subroutine ref_scalar_cst_char_p(p0_0, p1_0, p0_1, p1_1) call takes_char_scalar(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[addr:.*]] = fir.box_addr %[[box]] ! CHECK: %[[boxchar:.*]] = fir.emboxchar %[[addr]], %c10{{.*}} @@ -226,8 +214,7 @@ subroutine ref_scalar_cst_char_p(p0_0, p1_0, p0_1, p1_1) call takes_char_scalar(p0_1(5)%p) - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! 
CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 @@ -239,8 +226,7 @@ subroutine ref_scalar_cst_char_p(p0_0, p1_0, p0_1, p1_1) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 @@ -258,8 +244,7 @@ subroutine ref_scalar_def_char_p(p0_0, p1_0, p0_1, p1_1) type(def_char_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] @@ -268,8 +253,7 @@ subroutine ref_scalar_def_char_p(p0_0, p1_0, p0_1, p1_1) call takes_char_scalar(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[addr:.*]] = fir.box_addr %[[box]] @@ -278,8 +262,7 @@ subroutine ref_scalar_def_char_p(p0_0, p1_0, p0_1, p1_1) call takes_char_scalar(p0_1(5)%p) - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} @@ -292,8 +275,7 @@ subroutine ref_scalar_def_char_p(p0_0, p1_0, p0_1, p1_1) ! 
CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK-DAG: %[[len:.*]] = fir.box_elesize %[[box]] ! CHECK-DAG: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} @@ -312,45 +294,37 @@ subroutine ref_scalar_derived(p0_0, p1_0, p0_1, p1_1) type(derived_p0) :: p0_0, p0_1(100) type(derived_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(p0_0%p%x) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[box]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(p0_1(5)%p%x) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 ! CHECK: %[[index:.*]] = arith.subi %c7{{.*}}, %[[lb]] ! CHECK: %[[elem:.*]] = fir.coordinate_of %[[box]], %[[index]] - ! 
CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(p1_0%p(7)%x) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[dims]]#0 : (index) -> i64 ! CHECK: %[[index:.*]] = arith.subi %c7{{.*}}, %[[lb]] ! CHECK: %[[elem:.*]] = fir.coordinate_of %[[box]], %[[index]] - ! CHECK: %[[fldx:.*]] = fir.field_index x - ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], %[[fldx]] + ! CHECK: %[[addr:.*]] = fir.coordinate_of %[[elem]], x ! CHECK: fir.call @_QPtakes_real_scalar(%[[addr]]) call takes_real_scalar(p1_1(5)%p(7)%x) @@ -365,25 +339,21 @@ subroutine ref_scalar_derived(p0_0, p1_0, p0_1, p1_1) subroutine pass_real_p(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.call @_QPtakes_real_scalar_pointer(%[[coor]]) call takes_real_scalar_pointer(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.call @_QPtakes_real_scalar_pointer(%[[coor]]) call takes_real_scalar_pointer(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! 
CHECK: fir.call @_QPtakes_real_array_pointer(%[[coor]]) call takes_real_array_pointer(p1_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.call @_QPtakes_real_array_pointer(%[[coor]]) call takes_real_array_pointer(p1_1(5)%p) end subroutine @@ -397,28 +367,24 @@ subroutine pass_real_p(p0_0, p1_0, p0_1, p1_1) subroutine associated_p(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(associated(p0_0%p)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(associated(p0_1(5)%p)) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! CHECK: fir.box_addr %[[box]] call takes_logical(associated(p1_0%p)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: %[[box:.*]] = fir.load %[[coor]] ! 
CHECK: fir.box_addr %[[box]] call takes_logical(associated(p1_1(5)%p)) @@ -433,25 +399,21 @@ subroutine associated_p(p0_0, p1_0, p0_1, p1_1) subroutine passoc_real(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p0_0%p => real_target ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p0_1(5)%p => real_target - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p1_0%p => real_array_target ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p1_1(5)%p => real_array_target end subroutine @@ -461,25 +423,21 @@ subroutine passoc_real(p0_0, p1_0, p0_1, p1_1) subroutine passoc_char(p0_0, p1_0, p0_1, p1_1) type(cst_char_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p0_0%p => char_target ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! 
CHECK: fir.store {{.*}} to %[[coor]] p0_1(5)%p => char_target - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p1_0%p => char_array_target ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] p1_1(5)%p => char_array_target end subroutine @@ -493,25 +451,21 @@ subroutine passoc_char(p0_0, p1_0, p0_1, p1_1) subroutine nullify_test(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] nullify(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] nullify(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] nullify(p1_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! 
CHECK: fir.store {{.*}} to %[[coor]] nullify(p1_1(5)%p) end subroutine @@ -525,25 +479,21 @@ subroutine nullify_test(p0_0, p1_0, p0_1, p1_1) subroutine allocate_real(p0_0, p1_0, p0_1, p1_1) type(real_p0) :: p0_0, p0_1(100) type(real_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p1_1(5)%p(100)) end subroutine @@ -553,25 +503,21 @@ subroutine allocate_real(p0_0, p1_0, p0_1, p1_1) subroutine allocate_cst_char(p0_0, p1_0, p0_1, p1_1) type(cst_char_p0) :: p0_0, p0_1(100) type(cst_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! 
CHECK: fir.store {{.*}} to %[[coor]] allocate(p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(p1_1(5)%p(100)) end subroutine @@ -581,25 +527,21 @@ subroutine allocate_cst_char(p0_0, p1_0, p0_1, p1_1) subroutine allocate_def_char(p0_0, p1_0, p0_1, p1_1) type(def_char_p0) :: p0_0, p0_1(100) type(def_char_p1) :: p1_0, p1_1(100) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p0_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::p0_0%p) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p0_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::p0_1(5)%p) - ! CHECK: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[p1_0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::p1_0%p(100)) ! CHECK-DAG: %[[coor0:.*]] = fir.coordinate_of %[[p1_1]], %{{.*}} - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], %[[fld]] + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[coor0]], p ! CHECK: fir.store {{.*}} to %[[coor]] allocate(character(18)::p1_1(5)%p(100)) end subroutine @@ -617,8 +559,7 @@ subroutine deallocate_real(p0_0, p1_0, p0_1, p1_1) ! 
CHECK: %[[VAL_0:.*]] = fir.absent !fir.box ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref> ! CHECK: %[[LINE_0:.*]] = arith.constant {{.*}} : i32 - ! CHECK: %[[VAL_2:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p0{p:!fir.box>}> - ! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %arg0, %[[VAL_2]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %arg0, p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.ref>>) -> !fir.ref> ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.call @_FortranAPointerDeallocate(%[[VAL_4]], %false, %[[VAL_0]], %[[VAL_5]], %[[LINE_0]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -632,8 +573,7 @@ subroutine deallocate_real(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[CON_1:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_9:.*]] = arith.subi %[[CON_5]], %[[CON_1]] : i64 ! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %arg2, %[[VAL_9:.*]] : (!fir.ref>}>>>, i64) -> !fir.ref>}>> - ! CHECK: %[[VAL_11:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p0{p:!fir.box>}> - ! CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_10]], %[[VAL_11]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_10]], p : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (!fir.ref>>) -> !fir.ref> ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_8]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.call @_FortranAPointerDeallocate(%[[VAL_13]], %false_0, %[[VAL_7]], %[[VAL_14]], %[[LINE_1]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -643,8 +583,7 @@ subroutine deallocate_real(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[VAL_16:.*]] = fir.absent !fir.box ! CHECK: %[[VAL_17:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref> ! CHECK: %[[LINE_2:.*]] = arith.constant {{.*}} : i32 - ! 
CHECK: %[[VAL_18:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> - ! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %arg1, %[[VAL_18]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %arg1, p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (!fir.ref>>>) -> !fir.ref> ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_17]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_22:.*]] = fir.call @_FortranAPointerDeallocate(%[[VAL_20]], %false_1, %[[VAL_16]], %[[VAL_21]], %[[LINE_2]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -658,8 +597,7 @@ subroutine deallocate_real(p0_0, p1_0, p0_1, p1_1) ! CHECK: %[[CON_1A:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_25:.*]] = arith.subi %[[CON_5A]], %[[CON_1A]] : i64 ! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %arg3, %[[VAL_25]] : (!fir.ref>>}>>>, i64) -> !fir.ref>>}>> - ! CHECK: %[[VAL_27:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> - ! CHECK: %[[VAL_28:.*]] = fir.coordinate_of %[[VAL_26]], %[[VAL_27]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[VAL_28:.*]] = fir.coordinate_of %[[VAL_26]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (!fir.ref>>>) -> !fir.ref> ! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_24]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_31:.*]] = fir.call @_FortranAPointerDeallocate(%[[VAL_29]], %false_2, %[[VAL_23]], %[[VAL_30]], %[[LINE_3]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -694,24 +632,18 @@ subroutine very_long(x) type(t5) :: x(:, :, :, :, :) ! CHECK: %[[coor0:.*]] = fir.coordinate_of %[[x]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.}} - ! CHECK-DAG: %[[flda:.*]] = fir.field_index a - ! CHECK-DAG: %[[fldb:.*]] = fir.field_index b - ! CHECK: %[[coor1:.*]] = fir.coordinate_of %[[coor0]], %[[flda]], %[[fldb]] + ! CHECK: %[[coor1:.*]] = fir.coordinate_of %[[coor0]], a, b ! 
CHECK: %[[b_box:.*]] = fir.load %[[coor1]] - ! CHECK-DAG: %[[fldc:.*]] = fir.field_index c - ! CHECK-DAG: %[[fldd:.*]] = fir.field_index d - ! CHECK: %[[coor2:.*]] = fir.coordinate_of %[[b_box]], %[[fldc]], %[[fldd]] + ! CHECK: %[[coor2:.*]] = fir.coordinate_of %[[b_box]], c, d ! CHECK: %[[index:.*]] = arith.subi %c6{{.*}}, %c1{{.*}} : i64 ! CHECK: %[[coor3:.*]] = fir.coordinate_of %[[coor2]], %[[index]] - ! CHECK: %[[flde:.*]] = fir.field_index e - ! CHECK: %[[coor4:.*]] = fir.coordinate_of %[[coor3]], %[[flde]] + ! CHECK: %[[coor4:.*]] = fir.coordinate_of %[[coor3]], e ! CHECK: %[[e_box:.*]] = fir.load %[[coor4]] ! CHECK: %[[edims:.*]]:3 = fir.box_dims %[[e_box]], %c0{{.*}} ! CHECK: %[[lb:.*]] = fir.convert %[[edims]]#0 : (index) -> i64 ! CHECK: %[[index2:.*]] = arith.subi %c7{{.*}}, %[[lb]] ! CHECK: %[[coor5:.*]] = fir.coordinate_of %[[e_box]], %[[index2]] - ! CHECK: %[[fldf:.*]] = fir.field_index f - ! CHECK: %[[coor6:.*]] = fir.coordinate_of %[[coor5]], %[[fldf:.*]] + ! CHECK: %[[coor6:.*]] = fir.coordinate_of %[[coor5]], f ! CHECK: fir.load %[[coor6]] : !fir.ref print *, x(1,2,3,4,5)%a%b%c%d(6)%e(7)%f end subroutine @@ -729,17 +661,13 @@ subroutine test_recursive(x) end type type(t) :: x - ! CHECK: %[[fldNext1:.*]] = fir.field_index next - ! CHECK: %[[next1:.*]] = fir.coordinate_of %[[x]], %[[fldNext1]] + ! CHECK: %[[next1:.*]] = fir.coordinate_of %[[x]], next ! CHECK: %[[nextBox1:.*]] = fir.load %[[next1]] - ! CHECK: %[[fldNext2:.*]] = fir.field_index next - ! CHECK: %[[next2:.*]] = fir.coordinate_of %[[nextBox1]], %[[fldNext2]] + ! CHECK: %[[next2:.*]] = fir.coordinate_of %[[nextBox1]], next ! CHECK: %[[nextBox2:.*]] = fir.load %[[next2]] - ! CHECK: %[[fldNext3:.*]] = fir.field_index next - ! CHECK: %[[next3:.*]] = fir.coordinate_of %[[nextBox2]], %[[fldNext3]] + ! CHECK: %[[next3:.*]] = fir.coordinate_of %[[nextBox2]], next ! CHECK: %[[nextBox3:.*]] = fir.load %[[next3]] - ! CHECK: %[[fldi:.*]] = fir.field_index i - ! 
CHECK: %[[i:.*]] = fir.coordinate_of %[[nextBox3]], %[[fldi]] + ! CHECK: %[[i:.*]] = fir.coordinate_of %[[nextBox3]], i ! CHECK: %[[nextBox3:.*]] = fir.load %[[i]] : !fir.ref print *, x%next%next%next%i end subroutine @@ -754,7 +682,6 @@ module pinit use pcomp ! CHECK-LABEL: fir.global {{.*}}@_QMpinitEarp0 ! CHECK-DAG: %[[undef:.*]] = fir.undefined - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p ! CHECK-DAG: %[[target:.*]] = fir.address_of(@_QMpcompEreal_target) ! CHECK: %[[box:.*]] = fir.embox %[[target]] : (!fir.ref) -> !fir.box ! CHECK: %[[rebox:.*]] = fir.rebox %[[box]] : (!fir.box) -> !fir.box> @@ -764,7 +691,6 @@ module pinit ! CHECK-LABEL: fir.global @_QMpinitEbrp1 : !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> { ! CHECK: %[[VAL_0:.*]] = fir.undefined !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> -! CHECK: %[[VAL_1:.*]] = fir.field_index p, !fir.type<_QMpcompTreal_p1{p:!fir.box>>}> ! CHECK: %[[VAL_2:.*]] = fir.address_of(@_QMpcompEreal_array_target) : !fir.ref> ! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : index @@ -792,7 +718,6 @@ module pinit ! CHECK-LABEL: fir.global {{.*}}@_QMpinitEccp0 ! CHECK-DAG: %[[undef:.*]] = fir.undefined - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p ! CHECK-DAG: %[[target:.*]] = fir.address_of(@_QMpcompEchar_target) ! CHECK: %[[box:.*]] = fir.embox %[[target]] : (!fir.ref>) -> !fir.box> ! CHECK: %[[rebox:.*]] = fir.rebox %[[box]] : (!fir.box>) -> !fir.box>> @@ -802,7 +727,6 @@ module pinit ! CHECK-LABEL: fir.global {{.*}}@_QMpinitEdcp1 ! CHECK-DAG: %[[undef:.*]] = fir.undefined - ! CHECK-DAG: %[[fld:.*]] = fir.field_index p ! CHECK-DAG: %[[target:.*]] = fir.address_of(@_QMpcompEchar_array_target) ! CHECK-DAG: %[[shape:.*]] = fir.shape %c100{{.*}} ! 
CHECK-DAG: %[[box:.*]] = fir.embox %[[target]](%[[shape]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> diff --git a/flang/test/Lower/derived-type-finalization.f90 b/flang/test/Lower/derived-type-finalization.f90 index b38fcd8ba5766..66735a9056544 100644 --- a/flang/test/Lower/derived-type-finalization.f90 +++ b/flang/test/Lower/derived-type-finalization.f90 @@ -132,8 +132,7 @@ subroutine test_end_finalization2(a) ! CHECK: cf.br ^bb3 ! CHECK: ^bb2: ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMderived_type_finalizationTt1{a:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[T]], %[[FIELD_A]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[T]], a : (!fir.ref>) -> !fir.ref ! CHECK: fir.store %[[C10]] to %[[COORD_A]] : !fir.ref ! CHECK: cf.br ^bb3 ! CHECK: ^bb3: diff --git a/flang/test/Lower/derived-types.f90 b/flang/test/Lower/derived-types.f90 index 901eb8eca8c6e..4d36a7632b070 100644 --- a/flang/test/Lower/derived-types.f90 +++ b/flang/test/Lower/derived-types.f90 @@ -55,8 +55,7 @@ subroutine saved_derived() subroutine scalar_numeric_ref() ! CHECK: %[[alloc:.*]] = fir.alloca !fir.type<_QMdTr{x:f32}> type(r) :: some_r - ! CHECK: %[[field:.*]] = fir.field_index x, !fir.type<_QMdTr{x:f32}> - ! CHECK: fir.coordinate_of %[[alloc]], %[[field]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: fir.coordinate_of %[[alloc]], x : (!fir.ref>) -> !fir.ref call real_bar(some_r%x) end subroutine @@ -64,8 +63,7 @@ subroutine scalar_numeric_ref() subroutine scalar_character_ref() ! CHECK: %[[alloc:.*]] = fir.alloca !fir.type<_QMdTc{ch:!fir.char<1,10>}> type(c) :: some_c - ! CHECK: %[[field:.*]] = fir.field_index ch, !fir.type<_QMdTc{ch:!fir.char<1,10>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[alloc]], %[[field]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[alloc]], ch : (!fir.ref}>>) -> !fir.ref> ! 
CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index ! CHECK: fir.emboxchar %[[coor]], %c10 : (!fir.ref>, index) -> !fir.boxchar<1> call char_bar(some_c%ch) @@ -78,8 +76,7 @@ subroutine scalar_character_ref() subroutine array_comp_elt_ref() type(r2) :: some_r2 ! CHECK: %[[alloc:.*]] = fir.alloca !fir.type<_QMdTr2{x_array:!fir.array<10x20xf32>}> - ! CHECK: %[[field:.*]] = fir.field_index x_array, !fir.type<_QMdTr2{x_array:!fir.array<10x20xf32>}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[alloc]], %[[field]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[alloc]], x_array : (!fir.ref}>>) -> !fir.ref> ! CHECK-DAG: %[[index1:.*]] = arith.subi %c5{{.*}}, %c1{{.*}} : i64 ! CHECK-DAG: %[[index2:.*]] = arith.subi %c6{{.*}}, %c1{{.*}} : i64 ! CHECK: fir.coordinate_of %[[coor]], %[[index1]], %[[index2]] : (!fir.ref>, i64, i64) -> !fir.ref @@ -90,7 +87,7 @@ subroutine array_comp_elt_ref() ! CHECK-LABEL: func @_QMdPchar_array_comp_elt_ref( subroutine char_array_comp_elt_ref() type(c2) :: some_c2 - ! CHECK: %[[coor:.*]] = fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[coor:.*]] = fir.coordinate_of %{{.*}}, ch_array : (!fir.ref>}>>) -> !fir.ref>> ! CHECK-DAG: %[[index1:.*]] = arith.subi %c5{{.*}}, %c1{{.*}} : i64 ! CHECK-DAG: %[[index2:.*]] = arith.subi %c6{{.*}}, %c1{{.*}} : i64 ! CHECK: fir.coordinate_of %[[coor]], %[[index1]], %[[index2]] : (!fir.ref>>, i64, i64) -> !fir.ref> @@ -104,8 +101,7 @@ subroutine array_elt_comp_ref() ! CHECK: %[[alloca:.*]] = fir.alloca !fir.array<100x!fir.type<_QMdTr{x:f32}>> ! CHECK: %[[index:.*]] = arith.subi %c5{{.*}}, %c1{{.*}} : i64 ! CHECK: %[[elt:.*]] = fir.coordinate_of %[[alloca]], %[[index]] : (!fir.ref>>, i64) -> !fir.ref> - ! CHECK: %[[field:.*]] = fir.field_index x, !fir.type<_QMdTr{x:f32}> - ! CHECK: fir.coordinate_of %[[elt]], %[[field]] : (!fir.ref>, !fir.field) -> !fir.ref + ! 
CHECK: fir.coordinate_of %[[elt]], x : (!fir.ref>) -> !fir.ref call real_bar(some_r_array(5)%x) end subroutine @@ -113,7 +109,7 @@ subroutine array_elt_comp_ref() subroutine char_array_elt_comp_ref() type(c) :: some_c_array(100) ! CHECK: fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref}>>>, i64) -> !fir.ref}>> - ! CHECK: fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: fir.coordinate_of %{{.*}}, ch : (!fir.ref}>>) -> !fir.ref> ! CHECK: fir.emboxchar %{{.*}}, %c10{{.*}} : (!fir.ref>, index) -> !fir.boxchar<1> call char_bar(some_c_array(5)%ch) end subroutine @@ -130,8 +126,7 @@ subroutine char_array_elt_comp_ref() ! CHECK-SAME: %[[arg0:.*]]: !fir.ref> real function scalar_numeric_load(some_r) type(r) :: some_r - ! CHECK: %[[field:.*]] = fir.field_index x, !fir.type<_QMdTr{x:f32}> - ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], %[[field]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[arg0]], x : (!fir.ref>) -> !fir.ref ! CHECK: fir.load %[[coor]] scalar_numeric_load = some_r%x end function diff --git a/flang/test/Lower/equivalence-1.f90 b/flang/test/Lower/equivalence-1.f90 index aec5c0f54c190..133accd38c2f7 100644 --- a/flang/test/Lower/equivalence-1.f90 +++ b/flang/test/Lower/equivalence-1.f90 @@ -45,8 +45,7 @@ SUBROUTINE s3 ! CHECK: %[[coor:.*]] = fir.coordinate_of %[[group]], %c0 : (!fir.ref>, index) -> !fir.ref ! CHECK: %[[rloc:.*]] = fir.convert %[[coor]] : (!fir.ref) -> !fir.ptr> ! CHECK: %[[xloc:.*]] = fir.convert %[[coor]] : (!fir.ref) -> !fir.ptr}>> - ! CHECK: %[[fidx:.*]] = fir.field_index r, !fir.type<_QFs3Tt{r:!fir.array<10xf32>}> - ! CHECK: %[[xrloc:.*]] = fir.coordinate_of %[[xloc]], %[[fidx]] : + ! CHECK: %[[xrloc:.*]] = fir.coordinate_of %[[xloc]], r ! CHECK: %[[v1loc:.*]] = fir.coordinate_of %[[xrloc]], %c8_i64 : (!fir.ref>, i64) -> !fir.ref ! 
CHECK: fir.store %{{.*}} to %[[v1loc]] : !fir.ref x%r(9) = 9.0 diff --git a/flang/test/Lower/forall/array-pointer.f90 b/flang/test/Lower/forall/array-pointer.f90 index 1e8f7a6a55002..fd3efed736c39 100644 --- a/flang/test/Lower/forall/array-pointer.f90 +++ b/flang/test/Lower/forall/array-pointer.f90 @@ -407,10 +407,9 @@ end subroutine s3 ! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i64) -> index ! CHECK: %[[VAL_24:.*]] = arith.subi %[[VAL_23]], %[[VAL_20]] : index ! CHECK: %[[VAL_25:.*]] = fir.field_index ip, !fir.type<_QMarray_of_pointer_testTtu{ip:!fir.box>>}> -! CHECK: %[[VAL_26:.*]] = fir.field_index v, !fir.type<_QMarray_of_pointer_testTu{v:i32}> ! CHECK: %[[VAL_27:.*]] = fir.array_access %[[VAL_12]], %[[VAL_24]], %[[VAL_25]] : (!fir.array>>}>>, index, !fir.field) -> !fir.ref>>> ! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref>>> -! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], %[[VAL_26]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], v : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[VAL_19]] to %[[VAL_29]] : !fir.ref ! CHECK: %[[VAL_30:.*]] = fir.array_amend %[[VAL_12]], %[[VAL_27]] : (!fir.array>>}>>, !fir.ref>>>) -> !fir.array>>}>> ! CHECK: fir.result %[[VAL_30]] : !fir.array>>}>> @@ -456,10 +455,9 @@ end subroutine s3_1 ! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i64) -> index ! CHECK: %[[VAL_24:.*]] = arith.subi %[[VAL_23]], %[[VAL_20]] : index ! CHECK: %[[VAL_25:.*]] = fir.field_index ip, !fir.type<_QMarray_of_pointer_testTtu{ip:!fir.box>>}> -! CHECK: %[[VAL_26:.*]] = fir.field_index v, !fir.type<_QMarray_of_pointer_testTu{v:i32}> ! CHECK: %[[VAL_27:.*]] = fir.array_access %[[VAL_12]], %[[VAL_24]], %[[VAL_25]] : (!fir.array>>}>>, index, !fir.field) -> !fir.ref>>> ! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref>>> -! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], %[[VAL_26]] : (!fir.box>>, !fir.field) -> !fir.ref +! 
CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_28]], v : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[VAL_19]] to %[[VAL_29]] : !fir.ref ! CHECK: %[[VAL_30:.*]] = fir.array_amend %[[VAL_12]], %[[VAL_27]] : (!fir.array>>}>>, !fir.ref>>>) -> !fir.array>>}>> ! CHECK: fir.result %[[VAL_30]] : !fir.array>>}>> @@ -605,14 +603,12 @@ end subroutine s6 ! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i32) -> i64 ! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i64) -> index ! CHECK: %[[VAL_40:.*]] = arith.subi %[[VAL_39]], %[[VAL_31]] : index -! CHECK: %[[VAL_41:.*]] = fir.field_index ip, !fir.type<_QMarray_of_pointer_testTtu{ip:!fir.box>>}> -! CHECK: %[[VAL_42:.*]] = fir.field_index v, !fir.type<_QMarray_of_pointer_testTu{v:i32}> ! CHECK: %[[VAL_43:.*]] = fir.array_access %[[VAL_23]], %[[VAL_35]], %[[VAL_36]] : (!fir.array>>}>>>>}>>, index, !fir.field) -> !fir.ref>>}>>>>> ! CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_43]] : !fir.ref>>}>>>>> ! CHECK: %[[VAL_45:.*]] = fir.coordinate_of %[[VAL_44]], %[[VAL_40]] : (!fir.box>>}>>>>, index) -> !fir.ref>>}>> -! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_45]], %[[VAL_41]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_45]], ip : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref>>> -! CHECK: %[[VAL_48:.*]] = fir.coordinate_of %[[VAL_47]], %[[VAL_42]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_48:.*]] = fir.coordinate_of %[[VAL_47]], v : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[VAL_30]] to %[[VAL_48]] : !fir.ref ! CHECK: %[[VAL_49:.*]] = fir.array_amend %[[VAL_23]], %[[VAL_43]] : (!fir.array>>}>>>>}>>, !fir.ref>>}>>>>>) -> !fir.array>>}>>>>}>> ! CHECK: fir.result %[[VAL_49]] : !fir.array>>}>>>>}>> @@ -658,8 +654,7 @@ end subroutine s7 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64 ! 
CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.box>}>>>, i64) -> !fir.ref>}>> -! CHECK: %[[VAL_25:.*]] = fir.field_index ip, !fir.type<_QMarray_of_pointer_testTt{ip:!fir.box>}> -! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %[[VAL_24]], %[[VAL_25]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> +! CHECK: %[[VAL_26:.*]] = fir.coordinate_of %[[VAL_24]], ip : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref>> ! CHECK: %[[VAL_28:.*]] = fir.box_addr %[[VAL_27]] : (!fir.box>) -> !fir.ptr ! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ptr diff --git a/flang/test/Lower/forall/forall-allocatable-2.f90 b/flang/test/Lower/forall/forall-allocatable-2.f90 index f7c46acf87275..67a0018f9a22b 100644 --- a/flang/test/Lower/forall/forall-allocatable-2.f90 +++ b/flang/test/Lower/forall/forall-allocatable-2.f90 @@ -29,8 +29,7 @@ end subroutine forall_with_allocatable2 ! CHECK: %[[VAL_11:.*]] = arith.constant 15 : i32 ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : index -! CHECK: %[[VAL_14:.*]] = fir.field_index arr, !fir.type<_QFforall_with_allocatable2Tt{i:i32,arr:!fir.box>>}> -! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_14]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_2]], arr : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref>>> ! CHECK: %[[VAL_17:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_16]], %[[VAL_17]] : (!fir.box>>, index) -> (index, index, index) diff --git a/flang/test/Lower/forall/forall-where.f90 b/flang/test/Lower/forall/forall-where.f90 index b1dd72fdfb4f2..54ff2bd4c3f16 100644 --- a/flang/test/Lower/forall/forall-where.f90 +++ b/flang/test/Lower/forall/forall-where.f90 @@ -137,8 +137,7 @@ end subroutine test_nested_forall_where ! CHECK: %[[VAL_109:.*]] = arith.constant 1 : i64 ! 
CHECK: %[[VAL_110:.*]] = arith.subi %[[VAL_108]], %[[VAL_109]] : i64 ! CHECK: %[[VAL_111:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_106]], %[[VAL_110]] : (!fir.box}>>>, i64, i64) -> !fir.ref}>> -! CHECK: %[[VAL_112:.*]] = fir.field_index data, !fir.type<_QFtest_nested_forall_whereTt{data:!fir.array<100xf32>}> -! CHECK: %[[VAL_113:.*]] = fir.coordinate_of %[[VAL_111]], %[[VAL_112]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_113:.*]] = fir.coordinate_of %[[VAL_111]], data : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_114:.*]] = arith.constant 100 : index ! CHECK: %[[VAL_115:.*]] = fir.shape %[[VAL_114]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_116:.*]] = fir.array_load %[[VAL_113]](%[[VAL_115]]) : (!fir.ref>, !fir.shape<1>) -> !fir.array<100xf32> diff --git a/flang/test/Lower/identical-block-merge-disable.f90 b/flang/test/Lower/identical-block-merge-disable.f90 index de74ba529d87f..cc3120a3b6f67 100644 --- a/flang/test/Lower/identical-block-merge-disable.f90 +++ b/flang/test/Lower/identical-block-merge-disable.f90 @@ -55,8 +55,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]]#0 : (index) -> i64 ! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64 ! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_10]], %[[VAL_15]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_17:.*]] = fir.field_index panels_l, !fir.type<_QMdmumps_sol_lrTblr_struc_t{panels_l:!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}> -! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_16]], %[[VAL_17]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_16]], panels_l : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref>>> ! 
CHECK: %[[VAL_20:.*]] = fir.box_addr %[[VAL_19]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.ptr>) -> i64 @@ -70,7 +69,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_24]]#0 : (index) -> i64 ! CHECK: %[[VAL_28:.*]] = arith.subi %[[VAL_26]], %[[VAL_27]] : i64 ! CHECK: %[[VAL_29:.*]] = fir.coordinate_of %[[VAL_23]], %[[VAL_28]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_30:.*]] = fir.coordinate_of %[[VAL_29]], %[[VAL_17]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_30:.*]] = fir.coordinate_of %[[VAL_29]], panels_l : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_30]] : !fir.ref>>> ! CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_31]], %[[VAL_3]] : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]]#1 : (index) -> i32 @@ -82,8 +81,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_35]]#0 : (index) -> i64 ! CHECK: %[[VAL_39:.*]] = arith.subi %[[VAL_37]], %[[VAL_38]] : i64 ! CHECK: %[[VAL_40:.*]] = fir.coordinate_of %[[VAL_34]], %[[VAL_39]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_41:.*]] = fir.field_index begs_blr_static, !fir.type<_QMdmumps_sol_lrTblr_struc_t{panels_l:!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}> -! CHECK: %[[VAL_42:.*]] = fir.coordinate_of %[[VAL_40]], %[[VAL_41]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_42:.*]] = fir.coordinate_of %[[VAL_40]], begs_blr_static : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! 
CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref>>> ! CHECK: %[[VAL_44:.*]]:3 = fir.box_dims %[[VAL_43]], %[[VAL_3]] : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]]#1 : (index) -> i32 @@ -98,8 +96,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_48]]#0 : (index) -> i64 ! CHECK: %[[VAL_52:.*]] = arith.subi %[[VAL_50]], %[[VAL_51]] : i64 ! CHECK: %[[VAL_53:.*]] = fir.coordinate_of %[[VAL_47]], %[[VAL_52]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_54:.*]] = fir.field_index panels_u, !fir.type<_QMdmumps_sol_lrTblr_struc_t{panels_l:!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}> -! CHECK: %[[VAL_55:.*]] = fir.coordinate_of %[[VAL_53]], %[[VAL_54]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_55:.*]] = fir.coordinate_of %[[VAL_53]], panels_u : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_56:.*]] = fir.load %[[VAL_55]] : !fir.ref>>> ! CHECK: %[[VAL_57:.*]] = fir.box_addr %[[VAL_56]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[VAL_58:.*]] = fir.convert %[[VAL_57]] : (!fir.ptr>) -> i64 @@ -113,7 +110,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_64:.*]] = fir.convert %[[VAL_61]]#0 : (index) -> i64 ! CHECK: %[[VAL_65:.*]] = arith.subi %[[VAL_63]], %[[VAL_64]] : i64 ! CHECK: %[[VAL_66:.*]] = fir.coordinate_of %[[VAL_60]], %[[VAL_65]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_67:.*]] = fir.coordinate_of %[[VAL_66]], %[[VAL_54]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! 
CHECK: %[[VAL_67:.*]] = fir.coordinate_of %[[VAL_66]], panels_u : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_68:.*]] = fir.load %[[VAL_67]] : !fir.ref>>> ! CHECK: %[[VAL_69:.*]]:3 = fir.box_dims %[[VAL_68]], %[[VAL_3]] : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[VAL_70:.*]] = fir.convert %[[VAL_69]]#1 : (index) -> i32 @@ -125,8 +122,7 @@ END MODULE DMUMPS_SOL_LR ! CHECK: %[[VAL_75:.*]] = fir.convert %[[VAL_72]]#0 : (index) -> i64 ! CHECK: %[[VAL_76:.*]] = arith.subi %[[VAL_74]], %[[VAL_75]] : i64 ! CHECK: %[[VAL_77:.*]] = fir.coordinate_of %[[VAL_71]], %[[VAL_76]] : (!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>>>, i64) -> !fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>> -! CHECK: %[[VAL_78:.*]] = fir.field_index begs_blr_static, !fir.type<_QMdmumps_sol_lrTblr_struc_t{panels_l:!fir.box>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}> -! CHECK: %[[VAL_79:.*]] = fir.coordinate_of %[[VAL_77]], %[[VAL_78]] : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[VAL_79:.*]] = fir.coordinate_of %[[VAL_77]], begs_blr_static : (!fir.ref>>,panels_u:!fir.box>>,begs_blr_static:!fir.box>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_80:.*]] = fir.load %[[VAL_79]] : !fir.ref>>> ! CHECK: %[[VAL_81:.*]]:3 = fir.box_dims %[[VAL_80]], %[[VAL_3]] : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[VAL_82:.*]] = fir.convert %[[VAL_81]]#1 : (index) -> i32 diff --git a/flang/test/Lower/io-derived-type.f90 b/flang/test/Lower/io-derived-type.f90 index f96feca77c485..ecbbc22d24b1e 100644 --- a/flang/test/Lower/io-derived-type.f90 +++ b/flang/test/Lower/io-derived-type.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s +! RUN: bbc -emit-hlfir -o - %s | FileCheck %s module m type t @@ -35,9 +35,6 @@ subroutine wftd(dtv, unit, iotype, v_list, iostat, iomsg) ! CHECK-LABEL: @_QMmPtest1 subroutine test1 import, all - ! 
CHECK: %[[V_14:[0-9]+]] = fir.field_index n, !fir.type<_QMmTt{n:i32}> - ! CHECK: %[[V_15:[0-9]+]] = fir.coordinate_of %{{.*}}, %[[V_14]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: fir.store %c1{{.*}} to %[[V_15]] : !fir.ref ! CHECK: %[[V_16:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_17:[0-9]+]] = fir.convert %[[V_16]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_18:[0-9]+]] = fir.address_of(@_QQMmFtest1.nonTbpDefinedIoTable) : !fir.ref, !fir.ref, i32, i1>>>, i1>> @@ -46,9 +43,6 @@ subroutine test1 print *, 'test1 outer, should call wft: ', t(1) block import, only: t - ! CHECK: %[[V_35:[0-9]+]] = fir.field_index n, !fir.type<_QMmTt{n:i32}> - ! CHECK: %[[V_36:[0-9]+]] = fir.coordinate_of %{{.*}}, %[[V_35]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: fir.store %c2{{.*}} to %[[V_36]] : !fir.ref ! CHECK: %[[V_37:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_38:[0-9]+]] = fir.convert %[[V_37]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_39:[0-9]+]] = fir.address_of(@_QQdefault.nonTbpDefinedIoTable) : !fir.ref, !fir.ref, i32, i1>>>, i1>> @@ -60,9 +54,6 @@ subroutine test1 ! CHECK-LABEL: @_QMmPtest2 subroutine test2 - ! CHECK: %[[V_13:[0-9]+]] = fir.field_index n, !fir.type<_QMmTt{n:i32}> - ! CHECK: %[[V_14:[0-9]+]] = fir.coordinate_of %{{.*}}, %[[V_13]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: fir.store %c3{{.*}} to %[[V_14]] : !fir.ref ! CHECK: %[[V_15:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_16:[0-9]+]] = fir.convert %[[V_15]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_17:[0-9]+]] = fir.address_of(@_QQdefault.nonTbpDefinedIoTable) : !fir.ref, !fir.ref, i32, i1>>>, i1>> @@ -81,7 +72,7 @@ subroutine test3(p, x) procedure p end interface - ! CHECK: %[[V_3:[0-9]+]] = fir.embox %arg1 : (!fir.ref>) -> !fir.box> + ! CHECK: %[[V_3:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_4:[0-9]+]] = fir.convert %[[V_3]] : (!fir.box>) -> !fir.box ! 
CHECK: %[[V_5:[0-9]+]] = fir.alloca !fir.array<1xtuple, !fir.ref, i32, i1>> ! CHECK: %[[V_6:[0-9]+]] = fir.undefined !fir.array<1xtuple, !fir.ref, i32, i1>> @@ -118,9 +109,6 @@ program p ! CHECK: fir.call @_QMmPtest3 call test3(wftd, t(17)) - ! CHECK: %[[V_95:[0-9]+]] = fir.field_index n, !fir.type<_QMmTt{n:i32}> - ! CHECK: %[[V_96:[0-9]+]] = fir.coordinate_of %{{.*}}, %[[V_95]] : (!fir.ref>, !fir.field) -> !fir.ref - ! CHECK: fir.store %c4{{.*}} to %[[V_96]] : !fir.ref ! CHECK: %[[V_97:[0-9]+]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> ! CHECK: %[[V_98:[0-9]+]] = fir.convert %[[V_97]] : (!fir.box>) -> !fir.box ! CHECK: %[[V_99:[0-9]+]] = fir.address_of(@_QQF.nonTbpDefinedIoTable) : !fir.ref, !fir.ref, i32, i1>>>, i1>> diff --git a/flang/test/Lower/parent-component.f90 b/flang/test/Lower/parent-component.f90 index 3cb23f277c9a3..7de20ea044905 100644 --- a/flang/test/Lower/parent-component.f90 +++ b/flang/test/Lower/parent-component.f90 @@ -1,7 +1,7 @@ ! Test different ways of passing the parent component of an extended ! derived-type to a subroutine or the runtime. -! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false %s -o - | FileCheck %s +! RUN: bbc -emit-hlfir %s -o - | FileCheck %s program parent_comp type p @@ -43,35 +43,21 @@ subroutine init_with_slice() print*,y(:)%p end subroutine ! CHECK-LABEL: func.func private @_QFPinit_with_slice() - ! CHECK: %[[Y:.*]] = fir.address_of(@_QFFinit_with_sliceEy) : !fir.ref>> - ! CHECK: %[[C2:.*]] = arith.constant 2 : index - ! CHECK: %[[C1:.*]] = arith.constant 1 : index - ! CHECK: %[[C1_I64:.*]] = arith.constant 1 : i64 - ! CHECK: %[[STRIDE:.*]] = fir.convert %[[C1_I64]] : (i64) -> index - ! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[C2]] : index - ! CHECK: %[[UB:.*]] = arith.subi %[[ADD]], %[[C1]] : index - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[SLICE:.*]] = fir.slice %[[C1]], %[[UB]], %[[STRIDE]] : (index, index, index) -> !fir.slice<1> - ! 
CHECK: %[[BOX:.*]] = fir.embox %[[Y]](%[[SHAPE]]) [%[[SLICE]]] : (!fir.ref>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box) -> i1 - ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap>>) { - ! CHECK: } else { - ! CHECK: fir.call @_FortranAAssign - ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap>>) -> !fir.ref>> - ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref>>) -> () - - ! CHECK-LABEL: %{{.*}} = fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref - ! CHECK: %[[C1:.*]] = arith.constant 1 : index - ! CHECK: %[[C1_I64:.*]] = arith.constant 1 : i64 - ! CHECK: %[[STRIDE:.*]] = fir.convert %[[C1_I64]] : (i64) -> index - ! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[C2]] : index - ! CHECK: %[[UB:.*]] = arith.subi %[[ADD]], %[[C1]] : index - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[SLICE:.*]] = fir.slice %{{.*}}, %{{.*}}, %{{.*}} : (index, index, index) -> !fir.slice<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[Y]](%[[SHAPE]]) [%[[SLICE]]] : (!fir.ref>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDescriptor(%{{.*}}, %[[BOX_NONE]]) {{.*}}: (!fir.ref, !fir.box) -> i1 + ! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>>> + ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFFinit_with_sliceEy) : !fir.ref,b:i32}>>> + ! CHECK: %[[VAL_2:.*]] = arith.constant 2 : index + ! CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_3]]) {uniq_name = "_QFFinit_with_sliceEy"} : (!fir.ref,b:i32}>>>, !fir.shape<1>) -> (!fir.ref,b:i32}>>>, !fir.ref,b:i32}>>>) + ! 
CHECK: %[[VAL_5:.*]] = arith.constant 1 : index + ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : index + ! CHECK: %[[VAL_7:.*]] = arith.constant 2 : index + ! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_5]]:%[[VAL_2]]:%[[VAL_6]]) shape %[[VAL_8]] : (!fir.ref,b:i32}>>>, index, index, index, !fir.shape<1>) -> !fir.ref,b:i32}>>> + ! CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_9]]{"p"} shape %[[VAL_8]] : (!fir.ref,b:i32}>>>, !fir.shape<1>) -> !fir.box>> + ! CHECK: %[[VAL_11:.*]]:2 = hlfir.copy_in %[[VAL_10]] to %[[VAL_0]] : (!fir.box>>, !fir.ref>>>>) -> (!fir.box>>, i1) + ! CHECK: %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]]#0 : (!fir.box>>) -> !fir.ref>> + ! CHECK: fir.call @_QFPprint_p(%[[VAL_12]]) fastmath : (!fir.ref>>) -> () + ! CHECK: hlfir.copy_out %[[VAL_0]], %[[VAL_11]]#1 : (!fir.ref>>>>, i1) -> () subroutine init_no_slice() type(c) :: y(2) = [ c(11, 21), c(12, 22) ] @@ -79,23 +65,16 @@ subroutine init_no_slice() print*,y%p end subroutine ! CHECK-LABEL: func.func private @_QFPinit_no_slice() - ! CHECK: %[[Y:.*]] = fir.address_of(@_QFFinit_no_sliceEy) : !fir.ref>> - ! CHECK: %[[C2:.*]] = arith.constant 2 : index - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[Y]](%[[SHAPE]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box) -> i1 - ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap>>) { - ! CHECK: } else { - ! CHECK: fir.call @_FortranAAssign - ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap>>) -> !fir.ref>> - ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref>>) -> () - - ! 
CHECK-LABEL: %{{.*}} = fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[Y]](%[[SHAPE]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDescriptor(%{{.*}}, %[[BOX_NONE]]) {{.*}}: (!fir.ref, !fir.box) -> i1 + ! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>>> + ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFFinit_no_sliceEy) : !fir.ref,b:i32}>>> + ! CHECK: %[[VAL_2:.*]] = arith.constant 2 : index + ! CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_3]]) {uniq_name = "_QFFinit_no_sliceEy"} : (!fir.ref,b:i32}>>>, !fir.shape<1>) -> (!fir.ref,b:i32}>>>, !fir.ref,b:i32}>>>) + ! CHECK: %[[VAL_5:.*]] = hlfir.designate %[[VAL_4]]#0{"p"} shape %[[VAL_3]] : (!fir.ref,b:i32}>>>, !fir.shape<1>) -> !fir.box>> + ! CHECK: %[[VAL_6:.*]]:2 = hlfir.copy_in %[[VAL_5]] to %[[VAL_0]] : (!fir.box>>, !fir.ref>>>>) -> (!fir.box>>, i1) + ! CHECK: %[[VAL_7:.*]] = fir.box_addr %[[VAL_6]]#0 : (!fir.box>>) -> !fir.ref>> + ! CHECK: fir.call @_QFPprint_p(%[[VAL_7]]) fastmath : (!fir.ref>>) -> () + ! CHECK: hlfir.copy_out %[[VAL_0]], %[[VAL_6]]#1 : (!fir.ref>>>>, i1) -> () subroutine init_allocatable() type(c), allocatable :: y(:) @@ -107,31 +86,19 @@ subroutine init_allocatable() end subroutine ! CHECK-LABEL: func.func private @_QFPinit_allocatable() - ! CHECK: %[[ALLOC:.*]] = fir.alloca !fir.heap>> {uniq_name = "_QFFinit_allocatableEy.addr"} - ! CHECK: %[[LB0:.*]] = fir.alloca index {uniq_name = "_QFFinit_allocatableEy.lb0"} - ! CHECK: %[[EXT0:.*]] = fir.alloca index {uniq_name = "_QFFinit_allocatableEy.ext0"} - ! CHECK-COUNT-6: %{{.*}} = fir.field_index a, !fir.type<_QFTc{a:i32,b:i32}> - ! 
CHECK: %[[LOAD_LB0:.*]] = fir.load %[[LB0]] : !fir.ref - ! CHECK: %[[LOAD_EXT0:.*]] = fir.load %[[EXT0]] : !fir.ref - ! CHECK: %[[MEM:.*]] = fir.load %[[ALLOC]] : !fir.ref>>> - ! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[LOAD_LB0]], %[[LOAD_EXT0]] : (index, index) -> !fir.shapeshift<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[MEM]](%[[SHAPE_SHIFT]]) : (!fir.heap>>, !fir.shapeshift<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %[[IS_CONTIGOUS:.*]] = fir.call @_FortranAIsContiguous(%[[BOX_NONE]]) {{.*}}: (!fir.box) -> i1 - ! CHECK: %[[TEMP:.*]] = fir.if %[[IS_CONTIGOUS]] -> (!fir.heap>>) { - ! CHECK: } else { - ! CHECK: fir.call @_FortranAAssign - ! CHECK: %[[TEMP_CAST:.*]] = fir.convert %[[TEMP]] : (!fir.heap>>) -> !fir.ref>> - ! CHECK: fir.call @_QFPprint_p(%[[TEMP_CAST]]) {{.*}}: (!fir.ref>>) -> () - - ! CHECK-LABEL: %{{.*}} = fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref - ! CHECK: %[[LOAD_LB0:.*]] = fir.load %[[LB0]] : !fir.ref - ! CHECK: %[[LOAD_EXT0:.*]] = fir.load %[[EXT0]] : !fir.ref - ! CHECK: %[[LOAD_ALLOC:.*]] = fir.load %[[ALLOC]] : !fir.ref>>> - ! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[LOAD_LB0]], %[[LOAD_EXT0]] : (index, index) -> !fir.shapeshift<1> - ! CHECK: %[[BOX:.*]] = fir.embox %[[LOAD_ALLOC]](%[[SHAPE_SHIFT]]) : (!fir.heap>>, !fir.shapeshift<1>) -> !fir.box>> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDescriptor(%{{.*}}, %[[BOX_NONE]]) {{.*}}: (!fir.ref, !fir.box) -> i1 + ! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %{{.*}}_QFFinit_allocatableEy" + ! CHECK: hlfir.assign + ! CHECK: hlfir.assign + ! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref,b:i32}>>>>> + ! CHECK: %[[VAL_31:.*]] = arith.constant 0 : index + ! 
CHECK: %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_30]], %[[VAL_31]] : (!fir.box,b:i32}>>>>, index) -> (index, index, index) + ! CHECK: %[[VAL_33:.*]] = fir.shape %[[VAL_32]]#1 : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_30]]{"p"} shape %[[VAL_33]] : (!fir.box,b:i32}>>>>, !fir.shape<1>) -> !fir.box>> + ! CHECK: %[[VAL_35:.*]]:2 = hlfir.copy_in %[[VAL_34]] to %[[VAL_0:.*]] : (!fir.box>>, !fir.ref>>>>) -> (!fir.box>>, i1) + ! CHECK: %[[VAL_36:.*]] = fir.box_addr %[[VAL_35]]#0 : (!fir.box>>) -> !fir.ref>> + ! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.ref>>) -> !fir.ref>> + ! CHECK: fir.call @_QFPprint_p(%[[VAL_37]]) fastmath : (!fir.ref>>) -> () + ! CHECK: hlfir.copy_out %[[VAL_0]], %[[VAL_35]]#1 : (!fir.ref>>>>, i1) -> () subroutine init_scalar() type(c) :: s = c(11, 21) @@ -140,13 +107,10 @@ subroutine init_scalar() end subroutine ! CHECK-LABEL: func.func private @_QFPinit_scalar() - ! CHECK: %[[S:.*]] = fir.address_of(@_QFFinit_scalarEs) : !fir.ref> - ! CHECK: %[[CAST:.*]] = fir.convert %[[S]] : (!fir.ref>) -> !fir.ref> - ! CHECK: fir.call @_QFPprint_scalar(%[[CAST]]) {{.*}}: (!fir.ref>) -> () - - ! CHECK: %[[BOX:.*]] = fir.embox %{{.*}} : (!fir.ref>) -> !fir.box> - ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX]] : (!fir.box>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDerivedType(%{{.*}}, %[[BOX_NONE]], %{{.*}}) {{.*}}: (!fir.ref, !fir.box, !fir.ref) -> i1 + ! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFFinit_scalarEs) : !fir.ref,b:i32}>> + ! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFFinit_scalarEs"} : (!fir.ref,b:i32}>>) -> (!fir.ref,b:i32}>>, !fir.ref,b:i32}>>) + ! CHECK: %[[VAL_2:.*]] = hlfir.designate %[[VAL_1]]#0{"p"} : (!fir.ref,b:i32}>>) -> !fir.ref> + ! CHECK: fir.call @_QFPprint_scalar(%[[VAL_2]]) fastmath : (!fir.ref>) -> () subroutine init_assumed(y) type(c) :: y(:) @@ -155,12 +119,11 @@ subroutine init_assumed(y) end subroutine ! 
CHECK-LABEL: func.func private @_QFPinit_assumed( - ! CHECK-SAME: %[[ARG0:.*]]: !fir.box> - ! CHECK: %[[BOX:.*]] = fir.rebox %[[ARG0]] : (!fir.box>>) -> !fir.box>> - - ! CHECK: %[[REBOX:.*]] = fir.rebox %[[ARG0]] : (!fir.box>>) -> !fir.box>> - ! CHECK: %[[REBOX_CAST:.*]] = fir.convert %[[REBOX]] : (!fir.box>>) -> !fir.box - ! CHECK: %{{.*}} = fir.call @_FortranAioOutputDescriptor(%{{.*}}, %[[REBOX_CAST]]) {{.*}}: (!fir.ref, !fir.box) -> i1 + ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}"_QFFinit_assumedEy" + ! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index + ! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_4]] : (!fir.box,b:i32}>>>, index) -> (index, index, index) + ! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]]#1 : (index) -> !fir.shape<1> + ! CHECK: %[[VAL_7:.*]] = hlfir.designate %[[VAL_3]]#0{"p"} shape %[[VAL_6]] : (!fir.box,b:i32}>>>, !fir.shape<1>) -> !fir.box>> subroutine init_existing_field() type(z) :: y(2) @@ -168,13 +131,9 @@ subroutine init_existing_field() end subroutine ! CHECK-LABEL: func.func private @_QFPinit_existing_field - ! CHECK: %[[C2:.*]] = arith.constant 2 : index - ! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<2x!fir.type<_QFTz{k:i32,c:!fir.type<_QFTc{a:i32,b:i32}>}>> {bindc_name = "y", uniq_name = "_QFFinit_existing_fieldEy"} - ! CHECK: %[[FIELD_C:.*]] = fir.field_index c, !fir.type<_QFTz{k:i32,c:!fir.type<_QFTc{a:i32,b:i32}>}> - ! CHECK: %[[SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1> - ! CHECK: %[[C1:.*]] = arith.constant 1 : index - ! CHECK: %[[SLICE:.*]] = fir.slice %[[C1]], %[[C2]], %[[C1]] path %[[FIELD_C]] : (index, index, index, !fir.field) -> !fir.slice<1> - ! CHECK: %{{.*}} = fir.embox %[[ALLOCA]](%[[SHAPE]]) [%[[SLICE]]] : (!fir.ref}>>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box>> + ! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %{{.*}}"_QFFinit_existing_fieldEy" + ! CHECK: %[[VAL_5:.*]] = hlfir.designate %[[VAL_4]]#0{"c"} shape %[[VAL_3]] : (!fir.ref,b:i32}>}>>>, !fir.shape<1>) -> !fir.box,b:i32}>>> + ! 
CHECK: %[[VAL_6:.*]] = hlfir.designate %[[VAL_5]]{"p"} shape %[[VAL_3]] : (!fir.box,b:i32}>>>, !fir.shape<1>) -> !fir.box>> subroutine parent_comp_lhs() type(c) :: a @@ -183,15 +142,10 @@ subroutine parent_comp_lhs() a%p = B end subroutine -! CHECK-LABEL: func.func private @_QFPparent_comp_lhs() -! CHECK: %[[BOX:.*]] = fir.alloca !fir.box> -! CHECK: %[[A:.*]] = fir.alloca !fir.type<_QFTc{a:i32,b:i32}> {bindc_name = "a", uniq_name = "_QFFparent_comp_lhsEa"} -! CHECK: %[[B:.*]] = fir.alloca !fir.type<_QFTp{a:i32}> {bindc_name = "b", uniq_name = "_QFFparent_comp_lhsEb"} -! CHECK: %[[EMBOX_A:.*]] = fir.embox %[[A]] : (!fir.ref>) -> !fir.box> -! CHECK: %[[EMBOX_B:.*]] = fir.embox %[[B]] : (!fir.ref>) -> !fir.box> -! CHECK: fir.store %[[EMBOX_A]] to %[[BOX]] : !fir.ref>> -! CHECK: %[[A_NONE:.*]] = fir.convert %[[BOX]] : (!fir.ref>>) -> !fir.ref> -! CHECK: %[[B_NONE:.*]] = fir.convert %[[EMBOX_B]] : (!fir.box>) -> !fir.box -! CHECK: fir.call @_FortranAAssign(%[[A_NONE]], %[[B_NONE]], %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.box, !fir.ref, i32) -> () +! CHECK-LABEL: func.func private @_QFPparent_comp_lhs +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %{{.*}}"_QFFparent_comp_lhsEa" +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}"_QFFparent_comp_lhsEb" +! CHECK: %[[VAL_4:.*]] = hlfir.designate %[[VAL_1]]#0{"p"} : (!fir.ref,b:i32}>>) -> !fir.ref> +! CHECK: hlfir.assign %[[VAL_3]]#0 to %[[VAL_4]] : !fir.ref>, !fir.ref> end diff --git a/flang/test/Lower/pointer-assignments.f90 b/flang/test/Lower/pointer-assignments.f90 index cdf9eac70f450..8f83bf7c4946e 100644 --- a/flang/test/Lower/pointer-assignments.f90 +++ b/flang/test/Lower/pointer-assignments.f90 @@ -76,7 +76,7 @@ subroutine test_pointer_component(temp, temp_ptr) end type mytype type(mytype) :: temp real, pointer :: temp_ptr(:) - ! CHECK: %[[ptr_addr:.*]] = fir.coordinate_of %[[temp]], %{{.*}} : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! 
CHECK: %[[ptr_addr:.*]] = fir.coordinate_of %[[temp]], ptr : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[ptr:.*]] = fir.load %[[ptr_addr]] : !fir.ref>>> ! CHECK: %[[dims:.*]]:3 = fir.box_dims %[[ptr]], %{{.*}} : (!fir.box>>, index) -> (index, index, index) ! CHECK: %[[shift:.*]] = fir.shift %[[dims]]#0 : (index) -> !fir.shift<1> diff --git a/flang/test/Lower/polymorphic-temp.f90 b/flang/test/Lower/polymorphic-temp.f90 index 5e2937e1f5f65..a9db9ba7b7902 100644 --- a/flang/test/Lower/polymorphic-temp.f90 +++ b/flang/test/Lower/polymorphic-temp.f90 @@ -197,11 +197,9 @@ subroutine test_merge_intrinsic(a, b) ! CHECK-LABEL: func.func @_QMpoly_tmpPtest_merge_intrinsic( ! CHECK-SAME: %[[ARG0:.*]]: !fir.class> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.class> {fir.bindc_name = "b"}) { -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMpoly_tmpTp1{a:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[ARG0]], %[[FIELD_A]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[ARG0]], a : (!fir.class>) -> !fir.ref ! CHECK: %[[LOAD_A1:.*]] = fir.load %[[COORD_A]] : !fir.ref -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMpoly_tmpTp1{a:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[ARG1]], %[[FIELD_A]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[ARG1]], a : (!fir.class>) -> !fir.ref ! CHECK: %[[LOAD_A2:.*]] = fir.load %[[COORD_A]] : !fir.ref ! CHECK: %[[CMPI:.*]] = arith.cmpi sgt, %[[LOAD_A1]], %[[LOAD_A2]] : i32 ! CHECK: %[[SELECT:.*]] = arith.select %[[CMPI]], %[[ARG0]], %[[ARG1]] : !fir.class> diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index a1872e225359f..10793d8a88f42 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -104,8 +104,7 @@ subroutine component_access(p) ! CHECK-LABEL: func.func @_QMpolymorphic_testPcomponent_access( ! CHECK-SAME: %[[P:.*]]: !fir.class> {fir.bindc_name = "p"}) { -! 
CHECK: %[[FIELD:.*]] = fir.field_index a, !fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD:.*]] = fir.coordinate_of %[[P]], %[[FIELD]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD:.*]] = fir.coordinate_of %[[P]], a : (!fir.class>) -> !fir.ref ! CHECK: %[[LOAD:.*]] = fir.load %[[COORD]] : !fir.ref ! CHECK: %{{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -205,8 +204,7 @@ subroutine associate_up_pointer(r) ! CHECK-LABEL: func.func @_QMpolymorphic_testPassociate_up_pointer( ! CHECK-SAME: %[[ARG0:.*]]: !fir.class>>}>> {fir.bindc_name = "r"}) { ! CHECK: %[[P:.*]] = fir.alloca !fir.class>> {bindc_name = "p", uniq_name = "_QMpolymorphic_testFassociate_up_pointerEp"} -! CHECK: %[[FIELD_RP:.*]] = fir.field_index rp, !fir.type<_QMpolymorphic_testTr1{rp:!fir.box>>}> -! CHECK: %[[COORD_RP:.*]] = fir.coordinate_of %[[ARG0]], %[[FIELD_RP]] : (!fir.class>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[COORD_RP:.*]] = fir.coordinate_of %[[ARG0]], rp : (!fir.class>>}>>) -> !fir.ref>>> ! CHECK: %[[LOAD_RP:.*]] = fir.load %[[COORD_RP]] : !fir.ref>>> ! CHECK: %[[REBOX_RP:.*]] = fir.rebox %[[LOAD_RP]](%{{.*}}) : (!fir.box>>, !fir.shift<1>) -> !fir.box> ! CHECK: %[[CONV_P:.*]] = fir.convert %[[P]] : (!fir.ref>>>) -> !fir.ref> @@ -308,8 +306,7 @@ subroutine nullify_pointer_array(a) ! CHECK-LABEL: func.func @_QMpolymorphic_testPnullify_pointer_array( ! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>>>}>> {fir.bindc_name = "a"}) { -! CHECK: %[[FIELD_P:.*]] = fir.field_index p, !fir.type<_QMpolymorphic_testTp3{p:!fir.class>>>}> -! CHECK: %[[COORD_P:.*]] = fir.coordinate_of %[[ARG0]], %[[FIELD_P]] : (!fir.ref>>>}>>, !fir.field) -> !fir.ref>>>}>>>>> +! CHECK: %[[COORD_P:.*]] = fir.coordinate_of %[[ARG0]], p : (!fir.ref>>>}>>) -> !fir.ref>>>}>>>>> ! CHECK: %[[TYPE_DESC:.*]] = fir.type_desc !fir.type<_QMpolymorphic_testTp3{p:!fir.class>>>}> ! CHECK: %[[CONV_P:.*]] = fir.convert %[[COORD_P]] : (!fir.ref>>>}>>>>>) -> !fir.ref> ! 
CHECK: %[[CONV_TDESC:.*]] = fir.convert %[[TYPE_DESC]] : (!fir.tdesc>>>}>>) -> !fir.ref @@ -524,12 +521,10 @@ subroutine internal ! CHECK: %[[POS_IN_TUPLE:.*]] = arith.constant 0 : i32 ! CHECK: %[[COORD_OF_CLASS:.*]] = fir.coordinate_of %[[TUPLE]], %[[POS_IN_TUPLE]] : (!fir.ref>>>, i32) -> !fir.ref>> ! CHECK: %[[CLASS:.*]] = fir.load %[[COORD_OF_CLASS]] : !fir.ref>> -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[CLASS]], %[[FIELD_A]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[CLASS]], a : (!fir.class>) -> !fir.ref ! CHECK: %[[A:.*]] = fir.load %[[COORD_A]] : !fir.ref ! CHECK: %{{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[A]]) {{.*}} : (!fir.ref, i32) -> i1 -! CHECK: %[[FIELD_B:.*]] = fir.field_index b, !fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_B:.*]] = fir.coordinate_of %[[CLASS]], %[[FIELD_B]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_B:.*]] = fir.coordinate_of %[[CLASS]], b : (!fir.class>) -> !fir.ref ! CHECK: %[[B:.*]] = fir.load %[[COORD_B]] : !fir.ref ! CHECK: %{{.*}} = fir.call @_FortranAioOutputInteger32(%{{.*}}, %[[B]]) {{.*}} : (!fir.ref, i32) -> i1 @@ -1156,8 +1151,7 @@ program test ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[ADDR_O]] : (!fir.ref}>>>>) -> !fir.ref> ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[O:.*]] = fir.load %[[ADDR_O]] : !fir.ref}>>>> -! CHECK: %[[FIELD_INNER:.*]] = fir.field_index inner, !fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}> -! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], %[[FIELD_INNER]] : (!fir.box}>>>, !fir.field) -> !fir.ref> +! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], inner : (!fir.box}>>>) -> !fir.ref> ! 
CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %{{.*}}) -> (!fir.array<5x!fir.logical<4>>) { ! CHECK: %[[EMBOXED:.*]] = fir.embox %[[COORD_INNER]] : (!fir.ref>) -> !fir.class> ! CHECK: %{{.*}} = fir.call @_QMpolymorphic_testPlt(%{{.*}}, %[[EMBOXED]]) {{.*}} : (!fir.ref, !fir.class>) -> !fir.logical<4> diff --git a/flang/test/Lower/select-type.f90 b/flang/test/Lower/select-type.f90 index e4ff2fef0efd3..64dd639731ab1 100644 --- a/flang/test/Lower/select-type.f90 +++ b/flang/test/Lower/select-type.f90 @@ -63,8 +63,7 @@ subroutine select_type1(a) ! CHECK: ^[[CLASS_IS_P1_BLK]] ! CHECK: ^[[CLASS_IS_P2_BLK]] ! CHECK: %[[P2:.*]] = fir.convert %[[ARG0:.*]] : (!fir.class>) -> !fir.class> -! CHECK: %[[FIELD:.*]] = fir.field_index c, !fir.type<_QMselect_type_lower_testTp2{a:i32,b:i32,c:i32}> -! CHECK: %{{.*}} = fir.coordinate_of %[[P2]], %[[FIELD]] : (!fir.class>, !fir.field) -> !fir.ref +! CHECK: %{{.*}} = fir.coordinate_of %[[P2]], c : (!fir.class>) -> !fir.ref ! CHECK: ^[[DEFAULT_BLOCK]] ! CFG-LABEL: func.func @_QMselect_type_lower_testPselect_type1( @@ -663,21 +662,18 @@ subroutine select_type10(a) ! CHECK: ^bb{{.*}}: ! CHECK: %[[EXACT_BOX:.*]] = fir.convert %[[SELECTOR]] : (!fir.class>>) -> !fir.box>> ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMselect_type_lower_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[EXACT_BOX]], %[[FIELD_A]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[EXACT_BOX]], a : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[C1]] to %[[COORD_A]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} ! CHECK: ^bb{{.*}}: ! CHECK: %[[EXACT_BOX:.*]] = fir.convert %[[SELECTOR]] : (!fir.class>>) -> !fir.box>> ! CHECK: %[[C3:.*]] = arith.constant 3 : i32 -! CHECK: %[[FIELD_C:.*]] = fir.field_index c, !fir.type<_QMselect_type_lower_testTp2{a:i32,b:i32,c:i32}> -! 
CHECK: %[[COORD_C:.*]] = fir.coordinate_of %[[EXACT_BOX]], %[[FIELD_C]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_C:.*]] = fir.coordinate_of %[[EXACT_BOX]], c : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[C3]] to %[[COORD_C]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} ! CHECK: ^bb{{.*}} ! CHECK: %[[C5:.*]] = arith.constant 5 : i32 -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMselect_type_lower_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[SELECTOR]], %[[FIELD_A]] : (!fir.class>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[SELECTOR]], a : (!fir.class>>) -> !fir.ref ! CHECK: fir.store %[[C5]] to %[[COORD_A]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} @@ -699,15 +695,13 @@ subroutine select_type11(a) ! CHECK: ^bb{{.*}}: ! CHECK: %[[EXACT_BOX:.*]] = fir.convert %[[SELECTOR]] : (!fir.class>>) -> !fir.box>> ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 -! CHECK: %[[FIELD_A:.*]] = fir.field_index a, !fir.type<_QMselect_type_lower_testTp1{a:i32,b:i32}> -! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[EXACT_BOX]], %[[FIELD_A]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_A:.*]] = fir.coordinate_of %[[EXACT_BOX]], a : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[C1]] to %[[COORD_A]] : !fir.ref ! CHECK: cf.br ^bb{{.*}} ! CHECK: ^bb{{.*}}: ! CHECK: %[[EXACT_BOX:.*]] = fir.convert %[[SELECTOR]] : (!fir.class>>) -> !fir.box>> ! CHECK: %[[C3:.*]] = arith.constant 3 : i32 -! CHECK: %[[FIELD_C:.*]] = fir.field_index c, !fir.type<_QMselect_type_lower_testTp2{a:i32,b:i32,c:i32}> -! CHECK: %[[COORD_C:.*]] = fir.coordinate_of %[[EXACT_BOX]], %[[FIELD_C]] : (!fir.box>>, !fir.field) -> !fir.ref +! CHECK: %[[COORD_C:.*]] = fir.coordinate_of %[[EXACT_BOX]], c : (!fir.box>>) -> !fir.ref ! CHECK: fir.store %[[C3]] to %[[COORD_C]] : !fir.ref ! 
CHECK: cf.br ^bb{{.*}} diff --git a/flang/test/Lower/structure-constructors.f90 b/flang/test/Lower/structure-constructors.f90 index 86581ce51bf45..171c8eb631f6e 100644 --- a/flang/test/Lower/structure-constructors.f90 +++ b/flang/test/Lower/structure-constructors.f90 @@ -32,8 +32,7 @@ module m_struct_ctor subroutine test_simple(x) real :: x ! CHECK: %[[tmp:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_simple{x:f32}> - ! CHECK: %[[field:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_simple{x:f32}> - ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], %[[field]] : (!fir.ref>, !fir.field) -> !fir.ref + ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], x : (!fir.ref>) -> !fir.ref ! CHECK: %[[val:.*]] = fir.load %[[x]] : !fir.ref ! CHECK: fir.store %[[val]] to %[[xcoor]] : !fir.ref call print_simple(t_simple(x=x)) @@ -43,13 +42,11 @@ subroutine test_simple(x) ! CHECK-SAME: %[[x:.*]]: !fir.ref{{.*}}) subroutine test_char_scalar(x) ! CHECK: %[[tmp:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_char_scalar{x:f32,c:!fir.char<1,3>}> - ! CHECK: %[[xfield:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_char_scalar{x:f32,c:!fir.char<1,3>}> - ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], %[[xfield]] : (!fir.ref}>>, !fir.field) -> !fir.ref + ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[val:.*]] = fir.load %[[x]] : !fir.ref ! CHECK: fir.store %[[val]] to %[[xcoor]] : !fir.ref - ! CHECK: %[[cfield:.*]] = fir.field_index c, !fir.type<_QMm_struct_ctorTt_char_scalar{x:f32,c:!fir.char<1,3>}> - ! CHECK: %[[ccoor:.*]] = fir.coordinate_of %[[tmp]], %[[cfield]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[ccoor:.*]] = fir.coordinate_of %[[tmp]], c : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[cst:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref> ! CHECK-DAG: %[[ccast:.*]] = fir.convert %[[ccoor]] : (!fir.ref>) -> !fir.ref ! 
CHECK-DAG: %[[cstcast:.*]] = fir.convert %[[cst]] : (!fir.ref>) -> !fir.ref @@ -65,13 +62,11 @@ subroutine test_simple_array(x, j) integer :: j(5) call print_simple_array(t_array(x=x, i=2*j)) ! CHECK: %[[tmp:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[xfield:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], %[[xfield]] : (!fir.ref}>>, !fir.field) -> !fir.ref + ! CHECK: %[[xcoor:.*]] = fir.coordinate_of %[[tmp]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[val:.*]] = fir.load %[[x]] : !fir.ref ! CHECK: fir.store %[[val]] to %[[xcoor]] : !fir.ref - ! CHECK: %[[ifield:.*]] = fir.field_index i, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[icoor:.*]] = fir.coordinate_of %[[tmp]], %[[ifield]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[icoor:.*]] = fir.coordinate_of %[[tmp]], i : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[iload:.*]] = fir.array_load %[[icoor]](%{{.*}}) : (!fir.ref>, !fir.shape<1>) -> !fir.array<5xi32> ! CHECK: %[[jload:.*]] = fir.array_load %[[j]](%{{.*}}) : (!fir.ref>, !fir.shape<1>) -> !fir.array<5xi32> ! CHECK: %[[loop:.*]] = fir.do_loop %[[idx:.*]] = %c0{{.*}} to %{{.*}} step %c1{{.*}} iter_args(%[[res:.*]] = %[[iload]]) -> (!fir.array<5xi32>) { @@ -90,12 +85,10 @@ subroutine test_char_array(x, c1) ! CHECK: %[[VAL_4:.*]]:2 = fir.unboxchar %[[VAL_1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]]#0 : (!fir.ref>) -> !fir.ref>> ! CHECK: %[[VAL_6:.*]] = arith.constant 5 : index - ! CHECK: %[[VAL_7:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_char_array{x:f32,c:!fir.array<5x!fir.char<1,3>>}> - ! CHECK: %[[VAL_8:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_7]] : (!fir.ref>}>>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_8:.*]] = fir.coordinate_of %[[VAL_3]], x : (!fir.ref>}>>) -> !fir.ref ! 
CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_0]] : !fir.ref ! CHECK: fir.store %[[VAL_9]] to %[[VAL_8]] : !fir.ref - ! CHECK: %[[VAL_10:.*]] = fir.field_index c, !fir.type<_QMm_struct_ctorTt_char_array{x:f32,c:!fir.array<5x!fir.char<1,3>>}> - ! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_10]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> + ! CHECK: %[[VAL_11:.*]] = fir.coordinate_of %[[VAL_3]], c : (!fir.ref>}>>) -> !fir.ref>> ! CHECK: %[[VAL_12:.*]] = arith.constant 5 : index ! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_14:.*]] = fir.array_load %[[VAL_11]](%[[VAL_13]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.array<5x!fir.char<1,3>> @@ -132,12 +125,10 @@ subroutine test_char_array(x, c1) ! CHECK-LABEL: func @_QMm_struct_ctorPtest_ptr( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref{{.*}}, %[[VAL_1:.*]]: !fir.box> {{{.*}}, fir.target}) { ! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_ptr{x:f32,p:!fir.box>>}> - ! CHECK: %[[VAL_4:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_ptr{x:f32,p:!fir.box>>}> - ! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_4]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_3]], x : (!fir.ref>>}>>) -> !fir.ref ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref ! CHECK: fir.store %[[VAL_6]] to %[[VAL_5]] : !fir.ref - ! CHECK: %[[VAL_7:.*]] = fir.field_index p, !fir.type<_QMm_struct_ctorTt_ptr{x:f32,p:!fir.box>>}> - ! CHECK: %[[VAL_8:.*]] = fir.coordinate_of %[[VAL_3]], %[[VAL_7]] : (!fir.ref>>}>>, !fir.field) -> !fir.ref>>> + ! CHECK: %[[VAL_8:.*]] = fir.coordinate_of %[[VAL_3]], p : (!fir.ref>>}>>) -> !fir.ref>>> ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i64) -> index ! CHECK: %[[VAL_11:.*]] = arith.constant 2 : i64 @@ -170,22 +161,16 @@ subroutine test_nested(x, d) real :: x type(t_array) :: d ! 
CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMm_struct_ctorTt_nested{x:f32,dt:!fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}>}> - ! CHECK: %[[VAL_3:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_nested{x:f32,dt:!fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}>}> - ! CHECK: %[[VAL_4:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_3]] : (!fir.ref}>}>>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_4:.*]] = fir.coordinate_of %[[VAL_2]], x : (!fir.ref}>}>>) -> !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_0]] : !fir.ref ! CHECK: fir.store %[[VAL_5]] to %[[VAL_4]] : !fir.ref - ! CHECK: %[[VAL_6:.*]] = fir.field_index dt, !fir.type<_QMm_struct_ctorTt_nested{x:f32,dt:!fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}>}> - ! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_6]] : (!fir.ref}>}>>, !fir.field) -> !fir.ref}>> - ! CHECK: %[[VAL_8:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_8]] : (!fir.ref}>>, !fir.field) -> !fir.ref - ! CHECK: %[[VAL_8b:.*]] = fir.field_index x, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_7]], %[[VAL_8b]] : (!fir.ref}>>, !fir.field) -> !fir.ref + ! CHECK: %[[VAL_7:.*]] = fir.coordinate_of %[[VAL_2]], dt : (!fir.ref}>}>>) -> !fir.ref}>> + ! CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_1]], x : (!fir.ref}>>) -> !fir.ref + ! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_7]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref ! CHECK: fir.store %[[VAL_11]] to %[[VAL_10]] : !fir.ref - ! CHECK: %[[VAL_12:.*]] = fir.field_index i, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[VAL_13:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_12]] : (!fir.ref}>>, !fir.field) -> !fir.ref> - ! 
CHECK: %[[VAL_12b:.*]] = fir.field_index i, !fir.type<_QMm_struct_ctorTt_array{x:f32,i:!fir.array<5xi32>}> - ! CHECK: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_7]], %[[VAL_12b]] : (!fir.ref}>>, !fir.field) -> !fir.ref> + ! CHECK: %[[VAL_13:.*]] = fir.coordinate_of %[[VAL_1]], i : (!fir.ref}>>) -> !fir.ref> + ! CHECK: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_7]], i : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_15:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : index ! CHECK: %[[VAL_17:.*]] = arith.constant 4 : index @@ -238,12 +223,10 @@ subroutine print_nested(t) ! CHECK-LABEL: func.func @_QPtest_parent_component1() { ! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QFtest_parent_component1Tmid{x:i32,y:!fir.array<2xi32>,mask:!fir.logical<4>}> -! CHECK: %[[VAL_14:.*]] = fir.field_index x, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref}>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: fir.store %[[VAL_16]] to %[[VAL_15]] : !fir.ref -! CHECK: %[[VAL_17:.*]] = fir.field_index y, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_17]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_0]], y : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_19:.*]] = arith.constant 2 : index ! CHECK: %[[VAL_20:.*]] = fir.shape %[[VAL_19]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_21:.*]] = fir.array_load %[[VAL_18]](%[[VAL_20]]) : (!fir.ref>, !fir.shape<1>) -> !fir.array<2xi32> @@ -260,16 +243,12 @@ subroutine print_nested(t) ! CHECK: } ! 
CHECK: fir.array_merge_store %[[VAL_21]], %[[VAL_34:.*]] to %[[VAL_18]] : !fir.array<2xi32>, !fir.array<2xi32>, !fir.ref> ! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_1]] : (!fir.ref,mask:!fir.logical<4>}>>) -> !fir.ref}>> -! CHECK: %[[VAL_36:.*]] = fir.field_index x, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_37:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_36]] : (!fir.ref}>>, !fir.field) -> !fir.ref -! CHECK: %[[VAL_38:.*]] = fir.field_index x, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_39:.*]] = fir.coordinate_of %[[VAL_35]], %[[VAL_38]] : (!fir.ref}>>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_37:.*]] = fir.coordinate_of %[[VAL_0]], x : (!fir.ref}>>) -> !fir.ref +! CHECK: %[[VAL_39:.*]] = fir.coordinate_of %[[VAL_35]], x : (!fir.ref}>>) -> !fir.ref ! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_37]] : !fir.ref ! CHECK: fir.store %[[VAL_40]] to %[[VAL_39]] : !fir.ref -! CHECK: %[[VAL_41:.*]] = fir.field_index y, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_42:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_41]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[VAL_43:.*]] = fir.field_index y, !fir.type<_QFtest_parent_component1Tbase{x:i32,y:!fir.array<2xi32>}> -! CHECK: %[[VAL_44:.*]] = fir.coordinate_of %[[VAL_35]], %[[VAL_43]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_42:.*]] = fir.coordinate_of %[[VAL_0]], y : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[VAL_44:.*]] = fir.coordinate_of %[[VAL_35]], y : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_45:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_46:.*]] = arith.constant 1 : index ! CHECK: %[[VAL_47:.*]] = arith.constant 1 : index @@ -279,8 +258,7 @@ subroutine print_nested(t) ! CHECK: %[[VAL_51:.*]] = fir.load %[[VAL_50]] : !fir.ref ! CHECK: fir.store %[[VAL_51]] to %[[VAL_49]] : !fir.ref ! CHECK: } -! 
CHECK: %[[VAL_52:.*]] = fir.field_index mask, !fir.type<_QFtest_parent_component1Tmid{x:i32,y:!fir.array<2xi32>,mask:!fir.logical<4>}> -! CHECK: %[[VAL_53:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_52]] : (!fir.ref,mask:!fir.logical<4>}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_53:.*]] = fir.coordinate_of %[[VAL_1]], mask : (!fir.ref,mask:!fir.logical<4>}>>) -> !fir.ref> ! CHECK: %[[VAL_54:.*]] = arith.constant true ! CHECK: %[[VAL_55:.*]] = fir.convert %[[VAL_54]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_55]] to %[[VAL_53]] : !fir.ref> @@ -303,10 +281,8 @@ subroutine test_parent_component1() ! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.type<_QFtest_parent_component2Tmid{z:!fir.char<1,5>,mask:!fir.logical<4>}> ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFtest_parent_component2Epv) : !fir.ref}>> ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_0]] : (!fir.ref,mask:!fir.logical<4>}>>) -> !fir.ref}>> -! CHECK: %[[VAL_9:.*]] = fir.field_index z, !fir.type<_QFtest_parent_component2Tbase{z:!fir.char<1,5>}> -! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_9]] : (!fir.ref}>>, !fir.field) -> !fir.ref> -! CHECK: %[[VAL_11:.*]] = fir.field_index z, !fir.type<_QFtest_parent_component2Tbase{z:!fir.char<1,5>}> -! CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_8]], %[[VAL_11]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_10:.*]] = fir.coordinate_of %[[VAL_1]], z : (!fir.ref}>>) -> !fir.ref> +! CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_8]], z : (!fir.ref}>>) -> !fir.ref> ! CHECK: %[[VAL_13:.*]] = arith.constant 5 : index ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64 ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 @@ -315,8 +291,7 @@ subroutine test_parent_component1() ! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_12]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_10]] : (!fir.ref>) -> !fir.ref ! 
CHECK: fir.call @llvm.memmove.p0.p0.i64(%[[VAL_18]], %[[VAL_19]], %[[VAL_16]], %[[VAL_17]]) {{.*}}: (!fir.ref, !fir.ref, i64, i1) -> () -! CHECK: %[[VAL_20:.*]] = fir.field_index mask, !fir.type<_QFtest_parent_component2Tmid{z:!fir.char<1,5>,mask:!fir.logical<4>}> -! CHECK: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_20]] : (!fir.ref,mask:!fir.logical<4>}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_21:.*]] = fir.coordinate_of %[[VAL_0]], mask : (!fir.ref,mask:!fir.logical<4>}>>) -> !fir.ref> ! CHECK: %[[VAL_22:.*]] = arith.constant true ! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[VAL_23]] to %[[VAL_21]] : !fir.ref> @@ -347,8 +322,7 @@ subroutine test_parent_component2() ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>}>>>) -> !fir.ref> ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_9]] : (!fir.box>}>>>) -> !fir.box ! CHECK: fir.call @_FortranAAssign(%[[VAL_14]], %[[VAL_15]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, !fir.box, !fir.ref, i32) -> () -! CHECK: %[[VAL_18:.*]] = fir.field_index mask, !fir.type<_QFtest_parent_component3Tmid{m:!fir.array<2x!fir.char<1,5>>,mask:!fir.logical<4>}> -! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_18]] : (!fir.ref>,mask:!fir.logical<4>}>>, !fir.field) -> !fir.ref> +! CHECK: %[[VAL_19:.*]] = fir.coordinate_of %[[VAL_2]], mask : (!fir.ref>,mask:!fir.logical<4>}>>) -> !fir.ref> ! CHECK: %[[VAL_20:.*]] = arith.constant true ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4> ! 
CHECK: fir.store %[[VAL_21]] to %[[VAL_19]] : !fir.ref> diff --git a/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir b/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir index bcf8b63075dbf..121ee553b51e2 100644 --- a/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir +++ b/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir @@ -28,7 +28,7 @@ func.func @_QQmain() { } // CHECK: %[[RECORD_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = "_QFEdst_record"} -// CHECK: %[[FIELD_COORD:.*]] = fir.coordinate_of %[[RECORD_DECL]]#1, %{{c1.*}} +// CHECK: %[[FIELD_COORD:.*]] = fir.coordinate_of %[[RECORD_DECL]]#1, to_implicitly_map // CHECK: %[[UPPER_BOUND:.*]] = arith.subi %{{.*}}#1, %{{c1.*}} : index From 58b5dc1ddf661092150987dcac2d8e66adf19f66 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 28 Feb 2025 09:54:47 +0100 Subject: [PATCH 067/123] [Coroutines] Mark parameter allocas with coro.outside.frame metadata (#127653) Parameters to a coroutine get copied (moved) to coroutine-local instances which code inside the coroutine then uses. The original parameters should not be part of the frame. Normally CoroSplit figures that out by itself, but for [[clang::trivial_abi]] parameters which, get destructed at the end of the ramp function, it does not (see bug), causing use-after-free's if the frame is destroyed before the end of the ramp (as happens if it doesn't suspend). Since Clang knows these should never be part of the frame, use metadata to make it so. 
Fixes #127499 --- clang/lib/CodeGen/CGCoroutine.cpp | 14 ++++ clang/test/CodeGenCoroutines/coro-params.cpp | 73 +++++++++++++++++--- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 058ec01f8ce0e..a9795c2c0dc8f 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -855,6 +855,20 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { // Create parameter copies. We do it before creating a promise, since an // evolution of coroutine TS may allow promise constructor to observe // parameter copies. + for (const ParmVarDecl *Parm : FnArgs) { + // If the original param is in an alloca, exclude it from the coroutine + // frame. The parameter copy will be part of the frame, but the original + // parameter memory should remain on the stack. This is necessary to + // ensure that parameters destroyed in callees, as with `trivial_abi` or + // in the MSVC C++ ABI, are appropriately destroyed after setting up the + // coroutine. 
+ Address ParmAddr = GetAddrOfLocalVar(Parm); + if (auto *ParmAlloca = + dyn_cast(ParmAddr.getBasePointer())) { + ParmAlloca->setMetadata(llvm::LLVMContext::MD_coro_outside_frame, + llvm::MDNode::get(CGM.getLLVMContext(), {})); + } + } for (auto *PM : S.getParamMoves()) { EmitStmt(PM); ParamReplacer.addCopy(cast(PM)); diff --git a/clang/test/CodeGenCoroutines/coro-params.cpp b/clang/test/CodeGenCoroutines/coro-params.cpp index b318f2f52ac09..719726cca29c5 100644 --- a/clang/test/CodeGenCoroutines/coro-params.cpp +++ b/clang/test/CodeGenCoroutines/coro-params.cpp @@ -3,6 +3,7 @@ // Vefifies that parameter copies are used in the body of the coroutine // Verifies that parameter copies are used to construct the promise type, if that type has a matching constructor // RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes -fexceptions | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-pc-win32 -emit-llvm -o - %s -disable-llvm-passes -fexceptions | FileCheck %s --check-prefix=MSABI namespace std { template struct coroutine_traits; @@ -59,13 +60,22 @@ struct MoveAndCopy { ~MoveAndCopy(); }; -void consume(int,int,int) noexcept; +struct [[clang::trivial_abi]] TrivialABI { + int val; + TrivialABI(TrivialABI&&) noexcept; + ~TrivialABI(); +}; + +void consume(int,int,int,int) noexcept; // TODO: Add support for CopyOnly params -// CHECK: define{{.*}} void @_Z1fi8MoveOnly11MoveAndCopy(i32 noundef %val, ptr noundef %[[MoParam:.+]], ptr noundef %[[McParam:.+]]) #0 personality ptr @__gxx_personality_v0 -void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { +// CHECK: define{{.*}} void @_Z1fi8MoveOnly11MoveAndCopy10TrivialABI(i32 noundef %val, ptr noundef %[[MoParam:.+]], ptr noundef %[[McParam:.+]], i32 %[[TrivialParam:.+]]) #0 personality ptr @__gxx_personality_v0 +void f(int val, MoveOnly moParam, MoveAndCopy mcParam, TrivialABI trivialParam) { + // CHECK: %[[TrivialAlloca:.+]] = alloca %struct.TrivialABI, + // CHECK-SAME: 
!coro.outside.frame // CHECK: %[[MoCopy:.+]] = alloca %struct.MoveOnly, // CHECK: %[[McCopy:.+]] = alloca %struct.MoveAndCopy, + // CHECK: %[[TrivialCopy:.+]] = alloca %struct.TrivialABI, // CHECK: store i32 %val, ptr %[[ValAddr:.+]] // CHECK: call ptr @llvm.coro.begin( @@ -73,25 +83,31 @@ void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { // CHECK-NEXT: call void @llvm.lifetime.start.p0( // CHECK-NEXT: call void @_ZN11MoveAndCopyC1EOS_(ptr {{[^,]*}} %[[McCopy]], ptr noundef nonnull align 4 dereferenceable(4) %[[McParam]]) # // CHECK-NEXT: call void @llvm.lifetime.start.p0( - // CHECK-NEXT: invoke void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeC1Ev( + // CHECK-NEXT: call void @_ZN10TrivialABIC1EOS_(ptr {{[^,]*}} %[[TrivialCopy]], ptr {{[^,]*}} %[[TrivialAlloca]]) + // CHECK-NEXT: call void @llvm.lifetime.start.p0( + // CHECK-NEXT: invoke void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopy10TrivialABIEE12promise_typeC1Ev( // CHECK: call void @_ZN14suspend_always12await_resumeEv( // CHECK: %[[IntParam:.+]] = load i32, ptr %{{.*}} // CHECK: %[[MoGep:.+]] = getelementptr inbounds nuw %struct.MoveOnly, ptr %[[MoCopy]], i32 0, i32 0 // CHECK: %[[MoVal:.+]] = load i32, ptr %[[MoGep]] - // CHECK: %[[McGep:.+]] = getelementptr inbounds nuw %struct.MoveAndCopy, ptr %[[McCopy]], i32 0, i32 0 + // CHECK: %[[McGep:.+]] = getelementptr inbounds nuw %struct.MoveAndCopy, ptr %[[McCopy]], i32 0, i32 0 // CHECK: %[[McVal:.+]] = load i32, ptr %[[McGep]] - // CHECK: call void @_Z7consumeiii(i32 noundef %[[IntParam]], i32 noundef %[[MoVal]], i32 noundef %[[McVal]]) + // CHECK: %[[TrivialGep:.+]] = getelementptr inbounds nuw %struct.TrivialABI, ptr %[[TrivialCopy]], i32 0, i32 0 + // CHECK: %[[TrivialVal:.+]] = load i32, ptr %[[TrivialGep]] + // CHECK: call void @_Z7consumeiiii(i32 noundef %[[IntParam]], i32 noundef %[[MoVal]], i32 noundef %[[McVal]], i32 noundef %[[TrivialVal]]) - consume(val, moParam.val, mcParam.val); + consume(val, moParam.val, 
mcParam.val, trivialParam.val); co_return; // Skip to final suspend: - // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_type13final_suspendEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopy10TrivialABIEE12promise_type13final_suspendEv( // CHECK: call void @_ZN14suspend_always12await_resumeEv( // Destroy promise, then parameter copies: - // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeD1Ev(ptr {{[^,]*}} %__promise) + // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopy10TrivialABIEE12promise_typeD1Ev(ptr {{[^,]*}} %__promise) + // CHECK-NEXT: call void @llvm.lifetime.end.p0( + // CHECK-NEXT: call void @_ZN10TrivialABID1Ev(ptr {{[^,]*}} %[[TrivialCopy]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0( // CHECK-NEXT: call void @_ZN11MoveAndCopyD1Ev(ptr {{[^,]*}} %[[McCopy]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0( @@ -99,6 +115,10 @@ void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { // CHECK-NEXT: call void @llvm.lifetime.end.p0( // CHECK-NEXT: call void @llvm.lifetime.end.p0( // CHECK-NEXT: call ptr @llvm.coro.free( + + // The original trivial_abi parameter is destroyed when returning from the ramp. 
+ // CHECK: call i1 @llvm.coro.end + // CHECK: call void @_ZN10TrivialABID1Ev(ptr {{[^,]*}} %[[TrivialAlloca]]) } // CHECK-LABEL: void @_Z16dependent_paramsI1A1BEvT_T0_S3_(ptr noundef %x, ptr noundef %0, ptr noundef %y) @@ -190,3 +210,38 @@ method some_class::good_coroutine_calls_custom_constructor(float) { // CHECK: invoke void @_ZNSt16coroutine_traitsIJ6methodR10some_classfEE12promise_typeC1ES2_f(ptr {{[^,]*}} %__promise, ptr noundef nonnull align 1 dereferenceable(1) %{{.+}}, float co_return; } + + +struct MSParm { + int val; + ~MSParm(); +}; + +void consume(int) noexcept; + +// Similarly to the [[clang::trivial_abi]] parameters, with the MSVC ABI +// parameters are also destroyed by the callee, and on x86-64 such parameters +// may get passed in registers. In that case it's again important that the +// parameter's local alloca does not become part of the coro frame since that +// may be destroyed before the destructor call. +void msabi(MSParm p) { + // MSABI: define{{.*}} void @"?msabi@@YAXUMSParm@@@Z"(i32 %[[Param:.+]]) + + // The parameter's local alloca is marked not part of the frame. + // MSABI: %[[ParamAlloca:.+]] = alloca %struct.MSParm + // MSABI-SAME: !coro.outside.frame + + // MSABI: %[[ParamCopy:.+]] = alloca %struct.MSParm + + consume(p.val); + // The parameter's copy is used by the coroutine. + // MSABI: %[[ValPtr:.+]] = getelementptr inbounds nuw %struct.MSParm, ptr %[[ParamCopy]], i32 0, i32 0 + // MSABI: %[[Val:.+]] = load i32, ptr %[[ValPtr]] + // MSABI: call void @"?consume@@YAXH@Z"(i32{{.*}} %[[Val]]) + + co_return; + + // The local alloca is used for the destructor call at the end of the ramp. 
+ // MSABI: call i1 @llvm.coro.end + // MSABI: call void @"??1MSParm@@QEAA@XZ"(ptr{{.*}} %[[ParamAlloca]]) +} From cfc89e2eedb6c4f430897d1e94bc59b1465e64a5 Mon Sep 17 00:00:00 2001 From: Jonathan Albrecht Date: Fri, 28 Feb 2025 04:16:19 -0500 Subject: [PATCH 068/123] [SystemZ] Add header guard macros to vecintrin.h (#129170) Add header guard macros to clang/lib/Headers/vecintrin.h. Found while compiling the latest numpy with clang 19 on s390x which ends up including vecintrin.h twice. The gcc version of this file has header guards so numpy compiles fine with gcc. Signed-off-by: Jonathan Albrecht --- clang/lib/Headers/vecintrin.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h index a14c39f9f7313..338ea51ce8863 100644 --- a/clang/lib/Headers/vecintrin.h +++ b/clang/lib/Headers/vecintrin.h @@ -7,6 +7,9 @@ *===-----------------------------------------------------------------------=== */ +#ifndef _VECINTRIN_H +#define _VECINTRIN_H + #if defined(__s390x__) && defined(__VEC__) #define __ATTRS_ai __attribute__((__always_inline__)) @@ -12861,3 +12864,5 @@ vec_search_string_until_zero_cc(__vector unsigned int __a, #error "Use -fzvector to enable vector extensions" #endif + +#endif /* _VECINTRIN_H */ From f749146021a6efaf39be3d8eeef1c7e6b82fe455 Mon Sep 17 00:00:00 2001 From: Mikhail Goncharov Date: Fri, 28 Feb 2025 10:20:16 +0100 Subject: [PATCH 069/123] [bazel] fix build after bafd44bff58cff9efe569a221b232bab004d55cd --- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index cc0c7c65b736b..6f5d381666e6d 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -58,6 +58,7 @@ py_binary( name = "hdrgen", srcs = glob(["utils/hdrgen/hdrgen/**/*.py"]), imports = ["utils/hdrgen"], + main = 
"utils/hdrgen/hdrgen/main.py", ) ################################## Base Config ################################# From 10d99adc93cfba5b3da7b1c1cd6bdb923fc0241b Mon Sep 17 00:00:00 2001 From: Devon Loehr Date: Fri, 28 Feb 2025 04:25:55 -0500 Subject: [PATCH 070/123] Disable unique-object-duplication warning in templates (#129120) I've been trying to resolve instances of the unique-object-duplication warning in chromium code. Unfortunately, I've found that practically speaking, it's near-impossible to actually fix the problem when templates are involved. My understanding is that the warning is correct -- the variables it's flagging are indeed duplicated and potentially causing bugs as a result. The problem is that hiddenness is contagious: if a templated class or variable depends on something hidden, then it itself must also be hidden, even if the user explicitly marked it visible. In order to make it actually visible, the user must manually figure out everything that it depends on, mark them as visible, and do so recursively until all of its ancestors are visible. This process is extremely difficult and unergonomic, negating much of the benefits of templates since now each new use requires additional work. Furthermore, the process doesn't work if the user can't edit some of the files, e.g. if they're in a third-party library. Since a warning that can't practically be fixed isn't useful, this PR disables the warning for _all_ templated code by inverting the check. The warning remains active (and, in my experience, easily fixable) in non-templated code. 
--- clang/lib/Sema/SemaDecl.cpp | 14 ++-- .../test/SemaCXX/unique_object_duplication.h | 76 ++----------------- 2 files changed, 15 insertions(+), 75 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 285bd27a35a76..86e65e56accc8 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -13427,9 +13427,13 @@ bool Sema::GloballyUniqueObjectMightBeAccidentallyDuplicated( FunDcl->getTemplateSpecializationKind() != TSK_Undeclared; } - // Non-inline functions/variables can only legally appear in one TU, - // unless they were part of a template. - if (!TargetIsInline && !TargetWasTemplated) + // Non-inline functions/variables can only legally appear in one TU + // unless they were part of a template. Unfortunately, making complex + // template instantiations visible is infeasible in practice, since + // everything the template depends on also has to be visible. To avoid + // giving impractical-to-fix warnings, don't warn if we're inside + // something that was templated, even on inline stuff. + if (!TargetIsInline || TargetWasTemplated) return false; // If the object isn't hidden, the dynamic linker will prevent duplication. @@ -13469,8 +13473,8 @@ void Sema::DiagnoseUniqueObjectDuplication(const VarDecl *VD) { // FIXME: Windows uses dllexport/dllimport instead of visibility, and we don't // handle that yet. Disable the warning on Windows for now. - // Don't diagnose if we're inside a template; - // we'll diagnose during instantiation instead. + // Don't diagnose if we're inside a template, because it's not practical to + // fix the warning in most cases. 
if (!Context.getTargetInfo().shouldDLLImportComdatSymbols() && !VD->isTemplated() && GloballyUniqueObjectMightBeAccidentallyDuplicated(VD)) { diff --git a/clang/test/SemaCXX/unique_object_duplication.h b/clang/test/SemaCXX/unique_object_duplication.h index 861175766db70..e5c63efbf918c 100644 --- a/clang/test/SemaCXX/unique_object_duplication.h +++ b/clang/test/SemaCXX/unique_object_duplication.h @@ -165,81 +165,17 @@ namespace GlobalTest { namespace TemplateTest { -template -int disallowedTemplate1 = 0; // hidden-warning {{'disallowedTemplate1' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -template int disallowedTemplate1; // hidden-note {{in instantiation of}} - - -// Should work for implicit instantiation as well -template -int disallowedTemplate2 = 0; // hidden-warning {{'disallowedTemplate2' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -int implicit_instantiate() { - return disallowedTemplate2; // hidden-note {{in instantiation of}} -} - +// We never warn inside templates because it's frequently infeasible to actually +// fix the warning. 
-// Ensure we only get warnings for templates that are actually instantiated template -int maybeAllowedTemplate = 0; // Not instantiated, so no warning here - -template -int maybeAllowedTemplate = 1; // hidden-warning {{'maybeAllowedTemplate' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -template <> -int maybeAllowedTemplate = 2; // hidden-warning {{'maybeAllowedTemplate' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -template int maybeAllowedTemplate; // hidden-note {{in instantiation of}} +int allowedTemplate1 = 0; - - -// Should work the same for static class members -template -struct S { - static int staticMember; -}; +template int allowedTemplate1; template -int S::staticMember = 0; // Never instantiated +inline int allowedTemplate2 = 0; -// T* specialization -template -struct S { - static int staticMember; -}; - -template -int S::staticMember = 1; // hidden-warning {{'staticMember' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -template class S; // hidden-note {{in instantiation of}} - -// T& specialization, implicitly instantiated -template -struct S { - static int staticMember; -}; - -template -int S::staticMember = 2; // hidden-warning {{'staticMember' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - -int implicit_instantiate2() { - return S::staticMember; // hidden-note {{in instantiation of}} -} - - -// Should work for static locals as well -template -int* wrapper() { - static int staticLocal; // hidden-warning {{'staticLocal' may be duplicated when built into a shared library: it is mutable, has hidden visibility, and external linkage}} - return &staticLocal; -} - -template <> -int* wrapper() { - static int staticLocal; // hidden-warning {{'staticLocal' may be duplicated when built 
into a shared library: it is mutable, has hidden visibility, and external linkage}} - return &staticLocal; -} +template int allowedTemplate2; -auto dummy = wrapper(); // hidden-note {{in instantiation of}} } // namespace TemplateTest \ No newline at end of file From 95f309c5eb01df78aea3e66f5e2f2139cadfc98d Mon Sep 17 00:00:00 2001 From: pvanhout Date: Fri, 28 Feb 2025 10:29:42 +0100 Subject: [PATCH 071/123] [NFC][clang] Remove trailing whitespace in Options.td --- clang/include/clang/Driver/Options.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 883d6a969c258..75b1c51445942 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3176,7 +3176,7 @@ def modules_reduced_bmi : Flag<["-"], "fmodules-reduced-bmi">, HelpText<"Generate the reduced BMI">, MarshallingInfoFlag>; -def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">, +def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">, Group, Visibility<[ClangOption, CC1Option]>, Alias; def fmodules_embed_all_files : Joined<["-"], "fmodules-embed-all-files">, @@ -7431,7 +7431,7 @@ def fuse_register_sized_bitfield_access: Flag<["-"], "fuse-register-sized-bitfie def relaxed_aliasing : Flag<["-"], "relaxed-aliasing">, HelpText<"Turn off Type Based Alias Analysis">, MarshallingInfoFlag>; -defm pointer_tbaa: BoolOption<"", "pointer-tbaa", CodeGenOpts<"PointerTBAA">, +defm pointer_tbaa: BoolOption<"", "pointer-tbaa", CodeGenOpts<"PointerTBAA">, DefaultTrue, PosFlag, NegFlag, From 67ab38af8036a880879fe556b0bb38d5714932f2 Mon Sep 17 00:00:00 2001 From: Mikhail Goncharov Date: Fri, 28 Feb 2025 10:31:31 +0100 Subject: [PATCH 072/123] [bazel] port 15c49b9db3f60bdbd320271d5e97f118c00b95dd --- utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git 
a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel index 010a617066c7b..4aa9d562124fe 100644 --- a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel @@ -241,6 +241,7 @@ cc_test( "//clang:lex", "//clang:sema", "//clang:serialization", + "//clang:tooling", "//llvm:Support", "//llvm:TargetParser", "//third-party/unittest:gmock", From 7104c0adf1d67fa09dececb006eedbfbc39a3fad Mon Sep 17 00:00:00 2001 From: klensy Date: Fri, 28 Feb 2025 13:04:16 +0300 Subject: [PATCH 073/123] [flang][test] Fix filecheck annotation typos [2/n] (#126099) Few more fixes, previous: #92387 Co-authored-by: klensy --- flang/test/Driver/config-file.f90 | 6 +++--- flang/test/Lower/CUDA/cuda-data-transfer.cuf | 2 +- flang/test/Lower/HLFIR/type-info-components.f90 | 6 +++--- .../target-private-multiple-variables.f90 | 6 +++--- flang/test/Lower/OpenMP/copyprivate2.f90 | 2 +- flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/flang/test/Driver/config-file.f90 b/flang/test/Driver/config-file.f90 index 6991fda9bd483..2ad9b71d01613 100644 --- a/flang/test/Driver/config-file.f90 +++ b/flang/test/Driver/config-file.f90 @@ -77,13 +77,13 @@ ! CHECK-LINKING-LIBOMP-GOES-AFTER: "--as-needed" "{{.*}}-{{.*}}.o" "-lmylib" "foo.a" "-lm" "-Bstatic" "-lhappy" "-Bdynamic" {{.*}}"-lomp" ! CHECK-NOLINKING: Configuration file: {{.*}}Inputs{{.}}config-l.cfg ! CHECK-NOLINKING: "-ffast-math" -! CHECK-NOLINKING-NO: "-lm" "-Bstatic" "-lhappy" "-Bdynamic" +! CHECK-NOLINKING-NOT: "-lm" "-Bstatic" "-lhappy" "-Bdynamic" ! CHECK-NOLINKING-OPENMP: Configuration file: {{.*}}Inputs{{.}}config-l.cfg ! CHECK-NOLINKING-OPENMP: "-ffast-math" {{.*}}"-fopenmp" -! CHECK-NOLINKING-OPENMP-NO: "-lm" "-Bstatic" "-lhappy" "-Bdynamic" {{.}}"-lomp" +! 
CHECK-NOLINKING-OPENMP-NOT: "-lm" "-Bstatic" "-lhappy" "-Bdynamic" {{.}}"-lomp" ! CHECK-LINKING-MSVC: Configuration file: {{.*}}Inputs{{.}}config-l.cfg ! CHECK-LINKING-MSVC: "-ffast-math" ! CHECK-LINKING-MSVC: "--as-needed" "{{.*}}-{{.*}}.o" "mylib.lib" "foo.lib" "m.lib" "-Bstatic" "happy.lib" "-Bdynamic" ! CHECK-NOLINKING-MSVC: Configuration file: {{.*}}Inputs{{.}}config-l.cfg ! CHECK-NOLINKING-MSVC: "-ffast-math" -! CHECK-NOLINKING-MSVC-NO: "m.lib" "-Bstatic" "happy.lib" "-Bdynamic" +! CHECK-NOLINKING-MSVC-NOT: "m.lib" "-Bstatic" "happy.lib" "-Bdynamic" diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index cbddcd79c6333..1c03a76cae76a 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -393,4 +393,4 @@ end subroutine ! CHECK: %[[ALLOC_TMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[TMP:.*]]:2 = hlfir.declare %[[ALLOC_TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) ! CHECK: cuf.data_transfer %[[ADEV_DECL]]#1 to %[[TMP]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.heap> -! CHECL: hlfir.assign +! CHECK: hlfir.assign diff --git a/flang/test/Lower/HLFIR/type-info-components.f90 b/flang/test/Lower/HLFIR/type-info-components.f90 index ee36f9cf6588f..9faf35656166e 100644 --- a/flang/test/Lower/HLFIR/type-info-components.f90 +++ b/flang/test/Lower/HLFIR/type-info-components.f90 @@ -17,7 +17,7 @@ subroutine test_1(x) type(sometype) :: x end subroutine ! CHECK-LABEL: fir.type_info @_QFtest_1Tsometype -! CHECK-SAME component_info { +! CHECK-SAME: component_info { ! CHECK: fir.dt_component "i" lbs [-1] init @_QFtest_1E.di.sometype.i ! CHECK-NOT: fir.dt_component "j" ! CHECK: fir.dt_component "p" init @_QFtest_1E.di.sometype.p @@ -35,7 +35,7 @@ subroutine test_nesting(x) type(sometype2) :: x end subroutine ! 
CHECK-LABEL: fir.type_info @_QFtest_nestingTsome_sub_type -! CHECK-SAME component_info { +! CHECK-SAME: component_info { ! CHECK: fir.dt_component "i" init @_QFtest_nestingE.di.some_sub_type.i ! CHECK: } @@ -50,6 +50,6 @@ subroutine data_like(x) type(sometype3) :: x end subroutine ! CHECK-LABEL: fir.type_info @_QFdata_likeTsometype3 -! CHECK-SAME component_info { +! CHECK-SAME: component_info { ! CHECK: fir.dt_component "i" init @_QFdata_likeE.di.sometype3.i ! CHECK: } diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 index 5d31de10d74f8..0b0d0e7ae3735 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 @@ -148,9 +148,9 @@ end subroutine target_allocatable ! CHECK-SAME: map_entries( ! CHECK-SAME: %[[MAPPED_MI0]] -> %[[MAPPED_ARG0:[^,]+]], ! CHECK-SAME: %[[ALLOC_VAR_MAP]] -> %[[MAPPED_ARG1:[^,]+]] -! CHECK-SAME %[[REAL_ARR_DESC_MAP]] -> %[[MAPPED_ARG2:[^,]+]] -! CHECK_SAME %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]] : -! CHECK-SAME !fir.ref, !fir.ref>>, !fir.ref>>, !fir.ref>) +! CHECK-SAME: %[[REAL_ARR_DESC_MAP]] -> %[[MAPPED_ARG2:[^,]+]] +! CHECK-SAME: %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]] : +! CHECK-SAME: !fir.ref, !fir.ref>>, !fir.ref>>, !fir.ref>) ! CHECK-SAME: private( ! CHECK-SAME: @[[ALLOC_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ALLOC_ARG:[^,]+]] [map_idx=1], ! 
CHECK-SAME: @[[REAL_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[REAL_ARG:[^,]+]], diff --git a/flang/test/Lower/OpenMP/copyprivate2.f90 b/flang/test/Lower/OpenMP/copyprivate2.f90 index 3412ba2c63c4d..993a81d199f56 100644 --- a/flang/test/Lower/OpenMP/copyprivate2.f90 +++ b/flang/test/Lower/OpenMP/copyprivate2.f90 @@ -43,7 +43,7 @@ !CHECK: omp.single copyprivate( !CHECK-SAME: %[[A]]#0 -> @_copy_box_heap_Uxi32 : !fir.ref>>>, !CHECK-SAME: %[[P]]#0 -> @_copy_box_ptr_i32 : !fir.ref>>) -!CHEK: } +!CHECK: } subroutine test_alloc_ptr() integer, allocatable :: a(:) integer, pointer :: p diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 index 85df29e83f75d..f3b5a3da4dc0b 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul-byref.f90 @@ -27,7 +27,7 @@ ! CHECK-SAME: alloc { ! CHECK: %[[REF:.*]] = fir.alloca i64 ! CHECK: omp.yield(%[[REF]] : !fir.ref) -! CHECK-LABE: } init { +! CHECK-LABEL: } init { ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref, %[[ALLOC:.*]]: !fir.ref): ! CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 ! CHECK: fir.store %[[VAL_1]] to %[[ALLOC]] : !fir.ref From 724930459799a76b3349dd7837eb9411494f4c05 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 28 Feb 2025 10:09:53 +0000 Subject: [PATCH 074/123] [AMDGPU] Cosmetic tweaks in AMDGPUAtomicOptimizer. NFC. (#129081) Simplify iteration over the ToReplace vector, and some related cosmetic cleanups. 
--- .../Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index e46d0587e7943..76b1775f0d096 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -151,23 +151,18 @@ PreservedAnalyses AMDGPUAtomicOptimizerPass::run(Function &F, } bool AMDGPUAtomicOptimizerImpl::run() { - // Scan option None disables the Pass - if (ScanImpl == ScanOptions::None) { + if (ScanImpl == ScanOptions::None) return false; - } visit(F); + if (ToReplace.empty()) + return false; - const bool Changed = !ToReplace.empty(); - - for (ReplacementInfo &Info : ToReplace) { - optimizeAtomic(*Info.I, Info.Op, Info.ValIdx, Info.ValDivergent); - } - + for (auto &[I, Op, ValIdx, ValDivergent] : ToReplace) + optimizeAtomic(*I, Op, ValIdx, ValDivergent); ToReplace.clear(); - - return Changed; + return true; } static bool isLegalCrossLaneType(Type *Ty) { @@ -247,9 +242,7 @@ void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) { // If we get here, we can optimize the atomic using a single wavefront-wide // atomic operation to do the calculation for the entire wavefront, so // remember the instruction so we can come back to it. - const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent}; - - ToReplace.push_back(Info); + ToReplace.push_back({&I, Op, ValIdx, ValDivergent}); } void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) { @@ -333,17 +326,14 @@ void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) { // If any of the other arguments to the intrinsic are divergent, we can't // optimize the operation. 
for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) { - if (UA.isDivergentUse(I.getOperandUse(Idx))) { + if (UA.isDivergentUse(I.getOperandUse(Idx))) return; - } } // If we get here, we can optimize the atomic using a single wavefront-wide // atomic operation to do the calculation for the entire wavefront, so // remember the instruction so we can come back to it. - const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent}; - - ToReplace.push_back(Info); + ToReplace.push_back({&I, Op, ValIdx, ValDivergent}); } // Use the builder to create the non-atomic counterpart of the specified From f6d73918c51f3a76f42f20d2ae1802278f726e64 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 28 Feb 2025 10:23:27 +0100 Subject: [PATCH 075/123] [CaptureTracking] Take non-willreturn calls into account We can leak one bit of information about the address by either diverging or not. Part of https://github.com/llvm/llvm-project/issues/129090. --- llvm/lib/Analysis/CaptureTracking.cpp | 6 +- .../test/Transforms/Attributor/nocapture-1.ll | 17 +- .../Transforms/FunctionAttrs/nocapture.ll | 160 +++++++++++------- llvm/test/Transforms/FunctionAttrs/nonnull.ll | 10 +- .../out-of-bounds-iterator-bug.ll | 2 +- 5 files changed, 121 insertions(+), 74 deletions(-) diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index 6e5748c233240..98f68d322287f 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -279,9 +279,9 @@ UseCaptureInfo llvm::DetermineUseCaptureKind( case Instruction::Invoke: { auto *Call = cast(I); // Not captured if the callee is readonly, doesn't return a copy through - // its return value and doesn't unwind (a readonly function can leak bits - // by throwing an exception or not depending on the input value). 
- if (Call->onlyReadsMemory() && Call->doesNotThrow() && + // its return value and doesn't unwind or diverge (a readonly function can + // leak bits by throwing an exception or not depending on the input value). + if (Call->onlyReadsMemory() && Call->doesNotThrow() && Call->willReturn() && Call->getType()->isVoidTy()) return CaptureComponents::None; diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 2e67b637eebf6..b9d2aaf972b23 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -337,9 +337,9 @@ define void @nc4(ptr %p) { ret void } -define void @nc5(ptr %f, ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@nc5 -; CHECK-SAME: (ptr nofree noundef nonnull captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +define void @callsite_readonly_nounwind_not_willreturn(ptr %f, ptr %p) { +; CHECK-LABEL: define {{[^@]+}}@callsite_readonly_nounwind_not_willreturn +; CHECK-SAME: (ptr nofree noundef nonnull captures(none) [[F:%.*]], ptr [[P:%.*]]) { ; CHECK-NEXT: call void [[F]](ptr captures(none) [[P]]) ; CHECK-NEXT: ret void ; @@ -348,6 +348,17 @@ define void @nc5(ptr %f, ptr %p) { ret void } +define void @callsite_readonly_nounwind_willreturn(ptr %f, ptr %p) { +; CHECK-LABEL: define {{[^@]+}}@callsite_readonly_nounwind_willreturn +; CHECK-SAME: (ptr nofree noundef nonnull captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +; CHECK-NEXT: call void [[F]](ptr captures(none) [[P]]) +; CHECK-NEXT: ret void +; + call void %f(ptr %p) readonly nounwind willreturn + call void %f(ptr nocapture %p) + ret void +} + ; It would be acceptable to add readnone to %y1_1 and %y1_2. 
define void @test1_1(ptr %x1_1, ptr %y1_1, i1 %c) { ; TUNIT: Function Attrs: nofree nosync nounwind memory(write) diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index cc23c435d96c6..401dee6b438ab 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -46,7 +46,7 @@ define void @c3(ptr %q) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) ; ATTRIBUTOR-LABEL: define void @c3 ; ATTRIBUTOR-SAME: (ptr nofree writeonly [[Q:%.*]]) #[[ATTR1]] { -; ATTRIBUTOR-NEXT: call void @c2(ptr nofree writeonly [[Q]]) #[[ATTR16:[0-9]+]] +; ATTRIBUTOR-NEXT: call void @c2(ptr nofree writeonly [[Q]]) #[[ATTR18:[0-9]+]] ; ATTRIBUTOR-NEXT: ret void ; call void @c2(ptr %q) @@ -232,7 +232,7 @@ define i1 @c7(ptr %q, i32 %bitno) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read) ; ATTRIBUTOR-LABEL: define i1 @c7 ; ATTRIBUTOR-SAME: (ptr nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR2]] { -; ATTRIBUTOR-NEXT: [[PTR:%.*]] = call ptr @lookup_bit(ptr nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR17:[0-9]+]] +; ATTRIBUTOR-NEXT: [[PTR:%.*]] = call ptr @lookup_bit(ptr nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR19:[0-9]+]] ; ATTRIBUTOR-NEXT: [[VAL:%.*]] = load i1, ptr [[PTR]], align 1 ; ATTRIBUTOR-NEXT: ret i1 [[VAL]] ; @@ -337,7 +337,7 @@ define void @nc2(ptr %p, ptr %q) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn ; ATTRIBUTOR-LABEL: define void @nc2 ; ATTRIBUTOR-SAME: (ptr nofree captures(none) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR5]] { -; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call i32 @nc1(ptr nofree [[Q]], ptr nofree captures(none) [[P]], i1 false) #[[ATTR18:[0-9]+]] +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call i32 @nc1(ptr nofree [[Q]], ptr nofree captures(none) [[P]], i1 false) #[[ATTR20:[0-9]+]] ; ATTRIBUTOR-NEXT: ret void ; 
%1 = call i32 @nc1(ptr %q, ptr %p, i1 0) ; [#uses=0] @@ -360,33 +360,51 @@ define void @nc3(ptr %p) { ret void } -declare void @external(ptr) readonly nounwind -define void @nc4(ptr %p) { +declare void @external_not_willreturn(ptr) readonly nounwind +define void @readononly_nounwind_not_willreturn(ptr %p) { ; FNATTRS: Function Attrs: nofree nounwind memory(read) -; FNATTRS-LABEL: define void @nc4 -; FNATTRS-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR9:[0-9]+]] { -; FNATTRS-NEXT: call void @external(ptr [[P]]) +; FNATTRS-LABEL: define void @readononly_nounwind_not_willreturn +; FNATTRS-SAME: (ptr readonly [[P:%.*]]) #[[ATTR9:[0-9]+]] { +; FNATTRS-NEXT: call void @external_not_willreturn(ptr [[P]]) ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nosync nounwind memory(read) -; ATTRIBUTOR-LABEL: define void @nc4 +; ATTRIBUTOR-LABEL: define void @readononly_nounwind_not_willreturn ; ATTRIBUTOR-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR7:[0-9]+]] { -; ATTRIBUTOR-NEXT: call void @external(ptr readonly captures(none) [[P]]) #[[ATTR4]] +; ATTRIBUTOR-NEXT: call void @external_not_willreturn(ptr readonly captures(none) [[P]]) #[[ATTR4]] ; ATTRIBUTOR-NEXT: ret void ; - call void @external(ptr %p) + call void @external_not_willreturn(ptr %p) ret void } -define void @nc5(ptr %f, ptr %p) { -; FNATTRS-LABEL: define void @nc5 -; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +declare void @external_willreturn(ptr) readonly nounwind willreturn +define void @readononly_nounwind_willreturn(ptr %p) { +; FNATTRS: Function Attrs: mustprogress nofree nounwind willreturn memory(read) +; FNATTRS-LABEL: define void @readononly_nounwind_willreturn +; FNATTRS-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { +; FNATTRS-NEXT: call void @external_willreturn(ptr [[P]]) +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nosync nounwind willreturn memory(read) +; ATTRIBUTOR-LABEL: define 
void @readononly_nounwind_willreturn +; ATTRIBUTOR-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR9:[0-9]+]] { +; ATTRIBUTOR-NEXT: call void @external_willreturn(ptr readonly captures(none) [[P]]) #[[ATTR21:[0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + call void @external_willreturn(ptr %p) + ret void +} + +define void @callsite_readonly_nounwind_not_willreturn(ptr %f, ptr %p) { +; FNATTRS-LABEL: define void @callsite_readonly_nounwind_not_willreturn +; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr [[P:%.*]]) { ; FNATTRS-NEXT: call void [[F]](ptr [[P]]) #[[ATTR8:[0-9]+]] ; FNATTRS-NEXT: call void [[F]](ptr captures(none) [[P]]) ; FNATTRS-NEXT: ret void ; -; ATTRIBUTOR-LABEL: define void @nc5 -; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +; ATTRIBUTOR-LABEL: define void @callsite_readonly_nounwind_not_willreturn +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[F:%.*]], ptr [[P:%.*]]) { ; ATTRIBUTOR-NEXT: call void [[F]](ptr [[P]]) #[[ATTR6:[0-9]+]] ; ATTRIBUTOR-NEXT: call void [[F]](ptr captures(none) [[P]]) ; ATTRIBUTOR-NEXT: ret void @@ -396,19 +414,37 @@ define void @nc5(ptr %f, ptr %p) { ret void } +define void @callsite_readonly_nounwind_willreturn(ptr %f, ptr %p) { +; FNATTRS-LABEL: define void @callsite_readonly_nounwind_willreturn +; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +; FNATTRS-NEXT: call void [[F]](ptr [[P]]) #[[ATTR10:[0-9]+]] +; FNATTRS-NEXT: call void [[F]](ptr captures(none) [[P]]) +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define void @callsite_readonly_nounwind_willreturn +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[F:%.*]], ptr captures(none) [[P:%.*]]) { +; ATTRIBUTOR-NEXT: call void [[F]](ptr [[P]]) #[[ATTR8:[0-9]+]] +; ATTRIBUTOR-NEXT: call void [[F]](ptr captures(none) [[P]]) +; ATTRIBUTOR-NEXT: ret void +; + call void %f(ptr %p) readonly nounwind willreturn + call void %f(ptr nocapture %p) + ret void 
+} + ; It would be acceptable to add readnone to %y1_1 and %y1_2. define void @test1_1(ptr %x1_1, ptr %y1_1, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test1_1 -; FNATTRS-SAME: (ptr readnone captures(none) [[X1_1:%.*]], ptr [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X1_1:%.*]], ptr [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR12:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = call ptr @test1_2(ptr [[X1_1]], ptr [[Y1_1]], i1 [[C]]) ; FNATTRS-NEXT: store ptr null, ptr @g, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define void @test1_1 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X1_1:%.*]], ptr nofree readnone captures(none) [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR8:[0-9]+]] { -; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call ptr @test1_2(ptr nofree readnone captures(none) [[X1_1]], ptr nofree readnone [[Y1_1]], i1 [[C]]) #[[ATTR8]] +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X1_1:%.*]], ptr nofree readnone captures(none) [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] { +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call ptr @test1_2(ptr nofree readnone captures(none) [[X1_1]], ptr nofree readnone [[Y1_1]], i1 [[C]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -420,7 +456,7 @@ define void @test1_1(ptr %x1_1, ptr %y1_1, i1 %c) { define ptr @test1_2(ptr %x1_2, ptr %y1_2, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define ptr @test1_2 -; FNATTRS-SAME: (ptr readnone captures(none) [[X1_2:%.*]], ptr returned [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X1_2:%.*]], ptr returned [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; 
FNATTRS: t: ; FNATTRS-NEXT: call void @test1_1(ptr [[X1_2]], ptr [[Y1_2]], i1 [[C]]) @@ -431,10 +467,10 @@ define ptr @test1_2(ptr %x1_2, ptr %y1_2, i1 %c) { ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define ptr @test1_2 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X1_2:%.*]], ptr nofree readnone [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X1_2:%.*]], ptr nofree readnone [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { ; ATTRIBUTOR-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; ATTRIBUTOR: t: -; ATTRIBUTOR-NEXT: call void @test1_1(ptr nofree readnone captures(none) [[X1_2]], ptr nofree readnone captures(none) [[Y1_2]], i1 [[C]]) #[[ATTR8]] +; ATTRIBUTOR-NEXT: call void @test1_1(ptr nofree readnone captures(none) [[X1_2]], ptr nofree readnone captures(none) [[Y1_2]], i1 [[C]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: br label [[F]] ; ATTRIBUTOR: f: @@ -452,15 +488,15 @@ f: define void @test2(ptr %x2) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test2 -; FNATTRS-SAME: (ptr readnone captures(none) [[X2:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X2:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: call void @test2(ptr [[X2]]) ; FNATTRS-NEXT: store ptr null, ptr @g, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define void @test2 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X2:%.*]]) #[[ATTR8]] { -; ATTRIBUTOR-NEXT: call void @test2(ptr nofree readnone captures(none) [[X2]]) #[[ATTR8]] +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X2:%.*]]) #[[ATTR10]] { +; ATTRIBUTOR-NEXT: call void @test2(ptr nofree readnone captures(none) [[X2]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret 
void ; @@ -472,15 +508,15 @@ define void @test2(ptr %x2) { define void @test3(ptr %x3, ptr %y3, ptr %z3) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test3 -; FNATTRS-SAME: (ptr readnone captures(none) [[X3:%.*]], ptr readnone captures(none) [[Y3:%.*]], ptr readnone captures(none) [[Z3:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X3:%.*]], ptr readnone captures(none) [[Y3:%.*]], ptr readnone captures(none) [[Z3:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: call void @test3(ptr [[Z3]], ptr [[Y3]], ptr [[X3]]) ; FNATTRS-NEXT: store ptr null, ptr @g, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define void @test3 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X3:%.*]], ptr nofree readnone captures(none) [[Y3:%.*]], ptr nofree readnone captures(none) [[Z3:%.*]]) #[[ATTR8]] { -; ATTRIBUTOR-NEXT: call void @test3(ptr nofree readnone captures(none) [[Z3]], ptr nofree readnone captures(none) [[Y3]], ptr nofree readnone captures(none) [[X3]]) #[[ATTR8]] +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X3:%.*]], ptr nofree readnone captures(none) [[Y3:%.*]], ptr nofree readnone captures(none) [[Z3:%.*]]) #[[ATTR10]] { +; ATTRIBUTOR-NEXT: call void @test3(ptr nofree readnone captures(none) [[Z3]], ptr nofree readnone captures(none) [[Y3]], ptr nofree readnone captures(none) [[X3]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -492,15 +528,15 @@ define void @test3(ptr %x3, ptr %y3, ptr %z3) { define void @test4_1(ptr %x4_1, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test4_1 -; FNATTRS-SAME: (ptr [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = call 
ptr @test4_2(ptr [[X4_1]], ptr [[X4_1]], ptr [[X4_1]], i1 [[C]]) ; FNATTRS-NEXT: store ptr null, ptr @g, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define void @test4_1 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { -; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call ptr @test4_2(ptr nofree readnone captures(none) [[X4_1]], ptr nofree readnone [[X4_1]], ptr nofree readnone captures(none) [[X4_1]], i1 [[C]]) #[[ATTR8]] +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = call ptr @test4_2(ptr nofree readnone captures(none) [[X4_1]], ptr nofree readnone [[X4_1]], ptr nofree readnone captures(none) [[X4_1]], i1 [[C]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -512,7 +548,7 @@ define void @test4_1(ptr %x4_1, i1 %c) { define ptr @test4_2(ptr %x4_2, ptr %y4_2, ptr %z4_2, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define ptr @test4_2 -; FNATTRS-SAME: (ptr readnone captures(none) [[X4_2:%.*]], ptr readnone returned captures(ret: address, provenance) [[Y4_2:%.*]], ptr readnone captures(none) [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (ptr readnone captures(none) [[X4_2:%.*]], ptr readnone returned captures(ret: address, provenance) [[Y4_2:%.*]], ptr readnone captures(none) [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; FNATTRS: t: ; FNATTRS-NEXT: call void @test4_1(ptr null, i1 [[C]]) @@ -523,10 +559,10 @@ define ptr @test4_2(ptr %x4_2, ptr %y4_2, ptr %z4_2, i1 %c) { ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define ptr @test4_2 -; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X4_2:%.*]], ptr nofree readnone 
[[Y4_2:%.*]], ptr nofree readnone captures(none) [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { +; ATTRIBUTOR-SAME: (ptr nofree readnone captures(none) [[X4_2:%.*]], ptr nofree readnone [[Y4_2:%.*]], ptr nofree readnone captures(none) [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { ; ATTRIBUTOR-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; ATTRIBUTOR: t: -; ATTRIBUTOR-NEXT: call void @test4_1(ptr nofree readnone null, i1 [[C]]) #[[ATTR8]] +; ATTRIBUTOR-NEXT: call void @test4_1(ptr nofree readnone null, i1 [[C]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr null, ptr @g, align 8 ; ATTRIBUTOR-NEXT: br label [[F]] ; ATTRIBUTOR: f: @@ -578,13 +614,13 @@ define void @test6_2(ptr %x6_2, ptr %y6_2, ptr %z6_2) { define void @test_cmpxchg(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; FNATTRS-LABEL: define void @test_cmpxchg -; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { +; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR13:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i32 0, i32 1 acquire monotonic, align 4 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; ATTRIBUTOR-LABEL: define void @test_cmpxchg -; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]]) #[[ATTR9:[0-9]+]] { +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i32 0, i32 1 acquire monotonic, align 4 ; ATTRIBUTOR-NEXT: ret void ; @@ -595,13 +631,13 @@ define void @test_cmpxchg(ptr %p) { define void @test_cmpxchg_ptr(ptr %p, ptr %q) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; FNATTRS-LABEL: define void @test_cmpxchg_ptr -; FNATTRS-SAME: (ptr captures(none) [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR11]] { +; FNATTRS-SAME: (ptr captures(none) [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR13]] 
{ ; FNATTRS-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], ptr null, ptr [[Q]] acquire monotonic, align 8 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; ATTRIBUTOR-LABEL: define void @test_cmpxchg_ptr -; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR9]] { +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]], ptr nofree [[Q:%.*]]) #[[ATTR11]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], ptr null, ptr [[Q]] acquire monotonic, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -612,13 +648,13 @@ define void @test_cmpxchg_ptr(ptr %p, ptr %q) { define void @test_atomicrmw(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; FNATTRS-LABEL: define void @test_atomicrmw -; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR11]] { +; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR13]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i32 1 seq_cst, align 4 ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; ATTRIBUTOR-LABEL: define void @test_atomicrmw -; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]]) #[[ATTR9]] { +; ATTRIBUTOR-SAME: (ptr nofree nonnull captures(none) [[P:%.*]]) #[[ATTR11]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i32 1 seq_cst, align 4 ; ATTRIBUTOR-NEXT: ret void ; @@ -629,7 +665,7 @@ define void @test_atomicrmw(ptr %p) { define void @test_volatile(ptr %x) { ; FNATTRS: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define void @test_volatile -; FNATTRS-SAME: (ptr [[X:%.*]]) #[[ATTR12:[0-9]+]] { +; FNATTRS-SAME: (ptr [[X:%.*]]) #[[ATTR14:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[X]], i64 1 ; FNATTRS-NEXT: store volatile 
i32 0, ptr [[GEP]], align 4 @@ -637,7 +673,7 @@ define void @test_volatile(ptr %x) { ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; ATTRIBUTOR-LABEL: define void @test_volatile -; ATTRIBUTOR-SAME: (ptr nofree [[X:%.*]]) #[[ATTR9]] { +; ATTRIBUTOR-SAME: (ptr nofree [[X:%.*]]) #[[ATTR11]] { ; ATTRIBUTOR-NEXT: entry: ; ATTRIBUTOR-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[X]], i64 1 ; ATTRIBUTOR-NEXT: store volatile i32 0, ptr [[GEP]], align 4 @@ -652,7 +688,7 @@ entry: define void @nocaptureLaunder(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write, inaccessiblemem: readwrite) ; FNATTRS-LABEL: define void @nocaptureLaunder -; FNATTRS-SAME: (ptr writeonly captures(none) [[P:%.*]]) #[[ATTR13:[0-9]+]] { +; FNATTRS-SAME: (ptr writeonly captures(none) [[P:%.*]]) #[[ATTR15:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) ; FNATTRS-NEXT: store i8 42, ptr [[B]], align 1 @@ -660,9 +696,9 @@ define void @nocaptureLaunder(ptr %p) { ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; ATTRIBUTOR-LABEL: define void @nocaptureLaunder -; ATTRIBUTOR-SAME: (ptr nofree captures(none) [[P:%.*]]) #[[ATTR10:[0-9]+]] { +; ATTRIBUTOR-SAME: (ptr nofree captures(none) [[P:%.*]]) #[[ATTR12:[0-9]+]] { ; ATTRIBUTOR-NEXT: entry: -; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) #[[ATTR19:[0-9]+]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) #[[ATTR22:[0-9]+]] ; ATTRIBUTOR-NEXT: store i8 42, ptr [[B]], align 1 ; ATTRIBUTOR-NEXT: ret void ; @@ -676,7 +712,7 @@ entry: define void @captureLaunder(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: readwrite) 
; FNATTRS-LABEL: define void @captureLaunder -; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR14:[0-9]+]] { +; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR16:[0-9]+]] { ; FNATTRS-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) ; FNATTRS-NEXT: store ptr [[B]], ptr @g2, align 8 ; FNATTRS-NEXT: ret void @@ -684,7 +720,7 @@ define void @captureLaunder(ptr %p) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn ; ATTRIBUTOR-LABEL: define void @captureLaunder ; ATTRIBUTOR-SAME: (ptr nofree [[P:%.*]]) #[[ATTR5]] { -; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) #[[ATTR19]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]]) #[[ATTR22]] ; ATTRIBUTOR-NEXT: store ptr [[B]], ptr @g2, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -696,7 +732,7 @@ define void @captureLaunder(ptr %p) { define void @nocaptureStrip(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define void @nocaptureStrip -; FNATTRS-SAME: (ptr writeonly captures(none) [[P:%.*]]) #[[ATTR15:[0-9]+]] { +; FNATTRS-SAME: (ptr writeonly captures(none) [[P:%.*]]) #[[ATTR17:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) ; FNATTRS-NEXT: store i8 42, ptr [[B]], align 1 @@ -704,9 +740,9 @@ define void @nocaptureStrip(ptr %p) { ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; ATTRIBUTOR-LABEL: define void @nocaptureStrip -; ATTRIBUTOR-SAME: (ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { +; ATTRIBUTOR-SAME: (ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR13:[0-9]+]] { ; ATTRIBUTOR-NEXT: entry: -; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) #[[ATTR17]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) #[[ATTR19]] 
; ATTRIBUTOR-NEXT: store i8 42, ptr [[B]], align 1 ; ATTRIBUTOR-NEXT: ret void ; @@ -728,7 +764,7 @@ define void @captureStrip(ptr %p) { ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) ; ATTRIBUTOR-LABEL: define void @captureStrip ; ATTRIBUTOR-SAME: (ptr nofree writeonly [[P:%.*]]) #[[ATTR1]] { -; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) #[[ATTR17]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]]) #[[ATTR19]] ; ATTRIBUTOR-NEXT: store ptr [[B]], ptr @g3, align 8 ; ATTRIBUTOR-NEXT: ret void ; @@ -870,14 +906,14 @@ define i1 @notInboundsGEPICmp(ptr %x) { define i1 @inboundsGEPICmpNullPointerDefined(ptr %x) null_pointer_is_valid { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; FNATTRS-LABEL: define i1 @inboundsGEPICmpNullPointerDefined -; FNATTRS-SAME: (ptr readnone captures(address) [[X:%.*]]) #[[ATTR16:[0-9]+]] { +; FNATTRS-SAME: (ptr readnone captures(address) [[X:%.*]]) #[[ATTR18:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[X]], i32 5 ; FNATTRS-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null ; FNATTRS-NEXT: ret i1 [[TMP2]] ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; ATTRIBUTOR-LABEL: define i1 @inboundsGEPICmpNullPointerDefined -; ATTRIBUTOR-SAME: (ptr nofree readnone [[X:%.*]]) #[[ATTR12:[0-9]+]] { +; ATTRIBUTOR-SAME: (ptr nofree readnone [[X:%.*]]) #[[ATTR14:[0-9]+]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[X]], i32 5 ; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null ; ATTRIBUTOR-NEXT: ret i1 [[TMP2]] @@ -907,13 +943,13 @@ define i1 @nocaptureDereferenceableOrNullICmp(ptr dereferenceable_or_null(4) %x) define i1 @captureDereferenceableOrNullICmp(ptr dereferenceable_or_null(4) %x) null_pointer_is_valid { ; FNATTRS: Function Attrs: 
mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; FNATTRS-LABEL: define noundef i1 @captureDereferenceableOrNullICmp -; FNATTRS-SAME: (ptr readnone captures(address_is_null) dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR16]] { +; FNATTRS-SAME: (ptr readnone captures(address_is_null) dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR18]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = icmp eq ptr [[X]], null ; FNATTRS-NEXT: ret i1 [[TMP1]] ; ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; ATTRIBUTOR-LABEL: define i1 @captureDereferenceableOrNullICmp -; ATTRIBUTOR-SAME: (ptr nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR12]] { +; ATTRIBUTOR-SAME: (ptr nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR14]] { ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = icmp eq ptr [[X]], null ; ATTRIBUTOR-NEXT: ret i1 [[TMP1]] ; @@ -962,14 +998,14 @@ define void @recurse_fptr(ptr %f, ptr %p) { define void @readnone_indirec(ptr %f, ptr %p) { ; FNATTRS: Function Attrs: nofree nosync memory(none) ; FNATTRS-LABEL: define void @readnone_indirec -; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR17:[0-9]+]] { -; FNATTRS-NEXT: call void [[F]](ptr [[P]]) #[[ATTR20:[0-9]+]] +; FNATTRS-SAME: (ptr readonly captures(none) [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR19:[0-9]+]] { +; FNATTRS-NEXT: call void [[F]](ptr [[P]]) #[[ATTR22:[0-9]+]] ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nosync memory(none) ; ATTRIBUTOR-LABEL: define void @readnone_indirec -; ATTRIBUTOR-SAME: (ptr nofree nonnull readnone captures(none) [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR13:[0-9]+]] { -; ATTRIBUTOR-NEXT: call void [[F]](ptr [[P]]) #[[ATTR20:[0-9]+]] +; ATTRIBUTOR-SAME: (ptr nofree nonnull readnone captures(none) [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR15:[0-9]+]] { +; ATTRIBUTOR-NEXT: call void [[F]](ptr [[P]]) #[[ATTR23:[0-9]+]] ; ATTRIBUTOR-NEXT: 
ret void ; call void %f(ptr %p) readnone @@ -1062,7 +1098,7 @@ define ptr @captures_used_ret(ptr %p) { define ptr @scc_capture_via_ret(i1 %c, ptr %p) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) ; FNATTRS-LABEL: define ptr @scc_capture_via_ret -; FNATTRS-SAME: (i1 [[C:%.*]], ptr [[P:%.*]]) #[[ATTR10]] { +; FNATTRS-SAME: (i1 [[C:%.*]], ptr [[P:%.*]]) #[[ATTR12]] { ; FNATTRS-NEXT: br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]] ; FNATTRS: if: ; FNATTRS-NEXT: [[C_NOT:%.*]] = xor i1 [[C]], true @@ -1074,11 +1110,11 @@ define ptr @scc_capture_via_ret(i1 %c, ptr %p) { ; ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind memory(write) ; ATTRIBUTOR-LABEL: define ptr @scc_capture_via_ret -; ATTRIBUTOR-SAME: (i1 [[C:%.*]], ptr nofree [[P:%.*]]) #[[ATTR8]] { +; ATTRIBUTOR-SAME: (i1 [[C:%.*]], ptr nofree [[P:%.*]]) #[[ATTR10]] { ; ATTRIBUTOR-NEXT: br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]] ; ATTRIBUTOR: if: ; ATTRIBUTOR-NEXT: [[C_NOT:%.*]] = xor i1 [[C]], true -; ATTRIBUTOR-NEXT: [[RET:%.*]] = call ptr @scc_capture_via_ret(i1 [[C_NOT]], ptr nofree [[P]]) #[[ATTR8]] +; ATTRIBUTOR-NEXT: [[RET:%.*]] = call ptr @scc_capture_via_ret(i1 [[C_NOT]], ptr nofree [[P]]) #[[ATTR10]] ; ATTRIBUTOR-NEXT: store ptr [[RET]], ptr @g, align 8 ; ATTRIBUTOR-NEXT: ret ptr [[RET]] ; ATTRIBUTOR: else: diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll index 94093568419af..483b560ece6c8 100644 --- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -1029,10 +1029,10 @@ define ptr @g1() { ret ptr %c } -declare void @use_i32_ptr(ptr) readnone nounwind +declare void @use_i32_ptr(ptr) readnone nounwind willreturn define internal void @called_by_weak(ptr %a) { ; FNATTRS-LABEL: define internal void @called_by_weak( -; FNATTRS-SAME: ptr readnone captures(none) [[A:%.*]]) #[[ATTR1]] { +; FNATTRS-SAME: ptr readnone captures(none) 
[[A:%.*]]) #[[ATTR10:[0-9]+]] { ; FNATTRS-NEXT: call void @use_i32_ptr(ptr [[A]]) ; FNATTRS-NEXT: ret void ; @@ -1064,7 +1064,7 @@ define weak_odr void @weak_caller(ptr nonnull %a) { ; Expect nonnull define internal void @control(ptr dereferenceable(4) %a) { ; FNATTRS-LABEL: define internal void @control( -; FNATTRS-SAME: ptr readnone captures(none) dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +; FNATTRS-SAME: ptr readnone captures(none) dereferenceable(4) [[A:%.*]]) #[[ATTR10]] { ; FNATTRS-NEXT: call void @use_i32_ptr(ptr [[A]]) ; FNATTRS-NEXT: ret void ; @@ -1079,7 +1079,7 @@ define internal void @control(ptr dereferenceable(4) %a) { ; Avoid nonnull as we do not touch naked functions define internal void @naked(ptr dereferenceable(4) %a) naked { ; FNATTRS-LABEL: define internal void @naked( -; FNATTRS-SAME: ptr dereferenceable(4) [[A:%.*]]) #[[ATTR10:[0-9]+]] { +; FNATTRS-SAME: ptr dereferenceable(4) [[A:%.*]]) #[[ATTR11:[0-9]+]] { ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR-LABEL: define internal void @naked( @@ -1091,7 +1091,7 @@ define internal void @naked(ptr dereferenceable(4) %a) naked { ; Avoid nonnull as we do not touch optnone define internal void @optnone(ptr dereferenceable(4) %a) optnone noinline { ; FNATTRS-LABEL: define internal void @optnone( -; FNATTRS-SAME: ptr dereferenceable(4) [[A:%.*]]) #[[ATTR11:[0-9]+]] { +; FNATTRS-SAME: ptr dereferenceable(4) [[A:%.*]]) #[[ATTR12:[0-9]+]] { ; FNATTRS-NEXT: call void @use_i32_ptr(ptr [[A]]) ; FNATTRS-NEXT: ret void ; diff --git a/llvm/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll b/llvm/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll index fd98b71cb5562..e2e3603e9cb43 100644 --- a/llvm/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll +++ b/llvm/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll @@ -6,7 +6,7 @@ declare void @llvm.va_start(ptr) declare void @llvm.va_end(ptr) -define void @va_func(ptr readonly %b, ...) 
readonly nounwind { +define void @va_func(ptr readonly %b, ...) readonly nounwind willreturn { ; CHECK-LABEL: define void @va_func(ptr readonly captures(none) %b, ...) entry: %valist = alloca i8 From f0bb41dfada16d27aacef31d7103727b93813f27 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 28 Feb 2025 17:24:28 +0700 Subject: [PATCH 076/123] AMDGPU/GlobalISel: Restore disabled test (#129001) --- .../AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll | 887 +++++++++--------- 1 file changed, 421 insertions(+), 466 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll index 53f6c9543c3e3..074272f7bed86 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -5,7 +5,6 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s -; REQUIRES: do-not-run-me define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) { ; GFX7-LABEL: v_div_fmas_f32: @@ -291,14 +290,14 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c -; GFX7-NEXT: s_load_dword s7, s[2:3], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa +; GFX7-NEXT: 
s_load_dword s3, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x1c +; GFX7-NEXT: s_load_dword s7, s[4:5], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: v_mov_b32_e32 v2, s6 ; GFX7-NEXT: s_and_b32 s2, 1, s7 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 @@ -311,17 +310,17 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f ; ; GFX8-LABEL: test_div_fmas_f32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 -; GFX8-NEXT: s_load_dword s5, s[2:3], 0x94 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x70 +; GFX8-NEXT: s_load_dword s3, s[4:5], 0x94 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: s_and_b32 s0, 1, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: s_and_b32 s0, 1, s3 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -333,14 +332,14 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f ; GFX10_W32-LABEL: test_div_fmas_f32: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x4 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x70 -; GFX10_W32-NEXT: s_load_dword s7, s[2:3], 0x28 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x4c +; GFX10_W32-NEXT: 
s_load_dword s6, s[4:5], 0x70 +; GFX10_W32-NEXT: s_load_dword s7, s[4:5], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1 @@ -351,14 +350,14 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f ; GFX10_W64-LABEL: test_div_fmas_f32: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x4 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x70 -; GFX10_W64-NEXT: s_load_dword s7, s[2:3], 0x28 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x70 +; GFX10_W64-NEXT: s_load_dword s7, s[4:5], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1 @@ -369,40 +368,36 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f ; GFX11_W32-LABEL: test_div_fmas_f32: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x4 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x70 -; GFX11_W32-NEXT: s_load_b32 s7, s[2:3], 0x28 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W32-NEXT: s_load_b32 
s3, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x70 +; GFX11_W32-NEXT: s_load_b32 s7, s[4:5], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s6 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x4 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x70 -; GFX11_W64-NEXT: s_load_b32 s7, s[2:3], 0x28 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x70 +; GFX11_W64-NEXT: s_load_b32 s7, s[4:5], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) store float %result, ptr addrspace(1) %out, align 4 @@ -412,33 +407,33 @@ define amdgpu_kernel void 
@test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x1c -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x1c +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: s_and_b32 s2, 1, s6 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_nop 2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_nop 1 ; GFX7-NEXT: v_div_fmas_f32 v0, 1.0, v0, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x70 -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x94 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x70 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x94 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: s_and_b32 s0, 1, s4 +; GFX8-NEXT: s_and_b32 s0, 1, s2 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, 1.0, v0, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -450,14 +445,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, ; 
GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -466,14 +461,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -482,37 +477,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, ; 
GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float 
@llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) store float %result, ptr addrspace(1) %out, align 4 @@ -522,33 +513,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0x2 -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x4 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0xd -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x4 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0xd +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: s_and_b32 s2, 1, s6 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_nop 2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_nop 1 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, 1.0, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x8 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x10 -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x34 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x10 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: s_and_b32 s0, 1, s4 +; GFX8-NEXT: s_and_b32 s0, 1, s2 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, 1.0, v1 ; GFX8-NEXT: 
s_waitcnt lgkmcnt(0) @@ -560,14 +551,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x34 -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x10 -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x8 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x34 +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x10 +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -576,14 +567,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x34 -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x10 -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x8 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x34 +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x10 +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 ; GFX10_W64-NEXT: global_store_dword v1, v0, 
s[0:1] @@ -592,37 +583,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x34 -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x10 -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x8 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x34 +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x10 +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x34 -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x10 -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x8 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x34 +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x10 +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) store float %result, ptr addrspace(1) %out, align 4 @@ -632,33 +619,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: s_and_b32 s2, 1, s6 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_nop 2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_nop 1 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, 1.0 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x94 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x94 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: s_and_b32 s0, 1, s4 +; GFX8-NEXT: s_and_b32 s0, 1, s2 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; 
GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, 1.0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -670,14 +657,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -686,14 +673,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x94 +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3 ; 
GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -702,37 +689,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, ; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x94 +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3 ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 ; 
GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) store float %result, ptr addrspace(1) %out, align 4 @@ -742,83 +725,83 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) { ; GFX7-LABEL: test_div_fmas_f64: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s0, s[2:3], 0x8 +; GFX7-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s0, s[4:5], 0x8 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s6 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 -; GFX7-NEXT: v_mov_b32_e32 v4, s10 +; GFX7-NEXT: v_mov_b32_e32 v0, s10 +; GFX7-NEXT: v_mov_b32_e32 v2, s12 +; GFX7-NEXT: v_mov_b32_e32 v4, s14 ; GFX7-NEXT: s_and_b32 s0, 1, s0 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mov_b32_e32 v3, s9 -; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: v_mov_b32_e32 v1, s11 +; GFX7-NEXT: v_mov_b32_e32 v3, s13 +; GFX7-NEXT: v_mov_b32_e32 v5, s15 ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX7-NEXT: s_mov_b32 s6, -1 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 ; GFX7-NEXT: s_nop 1 ; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f64: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x20 +; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x20 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; 
GFX8-NEXT: v_mov_b32_e32 v2, s8 -; GFX8-NEXT: v_mov_b32_e32 v4, s10 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 +; GFX8-NEXT: v_mov_b32_e32 v2, s12 +; GFX8-NEXT: v_mov_b32_e32 v4, s14 ; GFX8-NEXT: s_and_b32 s0, 1, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, s7 -; GFX8-NEXT: v_mov_b32_e32 v3, s9 -; GFX8-NEXT: v_mov_b32_e32 v5, s11 +; GFX8-NEXT: v_mov_b32_e32 v1, s11 +; GFX8-NEXT: v_mov_b32_e32 v3, s13 +; GFX8-NEXT: v_mov_b32_e32 v5, s15 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 ; GFX8-NEXT: s_nop 3 ; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: v_mov_b32_e32 v3, s9 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm ; ; GFX10_W32-LABEL: test_div_fmas_f64: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dword s0, s[2:3], 0x20 -; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s0, s[4:5], 0x20 +; GFX10_W32-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s8 -; GFX10_W32-NEXT: v_mov_b32_e32 v2, s10 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s12 +; GFX10_W32-NEXT: v_mov_b32_e32 v2, s14 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, s9 -; GFX10_W32-NEXT: v_mov_b32_e32 v3, s11 -; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3] +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s13 +; GFX10_W32-NEXT: v_mov_b32_e32 v3, s15 +; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[10:11], v[0:1], v[2:3] ; GFX10_W32-NEXT: v_mov_b32_e32 v2, 0 -; GFX10_W32-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX10_W32-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX10_W32-NEXT: s_endpgm ; ; GFX10_W64-LABEL: test_div_fmas_f64: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dword s0, s[2:3], 0x20 -; GFX10_W64-NEXT: s_load_dwordx8 
s[4:11], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s0, s[4:5], 0x20 +; GFX10_W64-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s0 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s8 -; GFX10_W64-NEXT: v_mov_b32_e32 v2, s10 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s12 +; GFX10_W64-NEXT: v_mov_b32_e32 v2, s14 ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, s9 -; GFX10_W64-NEXT: v_mov_b32_e32 v3, s11 -; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3] +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s13 +; GFX10_W64-NEXT: v_mov_b32_e32 v3, s15 +; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[10:11], v[0:1], v[2:3] ; GFX10_W64-NEXT: v_mov_b32_e32 v2, 0 -; GFX10_W64-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] +; GFX10_W64-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX10_W64-NEXT: s_endpgm ; ; GFX11_W32-LABEL: test_div_fmas_f64: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x1 -; GFX11_W32-NEXT: s_load_b32 s8, s[2:3], 0x20 -; GFX11_W32-NEXT: s_load_b256 s[0:7], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s8, s[4:5], 0x20 +; GFX11_W32-NEXT: s_load_b256 s[0:7], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W32-NEXT: s_and_b32 s8, 1, s8 ; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 @@ -827,15 +810,13 @@ define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, d ; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3] ; GFX11_W32-NEXT: v_mov_b32_e32 v2, 0 ; GFX11_W32-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f64: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x1 -; GFX11_W64-NEXT: s_load_b32 s8, s[2:3], 0x20 -; GFX11_W64-NEXT: s_load_b256 s[0:7], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s8, s[4:5], 0x20 +; GFX11_W64-NEXT: s_load_b256 s[0:7], s[4:5], 0x0 ; GFX11_W64-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX11_W64-NEXT: s_and_b32 s8, 1, s8 ; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 @@ -846,8 +827,6 @@ define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, d ; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3] ; GFX11_W64-NEXT: v_mov_b32_e32 v2, 0 ; GFX11_W64-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) store double %result, ptr addrspace(1) %out, align 8 @@ -857,35 +836,35 @@ define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, d define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) { ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_cmp_eq_u32 s7, 0 -; GFX7-NEXT: s_cselect_b32 s2, 1, 0 -; GFX7-NEXT: s_and_b32 s2, 1, s2 -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mov_b32_e32 v2, s6 -; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_nop 2 +; GFX7-NEXT: s_cmp_eq_u32 s3, 0 +; GFX7-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7-NEXT: v_mov_b32_e32 v0, s0 +; GFX7-NEXT: s_and_b32 s0, 1, s3 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 +; GFX7-NEXT: s_nop 3 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 -; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: 
test_div_fmas_f32_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x8 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_cmp_eq_u32 s7, 0 -; GFX8-NEXT: s_cselect_b32 s0, 1, 0 -; GFX8-NEXT: s_and_b32 s0, 1, s0 -; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_mov_b32_e32 v2, s6 +; GFX8-NEXT: s_cmp_eq_u32 s3, 0 +; GFX8-NEXT: s_cselect_b32 s3, 1, 0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: s_and_b32 s0, 1, s3 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_nop 2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -897,72 +876,68 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, ; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x8 +; GFX10_W32-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_cmp_eq_u32 s7, 0 -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 -; GFX10_W32-NEXT: s_cselect_b32 s2, 1, 0 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6 -; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 -; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 -; GFX10_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1 +; GFX10_W32-NEXT: s_cmp_eq_u32 s3, 0 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s1 +; GFX10_W32-NEXT: s_cselect_b32 s3, 1, 0 +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s2 +; GFX10_W32-NEXT: s_and_b32 s3, 1, s3 +; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3 +; GFX10_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 -; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] +; 
GFX10_W32-NEXT: global_store_dword v1, v0, s[6:7] ; GFX10_W32-NEXT: s_endpgm ; ; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x8 +; GFX10_W64-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: s_cmp_eq_u32 s7, 0 -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 -; GFX10_W64-NEXT: s_cselect_b32 s2, 1, 0 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6 -; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 -; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX10_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1 +; GFX10_W64-NEXT: s_cmp_eq_u32 s3, 0 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s1 +; GFX10_W64-NEXT: s_cselect_b32 s3, 1, 0 +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s2 +; GFX10_W64-NEXT: s_and_b32 s3, 1, s3 +; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3 +; GFX10_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10_W64-NEXT: global_store_dword v1, v0, s[6:7] ; GFX10_W64-NEXT: s_endpgm ; ; GFX11_W32-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x1 -; GFX11_W32-NEXT: s_load_b128 s[4:7], s[2:3], 0x8 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b128 s[0:3], s[4:5], 0x8 +; GFX11_W32-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_cmp_eq_u32 s7, 0 -; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 -; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 -; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1 +; GFX11_W32-NEXT: s_cmp_eq_u32 s3, 0 +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2 +; GFX11_W32-NEXT: s_cselect_b32 s3, 1, 0 +; GFX11_W32-NEXT: 
s_and_b32 s3, 1, s3 +; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3 +; GFX11_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 -; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11_W32-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x1 -; GFX11_W64-NEXT: s_load_b128 s[4:7], s[2:3], 0x8 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b128 s[0:3], s[4:5], 0x8 +; GFX11_W64-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: s_cmp_eq_u32 s7, 0 -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 -; GFX11_W64-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6 -; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 -; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1 +; GFX11_W64-NEXT: s_cmp_eq_u32 s3, 0 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s1 +; GFX11_W64-NEXT: s_cselect_b32 s3, 1, 0 +; GFX11_W64-NEXT: v_mov_b32_e32 v1, s2 +; GFX11_W64-NEXT: s_and_b32 s3, 1, s3 +; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3 +; GFX11_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11_W64-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11_W64-NEXT: s_endpgm %cmp = icmp eq i32 %i, 0 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) @@ -973,14 +948,14 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX7: ; %bb.0: -; 
GFX7-NEXT: s_load_dword s4, s[2:3], 0xa -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x1c +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_mov_b64 vcc, 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: v_mov_b32_e32 v2, s6 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -990,16 +965,16 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace ; ; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x70 ; GFX8-NEXT: s_mov_b64 vcc, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 @@ -1009,14 +984,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace ; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: 
s_load_dword s2, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s3 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -1025,14 +1000,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace ; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_mov_b64 vcc, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s3 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -1041,36 +1016,32 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace ; GFX11_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 
0x4c +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_mov_b64 vcc, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) store float %result, ptr addrspace(1) %out, align 4 @@ -1080,14 +1051,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: 
test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa -; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 -; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa +; GFX7-NEXT: s_load_dword s3, s[4:5], 0x13 +; GFX7-NEXT: s_load_dword s6, s[4:5], 0x1c +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_mov_b64 vcc, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 ; GFX7-NEXT: v_mov_b32_e32 v2, s6 ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -1097,16 +1068,16 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( ; ; GFX8-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 -; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c -; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x70 ; GFX8-NEXT: s_mov_b64 vcc, -1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 @@ -1116,14 +1087,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( ; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x4c -; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W32-NEXT: 
s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x4c +; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, -1 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 -; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5 +; GFX10_W32-NEXT: v_mov_b32_e32 v0, s2 +; GFX10_W32-NEXT: v_mov_b32_e32 v1, s3 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] @@ -1132,14 +1103,14 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( ; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x4c -; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x4c +; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_mov_b64 vcc, -1 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX10_W64-NEXT: v_mov_b32_e32 v0, s2 +; GFX10_W64-NEXT: v_mov_b32_e32 v1, s3 ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] @@ -1148,36 +1119,32 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( ; GFX11_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX11_W32: ; %bb.0: ; GFX11_W32-NEXT: s_clause 0x3 -; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x4c -; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W32-NEXT: s_load_b64 s[0:1], 
s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, -1 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX11_W64: ; %bb.0: ; GFX11_W64-NEXT: s_clause 0x3 -; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x4c -; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 -; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_mov_b64 vcc, -1 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2 +; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3 ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) store float %result, ptr addrspace(1) %out, align 4 @@ -1187,40 +1154,40 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace( define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], 
i32 %d) { ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s0, s[2:3], 0xc -; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s4, s[4:5], 0xc +; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_mov_b64 s[4:5], s[10:11] -; GFX7-NEXT: buffer_load_dword v3, v[1:2], s[4:7], 0 addr64 glc +; GFX7-NEXT: s_mov_b64 s[0:1], s[10:11] +; GFX7-NEXT: buffer_load_dword v3, v[1:2], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_load_dword v4, v[1:2], s[4:7], 0 addr64 offset:4 glc +; GFX7-NEXT: buffer_load_dword v4, v[1:2], s[0:3], 0 addr64 offset:4 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[4:7], 0 addr64 offset:8 glc +; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[0:3], 0 addr64 offset:8 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: s_cmp_lg_u32 s0, 0 +; GFX7-NEXT: s_cmp_lg_u32 s4, 0 ; GFX7-NEXT: s_cselect_b32 s0, 1, 0 ; GFX7-NEXT: s_and_b32 s0, 1, s0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX7-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_and_b64 vcc, vcc, s[0:1] -; GFX7-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX7-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX7-NEXT: v_div_fmas_f32 v0, v3, v4, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:8 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 -; GFX8-NEXT: s_load_dword s2, s[2:3], 0x30 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s4, s[4:5], 0x30 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 
v1, s6 -; GFX8-NEXT: v_mov_b32_e32 v2, s7 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s3 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v1 @@ -1233,9 +1200,9 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: flat_load_dword v3, v[5:6] glc ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: s_add_u32 s0, s4, 8 -; GFX8-NEXT: s_addc_u32 s1, s5, 0 -; GFX8-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8-NEXT: s_add_u32 s0, s0, 8 +; GFX8-NEXT: s_addc_u32 s1, s1, 0 +; GFX8-NEXT: s_cmp_lg_u32 s4, 0 ; GFX8-NEXT: s_cselect_b32 s2, 1, 0 ; GFX8-NEXT: s_and_b32 s2, 1, s2 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 @@ -1250,104 +1217,96 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1 ; ; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX10_W32: ; %bb.0: -; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 -; GFX10_W32-NEXT: s_load_dword s0, s[2:3], 0x30 +; GFX10_W32-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX10_W32-NEXT: s_load_dword s4, s[4:5], 0x30 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: global_load_dword v2, v1, s[6:7] glc dlc +; GFX10_W32-NEXT: global_load_dword v2, v1, s[2:3] glc dlc ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc +; GFX10_W32-NEXT: global_load_dword v3, v1, s[2:3] offset:4 glc dlc ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc +; GFX10_W32-NEXT: global_load_dword v4, v1, s[2:3] offset:8 glc dlc ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 -; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 -; 
GFX10_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 -; GFX10_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; GFX10_W32-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10_W32-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W32-NEXT: v_cmp_ne_u32_e64 s2, 0, s2 +; GFX10_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s2 ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v4 -; GFX10_W32-NEXT: global_store_dword v1, v0, s[4:5] offset:8 +; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] offset:8 ; GFX10_W32-NEXT: s_endpgm ; ; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX10_W64: ; %bb.0: -; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 -; GFX10_W64-NEXT: s_load_dword s0, s[2:3], 0x30 +; GFX10_W64-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX10_W64-NEXT: s_load_dword s4, s[4:5], 0x30 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W64-NEXT: global_load_dword v2, v1, s[6:7] glc dlc +; GFX10_W64-NEXT: global_load_dword v2, v1, s[2:3] glc dlc ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc +; GFX10_W64-NEXT: global_load_dword v3, v1, s[2:3] offset:4 glc dlc ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc +; GFX10_W64-NEXT: global_load_dword v4, v1, s[2:3] offset:8 glc dlc ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-NEXT: s_cmp_lg_u32 s0, 0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX10_W64-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10_W64-NEXT: s_and_b32 s0, 1, s0 -; GFX10_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; GFX10_W64-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GFX10_W64-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10_W64-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX10_W64-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2 +; GFX10_W64-NEXT: s_and_b64 vcc, vcc, s[2:3] ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v4 -; GFX10_W64-NEXT: 
global_store_dword v1, v0, s[4:5] offset:8 +; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] offset:8 ; GFX10_W64-NEXT: s_endpgm ; ; GFX11_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX11_W32: ; %bb.0: -; GFX11_W32-NEXT: s_clause 0x1 -; GFX11_W32-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 -; GFX11_W32-NEXT: s_load_b32 s0, s[2:3], 0x30 +; GFX11_W32-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11_W32-NEXT: s_load_b32 s4, s[4:5], 0x30 ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX11_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc +; GFX11_W32-NEXT: global_load_b32 v2, v1, s[2:3] glc dlc ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) -; GFX11_W32-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc +; GFX11_W32-NEXT: global_load_b32 v3, v1, s[2:3] offset:4 glc dlc ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) -; GFX11_W32-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc +; GFX11_W32-NEXT: global_load_b32 v1, v1, s[2:3] offset:8 glc dlc ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) -; GFX11_W32-NEXT: s_cmp_lg_u32 s0, 0 -; GFX11_W32-NEXT: s_cselect_b32 s0, 1, 0 -; GFX11_W32-NEXT: s_and_b32 s0, 1, s0 -; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 -; GFX11_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; GFX11_W32-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s2, 0, s2 +; GFX11_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s2 ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v1 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 -; GFX11_W32-NEXT: global_store_b32 v1, v0, s[4:5] offset:8 -; GFX11_W32-NEXT: s_nop 0 -; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX11_W64: ; %bb.0: -; GFX11_W64-NEXT: s_clause 0x1 -; GFX11_W64-NEXT: 
s_load_b128 s[4:7], s[2:3], 0x0 -; GFX11_W64-NEXT: s_load_b32 s0, s[2:3], 0x30 +; GFX11_W64-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11_W64-NEXT: s_load_b32 s4, s[4:5], 0x30 ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX11_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W64-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc +; GFX11_W64-NEXT: global_load_b32 v2, v1, s[2:3] glc dlc ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) -; GFX11_W64-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc +; GFX11_W64-NEXT: global_load_b32 v3, v1, s[2:3] offset:4 glc dlc ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) -; GFX11_W64-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc +; GFX11_W64-NEXT: global_load_b32 v1, v1, s[2:3] offset:8 glc dlc ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) -; GFX11_W64-NEXT: s_cmp_lg_u32 s0, 0 -; GFX11_W64-NEXT: s_cselect_b32 s0, 1, 0 -; GFX11_W64-NEXT: s_and_b32 s0, 1, s0 -; GFX11_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 -; GFX11_W64-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GFX11_W64-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11_W64-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 +; GFX11_W64-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2 +; GFX11_W64-NEXT: s_and_b64 vcc, vcc, s[2:3] ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v1 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 -; GFX11_W64-NEXT: global_store_b32 v1, v0, s[4:5] offset:8 -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 ; GFX11_W64-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid @@ -1371,19 +1330,19 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1 define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) { ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc: 
; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, 0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64 +; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[0:3], 0 addr64 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 ; GFX7-NEXT: s_mov_b64 vcc, 0 -; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] +; GFX7-NEXT: s_and_saveexec_b64 s[6:7], s[0:1] ; GFX7-NEXT: s_cbranch_execz .LBB13_2 ; GFX7-NEXT: ; %bb.1: ; %bb -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x14 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x14 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -1395,18 +1354,18 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX7-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX7-NEXT: s_or_b64 vcc, s[8:9], s[0:1] ; GFX7-NEXT: .LBB13_2: ; %exit -; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, s1 @@ -1416,10 +1375,10 @@ define amdgpu_kernel void 
@test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 ; GFX8-NEXT: s_mov_b64 vcc, 0 -; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] +; GFX8-NEXT: s_and_saveexec_b64 s[2:3], s[0:1] ; GFX8-NEXT: s_cbranch_execz .LBB13_2 ; GFX8-NEXT: ; %bb.1: ; %bb -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x50 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x50 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1431,10 +1390,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX8-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX8-NEXT: s_or_b64 vcc, s[6:7], s[0:1] ; GFX8-NEXT: .LBB13_2: ; %exit -; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_add_u32 s0, s0, 8 ; GFX8-NEXT: s_addc_u32 s1, s1, 0 @@ -1445,7 +1404,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; ; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W32: ; %bb.0: ; %entry -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) @@ -1454,20 +1413,20 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX10_W32-NEXT: s_and_saveexec_b32 s1, s0 ; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W32-NEXT: ; %bb.1: ; %bb -; GFX10_W32-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x50 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x50 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10_W32-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX10_W32-NEXT: s_load_dword 
s0, s[2:3], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0 ; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10_W32-NEXT: s_andn2_b32 s4, 0, exec_lo +; GFX10_W32-NEXT: s_andn2_b32 s2, 0, exec_lo ; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX10_W32-NEXT: s_and_b32 s0, exec_lo, s0 -; GFX10_W32-NEXT: s_or_b32 vcc_lo, s4, s0 +; GFX10_W32-NEXT: s_or_b32 vcc_lo, s2, s0 ; GFX10_W32-NEXT: .LBB13_2: ; %exit ; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W32-NEXT: s_waitcnt vmcnt(0) ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 @@ -1477,16 +1436,16 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; ; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W64: ; %bb.0: ; %entry -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10_W64-NEXT: s_mov_b64 vcc, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1] ; GFX10_W64-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 -; GFX10_W64-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] +; GFX10_W64-NEXT: s_and_saveexec_b64 s[2:3], s[0:1] ; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W64-NEXT: ; %bb.1: ; %bb -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x50 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x50 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) @@ -1498,8 +1457,8 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX10_W64-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX10_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1] ; GFX10_W64-NEXT: .LBB13_2: ; %exit -; GFX10_W64-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX10_W64-NEXT: 
s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10_W64-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10_W64-NEXT: s_waitcnt vmcnt(0) ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 @@ -1509,7 +1468,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; ; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX11_W32: ; %bb.0: ; %entry -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x28 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x28 ; GFX11_W32-NEXT: v_and_b32_e32 v3, 0x3ff, v0 ; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v3 @@ -1519,42 +1478,40 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v3 ; GFX11_W32-NEXT: s_cbranch_execz .LBB13_2 ; GFX11_W32-NEXT: ; %bb.1: ; %bb -; GFX11_W32-NEXT: s_load_b64 s[4:5], s[2:3], 0x50 +; GFX11_W32-NEXT: s_load_b64 s[2:3], s[4:5], 0x50 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX11_W32-NEXT: s_load_b32 s0, s[4:5], 0x0 +; GFX11_W32-NEXT: s_load_b32 s0, s[2:3], 0x0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W32-NEXT: s_cmp_lg_u32 s0, 0 ; GFX11_W32-NEXT: s_cselect_b32 s0, 1, 0 -; GFX11_W32-NEXT: s_and_not1_b32 s4, 0, exec_lo +; GFX11_W32-NEXT: s_and_not1_b32 s2, 0, exec_lo ; GFX11_W32-NEXT: s_and_b32 s0, 1, s0 ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX11_W32-NEXT: s_and_b32 s0, exec_lo, s0 -; GFX11_W32-NEXT: s_or_b32 vcc_lo, s4, s0 +; GFX11_W32-NEXT: s_or_b32 vcc_lo, s2, s0 ; GFX11_W32-NEXT: .LBB13_2: ; %exit ; GFX11_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W32-NEXT: s_waitcnt vmcnt(0) ; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2 ; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 -; GFX11_W32-NEXT: s_nop 0 -; 
GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W32-NEXT: s_endpgm ; ; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX11_W64: ; %bb.0: ; %entry -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x28 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x28 ; GFX11_W64-NEXT: v_and_b32_e32 v3, 0x3ff, v0 ; GFX11_W64-NEXT: s_mov_b64 vcc, 0 -; GFX11_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX11_W64-NEXT: s_mov_b64 s[2:3], exec ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v3 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W64-NEXT: global_load_b96 v[0:2], v0, s[0:1] ; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v3 ; GFX11_W64-NEXT: s_cbranch_execz .LBB13_2 ; GFX11_W64-NEXT: ; %bb.1: ; %bb -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x50 +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x50 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W64-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) @@ -1566,15 +1523,13 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [ ; GFX11_W64-NEXT: s_and_b64 s[0:1], exec, s[0:1] ; GFX11_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1] ; GFX11_W64-NEXT: .LBB13_2: ; %exit -; GFX11_W64-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 +; GFX11_W64-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; GFX11_W64-NEXT: s_waitcnt vmcnt(0) ; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2 ; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 -; GFX11_W64-NEXT: s_nop 0 -; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() From 5c49cd034cbcc71e84cb77c781c6f6e00dc73a30 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Fri, 28 Feb 2025 18:27:48 +0800 Subject: [PATCH 077/123] [tsan][RISCV] Add Go support for linux/riscv64 (#127295) This is needed to support race detector in Golang. 
See also: https://github.com/golang/go/issues/64345 --- compiler-rt/lib/tsan/go/buildgo.sh | 6 ++++ compiler-rt/lib/tsan/rtl/tsan_platform.h | 30 +++++++++++++++++++ .../lib/tsan/rtl/tsan_platform_linux.cpp | 6 ++++ 3 files changed, 42 insertions(+) diff --git a/compiler-rt/lib/tsan/go/buildgo.sh b/compiler-rt/lib/tsan/go/buildgo.sh index 6871b36c3f510..d9e56402ad48f 100755 --- a/compiler-rt/lib/tsan/go/buildgo.sh +++ b/compiler-rt/lib/tsan/go/buildgo.sh @@ -112,6 +112,12 @@ if [ "$GOOS" = "linux" ]; then ARCHCFLAGS="-mips64 -EL" elif [ "$GOARCH" = "mips64" ]; then ARCHCFLAGS="-mips64 -EB" + elif [ "$GOARCH" = "riscv64" ]; then + if [ "$GORISCV64" = "rva23u64" ]; then + ARCHCFLAGS="-march=rv64gcv" + else + ARCHCFLAGS="-march=rv64gc" + fi elif [ "$GOARCH" = "s390x" ]; then SRCS="$SRCS ../../sanitizer_common/sanitizer_linux_s390.cpp" ARCHCFLAGS="" diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index 377f8aeb8d66e..354f6da6a64a1 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -681,6 +681,33 @@ struct MappingGoMips64_47 { static const uptr kShadowAdd = 0x200000000000ull; }; +/* Go on linux/riscv64 (48-bit VMA) +0000 0001 0000 - 00e0 0000 0000: executable and heap (896 GiB) +00e0 0000 0000 - 2000 0000 0000: - +2000 0000 0000 - 2400 0000 0000: shadow - 4 TiB ( ~ 4 * app) +2400 0000 0000 - 3000 0000 0000: - +3000 0000 0000 - 3100 0000 0000: metainfo - 1 TiB ( ~ 1 * app) +3100 0000 0000 - 8000 0000 0000: - +*/ +struct MappingGoRiscv64 { + static const uptr kMetaShadowBeg = 0x300000000000ull; + static const uptr kMetaShadowEnd = 0x310000000000ull; + static const uptr kShadowBeg = 0x200000000000ull; + static const uptr kShadowEnd = 0x240000000000ull; + static const uptr kLoAppMemBeg = 0x000000010000ull; + static const uptr kLoAppMemEnd = 0x000e00000000ull; + static const uptr kMidAppMemBeg = 0; + static const uptr kMidAppMemEnd = 0; + static const uptr kHiAppMemBeg = 0; 
+ static const uptr kHiAppMemEnd = 0; + static const uptr kHeapMemBeg = 0; + static const uptr kHeapMemEnd = 0; + static const uptr kVdsoBeg = 0; + static const uptr kShadowMsk = 0; + static const uptr kShadowXor = 0; + static const uptr kShadowAdd = 0x200000000000ull; +}; + /* Go on linux/s390x 0000 0000 1000 - 1000 0000 0000: executable and heap - 16 TiB @@ -728,6 +755,8 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { return Func::template Apply(arg); # elif defined(__loongarch_lp64) return Func::template Apply(arg); +# elif SANITIZER_RISCV64 + return Func::template Apply(arg); # elif SANITIZER_WINDOWS return Func::template Apply(arg); # else @@ -798,6 +827,7 @@ void ForEachMapping() { Func::template Apply(); Func::template Apply(); Func::template Apply(); + Func::template Apply(); Func::template Apply(); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index 3e08a1bece98f..373acd3d95d01 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -373,6 +373,12 @@ void InitializePlatformEarly() { Printf("FATAL: Found %zd - Supported 39 and 48\n", vmaSize); Die(); } +# else + if (vmaSize != 48) { + Printf("FATAL: ThreadSanitizer: unsupported VMA range\n"); + Printf("FATAL: Found %zd - Supported 48\n", vmaSize); + Die(); + } # endif # endif From b36187d9f8200668c4904ecc0c621f1f586b6ce0 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 28 Feb 2025 11:23:51 +0100 Subject: [PATCH 078/123] [FunctionAttrs] Consider non-willreturn functions during capture inference Matching the CaptureTracking change in abd97d9685c07c4787ff22e56c0a7b8963630063, only directly infer captures(none) for readonly+nocapture+willreturn+void. Part of https://github.com/llvm/llvm-project/issues/129090. 
--- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 2 +- .../TypeBasedAliasAnalysis/functionattrs.ll | 27 +++++------ .../Transforms/FunctionAttrs/nocapture.ll | 46 ++++++++++++++++--- 3 files changed, 55 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index f6e211c302230..5c17b9e8d386d 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1249,7 +1249,7 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes, // Functions that are readonly (or readnone) and nounwind and don't return // a value can't capture arguments. Don't analyze them. - if (F->onlyReadsMemory() && F->doesNotThrow() && + if (F->onlyReadsMemory() && F->doesNotThrow() && F->willReturn() && F->getReturnType()->isVoidTy()) { for (Argument &A : F->args()) { if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) { diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index e5443038cb4c3..9c5f0640b1677 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -10,13 +10,13 @@ ; code path which isn't ever executed. ; CHECK: define void @test0_yes(ptr captures(none) %p) #0 { -define void @test0_yes(ptr %p) nounwind { +define void @test0_yes(ptr %p) nounwind willreturn { store i32 0, ptr %p, !tbaa !1 ret void } ; CHECK: define void @test0_no(ptr writeonly captures(none) initializes((0, 4)) %p) #1 { -define void @test0_no(ptr %p) nounwind { +define void @test0_no(ptr %p) nounwind willreturn { store i32 0, ptr %p, !tbaa !2 ret void } @@ -25,13 +25,13 @@ define void @test0_no(ptr %p) nounwind { ; TBAA says only accesses constant memory. 
; CHECK: define void @test1_yes(ptr captures(none) %p) #2 { -define void @test1_yes(ptr %p) nounwind { +define void @test1_yes(ptr %p) nounwind willreturn { call void @callee(ptr %p), !tbaa !1 ret void } ; CHECK: define void @test1_no(ptr %p) #3 { -define void @test1_no(ptr %p) nounwind { +define void @test1_no(ptr %p) nounwind willreturn { call void @callee(ptr %p), !tbaa !2 ret void } @@ -44,13 +44,13 @@ define void @test1_no(ptr %p) nounwind { ; isn't necessarily invalid. ; CHECK: define void @test2_yes(ptr captures(none) %p, ptr captures(none) %q, i64 %n) #0 { -define void @test2_yes(ptr %p, ptr %q, i64 %n) nounwind { +define void @test2_yes(ptr %p, ptr %q, i64 %n) nounwind willreturn { call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %q, i64 %n, i1 false), !tbaa !1 ret void } ; CHECK: define void @test2_no(ptr writeonly captures(none) %p, ptr readonly captures(none) %q, i64 %n) #4 { -define void @test2_no(ptr %p, ptr %q, i64 %n) nounwind { +define void @test2_no(ptr %p, ptr %q, i64 %n) nounwind willreturn { call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %q, i64 %n, i1 false), !tbaa !2 ret void } @@ -58,26 +58,27 @@ define void @test2_no(ptr %p, ptr %q, i64 %n) nounwind { ; Similar to the others, va_arg only accesses memory through its operand. 
; CHECK: define i32 @test3_yes(ptr captures(none) %p) #0 { -define i32 @test3_yes(ptr %p) nounwind { +define i32 @test3_yes(ptr %p) nounwind willreturn { %t = va_arg ptr %p, i32, !tbaa !1 ret i32 %t } ; CHECK: define i32 @test3_no(ptr captures(none) %p) #4 { -define i32 @test3_no(ptr %p) nounwind { +define i32 @test3_no(ptr %p) nounwind willreturn { %t = va_arg ptr %p, i32, !tbaa !2 ret i32 %t } -declare void @callee(ptr %p) nounwind -declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) nounwind +declare void @callee(ptr %p) nounwind willreturn +declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) nounwind willreturn ; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } ; CHECK: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) } -; CHECK: attributes #2 = { nofree nosync nounwind memory(none) } -; CHECK: attributes #3 = { nounwind } +; CHECK: attributes #2 = { mustprogress nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #3 = { mustprogress nounwind willreturn } ; CHECK: attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } -; CHECK: attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #5 = { nounwind willreturn } +; CHECK: attributes #6 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; Root note. 
!0 = !{ } diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 401dee6b438ab..dc1fdb6100aeb 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -361,15 +361,15 @@ define void @nc3(ptr %p) { } declare void @external_not_willreturn(ptr) readonly nounwind -define void @readononly_nounwind_not_willreturn(ptr %p) { +define void @readonly_nounwind_not_willreturn(ptr %p) { ; FNATTRS: Function Attrs: nofree nounwind memory(read) -; FNATTRS-LABEL: define void @readononly_nounwind_not_willreturn +; FNATTRS-LABEL: define void @readonly_nounwind_not_willreturn ; FNATTRS-SAME: (ptr readonly [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; FNATTRS-NEXT: call void @external_not_willreturn(ptr [[P]]) ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nosync nounwind memory(read) -; ATTRIBUTOR-LABEL: define void @readononly_nounwind_not_willreturn +; ATTRIBUTOR-LABEL: define void @readonly_nounwind_not_willreturn ; ATTRIBUTOR-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR7:[0-9]+]] { ; ATTRIBUTOR-NEXT: call void @external_not_willreturn(ptr readonly captures(none) [[P]]) #[[ATTR4]] ; ATTRIBUTOR-NEXT: ret void @@ -379,15 +379,15 @@ define void @readononly_nounwind_not_willreturn(ptr %p) { } declare void @external_willreturn(ptr) readonly nounwind willreturn -define void @readononly_nounwind_willreturn(ptr %p) { +define void @readonly_nounwind_willreturn(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree nounwind willreturn memory(read) -; FNATTRS-LABEL: define void @readononly_nounwind_willreturn +; FNATTRS-LABEL: define void @readonly_nounwind_willreturn ; FNATTRS-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR11:[0-9]+]] { ; FNATTRS-NEXT: call void @external_willreturn(ptr [[P]]) ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: mustprogress nosync nounwind willreturn memory(read) -; ATTRIBUTOR-LABEL: define void 
@readononly_nounwind_willreturn +; ATTRIBUTOR-LABEL: define void @readonly_nounwind_willreturn ; ATTRIBUTOR-SAME: (ptr readonly captures(none) [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; ATTRIBUTOR-NEXT: call void @external_willreturn(ptr readonly captures(none) [[P]]) #[[ATTR21:[0-9]+]] ; ATTRIBUTOR-NEXT: ret void @@ -432,6 +432,40 @@ define void @callsite_readonly_nounwind_willreturn(ptr %f, ptr %p) { ret void } +define void @self_readonly_nounwind_not_willreturn(ptr %p) readonly nounwind { +; FNATTRS: Function Attrs: nofree nounwind memory(read) +; FNATTRS-LABEL: define void @self_readonly_nounwind_not_willreturn +; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR9]] { +; FNATTRS-NEXT: call void @capture(ptr [[P]]) +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: nosync nounwind memory(read) +; ATTRIBUTOR-LABEL: define void @self_readonly_nounwind_not_willreturn +; ATTRIBUTOR-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR7]] { +; ATTRIBUTOR-NEXT: call void @capture(ptr [[P]]) +; ATTRIBUTOR-NEXT: ret void +; + call void @capture(ptr %p) + ret void +} + +define void @self_readonly_nounwind_willreturn(ptr %p) readonly nounwind willreturn { +; FNATTRS: Function Attrs: mustprogress nofree nounwind willreturn memory(read) +; FNATTRS-LABEL: define void @self_readonly_nounwind_willreturn +; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR11]] { +; FNATTRS-NEXT: call void @capture(ptr [[P]]) +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nosync nounwind willreturn memory(read) +; ATTRIBUTOR-LABEL: define void @self_readonly_nounwind_willreturn +; ATTRIBUTOR-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR9]] { +; ATTRIBUTOR-NEXT: call void @capture(ptr [[P]]) +; ATTRIBUTOR-NEXT: ret void +; + call void @capture(ptr %p) + ret void +} + ; It would be acceptable to add readnone to %y1_1 and %y1_2. 
define void @test1_1(ptr %x1_1, ptr %y1_1, i1 %c) { ; FNATTRS: Function Attrs: nofree nosync nounwind memory(write, argmem: none, inaccessiblemem: none) From 75947bb7f18f352ba42fee3bf3803a612b1e8c67 Mon Sep 17 00:00:00 2001 From: Jack Frankland Date: Fri, 28 Feb 2025 10:41:44 +0000 Subject: [PATCH 079/123] [mlir][tosa][tosa-to-linalg] Ignore Int NaN Mode (#129041) For non floating point operations NaN propagation mode has no meaning and can be safely ignored. For non integer types skip the compare and select materialization for NaN propagation even in "IGNORE" mode. This fixes a bug where an unchecked `cast()` was called in the "IGNORE" case even when the operation is acting on integers. Update the lit tests for the NaN propagation lowering to check that the propagation logic is not materialized in the case of a non floating point type e.g. i8. Signed-off-by: Jack Frankland --- .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 20 +++- .../TosaToLinalg/TosaToLinalgNamed.cpp | 5 + .../TosaToLinalg/tosa-to-linalg-named.mlir | 10 ++ .../TosaToLinalg/tosa-to-linalg.mlir | 95 +++++++++++++++++++ 4 files changed, 128 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 06831a642664e..8732ddafa24d4 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -49,6 +49,11 @@ using namespace mlir::tosa; // calculated result based on whether the lhs or rhs is NaN or not. In pseudo // code: // +// In the case that the op is operating on non floating point types we ignore +// the attribute completely, this is consistent with the TOSA spec which has +// the following wording: "This attribute is ignored by non floating-point +// types." 
+// // binary(lhs, rhs): // result = op(lhs, rhs) // if lhs == NaN return rhs @@ -58,6 +63,10 @@ template static Value materializeBinaryNanCheckIfRequired(OpTy op, PatternRewriter &rewriter, Value lhs, Value rhs, Value result) { + // NaN propagation has no meaning for non floating point types. + if (!isa(getElementTypeOrSelf(lhs))) + return result; + auto nanMode = op.getNanMode(); if (nanMode == "PROPAGATE") return result; @@ -449,6 +458,11 @@ static Value createLinalgBodyCalculationForElementwiseOp( auto clampOp = llvm::cast(op); const auto nanMode = clampOp.getNanMode(); + + // NaN propagation has no meaning for non floating point types. + if (!isa(elementTy)) + return result; + // In the case of "PROPAGATE" semantics no compare and selection is // required. if (nanMode == "PROPAGATE") @@ -1192,7 +1206,8 @@ static LogicalResult reduceMatchAndRewriteHelper(OpTy op, uint64_t axis, bool isNanIgnoreMode = false; if constexpr (std::is_same_v || std::is_same_v) { - if (op.getNanMode() == "IGNORE") { + // NaN propagation has no meaning for non floating point types. + if (isa(elementTy) && op.getNanMode() == "IGNORE") { isNanIgnoreMode = true; // Because the TOSA spec requires the result be NaN iff all elements in // the reduction are NaN we can't simply perform a compare and select. @@ -2282,7 +2297,8 @@ class ArgMaxConverter : public OpRewritePattern { // In the case "IGNORE" we check if the current argument is NaN and // select the old index and value otherwise take the updated index and // value. - if (const auto nanMode = argmaxOp.getNanMode(); nanMode == "IGNORE") { + if (const auto nanMode = argmaxOp.getNanMode(); + isa(inElementTy) && nanMode == "IGNORE") { // Unordered comparison of NaN against itself will always return // true. 
Value isNaN = rewriter.create( diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp index 006e35806d64f..e3400b9ba4358 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -748,6 +748,11 @@ class MaxPool2dConverter : public OpConversionPattern { dilationAttr); rewriter.replaceOp(op, resultOp); + + // NaN propagation has no meaning for non floating point types. + if (!isa(getElementTypeOrSelf(inputTy))) + return success(); + // "PROPAGATE" mode matches the behaviour of the LinAlg named op, so no // compare and select materialization is required. // diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir index 332b706871547..02d2f16b74ef8 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -940,6 +940,16 @@ func.func @max_pool2d_nan_propagate(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x4 // ----- +// CHECK-LABEL: @max_pool2d_nan_ignore_int +func.func @max_pool2d_nan_ignore_int(%arg0: tensor<1x6x34x62xi8>) -> (tensor<1x4x32x62xi8>) { + // CHECK: linalg.pooling_nhwc_max + // CHECK-NOT: linalg.generic + %0 = tosa.max_pool2d %arg0 {pad = array, kernel = array, stride = array, nan_mode = "IGNORE"} : (tensor<1x6x34x62xi8>) -> tensor<1x4x32x62xi8> + return %0: tensor<1x4x32x62xi8> +} + +// ----- + // CHECK-LABEL: @max_pool2d_nan_ignore func.func @max_pool2d_nan_ignore(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) { // CHECK-NOT: linalg.pooling_nhwc_max diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 78f2e173d7cb1..c3992d2cda46e 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -2033,6 
+2033,44 @@ func.func @reduce_max_nan_propagate(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf3 // ----- +// CHECK-LABEL: @reduce_min_nan_ignore_int +func.func @reduce_min_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.reduce + // CHECK: arith.minsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + // CHECK-NOT: arith.constant 0x7FC00000 + // CHECK-NOT: tensor.empty() + // CHECK-NOT: linalg.fill + // CHECK-NOT: tensor.empty() + // CHECK-NOT: select + // CHECK: return + %5 = tosa.reduce_min %arg0 {axis = 0 : i32, nan_mode = "IGNORE"} : (tensor<5x4xi8>) -> tensor<1x4xi8> + return +} + +// ----- + +// CHECK-LABEL: @reduce_max_nan_ignore_int +func.func @reduce_max_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.reduce + // CHECK: arith.maxsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + // CHECK-NOT: arith.constant 0x7FC00000 + // CHECK-NOT: tensor.empty() + // CHECK-NOT: linalg.fill + // CHECK-NOT: tensor.empty() + // CHECK-NOT: select + // CHECK: return + %6 = tosa.reduce_max %arg0 {axis = 0 : i32, nan_mode = "IGNORE"} : (tensor<5x4xi8>) -> tensor<1x4xi8> + return +} + +// ----- + // CHECK-LABEL: @reduce_min_nan_ignore func.func @reduce_min_nan_ignore(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) -> () { // CHECK: linalg.reduce @@ -2095,6 +2133,32 @@ func.func @maximum_nan_propagate(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) // ----- +// CHECK-LABEL: @minimum_nan_ignore_int +func.func @minimum_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.generic + // CHECK: arith.minsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + %9 = tosa.minimum %arg0, %arg1 {nan_mode = "IGNORE"} : (tensor<5x4xi8>, tensor<5x4xi8>) -> tensor<5x4xi8> + return +} + +// ----- + +// CHECK-LABEL: @maximum_nan_ignore_int +func.func @maximum_nan_ignore_int(%arg0: 
tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.generic + // CHECK: arith.maxsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + %10 = tosa.maximum %arg0, %arg1 {nan_mode = "IGNORE"} : (tensor<5x4xi8>, tensor<5x4xi8>) -> tensor<5x4xi8> + return +} + +// ----- + // CHECK-LABEL: @minimum_nan_ignore func.func @minimum_nan_ignore(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) -> () { // CHECK: linalg.generic @@ -2142,6 +2206,23 @@ func.func @argmax_nan_propagate(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) // ----- +// CHECK-LABEL: @argmax_nan_ignore_int +func.func @argmax_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.generic + // CHECK: arith.cmpi sgt + // CHECK: arith.select + // CHECK: arith.select + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK-NOT: arith.select + // CHECK: linalg.yield + %12 = tosa.argmax %arg0 {axis = 0 : i32, nan_mode = "IGNORE"} : (tensor<5x4xi8>) -> tensor<4xi32> + return +} + +// ----- + // CHECK-LABEL: @argmax_nan_ignore func.func @argmax_nan_ignore(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) -> () { // CHECK: linalg.generic @@ -2172,6 +2253,20 @@ func.func @clamp_nan_propagate(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) - // ----- +// CHECK-LABEL: @clamp_nan_ignore_int +func.func @clamp_nan_ignore_int(%arg0: tensor<5x4xi8>, %arg1: tensor<5x4xi8>) -> () { + // CHECK: linalg.generic + // CHECK: arith.maxsi + // CHECK: arith.minsi + // CHECK-NOT: arith.cmpf uno + // CHECK-NOT: arith.select + // CHECK: linalg.yield + %14 = tosa.clamp %arg0 {min_val = 1 : i8, max_val = 5 : i8, nan_mode = "IGNORE"} : (tensor<5x4xi8>) -> tensor<5x4xi8> + return +} + +// ----- + // CHECK-LABEL: @clamp_nan_ignore func.func @clamp_nan_ignore(%arg0: tensor<5x4xf32>, %arg1: tensor<5x4xf32>) -> () { // CHECK: linalg.generic From a3c123ab8cf3627ee141305987aa4d9bd058f83b Mon Sep 17 00:00:00 2001 From: Paul 
Osmialowski Date: Fri, 28 Feb 2025 10:54:08 +0000 Subject: [PATCH 080/123] [libc++][test] extend -linux-gnu XFAIL to cover all of the -linux targets (#129140) The default triple of Amazon Linux on AArch64 is aarch64-amazon-linux, see issue highlighded by PR #109263, somewhat serious linker issues are encountered if any other triple is being used. Unfortunately, this makes XFAIL lines like: `XFAIL: target=aarch64{{.*}}-linux-gnu` ineffective, making it impossible to complete all of the check-cxx without failures. --- .../iostream.format/std.manip/setfill_wchar_max.pass.cpp | 2 +- libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp | 2 +- libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp | 2 +- libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp | 2 +- libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp | 2 +- libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp | 2 +- libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp | 2 +- libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp | 2 +- .../test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp | 2 +- libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp index 9d4126153cc23..82842a75827ac 100644 --- a/libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp @@ -16,7 +16,7 @@ // XFAIL: target={{.*}}-windows{{.*}} && libcpp-abi-version=1 // XFAIL: target=armv{{7|8}}{{l?}}{{.*}}-linux-gnueabihf && libcpp-abi-version=1 -// XFAIL: target=aarch64{{.*}}-linux-gnu && libcpp-abi-version=1 +// XFAIL: target=aarch64{{.*}}-linux{{.*}} && libcpp-abi-version=1 #include #include diff --git 
a/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp index 57b8c13aa3c14..879597b2f80fd 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp @@ -18,7 +18,7 @@ // TODO: investigation needed // TODO(netbsd): incomplete support for locales -// XFAIL: target={{.*}}-linux-gnu{{.*}}, netbsd, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, netbsd, freebsd // REQUIRES: locale.cs_CZ.ISO8859-2 #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp index 430d35fe739e5..59fb1c48e15d3 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp index b512fa9b5fcf8..0a966759eac3b 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp index 472dc19680263..87ff1e5b6ef12 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp @@ -22,7 +22,7 @@ // 
regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp index 9125df404b1de..c4b211e613bec 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp index f85b6a40ce129..56cf2e6a61ff3 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp index aa9441cb3e58f..4655a5c2e0ee6 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp 
b/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp index 9746e45f29da5..7bc8a537ca228 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp @@ -22,7 +22,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd +// XFAIL: target={{.*}}-linux{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp b/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp index 178979d5b9ce8..3cbbaef9c81b5 100644 --- a/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp +++ b/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp @@ -23,7 +23,7 @@ // lookup_collatename(ForwardIterator first, ForwardIterator last) const; // TODO: investigation needed -// XFAIL: target={{.*}}-linux-gnu{{.*}} +// XFAIL: target={{.*}}-linux{{.*}} #include #include From 91f462267d551691897696517fb55a27bac190b8 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 28 Feb 2025 10:56:09 +0000 Subject: [PATCH 081/123] [LLVM][SVE] Add isel for bfloat based select operations. (#128881) Patch also adds missing tests for unpacked half and float types. 
--- .../Target/AArch64/AArch64ISelLowering.cpp | 2 + llvm/lib/Target/AArch64/SVEInstrFormats.td | 2 + llvm/test/CodeGen/AArch64/sve-select.ll | 240 +++++++++++++----- 3 files changed, 181 insertions(+), 63 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 35b222e2d55eb..7a471662ea075 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1701,6 +1701,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_ROUND, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 501e659eb1813..8255b267bd7e9 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1962,6 +1962,8 @@ multiclass sve_int_sel_vvv { def : SVE_3_Op_Pat(NAME # _D)>; def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; def : InstAlias<"mov $Zd, $Pg/m, $Zn", (!cast(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>; diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll index b1270165556e6..e0ea173cd28e4 100644 --- a/llvm/test/CodeGen/AArch64/sve-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-select.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s -define @select_nxv1i8(i1 %cond, %a, %b) { +define @select_nxv1i8(i1 %cond, 
%a, %b) { ; CHECK-LABEL: select_nxv1i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -9,11 +9,11 @@ define @select_nxv1i8(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv16i8(i1 %cond, %a, %b) { +define @select_nxv16i8(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -21,11 +21,11 @@ define @select_nxv16i8(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv1i16(i1 %cond, %a, %b) { +define @select_nxv1i16(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -33,11 +33,11 @@ define @select_nxv1i16(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv8i16(i1 %cond, %a, %b) { +define @select_nxv8i16(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -45,11 +45,11 @@ define @select_nxv8i16(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv1i32(i1 %cond, %a, %b) { +define @select_nxv1i32(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -57,11 +57,11 @@ define @select_nxv1i32(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv4i32(i1 %cond, %a, %b) { +define @select_nxv4i32(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -69,11 +69,11 @@ define @select_nxv4i32(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv1i64(i1 %cond, %a, %b) { +define @select_nxv1i64(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -81,11 +81,11 @@ define 
@select_nxv1i64(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv2i64(i1 %cond, %a, %b) { +define @select_nxv2i64(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -93,11 +93,11 @@ define @select_nxv2i64(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv8f16(i1 %cond, %a, %b) { +define @select_nxv8f16(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -105,11 +105,35 @@ define @select_nxv8f16(i1 %cond, %a, < ; CHECK-NEXT: whilelo p0.h, xzr, x8 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: ret - %res = select i1 %cond, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv4f32(i1 %cond, %a, %b) { +define @select_nxv4f16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.s, xzr, x8 +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv2f16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv4f32(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -117,11 +141,23 @@ define @select_nxv4f32(i1 %cond, %a, ; CHECK-NEXT: whilelo p0.s, xzr, x8 ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: ret - %res = select i1 %cond, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv2f32(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv2f32: 
+; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv2f64(i1 %cond, %a, %b) { +define @select_nxv2f64(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -129,11 +165,47 @@ define @select_nxv2f64(i1 %cond, % ; CHECK-NEXT: whilelo p0.d, xzr, x8 ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: ret - %res = select i1 %cond, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv8bf16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.h, xzr, x8 +; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv16i1(i1 %cond, %a, %b) { +define @select_nxv4bf16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.s, xzr, x8 +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv2bf16(i1 %cond, %a, %b) { +; CHECK-LABEL: select_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %res = select i1 %cond, %a, %b + ret %res +} + +define @select_nxv16i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -141,11 +213,11 @@ define @select_nxv16i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv8i1(i1 
%cond, %a, %b) { +define @select_nxv8i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -153,11 +225,11 @@ define @select_nxv8i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv4i1(i1 %cond, %a, %b) { +define @select_nxv4i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -165,11 +237,11 @@ define @select_nxv4i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv2i1(i1 %cond, %a, %b) { +define @select_nxv2i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -177,11 +249,11 @@ define @select_nxv2i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } -define @select_nxv1i1(i1 %cond, %a, %b) { +define @select_nxv1i1(i1 %cond, %a, %b) { ; CHECK-LABEL: select_nxv1i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -190,8 +262,8 @@ define @select_nxv1i1(i1 %cond, %a, %a, %b - ret %res + %res = select i1 %cond, %a, %b + ret %res } ; Integer vector select @@ -324,6 +396,20 @@ define @icmp_select_nxv2f64( %a, %sel } +define @icmp_select_nxv2bf16( %a, %b, i64 %x0) { +; CHECK-LABEL: icmp_select_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #0 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: sbfx x8, x8, #0, #1 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel +} + define @icmp_select_nxv4f16( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv4f16: ; CHECK: // %bb.0: @@ -352,6 +438,20 @@ define @icmp_select_nxv4f32( %a, %sel } +define @icmp_select_nxv4bf16( %a, %b, i64 %x0) { +; CHECK-LABEL: icmp_select_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #0 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: sbfx 
x8, x8, #0, #1 +; CHECK-NEXT: whilelo p0.s, xzr, x8 +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s +; CHECK-NEXT: ret + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel +} + define @icmp_select_nxv8f16( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv8f16: ; CHECK: // %bb.0: @@ -366,6 +466,20 @@ define @icmp_select_nxv8f16( %a, %sel } +define @icmp_select_nxv8bf16( %a, %b, i64 %x0) { +; CHECK-LABEL: icmp_select_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #0 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: sbfx x8, x8, #0, #1 +; CHECK-NEXT: whilelo p0.h, xzr, x8 +; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h +; CHECK-NEXT: ret + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel +} + define @icmp_select_nxv1i64( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv1i64: ; CHECK: // %bb.0: @@ -488,9 +602,9 @@ define @icmp_select_nxv1i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @icmp_select_nxv2i1( %a, %b, i64 %x0) { @@ -502,9 +616,9 @@ define @icmp_select_nxv2i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @icmp_select_nxv4i1( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv4i1: @@ -515,9 +629,9 @@ define @icmp_select_nxv4i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @icmp_select_nxv8i1( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv8i1: @@ -528,9 +642,9 @@ define @icmp_select_nxv8i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @icmp_select_nxv16i1( %a, %b, i64 %x0) { ; CHECK-LABEL: icmp_select_nxv16i1: @@ -541,9 +655,9 @@ define @icmp_select_nxv16i1( %a, %a, %b - ret %sel + %mask = icmp eq i64 %x0, 0 + %sel = select i1 %mask, %a, %b + ret %sel } define @select_f32_invert_fmul( %a, %b) #0 { From d0e37ef2cd33a23d5a368a7fcf76c5fd345d9c3b Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 28 
Feb 2025 13:38:52 +0100 Subject: [PATCH 082/123] [libc++] Enable algorithm vectorization on arm neon (#128873) Previously the wrong detection macro has been used to check whether arm NEON is available. This fixes it, and removes a few unnecessary includes from `__algorithm/simd_utils.h` as a drive-by. --- libcxx/include/__algorithm/simd_utils.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h index 4e03723a32854..e3c790998e902 100644 --- a/libcxx/include/__algorithm/simd_utils.h +++ b/libcxx/include/__algorithm/simd_utils.h @@ -15,8 +15,6 @@ #include <__bit/countr.h> #include <__config> #include <__cstddef/size_t.h> -#include <__type_traits/is_arithmetic.h> -#include <__type_traits/is_same.h> #include <__utility/integer_sequence.h> #include @@ -78,7 +76,7 @@ using __get_as_integer_type_t _LIBCPP_NODEBUG = typename __get_as_integer_type_i # if defined(__AVX__) || defined(__MVS__) template inline constexpr size_t __native_vector_size = 32 / sizeof(_Tp); -# elif defined(__SSE__) || defined(__ARM_NEON__) +# elif defined(__SSE__) || defined(__ARM_NEON) template inline constexpr size_t __native_vector_size = 16 / sizeof(_Tp); # elif defined(__MMX__) From c8ba9a27153049e43c4a7f9d9aa7ffc6d15361b1 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Fri, 28 Feb 2025 13:50:18 +0100 Subject: [PATCH 083/123] [modules] Add missing test file for b21ee08e57173102b67bc18237b135550 (#129221) The commit missed a test file. 
--- clang/test/Modules/pr28744.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 clang/test/Modules/pr28744.cpp diff --git a/clang/test/Modules/pr28744.cpp b/clang/test/Modules/pr28744.cpp new file mode 100644 index 0000000000000..2089872a2a75a --- /dev/null +++ b/clang/test/Modules/pr28744.cpp @@ -0,0 +1,17 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -std=c++11 -I%S/Inputs/PR28794 -verify %s +// RUN: %clang_cc1 -std=c++11 -fmodules -fmodule-map-file=%S/Inputs/PR28794/module.modulemap -fmodules-cache-path=%t -I%S/Inputs/PR28794/ -verify %s + +#include "Subdir/Empty.h" +#include "LibAHeader.h" + +BumpPtrAllocatorImpl<> &getPreprocessorAllocator(); +class B { + struct ModuleMacroInfo { + ModuleMacroInfo *getModuleInfo() { + return new (getPreprocessorAllocator()) ModuleMacroInfo(); + } + }; +}; + +// expected-no-diagnostics From b8e7c0f740ce3523425d38cd7d5d1e16061c331d Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 28 Feb 2025 12:56:12 +0000 Subject: [PATCH 084/123] [LV] Teach the vectorizer to cost and vectorize modf and sincospi intrinsics (#129064) Follow on to #128035. It is a small extension to support vectorizing `llvm.modf.*` and `llvm.sincospi.*` too. This renames the test files from `sincos.ll` -> `multiple-result-intrinsics.ll` to group together the similar tests (which make up most of this PR). 
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 29 +- llvm/lib/Analysis/VectorUtils.cpp | 4 + .../AArch64/multiple-result-intrinsics.ll | 579 ++++++++++++++++++ .../LoopVectorize/AArch64/sincos.ll | 251 -------- .../multiple-result-intrinsics.ll | 330 ++++++++++ llvm/test/Transforms/LoopVectorize/sincos.ll | 157 ----- 6 files changed, 938 insertions(+), 412 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll delete mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sincos.ll create mode 100644 llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll delete mode 100644 llvm/test/Transforms/LoopVectorize/sincos.ll diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index d679409770ca1..563953516a354 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2056,12 +2056,33 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } case Intrinsic::experimental_vector_match: return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); - case Intrinsic::sincos: { + case Intrinsic::modf: + case Intrinsic::sincos: + case Intrinsic::sincospi: { Type *Ty = getContainedTypes(RetTy).front(); EVT VT = getTLI()->getValueType(DL, Ty); - RTLIB::Libcall LC = RTLIB::getSINCOS(VT.getScalarType()); - if (auto Cost = - getMultipleResultIntrinsicVectorLibCallCost(ICA, CostKind, LC)) + + RTLIB::Libcall LC = [&] { + switch (ICA.getID()) { + case Intrinsic::modf: + return RTLIB::getMODF; + case Intrinsic::sincos: + return RTLIB::getSINCOS; + case Intrinsic::sincospi: + return RTLIB::getSINCOSPI; + default: + llvm_unreachable("unexpected intrinsic"); + } + }()(VT.getScalarType()); + + std::optional CallRetElementIndex; + // The first element of the modf result is returned by value in the + // libcall. 
+ if (ICA.getID() == Intrinsic::modf) + CallRetElementIndex = 0; + + if (auto Cost = getMultipleResultIntrinsicVectorLibCallCost( + ICA, CostKind, LC, CallRetElementIndex)) return *Cost; // Otherwise, fallback to default scalarization cost. break; diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index dcfd3d5a8bd6e..ede0fca4d51b0 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -73,6 +73,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::sin: case Intrinsic::cos: case Intrinsic::sincos: + case Intrinsic::sincospi: case Intrinsic::tan: case Intrinsic::sinh: case Intrinsic::cosh: @@ -88,6 +89,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::maxnum: case Intrinsic::minimum: case Intrinsic::maximum: + case Intrinsic::modf: case Intrinsic::copysign: case Intrinsic::floor: case Intrinsic::ceil: @@ -186,7 +188,9 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg( case Intrinsic::ucmp: case Intrinsic::scmp: return OpdIdx == -1 || OpdIdx == 0; + case Intrinsic::modf: case Intrinsic::sincos: + case Intrinsic::sincospi: case Intrinsic::is_fpclass: case Intrinsic::vp_is_fpclass: return OpdIdx == 0; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll new file mode 100644 index 0000000000000..544ef5c82c7ac --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll @@ -0,0 +1,579 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|modf|extractvalue|store)" --version 5 +; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve < %s -S -o - -debug-only=loop-vectorize 2>%t.1 | FileCheck %s --check-prefix=CHECK +; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve -vector-library=ArmPL < %s 
-S -o - -debug-only=loop-vectorize 2>%t.2 | FileCheck %s --check-prefix=CHECK-ARMPL +; RUN: FileCheck --input-file=%t.1 --check-prefix=CHECK-COST %s +; RUN: FileCheck --input-file=%t.2 --check-prefix=CHECK-COST-ARMPL %s +; REQUIRES: asserts + +; CHECK-COST-LABEL: sincos_f32 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: sincos_f32 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) +; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincos_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: 
[[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @sincos_f32( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 4 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 4 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 4 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 4 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { 
float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.sincos.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: sincos_f64 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: sincos_f64 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val) +; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: 
WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincos_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @sincos_f64( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincos.nxv2f64( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincos.nxv2f64( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; 
CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 8 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 8 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 8 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 8 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.sincos.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: predicated_sincos +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of 58 for 
VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: predicated_sincos +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) +; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) + +define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @predicated_sincos( +; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[IF_THEN:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[IF_MERGE:.*:]] +; CHECK: [[FOR_END:.*:]] +; +; CHECK-ARMPL-LABEL: define void @predicated_sincos( +; 
CHECK-ARMPL-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP15:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP15]], 0 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP15]], 1 +; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr [[TMP19:%.*]], i32 4, <vscale x 4 x i1> [[TMP14:%.*]]) +; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP17]], ptr [[TMP21:%.*]], i32 4, <vscale x 4 x i1> [[TMP14]]) +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[IF_THEN:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK-ARMPL: [[IF_MERGE:.*:]] +; CHECK-ARMPL: [[FOR_END:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %if_cond = fcmp olt float %in_val, %x + br i1 %if_cond, label %if.then, label %if.merge + +if.then: + %call = tail call { float, float } @llvm.sincos.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + 
br label %if.merge + +if.merge: + %iv.next = add nuw nsw i64 %iv, 1 + %cond = icmp slt i64 %iv.next, 1024 + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + +; CHECK-COST-LABEL: modf_f32 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.modf.f32(float %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: modf_f32 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.modf.f32(float %in_val) +; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 11 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) + +define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @modf_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x 
float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.modf.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @modf_f32( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.modf.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.modf.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP13]], 1 +; CHECK-ARMPL: store <vscale x 4 x float> [[TMP14]], ptr [[TMP19:%.*]], align 4 +; CHECK-ARMPL: store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4 +; CHECK-ARMPL: store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4 +; CHECK-ARMPL: store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.modf.f32(float [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } 
[[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.modf.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: modf_f64 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.modf.f64(double %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: modf_f64 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.modf.f64(double %in_val) +; CHECK-COST-ARMPL: Cost of 11 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.modf(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call 
llvm.modf(ir<%in_val>) + +define void @modf_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @modf_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.modf.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @modf_f64( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.modf.nxv2f64( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.modf.nxv2f64( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], 
ptr [[TMP19:%.*]], align 8 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 8 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 8 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 8 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.modf.f64(double [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.modf.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: sincospi_f32 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincospi.f32(float %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call 
llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: sincospi_f32 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincospi.f32(float %in_val) +; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) + +define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincospi_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincospi.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; 
CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @sincospi_f32( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincospi.nxv4f32( [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincospi.nxv4f32( [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 +; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 4 +; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 4 +; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 4 +; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 4 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincospi.f32(float [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.sincospi.f32(float %in_val) + 
%extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; CHECK-COST-LABEL: sincospi_f64 +; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincospi.f64(double %in_val) +; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) + +; CHECK-COST-ARMPL-LABEL: sincospi_f64 +; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincospi.f64(double %in_val) +; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) +; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincospi(ir<%in_val>) + +define void @sincospi_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincospi_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: 
[[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincospi.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +; CHECK-ARMPL-LABEL: define void @sincospi_f64( +; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { +; CHECK-ARMPL: [[ENTRY:.*:]] +; CHECK-ARMPL: [[VECTOR_PH:.*:]] +; CHECK-ARMPL: [[VECTOR_BODY:.*:]] +; CHECK-ARMPL: [[TMP12:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincospi.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD:%.*]]) +; CHECK-ARMPL: [[TMP13:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincospi.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD1:%.*]]) +; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP12]], 0 +; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP13]], 0 +; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP12]], 1 +; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP13]], 1 +; CHECK-ARMPL: store <vscale x 2 x double> [[TMP14]], ptr [[TMP19:%.*]], align 8 +; CHECK-ARMPL: store <vscale x 2 x double> [[TMP15]], ptr [[TMP22:%.*]], align 8 +; CHECK-ARMPL: store <vscale x 2 x double> [[TMP16]], ptr [[TMP24:%.*]], align 8 +; CHECK-ARMPL: store <vscale x 2 x double> [[TMP17]], ptr [[TMP27:%.*]], align 8 +; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] +; CHECK-ARMPL: [[SCALAR_PH:.*:]] +; CHECK-ARMPL: [[FOR_BODY:.*:]] +; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.sincospi.f64(double [[IN_VAL:%.*]]) +; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } 
[[CALL]], 0 +; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK-ARMPL: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.sincospi.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sincos.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sincos.ll deleted file mode 100644 index a7e949838f762..0000000000000 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sincos.ll +++ /dev/null @@ -1,251 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|extractvalue|store)" --version 5 -; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve < %s -S -o - -debug-only=loop-vectorize 2>%t.1 | FileCheck %s --check-prefix=CHECK -; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve -vector-library=ArmPL < %s -S -o - -debug-only=loop-vectorize 2>%t.2 | FileCheck %s --check-prefix=CHECK-ARMPL -; RUN: FileCheck --input-file=%t.1 --check-prefix=CHECK-COST %s -; RUN: FileCheck --input-file=%t.2 --check-prefix=CHECK-COST-ARMPL %s -; REQUIRES: 
asserts - -; CHECK-COST-LABEL: sincos_f32 -; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) -; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -; CHECK-COST-ARMPL-LABEL: sincos_f32 -; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) -; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @sincos_f32( -; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_PH:.*:]] -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 -; CHECK: [[TMP5:%.*]] = 
extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 -; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 -; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK: [[EXIT:.*:]] -; -; CHECK-ARMPL-LABEL: define void @sincos_f32( -; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-ARMPL: [[ENTRY:.*:]] -; CHECK-ARMPL: [[VECTOR_PH:.*:]] -; CHECK-ARMPL: [[VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_LOAD:%.*]]) -; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_LOAD1:%.*]]) -; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 -; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 -; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 -; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 -; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 4 -; CHECK-ARMPL: store [[TMP15]], ptr [[TMP22:%.*]], align 4 -; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 4 -; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 4 -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK-ARMPL: store 
float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK-ARMPL: [[EXIT:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv - %in_val = load float, ptr %arrayidx, align 4 - %call = tail call { float, float } @llvm.sincos.f32(float %in_val) - %extract_a = extractvalue { float, float } %call, 0 - %extract_b = extractvalue { float, float } %call, 1 - %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv - store float %extract_a, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv - store float %extract_b, ptr %arrayidx4, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 - br i1 %exitcond.not, label %exit, label %for.body - -exit: - ret void -} - -; CHECK-COST-LABEL: sincos_f64 -; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val) -; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -; CHECK-COST-ARMPL-LABEL: sincos_f64 -; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val) -; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -define void @sincos_f64(ptr noalias %in, ptr noalias writeonly 
%out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @sincos_f64( -; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_PH:.*:]] -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 -; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 -; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 -; CHECK: [[EXIT:.*:]] -; -; CHECK-ARMPL-LABEL: define void @sincos_f64( -; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK-ARMPL: [[ENTRY:.*:]] -; CHECK-ARMPL: [[VECTOR_PH:.*:]] -; CHECK-ARMPL: [[VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP12:%.*]] = call { , } @llvm.sincos.nxv2f64( [[WIDE_LOAD:%.*]]) -; CHECK-ARMPL: [[TMP13:%.*]] = call { , } @llvm.sincos.nxv2f64( [[WIDE_LOAD1:%.*]]) -; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { , } [[TMP12]], 0 -; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { , } [[TMP13]], 0 -; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP12]], 1 -; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP13]], 1 -; CHECK-ARMPL: store [[TMP14]], ptr [[TMP19:%.*]], align 8 -; CHECK-ARMPL: store [[TMP15]], ptr 
[[TMP22:%.*]], align 8 -; CHECK-ARMPL: store [[TMP16]], ptr [[TMP24:%.*]], align 8 -; CHECK-ARMPL: store [[TMP17]], ptr [[TMP27:%.*]], align 8 -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) -; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 -; CHECK-ARMPL: [[EXIT:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv - %in_val = load double, ptr %arrayidx, align 8 - %call = tail call { double, double } @llvm.sincos.f64(double %in_val) - %extract_a = extractvalue { double, double } %call, 0 - %extract_b = extractvalue { double, double } %call, 1 - %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv - store double %extract_a, ptr %arrayidx2, align 8 - %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv - store double %extract_b, ptr %arrayidx4, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 - br i1 %exitcond.not, label %exit, label %for.body - -exit: - ret void -} - -; CHECK-COST-LABEL: predicated_sincos -; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) -; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 2: 
WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -; CHECK-COST-ARMPL-LABEL: predicated_sincos -; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val) -; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) -; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>) - -define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @predicated_sincos( -; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[IF_THEN:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK: [[IF_MERGE:.*:]] -; CHECK: [[FOR_END:.*:]] -; -; CHECK-ARMPL-LABEL: define void @predicated_sincos( -; CHECK-ARMPL-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] { -; CHECK-ARMPL: [[ENTRY:.*:]] -; CHECK-ARMPL: [[VECTOR_PH:.*:]] -; CHECK-ARMPL: 
[[VECTOR_BODY:.*:]] -; CHECK-ARMPL: [[TMP15:%.*]] = call { , } @llvm.sincos.nxv4f32( [[WIDE_MASKED_LOAD:%.*]]) -; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { , } [[TMP15]], 0 -; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { , } [[TMP15]], 1 -; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0( [[TMP16]], ptr [[TMP19:%.*]], i32 4, [[TMP14:%.*]]) -; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0( [[TMP17]], ptr [[TMP21:%.*]], i32 4, [[TMP14]]) -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[IF_THEN:.*:]] -; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK-ARMPL: [[IF_MERGE:.*:]] -; CHECK-ARMPL: [[FOR_END:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv - %in_val = load float, ptr %arrayidx, align 4 - %if_cond = fcmp olt float %in_val, %x - br i1 %if_cond, label %if.then, label %if.merge - -if.then: - %call = tail call { float, float } @llvm.sincos.f32(float %in_val) - %extract_a = extractvalue { float, float } %call, 0 - %extract_b = extractvalue { float, float } %call, 1 - %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv - store float %extract_a, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv - store float %extract_b, ptr %arrayidx4, align 4 - br label %if.merge - -if.merge: - %iv.next = add nuw nsw i64 %iv, 1 - %cond = icmp slt i64 %iv.next, 1024 - br i1 %cond, label %for.body, label %for.end - -for.end: - ret void -} diff --git 
a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll new file mode 100644 index 0000000000000..d928a4b7ebe4b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll @@ -0,0 +1,330 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|modf|extract|store)" --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s -S -o - | FileCheck %s + +define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincos_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, 
float } @llvm.sincos.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincos_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail 
call { double, double } @llvm.sincos.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @predicated_sincos( +; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_BODY1:.*]]: +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY1]] ], [ [[INDEX_NEXT:%.*]], %[[IF_THEN2:.*]] ] +; CHECK: [[TMP4:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 0 +; CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 1 +; CHECK: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3:%.*]], i32 0 +; CHECK: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK: [[TMP9:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 +; CHECK: store float [[TMP9]], ptr [[TMP8:%.*]], align 4 +; CHECK: [[TMP11:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 +; CHECK: store float [[TMP11]], ptr [[TMP10:%.*]], align 4 +; CHECK: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[IF_THEN2]] +; CHECK: 
[[PRED_STORE_IF1]]: +; CHECK: [[TMP15:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 +; CHECK: store float [[TMP15]], ptr [[TMP14:%.*]], align 4 +; CHECK: [[TMP17:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 +; CHECK: store float [[TMP17]], ptr [[TMP16:%.*]], align 4 +; CHECK: br label %[[IF_THEN2]] +; CHECK: [[IF_THEN2]]: +; CHECK: [[IF_THEN:.*:]] +; CHECK: [[IF_THEN3:.*:]] +; CHECK: [[IF_THEN4:.*:]] +; CHECK: [[IF_THEN1:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[IF_MERGE:.*:]] +; CHECK: [[FOR_END:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %if_cond = fcmp olt float %in_val, %x + br i1 %if_cond, label %if.then, label %if.merge + +if.then: + %call = tail call { float, float } @llvm.sincos.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + br label %if.merge + +if.merge: + %iv.next = add nuw nsw i64 %iv, 1 + %cond = icmp slt i64 %iv.next, 1024 + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + +define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @modf_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias 
writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.modf.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.modf.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +define void @modf_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @modf_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly 
[[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.modf.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.modf.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincospi_f32( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], 
ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 +; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 +; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincospi.f32(float [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 +; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 +; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @llvm.sincospi.f32(float %in_val) + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +define void @sincospi_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @sincospi_f64( +; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], 
ptr noalias writeonly [[OUT_B:%.*]]) { +; CHECK: [[ENTRY:.*:]] +; CHECK: [[VECTOR_PH:.*:]] +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 +; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 +; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 +; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[SCALAR_PH:.*:]] +; CHECK: [[FOR_BODY:.*:]] +; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincospi.f64(double [[IN_VAL:%.*]]) +; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 +; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 +; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 +; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 +; CHECK: [[EXIT:.*:]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @llvm.sincospi.f64(double %in_val) + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + diff --git a/llvm/test/Transforms/LoopVectorize/sincos.ll b/llvm/test/Transforms/LoopVectorize/sincos.ll deleted file mode 100644 index c2936eb8bb8b2..0000000000000 --- 
a/llvm/test/Transforms/LoopVectorize/sincos.ll +++ /dev/null @@ -1,157 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|extract|store)" --version 5 -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s -S -o - | FileCheck %s - -define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @sincos_f32( -; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_PH:.*:]] -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0 -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 -; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 -; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK: [[EXIT:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv - %in_val = load float, ptr %arrayidx, align 4 - %call = tail call { float, float } @llvm.sincos.f32(float %in_val) - %extract_a = extractvalue { float, float } %call, 0 - %extract_b = extractvalue { float, float } %call, 1 - %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv - 
store float %extract_a, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv - store float %extract_b, ptr %arrayidx4, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 - br i1 %exitcond.not, label %exit, label %for.body - -exit: - ret void -} - -define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @sincos_f64( -; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_PH:.*:]] -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0 -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 -; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 -; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 -; CHECK: [[EXIT:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv - %in_val = load double, ptr %arrayidx, align 8 - %call = tail call { double, double } @llvm.sincos.f64(double %in_val) - %extract_a = extractvalue { double, double } %call, 0 - %extract_b = extractvalue { double, double } %call, 1 - %arrayidx2 = getelementptr inbounds double, ptr 
%out_a, i64 %iv - store double %extract_a, ptr %arrayidx2, align 8 - %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv - store double %extract_b, ptr %arrayidx4, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 - br i1 %exitcond.not, label %exit, label %for.body - -exit: - ret void -} - -define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { -; CHECK-LABEL: define void @predicated_sincos( -; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_BODY1:.*]]: -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY1]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY1:.*]] ] -; CHECK: [[TMP4:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) -; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 0 -; CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 1 -; CHECK: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3:%.*]], i32 0 -; CHECK: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: -; CHECK: [[TMP9:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; CHECK: store float [[TMP9]], ptr [[TMP8:%.*]], align 4 -; CHECK: [[TMP11:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 -; CHECK: store float [[TMP11]], ptr [[TMP10:%.*]], align 4 -; CHECK: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[FOR_BODY1]] -; CHECK: [[PRED_STORE_IF1]]: -; CHECK: [[TMP15:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; CHECK: store float [[TMP15]], ptr [[TMP14:%.*]], align 4 -; CHECK: [[TMP17:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 -; CHECK: 
store float [[TMP17]], ptr [[TMP16:%.*]], align 4 -; CHECK: br label %[[FOR_BODY1]] -; CHECK: [[FOR_BODY1]]: -; CHECK: [[IF_THEN1:.*:]] -; CHECK: [[IF_THEN2:.*:]] -; CHECK: [[IF_THEN:.*:]] -; CHECK: [[IF_THEN3:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 -; CHECK: [[IF_MERGE:.*:]] -; CHECK: [[FOR_END:.*:]] -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv - %in_val = load float, ptr %arrayidx, align 4 - %if_cond = fcmp olt float %in_val, %x - br i1 %if_cond, label %if.then, label %if.merge - -if.then: - %call = tail call { float, float } @llvm.sincos.f32(float %in_val) - %extract_a = extractvalue { float, float } %call, 0 - %extract_b = extractvalue { float, float } %call, 1 - %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv - store float %extract_a, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv - store float %extract_b, ptr %arrayidx4, align 4 - br label %if.merge - -if.merge: - %iv.next = add nuw nsw i64 %iv, 1 - %cond = icmp slt i64 %iv.next, 1024 - br i1 %cond, label %for.body, label %for.end - -for.end: - ret void -} From 0c74ba28d428098e690eddd933c1b63277deeccf Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Fri, 28 Feb 2025 08:02:35 -0500 Subject: [PATCH 085/123] [OpenMP] Missing implicit otherwise clause in metadirective. 
(#127113) Compiling this: `int main() {` ` #pragma omp metadirective when(use r= {condition(0)}` `: parallel for)` `for (int i=0; i<10; i++)` ; }` is generating an error: `error: expected expression` The compiler is interpreting this as if it's compiling a `#pragma omp metadirective` with no `otherwise` clause. In the OMP5.2 specs chapter 7.4 it's mentioned that: `If no otherwise clause is specified the effect is as if one was specified without an associated directive variant.` This patch fixes the issue. --- clang/lib/Parse/ParseOpenMP.cpp | 9 ++ clang/test/OpenMP/metadirective_ast_print.c | 28 ++++ clang/test/OpenMP/metadirective_otherwise.cpp | 125 ++++++++++++++++++ .../include/llvm/Frontend/OpenMP/OMPContext.h | 2 +- 4 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 clang/test/OpenMP/metadirective_otherwise.cpp diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 42e6aac681c1c..b791c5d5e3019 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2883,6 +2883,15 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( /*ReadDirectiveWithinMetadirective=*/true); break; } + // If no match is found and no otherwise clause is present, skip + // OMP5.2 Chapter 7.4: If no otherwise clause is specified the effect is as + // if one was specified without an associated directive variant. 
+ if (BestIdx == -1 && Idx == 1) { + assert(Tok.is(tok::annot_pragma_openmp_end) && + "Expecting the end of the pragma here"); + ConsumeAnnotationToken(); + return StmtEmpty(); + } break; } case OMPD_threadprivate: { diff --git a/clang/test/OpenMP/metadirective_ast_print.c b/clang/test/OpenMP/metadirective_ast_print.c index d9ff7e7645216..851f08ce37ee7 100644 --- a/clang/test/OpenMP/metadirective_ast_print.c +++ b/clang/test/OpenMP/metadirective_ast_print.c @@ -77,6 +77,34 @@ void foo(void) { : parallel) default(nothing) for (int i = 0; i < 16; i++) ; + +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) otherwise() + for (int i=0; i<10; i++) + ; +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) + for (int i=0; i<10; i++) + ; +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) when(implementation = {extension(match_none)} \ + : parallel) default(parallel for) + for (int i=0; i<10; i++) + ; + +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) otherwise() + for (int i=0; i<10; i++) + ; +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) + for (int i=0; i<10; i++) + ; +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) when(implementation = {extension(match_none)} \ + : parallel) default(parallel for) + for (int i=0; i<10; i++) + ; } // CHECK: void bar(void); diff --git a/clang/test/OpenMP/metadirective_otherwise.cpp b/clang/test/OpenMP/metadirective_otherwise.cpp new file mode 100644 index 0000000000000..0533350c84eed --- /dev/null +++ b/clang/test/OpenMP/metadirective_otherwise.cpp @@ -0,0 +1,125 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void func1() { +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) otherwise() + for (int i = 0; i < 100; i++) + ; + +#pragma omp metadirective 
when(user = {condition(0)} \ + : parallel for) + for (int i = 0; i < 100; i++) + ; + +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) \ + when(implementation = {extension(match_none)} \ + : parallel) default(parallel for) + + for (int i = 0; i < 100; i++) + ; + + +} + +// CHECK-LABEL: define dso_local void @_Z5func1v() +// CHECK: entry +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: br label %[[FOR_COND:.*]] +// CHECK: [[FOR_COND]]: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100 +// CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK: [[FOR_BODY]]: +// CHECK-NEXT: br label %[[FOR_INC:.*]] +// CHECK: [[FOR_INC]]: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: [[FOR_END]]: +// CHECK-NEXT: store i32 0, ptr [[I1]], align 4 +// CHECK-NEXT: br label %[[FOR_COND2:.*]] +// CHECK: [[FOR_COND2]]: +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I1]], align 4 +// CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], 100 +// CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END7:.*]] +// CHECK: [[FOR_BODY4]]: +// CHECK-NEXT: br label %[[FOR_INC5:.*]] +// CHECK: [[FOR_INC5]]: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I1]], align 4 +// CHECK-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-NEXT: store i32 [[INC6]], ptr [[I1]], align 4 +// CHECK-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK: [[FOR_END7]]: +// CHECK: ret void + +void func2() { +#pragma omp metadirective when(user = {condition(1)} \ + : parallel for) otherwise() + for (int i = 0; i < 100; i++) + ; + +#pragma omp metadirective when(user = 
{condition(1)} \ + : parallel for) + for (int i = 0; i < 100; i++) + ; +} + +// CHECK-LABEL: define dso_local void @_Z5func2v() +// CHECK: entry +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @_Z5func2v.omp_outlined) +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @_Z5func2v.omp_outlined.1) +// CHECK-NEXT: ret void + + +void func3() { +#pragma omp metadirective when(user = {condition(0)} \ + : parallel for) \ + when(implementation = {extension(match_none)} \ + : parallel) default(parallel for) + + for (int i = 0; i < 100; i++) + ; + +} + +// CHECK-LABEL: define dso_local void @_Z5func3v() +// CHECK: entry +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 0, ptr @_Z5func3v.omp_outlined) +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +// CHECK-LABEL: define internal void @_Z5func3v.omp_outlined +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], +// CHECK-SAME: ptr noalias noundef [[DOTBOUND_TID_:%.*]]) +// CHECK-NEXT: entry +// CHECK-NEXT: [[GLOB_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[BOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[GLOB_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[BOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: br label %for.cond +// CHECK:for.cond: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 100 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK:for.body: +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK:for.inc: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK:for.end: 
+// CHECK-NEXT: ret void +// CHECK-NEXT:} + +#endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h index a501eaf2356ff..26163fdb4b63d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h @@ -188,7 +188,7 @@ bool isVariantApplicableInContext(const VariantMatchInfo &VMI, bool DeviceSetOnly = false); /// Return the index (into \p VMIs) of the variant with the highest score -/// from the ones applicble in \p Ctx. See llvm::isVariantApplicableInContext. +/// from the ones applicable in \p Ctx. See llvm::isVariantApplicableInContext. int getBestVariantMatchForContext(const SmallVectorImpl &VMIs, const OMPContext &Ctx); From 66ac33f14116ebda309bc2477b897eaa5c75d003 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 28 Feb 2025 14:56:09 +0100 Subject: [PATCH 086/123] [X86] Add tests for sspstrong with phi nodes (NFC) --- llvm/test/CodeGen/X86/stack-protector-phi.ll | 66 ++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 llvm/test/CodeGen/X86/stack-protector-phi.ll diff --git a/llvm/test/CodeGen/X86/stack-protector-phi.ll b/llvm/test/CodeGen/X86/stack-protector-phi.ll new file mode 100644 index 0000000000000..bf0442dbf47a1 --- /dev/null +++ b/llvm/test/CodeGen/X86/stack-protector-phi.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @test_phi_diff_size(i1 %c) sspstrong { +; CHECK-LABEL: test_phi_diff_size: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %if +; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq $0, (%rax) +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq $0, (%rax) +; CHECK-NEXT: retq +entry: + %a = alloca i64 + br i1 %c, label %if, 
label %join + +if: + %gep = getelementptr i8, ptr %a, i64 4 + br label %join + +join: + %phi = phi ptr [ %a, %entry ], [ %gep, %if ] + store i64 0, ptr %phi + ret void +} + +define void @test_phi_loop(i1 %c) sspstrong { +; CHECK-LABEL: test_phi_loop: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-131072, %rsp # imm = 0xFFFE0000 +; CHECK-NEXT: subq $262144, %rsp # imm = 0x40000 +; CHECK-NEXT: movq %rsp, %rax +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB1_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq $0, (%rax) +; CHECK-NEXT: addq $4, %rax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq +entry: + %a = alloca <10000 x i64> + br label %loop + +loop: + %phi = phi ptr [ %a, %entry ], [ %gep, %loop ] + store i64 0, ptr %phi + %gep = getelementptr i8, ptr %phi, i64 4 + br i1 %c, label %loop, label %exit + +exit: + ret void +} From 0ace19be70f4ac8668918a9affc58411ded80135 Mon Sep 17 00:00:00 2001 From: gdehame Date: Fri, 28 Feb 2025 15:00:16 +0100 Subject: [PATCH 087/123] [MLIR][EmitC][cf] Bugfix: correctly inline emitc.expression op in the emitted if condition of a cf.cond_br (#128958) emitc.expression ops are expected to be inlined in the if condition in the lowering of cf.cond_br if this is their only use but they weren't inlined. Instead, a use of the variable corresponding to the expression result was generated but with no declaration/definition. 
--- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 6 ++++-- mlir/test/Target/Cpp/control_flow.mlir | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index abff252575eb0..b00820ffc542b 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -613,8 +613,10 @@ static LogicalResult printOperation(CppEmitter &emitter, Block &trueSuccessor = *condBranchOp.getTrueDest(); Block &falseSuccessor = *condBranchOp.getFalseDest(); - os << "if (" << emitter.getOrCreateName(condBranchOp.getCondition()) - << ") {\n"; + os << "if ("; + if (failed(emitter.emitOperand(condBranchOp.getCondition()))) + return failure(); + os << ") {\n"; os.indent(); diff --git a/mlir/test/Target/Cpp/control_flow.mlir b/mlir/test/Target/Cpp/control_flow.mlir index 436543f7ace95..101b30c2521c9 100644 --- a/mlir/test/Target/Cpp/control_flow.mlir +++ b/mlir/test/Target/Cpp/control_flow.mlir @@ -68,3 +68,22 @@ func.func @block_labels1() { // CPP-DECLTOP-NEXT: label2: // CPP-DECLTOP-NEXT: return; // CPP-DECLTOP-NEXT: } + +emitc.func @expression_inlining(%0 : i32, %1 : i32) { + %2 = expression : i1 { + %3 = cmp lt, %0, %1 : (i32, i32) -> i1 + yield %3 : i1 + } + cf.cond_br %2, ^bb1, ^bb1 + ^bb1: // 2 preds: ^bb0, ^bb0 + return +} +// CPP-DECLTOP: void expression_inlining(int32_t [[v1:v.*]], int32_t [[v2:v.*]]) { +// CPP-DECLTOP-NEXT: if ([[v1]] < [[v2]]) { +// CPP-DECLTOP-NEXT: goto label2; +// CPP-DECLTOP-NEXT: } else { +// CPP-DECLTOP-NEXT: goto label2; +// CPP-DECLTOP-NEXT: } +// CPP-DECLTOP-NEXT: label2: +// CPP-DECLTOP-NEXT: return; +// CPP-DECLTOP-NEXT: } From 591305808b3b5e84e180336361baf44ecce0b7e2 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 28 Feb 2025 08:03:04 -0600 Subject: [PATCH 088/123] [SystemZ] Fix regstate of SELRMux operand in selectSLRMux(). 
(#128555) It seems that there can be other cases with this that also can lead to wrong code (discovered with csmith). This time it involved not the kill flag but the undef flag. Use the intersection of the flags from both MachineOperand:s instead of the RegState from just one of them. --- .../lib/Target/SystemZ/SystemZPostRewrite.cpp | 16 ++++--- llvm/test/CodeGen/SystemZ/cond-move-10.mir | 2 +- llvm/test/CodeGen/SystemZ/cond-move-11.mir | 43 +++++++++++++++++++ 3 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/cond-move-11.mir diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp index cf3073f0f2090..4b16bcf95d51c 100644 --- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp +++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -101,8 +101,10 @@ void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, unsigned LowOpcode, unsigned HighOpcode) { Register DestReg = MBBI->getOperand(0).getReg(); - Register Src1Reg = MBBI->getOperand(1).getReg(); - Register Src2Reg = MBBI->getOperand(2).getReg(); + MachineOperand &Src1MO = MBBI->getOperand(1); + MachineOperand &Src2MO = MBBI->getOperand(2); + Register Src1Reg = Src1MO.getReg(); + Register Src2Reg = Src2MO.getReg(); bool DestIsHigh = SystemZ::isHighReg(DestReg); bool Src1IsHigh = SystemZ::isHighReg(Src1Reg); bool Src2IsHigh = SystemZ::isHighReg(Src2Reg); @@ -114,7 +116,7 @@ void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, if (Src1Reg == Src2Reg) { BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), TII->get(SystemZ::COPY), DestReg) - .addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1))); + .addReg(Src1Reg, getRegState(Src1MO) & getRegState(Src2MO)); MBBI->eraseFromParent(); return; } @@ -126,15 +128,15 @@ void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, if (DestIsHigh != Src1IsHigh) { BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), TII->get(SystemZ::COPY), 
DestReg) - .addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1))); - MBBI->getOperand(1).setReg(DestReg); + .addReg(Src1Reg, getRegState(Src1MO)); + Src1MO.setReg(DestReg); Src1Reg = DestReg; Src1IsHigh = DestIsHigh; } else if (DestIsHigh != Src2IsHigh) { BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), TII->get(SystemZ::COPY), DestReg) - .addReg(MBBI->getOperand(2).getReg(), getRegState(MBBI->getOperand(2))); - MBBI->getOperand(2).setReg(DestReg); + .addReg(Src2Reg, getRegState(Src2MO)); + Src2MO.setReg(DestReg); Src2Reg = DestReg; Src2IsHigh = DestIsHigh; } diff --git a/llvm/test/CodeGen/SystemZ/cond-move-10.mir b/llvm/test/CodeGen/SystemZ/cond-move-10.mir index 1db960829729e..7a27d8b02271f 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-10.mir +++ b/llvm/test/CodeGen/SystemZ/cond-move-10.mir @@ -5,7 +5,7 @@ # CHECK: name: fun0 # CHECK: renamable $r1l = AHIMuxK killed renamable $r1l, -1, implicit-def dead $cc # CHECK-NEXT: CHIMux renamable $r5h, 9, implicit-def $cc -# CHECK-NEXT: $r14h = COPY killed renamable $r1l +# CHECK-NEXT: $r14h = COPY renamable $r1l --- name: fun0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/SystemZ/cond-move-11.mir b/llvm/test/CodeGen/SystemZ/cond-move-11.mir new file mode 100644 index 0000000000000..aea2fabf9e536 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-move-11.mir @@ -0,0 +1,43 @@ +# RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z15 -start-before=systemz-post-rewrite \ +# RUN: -stop-after=machine-cp -verify-machineinstrs 2>&1 | FileCheck %s + +# The chained SELRMux:es both has two operands with the same register but +# where one of the operands have been marked as undef (resulting from +# early-ifcvt). Check that the resulting COPY after machine-cp is from $r0l +# to $r2l. + +# CHECK: name: fun0 +# CHECK: $r2l = COPY $r0l +--- | + + @Res = global i32 0, align 4 + @Z = global i32 0, align 4 + define signext i32 @fun0() { ret i32 0 } +... 
+--- +name: fun0 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1(0x80000000) + + renamable $r0l = LRL @Z :: (dereferenceable load (s32) from @Z) + renamable $r1l = LHIMux 1 + + bb.1: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $r0l, $r1l + + CHIMux renamable $r1l, 0, implicit-def $cc + renamable $r2l = SELRMux undef renamable $r0l, renamable $r0l, 14, 6, implicit $cc + renamable $r2l = SELRMux undef renamable $r2l, killed renamable $r2l, 14, 6, implicit $cc + BRC 14, 8, %bb.1, implicit killed $cc + J %bb.2 + + bb.2: + liveins: $r2l + + STRL renamable $r2l, @Res :: (store (s32) into @Res) + renamable $r2d = LGFR killed renamable $r2l + Return implicit $r2d +... From b3e17ed39c2db1e9775990052379697d30227ff2 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 28 Feb 2025 15:13:05 +0100 Subject: [PATCH 089/123] [Coroutines] [CodeGen] Don't actually emit an output file from unit test --- clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp b/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp index e7a3bf5a7f87a..fed2d255a9fe8 100644 --- a/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp +++ b/clang/unittests/Frontend/NoAlterCodeGenActionTest.cpp @@ -52,11 +52,10 @@ class ASTCheckerConsumer : public ASTConsumer { } }; -class TestCodeGenAction : public EmitLLVMAction { +class TestCodeGenAction : public EmitLLVMOnlyAction { public: - using Base = EmitLLVMAction; - TestCodeGenAction(llvm::LLVMContext *_VMContext = nullptr) - : EmitLLVMAction(_VMContext) {} + using Base = EmitLLVMOnlyAction; + using Base::Base; std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { From edd61eba95717d5e6c0cdf154e35b80642f7e1f3 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Fri, 28 Feb 2025 14:20:59 +0000 Subject: [PATCH 090/123] [clang] Update SVE load and store intrinsics to have 
FP8 variants (#126726) --- clang/include/clang/Basic/arm_sve.td | 50 ++--- clang/lib/CodeGen/CGBuiltin.cpp | 5 +- clang/lib/CodeGen/CodeGenTypes.cpp | 3 + .../fp8-intrinsics/acle_sve2_fp8_fdot.c | 8 +- .../fp8-intrinsics/acle_sve2_fp8_fmla.c | 24 +-- .../AArch64/sve-intrinsics/acle_sve_ld1.c | 38 ++++ .../AArch64/sve-intrinsics/acle_sve_ld2.c | 32 ++++ .../AArch64/sve-intrinsics/acle_sve_ld3.c | 32 ++++ .../AArch64/sve-intrinsics/acle_sve_ld4.c | 32 ++++ .../AArch64/sve-intrinsics/acle_sve_ldnt1.c | 32 ++++ .../AArch64/sve-intrinsics/acle_sve_st1.c | 38 ++++ .../AArch64/sve-intrinsics/acle_sve_st2.c | 48 +++++ .../AArch64/sve-intrinsics/acle_sve_st3.c | 56 ++++++ .../AArch64/sve-intrinsics/acle_sve_st4.c | 64 +++++++ .../AArch64/sve-intrinsics/acle_sve_stnt1.c | 32 ++++ .../sve2p1-intrinsics/acle_sve2p1_ld1.c | 75 ++++++++ .../sve2p1-intrinsics/acle_sve2p1_ldnt1.c | 75 ++++++++ .../sve2p1-intrinsics/acle_sve2p1_loads.c | 163 ++++++++++++++++ .../sve2p1-intrinsics/acle_sve2p1_st1.c | 76 ++++++++ .../sve2p1-intrinsics/acle_sve2p1_stnt1.c | 75 ++++++++ .../sve2p1-intrinsics/acle_sve2p1_store.c | 180 ++++++++++++++++++ clang/test/CodeGen/arm-mfp8.c | 44 +++-- 22 files changed, 1125 insertions(+), 57 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3afbba51bd138..b51106fa56759 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -19,7 +19,7 @@ include "arm_sve_sme_incl.td" // Loads // Load one vector (scalar base) -def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, 
VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -33,7 +33,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { } // Load one vector (scalar base, VL displacement) -def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -247,10 +247,10 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in { } // Load one vector, unextended load, non-temporal (scalar base) -def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one vector, unextended load, non-temporal (scalar base, VL displacement) -def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; @@ -265,7 +265,7 @@ let SVETargetGuard = 
"sve,bf16", SMETargetGuard = "sme,bf16" in { } multiclass StructLoad { - def : SInst; + def : SInst; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def: SInst; } @@ -314,11 +314,11 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { def SVLD1UDQ_VNUM : MInst<"svld1udq_vnum[_{d}]", "dPcl", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">; // Load one vector (vector base + scalar offset) - def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; - def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; // Load one vector (scalar base + vector offset) - def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; + def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; // Load N-element structure into N vectors (scalar base) defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret">; @@ -341,7 +341,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { // Stores // Store one vector (scalar base) -def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, 
"aarch64_sve_st1">; +def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -350,7 +350,7 @@ def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, Verify def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; // Store one vector (scalar base, VL displacement) -def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -435,7 +435,7 @@ def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "v } // let SVETargetGuard = "sve" multiclass StructStore { - def : SInst; + def : SInst; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def: SInst; } @@ -451,10 +451,10 @@ defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">; defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">; // Store one vector, with no truncation, non-temporal (scalar base) -def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", 
[IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) -def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; @@ -470,12 +470,12 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { def SVST1UDQ_VNUM : MInst<"svst1dq_vnum[_{d}]", "vPpld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; // Store one vector (vector base + scalar offset) - def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; - def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; // Store one vector (scalar base + vector offset) - def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdb", [IsScatterStore, 
IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; - def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; + def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; + def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; // Store N vectors into N-element structure (scalar base) defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q">; @@ -2042,20 +2042,20 @@ def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNo } multiclass MultiVecLoad { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; 
def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # 
D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; @@ -2067,20 +2067,20 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { } multiclass MultiVecStore { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME 
# B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 65fac01d58362..13bffd542e78e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10223,6 +10223,7 @@ llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { default: llvm_unreachable("Invalid SVETypeFlag!"); + case SVETypeFlags::EltTyMFloat8: case SVETypeFlags::EltTyInt8: return Builder.getInt8Ty(); case SVETypeFlags::EltTyInt16: @@ 
-10651,7 +10652,7 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, unsigned IntrinsicID, bool IsZExtReturn) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); // The vector type that is returned may be different from the @@ -10698,7 +10699,7 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, SmallVectorImpl &Ops, unsigned IntrinsicID) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); // The vector type that is stored may be different from the diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 405242e97e75c..bd625052cb5ed 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -107,6 +107,9 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { MT->getNumRows() * MT->getNumColumns()); } + if (T->isMFloat8Type()) + return llvm::Type::getInt8Ty(getLLVMContext()); + llvm::Type *R = ConvertType(T); // Check for the boolean vector case. 
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c index 2f3994df03784..0b355db4b2073 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c @@ -49,8 +49,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-LABEL: define dso_local @test_svdot_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -59,8 +59,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-CXX-LABEL: define dso_local @_Z20test_svdot_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -91,8 +91,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, 
svmfloat8_t zn, svmfloat8_t zm, // CHECK-LABEL: define dso_local @test_svdot_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -101,8 +101,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-CXX-LABEL: define dso_local @_Z20test_svdot_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c index 425e6a57ffe3c..0daeeec9e7dd7 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c @@ -49,8 +49,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-LABEL: define dso_local 
@test_svmlalb_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalb.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -59,8 +59,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-CXX-LABEL: define dso_local @_Z22test_svmlalb_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalb.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -91,8 +91,8 @@ svfloat16_t test_svmlalt_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-LABEL: define dso_local @test_svmlalt_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalt.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -101,8 +101,8 @@ svfloat16_t test_svmlalt_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-CXX-LABEL: define dso_local @_Z22test_svmlalt_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalt.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -169,8 +169,8 @@ svfloat32_t test_svmlallbb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlallbb_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -179,8 +179,8 @@ svfloat32_t test_svmlallbb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // 
CHECK-CXX-LABEL: define dso_local @_Z24test_svmlallbb_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -211,8 +211,8 @@ svfloat32_t test_svmlallbt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlallbt_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -221,8 +221,8 @@ svfloat32_t test_svmlallbt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlallbt_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> 
[[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -253,8 +253,8 @@ svfloat32_t test_svmlalltb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlalltb_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -263,8 +263,8 @@ svfloat32_t test_svmlalltb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlalltb_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltb.nxv4f32( [[ZDA]], [[ZN]], 
[[DOTSPLAT]]) @@ -295,8 +295,8 @@ svfloat32_t test_svmlalltt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlalltt_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -305,8 +305,8 @@ svfloat32_t test_svmlalltt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlalltt_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c index 276ef64736bc3..40dcd65f6c609 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c @@ -205,6 +205,21 @@ svfloat64_t test_svld1_f64(svbool_t 
pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld1,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, [[PG:%.*]], zeroinitializer) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld1_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, [[PG:%.*]], zeroinitializer) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svld1_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -476,6 +491,29 @@ svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum return SVE_ACLE_FUNC(svld1_vnum,_f64,,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[TMP2]], i32 1, [[PG:%.*]], zeroinitializer) +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.masked.load.nxv16i8.p0(ptr [[TMP2]], i32 1, [[PG:%.*]], zeroinitializer) +// CPP-CHECK-NEXT: ret [[TMP3]] +// 
+svmfloat8_t test_svld1_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,,)(pg, base, vnum); +} + #ifndef __ARM_FEATURE_SME // CHECK-LABEL: @test_svld1_gather_u32base_s32( diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c index 3097cb9cbcaab..abe1c87b6f2c3 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c @@ -206,6 +206,21 @@ svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld2,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld2_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld2_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld2_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld2,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld2_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld2_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld2_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld2_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , } [[TMP1]] +// +svmfloat8x2_t test_svld2_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld2_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c index 2deb5a1d4930c..5ff7ad9de483b 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c @@ -205,6 +205,21 @@ svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld3,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld3_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld3_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP0]] +// +svmfloat8x3_t test_svld3_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld3,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld3_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -409,3 +424,20 @@ svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld3_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld3_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , , } 
[[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld3_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP1]] +// +svmfloat8x3_t test_svld3_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld3_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c index 30796a4f46a72..650fd5986be27 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c @@ -205,6 +205,21 @@ svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svld4,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svld4_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svld4_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld4_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld4,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svld4_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -409,3 +424,20 @@ svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svld4_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld4_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], 
i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret { , , , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svld4_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] +// +svmfloat8x4_t test_svld4_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld4_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c index d343c124fe6a7..b96bf0cb23d12 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c @@ -206,6 +206,21 @@ svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base) MODE_ATTR return SVE_ACLE_FUNC(svldnt1,_f64,,)(pg, base); } +// CHECK-LABEL: @test_svldnt1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svldnt1_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svldnt1_mf8(svbool_t pg, const mfloat8_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_mf8,,)(pg, base); +} + // CHECK-LABEL: @test_svldnt1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ svfloat64_t test_svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svldnt1_vnum,_f64,,)(pg, base, 
vnum); } + +// CHECK-LABEL: @test_svldnt1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z21test_svldnt1_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svldnt1_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c index 29afdaf3eb0c7..21350007da86f 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c @@ -205,6 +205,21 @@ void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data) MODE_ATTR return SVE_ACLE_FUNC(svst1,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[PG:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst1_mf8u10__SVBool_tPu6__mfp8u13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[PG:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8(svbool_t pg, mfloat8_t *base, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -476,6 +491,29 @@ void 
test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t return SVE_ACLE_FUNC(svst1_vnum,_f64,,)(pg, base, vnum, data); } +// CHECK-LABEL: @test_svst1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[TMP2]], i32 1, [[PG:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_mf8u10__SVBool_tPu6__mfp8lu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0( [[DATA:%.*]], ptr [[TMP2]], i32 1, [[PG:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8,,)(pg, base, vnum, data); +} + #ifndef __ARM_FEATURE_SME // CHECK-LABEL: @test_svst1_scatter_u32base_s32( diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c index d1511b4c363d0..9e73e4464c6f9 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c @@ -293,6 +293,29 @@ void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) MODE_ATTR return SVE_ACLE_FUNC(svst2,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst2_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// 
CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst2_mf8u10__SVBool_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2_mf8(svbool_t pg, mfloat8_t *base, svmfloat8x2_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst2,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst2_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 @@ -585,3 +608,28 @@ void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2 { return SVE_ACLE_FUNC(svst2_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst2_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z19test_svst2_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x2_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst2_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c index 4198a325f5fb6..b693b693b1ebb 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c @@ -337,6 +337,33 @@ void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) MODE_ATTR return SVE_ACLE_FUNC(svst3,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst3_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst3_mf8u10__SVBool_tPu6__mfp813svmfloat8x3_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3_mf8(svbool_t pg, mfloat8_t *base, svmfloat8x3_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst3,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst3_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 @@ -673,3 +700,32 @@ void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3 { return SVE_ACLE_FUNC(svst3_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst3_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x3_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst3_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c index 160a21d93e416..f8c3b60682573 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c @@ -381,6 +381,37 @@ void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) MODE_ATTR return SVE_ACLE_FUNC(svst4,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svst4_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svst4_mf8u10__SVBool_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4_mf8(svbool_t pg, mfloat8_t *base, svmfloat8x4_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst4,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svst4_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 @@ -761,3 +792,36 @@ void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4 { return SVE_ACLE_FUNC(svst4_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst4_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst4_vnum_mf8u10__SVBool_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x4_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst4_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c index 5e0869557c8d7..f739ea5dca641 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c @@ -206,6 +206,21 @@ void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data) 
MODE_ATTR return SVE_ACLE_FUNC(svstnt1,_f64,,)(pg, base, data); } +// CHECK-LABEL: @test_svstnt1_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svstnt1_mf8u10__SVBool_tPu6__mfp8u13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8(svbool_t pg, mfloat8_t *base, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8,,)(pg, base, data); +} + // CHECK-LABEL: @test_svstnt1_vnum_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] @@ -410,3 +425,20 @@ void test_svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64 { return SVE_ACLE_FUNC(svstnt1_vnum,_f64,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svstnt1_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svstnt1_vnum_mf8u10__SVBool_tPu6__mfp8lu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv16i8( [[DATA:%.*]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c index 93cb653032df7..ee5c2c592c61d 100644 --- 
a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c @@ -309,6 +309,21 @@ svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svld1,_f64,_x2,)(pn, base); } +// CHECK-LABEL: @test_svld1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_mf8_x2u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld1_mf8_x2(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,_x2,)(pn, base); +} + // CHECK-LABEL: @test_svld1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -354,6 +369,20 @@ svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svld1,_f64,_x4,)(pn, base); } +// CHECK-LABEL: @test_svld1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_mf8_x4u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svld1,_mf8,_x4,)(pn, base); +} // == VNUM variants == @@ -795,6 +824,29 @@ 
svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_ return SVE_ACLE_FUNC(svld1_vnum,_f64,_x2,)(pn, base, vnum); } +// CHECK-LABEL: @test_svld1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svld1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,_x2,)(pn, base, vnum); +} + // CHECK-LABEL: @test_svld1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -863,3 +915,26 @@ svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_ { return SVE_ACLE_FUNC(svld1_vnum,_f64,_x4,)(pn, base, vnum); } + +// CHECK-LABEL: @test_svld1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// 
CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svld1_vnum_mf8_x4(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_mf8,_x4,)(pn, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c index 8254c6aec5dc1..692af131e69de 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c @@ -307,6 +307,21 @@ svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svldnt1,_f64,_x2,)(pn, base); } +// CHECK-LABEL: @test_svldnt1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_mf8_x2u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svldnt1_mf8_x2(svcount_t pn, const mfloat8_t *base) ATTR +{ + 
return SVE_ACLE_FUNC(svldnt1,_mf8,_x2,)(pn, base); +} + // CHECK-LABEL: @test_svldnt1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -352,6 +367,20 @@ svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) ATTR return SVE_ACLE_FUNC(svldnt1,_f64,_x4,)(pn, base); } +// CHECK-LABEL: @test_svldnt1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_mf8_x4u11__SVCount_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svldnt1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_mf8,_x4,)(pn, base); +} // == VNUM variants == @@ -793,6 +822,29 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6 return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x2,)(pn, base, vnum); } +// CHECK-LABEL: @test_svldnt1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 
[[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svldnt1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,_x2,)(pn, base, vnum); +} + // CHECK-LABEL: @test_svldnt1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -861,3 +913,26 @@ svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int6 { return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x4,)(pn, base, vnum); } + +// CHECK-LABEL: @test_svldnt1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svldnt1_vnum_mf8_x4(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR +{ 
+ return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,_x4,)(pn, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c index 233c9b29e707a..7adb3d4940e7f 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c @@ -214,6 +214,21 @@ svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld2q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld2q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svmfloat8x2_t test_svld2q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return SVE_ACLE_FUNC(svld2q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld2q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -509,6 +524,29 @@ svfloat64x2_t test_svld2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld2q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld2q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , } [[TMP3]] +// +svmfloat8x2_t test_svld2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld2q_vnum,,_mf8,)(pg, base, vnum); +} + // CHECK-LABEL: @test_svld3q_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) @@ -709,6 +747,21 @@ svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld3q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld3q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld3q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP0]] +// +svmfloat8x3_t test_svld3q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return SVE_ACLE_FUNC(svld3q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld3q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1005,6 +1058,29 @@ svfloat64x3_t test_svld3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld3q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld3q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] 
= mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] +// +svmfloat8x3_t test_svld3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld3q_vnum,,_mf8,)(pg, base, vnum); +} + // CHECK-LABEL: @test_svld4q_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) @@ -1190,6 +1266,21 @@ svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base) return SVE_ACLE_FUNC(svld4q,,_f64,)(pg, base); } +// CHECK-LABEL: @test_svld4q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svld4q_mf8u10__SVBool_tPKu6__mfp8( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svmfloat8x4_t test_svld4q_mf8(svbool_t pg, const mfloat8_t *base) +{ + return SVE_ACLE_FUNC(svld4q,,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld4q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 
@llvm.vscale.i64() @@ -1485,6 +1576,28 @@ svfloat64x4_t test_svld4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v return SVE_ACLE_FUNC(svld4q_vnum,,_f64,)(pg, base, vnum); } +// CHECK-LABEL: @test_svld4q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_mf8u10__SVBool_tPKu6__mfp8l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svmfloat8x4_t test_svld4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld4q_vnum,,_mf8,)(pg, base, vnum); +} // Gather for 128 bits // vector base + scalar offset @@ -1692,6 +1805,23 @@ svbfloat16_t test_svld1q_gather_u64base_offset_bf16(svbool_t pg, svuint64_t base return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_bf16,)(pg, base, offset); } +// CHECK-LABEL: @test_svld1q_gather_u64base_offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: 
@_Z37test_svld1q_gather_u64base_offset_mf8u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64base_offset_mf8(svbool_t pg, svuint64_t base, int64_t offset) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_offset_mf8,)(pg, base, offset); +} + // Vector base and no offset // CHECK-LABEL: @test_svld1q_gather_u64base_u64( // CHECK-NEXT: entry: @@ -1897,6 +2027,23 @@ svbfloat16_t test_svld1q_gather_u64base_bf16(svbool_t pg, svuint64_t base) return SVE_ACLE_FUNC(svld1q_gather,_u64base,_bf16,)(pg, base); } +// CHECK-LABEL: @test_svld1q_gather_u64base_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z30test_svld1q_gather_u64base_mf8u10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64( [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64base_mf8(svbool_t pg, svuint64_t base) +{ + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_mf8,)(pg, base); +} + // CHECK-LABEL: @test_svld1q_gather_u64index_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) @@ -2428,3 +2575,19 @@ svfloat32_t test_svdl1q_gather_u64offset_f32(svbool_t pg, const float32_t *base, svfloat64_t 
test_svdl1q_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t off) { return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_f64)(pg, base, off); } + +// CHECK-LABEL: @test_svld1q_gather_u64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8( [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_mf8u10__SVBool_tPKu6__mfp8u12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8( [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svmfloat8_t test_svld1q_gather_u64offset_mf8(svbool_t pg, mfloat8_t const *base, svuint64_t off) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_mf8)(pg, base, off); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c index 9db3e5e98975a..e71e68114a5af 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c @@ -306,6 +306,21 @@ void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR return SVE_ACLE_FUNC(svst1,_f64_x2,,)(pn, base, v); } +// CHECK-LABEL: @test_svst1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_mf8_x2u11__SVCount_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( 
[[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8_x2(svcount_t pn, mfloat8_t *base, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8_x2,,)(pn, base, v); +} + // CHECK-LABEL: @test_svst1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -351,6 +366,21 @@ void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR return SVE_ACLE_FUNC(svst1,_f64_x4,,)(pn, base, v); } +// CHECK-LABEL: @test_svst1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_mf8_x4u11__SVCount_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1,_mf8_x4,,)(pn, base, v); +} + // == VNUM variants == @@ -798,6 +828,29 @@ void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svflo return SVE_ACLE_FUNC(svst1_vnum,_f64_x2,,)(pn, base, vnum, v); } +// CHECK-LABEL: @test_svst1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_mf8_x2u11__SVCount_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8_x2,,)(pn, base, vnum, v); +} + // CHECK-LABEL: @test_svst1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 @@ -872,3 +925,26 @@ void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svflo { return SVE_ACLE_FUNC(svst1_vnum,_f64_x4,,)(pn, base, vnum, v); } + +// CHECK-LABEL: @test_svst1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_mf8_x4u11__SVCount_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 
[[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_mf8_x4(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_mf8_x4,,)(pn, base, vnum, v); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c index ed1959327a611..1544260377a20 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c @@ -325,6 +325,21 @@ void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR return SVE_ACLE_FUNC(svstnt1,_f64_x2,,)(pn, base, v); } +// CHECK-LABEL: @test_svstnt1_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_mf8_x2u11__SVCount_tPu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8_x2(svcount_t pn, mfloat8_t *base, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8_x2,,)(pn, base, v); +} + // CHECK-LABEL: @test_svstnt1_f16_x4( // CHECK-NEXT: entry: @@ -373,6 +388,21 @@ void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR return SVE_ACLE_FUNC(svstnt1,_f64_x4,,)(pn, base, v); } +// CHECK-LABEL: 
@test_svstnt1_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_mf8_x4u11__SVCount_tPu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_mf8_x4,,)(pn, base, v); +} + // == VNUM variants == @@ -837,6 +867,28 @@ void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svf return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x2,,)(pn, base, vnum, v); } +// CHECK-LABEL: @test_svstnt1_vnum_mf8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_mf8_x2u11__SVCount_tPu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], 
target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x2_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8_x2,,)(pn, base, vnum, v); +} // CHECK-LABEL: @test_svstnt1_vnum_f16_x4( // CHECK-NEXT: entry: @@ -914,3 +966,26 @@ void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svf { return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x4,,)(pn, base, vnum, v); } + +// CHECK-LABEL: @test_svstnt1_vnum_mf8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_mf8_x4u11__SVCount_tPu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_mf8_x4(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloat8x4_t v) ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_mf8_x4,,)(pn, base, vnum, v); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c 
index b91780304dacb..517d5f244a46f 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c @@ -213,6 +213,21 @@ void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt) SVE_ACLE_FUNC(svst2q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst2q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: @test_svst2q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -509,6 +524,29 @@ void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl SVE_ACLE_FUNC(svst2q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst2q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: 
[[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x2_t zt) +{ + SVE_ACLE_FUNC(svst2q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // // ST3Q // CHECK-LABEL: @test_svst3q_u8( @@ -710,6 +748,21 @@ void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt) SVE_ACLE_FUNC(svst3q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst3q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: @test_svst3q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1006,6 +1059,29 @@ void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl SVE_ACLE_FUNC(svst3q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst3q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( 
[[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x3_t zt) +{ + SVE_ACLE_FUNC(svst3q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // // ST4Q // CHECK-LABEL: @test_svst4q_u8( @@ -1207,6 +1283,21 @@ void test_svst4q_f64(svbool_t pg, const float64_t *base, svfloat64x4_t zt) SVE_ACLE_FUNC(svst4q,,_f64,)(pg, base, zt); } +// CHECK-LABEL: @test_svst4q_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4q_mf8u10__SVBool_tPKu6__mfp813svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_mf8(svbool_t pg, const mfloat8_t *base, svmfloat8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q,,_mf8,)(pg, base, zt); +} + // CHECK-LABEL: @test_svst4q_vnum_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() @@ -1503,6 +1594,29 @@ void test_svst4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl 
SVE_ACLE_FUNC(svst4q_vnum,,_f64,)(pg, base, vnum, zt); } +// CHECK-LABEL: @test_svst4q_vnum_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_mf8u10__SVBool_tPKu6__mfp8l13svmfloat8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4q_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vnum, svmfloat8x4_t zt) +{ + SVE_ACLE_FUNC(svst4q_vnum,,_mf8,)(pg, base, vnum, zt); +} + // Scatter for 128 bits // vector base + scalar offset // CHECK-LABEL: @test_svst1q_scatter_u64base_offset_u64( @@ -1710,6 +1824,23 @@ void test_svst1q_scatter_u64base_offset_bf16(svbool_t pg, svuint64_t base, int64 SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _bf16)(pg, base, offset, data); } +// CHECK-LABEL: @test_svst1q_scatter_u64base_offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CHECK-NEXT: 
ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_offset_mf8u10__SVBool_tu12__SVUint64_tlu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 [[OFFSET:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_offset_mf8(svbool_t pg, svuint64_t base, int64_t offset, svmfloat8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base, _offset, _mf8)(pg, base, offset, data); +} + // Vector Base and no Offset // CHECK-LABEL: @test_svst1q_scatter_u64base_u64( // CHECK-NEXT: entry: @@ -1915,6 +2046,23 @@ void test_svst1q_scatter_u64base_bf16(svbool_t pg, svuint64_t base, svbfloat16_t SVE_ACLE_FUNC(svst1q_scatter, _u64base,,_bf16)(pg, base, data); } +// CHECK-LABEL: @test_svst1q_scatter_u64base_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svst1q_scatter_u64base_mf8u10__SVBool_tu12__SVUint64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64( [[DATA:%.*]], [[TMP0]], [[BASE:%.*]], i64 0) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_mf8(svbool_t pg, svuint64_t base, svmfloat8_t data) +{ + SVE_ACLE_FUNC(svst1q_scatter, _u64base,,_mf8)(pg, base, data); +} + // CHECK-LABEL: @test_svst1q_scatter_u64index_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) @@ 
-2798,3 +2946,35 @@ void test_svst1q_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t void test_svst1q_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t off, svfloat64_t data) { SVE_ACLE_FUNC(svst1q_scatter_,s64,offset,_f64)(pg, base, off, data); } + +// CHECK-LABEL: @test_svst1q_scatter_u64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_mf8u10__SVBool_tPu6__mfp8u12__SVUint64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64offset_mf8(svbool_t pg, mfloat8_t *base, svuint64_t off, svmfloat8_t data) { + SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_mf8)(pg, base, off, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_s64offset_mf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_s64offset_mf8u10__SVBool_tPu6__mfp8u11__SVInt64_tu13__SVMfloat8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]], [[OFF:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svst1q_scatter_s64offset_mf8(svbool_t pg, mfloat8_t *base, svint64_t off, svmfloat8_t data) { + SVE_ACLE_FUNC(svst1q_scatter_,s64,offset,_mf8)(pg, base, off, data); +} diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c index 9385b537f18b3..d9e7b5d4707d8 100644 --- a/clang/test/CodeGen/arm-mfp8.c +++ b/clang/test/CodeGen/arm-mfp8.c @@ -38,22 +38,34 @@ mfloat8x8_t test_ret_mfloat8x8_t(mfloat8x8_t v) { // CHECK-C-LABEL: define dso_local <1 x i8> @func1n( // CHECK-C-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 -// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[ARRAYIDX]], align 1 -// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-C-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 -// CHECK-C-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-C-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 +// CHECK-C-NEXT: [[MFP8_ADDR:%.*]] = alloca i8, align 1 +// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x i8], align 1 +// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load i8, ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: store i8 [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +// CHECK-C-NEXT: store i8 [[TMP1]], ptr [[RETVAL]], align 1 +// CHECK-C-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 +// CHECK-C-NEXT: ret <1 x i8> [[TMP2]] // // CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z6func1nu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// 
CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 -// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[ARRAYIDX]], align 1 -// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 -// CHECK-CXX-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-CXX-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 +// CHECK-CXX-NEXT: [[MFP8_ADDR:%.*]] = alloca i8, align 1 +// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x i8], align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: store i8 [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +// CHECK-CXX-NEXT: store i8 [[TMP1]], ptr [[RETVAL]], align 1 +// CHECK-CXX-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 +// CHECK-CXX-NEXT: ret <1 x i8> [[TMP2]] // __mfp8 func1n(__mfp8 mfp8) { __mfp8 f1n[10]; @@ -86,14 +98,18 @@ mfloat8_t test_extract_element(mfloat8x16_t x, int i) { // CHECK-C-LABEL: define dso_local <16 x i8> @test_insert_element( // CHECK-C-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]], <1 x i8> [[V:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[V]] to i8 +// CHECK-C-NEXT: [[V_ADDR:%.*]] = alloca i8, align 1 +// CHECK-C-NEXT: store <1 x i8> [[V]], ptr [[V_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load i8, ptr [[V_ADDR]], align 1 // CHECK-C-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[X]], i8 [[TMP0]], i32 [[I]] // CHECK-C-NEXT: ret <16 x i8> [[VECINS]] // // 
CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z19test_insert_element14__Mfloat8x16_tiu6__mfp8( // CHECK-CXX-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]], <1 x i8> [[V:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[V]] to i8 +// CHECK-CXX-NEXT: [[V_ADDR:%.*]] = alloca i8, align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[V]], ptr [[V_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[V_ADDR]], align 1 // CHECK-CXX-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[X]], i8 [[TMP0]], i32 [[I]] // CHECK-CXX-NEXT: ret <16 x i8> [[VECINS]] // From d76a504ccb3c727667b69a43b512f682e9e43123 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 28 Feb 2025 21:26:25 +0700 Subject: [PATCH 091/123] AMDGPU: Remove nocapture attribute from is.shared and is.private intrinsics (#129238) This should be replaced with captures(address), but tablegen currently has no way to indicate that on an intrinsic. I opened issue #129184 to fix this. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 876a6f816ad3f..3118ded81d4c9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2529,13 +2529,13 @@ def int_amdgcn_set_inactive_chain_arg : // Return if the given flat pointer points to a local memory address. def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">, DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>] + [IntrNoMem, IntrSpeculatable] // FIXME: This should be captures(ret: address) >; // Return if the given flat pointer points to a prvate memory address. 
def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">, DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>] + [IntrNoMem, IntrSpeculatable] // FIXME: This should be captures(ret: address) >; // A uniform tail call to a function with the `amdgpu_cs_chain` or From 536684e0e359b93ea1915226ec6a933067743e87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Fri, 28 Feb 2025 15:26:57 +0100 Subject: [PATCH 092/123] [NFC][analyzer] OOB test consolidation III: 'outofbound' tests (#128508) Before commit 6e17ed9 the test files `outofbound.c` and `outofbound-notwork.c` tested the behavior of the old alpha checker `alpha.security.ArrayBound` (V1); then that commit converted them into tests for the checker `security.ArrayBound` which was previously called `alpha.security.ArrayBoundV2`. This commit removes these test files and migrates their useful content to `out-of-bounds.c`. The file `outofbound.c` contained lots of testcases that covered features which are also covered in `out-of-bounds.c` or `out-of-bounds-diagnostics.c`; those redundant cases are discarded during this migration process. This is part of a commit series that reorganizes the tests of `security.ArrayBound` to a system that's easier to understand and maintain. 
--- clang/test/Analysis/out-of-bounds.c | 50 +++++++++ clang/test/Analysis/outofbound-notwork.c | 32 ------ clang/test/Analysis/outofbound.c | 130 ----------------------- 3 files changed, 50 insertions(+), 162 deletions(-) delete mode 100644 clang/test/Analysis/outofbound-notwork.c delete mode 100644 clang/test/Analysis/outofbound.c diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c index 7d6cb4ecf1b24..2174dafc0021b 100644 --- a/clang/test/Analysis/out-of-bounds.c +++ b/clang/test/Analysis/out-of-bounds.c @@ -217,3 +217,53 @@ int test_negative_offset_with_unsigned_idx(void) { unsigned idx = 2u; return p[idx]; // expected-warning {{Out of bound access to memory preceding}} } + +struct three_words { int c[3]; }; +struct seven_words { int c[7]; }; +void partially_in_bounds(void) { + struct seven_words c; + struct three_words a, *p = (struct three_words *)&c; + p[0] = a; // no-warning + p[1] = a; // no-warning + p[2] = a; // should warn + // FIXME: This is an overflow, but currently security.ArrayBound only checks + // that the _beginning_ of the accessed element is within bounds. +} + +void vla(int a) { + if (a == 5) { + int x[a]; + x[4] = 4; // no-warning + x[5] = 5; // expected-warning{{Out of bound access}} + } +} + +void sizeof_vla(int a) { + // FIXME: VLA modeling is not good enough to cover this case. + if (a == 5) { + char x[a]; + int y[sizeof(x)]; + y[4] = 4; // no-warning + y[5] = 5; // should be {{Out of bounds access}} + } +} + +void sizeof_vla_2(int a) { + // FIXME: VLA modeling is not good enough to cover this case. + if (a == 5) { + char x[a]; + int y[sizeof(x) / sizeof(char)]; + y[4] = 4; // no-warning + y[5] = 5; // should be {{Out of bounds access}} + } +} + +void sizeof_vla_3(int a) { + // FIXME: VLA modeling is not good enough to cover this case. 
+ if (a == 5) { + char x[a]; + int y[sizeof(*&*&*&x)]; + y[4] = 4; // no-warning + y[5] = 5; // should be {{Out of bounds access}} + } +} diff --git a/clang/test/Analysis/outofbound-notwork.c b/clang/test/Analysis/outofbound-notwork.c deleted file mode 100644 index 1318c07bbf2a8..0000000000000 --- a/clang/test/Analysis/outofbound-notwork.c +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,security.ArrayBound -verify %s -// XFAIL: * - -// Once we better handle modeling of sizes of VLAs, we can pull this back -// into outofbound.c. - -void sizeof_vla(int a) { - if (a == 5) { - char x[a]; - int y[sizeof(x)]; - y[4] = 4; // no-warning - y[5] = 5; // expected-warning{{Out of bounds access}} - } -} - -void sizeof_vla_2(int a) { - if (a == 5) { - char x[a]; - int y[sizeof(x) / sizeof(char)]; - y[4] = 4; // no-warning - y[5] = 5; // expected-warning{{Out of bounds access}} - } -} - -void sizeof_vla_3(int a) { - if (a == 5) { - char x[a]; - int y[sizeof(*&*&*&x)]; - y[4] = 4; // no-warning - y[5] = 5; // expected-warning{{Out of bounds access}} - } -} diff --git a/clang/test/Analysis/outofbound.c b/clang/test/Analysis/outofbound.c deleted file mode 100644 index d3d8ff2b2f0ed..0000000000000 --- a/clang/test/Analysis/outofbound.c +++ /dev/null @@ -1,130 +0,0 @@ -// RUN: %clang_analyze_cc1 -Wno-array-bounds -verify %s \ -// RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=unix \ -// RUN: -analyzer-checker=security.ArrayBound \ -// RUN: -analyzer-config unix.DynamicMemoryModeling:Optimistic=true - -typedef __typeof(sizeof(int)) size_t; -void *malloc(size_t); -void *calloc(size_t, size_t); - -char f1(void) { - char* s = "abcd"; - char c = s[4]; // no-warning - return s[5] + c; // expected-warning{{Out of bound access to memory after}} -} - -void f2(void) { - int *p = malloc(12); - p[3] = 4; // expected-warning{{Out of bound access to memory after}} -} - -struct three_words { - int c[3]; -}; - -struct seven_words { - int 
c[7]; -}; - -void f3(void) { - struct three_words a, *p; - p = &a; - p[0] = a; // no-warning - p[1] = a; // expected-warning{{Out of bound access to memory after}} -} - -void f4(void) { - struct seven_words c; - struct three_words a, *p = (struct three_words *)&c; - p[0] = a; // no-warning - p[1] = a; // no-warning - p[2] = a; // should warn - // FIXME: This is an overflow, but currently security.ArrayBound only checks - // that the _beginning_ of the accessed element is within bounds. -} - -void f5(void) { - char *p = calloc(2,2); - p[3] = '.'; // no-warning - p[4] = '!'; // expected-warning{{Out of bound access}} -} - -void f6(void) { - char a[2]; - int *b = (int*)a; - b[1] = 3; // expected-warning{{Out of bound access}} -} - -void f7(void) { - struct three_words a; - a.c[3] = 1; // expected-warning{{Out of bound access}} -} - -void vla(int a) { - if (a == 5) { - int x[a]; - x[4] = 4; // no-warning - x[5] = 5; // expected-warning{{Out of bound access}} - } -} - -void alloca_region(int a) { - if (a == 5) { - char *x = __builtin_alloca(a); - x[4] = 4; // no-warning - x[5] = 5; // expected-warning{{Out of bound access}} - } -} - -int symbolic_index(int a) { - int x[2] = {1, 2}; - if (a == 2) { - return x[a]; // expected-warning{{Out of bound access}} - } - return 0; -} - -int symbolic_index2(int a) { - int x[2] = {1, 2}; - if (a < 0) { - return x[a]; // expected-warning{{Out of bound access}} - } - return 0; -} - -int overflow_binary_search(double in) { - int eee = 16; - if (in < 1e-8 || in > 1e23) { - return 0; - } else { - static const double ins[] = {1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, - 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, - 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; - if (in < ins[eee]) { - eee -= 8; - } else { - eee += 8; - } - if (in < ins[eee]) { - eee -= 4; - } else { - eee += 4; - } - if (in < ins[eee]) { - eee -= 2; - } else { - eee += 2; - } - if (in < ins[eee]) { - eee -= 1; - } else { - eee 
+= 1; - } - if (in < ins[eee]) { // expected-warning {{Out of bound access}} - eee -= 1; - } - } - return eee; -} From 962ffe7c0473ba63043a85f66b3e1af2bc8ff8e1 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Fri, 28 Feb 2025 09:41:05 -0500 Subject: [PATCH 093/123] [AMDGPU][True16][CodeGen] True16 Add OpSel when optimizing exec mask (#128928) True16 Add OpSel when optimizing exec mask True16 VOPCX have the opsel argument. Add it when we create these instructions in SIOptimizeExecMasking. --------- Co-authored-by: Matt Arsenault --- .../Target/AMDGPU/SIOptimizeExecMasking.cpp | 2 + llvm/test/CodeGen/AMDGPU/true16-saveexec.mir | 64 +++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/true16-saveexec.mir diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 920c3e11e4718..745e4086bc7fe 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -632,6 +632,8 @@ bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence( TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::clamp); + TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::op_sel); + // The kill flags may no longer be correct. 
if (Src0->isReg()) MRI->clearKillFlags(Src0->getReg()); diff --git a/llvm/test/CodeGen/AMDGPU/true16-saveexec.mir b/llvm/test/CodeGen/AMDGPU/true16-saveexec.mir new file mode 100644 index 0000000000000..c178083eecbce --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/true16-saveexec.mir @@ -0,0 +1,64 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-optimize-exec-masking -o - %s | FileCheck %s + +--- +name: int +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: int + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr20 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; CHECK-NEXT: V_CMPX_LT_I16_t16_nosdst_e64 0, 15, 0, $vgpr20_lo16, 0, implicit-def $exec, implicit $exec + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_XOR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + liveins: $vgpr20 + $vcc = V_CMP_LT_I16_t16_e64 0, 15, 0, $vgpr20_lo16, 0, implicit $exec + renamable $sgpr0_sgpr1 = COPY $exec, implicit-def $exec + renamable $sgpr2_sgpr3 = S_AND_B64 renamable $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc + renamable $sgpr0_sgpr1 = S_XOR_B64 renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
+ +--- +name: float +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: float + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr20 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; CHECK-NEXT: V_CMPX_LT_F16_t16_nosdst_e64 0, 15, 0, $vgpr20_lo16, 1, 0, implicit-def $exec, implicit $mode, implicit $exec + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_XOR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + liveins: $vgpr20 + $vcc = V_CMP_LT_F16_t16_e64 0, 15, 0, $vgpr20_lo16, 1, 0, implicit $exec, implicit $mode + renamable $sgpr0_sgpr1 = COPY $exec, implicit-def $exec + renamable $sgpr2_sgpr3 = S_AND_B64 renamable $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc + renamable $sgpr0_sgpr1 = S_XOR_B64 renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... From 7e294ad29d851afbdd0a140530ba6ffafcdbd1d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= <108414871+balazs-benics-sonarsource@users.noreply.github.com> Date: Fri, 28 Feb 2025 15:48:31 +0100 Subject: [PATCH 094/123] Fix RegionStore assertion failure after #127602 (#129224) Basically, we may leave the loop because if exhaust the fields, array elements or other subobjects to initialize. In that case, the Bindings may be in an exhausted state, thus no further addBinding calls are allowed. Let's harden the code by sprinkling some early exists in the recursive dispatcher functions. And to actually fix the issue, I added a check guarding the single unguarded addBinding right after a loop I mentioned. 
Fixes #129211 --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 25 ++++++++++++++++ clang/test/Analysis/region-store.cpp | 29 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 620fc117c6789..550a276c66c71 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -2570,6 +2570,9 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { LimitedRegionBindingsRef RegionStoreManager::setImplicitDefaultValue(LimitedRegionBindingsConstRef B, const MemRegion *R, QualType T) { + if (B.hasExhaustedBindingLimit()) + return B; + SVal V; if (Loc::isLocType(T)) @@ -2596,6 +2599,8 @@ RegionStoreManager::setImplicitDefaultValue(LimitedRegionBindingsConstRef B, std::optional RegionStoreManager::tryBindSmallArray( LimitedRegionBindingsConstRef B, const TypedValueRegion *R, const ArrayType *AT, nonloc::LazyCompoundVal LCV) { + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(LCV); auto CAT = dyn_cast(AT); @@ -2632,6 +2637,8 @@ RegionStoreManager::bindArray(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal Init) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindArray", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(Init); const ArrayType *AT =cast(Ctx.getCanonicalType(R->getValueType())); QualType ElementTy = AT->getElementType(); @@ -2698,6 +2705,9 @@ RegionStoreManager::bindVector(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal V) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindVector", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(V); + QualType T = R->getValueType(); const VectorType *VT = T->castAs(); // Use castAs for typedefs. 
@@ -2722,6 +2732,9 @@ RegionStoreManager::bindVector(LimitedRegionBindingsConstRef B, if (VI == VE) break; + if (NewB.hasExhaustedBindingLimit()) + return NewB.withValuesEscaped(VI, VE); + NonLoc Idx = svalBuilder.makeArrayIndex(index); const ElementRegion *ER = MRMgr.getElementRegion(ElemType, Idx, R, Ctx); @@ -2758,6 +2771,9 @@ RegionStoreManager::getUniqueDefaultBinding(nonloc::LazyCompoundVal LCV) const { std::optional RegionStoreManager::tryBindSmallStruct( LimitedRegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD, nonloc::LazyCompoundVal LCV) { + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(LCV); + // If we try to copy a Conjured value representing the value of the whole // struct, don't try to element-wise copy each field. // That would unnecessarily bind Derived symbols slicing off the subregion for @@ -2822,6 +2838,9 @@ RegionStoreManager::bindStruct(LimitedRegionBindingsConstRef B, const TypedValueRegion *R, SVal V) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindStruct", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(V); + QualType T = R->getValueType(); assert(T->isStructureOrClassType()); @@ -2931,6 +2950,9 @@ RegionStoreManager::bindStruct(LimitedRegionBindingsConstRef B, ++VI; } + if (NewB.hasExhaustedBindingLimit()) + return NewB.withValuesEscaped(VI, VE); + // There may be fewer values in the initialize list than the fields of struct. if (FI != FE) { NewB = NewB.addBinding(R, BindingKey::Default, @@ -2945,6 +2967,9 @@ RegionStoreManager::bindAggregate(LimitedRegionBindingsConstRef B, const TypedRegion *R, SVal Val) { llvm::TimeTraceScope TimeScope("RegionStoreManager::bindAggregate", [R]() { return R->getDescriptiveName(); }); + if (B.hasExhaustedBindingLimit()) + return B.withValuesEscaped(Val); + // Remove the old bindings, using 'R' as the root of all regions // we will invalidate. Then add the new binding. 
return removeSubRegionBindings(B, R).addBinding(R, BindingKey::Default, Val); diff --git a/clang/test/Analysis/region-store.cpp b/clang/test/Analysis/region-store.cpp index 9e80a2e688575..cb3313cbbb313 100644 --- a/clang/test/Analysis/region-store.cpp +++ b/clang/test/Analysis/region-store.cpp @@ -386,3 +386,32 @@ void tooManyFnArgumentsWhenInlining() { 10,11,12,13,14,15,16,17,18,19, }); } + +void gh129211_assertion() { + struct Clazz { + int b; + int : 0; + }; + + Clazz d[][5][5] = { + { + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}} + }, + { + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + }, + { + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}, {}}, + {{}, {}, {}, {}}, + } + }; // no-crash +} From e9f08d2057547d5b588865b099f4a2f9387419ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 28 Feb 2025 13:56:16 -0100 Subject: [PATCH 095/123] [libc++][ci] Update the Windows toolchains to Clang 19 (#129232) This also fixes test failures in the clang-cl build configs that started a couple days ago. It seems like the failures were triggered by an update to the base image on the Github provided runners. There were failures in test/libcxx/system_reserved_names.gen.py, due to an issue in an Clang intrinsics header (avx512fp16intrin.h); this issue was observed and fixed for Clang 19 in 6f04f46927c. The test does #define A SYSTEM_RESERVED_NAME which clashes with a parameter with the name `A` in that header. By upgrading the toolchain to Clang 19, we get fixed version of this intrinsics header. Also update the llvm-mingw toolchains to a version with Clang 19.1.7. 
--- .github/workflows/libcxx-build-and-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 93e673ca513a4..5d4394435890a 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -255,11 +255,11 @@ jobs: - name: Install a current LLVM if: ${{ matrix.mingw != true }} run: | - choco install -y llvm --version=18.1.6 --allow-downgrade + choco install -y llvm --version=19.1.7 --allow-downgrade - name: Install llvm-mingw if: ${{ matrix.mingw == true }} run: | - curl -LO https://github.com/mstorsjo/llvm-mingw/releases/download/20240606/llvm-mingw-20240606-ucrt-x86_64.zip + curl -LO https://github.com/mstorsjo/llvm-mingw/releases/download/20250114/llvm-mingw-20250114-ucrt-x86_64.zip powershell Expand-Archive llvm-mingw*.zip -DestinationPath . del llvm-mingw*.zip mv llvm-mingw* c:\llvm-mingw From 3c274bb612639e5a8ac860ee21eac05f0fd04b89 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Fri, 28 Feb 2025 09:58:19 -0500 Subject: [PATCH 096/123] [CUDA][HIP] check dtor in deferred diag (#129117) Currently the deferred diag fails to diagnose calling of host function in host device function in device compilation triggered by destructors. This can be further divided into two issuse: 1. the deferred diag visitor does not visit dtor of member and parent class when visiting dtor, which it should 2. the deferred diag visitor does not visit virtual dtor of explicit template class instantiation, which it should Due to these issues, some constexpr functions which call host functions are emitted on device side, which causes undefind symbols in linking stage, as revealed by https://github.com/llvm/llvm-project/issues/108548 By fixing these issue, clang will diag the issues early during compilation instead of linking. 
--- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/Sema/Sema.cpp | 43 ++++++++++++++ clang/lib/Sema/SemaCUDA.cpp | 15 +++++ clang/lib/Sema/SemaDecl.cpp | 15 +++++ clang/test/SemaCUDA/dtor.cu | 102 ++++++++++++++++++++++++++++++++ 5 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaCUDA/dtor.cu diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ebdbc69384efb..3b2be86a88e82 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4392,11 +4392,11 @@ class Sema final : public SemaBase { // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); -private: /// Function or variable declarations to be checked for whether the deferred /// diagnostics should be emitted. llvm::SmallSetVector DeclsToCheckForDeferredDiags; +private: /// Map of current shadowing declarations to shadowed declarations. Warn if /// it looks like the user is trying to modify the shadowing declaration. llvm::DenseMap ShadowingDecls; diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index c699e92985156..b3fba097999f5 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1798,6 +1798,47 @@ class DeferredDiagnosticsEmitter Inherited::visitUsedDecl(Loc, D); } + // Visitor member and parent dtors called by this dtor. 
+ void VisitCalledDestructors(CXXDestructorDecl *DD) { + const CXXRecordDecl *RD = DD->getParent(); + + // Visit the dtors of all members + for (const FieldDecl *FD : RD->fields()) { + QualType FT = FD->getType(); + if (const auto *RT = FT->getAs()) + if (const auto *ClassDecl = dyn_cast(RT->getDecl())) + if (ClassDecl->hasDefinition()) + if (CXXDestructorDecl *MemberDtor = ClassDecl->getDestructor()) + asImpl().visitUsedDecl(MemberDtor->getLocation(), MemberDtor); + } + + // Also visit base class dtors + for (const auto &Base : RD->bases()) { + QualType BaseType = Base.getType(); + if (const auto *RT = BaseType->getAs()) + if (const auto *BaseDecl = dyn_cast(RT->getDecl())) + if (BaseDecl->hasDefinition()) + if (CXXDestructorDecl *BaseDtor = BaseDecl->getDestructor()) + asImpl().visitUsedDecl(BaseDtor->getLocation(), BaseDtor); + } + } + + void VisitDeclStmt(DeclStmt *DS) { + // Visit dtors called by variables that need destruction + for (auto *D : DS->decls()) + if (auto *VD = dyn_cast(D)) + if (VD->isThisDeclarationADefinition() && + VD->needsDestruction(S.Context)) { + QualType VT = VD->getType(); + if (const auto *RT = VT->getAs()) + if (const auto *ClassDecl = dyn_cast(RT->getDecl())) + if (ClassDecl->hasDefinition()) + if (CXXDestructorDecl *Dtor = ClassDecl->getDestructor()) + asImpl().visitUsedDecl(Dtor->getLocation(), Dtor); + } + + Inherited::VisitDeclStmt(DS); + } void checkVar(VarDecl *VD) { assert(VD->isFileVarDecl() && "Should only check file-scope variables"); @@ -1839,6 +1880,8 @@ class DeferredDiagnosticsEmitter if (auto *S = FD->getBody()) { this->Visit(S); } + if (CXXDestructorDecl *Dtor = dyn_cast(FD)) + asImpl().VisitCalledDestructors(Dtor); UsePath.pop_back(); InUsePath.erase(FD); } diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 0e1bf727d72d2..473956c37bb51 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -372,6 +372,21 @@ bool 
SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, CXXMethodDecl *MemberDecl, bool ConstRHS, bool Diagnose) { + // If MemberDecl is virtual destructor of an explicit template class + // instantiation, it must be emitted, therefore it needs to be inferred + // conservatively by ignoring implicit host/device attrs of member and parent + // dtors called by it. Also, it needs to be checed by deferred diag visitor. + bool IsExpVDtor = false; + if (isa(MemberDecl) && MemberDecl->isVirtual()) { + if (auto *Spec = dyn_cast(ClassDecl)) { + TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind(); + IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration || + TSK == TSK_ExplicitInstantiationDefinition; + } + } + if (IsExpVDtor) + SemaRef.DeclsToCheckForDeferredDiags.insert(MemberDecl); + // If the defaulted special member is defined lexically outside of its // owning class, or the special member already has explicit device or host // attributes, do not infer. diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 86e65e56accc8..fe313c62ff846 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -20473,6 +20473,21 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, if (IsEmittedForExternalSymbol()) return FunctionEmissionStatus::Emitted; + + // If FD is a virtual destructor of an explicit instantiation + // of a template class, return Emitted. 
+ if (auto *Destructor = dyn_cast(FD)) { + if (Destructor->isVirtual()) { + if (auto *Spec = dyn_cast( + Destructor->getParent())) { + TemplateSpecializationKind TSK = + Spec->getTemplateSpecializationKind(); + if (TSK == TSK_ExplicitInstantiationDeclaration || + TSK == TSK_ExplicitInstantiationDefinition) + return FunctionEmissionStatus::Emitted; + } + } + } } // Otherwise, the function is known-emitted if it's in our set of diff --git a/clang/test/SemaCUDA/dtor.cu b/clang/test/SemaCUDA/dtor.cu new file mode 100644 index 0000000000000..c266e51f5c29e --- /dev/null +++ b/clang/test/SemaCUDA/dtor.cu @@ -0,0 +1,102 @@ +// RUN: %clang_cc1 %s -std=c++20 -fsyntax-only -verify=host +// RUN: %clang_cc1 %s -std=c++20 -fcuda-is-device -fsyntax-only -verify=dev + +// host-no-diagnostics + +#include "Inputs/cuda.h" + +// Virtual dtor ~B() of explicit instantiation B must +// be emitted, which causes host_fun() called. +namespace ExplicitInstantiationExplicitDevDtor { +void host_fun() // dev-note {{'host_fun' declared here}} +{} + +template +constexpr void hd_fun() { + host_fun(); // dev-error {{reference to __host__ function 'host_fun' in __host__ __device__ function}} +} + +struct A { + constexpr ~A() { // dev-note {{called by '~B'}} + hd_fun<8>(); // dev-note {{called by '~A'}} + } +}; + +template +struct B { +public: + virtual __device__ ~B() = default; + A _a; +}; + +template class B; +} + +// The implicit host/device attrs of virtual dtor B::~B() is inferred to +// have implicit device attr since dtors of its members and parent classes can +// be executed on device. This causes a diagnostic since B::~B() must +// be emitted, and it eventually causes host_fun() called on device side. 
+namespace ExplicitInstantiationDtorNoAttr { +void host_fun() // dev-note {{'host_fun' declared here}} +{} + +template +constexpr void hd_fun() { + host_fun(); // dev-error{{reference to __host__ function 'host_fun' in __host__ __device__ function}} +} + +struct A { + constexpr ~A() { // dev-note {{called by '~B'}} + hd_fun<8>(); // dev-note {{called by '~A'}} + } +}; + +template +struct B { +public: + virtual ~B() = default; + A _a; +}; + +template +struct C { +public: + virtual ~C() = default; +}; + +template class B; +template class C; +__device__ void foo() { + C x; +} +} + +// Dtors of implicit template class instantiation are not +// conservatively inferred because the invalid usage can +// be diagnosed. +namespace ImplicitInstantiation { +void host_fun() // dev-note {{'host_fun' declared here}} +{} + +template +constexpr void hd_fun() { + host_fun(); // dev-error {{reference to __host__ function 'host_fun' in __host__ __device__ function}} +} + +struct A { + constexpr ~A() { // dev-note {{called by '~B'}} + hd_fun<8>(); // dev-note {{called by '~A'}} + } +}; + +template +struct B { +public: + ~B() = default; // dev-note {{called by 'foo'}} + A _a; +}; + +__device__ void foo() { + B x; +} +} From 57ec9e7f9f9ecbe162821334e88bccfe6a2a1b91 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 28 Feb 2025 09:58:49 -0500 Subject: [PATCH 097/123] [libc++] Mark _XOPEN_SOURCE test as unsupported on FreeBSD (#128950) The test otherwise fails on FreeBSD, which wasn't noticed when originally landing the patch that added the test because FreeBSD CI was disabled at that moment. --- libcxx/test/libcxx/xopen_source.gen.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libcxx/test/libcxx/xopen_source.gen.py b/libcxx/test/libcxx/xopen_source.gen.py index 3f2686483730a..d4a3651181ca7 100644 --- a/libcxx/test/libcxx/xopen_source.gen.py +++ b/libcxx/test/libcxx/xopen_source.gen.py @@ -43,6 +43,9 @@ // recent value of _XOPEN_SOURCE. 
// UNSUPPORTED: LIBCXX-AIX-FIXME +// This test fails on FreeBSD for an unknown reason. +// UNSUPPORTED: LIBCXX-FREEBSD-FIXME + {lit_header_restrictions.get(header, '')} {lit_header_undeprecations.get(header, '')} From 05b35cd9ca5c1dbbf89b9a50e6a6fe793901fcda Mon Sep 17 00:00:00 2001 From: Eisuke Kawashima Date: Fri, 28 Feb 2025 23:59:35 +0900 Subject: [PATCH 098/123] [lldb] fix(lldb/**.py): fix invalid escape sequences (#94034) Co-authored-by: Eisuke Kawashima --- lldb/examples/python/crashlog.py | 8 +- lldb/examples/python/delta.py | 2 +- lldb/examples/python/gdbremote.py | 4 +- lldb/examples/python/jump.py | 6 +- lldb/examples/python/performance.py | 2 +- lldb/examples/python/symbolication.py | 6 +- .../Python/lldbsuite/test/lldbpexpect.py | 2 +- .../test/test_runner/process_control.py | 2 +- .../command/backticks/TestBackticksInAlias.py | 4 +- .../TestMemoryAllocSettings.py | 2 +- .../API/commands/expression/test/TestExprs.py | 2 +- .../TestGuiExpandThreadsTree.py | 2 +- lldb/test/API/commands/help/TestHelp.py | 6 +- .../TestLaunchWithShellExpand.py | 2 +- .../register/TestRegistersUnavailable.py | 4 +- .../register_command/TestRegisters.py | 6 +- .../API/commands/settings/TestSettings.py | 12 +- .../target/basic/TestTargetCommand.py | 2 +- .../dwo/TestDumpDwo.py | 16 +- .../oso/TestDumpOso.py | 16 +- .../API/commands/trace/TestTraceDumpInfo.py | 2 +- .../API/commands/trace/TestTraceEvents.py | 4 +- .../API/commands/trace/TestTraceStartStop.py | 12 +- lldb/test/API/commands/trace/TestTraceTSC.py | 10 +- .../driver/quit_speed/TestQuitWithProcess.py | 2 +- .../TestBreakpointByLineAndColumn.py | 2 +- .../TestBreakpointLocations.py | 2 +- .../TestDataFormatterAdv.py | 8 +- .../TestDataFormatterCpp.py | 6 +- .../TestDataFormatterObjCNSContainer.py | 16 +- .../TestDataFormatterGenericUnordered.py | 22 +-- .../TestTypeSummaryListArg.py | 4 +- .../gdb_remote_client/TestXMLRegisterFlags.py | 10 +- .../memory-region/TestMemoryRegion.py | 2 +- 
.../target_var/TestTargetVar.py | 2 +- .../completion/TestIOHandlerCompletion.py | 2 +- .../API/lang/c/enum_types/TestEnumTypes.py | 4 +- .../c/function_types/TestFunctionTypes.py | 2 +- .../TestRegisterVariables.py | 2 +- .../API/lang/c/set_values/TestSetValues.py | 4 +- lldb/test/API/lang/c/strings/TestCStrings.py | 2 +- .../API/lang/c/tls_globals/TestTlsGlobals.py | 8 +- .../API/lang/cpp/char1632_t/TestChar1632T.py | 8 +- .../cpp/class_static/TestStaticVariables.py | 4 +- .../lang/cpp/class_types/TestClassTypes.py | 2 +- .../cpp/dynamic-value/TestDynamicValue.py | 2 +- .../TestLibcxxInternalsRecognizer.py | 2 +- .../API/lang/cpp/namespace/TestNamespace.py | 4 +- .../lang/cpp/signed_types/TestSignedTypes.py | 4 +- .../cpp/unsigned_types/TestUnsignedTypes.py | 2 +- .../test/API/lang/mixed/TestMixedLanguages.py | 4 +- .../lang/objc/foundation/TestObjCMethods.py | 2 +- .../objc/foundation/TestObjCMethodsNSArray.py | 10 +- .../objc/foundation/TestObjCMethodsNSError.py | 2 +- .../objc/foundation/TestObjCMethodsString.py | 10 +- .../TestObjCDynamicValue.py | 2 +- .../TestObjCBuiltinTypes.py | 4 +- .../TestAArch64LinuxMTEMemoryTagCoreFile.py | 44 ++--- .../TestAArch64LinuxMTEMemoryTagAccess.py | 162 +++++++++--------- .../TestAArch64LinuxMTEMemoryTagFaults.py | 6 +- .../TestAArch64LinuxTaggedMemoryRegion.py | 6 +- .../macosx/add-dsym/TestAddDsymDownload.py | 2 +- .../TestFirmwareCorefiles.py | 2 +- .../kern-ver-str/TestKernVerStrLCNOTE.py | 2 +- .../TestMultipleBinaryCorefile.py | 2 +- .../macosx/simulator/TestSimulatorPlatform.py | 2 +- .../skinny-corefile/TestSkinnyCorefile.py | 2 +- .../address_range/TestAddressRange.py | 6 +- .../TestTargetArchFromModule.py | 2 +- .../API/source-manager/TestSourceManager.py | 2 +- .../TestDAP_extendedStackTrace.py | 4 +- .../lldb-server/TestGdbRemoteModuleInfo.py | 6 +- .../API/tools/lldb-server/TestPtyServer.py | 2 +- .../TestGdbRemoteTargetXmlPacket.py | 2 +- lldb/test/API/types/AbstractBase.py | 6 +- lldb/utils/lui/sourcewin.py | 
2 +- 76 files changed, 282 insertions(+), 276 deletions(-) diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index ab8c2fcaf034b..6615c3353ffe4 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -296,7 +296,7 @@ class DarwinImage(symbolication.Image): except: dsymForUUIDBinary = "" - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") def __init__( self, text_addr_lo, text_addr_hi, identifier, version, uuid, path, verbose @@ -501,7 +501,7 @@ def find_image_with_identifier(self, identifier): for image in self.images: if image.identifier == identifier: return image - regex_text = "^.*\.%s$" % (re.escape(identifier)) + regex_text = r"^.*\.%s$" % (re.escape(identifier)) regex = re.compile(regex_text) for image in self.images: if regex.match(image.identifier): @@ -925,7 +925,7 @@ def get(cls): version = r"(?:" + super().version + r"\s+)?" 
address = r"(0x[0-9a-fA-F]{4,})" # 4 digits or more - symbol = """ + symbol = r""" (?: [ ]+ (?P.+) @@ -1095,7 +1095,7 @@ def parse_normal(self, line): self.crashlog.process_identifier = line[11:].strip() elif line.startswith("Version:"): version_string = line[8:].strip() - matched_pair = re.search("(.+)\((.+)\)", version_string) + matched_pair = re.search(r"(.+)\((.+)\)", version_string) if matched_pair: self.crashlog.process_version = matched_pair.group(1) self.crashlog.process_compatability_version = matched_pair.group(2) diff --git a/lldb/examples/python/delta.py b/lldb/examples/python/delta.py index eeb3c177cfa90..f847b95ab119f 100755 --- a/lldb/examples/python/delta.py +++ b/lldb/examples/python/delta.py @@ -99,7 +99,7 @@ def parse_log_file(file, options): print("# Log file: '%s'" % file) print("#----------------------------------------------------------------------") - timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$") + timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$") base_time = 0.0 last_time = 0.0 diff --git a/lldb/examples/python/gdbremote.py b/lldb/examples/python/gdbremote.py index 40ee15853fdb2..0bbfc1a0f1eed 100755 --- a/lldb/examples/python/gdbremote.py +++ b/lldb/examples/python/gdbremote.py @@ -1537,12 +1537,12 @@ def parse_gdb_log(file, options): a long time during a preset set of debugger commands.""" tricky_commands = ["qRegisterInfo"] - timestamp_regex = re.compile("(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$") + timestamp_regex = re.compile(r"(\s*)([1-9][0-9]+\.[0-9]+)([^0-9].*)$") packet_name_regex = re.compile("([A-Za-z_]+)[^a-z]") packet_transmit_name_regex = re.compile( "(?Psend|read) packet: (?P.*)" ) - packet_contents_name_regex = re.compile("\$([^#]*)#[0-9a-fA-F]{2}") + packet_contents_name_regex = re.compile(r"\$([^#]*)#[0-9a-fA-F]{2}") packet_checksum_regex = re.compile(".*#[0-9a-fA-F]{2}$") packet_names_regex_str = "(" + "|".join(gdb_remote_commands.keys()) + ")(.*)" packet_names_regex = 
re.compile(packet_names_regex_str) diff --git a/lldb/examples/python/jump.py b/lldb/examples/python/jump.py index e086df5fd1528..8d52bd9af43f6 100644 --- a/lldb/examples/python/jump.py +++ b/lldb/examples/python/jump.py @@ -38,7 +38,7 @@ def parse_linespec(linespec, frame, result): ) if not matched: - mo = re.match("^\+([0-9]+)$", linespec) + mo = re.match(r"^\+([0-9]+)$", linespec) if mo is not None: matched = True # print "Matched +" @@ -54,7 +54,7 @@ def parse_linespec(linespec, frame, result): ) if not matched: - mo = re.match("^\-([0-9]+)$", linespec) + mo = re.match(r"^\-([0-9]+)$", linespec) if mo is not None: matched = True # print "Matched -" @@ -79,7 +79,7 @@ def parse_linespec(linespec, frame, result): breakpoint = target.BreakpointCreateByLocation(file_name, line_number) if not matched: - mo = re.match("\*((0x)?([0-9a-f]+))$", linespec) + mo = re.match(r"\*((0x)?([0-9a-f]+))$", linespec) if mo is not None: matched = True # print "Matched " diff --git a/lldb/examples/python/performance.py b/lldb/examples/python/performance.py index 869a0b061cf85..b86b5a52522e0 100755 --- a/lldb/examples/python/performance.py +++ b/lldb/examples/python/performance.py @@ -346,7 +346,7 @@ def __init__(self, pid): def Measure(self): output = subprocess.getoutput(self.command).split("\n")[-1] - values = re.split("[-+\s]+", output) + values = re.split(r"[-+\s]+", output) for idx, stat in enumerate(values): multiplier = 1 if stat: diff --git a/lldb/examples/python/symbolication.py b/lldb/examples/python/symbolication.py index f6dcc8b9a7943..b16745ee963c9 100755 --- a/lldb/examples/python/symbolication.py +++ b/lldb/examples/python/symbolication.py @@ -177,9 +177,9 @@ class Section: """Class that represents an load address range""" sect_info_regex = re.compile("(?P[^=]+)=(?P.*)") - addr_regex = re.compile("^\s*(?P0x[0-9A-Fa-f]+)\s*$") + addr_regex = re.compile(r"^\s*(?P0x[0-9A-Fa-f]+)\s*$") range_regex = re.compile( - "^\s*(?P0x[0-9A-Fa-f]+)\s*(?P[-+])\s*(?P0x[0-9A-Fa-f]+)\s*$" 
+ r"^\s*(?P0x[0-9A-Fa-f]+)\s*(?P[-+])\s*(?P0x[0-9A-Fa-f]+)\s*$" ) def __init__(self, start_addr=None, end_addr=None, name=None): @@ -557,7 +557,7 @@ def find_images_with_identifier(self, identifier): if image.identifier == identifier: images.append(image) if len(images) == 0: - regex_text = "^.*\.%s$" % (re.escape(identifier)) + regex_text = r"^.*\.%s$" % (re.escape(identifier)) regex = re.compile(regex_text) for image in self.images: if regex.match(image.identifier): diff --git a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py index 998a080565b6b..3279e1fd39f8c 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py +++ b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py @@ -104,4 +104,4 @@ def cursor_forward_escape_seq(self, chars_to_move): Returns the escape sequence to move the cursor forward/right by a certain amount of characters. """ - return b"\x1b\[" + str(chars_to_move).encode("utf-8") + b"C" + return b"\x1b\\[" + str(chars_to_move).encode("utf-8") + b"C" diff --git a/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py b/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py index 07c17993bc878..8ab219a92d99d 100644 --- a/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py +++ b/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py @@ -91,7 +91,7 @@ def timeout_to_seconds(timeout): class ProcessHelper(object): - """Provides an interface for accessing process-related functionality. + r"""Provides an interface for accessing process-related functionality. This class provides a factory method that gives the caller a platform-specific implementation instance of the class. 
diff --git a/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py b/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py index c31a08ac00182..2cb8d225d6d07 100644 --- a/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py +++ b/lldb/test/API/commands/command/backticks/TestBackticksInAlias.py @@ -20,7 +20,7 @@ def test_backticks_in_alias(self): interp = self.dbg.GetCommandInterpreter() result = lldb.SBCommandReturnObject() interp.HandleCommand( - "command alias _test-argv-cmd expression -Z \`argc\` -- argv", result + r"command alias _test-argv-cmd expression -Z \`argc\` -- argv", result ) self.assertCommandReturn(result, "Made the alias") interp.HandleCommand("_test-argv-cmd", result) @@ -28,7 +28,7 @@ def test_backticks_in_alias(self): # Now try a harder case where we create this using an alias: interp.HandleCommand( - "command alias _test-argv-parray-cmd parray \`argc\` argv", result + r"command alias _test-argv-parray-cmd parray \`argc\` argv", result ) self.assertCommandReturn(result, "Made the alias") interp.HandleCommand("_test-argv-parray-cmd", result) diff --git a/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py b/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py index d27f07717affb..a82141a0792f2 100644 --- a/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py +++ b/lldb/test/API/commands/expression/memory-allocation/TestMemoryAllocSettings.py @@ -30,7 +30,7 @@ def test(self): alloc0 = re.search("^.*IRMemoryMap::Malloc.+?0xdead0000.*$", log, re.MULTILINE) # Malloc adds additional bytes to allocation size, hence 10007 alloc1 = re.search( - "^.*IRMemoryMap::Malloc\s*?\(10007.+?0xdead1000.*$", log, re.MULTILINE + r"^.*IRMemoryMap::Malloc\s*?\(10007.+?0xdead1000.*$", log, re.MULTILINE ) self.assertTrue(alloc0, "Couldn't find an allocation at a given address.") self.assertTrue( diff --git a/lldb/test/API/commands/expression/test/TestExprs.py 
b/lldb/test/API/commands/expression/test/TestExprs.py index 41faf07f8cb44..17fd952130ee7 100644 --- a/lldb/test/API/commands/expression/test/TestExprs.py +++ b/lldb/test/API/commands/expression/test/TestExprs.py @@ -50,7 +50,7 @@ def build_and_run(self): def test_floating_point_expr_commands(self): self.build_and_run() - self.expect("expression 2.234f", patterns=["\(float\) \$.* = 2\.234"]) + self.expect("expression 2.234f", patterns=[r"\(float\) \$.* = 2\.234"]) # (float) $2 = 2.234 def test_many_expr_commands(self): diff --git a/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py b/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py index 3bb45521747d8..69aa674f6ae5d 100644 --- a/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py +++ b/lldb/test/API/commands/gui/expand-threads-tree/TestGuiExpandThreadsTree.py @@ -48,7 +48,7 @@ def test_gui(self): self.child.expect_exact("Threads") # The main thread should be expanded. - self.child.expect("#\d+: main") + self.child.expect(r"#\d+: main") # Quit the GUI self.child.send(escape_key) diff --git a/lldb/test/API/commands/help/TestHelp.py b/lldb/test/API/commands/help/TestHelp.py index f0f5bcb321801..6aaff17fa4ea6 100644 --- a/lldb/test/API/commands/help/TestHelp.py +++ b/lldb/test/API/commands/help/TestHelp.py @@ -349,13 +349,13 @@ def test_help_show_tags(self): self.expect( "help memory read", patterns=[ - "--show-tags\n\s+Include memory tags in output " - "\(does not apply to binary output\)." + "--show-tags\n\\s+Include memory tags in output " + "\\(does not apply to binary output\\)." 
], ) self.expect( "help memory find", - patterns=["--show-tags\n\s+Include memory tags in output."], + patterns=["--show-tags\n\\s+Include memory tags in output."], ) @no_debug_info_test diff --git a/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py b/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py index fcf61c9775c63..a7f8b38649b22 100644 --- a/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py +++ b/lldb/test/API/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py @@ -93,7 +93,7 @@ def test(self): self.runCmd("process kill") - self.runCmd("process launch -X true -w %s -- foo\ bar" % (self.getBuildDir())) + self.runCmd(r"process launch -X true -w %s -- foo\ bar" % (self.getBuildDir())) process = self.process() diff --git a/lldb/test/API/commands/register/register/TestRegistersUnavailable.py b/lldb/test/API/commands/register/register/TestRegistersUnavailable.py index abd3aeace8969..0ccccd2f09712 100644 --- a/lldb/test/API/commands/register/register/TestRegistersUnavailable.py +++ b/lldb/test/API/commands/register/register/TestRegistersUnavailable.py @@ -48,12 +48,12 @@ def test_unavailable_registers(self): "register read --all", patterns=[ "(?sm)^general purpose registers:\n" - "^\s+rdx = 0x5555555555555555\n" + "^\\s+rdx = 0x5555555555555555\n" ".*" "^3 registers were unavailable.\n" "\n" "^supplementary registers:\n" - "^\s+edx = 0x55555555\n" + "^\\s+edx = 0x55555555\n" ".*" "^12 registers were unavailable." 
], diff --git a/lldb/test/API/commands/register/register/register_command/TestRegisters.py b/lldb/test/API/commands/register/register/register_command/TestRegisters.py index 5bf7aa5dee9c4..100bcceba2812 100644 --- a/lldb/test/API/commands/register/register/register_command/TestRegisters.py +++ b/lldb/test/API/commands/register/register/register_command/TestRegisters.py @@ -662,14 +662,14 @@ def test_register_read_fields(self): # N/Z/C/V bits will always be present, so check only for those. self.expect( "register read cpsr", - patterns=["= \(N = [0|1], Z = [0|1], C = [0|1], V = [0|1]"], + patterns=[r"= \(N = [0|1], Z = [0|1], C = [0|1], V = [0|1]"], ) self.expect( - "register read fpsr", patterns=["= \(QC = [0|1], IDC = [0|1], IXC = [0|1]"] + "register read fpsr", patterns=[r"= \(QC = [0|1], IDC = [0|1], IXC = [0|1]"] ) # AHP/DN/FZ always present, others may vary. self.expect( - "register read fpcr", patterns=["= \(AHP = [0|1], DN = [0|1], FZ = [0|1]"] + "register read fpcr", patterns=[r"= \(AHP = [0|1], DN = [0|1], FZ = [0|1]"] ) # Should get enumerator descriptions for RMode. 
diff --git a/lldb/test/API/commands/settings/TestSettings.py b/lldb/test/API/commands/settings/TestSettings.py index 2dd813f6b155b..d36e08875919a 100644 --- a/lldb/test/API/commands/settings/TestSettings.py +++ b/lldb/test/API/commands/settings/TestSettings.py @@ -186,13 +186,13 @@ def cleanup(): self.addTearDownHook(cleanup) self.runCmd("settings show frame-format") - m = re.match('^frame-format \(format-string\) = "(.*)"$', self.res.GetOutput()) + m = re.match(r'^frame-format \(format-string\) = "(.*)"$', self.res.GetOutput()) self.assertTrue(m, "Bad settings string") self.format_string = m.group(1) # Change the default format to print function.name rather than # function.name-with-args - format_string = "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}\`${function.name}{${function.pc-offset}}}{ at ${line.file.fullpath}:${line.number}}{, lang=${language}}\n" + format_string = "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}\\`${function.name}{${function.pc-offset}}}{ at ${line.file.fullpath}:${line.number}}{, lang=${language}}\n" self.runCmd("settings set frame-format %s" % format_string) # Immediately test the setting. @@ -724,7 +724,7 @@ def test_settings_with_trailing_whitespace(self): ) self.runCmd("settings set target.run-args 1 2 3") # Set to known value # Set to new value with trailing whitespaces - self.runCmd("settings set target.run-args 3 \ \ ") + self.runCmd(r"settings set target.run-args 3 \ \ ") self.expect( "settings show target.run-args", SETTING_MSG("target.run-args"), @@ -846,11 +846,11 @@ def test_settings_clear_all(self): # Check that settings have their default values after clearing. 
self.expect( "settings show target.env-vars", - patterns=["^target.env-vars \(dictionary of strings\) =\s*$"], + patterns=[r"^target.env-vars \(dictionary of strings\) =\s*$"], ) self.expect( "settings show target.run-args", - patterns=["^target.run-args \(arguments\) =\s*$"], + patterns=[r"^target.run-args \(arguments\) =\s*$"], ) self.expect("settings show auto-confirm", substrs=["false"]) self.expect("settings show tab-size", substrs=["2"]) @@ -947,7 +947,7 @@ def test_experimental_settings(self): # showing & setting an undefined .experimental. setting should generate no errors. self.expect( "settings show target.experimental.setting-which-does-not-exist", - patterns=["^\s$"], + patterns=[r"^\s$"], error=False, ) self.expect( diff --git a/lldb/test/API/commands/target/basic/TestTargetCommand.py b/lldb/test/API/commands/target/basic/TestTargetCommand.py index 953b59d729bfa..d91a3e0edb715 100644 --- a/lldb/test/API/commands/target/basic/TestTargetCommand.py +++ b/lldb/test/API/commands/target/basic/TestTargetCommand.py @@ -74,7 +74,7 @@ def do_target_command(self): # Find the largest index of the existing list. 
import re - pattern = re.compile("target #(\d+):") + pattern = re.compile(r"target #(\d+):") for line in reversed(output.split(os.linesep)): match = pattern.search(line) if match: diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py index 05c72945b1439..13d12e3686a17 100644 --- a/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py +++ b/lldb/test/API/commands/target/dump-separate-debug-info/dwo/TestDumpDwo.py @@ -94,11 +94,11 @@ def test_dwos_loaded_table_output(self): self.expect( "target modules dump separate-debug-info", patterns=[ - "Symbol file: .*?a\.out", + r"Symbol file: .*?a\.out", 'Type: "dwo"', - "Dwo ID\s+Err\s+Dwo Path", - "0x[a-zA-Z0-9]{16}\s+.*main\.dwo", - "0x[a-zA-Z0-9]{16}\s+.*foo\.dwo", + r"Dwo ID\s+Err\s+Dwo Path", + r"0x[a-zA-Z0-9]{16}\s+.*main\.dwo", + r"0x[a-zA-Z0-9]{16}\s+.*foo\.dwo", ], ) @@ -118,11 +118,11 @@ def test_dwos_not_loaded_table_output(self): self.expect( "target modules dump separate-debug-info", patterns=[ - "Symbol file: .*?a\.out", + r"Symbol file: .*?a\.out", 'Type: "dwo"', - "Dwo ID\s+Err\s+Dwo Path", - "0x[a-zA-Z0-9]{16}\s+E\s+.*main\.dwo", - "0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.dwo", + r"Dwo ID\s+Err\s+Dwo Path", + r"0x[a-zA-Z0-9]{16}\s+E\s+.*main\.dwo", + r"0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.dwo", ], ) diff --git a/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py b/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py index 06dc823459184..4e7560338b1d4 100644 --- a/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py +++ b/lldb/test/API/commands/target/dump-separate-debug-info/oso/TestDumpOso.py @@ -93,11 +93,11 @@ def test_shows_oso_loaded_table_output(self): self.expect( "target modules dump separate-debug-info", patterns=[ - "Symbol file: .*?a\.out", + r"Symbol file: .*?a\.out", 'Type: "oso"', - "Mod Time\s+Err\s+Oso Path", - 
"0x[a-zA-Z0-9]{16}\s+.*main\.o", - "0x[a-zA-Z0-9]{16}\s+.*foo\.o", + r"Mod Time\s+Err\s+Oso Path", + r"0x[a-zA-Z0-9]{16}\s+.*main\.o", + r"0x[a-zA-Z0-9]{16}\s+.*foo\.o", ], ) @@ -119,11 +119,11 @@ def test_shows_oso_not_loaded_table_output(self): self.expect( "target modules dump separate-debug-info", patterns=[ - "Symbol file: .*?a\.out", + r"Symbol file: .*?a\.out", 'Type: "oso"', - "Mod Time\s+Err\s+Oso Path", - "0x[a-zA-Z0-9]{16}\s+E\s+.*main\.o", - "0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.o", + r"Mod Time\s+Err\s+Oso Path", + r"0x[a-zA-Z0-9]{16}\s+E\s+.*main\.o", + r"0x[a-zA-Z0-9]{16}\s+E\s+.*foo\.o", ], ) diff --git a/lldb/test/API/commands/trace/TestTraceDumpInfo.py b/lldb/test/API/commands/trace/TestTraceDumpInfo.py index 3f67475d631dd..52449631f6aa9 100644 --- a/lldb/test/API/commands/trace/TestTraceDumpInfo.py +++ b/lldb/test/API/commands/trace/TestTraceDumpInfo.py @@ -64,7 +64,7 @@ def testDumpRawTraceSize(self): hardware disabled tracing: 4 trace synchronization point: 1""", ], - patterns=["Decoding instructions: \d.\d\ds"], + patterns=[r"Decoding instructions: \d.\d\ds"], ) def testDumpRawTraceSizeJSON(self): diff --git a/lldb/test/API/commands/trace/TestTraceEvents.py b/lldb/test/API/commands/trace/TestTraceEvents.py index 52f6241456b76..c20bcc247105b 100644 --- a/lldb/test/API/commands/trace/TestTraceEvents.py +++ b/lldb/test/API/commands/trace/TestTraceEvents.py @@ -68,7 +68,7 @@ def testPauseEvents(self): self.expect( "thread trace dump instructions -e -f", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* 0: \(event\) trace synchronization point \[offset \= 0x0xec0\] 1: \(event\) hardware disabled tracing a.out`main \+ 23 at main.cpp:12 @@ -102,7 +102,7 @@ def testPauseEvents(self): self.expect( "thread trace dump instructions -e --id 18", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`symbol stub for: foo\(\) 18: {ADDRESS_REGEX} jmpq .* 17: \(event\) software disabled tracing diff --git 
a/lldb/test/API/commands/trace/TestTraceStartStop.py b/lldb/test/API/commands/trace/TestTraceStartStop.py index 134cf13096edb..5add321b4c83f 100644 --- a/lldb/test/API/commands/trace/TestTraceStartStop.py +++ b/lldb/test/API/commands/trace/TestTraceStartStop.py @@ -244,7 +244,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions -f", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 4 at main.cpp:2 2: {ADDRESS_REGEX} movl""" ], @@ -255,7 +255,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions -f", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 4 at main.cpp:2 2: {ADDRESS_REGEX} movl .* a.out`main \+ 11 at main.cpp:4 @@ -269,7 +269,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 32 at main.cpp:4 10: {ADDRESS_REGEX} jle .* ; <\+20> at main.cpp:5 8: {ADDRESS_REGEX} cmpl .* @@ -297,7 +297,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions -f", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 20 at main.cpp:5 2: {ADDRESS_REGEX} xorl""" ], @@ -306,7 +306,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 20 at main.cpp:5 2: {ADDRESS_REGEX} xorl""" ], @@ -336,7 +336,7 @@ def testStartStopLiveThreads(self): self.expect( "thread trace dump instructions -c 1", patterns=[ - f"""thread #1: tid = .* + rf"""thread #1: tid = .* a.out`main \+ 11 at main.cpp:4""" ], ) diff --git a/lldb/test/API/commands/trace/TestTraceTSC.py b/lldb/test/API/commands/trace/TestTraceTSC.py index 580cb347dc30a..4a19065e60c2b 100644 --- a/lldb/test/API/commands/trace/TestTraceTSC.py +++ b/lldb/test/API/commands/trace/TestTraceTSC.py @@ -20,7 +20,7 @@ def testTscPerThread(self): 
self.expect("n") self.expect( "thread trace dump instructions -t -c 1", - patterns=[": \[\d+.\d+ ns\] 0x0000000000400511 movl"], + patterns=[r": \[\d+.\d+ ns\] 0x0000000000400511 movl"], ) @testSBAPIAndCommands @@ -43,7 +43,7 @@ def testMultipleTscsPerThread(self): self.runCmd("thread trace dump instructions -t --raw --forward") id_to_timestamp = {} for line in self.res.GetOutput().splitlines(): - m = re.search(" (.+): \[(.+)\ ns].*", line) + m = re.search(r" (.+): \[(.+)\ ns].*", line) if m: id_to_timestamp[int(m.group(1))] = m.group(2) self.assertEqual(len(id_to_timestamp), 3) @@ -69,12 +69,12 @@ def testTscPerProcess(self): self.expect("n") self.expect( "thread trace dump instructions -t -c 1", - patterns=[": \[\d+.\d+ ns\] 0x0000000000400511 movl"], + patterns=[r": \[\d+.\d+ ns\] 0x0000000000400511 movl"], ) self.expect( "thread trace dump instructions -t -c 1 --pretty-json", - patterns=['''"timestamp_ns": "\d+.\d+"'''], + patterns=[r'''"timestamp_ns": "\d+.\d+"'''], ) @testSBAPIAndCommands @@ -91,7 +91,7 @@ def testDumpingAfterTracingWithoutTsc(self): self.expect("n") self.expect( "thread trace dump instructions -t -c 1", - patterns=[": \[unavailable\] 0x0000000000400511 movl"], + patterns=[r": \[unavailable\] 0x0000000000400511 movl"], ) self.expect( diff --git a/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py b/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py index 5cfcf5d69fd2a..2412b295bfb59 100644 --- a/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py +++ b/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py @@ -28,7 +28,7 @@ def test_run_quit(self): # Launch the process without a TTY so we don't have to interrupt: child.sendline("process launch -n") print("launched process") - child.expect("Process ([\d]*) launched:") + child.expect(r"Process ([\d]*) launched:") print("Got launch message") child.sendline("quit") print("sent quit") diff --git 
a/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py b/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py index fe99adf425513..5798c8ffa8220 100644 --- a/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_by_line_and_column/TestBreakpointByLineAndColumn.py @@ -60,7 +60,7 @@ def testBreakpointByLineAndColumnNearestCode(self): for pattern in patterns: line = line_number("main.cpp", pattern) + 1 - column = int(re.search("\(col:([0-9]+)\)", pattern).group(1)) + column = int(re.search(r"\(col:([0-9]+)\)", pattern).group(1)) source_loc.append({"line": line, "column": column}) target = self.createTestTarget() diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py b/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py index d87e6275f7b51..e0c93e3c18581 100644 --- a/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py @@ -53,7 +53,7 @@ def set_breakpoint(self): ], patterns=[ "where = a.out`func_inlined .+unresolved, hit count = 0", - "where = a.out`main .+\[inlined\].+unresolved, hit count = 0", + r"where = a.out`main .+\[inlined\].+unresolved, hit count = 0", ], ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-advanced/TestDataFormatterAdv.py b/lldb/test/API/functionalities/data-formatter/data-formatter-advanced/TestDataFormatterAdv.py index b740689e67538..ce4eb0a060cee 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-advanced/TestDataFormatterAdv.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-advanced/TestDataFormatterAdv.py @@ -104,7 +104,9 @@ def cleanup(): self.runCmd("type 
summary clear") - self.runCmd('type summary add --summary-string "${var[0-1]}" -x "int\[[0-9]\]"') + self.runCmd( + r'type summary add --summary-string "${var[0-1]}" -x "int\[[0-9]\]"' + ) self.expect("frame variable int_array", substrs=["1,2"]) @@ -119,7 +121,7 @@ def cleanup(): self.runCmd("type summary clear") - self.runCmd('type summary add -c -x "i_am_cool\[[0-9]\]"') + self.runCmd(r'type summary add -c -x "i_am_cool\[[0-9]\]"') self.runCmd("type summary add -c i_am_cool") self.expect( @@ -172,7 +174,7 @@ def cleanup(): self.runCmd("type summary clear") self.runCmd( - 'type summary add --summary-string "${*var[].x[0-3]%hex} is a bitfield on a set of integers" -x "SimpleWithPointers\[[0-9]\]"' + r'type summary add --summary-string "${*var[].x[0-3]%hex} is a bitfield on a set of integers" -x "SimpleWithPointers\[[0-9]\]"' ) self.expect( diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py b/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py index 644529b1c451b..a848c6257510e 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py @@ -62,7 +62,7 @@ def cleanup(): self.expect( "frame variable", patterns=[ - "\(Speed\) SPILookHex = 0x[0-9a-f]+" # Speed should look hex-ish now. + r"\(Speed\) SPILookHex = 0x[0-9a-f]+" # Speed should look hex-ish now. ], ) @@ -71,14 +71,14 @@ def cleanup(): self.expect( "frame variable", patterns=[ - "\(SignalMask\) SMILookHex = 0x[0-9a-f]+" # SignalMask should look hex-ish now. + r"\(SignalMask\) SMILookHex = 0x[0-9a-f]+" # SignalMask should look hex-ish now. ], ) self.expect( "frame variable", matching=False, patterns=[ - "\(Type4\) T4ILookChar = 0x[0-9a-f]+" # Type4 should NOT look hex-ish now. + r"\(Type4\) T4ILookChar = 0x[0-9a-f]+" # Type4 should NOT look hex-ish now. 
], ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py index 4046dc79538a5..c90a5c61d9c0b 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py @@ -53,7 +53,7 @@ def nscontainers_data_formatter_commands(self): self.expect( "frame variable -d run-target *nscfDictionary", patterns=[ - "\(__NSCFDictionary\) \*nscfDictionary =", + r"\(__NSCFDictionary\) \*nscfDictionary =", 'key = 0x.* @"foo"', 'value = 0x.* @"foo"', 'key = 0x.* @"bar"', @@ -68,7 +68,7 @@ def nscontainers_data_formatter_commands(self): self.expect( "frame variable -d run-target *cfDictionaryRef", patterns=[ - "\(const __CFDictionary\) \*cfDictionaryRef =", + r"\(const __CFDictionary\) \*cfDictionaryRef =", 'key = 0x.* @"foo"', 'value = 0x.* @"foo"', 'key = 0x.* @"bar"', @@ -89,18 +89,18 @@ def nscontainers_data_formatter_commands(self): self.expect( "frame variable -d run-target *nscfSet", patterns=[ - "\(__NSCFSet\) \*nscfSet =", - '\[0\] = 0x.* @".*"', - '\[1\] = 0x.* @".*"', + r"\(__NSCFSet\) \*nscfSet =", + r'\[0\] = 0x.* @".*"', + r'\[1\] = 0x.* @".*"', ], ) self.expect( "frame variable -d run-target *cfSetRef", patterns=[ - "\(const __CFSet\) \*cfSetRef =", - '\[0\] = 0x.* @".*"', - '\[1\] = 0x.* @".*"', + r"\(const __CFSet\) \*cfSetRef =", + r'\[0\] = 0x.* @".*"', + r'\[1\] = 0x.* @".*"', ], ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py index c3043b489d951..50dfbbf6b90a5 100644 --- 
a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/unordered/TestDataFormatterGenericUnordered.py @@ -83,9 +83,9 @@ def cleanup(): [ "IntsUnorderedSet", "size=5 {", - "\[\d\] = 5", - "\[\d\] = 3", - "\[\d\] = 2", + r"\[\d\] = 5", + r"\[\d\] = 3", + r"\[\d\] = 2", ], ) @@ -94,9 +94,9 @@ def cleanup(): [ "StringsUnorderedSet", "size=5 {", - '\[\d\] = "is"', - '\[\d\] = "world"', - '\[\d\] = "hello"', + r'\[\d\] = "is"', + r'\[\d\] = "world"', + r'\[\d\] = "hello"', ], ) @@ -105,9 +105,9 @@ def cleanup(): [ "IntsUnorderedMultiSet", "size=6 {", - "(\[\d\] = 3(\\n|.)+){3}", - "\[\d\] = 2", - "\[\d\] = 1", + "(\\[\\d\\] = 3(\\n|.)+){3}", + r"\[\d\] = 2", + r"\[\d\] = 1", ], ) @@ -116,8 +116,8 @@ def cleanup(): [ "StringsUnorderedMultiSet", "size=5 {", - '(\[\d\] = "is"(\\n|.)+){2}', - '(\[\d\] = "world"(\\n|.)+){2}', + '(\\[\\d\\] = "is"(\\n|.)+){2}', + '(\\[\\d\\] = "world"(\\n|.)+){2}', ], ) diff --git a/lldb/test/API/functionalities/data-formatter/type_summary_list_arg/TestTypeSummaryListArg.py b/lldb/test/API/functionalities/data-formatter/type_summary_list_arg/TestTypeSummaryListArg.py index 0bc34910df7d2..6b883a5bb6c8f 100644 --- a/lldb/test/API/functionalities/data-formatter/type_summary_list_arg/TestTypeSummaryListArg.py +++ b/lldb/test/API/functionalities/data-formatter/type_summary_list_arg/TestTypeSummaryListArg.py @@ -17,7 +17,7 @@ def test_type_summary_list_with_arg(self): "type summary list Foo", substrs=["Category: default", "Category: system"] ) self.expect( - "type summary list char", substrs=["char ?(\*|\[\])", "char ?\[[0-9]+\]"] + "type summary list char", substrs=[r"char ?(\*|\[\])", r"char ?\[[0-9]+\]"] ) self.expect("type summary list -w default", substrs=["system"], matching=False) @@ -28,6 +28,6 @@ def test_type_summary_list_with_arg(self): ) self.expect( "type summary list -w system char", - 
substrs=["char ?(\*|\[\])", "char ?\[[0-9]+\]"], + substrs=[r"char ?(\*|\[\])", r"char ?\[[0-9]+\]"], matching=True, ) diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py b/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py index 2dbb2b5f5e3a9..d7849500c378d 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py @@ -678,7 +678,7 @@ def test_enum_type_not_found(self): """ ) - self.expect("register read cpsr", patterns=["\(E = 1\)$"]) + self.expect("register read cpsr", patterns=[r"\(E = 1\)$"]) @skipIfXmlSupportMissing @skipIfRemote @@ -701,7 +701,7 @@ def test_enum_duplicated_evalue(self): ) self.expect("register info cpsr", patterns=["E: 1 = def, 2 = geh$"]) - self.expect("register read cpsr", patterns=["\(E = def \| geh\)$"]) + self.expect("register read cpsr", patterns=[r"\(E = def \| geh\)$"]) @skipIfXmlSupportMissing @skipIfRemote @@ -725,7 +725,7 @@ def test_enum_duplicated(self): ) self.expect("register info cpsr", patterns=["E: 1 = def$"]) - self.expect("register read cpsr", patterns=["\(E = def\)$"]) + self.expect("register read cpsr", patterns=[r"\(E = def\)$"]) @skipIfXmlSupportMissing @skipIfRemote @@ -1014,7 +1014,7 @@ def test_many_fields_same_enum(self): self.expect("register info cpsr", patterns=expected_info) - expected_read = ["\(f2 = valid, f1 = valid\)$"] + expected_read = [r"\(f2 = valid, f1 = valid\)$"] self.expect("register read x0", patterns=expected_read) self.expect("register read cpsr", patterns=expected_read) @@ -1055,4 +1055,4 @@ def test_fields_same_name_different_enum(self): ], ) - self.expect("register read x0", patterns=["\(foo = foo_1, foo = foo_0\)$"]) + self.expect("register read x0", patterns=[r"\(foo = foo_1, foo = foo_0\)$"]) diff --git a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py 
index 577411ebc1037..50182e72e498c 100644 --- a/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py +++ b/lldb/test/API/functionalities/memory-region/TestMemoryRegion.py @@ -95,7 +95,7 @@ def test_command(self): self.assertFalse(result.Succeeded()) self.assertRegex( result.GetError(), - "Usage: memory region \(or \-\-all\)", + r"Usage: memory region \(or \-\-all\)", ) # --all should match what repeating the command gives you diff --git a/lldb/test/API/functionalities/target_var/TestTargetVar.py b/lldb/test/API/functionalities/target_var/TestTargetVar.py index 0ef3d008e8f19..2d108df3e22e5 100644 --- a/lldb/test/API/functionalities/target_var/TestTargetVar.py +++ b/lldb/test/API/functionalities/target_var/TestTargetVar.py @@ -27,7 +27,7 @@ def testTargetVarExpr(self): ) self.expect("target variable i", substrs=["i", "42"]) self.expect( - "target variable var", patterns=["\(incomplete \*\) var = 0[xX](0)*dead"] + "target variable var", patterns=[r"\(incomplete \*\) var = 0[xX](0)*dead"] ) self.expect( "target variable var[0]", diff --git a/lldb/test/API/iohandler/completion/TestIOHandlerCompletion.py b/lldb/test/API/iohandler/completion/TestIOHandlerCompletion.py index b16869b05e7df..0c788b2cdfee3 100644 --- a/lldb/test/API/iohandler/completion/TestIOHandlerCompletion.py +++ b/lldb/test/API/iohandler/completion/TestIOHandlerCompletion.py @@ -55,7 +55,7 @@ def test_completion(self): self.child.expect( re.compile( b"TestIOHandler(\r" - + self.cursor_forward_escape_seq("\d+") + + self.cursor_forward_escape_seq(r"\d+") + b")?Completion.py" ) ) diff --git a/lldb/test/API/lang/c/enum_types/TestEnumTypes.py b/lldb/test/API/lang/c/enum_types/TestEnumTypes.py index 0015c8f478578..d4bbe9bcfac81 100644 --- a/lldb/test/API/lang/c/enum_types/TestEnumTypes.py +++ b/lldb/test/API/lang/c/enum_types/TestEnumTypes.py @@ -27,7 +27,7 @@ def test_command_line(self): self.expect("fr var c", DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[" = C$"]) self.expect("fr var ab", 
DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[" = AB$"]) self.expect( - "fr var ac", DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[" = A \| C$"] + "fr var ac", DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[r" = A \| C$"] ) self.expect("fr var all", DATA_TYPES_DISPLAYED_CORRECTLY, patterns=[" = ALL$"]) # Test that an enum that doesn't match the heuristic we use in @@ -39,7 +39,7 @@ def test_command_line(self): self.expect( "expression (enum bitfield)nonsense", DATA_TYPES_DISPLAYED_CORRECTLY, - patterns=[" = B \| C \| 0x10$"], + patterns=[r" = B \| C \| 0x10$"], ) # Break inside the main. diff --git a/lldb/test/API/lang/c/function_types/TestFunctionTypes.py b/lldb/test/API/lang/c/function_types/TestFunctionTypes.py index 2f6bb7fc0bb16..6e42cd63573c9 100644 --- a/lldb/test/API/lang/c/function_types/TestFunctionTypes.py +++ b/lldb/test/API/lang/c/function_types/TestFunctionTypes.py @@ -54,7 +54,7 @@ def test_pointers(self): ) if self.platformIsDarwin(): - regexps = ["lib.*\.dylib`printf"] + regexps = [r"lib.*\.dylib`printf"] else: regexps = ["printf"] self.expect( diff --git a/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py b/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py index 0e51d4f7ff468..bcdd590e00d0a 100644 --- a/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py +++ b/lldb/test/API/lang/c/register_variables/TestRegisterVariables.py @@ -9,7 +9,7 @@ def re_expr_equals(val_type, val): # Match ({val_type}) ${sum_digits} = {val} - return re.compile(r"\(" + val_type + "\) \$\d+ = " + str(val)) + return re.compile(r"\(" + val_type + r"\) \$\d+ = " + str(val)) class RegisterVariableTestCase(TestBase): diff --git a/lldb/test/API/lang/c/set_values/TestSetValues.py b/lldb/test/API/lang/c/set_values/TestSetValues.py index 0d697d6719d5f..e0813734403a0 100644 --- a/lldb/test/API/lang/c/set_values/TestSetValues.py +++ b/lldb/test/API/lang/c/set_values/TestSetValues.py @@ -82,7 +82,7 @@ def test(self): self.expect( "frame variable 
--show-types", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\((short unsigned int|unsigned short)\) i = 33"], + patterns=[r"\((short unsigned int|unsigned short)\) i = 33"], ) # Now set variable 'i' and check that it is correctly displayed. @@ -90,7 +90,7 @@ def test(self): self.expect( "frame variable --show-types", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\((short unsigned int|unsigned short)\) i = 333"], + patterns=[r"\((short unsigned int|unsigned short)\) i = 333"], ) self.runCmd("continue") diff --git a/lldb/test/API/lang/c/strings/TestCStrings.py b/lldb/test/API/lang/c/strings/TestCStrings.py index 159f8a4e4b664..f11006f7feefe 100644 --- a/lldb/test/API/lang/c/strings/TestCStrings.py +++ b/lldb/test/API/lang/c/strings/TestCStrings.py @@ -20,7 +20,7 @@ def test_with_run_command(self): self.runCmd("process launch", RUN_SUCCEEDED) - self.expect("expression -- a[2]", patterns=["\((const )?char\) \$0 = 'c'"]) + self.expect("expression -- a[2]", patterns=[r"\((const )?char\) \$0 = 'c'"]) self.expect("expression -- z[2]", startstr="(const char) $1 = 'x'") diff --git a/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py b/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py index 2bffd2eea123a..56bb25b3f3c3d 100644 --- a/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py +++ b/lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py @@ -71,12 +71,12 @@ def test(self): self.expect( "expr var_static", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$.* = 88"], + patterns=[r"\(int\) \$.* = 88"], ) self.expect( "expr var_shared", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$.* = 66"], + patterns=[r"\(int\) \$.* = 66"], ) # Continue on the main thread @@ -102,10 +102,10 @@ def test(self): self.expect( "expr var_static", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$.* = 44"], + patterns=[r"\(int\) \$.* = 44"], ) self.expect( "expr var_shared", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$.* = 33"], + patterns=[r"\(int\) \$.* = 33"], ) diff --git 
a/lldb/test/API/lang/cpp/char1632_t/TestChar1632T.py b/lldb/test/API/lang/cpp/char1632_t/TestChar1632T.py index e1cbe0509ae04..e2521d88258bc 100644 --- a/lldb/test/API/lang/cpp/char1632_t/TestChar1632T.py +++ b/lldb/test/API/lang/cpp/char1632_t/TestChar1632T.py @@ -74,8 +74,8 @@ def test(self): self.expect( "frame variable as16 as32", patterns=[ - "\(char16_t\[[0-9]+\]\) as16 = ", - "\(char32_t\[[0-9]+\]\) as32 = ", + r"\(char16_t\[[0-9]+\]\) as16 = ", + r"\(char32_t\[[0-9]+\]\) as32 = ", ], substrs=['u"ﺸﺵۻ"', 'U"ЕЙРГЖО"'], ) @@ -103,8 +103,8 @@ def test(self): self.expect( "frame variable as16 as32", patterns=[ - "\(char16_t\[[0-9]+\]\) as16 = ", - "\(char32_t\[[0-9]+\]\) as32 = ", + r"\(char16_t\[[0-9]+\]\) as16 = ", + r"\(char32_t\[[0-9]+\]\) as32 = ", ], substrs=['"色ハ匂ヘト散リヌルヲ"', '"෴"'], ) diff --git a/lldb/test/API/lang/cpp/class_static/TestStaticVariables.py b/lldb/test/API/lang/cpp/class_static/TestStaticVariables.py index 04678ec018bdf..41181e468c308 100644 --- a/lldb/test/API/lang/cpp/class_static/TestStaticVariables.py +++ b/lldb/test/API/lang/cpp/class_static/TestStaticVariables.py @@ -38,7 +38,7 @@ def test_with_run_command(self): self.expect( "target variable A::g_points", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(PointType\[[1-9]*\]\) A::g_points = {"], + patterns=[r"\(PointType\[[1-9]*\]\) A::g_points = {"], ) self.expect( "target variable g_points", @@ -76,7 +76,7 @@ def test_with_run_command_complete(self): "target variable A::g_points", VARIABLES_DISPLAYED_CORRECTLY, patterns=[ - "\(PointType\[[1-9]*\]\) A::g_points = {", + r"\(PointType\[[1-9]*\]\) A::g_points = {", "(x = 1, y = 2)", "(x = 11, y = 22)", ], diff --git a/lldb/test/API/lang/cpp/class_types/TestClassTypes.py b/lldb/test/API/lang/cpp/class_types/TestClassTypes.py index 80781f5d90912..b5d5dd2370781 100644 --- a/lldb/test/API/lang/cpp/class_types/TestClassTypes.py +++ b/lldb/test/API/lang/cpp/class_types/TestClassTypes.py @@ -179,7 +179,7 @@ def test_with_expr_parser(self): 
self.expect( "expression this->m_c_int", VARIABLES_DISPLAYED_CORRECTLY, - patterns=["\(int\) \$[0-9]+ = 66"], + patterns=[r"\(int\) \$[0-9]+ = 66"], ) def test_with_constructor_name(self): diff --git a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py index e016168f047c1..32ef009279713 100644 --- a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py +++ b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py @@ -129,7 +129,7 @@ def test_get_dynamic_vals(self): self.expect( "frame var -d run-target --ptr-depth=2 --show-types anotherA.m_client_A", "frame var finds its way into a child member", - patterns=["\(B \*\)"], + patterns=[r"\(B \*\)"], ) # Now make sure we also get it right for a reference as well: diff --git a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py index 5cc43f3cd9910..8efa53bdbf722 100644 --- a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py +++ b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py @@ -23,7 +23,7 @@ def test_frame_recognizer(self): # We never hide the frame of the entry-point into the standard library, even # if the name starts with `__` which usually indicates an internal function. 
"ranges_sort_less(int, int)": [ - re.compile("ranges::__sort::(__fn::)?operator\(\)"), + re.compile(r"ranges::__sort::(__fn::)?operator\(\)"), "test_algorithms", ], # `ranges::views::transform` internally uses `std::invoke`, and that diff --git a/lldb/test/API/lang/cpp/namespace/TestNamespace.py b/lldb/test/API/lang/cpp/namespace/TestNamespace.py index 40cbff9cb3c94..d790002dea072 100644 --- a/lldb/test/API/lang/cpp/namespace/TestNamespace.py +++ b/lldb/test/API/lang/cpp/namespace/TestNamespace.py @@ -237,12 +237,12 @@ def test_with_run_command(self): self.expect( "expression myanonfunc", - patterns=["\(anonymous namespace\)::myanonfunc\(int\)"], + patterns=[r"\(anonymous namespace\)::myanonfunc\(int\)"], ) self.expect( "expression variadic_sum", - patterns=["\(anonymous namespace\)::variadic_sum\(int, ...\)"], + patterns=[r"\(anonymous namespace\)::variadic_sum\(int, ...\)"], ) self.expect_expr("::B::Bar b; b.x()", result_type="int", result_value="42") diff --git a/lldb/test/API/lang/cpp/signed_types/TestSignedTypes.py b/lldb/test/API/lang/cpp/signed_types/TestSignedTypes.py index a08c2e721328d..b8c2c23613868 100644 --- a/lldb/test/API/lang/cpp/signed_types/TestSignedTypes.py +++ b/lldb/test/API/lang/cpp/signed_types/TestSignedTypes.py @@ -57,8 +57,8 @@ def test(self): "frame variable --show-types --no-args", VARIABLES_DISPLAYED_CORRECTLY, patterns=[ - "\((short int|short)\) the_signed_short = 99", - "\((signed char|char)\) the_signed_char = 'c'", + r"\((short int|short)\) the_signed_short = 99", + r"\((signed char|char)\) the_signed_char = 'c'", ], substrs=[ "(int) the_signed_int = 99", diff --git a/lldb/test/API/lang/cpp/unsigned_types/TestUnsignedTypes.py b/lldb/test/API/lang/cpp/unsigned_types/TestUnsignedTypes.py index ffce9534e05bd..4796c84966675 100644 --- a/lldb/test/API/lang/cpp/unsigned_types/TestUnsignedTypes.py +++ b/lldb/test/API/lang/cpp/unsigned_types/TestUnsignedTypes.py @@ -22,7 +22,7 @@ def test(self): "frame variable --show-types --no-args", 
VARIABLES_DISPLAYED_CORRECTLY, patterns=[ - "\((short unsigned int|unsigned short)\) the_unsigned_short = 99" + r"\((short unsigned int|unsigned short)\) the_unsigned_short = 99" ], substrs=[ "(unsigned char) the_unsigned_char = 'c'", diff --git a/lldb/test/API/lang/mixed/TestMixedLanguages.py b/lldb/test/API/lang/mixed/TestMixedLanguages.py index 1637d59a5edcb..d7ab89e89ab7f 100644 --- a/lldb/test/API/lang/mixed/TestMixedLanguages.py +++ b/lldb/test/API/lang/mixed/TestMixedLanguages.py @@ -22,12 +22,12 @@ def cleanup(): self.addTearDownHook(cleanup) self.runCmd("settings show frame-format") - m = re.match('^frame-format \(format-string\) = "(.*)"$', self.res.GetOutput()) + m = re.match(r'^frame-format \(format-string\) = "(.*)"$', self.res.GetOutput()) self.assertTrue(m, "Bad settings string") self.format_string = m.group(1) # Change the default format to print the language. - format_string = "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}\`${function.name}{${function.pc-offset}}}{, lang=${language}}\n" + format_string = "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}\\`${function.name}{${function.pc-offset}}}{, lang=${language}}\n" self.runCmd("settings set frame-format %s" % format_string) self.expect( "settings show frame-format", diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethods.py b/lldb/test/API/lang/objc/foundation/TestObjCMethods.py index 634e4730c764b..5fa3f280d33bf 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethods.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethods.py @@ -166,7 +166,7 @@ def test_data_type_and_expr(self): "frame variable --show-types --scope", VARIABLES_DISPLAYED_CORRECTLY, substrs=["ARG: (MyString *) self"], - patterns=["ARG: \(.*\) _cmd", "(objc_selector *)|(SEL)"], + patterns=[r"ARG: \(.*\) _cmd", "(objc_selector *)|(SEL)"], ) # rdar://problem/8651752 diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSArray.py 
b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSArray.py index 81c409d08364c..2cd1386289b9e 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSArray.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSArray.py @@ -19,10 +19,10 @@ def test_NSArray_expr_commands(self): self.runCmd("thread backtrace") self.expect( - "expression (int)[nil_mutable_array count]", patterns=["\(int\) \$.* = 0"] + "expression (int)[nil_mutable_array count]", patterns=[r"\(int\) \$.* = 0"] ) - self.expect("expression (int)[array1 count]", patterns=["\(int\) \$.* = 3"]) - self.expect("expression (int)[array2 count]", patterns=["\(int\) \$.* = 3"]) - self.expect("expression (int)array1.count", patterns=["\(int\) \$.* = 3"]) - self.expect("expression (int)array2.count", patterns=["\(int\) \$.* = 3"]) + self.expect("expression (int)[array1 count]", patterns=[r"\(int\) \$.* = 3"]) + self.expect("expression (int)[array2 count]", patterns=[r"\(int\) \$.* = 3"]) + self.expect("expression (int)array1.count", patterns=[r"\(int\) \$.* = 3"]) + self.expect("expression (int)array2.count", patterns=[r"\(int\) \$.* = 3"]) self.runCmd("process continue") diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py index c9e801422b46c..a14035db5e057 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py @@ -20,7 +20,7 @@ def test_runtime_types(self): # Test_NSString: self.runCmd("thread backtrace") - self.expect("expression [str length]", patterns=["\(NSUInteger\) \$.* ="]) + self.expect("expression [str length]", patterns=[r"\(NSUInteger\) \$.* ="]) self.expect("expression str.length") self.expect('expression str = [NSString stringWithCString: "new"]') self.expect( diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethodsString.py b/lldb/test/API/lang/objc/foundation/TestObjCMethodsString.py index 
4d8c9c960ea59..3e33077b8e2d7 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethodsString.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethodsString.py @@ -21,11 +21,11 @@ def test_NSString_expr_commands(self): # Test_NSString: self.runCmd("thread backtrace") - self.expect("expression (int)[str length]", patterns=["\(int\) \$.* ="]) - self.expect("expression (int)[str_id length]", patterns=["\(int\) \$.* ="]) - self.expect("expression (id)[str description]", patterns=["\(id\) \$.* = 0x"]) + self.expect("expression (int)[str length]", patterns=[r"\(int\) \$.* ="]) + self.expect("expression (int)[str_id length]", patterns=[r"\(int\) \$.* ="]) + self.expect("expression (id)[str description]", patterns=[r"\(id\) \$.* = 0x"]) self.expect( - "expression (id)[str_id description]", patterns=["\(id\) \$.* = 0x"] + "expression (id)[str_id description]", patterns=[r"\(id\) \$.* = 0x"] ) self.expect("expression str.length") self.expect('expression str = @"new"') @@ -42,6 +42,6 @@ def test_MyString_dump_with_runtime(self): ) self.expect( "expression --show-types -- *my", - patterns=["\(MyString\) \$.* = ", "\(MyBase\)"], + patterns=[r"\(MyString\) \$.* = ", r"\(MyBase\)"], ) self.runCmd("process continue") diff --git a/lldb/test/API/lang/objc/objc-dynamic-value/TestObjCDynamicValue.py b/lldb/test/API/lang/objc/objc-dynamic-value/TestObjCDynamicValue.py index 3ba68f4c35a5c..4d439fa0046e4 100644 --- a/lldb/test/API/lang/objc/objc-dynamic-value/TestObjCDynamicValue.py +++ b/lldb/test/API/lang/objc/objc-dynamic-value/TestObjCDynamicValue.py @@ -107,7 +107,7 @@ def test_get_objc_dynamic_vals(self): self.expect( "frame var -d run-target myObserver->_source", "frame var finds its way into a child member", - patterns=["\(SourceDerived \*\)"], + patterns=[r"\(SourceDerived \*\)"], ) # check that our ObjC GetISA() does a good job at hiding KVO swizzled diff --git a/lldb/test/API/lang/objcxx/objc-builtin-types/TestObjCBuiltinTypes.py 
b/lldb/test/API/lang/objcxx/objc-builtin-types/TestObjCBuiltinTypes.py index 3cdca31b8969b..ac107de7e00a7 100644 --- a/lldb/test/API/lang/objcxx/objc-builtin-types/TestObjCBuiltinTypes.py +++ b/lldb/test/API/lang/objcxx/objc-builtin-types/TestObjCBuiltinTypes.py @@ -51,11 +51,11 @@ def test_with_python_api(self): frame = thread_list[0].GetFrameAtIndex(0) self.assertTrue(frame, "Got a valid frame 0 frame.") - self.expect("expr (foo)", patterns=["\(ns::id\) \$.* = 0"]) + self.expect("expr (foo)", patterns=[r"\(ns::id\) \$.* = 0"]) self.expect( "expr --language Objective-C++ -- id my_id = 0; my_id", - patterns=["\(id\) \$.* = nil"], + patterns=[r"\(id\) \$.* = nil"], ) self.expect("expr --language C++ -- id my_id = 0; my_id", error=True) diff --git a/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py b/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py index 6309648819026..a9879f67d8b8f 100644 --- a/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py +++ b/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py @@ -88,9 +88,9 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr}+16 {addr}".format(addr=self.MTE_BUF_ADDR), error=True, patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) " + r"error: End address \(0x[A-Fa-f0-9]+\) " "must be greater than the start address " - "\(0x[A-Fa-f0-9]+\)" + r"\(0x[A-Fa-f0-9]+\)" ], ) @@ -100,8 +100,8 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr} {addr}+32".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\\): 0x0\n" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)$" ], ) @@ -110,7 +110,7 @@ def test_mte_tag_core_file_tag_read(self): self.expect( "memory tag read {addr} 
{addr}+16".format(addr=self.MTE_BUF_ADDR), patterns=[ - "Allocation tags:\n" "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0$" + "Allocation tags:\n" r"\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0$" ], ) # Get the other half of the first byte. @@ -119,7 +119,7 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr}+16 {addr}+32".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)$" + r"\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)$" ], ) @@ -128,18 +128,18 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr} {addr}+48".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\): 0x2 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\\): 0x0\n" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\\): 0x2 \\(mismatch\\)$" ], ) self.expect( "memory tag read {addr}+16 {addr}+64".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\): 0x2 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+30, 0x[A-Fa-f0-9]+40\): 0x3 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\\): 0x2 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+30, 0x[A-Fa-f0-9]+40\\): 0x3 \\(mismatch\\)$" ], ) # Here both start and end are unaligned. 
@@ -147,10 +147,10 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr}+16 {addr}+80".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\): 0x2 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+30, 0x[A-Fa-f0-9]+40\): 0x3 \(mismatch\)\n" - "\[0x[A-Fa-f0-9]+40, 0x[A-Fa-f0-9]+50\): 0x4 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\\): 0x2 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+30, 0x[A-Fa-f0-9]+40\\): 0x3 \\(mismatch\\)\n" + "\\[0x[A-Fa-f0-9]+40, 0x[A-Fa-f0-9]+50\\): 0x4 \\(mismatch\\)$" ], ) @@ -159,7 +159,7 @@ def test_mte_tag_core_file_tag_read(self): self.expect( "memory tag read {addr} {addr}+1".format(addr=self.MTE_BUF_ADDR), patterns=[ - "Allocation tags:\n" "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0$" + "Allocation tags:\n" r"\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0$" ], ) @@ -169,8 +169,8 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr} {addr}+17".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)$" + "\\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\\): 0x0\n" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)$" ], ) @@ -179,9 +179,9 @@ def test_mte_tag_core_file_tag_read(self): "memory tag read {addr} {addr}+33".format(addr=self.MTE_BUF_ADDR), patterns=[ "Allocation tags:\n" - "\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\): 0x0\n" - "\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\): 0x1 \(mismatch\)\n", - "\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\): 0x2 \(mismatch\)$", + "\\[0x[A-Fa-f0-9]+00, 0x[A-Fa-f0-9]+10\\): 0x0\n" + "\\[0x[A-Fa-f0-9]+10, 0x[A-Fa-f0-9]+20\\): 0x1 \\(mismatch\\)\n", + "\\[0x[A-Fa-f0-9]+20, 0x[A-Fa-f0-9]+30\\): 0x2 \\(mismatch\\)$", ], ) diff --git 
a/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py b/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py index c72eb72b05cce..8a76d6c6c40c3 100644 --- a/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py +++ b/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py @@ -86,8 +86,8 @@ def test_mte_tag_read(self): self.expect( "memory tag read mte_buf mte_buf-16", patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) must be " - "greater than the start address \(0x[A-Fa-f0-9]+\)" + r"error: End address \(0x[A-Fa-f0-9]+\) must be " + r"greater than the start address \(0x[A-Fa-f0-9]+\)" ], error=True, ) @@ -95,8 +95,8 @@ def test_mte_tag_read(self): self.expect( "memory tag read mte_buf mte_buf", patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) must be " - "greater than the start address \(0x[A-Fa-f0-9]+\)" + r"error: End address \(0x[A-Fa-f0-9]+\) must be " + r"greater than the start address \(0x[A-Fa-f0-9]+\)" ], error=True, ) @@ -117,7 +117,7 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" ], ) @@ -127,7 +127,7 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" ], ) @@ -137,8 +137,8 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x1 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x0 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x1 \\(mismatch\\)$" ], ) @@ -150,7 +150,7 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - 
"\[0x[0-9A-Fa-f]+, 0x[0-9A-Fa-f]+\): 0x0$" + r"\[0x[0-9A-Fa-f]+, 0x[0-9A-Fa-f]+\): 0x0$" ], ) @@ -179,8 +179,8 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+f0, 0x[0-9A-Fa-f]+00\): 0xf \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+f0, 0x[0-9A-Fa-f]+00\\): 0xf \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x0 \\(mismatch\\)$" ], ) @@ -192,7 +192,7 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0xa\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x0 \(mismatch\)$" ], ) @@ -202,9 +202,9 @@ def test_mte_tag_read(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+80, 0x[0-9A-Fa-f]+90\): 0x8 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+90, 0x[0-9A-Fa-f]+a0\): 0x9\n" - "\[0x[0-9A-Fa-f]+a0, 0x[0-9A-Fa-f]+b0\): 0xa \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+80, 0x[0-9A-Fa-f]+90\\): 0x8 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+90, 0x[0-9A-Fa-f]+a0\\): 0x9\n" + "\\[0x[0-9A-Fa-f]+a0, 0x[0-9A-Fa-f]+b0\\): 0xa \\(mismatch\\)$" ], ) @@ -258,8 +258,8 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x9\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x1 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x9\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x1 \\(mismatch\\)$" ], ) @@ -270,9 +270,9 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0xa \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0xb \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0xc \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0xa \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0xb \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\\): 0xc \\(mismatch\\)$" ], ) @@ 
-284,7 +284,7 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+, 0x[0-9A-Fa-f]+\): 0xe \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+, 0x[0-9A-Fa-f]+\): 0xe \(mismatch\)$" ], ) @@ -323,8 +323,8 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x9\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+f0, 0x[0-9A-Fa-f]+00\): 0x1 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x2 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+f0, 0x[0-9A-Fa-f]+00\\): 0x1 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x2 \\(mismatch\\)$" ], ) @@ -335,7 +335,7 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x1 \(mismatch\)$" + r"\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x1 \(mismatch\)$" ], ) @@ -361,16 +361,16 @@ def test_mte_tag_write(self): self.expect( "memory tag write mte_buf_2 9 --end-addr mte_buf_2", patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) must be " - "greater than the start address \(0x[A-Fa-f0-9]+\)" + r"error: End address \(0x[A-Fa-f0-9]+\) must be " + r"greater than the start address \(0x[A-Fa-f0-9]+\)" ], error=True, ) self.expect( "memory tag write mte_buf_2 9 --end-addr mte_buf_2-16", patterns=[ - "error: End address \(0x[A-Fa-f0-9]+\) must be " - "greater than the start address \(0x[A-Fa-f0-9]+\)" + r"error: End address \(0x[A-Fa-f0-9]+\) must be " + r"greater than the start address \(0x[A-Fa-f0-9]+\)" ], error=True, ) @@ -391,10 +391,10 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x4 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x5 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0x4 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\): 0x0$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x4 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x5 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+20, 
0x[0-9A-Fa-f]+30\\): 0x4 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\\): 0x0$" ], ) @@ -409,9 +409,9 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x6 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x6 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0x4 \(mismatch\)$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x6 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x6 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\\): 0x4 \\(mismatch\\)$" ], ) @@ -423,10 +423,10 @@ def test_mte_tag_write(self): patterns=[ "Logical tag: 0x0\n" "Allocation tags:\n" - "\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\): 0x3 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x3 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0x3 \(mismatch\)\n" - "\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\): 0x0$" + "\\[0x[0-9A-Fa-f]+00, 0x[0-9A-Fa-f]+10\\): 0x3 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\\): 0x3 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\\): 0x3 \\(mismatch\\)\n" + "\\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\\): 0x0$" ], ) @@ -452,8 +452,8 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf mte_buf+32 -f "x" -l 1 -s 16 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+00: 0x0+ \(tag: 0x0\)\n" - "0x[0-9A-Fa-f]+10: 0x0+ \(tag: 0x1\)" + "0x[0-9A-Fa-f]+00: 0x0+ \\(tag: 0x0\\)\n" + "0x[0-9A-Fa-f]+10: 0x0+ \\(tag: 0x1\\)" ], ) @@ -461,13 +461,13 @@ def test_mte_memory_read_tag_display(self): # per line. 
self.expect( 'memory read mte_buf mte_buf+32 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+00: 0x0+ \(tags: 0x0 0x1\)\n"], + patterns=["0x[0-9A-Fa-f]+00: 0x0+ \\(tags: 0x0 0x1\\)\n"], ) # Reading half a granule still shows you the tag for that granule self.expect( 'memory read mte_buf mte_buf+8 -f "x" -l 1 -s 8 --show-tags', - patterns=["0x[0-9A-Fa-f]+00: 0x0+ \(tag: 0x0\)\n"], + patterns=["0x[0-9A-Fa-f]+00: 0x0+ \\(tag: 0x0\\)\n"], ) # We can read a whole number of granules but split them over more lines @@ -475,10 +475,10 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf+32 mte_buf+64 -f "x" -l 1 -s 8 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+20: 0x0+ \(tag: 0x2\)\n" - "0x[0-9A-Fa-f]+28: 0x0+ \(tag: 0x2\)\n" - "0x[0-9A-Fa-f]+30: 0x0+ \(tag: 0x3\)\n" - "0x[0-9A-Fa-f]+38: 0x0+ \(tag: 0x3\)" + "0x[0-9A-Fa-f]+20: 0x0+ \\(tag: 0x2\\)\n" + "0x[0-9A-Fa-f]+28: 0x0+ \\(tag: 0x2\\)\n" + "0x[0-9A-Fa-f]+30: 0x0+ \\(tag: 0x3\\)\n" + "0x[0-9A-Fa-f]+38: 0x0+ \\(tag: 0x3\\)" ], ) @@ -488,10 +488,10 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf+32+8 mte_buf+64+8 -f "x" -l 1 -s 8 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+28: 0x0+ \(tag: 0x2\)\n" - "0x[0-9A-Fa-f]+30: 0x0+ \(tag: 0x3\)\n" - "0x[0-9A-Fa-f]+38: 0x0+ \(tag: 0x3\)\n" - "0x[0-9A-Fa-f]+40: 0x0+ \(tag: 0x4\)" + "0x[0-9A-Fa-f]+28: 0x0+ \\(tag: 0x2\\)\n" + "0x[0-9A-Fa-f]+30: 0x0+ \\(tag: 0x3\\)\n" + "0x[0-9A-Fa-f]+38: 0x0+ \\(tag: 0x3\\)\n" + "0x[0-9A-Fa-f]+40: 0x0+ \\(tag: 0x4\\)" ], ) @@ -501,10 +501,10 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf+32+4 mte_buf+64+4 -f "x" -l 1 -s 8 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+24: 0x0+ \(tag: 0x2\)\n" - "0x[0-9A-Fa-f]+2c: 0x0+ \(tags: 0x2 0x3\)\n" - "0x[0-9A-Fa-f]+34: 0x0+ \(tag: 0x3\)\n" - "0x[0-9A-Fa-f]+3c: 0x0+ \(tags: 0x3 0x4\)" + "0x[0-9A-Fa-f]+24: 0x0+ \\(tag: 0x2\\)\n" + "0x[0-9A-Fa-f]+2c: 0x0+ \\(tags: 0x2 0x3\\)\n" + "0x[0-9A-Fa-f]+34: 0x0+ \\(tag: 
0x3\\)\n" + "0x[0-9A-Fa-f]+3c: 0x0+ \\(tags: 0x3 0x4\\)" ], ) @@ -516,15 +516,17 @@ def test_mte_memory_read_tag_display(self): 'memory read mte_buf-16 mte_buf+32 -f "x" -l 1 -s 16 --show-tags', patterns=[ "0x[0-9A-Fa-f]+f0: 0x0+\n" - "0x[0-9A-Fa-f]+00: 0x0+ \(tag: 0x0\)\n" - "0x[0-9A-Fa-f]+10: 0x0+ \(tag: 0x1\)" + "0x[0-9A-Fa-f]+00: 0x0+ \\(tag: 0x0\\)\n" + "0x[0-9A-Fa-f]+10: 0x0+ \\(tag: 0x1\\)" ], ) # End of range is untagged self.expect( 'memory read mte_buf+page_size-16 mte_buf+page_size+16 -f "x" -l 1 -s 16 --show-tags', - patterns=["0x[0-9A-Fa-f]+f0: 0x0+ \(tag: 0xf\)\n" "0x[0-9A-Fa-f]+00: 0x0+"], + patterns=[ + "0x[0-9A-Fa-f]+f0: 0x0+ \\(tag: 0xf\\)\n" "0x[0-9A-Fa-f]+00: 0x0+" + ], ) # The smallest MTE range we can get is a single page so we just check @@ -533,8 +535,8 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_read_only-16 mte_read_only+page_size+16 -f "x" -l 1 -s 16 --force --show-tags', patterns=[ - "0x[0-9A-Fa-f]+f0: 0x0+\n" "0x[0-9A-Fa-f]+00: 0x0+ \(tag: 0x0\)\n", - "0x[0-9A-Fa-f]+f0: 0x0+ \(tag: 0x0\)\n" "0x[0-9A-Fa-f]+00: 0x0+", + "0x[0-9A-Fa-f]+f0: 0x0+\n" "0x[0-9A-Fa-f]+00: 0x0+ \\(tag: 0x0\\)\n", + "0x[0-9A-Fa-f]+f0: 0x0+ \\(tag: 0x0\\)\n" "0x[0-9A-Fa-f]+00: 0x0+", ], ) @@ -542,21 +544,21 @@ def test_mte_memory_read_tag_display(self): # is shown in where the tag would be, to keep the order intact. 
self.expect( 'memory read mte_buf-16 mte_buf+32 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+f0: 0x0+ \(tags: 0x0\)"], + patterns=[r"0x[0-9A-Fa-f]+f0: 0x0+ \(tags: 0x0\)"], ) self.expect( 'memory read mte_read_only+page_size-16 mte_read_only+page_size+16 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+f0: 0x0+ \(tags: 0x0 \)"], + patterns=[r"0x[0-9A-Fa-f]+f0: 0x0+ \(tags: 0x0 \)"], ) # Here the start address is unaligned so we cover 3 granules instead of 2 self.expect( 'memory read mte_buf-16+4 mte_buf+32+4 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+f4: 0x0+ \(tags: 0x0 0x1\)"], + patterns=[r"0x[0-9A-Fa-f]+f4: 0x0+ \(tags: 0x0 0x1\)"], ) self.expect( 'memory read mte_read_only+page_size-16+4 mte_read_only+page_size+16+4 -f "x" -l 1 -s 32 --show-tags', - patterns=["0x[0-9A-Fa-f]+f4: 0x0+ \(tags: 0x0 \)"], + patterns=[r"0x[0-9A-Fa-f]+f4: 0x0+ \(tags: 0x0 \)"], ) # Some formats call DumpDataExtractor multiple times, @@ -564,24 +566,24 @@ def test_mte_memory_read_tag_display(self): self.expect( 'memory read mte_buf mte_buf+32 -f "x" --show-tags', patterns=[ - "0x[0-9A-Fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x0\)\n", - "0x[0-9A-Fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x1\)", + "0x[0-9A-Fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \\(tag: 0x0\\)\n", + "0x[0-9A-Fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \\(tag: 0x1\\)", ], ) self.expect( 'memory read mte_buf mte_buf+32 -f "bytes with ASCII" --show-tags', patterns=[ - "0x[0-9A-Fa-f]+00: (00 ){16} \.{16} \(tag: 0x0\)\n", - "0x[0-9A-Fa-f]+10: (00 ){16} \.{16} \(tag: 0x1\)", + "0x[0-9A-Fa-f]+00: (00 ){16} \\.{16} \\(tag: 0x0\\)\n", + "0x[0-9A-Fa-f]+10: (00 ){16} \\.{16} \\(tag: 0x1\\)", ], ) self.expect( 'memory read mte_buf mte_buf+32 -f "uint8_t[]" -s 16 -l 1 --show-tags', patterns=[ - "0x[0-9A-Fa-f]+00: \{(0x00 ){15}0x00\} \(tag: 0x0\)\n" - "0x[0-9A-Fa-f]+10: \{(0x00 ){15}0x00\} \(tag: 0x1\)" + "0x[0-9A-Fa-f]+00: \\{(0x00 ){15}0x00\\} \\(tag: 0x0\\)\n" + "0x[0-9A-Fa-f]+10: \\{(0x00 ){15}0x00\\} \\(tag: 0x1\\)" ], ) 
@@ -594,12 +596,12 @@ def test_mte_memory_read_tag_display_repeated(self): self.expect( 'memory read mte_buf mte_buf+16 -f "x" --show-tags', - patterns=["0x[0-9A-fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x0\)"], + patterns=[r"0x[0-9A-fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x0\)"], ) # Equivalent to just pressing enter on the command line. self.expect( "memory read", - patterns=["0x[0-9A-fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x1\)"], + patterns=[r"0x[0-9A-fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x1\)"], ) # You can add the argument to an existing repetition without resetting @@ -613,10 +615,10 @@ def test_mte_memory_read_tag_display_repeated(self): # Note that the formatting returns to default here. self.expect( "memory read --show-tags", - patterns=["0x[0-9A-fa-f]+20: (00 )+ \.+ \(tag: 0x2\)"], + patterns=[r"0x[0-9A-fa-f]+20: (00 )+ \.+ \(tag: 0x2\)"], ) self.expect( - "memory read", patterns=["0x[0-9A-fa-f]+30: (00 )+ \.+ \(tag: 0x3\)"] + "memory read", patterns=[r"0x[0-9A-fa-f]+30: (00 )+ \.+ \(tag: 0x3\)"] ) # A fresh command reverts to the default of tags being off. 
@@ -641,8 +643,8 @@ def test_mte_memory_find(self): cmd = 'memory find -s "LLDB" mte_buf+64 mte_buf+512' found_pattern = "data found at location: 0x[0-9A-Fa-f]+80" results_patterns = [ - "0x[0-9A-Fa-f]+80: 4c 4c 44 42 (00 )+ LLDB\.+", - "0x[0-9A-Fa-f]+90: 00 00 00 00 (00 )+ \.+", + r"0x[0-9A-Fa-f]+80: 4c 4c 44 42 (00 )+ LLDB\.+", + r"0x[0-9A-Fa-f]+90: 00 00 00 00 (00 )+ \.+", ] # Default is not to show tags @@ -651,8 +653,8 @@ def test_mte_memory_find(self): cmd + " --show-tags", patterns=[ found_pattern, - results_patterns[0] + " \(tag: 0x8\)", - results_patterns[1] + " \(tag: 0x9\)", + results_patterns[0] + r" \(tag: 0x8\)", + results_patterns[1] + r" \(tag: 0x9\)", ], ) @@ -661,7 +663,7 @@ def test_mte_memory_find(self): 'memory find -s "DB" mte_buf+64 mte_buf+512 --show-tags', patterns=[ "data found at location: 0x[0-9A-Fa-f]+82\n" - "0x[0-9A-Fa-f]+82: 44 42 (00 )+ DB\.+ \(tags: 0x8 0x9\)\n", - "0x[0-9A-Fa-f]+92: 00 00 (00 )+ ..\.+ \(tags: 0x9 0xa\)", + "0x[0-9A-Fa-f]+82: 44 42 (00 )+ DB\\.+ \\(tags: 0x8 0x9\\)\n", + "0x[0-9A-Fa-f]+92: 00 00 (00 )+ ..\\.+ \\(tags: 0x9 0xa\\)", ], ) diff --git a/lldb/test/API/linux/aarch64/mte_tag_faults/TestAArch64LinuxMTEMemoryTagFaults.py b/lldb/test/API/linux/aarch64/mte_tag_faults/TestAArch64LinuxMTEMemoryTagFaults.py index 2d6470505cf7c..331c32749e32c 100644 --- a/lldb/test/API/linux/aarch64/mte_tag_faults/TestAArch64LinuxMTEMemoryTagFaults.py +++ b/lldb/test/API/linux/aarch64/mte_tag_faults/TestAArch64LinuxMTEMemoryTagFaults.py @@ -50,9 +50,9 @@ def test_mte_tag_fault_sync(self): self.expect( "continue", patterns=[ - "\* thread #1, name = 'a.out', stop reason = signal SIGSEGV: " - "sync tag check fault \(fault address=0x9[0-9A-Fa-f]+11\ " - "logical tag=0x9 allocation tag=0xa\)" + r"\* thread #1, name = 'a.out', stop reason = signal SIGSEGV: " + r"sync tag check fault \(fault address=0x9[0-9A-Fa-f]+11\ " + r"logical tag=0x9 allocation tag=0xa\)" ], ) diff --git 
a/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py index 8eadd65466a49..509dae3aed855 100644 --- a/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py +++ b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py @@ -39,7 +39,8 @@ def test_mte_regions(self): # Despite the non address bits we should find a region self.expect( - "memory region the_page", patterns=["\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"] + "memory region the_page", + patterns=[r"\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"], ) # Check that the usual error message is displayed after repeating @@ -68,5 +69,6 @@ def test_mte_regions(self): # This should not error, since the user supplied address overrides # the previous end address. self.expect( - "memory region the_page", patterns=["\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"] + "memory region the_page", + patterns=[r"\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"], ) diff --git a/lldb/test/API/macosx/add-dsym/TestAddDsymDownload.py b/lldb/test/API/macosx/add-dsym/TestAddDsymDownload.py index 52055f250a584..8ff72c9a74edd 100644 --- a/lldb/test/API/macosx/add-dsym/TestAddDsymDownload.py +++ b/lldb/test/API/macosx/add-dsym/TestAddDsymDownload.py @@ -6,7 +6,7 @@ @skipUnlessDarwin class AddDsymDownload(TestBase): - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") def get_uuid(self): dwarfdump_cmd_output = subprocess.check_output( diff --git a/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py b/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py index db3074d7e7942..9309de4824ec4 100644 --- a/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py +++ b/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py @@ 
-285,7 +285,7 @@ def test_lc_note_main_bin_spec_os_plugin(self): for l in python_init: writer.write(l + "\n") - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % aout_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py b/lldb/test/API/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py index d4366196c53c3..a3f9144572da5 100644 --- a/lldb/test/API/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py +++ b/lldb/test/API/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py @@ -32,7 +32,7 @@ def test_lc_note(self): lambda: os.environ.pop("LLDB_APPLE_DSYMFORUUID_EXECUTABLE", None) ) - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % self.test_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/macosx/lc-note/multiple-binary-corefile/TestMultipleBinaryCorefile.py b/lldb/test/API/macosx/lc-note/multiple-binary-corefile/TestMultipleBinaryCorefile.py index 897eab23e05e2..28472cb787d4a 100644 --- a/lldb/test/API/macosx/lc-note/multiple-binary-corefile/TestMultipleBinaryCorefile.py +++ b/lldb/test/API/macosx/lc-note/multiple-binary-corefile/TestMultipleBinaryCorefile.py @@ -107,7 +107,7 @@ def test_corefile_binaries_dsymforuuid(self): ) ) - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % self.libtwo_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py 
b/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py index a69f7a055c79b..faf2256b03a0d 100644 --- a/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py +++ b/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py @@ -72,7 +72,7 @@ def run_with(self, arch, os, vers, env, expected_load_command): self, "break here", lldb.SBFileSpec("hello.c") ) triple_re = "-".join([arch, "apple", os + vers + ".*"] + env_list) - self.expect("image list -b -t", patterns=["a\.out " + triple_re]) + self.expect("image list -b -t", patterns=[r"a\.out " + triple_re]) self.check_debugserver(log, os + env, vers) @skipIfAsan diff --git a/lldb/test/API/macosx/skinny-corefile/TestSkinnyCorefile.py b/lldb/test/API/macosx/skinny-corefile/TestSkinnyCorefile.py index 02ab856aabc6b..bc19c69df7620 100644 --- a/lldb/test/API/macosx/skinny-corefile/TestSkinnyCorefile.py +++ b/lldb/test/API/macosx/skinny-corefile/TestSkinnyCorefile.py @@ -42,7 +42,7 @@ def test_lc_note(self): lambda: os.environ.pop("LLDB_APPLE_DSYMFORUUID_EXECUTABLE", None) ) - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % self.aout_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/python_api/address_range/TestAddressRange.py b/lldb/test/API/python_api/address_range/TestAddressRange.py index ae4b8c7c90ce4..3564022702497 100644 --- a/lldb/test/API/python_api/address_range/TestAddressRange.py +++ b/lldb/test/API/python_api/address_range/TestAddressRange.py @@ -191,7 +191,7 @@ def test_address_range_print_resolved(self): interp.HandleCommand(script, result, False) self.assertTrue(result.Succeeded(), "script command succeeded") # [0x1000-0x2000] // Resolved with target or addresses without sections - self.assertRegex(result.GetOutput(), "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)") + self.assertRegex(result.GetOutput(), 
r"^\[0x[0-9a-f]+\-0x[0-9a-f]+\)") process.Kill() def test_address_range_print_no_section_resolved(self): @@ -215,7 +215,7 @@ def test_address_range_print_no_section_resolved(self): range_str = str(range) # [0x1000-0x2000] // Resolved with target or addresses without sections - self.assertRegex(range_str, "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + self.assertRegex(range_str, r"^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") process.Kill() def test_address_range_print_not_resolved(self): @@ -223,7 +223,7 @@ def test_address_range_print_not_resolved(self): range = lldb.SBAddressRange(self.addr1, 8) range_str = str(range) # a.out[0x1000-0x2000] // Without target - self.assertRegex(range_str, "^a.out\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + self.assertRegex(range_str, r"^a.out\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") def test_address_range_list_print(self): """Make sure the SBAddressRangeList can be printed.""" diff --git a/lldb/test/API/python_api/target-arch-from-module/TestTargetArchFromModule.py b/lldb/test/API/python_api/target-arch-from-module/TestTargetArchFromModule.py index 260f37c9ed230..0141828ae1eab 100644 --- a/lldb/test/API/python_api/target-arch-from-module/TestTargetArchFromModule.py +++ b/lldb/test/API/python_api/target-arch-from-module/TestTargetArchFromModule.py @@ -34,7 +34,7 @@ def test_target_arch_init(self): lambda: os.environ.pop("LLDB_APPLE_DSYMFORUUID_EXECUTABLE", None) ) - dwarfdump_uuid_regex = re.compile("UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") + dwarfdump_uuid_regex = re.compile(r"UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*") dwarfdump_cmd_output = subprocess.check_output( ('/usr/bin/dwarfdump --uuid "%s"' % aout_exe), shell=True ).decode("utf-8") diff --git a/lldb/test/API/source-manager/TestSourceManager.py b/lldb/test/API/source-manager/TestSourceManager.py index 7d9ce86cdc353..1283c73e152a9 100644 --- a/lldb/test/API/source-manager/TestSourceManager.py +++ b/lldb/test/API/source-manager/TestSourceManager.py @@ -256,7 +256,7 @@ def test_modify_source_file_while_debugging(self): # of 
breakpoints for the current line, i.e., self.line. import re - m = re.search("^\[(\d+)\].*// Set break point at this line.", output) + m = re.search(r"^\[(\d+)\].*// Set break point at this line.", output) if not m: self.fail("Fail to display source level breakpoints") self.assertGreater(int(m.group(1)), 0) diff --git a/lldb/test/API/tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py b/lldb/test/API/tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py index ba35ea9e0b64e..f6b613da964b8 100644 --- a/lldb/test/API/tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py +++ b/lldb/test/API/tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py @@ -66,11 +66,11 @@ def test_stackTrace(self): self.assertEqual(len(stackLabels), 2, "expected two label stack frames") self.assertRegex( stackLabels[0][1]["name"], - "Enqueued from com.apple.root.default-qos \(Thread \d\)", + r"Enqueued from com.apple.root.default-qos \(Thread \d\)", ) self.assertRegex( stackLabels[1][1]["name"], - "Enqueued from com.apple.main-thread \(Thread \d\)", + r"Enqueued from com.apple.main-thread \(Thread \d\)", ) for i, frame in stackLabels: diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py index 132072547e207..c8055f9b6320c 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteModuleInfo.py @@ -42,7 +42,7 @@ def test_module_info(self): context = self.expect_gdbremote_sequence() spec = context.get("spec") self.assertRegex(spec, '"file_path":".*"') - self.assertRegex(spec, '"file_offset":\d+') - self.assertRegex(spec, '"file_size":\d+') - self.assertRegex(spec, '"triple":"\w*-\w*-.*"') + self.assertRegex(spec, r'"file_offset":\d+') + self.assertRegex(spec, r'"file_size":\d+') + self.assertRegex(spec, r'"triple":"\w*-\w*-.*"') self.assertRegex(spec, '"uuid":"[A-Fa-f0-9]+"') diff --git 
a/lldb/test/API/tools/lldb-server/TestPtyServer.py b/lldb/test/API/tools/lldb-server/TestPtyServer.py index 345f68f6d87d3..c3cb1706cffd1 100644 --- a/lldb/test/API/tools/lldb-server/TestPtyServer.py +++ b/lldb/test/API/tools/lldb-server/TestPtyServer.py @@ -65,7 +65,7 @@ def test_pty_server(self): "read packet: $qXfer:features:read:target.xml:0,200000#00", { "direction": "send", - "regex": re.compile("^\$l(.+)#[0-9a-fA-F]{2}$", flags=re.DOTALL), + "regex": re.compile(r"^\$l(.+)#[0-9a-fA-F]{2}$", flags=re.DOTALL), "capture": {1: "target_xml"}, }, ], diff --git a/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py b/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py index bd78a83c65655..c1a92eeb5d5d1 100644 --- a/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py +++ b/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py @@ -24,7 +24,7 @@ def test_g_target_xml_returns_correct_data(self): ), { "direction": "send", - "regex": re.compile("^\$l(.+)#[0-9a-fA-F]{2}$", flags=re.DOTALL), + "regex": re.compile(r"^\$l(.+)#[0-9a-fA-F]{2}$", flags=re.DOTALL), "capture": {1: "target_xml"}, }, ], diff --git a/lldb/test/API/types/AbstractBase.py b/lldb/test/API/types/AbstractBase.py index 043a5510c62f2..fb1e25254b281 100644 --- a/lldb/test/API/types/AbstractBase.py +++ b/lldb/test/API/types/AbstractBase.py @@ -20,7 +20,7 @@ def Msg(var, val, using_frame_variable): class GenericTester(TestBase): # This is the pattern by design to match the " var = 'value'" output from # printf() stmts (see basic_type.cpp). - pattern = re.compile(" (\*?a[^=]*) = '([^=]*)'$") + pattern = re.compile(r" (\*?a[^=]*) = '([^=]*)'$") # Assert message. 
DATA_TYPE_GROKKED = "Data type from expr parser output is parsed correctly" @@ -205,7 +205,7 @@ def generic_type_tester( # output: (char) a_array_bounded[0] = 'a' # try: - dt = re.match("^\((.*)\)", output).group(1) + dt = re.match(r"^\((.*)\)", output).group(1) except: self.fail(self.DATA_TYPE_GROKKED) @@ -284,7 +284,7 @@ def generic_type_expr_tester( # output: (double) $0 = 1100.12 # try: - dt = re.match("^\((.*)\) \$[0-9]+ = ", output).group(1) + dt = re.match(r"^\((.*)\) \$[0-9]+ = ", output).group(1) except: self.fail(self.DATA_TYPE_GROKKED) diff --git a/lldb/utils/lui/sourcewin.py b/lldb/utils/lui/sourcewin.py index efe3c01f3ebd1..a72dad5985ecd 100644 --- a/lldb/utils/lui/sourcewin.py +++ b/lldb/utils/lui/sourcewin.py @@ -210,7 +210,7 @@ def getLocations(event): # inlined frames, so we get the description (which does take # into account inlined functions) and parse it. desc = lldbutil.get_description(location, lldb.eDescriptionLevelFull) - match = re.search("at\ ([^:]+):([\d]+)", desc) + match = re.search(r"at\ ([^:]+):([\d]+)", desc) try: path = match.group(1) line = int(match.group(2).strip()) From ffa0ea222b942bbbdd1ec8190cd8aca9d6b2fc10 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Fri, 28 Feb 2025 23:13:30 +0800 Subject: [PATCH 099/123] [SelectionDAG][RISCV] Promote VECREDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM} (#128800) This patch also adds the tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while. 
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 25 +- .../SelectionDAG/LegalizeVectorOps.cpp | 8 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 36 ++- .../RISCV/rvv/vreductions-fp-sdnode-bf16.ll | 136 +++++++++ .../RISCV/rvv/vreductions-fp-sdnode-f16.ll | 212 ++++++++++++++ .../RISCV/rvv/vreductions-fp-vp-bf16.ll | 167 +++++++++++ .../RISCV/rvv/vreductions-fp-vp-f16.ll | 269 ++++++++++++++++++ 7 files changed, 838 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f56097fdbb51a..2b8818482a333 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2913,7 +2913,9 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { } SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { - MVT VecVT = Node->getOperand(1).getSimpleValueType(); + bool IsVPOpcode = ISD::isVPOpcode(Node->getOpcode()); + MVT VecVT = IsVPOpcode ? Node->getOperand(1).getSimpleValueType() + : Node->getOperand(0).getSimpleValueType(); MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); MVT ScalarVT = Node->getSimpleValueType(0); MVT NewScalarVT = NewVecVT.getVectorElementType(); @@ -2921,16 +2923,13 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { SDLoc DL(Node); SmallVector Operands(Node->getNumOperands()); - // promote the initial value. // FIXME: Support integer. 
assert(Node->getOperand(0).getValueType().isFloatingPoint() && "Only FP promotion is supported"); - Operands[0] = - DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0)); - for (unsigned j = 1; j != Node->getNumOperands(); ++j) + for (unsigned j = 0; j != Node->getNumOperands(); ++j) if (Node->getOperand(j).getValueType().isVector() && - !(ISD::isVPOpcode(Node->getOpcode()) && + !(IsVPOpcode && ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand. // promote the vector operand. // FIXME: Support integer. @@ -2938,6 +2937,10 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { "Only FP promotion is supported"); Operands[j] = DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j)); + } else if (Node->getOperand(j).getValueType().isFloatingPoint()) { + // promote the initial value. + Operands[j] = + DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j)); } else { Operands[j] = Node->getOperand(j); // Skip VL operand. } @@ -5049,7 +5052,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SINT_TO_FP || Node->getOpcode() == ISD::SETCC || Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { + Node->getOpcode() == ISD::INSERT_VECTOR_ELT || + Node->getOpcode() == ISD::VECREDUCE_FMAX || + Node->getOpcode() == ISD::VECREDUCE_FMIN || + Node->getOpcode() == ISD::VECREDUCE_FMAXIMUM || + Node->getOpcode() == ISD::VECREDUCE_FMINIMUM) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::ATOMIC_STORE || @@ -5796,6 +5803,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; } + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAXIMUM: + case ISD::VECREDUCE_FMINIMUM: case ISD::VP_REDUCE_FMAX: case ISD::VP_REDUCE_FMIN: case ISD::VP_REDUCE_FMAXIMUM: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index de4447fb0cf1a..27bde7b96c857 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -503,13 +503,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: + case ISD::VECTOR_FIND_LAST_ACTIVE: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAXIMUM: case ISD::VECREDUCE_FMINIMUM: - case ISD::VECTOR_FIND_LAST_ACTIVE: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); + // Defer non-vector results to LegalizeDAG. + if (Action == TargetLowering::Promote) + Action = TargetLowering::Legal; break; case ISD::VECREDUCE_SEQ_FADD: case ISD::VECREDUCE_SEQ_FMUL: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6076fe56416ad..759cf531b74b1 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -959,13 +959,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // TODO: support more ops. 
static const unsigned ZvfhminZvfbfminPromoteOps[] = { - ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, - ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, - ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND, - ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, - ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, - ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, - ISD::STRICT_FMA}; + ISD::FMINNUM, + ISD::FMAXNUM, + ISD::FADD, + ISD::FSUB, + ISD::FMUL, + ISD::FMA, + ISD::FDIV, + ISD::FSQRT, + ISD::FCEIL, + ISD::FTRUNC, + ISD::FFLOOR, + ISD::FROUND, + ISD::FROUNDEVEN, + ISD::FRINT, + ISD::FNEARBYINT, + ISD::IS_FPCLASS, + ISD::SETCC, + ISD::FMAXIMUM, + ISD::FMINIMUM, + ISD::STRICT_FADD, + ISD::STRICT_FSUB, + ISD::STRICT_FMUL, + ISD::STRICT_FDIV, + ISD::STRICT_FSQRT, + ISD::STRICT_FMA, + ISD::VECREDUCE_FMIN, + ISD::VECREDUCE_FMAX, + ISD::VECREDUCE_FMINIMUM, + ISD::VECREDUCE_FMAXIMUM}; // TODO: support more vp ops. static const unsigned ZvfhminZvfbfminPromoteVPOps[] = { diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll new file mode 100644 index 0000000000000..1c42cd29deca9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define bfloat @vreduce_fmin_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmin_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, 
fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmin.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmax_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmax_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmax.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmin_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmin_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmin.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmax_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmax_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmax.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fminimum_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fminimum_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: beqz a0, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: 
.LBB4_2: +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fminimum.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmaximum_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmaximum_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: beqz a0, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmaximum.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fminimum_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fminimum_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fminimum.nxv4bf16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmaximum_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmaximum.nxv4bf16( %val) + ret bfloat %s +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll 
b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll new file mode 100644 index 0000000000000..e269b13137d44 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll @@ -0,0 +1,212 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN + +define half @vreduce_fmin_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmin_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmin_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmin.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmax_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmax_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmax_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, 
v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmax.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmin_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmin_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmin_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmin.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmax_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmax_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmax_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmax.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fminimum_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fminimum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8 +; ZVFH-NEXT: vcpop.m a0, v9 +; ZVFH-NEXT: beqz a0, .LBB4_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB4_2: +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; 
ZVFHMIN-LABEL: vreduce_fminimum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10 +; ZVFHMIN-NEXT: vcpop.m a0, v8 +; ZVFHMIN-NEXT: beqz a0, .LBB4_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB4_2: +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fminimum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmaximum_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmaximum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8 +; ZVFH-NEXT: vcpop.m a0, v9 +; ZVFH-NEXT: beqz a0, .LBB5_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB5_2: +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmaximum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10 +; ZVFHMIN-NEXT: vcpop.m a0, v8 +; ZVFHMIN-NEXT: beqz a0, .LBB5_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB5_2: +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmaximum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fminimum_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fminimum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; 
ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fminimum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fminimum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmaximum_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmaximum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmaximum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmaximum.nxv4f16( %val) + ret half %s +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll new file mode 100644 index 0000000000000..37bd0a0496dcf --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define bfloat @vpreduce_fmin_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmin_nxv4bf16: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmax_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmax_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmin_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmin_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmax_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmax_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fminimum_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fminimum_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: vcpop.m a2, v8, v0.t +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmaximum_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmaximum_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: vcpop.m a2, v8, v0.t +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: or 
a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fminimum_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmaximum_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll 
b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll new file mode 100644 index 0000000000000..8993bf8a767d8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll @@ -0,0 +1,269 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN + +define half @vpreduce_fmin_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmin_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmin_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmin.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmax_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmax_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, 
fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmax_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmax.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmin_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmin_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmin_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fmin.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmax_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmax_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: 
ret +; +; ZVFHMIN-LABEL: vpreduce_fmax_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fmax.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fminimum_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fminimum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t +; ZVFH-NEXT: fcvt.s.h fa5, fa0 +; ZVFH-NEXT: vcpop.m a1, v9, v0.t +; ZVFH-NEXT: feq.s a2, fa5, fa5 +; ZVFH-NEXT: xori a2, a2, 1 +; ZVFH-NEXT: or a1, a1, a2 +; ZVFH-NEXT: beqz a1, .LBB4_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB4_2: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fminimum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: feq.s a1, fa5, fa5 +; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t +; ZVFHMIN-NEXT: xori a1, a1, 1 +; ZVFHMIN-NEXT: or a1, a2, a1 +; ZVFHMIN-NEXT: beqz a1, .LBB4_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB4_2: +; 
ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fminimum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmaximum_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmaximum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t +; ZVFH-NEXT: fcvt.s.h fa5, fa0 +; ZVFH-NEXT: vcpop.m a1, v9, v0.t +; ZVFH-NEXT: feq.s a2, fa5, fa5 +; ZVFH-NEXT: xori a2, a2, 1 +; ZVFH-NEXT: or a1, a1, a2 +; ZVFH-NEXT: beqz a1, .LBB5_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB5_2: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmaximum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: feq.s a1, fa5, fa5 +; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t +; ZVFHMIN-NEXT: xori a1, a1, 1 +; ZVFHMIN-NEXT: or a1, a2, a1 +; ZVFHMIN-NEXT: beqz a1, .LBB5_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB5_2: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, 
v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fminimum_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fminimum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fminimum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fminimum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmaximum_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmaximum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmaximum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half 
@llvm.vp.reduce.fmaximum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} From dc785ea5bda15eb6da8814d341503a4f506dfc8d Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi Date: Fri, 28 Feb 2025 15:17:33 +0000 Subject: [PATCH 100/123] Fix fp8-init-list.c test failure (#129259) Fix error in fp8-init-list.c introduced by PR #126726 --- clang/test/CodeGen/AArch64/fp8-init-list.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGen/AArch64/fp8-init-list.c b/clang/test/CodeGen/AArch64/fp8-init-list.c index 8b4b31a71c46a..872ee4f8a3d42 100644 --- a/clang/test/CodeGen/AArch64/fp8-init-list.c +++ b/clang/test/CodeGen/AArch64/fp8-init-list.c @@ -12,14 +12,14 @@ // CHECK-LABEL: define dso_local <8 x i8> @vector_init_test( // CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VECINIT14:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: ret <8 x i8> [[VECINIT14]] +// CHECK-NEXT: [[VECINIT7:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> [[VECINIT7]] // // CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z16vector_init_testu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[VECINIT14:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer -// CHECK-CXX-NEXT: ret <8 x i8> [[VECINIT14]] +// CHECK-CXX-NEXT: [[VECINIT7:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer +// CHECK-CXX-NEXT: ret <8 x i8> [[VECINIT7]] // mfloat8x8_t vector_init_test(__mfp8 x) { return (mfloat8x8_t) {x, x, x, x, x, x, x, x}; @@ -34,13 +34,15 @@ struct S s; // CHECK-LABEL: define dso_local void @f( // CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store <1 x i8> [[X]], ptr 
@s, align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[X]], i64 0 +// CHECK-NEXT: store i8 [[TMP0]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z1fu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[X]], i64 0 +// CHECK-CXX-NEXT: store i8 [[TMP0]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] // CHECK-CXX-NEXT: ret void // void f(__mfp8 x) { From 6c4f9a60e0a031d227a2e89d168a5d9634dd97a6 Mon Sep 17 00:00:00 2001 From: RolandF77 <55763885+RolandF77@users.noreply.github.com> Date: Fri, 28 Feb 2025 10:25:07 -0500 Subject: [PATCH 101/123] [PowerPC] custom lower v1024i1 load/store (#126969) Support moving PPC dense math register values to and from storage with LLVM IR load/store. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 135 +++++++++++++++++++- llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 + llvm/test/CodeGen/PowerPC/v1024ls.ll | 47 +++++++ 3 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/v1024ls.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d6c8e8d506799..91df5f467e59c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1355,10 +1355,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STORE, MVT::v256i1, Custom); } if (Subtarget.hasMMA()) { - if (Subtarget.isISAFuture()) + if (Subtarget.isISAFuture()) { addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass); - else + addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass); + setOperationAction(ISD::LOAD, MVT::v1024i1, Custom); + setOperationAction(ISD::STORE, MVT::v1024i1, Custom); + } else { 
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass); + } setOperationAction(ISD::LOAD, MVT::v512i1, Custom); setOperationAction(ISD::STORE, MVT::v512i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom); @@ -11758,6 +11762,64 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, return Op; } +SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + LoadSDNode *LN = cast(Op.getNode()); + SDValue LoadChain = LN->getChain(); + SDValue BasePtr = LN->getBasePtr(); + EVT VT = Op.getValueType(); + + // Type v1024i1 is used for Dense Math dmr registers. + assert(VT == MVT::v1024i1 && "Unsupported type."); + assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) && + "Dense Math support required."); + assert(Subtarget.pairedVectorMemops() && "Vector pair support required."); + + SmallVector Loads; + SmallVector LoadChains; + SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32); + SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr}; + MachineMemOperand *MMO = LN->getMemOperand(); + unsigned NumVecs = VT.getSizeInBits() / 256; + for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { + MachineMemOperand *NewMMO = + DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32); + if (Idx > 0) { + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(32, dl, BasePtr.getValueType())); + LoadOps[2] = BasePtr; + } + SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, + DAG.getVTList(MVT::v256i1, MVT::Other), + LoadOps, MVT::v256i1, NewMMO); + LoadChains.push_back(Ld.getValue(1)); + Loads.push_back(Ld); + } + + if (Subtarget.isLittleEndian()) { + std::reverse(Loads.begin(), Loads.end()); + std::reverse(LoadChains.begin(), LoadChains.end()); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, Loads[0], + Loads[1]), + 0); + SDValue LoSub = 
DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32); + SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1, + Loads[2], Loads[3]), + 0); + SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32); + SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32); + const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub}; + SDValue Value = + SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0); + + SDValue RetOps[] = {Value, TF}; + return DAG.getMergeValues(RetOps, dl); +} + SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -11766,6 +11828,9 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SDValue BasePtr = LN->getBasePtr(); EVT VT = Op.getValueType(); + if (VT == MVT::v1024i1) + return LowerDMFVectorLoad(Op, DAG); + if (VT != MVT::v256i1 && VT != MVT::v512i1) return Op; @@ -11803,6 +11868,69 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, return DAG.getMergeValues(RetOps, dl); } +SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc dl(Op); + StoreSDNode *SN = cast(Op.getNode()); + SDValue StoreChain = SN->getChain(); + SDValue BasePtr = SN->getBasePtr(); + SmallVector Values; + SmallVector Stores; + EVT VT = SN->getValue().getValueType(); + + // Type v1024i1 is used for Dense Math dmr registers. 
+ assert(VT == MVT::v1024i1 && "Unsupported type."); + assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) && + "Dense Math support required."); + assert(Subtarget.pairedVectorMemops() && "Vector pair support required."); + + SDValue Lo( + DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, + Op.getOperand(1), + DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), + 0); + SDValue Hi( + DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, + Op.getOperand(1), + DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), + 0); + EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; + MachineSDNode *ExtNode = + DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo); + Values.push_back(SDValue(ExtNode, 0)); + Values.push_back(SDValue(ExtNode, 1)); + ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi); + Values.push_back(SDValue(ExtNode, 0)); + Values.push_back(SDValue(ExtNode, 1)); + + if (Subtarget.isLittleEndian()) + std::reverse(Values.begin(), Values.end()); + + SDVTList Tys = DAG.getVTList(MVT::Other); + SmallVector Ops{ + StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32), + Values[0], BasePtr}; + MachineMemOperand *MMO = SN->getMemOperand(); + unsigned NumVecs = VT.getSizeInBits() / 256; + for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { + MachineMemOperand *NewMMO = + DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32); + if (Idx > 0) { + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(32, dl, BasePtr.getValueType())); + Ops[3] = BasePtr; + } + Ops[2] = Values[Idx]; + SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, + MVT::v256i1, NewMMO); + Stores.push_back(St); + } + + SDValue TF = DAG.getTokenFactor(dl, Stores); + return TF; +} + SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -11813,6 +11941,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SDValue Value2 = 
SN->getValue(); EVT StoreVT = Value.getValueType(); + if (StoreVT == MVT::v1024i1) + return LowerDMFVectorStore(Op, DAG); + if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1) return Op; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 514329bbe92d7..1f22aa16a89be 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1344,6 +1344,8 @@ namespace llvm { SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, diff --git a/llvm/test/CodeGen/PowerPC/v1024ls.ll b/llvm/test/CodeGen/PowerPC/v1024ls.ll new file mode 100644 index 0000000000000..c7f6911f9ddbc --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v1024ls.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @v1024ls(ptr nocapture readonly %vqp, ptr nocapture %resp) { +; CHECK-LABEL: v1024ls: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r4) +; CHECK-NEXT: stxvp vsp36, 
64(r4) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r4) +; CHECK-NEXT: stxvp vsp36, 0(r4) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: v1024ls: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r4) +; CHECK-BE-NEXT: stxvp vsp34, 64(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r4) +; CHECK-BE-NEXT: stxvp vsp34, 0(r4) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vqp, align 64 + store <1024 x i1> %0, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz() From 83458ae5f5b476528bef26ea83a3e97bfc4cb822 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 28 Feb 2025 09:25:20 -0500 Subject: [PATCH 102/123] [gn build] Port 15c49b9db3f6 --- llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn index 7ea260f952618..931ca8b20abc5 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Frontend/BUILD.gn @@ -10,6 +10,7 @@ unittest("FrontendTests") { "//clang/lib/FrontendTool", "//clang/lib/Lex", "//clang/lib/Sema", + "//clang/lib/Tooling", "//llvm/lib/Support", "//llvm/lib/TargetParser", ] @@ -20,6 +21,7 @@ unittest("FrontendTests") { "CompilerInvocationTest.cpp", "FixedPointString.cpp", "FrontendActionTest.cpp", + "NoAlterCodeGenActionTest.cpp", "OutputStreamTest.cpp", "PCHPreambleTest.cpp", "ParsedSourceLocationTest.cpp", From 5d6486c5ef71106f7089837b6735e8d1d13148c9 Mon Sep 17 00:00:00 
2001 From: David Spickett Date: Fri, 28 Feb 2025 15:58:29 +0000 Subject: [PATCH 103/123] [lldb] Restore register state if PrepareTrivialCall fails (#129038) Fixes #124269 PrepareTrivialCall always had the possibility of failing, but given that it only wrote to general purpose registers, if it did, you had bigger problems. When it failed, we did not mark the thread plan valid and when it was torn down we didn't try to restore the register state. This meant that if you tried to continue, the program was unlikely to work. When I added AArch64 GCS support, I needed to handle the situation where the GCS pointer points to unmapped memory and we fail to write the extra entry we need. So I added code to restore the gcspr_el0 register specifically if this happened, and ordered the operations so that we tried this first. In this change I've made the teardown of an invalid thread plan restore the register state if one was saved. It may be there isn't one if ConstructorSetup fails, but this is ok because that function does not modify anything. Now that we're doing that, I don't need the GCS specific code anymore, and all thread plans are protected from this in the rare event something does fail. Testing is done by the existing GCS test case that points the gcspr into unmapped memory which causes PrepareTrivialCall to fail. I tried adding a simulated test using a mock gdb server. This was not possible because they all use DynamicLoaderStatic which disables all JIT features.
--- lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp | 9 +-------- lldb/source/Target/ThreadPlanCallFunction.cpp | 13 ++++++++++++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp b/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp index 280ec5ba37100..25803c9799ce4 100644 --- a/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp +++ b/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp @@ -102,12 +102,7 @@ static Status PushToLinuxGuardedControlStack(addr_t return_addr, size_t wrote = thread.GetProcess()->WriteMemory(gcspr_el0, &return_addr, sizeof(return_addr), error); if ((wrote != sizeof(return_addr) || error.Fail())) { - // When PrepareTrivialCall fails, the register context is not restored, - // unlike when an expression fails to execute. This is arguably a bug, - // see https://github.com/llvm/llvm-project/issues/124269. - // For now we are handling this here specifically. We can assume this - // write will work as the one to decrement the register did. - reg_ctx->WriteRegisterFromUnsigned(gcspr_el0_info, gcspr_el0 + 8); + // gcspr_el0 will be restored by the ThreadPlan's DoTakedown. return Status("Failed to write new Guarded Control Stack entry."); } @@ -150,8 +145,6 @@ bool ABISysV_arm64::PrepareTrivialCall(Thread &thread, addr_t sp, if (args.size() > 8) return false; - // Do this first, as it's got the most chance of failing (though still very - // low). 
if (GetProcessSP()->GetTarget().GetArchitecture().GetTriple().isOSLinux()) { Status err = PushToLinuxGuardedControlStack(return_addr, reg_ctx, thread); // If we could not manage the GCS, the expression will certainly fail, diff --git a/lldb/source/Target/ThreadPlanCallFunction.cpp b/lldb/source/Target/ThreadPlanCallFunction.cpp index 50dcb66b9719f..218111d4faf60 100644 --- a/lldb/source/Target/ThreadPlanCallFunction.cpp +++ b/lldb/source/Target/ThreadPlanCallFunction.cpp @@ -174,8 +174,20 @@ void ThreadPlanCallFunction::ReportRegisterState(const char *message) { void ThreadPlanCallFunction::DoTakedown(bool success) { Log *log = GetLog(LLDBLog::Step); + Thread &thread = GetThread(); if (!m_valid) { + // If ConstructorSetup was successful but PrepareTrivialCall was not, + // we will have a saved register state and potentially modified registers. + // Restore those. + if (m_stored_thread_state.register_backup_sp) + if (!thread.RestoreRegisterStateFromCheckpoint(m_stored_thread_state)) + LLDB_LOGF( + log, + "ThreadPlanCallFunction(%p): Failed to restore register state from " + "invalid plan that contained a saved register state.", + static_cast(this)); + // Don't call DoTakedown if we were never valid to begin with. LLDB_LOGF(log, "ThreadPlanCallFunction(%p): Log called on " @@ -185,7 +197,6 @@ void ThreadPlanCallFunction::DoTakedown(bool success) { if (!m_takedown_done) { - Thread &thread = GetThread(); if (success) { SetReturnValue(); } From 63e615bb87343a61eda6ba6644141573f8739c80 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 28 Feb 2025 07:21:40 -0800 Subject: [PATCH 104/123] Reapply "[RISCV][TTI] Add shuffle costing for masked slide lowering (#128537)" With a fix for fully undef masks. These can't reach the lowering code, but can reach the costing code via e.g. SLP. This change adds the TTI costing corresponding to the recently added isMaskedSlidePair lowering for vector shuffles.
However, since the existing costing code hadn't covered either slideup, slidedown, or the (now removed) isElementRotate, the impact is larger in scope than just that new lowering. --------- Co-authored-by: Alexey Bataev Co-authored-by: Luke Lau --- llvm/include/llvm/Analysis/VectorUtils.h | 9 + llvm/lib/Analysis/VectorUtils.cpp | 30 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 41 +- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 72 +- .../Target/RISCV/RISCVTargetTransformInfo.h | 6 + .../CostModel/RISCV/shuffle-exact-vlen.ll | 12 +- .../RISCV/shuffle-extract_subvector.ll | 2 +- .../CostModel/RISCV/shuffle-transpose.ll | 64 +- .../SLPVectorizer/RISCV/complex-loads.ll | 744 +++--------------- .../SLPVectorizer/RISCV/reductions.ll | 24 +- 10 files changed, 303 insertions(+), 701 deletions(-) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index f21594c557e0e..4390b45f1f730 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -203,6 +203,15 @@ bool getShuffleDemandedElts(int SrcWidth, ArrayRef Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts = false); +/// Does this shuffle mask represent either one slide shuffle or a pair of +/// two slide shuffles, combined with a select on some constant vector mask? +/// A slide is a shuffle mask which shifts some set of elements up or down +/// the vector, with all other elements being undefined. An identity shuffle +/// will be matched a slide by 0. The output parameter provides the source +/// (-1 means no source), and slide direction for each slide. +bool isMaskedSlidePair(ArrayRef Mask, int NumElts, + std::array, 2> &SrcInfo); + /// Replace each shuffle mask index with the scaled sequential indices for an /// equivalent mask of narrowed elements. Mask elements that are less than 0 /// (sentinel values) are repeated in the output mask. 
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index ede0fca4d51b0..53150684b4e4a 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -419,6 +419,36 @@ bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef Mask, return true; } +bool llvm::isMaskedSlidePair(ArrayRef Mask, int NumElts, + std::array, 2> &SrcInfo) { + const int SignalValue = NumElts * 2; + SrcInfo[0] = {-1, SignalValue}; + SrcInfo[1] = {-1, SignalValue}; + for (auto [i, M] : enumerate(Mask)) { + if (M < 0) + continue; + int Src = M >= (int)NumElts; + int Diff = (int)i - (M % NumElts); + bool Match = false; + for (int j = 0; j < 2; j++) { + auto &[SrcE, DiffE] = SrcInfo[j]; + if (SrcE == -1) { + assert(DiffE == SignalValue); + SrcE = Src; + DiffE = Diff; + } + if (SrcE == Src && DiffE == Diff) { + Match = true; + break; + } + } + if (!Match) + return false; + } + // Avoid all undef masks + return SrcInfo[0].first != -1; +} + void llvm::narrowShuffleMaskElts(int Scale, ArrayRef Mask, SmallVectorImpl &ScaledMask) { assert(Scale > 0 && "Unexpected scaling factor"); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 759cf531b74b1..4e6b3a224b79b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4584,32 +4584,9 @@ static bool isInterleaveShuffle(ArrayRef Mask, MVT VT, int &EvenSrc, /// Is this mask representing a masked combination of two slides? 
static bool isMaskedSlidePair(ArrayRef Mask, - std::pair SrcInfo[2]) { - int NumElts = Mask.size(); - int SignalValue = NumElts * 2; - SrcInfo[0] = {-1, SignalValue}; - SrcInfo[1] = {-1, SignalValue}; - for (unsigned i = 0; i != Mask.size(); ++i) { - int M = Mask[i]; - if (M < 0) - continue; - int Src = M >= (int)NumElts; - int Diff = (int)i - (M % NumElts); - bool Match = false; - for (int j = 0; j < 2; j++) { - if (SrcInfo[j].first == -1) { - assert(SrcInfo[j].second == SignalValue); - SrcInfo[j].first = Src; - SrcInfo[j].second = Diff; - } - if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) { - Match = true; - break; - } - } - if (!Match) - return false; - } + std::array, 2> &SrcInfo) { + if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo)) + return false; // Avoid matching vselect idioms if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0) @@ -4625,7 +4602,8 @@ static bool isMaskedSlidePair(ArrayRef Mask, // Exactly matches the semantics of a previously existing custom matcher // to allow migration to new matcher without changing output. -static bool isElementRotate(std::pair SrcInfo[2], unsigned NumElts) { +static bool isElementRotate(std::array, 2> &SrcInfo, + unsigned NumElts) { if (SrcInfo[1].first == -1) return true; return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 && @@ -5626,10 +5604,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // without masking. Avoid matching bit rotates (which are not also element // rotates) as slide pairs. This is a performance heuristic, not a // functional check. 
- std::pair SrcInfo[2]; + std::array, 2> SrcInfo; unsigned RotateAmt; MVT RotateVT; - if (isMaskedSlidePair(Mask, SrcInfo) && + if (::isMaskedSlidePair(Mask, SrcInfo) && (isElementRotate(SrcInfo, NumElts) || !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) { SDValue Sources[2]; @@ -5986,10 +5964,11 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (SVT.getScalarType() == MVT::i1) return false; - std::pair SrcInfo[2]; + std::array, 2> SrcInfo; int Dummy1, Dummy2; return ShuffleVectorInst::isReverseMask(M, NumElts) || - (isMaskedSlidePair(M, SrcInfo) && isElementRotate(SrcInfo, NumElts)) || + (::isMaskedSlidePair(M, SrcInfo) && + isElementRotate(SrcInfo, NumElts)) || isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index cde643a250be1..6005c067428eb 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -475,6 +475,64 @@ costShuffleViaVRegSplitting(RISCVTTIImpl &TTI, MVT LegalVT, return InstructionCost::getInvalid(); } +InstructionCost RISCVTTIImpl::getSlideCost(FixedVectorType *Tp, + ArrayRef Mask, + TTI::TargetCostKind CostKind) { + // Avoid missing masks and length changing shuffles + if (Mask.size() <= 2 || Mask.size() != Tp->getNumElements()) + return InstructionCost::getInvalid(); + + int NumElts = Tp->getNumElements(); + std::pair LT = getTypeLegalizationCost(Tp); + // Avoid scalarization cases + if (!LT.second.isFixedLengthVector()) + return InstructionCost::getInvalid(); + + // Requires moving elements between parts, which requires additional + // unmodeled instructions. + if (LT.first != 1) + return InstructionCost::getInvalid(); + + auto GetSlideOpcode = [&](int SlideAmt) { + assert(SlideAmt != 0); + bool IsVI = isUInt<5>(std::abs(SlideAmt)); + if (SlideAmt < 0) + return IsVI ? 
RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX; + return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX; + }; + + std::array, 2> SrcInfo; + if (!isMaskedSlidePair(Mask, NumElts, SrcInfo)) + return InstructionCost::getInvalid(); + + if (SrcInfo[1].second == 0) + std::swap(SrcInfo[0], SrcInfo[1]); + + InstructionCost FirstSlideCost = 0; + if (SrcInfo[0].second != 0) { + unsigned Opcode = GetSlideOpcode(SrcInfo[0].second); + FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind); + } + + if (SrcInfo[1].first == -1) + return FirstSlideCost; + + InstructionCost SecondSlideCost = 0; + if (SrcInfo[1].second != 0) { + unsigned Opcode = GetSlideOpcode(SrcInfo[1].second); + SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind); + } else { + SecondSlideCost = + getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind); + } + + auto EC = Tp->getElementCount(); + VectorType *MaskTy = + VectorType::get(IntegerType::getInt1Ty(Tp->getContext()), EC); + InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); + return FirstSlideCost + SecondSlideCost + MaskCost; +} + InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, TTI::TargetCostKind CostKind, @@ -487,8 +545,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // First, handle cases where having a fixed length vector enables us to // give a more accurate cost than falling back to generic scalable codegen. // TODO: Each of these cases hints at a modeling gap around scalable vectors. 
- if (ST->hasVInstructions() && isa(Tp) && - LT.second.isFixedLengthVector()) { + if (auto *FVTp = dyn_cast(Tp); + FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) { InstructionCost VRegSplittingCost = costShuffleViaVRegSplitting( *this, LT.second, ST->getRealVLen(), Tp, Mask, CostKind); if (VRegSplittingCost.isValid()) @@ -544,6 +602,11 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return Cost; } } + + if (InstructionCost SlideCost = getSlideCost(FVTp, Mask, CostKind); + SlideCost.isValid()) + return SlideCost; + // vrgather + cost of generating the mask constant. // We model this for an unknown mask with a single vrgather. if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 || @@ -558,6 +621,11 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, } case TTI::SK_Transpose: case TTI::SK_PermuteTwoSrc: { + + if (InstructionCost SlideCost = getSlideCost(FVTp, Mask, CostKind); + SlideCost.isValid()) + return SlideCost; + // 2 x (vrgather + cost of generating the mask constant) + cost of mask // register for the second vrgather. We model this for an unknown // (shuffle) mask. diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 134a7333b9b06..3f57560d3c127 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -63,6 +63,12 @@ class RISCVTTIImpl : public BasicTTIImplBase { /// type. InstructionCost getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind); + + /// If this shuffle can be lowered as a masked slide pair (at worst), + /// return a cost for it. 
+ InstructionCost getSlideCost(FixedVectorType *Tp, ArrayRef Mask, + TTI::TargetCostKind CostKind); + public: explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F) : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll index c951184a31731..06c709e4cc879 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-exact-vlen.ll @@ -186,7 +186,7 @@ define void @insert_subvec() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_1 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_3 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'insert_subvec' @@ -225,7 +225,7 @@ define void @insert_subvec() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_1 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_4_3 = shufflevector <16 x i32> poison, <16 x i32> poison, 
<16 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i32_4_05 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v4i8_2_0 = shufflevector <4 x i8> poison, <4 x i8> poison, <4 x i32> @@ -737,8 +737,8 @@ define void @multipart() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32idrev = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> @@ 
-757,8 +757,8 @@ define void @multipart() vscale_range(2,2) { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16c = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16d = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32a = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v32a4 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32idrev = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32many2 = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll index e8dd30345cc76..d2bfb61a11b00 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll @@ -19,7 +19,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; CHECK-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; VLEN128-LABEL: 'test_vXf64' diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll index 8f784a07d3124..ef069fee8526e 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll @@ -10,11 +10,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn1.v8i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0 ; ; SIZE-LABEL: 'trn1.v8i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret <8 x i8> %tmp0 ; %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -23,11 +23,11 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn2.v8i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %tmp0 ; ; SIZE-LABEL: 'trn2.v8i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %tmp0 ; %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -36,11 +36,11 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn1.v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0 ; ; SIZE-LABEL: 'trn1.v16i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x 
i8> %tmp0 ; %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -49,11 +49,11 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn2.v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %tmp0 ; ; SIZE-LABEL: 'trn2.v16i8' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %tmp0 ; %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -62,11 +62,11 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn1.v4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0 ; ; SIZE-LABEL: 'trn1.v4i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %tmp0 ; %tmp0 = 
shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -75,11 +75,11 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn2.v4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %tmp0 ; ; SIZE-LABEL: 'trn2.v4i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %tmp0 ; %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -88,11 +88,11 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn1.v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0 ; ; SIZE-LABEL: 'trn1.v8i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %tmp0 ; %tmp0 = shufflevector <8 x i16> 
%v0, <8 x i16> %v1, <8 x i32> @@ -101,11 +101,11 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn2.v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %tmp0 ; ; SIZE-LABEL: 'trn2.v8i16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %tmp0 ; %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> @@ -140,11 +140,11 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn1.v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0 ; ; SIZE-LABEL: 'trn1.v4i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %tmp0 ; %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 
x i32> @@ -153,11 +153,11 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn2.v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %tmp0 ; ; SIZE-LABEL: 'trn2.v4i32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %tmp0 ; %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> @@ -218,11 +218,11 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn1.v4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0 ; ; SIZE-LABEL: 'trn1.v4f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %tmp0 ; %tmp0 = shufflevector <4 x float> %v0, <4 x 
float> %v1, <4 x i32> @@ -231,11 +231,11 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn2.v4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %tmp0 ; ; SIZE-LABEL: 'trn2.v4f32' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %tmp0 ; %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> @@ -270,11 +270,11 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn1.v4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0 ; ; SIZE-LABEL: 'trn1.v4f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %tmp0 ; %tmp0 
= shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -283,11 +283,11 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn2.v4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %tmp0 ; ; SIZE-LABEL: 'trn2.v4f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %tmp0 ; %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -296,11 +296,11 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn1.v8f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0 ; ; SIZE-LABEL: 'trn1.v8f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> 
%tmp0 ; %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> @@ -309,11 +309,11 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn2.v8f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %tmp0 ; ; SIZE-LABEL: 'trn2.v8f16' -; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> %tmp0 ; %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 11fa3337544a1..18acae5835724 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -6,663 +6,175 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 -; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, 
ptr [[PIX1]], i64 1 -; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 -; CHECK-NEXT: [[CONV33:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 -; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 -; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 -; CHECK-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 -; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 -; CHECK-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP21]], [[TMP31]] -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: 
[[TMP26:%.*]] = zext <2 x i8> [[TMP49]] to <2 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP50:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = sub <2 x i32> [[TMP26]], [[TMP50]] -; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], splat (i32 16) -; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]] -; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32> -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP51]], [[TMP57]] -; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP40]] to <2 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = sub <2 x i32> [[TMP39]], [[TMP61]] -; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], splat (i32 16) -; CHECK-NEXT: [[TMP42:%.*]] = add <2 x i32> [[TMP37]], [[TMP35]] -; CHECK-NEXT: [[TMP34:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]] -; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]] -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP34]], i32 0 -; CHECK-NEXT: [[CONV_2:%.*]] = extractelement <2 x i32> [[TMP34]], i32 1 -; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[CONV_2]], [[TMP43]] -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP44]], i32 0 -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP44]], i32 1 -; CHECK-NEXT: [[ADD55_2:%.*]] = add i32 [[TMP47]], [[TMP46]] ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr 
i8, ptr null, i64 4 -; CHECK-NEXT: [[TMP53:%.*]] = load <2 x i8>, ptr null, align 1 ; CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr null, align 1 -; CHECK-NEXT: [[TMP62:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> -; CHECK-NEXT: [[TMP77:%.*]] = zext i8 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = load <2 x i8>, ptr null, align 1 -; CHECK-NEXT: [[TMP55:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32> -; CHECK-NEXT: [[TMP59:%.*]] = sub <2 x i32> [[TMP62]], [[TMP55]] -; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) -; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP41]] to <2 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i32> [[TMP58]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32> -; CHECK-NEXT: [[TMP81:%.*]] = sub <2 x i32> [[TMP48]], [[TMP76]] -; CHECK-NEXT: [[TMP167:%.*]] = shl <2 x i32> [[TMP81]], splat (i32 16) -; CHECK-NEXT: [[TMP75:%.*]] = add <2 x i32> [[TMP167]], [[TMP59]] -; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 -; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 -; CHECK-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 -; CHECK-NEXT: [[TMP64:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 -; CHECK-NEXT: [[TMP79:%.*]] = zext <2 x i8> [[TMP64]] to <2 x i32> -; CHECK-NEXT: [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 -; CHECK-NEXT: [[TMP91:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = sub <2 x i32> [[TMP79]], [[TMP91]] -; CHECK-NEXT: [[TMP170:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) -; CHECK-NEXT: [[TMP171:%.*]] = zext <2 x i8> [[TMP170]] to <2 x i32> -; CHECK-NEXT: [[TMP172:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 -; 
CHECK-NEXT: [[TMP173:%.*]] = zext <2 x i8> [[TMP172]] to <2 x i32> -; CHECK-NEXT: [[TMP66:%.*]] = sub <2 x i32> [[TMP171]], [[TMP173]] -; CHECK-NEXT: [[TMP67:%.*]] = shl <2 x i32> [[TMP66]], splat (i32 16) -; CHECK-NEXT: [[TMP69:%.*]] = add <2 x i32> [[TMP67]], [[TMP65]] -; CHECK-NEXT: [[TMP176:%.*]] = extractelement <2 x i32> [[TMP75]], i32 0 -; CHECK-NEXT: [[TMP197:%.*]] = extractelement <2 x i32> [[TMP75]], i32 1 -; CHECK-NEXT: [[SUB59:%.*]] = add i32 [[TMP197]], [[TMP176]] -; CHECK-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP176]], [[TMP197]] -; CHECK-NEXT: [[ADD112_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 0 -; CHECK-NEXT: [[XOR_I63_2:%.*]] = extractelement <2 x i32> [[TMP69]], i32 1 -; CHECK-NEXT: [[SUB59_1:%.*]] = add i32 [[XOR_I63_2]], [[ADD112_2]] -; CHECK-NEXT: [[SUB47_3:%.*]] = sub i32 [[ADD112_2]], [[XOR_I63_2]] -; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[SUB59_1]], [[SUB59]] -; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP70]], i32 [[SUB59]], i32 0 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i32> [[TMP34]], i32 [[SUB59_1]], i32 0 -; CHECK-NEXT: [[TMP222:%.*]] = sub <2 x i32> [[TMP71]], [[TMP72]] -; CHECK-NEXT: [[ADD55_3:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] -; CHECK-NEXT: [[TMP74:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP78:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[SUB45_3]], i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP44]], i32 [[SUB47_3]], i32 0 -; CHECK-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP78]], [[TMP80]] -; CHECK-NEXT: [[ADD95:%.*]] = add i32 [[ADD94]], [[ADD48_2]] -; CHECK-NEXT: [[SUB86_3:%.*]] = sub i32 [[ADD48_2]], [[ADD94]] -; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP77]], 15 -; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 -; CHECK-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 -; CHECK-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 
15 -; CHECK-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 -; CHECK-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 -; CHECK-NEXT: [[TMP86:%.*]] = extractelement <2 x i32> [[TMP222]], i32 0 -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <2 x i32> [[TMP222]], i32 1 -; CHECK-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP86]], [[TMP87]] -; CHECK-NEXT: [[ADD112_1:%.*]] = sub i32 [[TMP87]], [[TMP86]] -; CHECK-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 -; CHECK-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 -; CHECK-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 -; CHECK-NEXT: [[ADD94_4:%.*]] = add i32 [[TMP88]], [[TMP89]] -; CHECK-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP89]], [[TMP88]] -; CHECK-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV1]], 15 -; CHECK-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 -; CHECK-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 -; CHECK-NEXT: [[TMP90:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 -; CHECK-NEXT: [[TMP102:%.*]] = zext <2 x i8> [[TMP90]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr null, align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 ; CHECK-NEXT: [[TMP92:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 -; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP94:%.*]] = zext <2 x i8> [[TMP93]] to <2 x i32> ; CHECK-NEXT: [[TMP95:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> ; CHECK-NEXT: [[TMP98:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = zext <2 x i8> [[TMP99]] to <2 x 
i32> -; CHECK-NEXT: [[TMP101:%.*]] = sub <2 x i32> [[TMP97]], [[TMP100]] -; CHECK-NEXT: [[TMP224:%.*]] = shl <2 x i32> [[TMP101]], splat (i32 16) -; CHECK-NEXT: [[TMP103:%.*]] = shufflevector <4 x i8> [[TMP92]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP104:%.*]] = zext <2 x i8> [[TMP103]] to <2 x i32> -; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP106:%.*]] = zext <2 x i8> [[TMP105]] to <2 x i32> -; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <4 x i8> [[TMP98]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP108:%.*]] = zext <2 x i8> [[TMP107]] to <2 x i32> -; CHECK-NEXT: [[TMP109:%.*]] = sub <2 x i32> [[TMP106]], [[TMP108]] -; CHECK-NEXT: [[TMP110:%.*]] = shl <2 x i32> [[TMP109]], splat (i32 16) -; CHECK-NEXT: [[TMP111:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1 -; CHECK-NEXT: [[TMP112:%.*]] = sub <2 x i32> [[TMP111]], [[TMP104]] -; CHECK-NEXT: [[TMP113:%.*]] = add <2 x i32> [[TMP110]], [[TMP112]] -; CHECK-NEXT: [[TMP114:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV1]], i32 0 -; CHECK-NEXT: [[TMP115:%.*]] = sub <2 x i32> [[TMP114]], [[TMP94]] -; CHECK-NEXT: [[TMP116:%.*]] = add <2 x i32> [[TMP224]], [[TMP115]] -; CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x i32> [[TMP113]], <2 x i32> [[TMP116]], <2 x i32> -; CHECK-NEXT: [[TMP126:%.*]] = add <2 x i32> [[TMP113]], [[TMP116]] -; CHECK-NEXT: [[TMP119:%.*]] = sub <2 x i32> [[TMP116]], [[TMP113]] -; CHECK-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP126]], i32 0 -; CHECK-NEXT: [[TMP127:%.*]] = extractelement <2 x i32> [[TMP126]], i32 1 -; CHECK-NEXT: [[ADD48:%.*]] = add i32 [[TMP127]], [[TMP120]] -; CHECK-NEXT: [[TMP166:%.*]] = sub i32 [[TMP120]], [[TMP127]] -; CHECK-NEXT: [[TMP128:%.*]] = extractelement <2 x i32> [[TMP119]], i32 0 -; CHECK-NEXT: [[TMP129:%.*]] = extractelement <2 x i32> [[TMP119]], i32 1 -; CHECK-NEXT: [[ADD55:%.*]] = add i32 [[TMP129]], [[TMP128]] -; CHECK-NEXT: [[SUB60:%.*]] = sub i32 
[[TMP128]], [[TMP129]] -; CHECK-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP127]], 15 -; CHECK-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 -; CHECK-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 -; CHECK-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP129]], 15 -; CHECK-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 -; CHECK-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 -; CHECK-NEXT: [[TMP130:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 -; CHECK-NEXT: [[TMP131:%.*]] = zext <2 x i8> [[TMP130]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 ; CHECK-NEXT: [[TMP132:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 -; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> ; CHECK-NEXT: [[TMP135:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[TMP136:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP137:%.*]] = zext <2 x i8> [[TMP136]] to <2 x i32> ; CHECK-NEXT: [[TMP138:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP139:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP140:%.*]] = zext <2 x i8> [[TMP139]] to <2 x i32> -; CHECK-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP137]], [[TMP140]] -; CHECK-NEXT: [[TMP142:%.*]] = shl <2 x i32> [[TMP141]], splat (i32 16) -; CHECK-NEXT: [[TMP143:%.*]] = shufflevector <4 x i8> [[TMP132]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP144:%.*]] = zext <2 x i8> [[TMP143]] to <2 x i32> -; CHECK-NEXT: [[TMP145:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP146:%.*]] = zext <2 x i8> [[TMP145]] to <2 x i32> -; CHECK-NEXT: [[TMP147:%.*]] = shufflevector <4 x i8> [[TMP138]], <4 x i8> poison, <2 x i32> -; CHECK-NEXT: [[TMP148:%.*]] = zext <2 x i8> [[TMP147]] to <2 x i32> -; CHECK-NEXT: [[TMP149:%.*]] = sub <2 x i32> 
[[TMP146]], [[TMP148]] -; CHECK-NEXT: [[TMP150:%.*]] = shl <2 x i32> [[TMP149]], splat (i32 16) -; CHECK-NEXT: [[TMP151:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV33_1]], i32 1 -; CHECK-NEXT: [[TMP225:%.*]] = sub <2 x i32> [[TMP151]], [[TMP144]] -; CHECK-NEXT: [[TMP153:%.*]] = add <2 x i32> [[TMP150]], [[TMP225]] -; CHECK-NEXT: [[TMP154:%.*]] = insertelement <2 x i32> [[TMP131]], i32 [[CONV_1]], i32 0 -; CHECK-NEXT: [[TMP155:%.*]] = sub <2 x i32> [[TMP154]], [[TMP134]] -; CHECK-NEXT: [[TMP156:%.*]] = add <2 x i32> [[TMP142]], [[TMP155]] -; CHECK-NEXT: [[TMP157:%.*]] = add <2 x i32> [[TMP153]], [[TMP156]] -; CHECK-NEXT: [[TMP158:%.*]] = sub <2 x i32> [[TMP156]], [[TMP153]] -; CHECK-NEXT: [[TMP159:%.*]] = extractelement <2 x i32> [[TMP157]], i32 0 -; CHECK-NEXT: [[TMP160:%.*]] = extractelement <2 x i32> [[TMP157]], i32 1 -; CHECK-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP160]], [[TMP159]] -; CHECK-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP159]], [[TMP160]] -; CHECK-NEXT: [[TMP161:%.*]] = extractelement <2 x i32> [[TMP158]], i32 0 -; CHECK-NEXT: [[TMP162:%.*]] = extractelement <2 x i32> [[TMP158]], i32 1 -; CHECK-NEXT: [[ADD55_1:%.*]] = add i32 [[TMP162]], [[TMP161]] -; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 [[TMP161]], [[TMP162]] -; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP160]], 15 -; CHECK-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 -; CHECK-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 -; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP162]], 15 -; CHECK-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 -; CHECK-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; CHECK-NEXT: [[TMP163:%.*]] = lshr <2 x i32> [[TMP131]], splat (i32 15) -; CHECK-NEXT: [[TMP164:%.*]] = and <2 x i32> [[TMP163]], splat (i32 65537) -; CHECK-NEXT: [[TMP165:%.*]] = mul <2 x i32> [[TMP164]], splat (i32 65535) -; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]] -; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]] -; CHECK-NEXT: [[ADD103:%.*]] = add 
i32 [[ADD95]], [[ADD78]] -; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD95]] -; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB86_3]], [[SUB86]] -; CHECK-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB86_3]] -; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I_1]], [[ADD103]] -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP77]] -; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51_1]], [[ADD105]] -; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[CONV_2]] -; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] -; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP160]] -; CHECK-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] -; CHECK-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP127]] -; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] -; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] -; CHECK-NEXT: [[ADD105_3:%.*]] = add i32 [[ADD112]], [[XOR_I63]] -; CHECK-NEXT: [[TMP169:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[TMP181:%.*]] = zext <2 x i8> [[TMP169]] to <2 x i32> -; CHECK-NEXT: [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0 -; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP183:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0 -; CHECK-NEXT: [[TMP184:%.*]] = shufflevector <2 x i32> [[TMP183]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP191:%.*]] = sub <2 x i32> [[TMP182]], [[TMP184]] -; CHECK-NEXT: [[TMP192:%.*]] = add <2 x i32> [[TMP182]], [[TMP184]] -; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x i32> [[TMP191]], <2 x i32> [[TMP192]], <2 x i32> -; CHECK-NEXT: [[TMP195:%.*]] = lshr <2 x i32> [[TMP181]], splat (i32 15) -; CHECK-NEXT: [[TMP196:%.*]] = and <2 x i32> [[TMP195]], splat (i32 65537) -; CHECK-NEXT: [[TMP198:%.*]] = mul <2 x i32> [[TMP196]], splat (i32 65535) -; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> poison, i32 
[[ADD55]], i32 0 -; CHECK-NEXT: [[TMP203:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP205:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0 -; CHECK-NEXT: [[TMP206:%.*]] = shufflevector <2 x i32> [[TMP205]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP207:%.*]] = sub <2 x i32> [[TMP203]], [[TMP206]] -; CHECK-NEXT: [[TMP210:%.*]] = add <2 x i32> [[TMP203]], [[TMP206]] -; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP210]], <2 x i32> -; CHECK-NEXT: [[ADD94_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 1 -; CHECK-NEXT: [[ADD78_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 1 -; CHECK-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] -; CHECK-NEXT: [[TMP220:%.*]] = add <2 x i32> [[TMP194]], [[TMP168]] -; CHECK-NEXT: [[SUB102_1:%.*]] = extractelement <2 x i32> [[TMP194]], i32 0 -; CHECK-NEXT: [[SUB86_1:%.*]] = extractelement <2 x i32> [[TMP168]], i32 0 -; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x i32> [[TMP168]], <2 x i32> [[TMP194]], <2 x i32> -; CHECK-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] -; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP198]], [[TMP220]] -; CHECK-NEXT: [[TMP221:%.*]] = xor <2 x i32> [[TMP175]], [[TMP181]] -; CHECK-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] -; CHECK-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP162]] -; CHECK-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] -; CHECK-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP129]] -; CHECK-NEXT: [[XOR_I53_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 0 -; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD105_3]] -; CHECK-NEXT: [[XOR_I_1:%.*]] = extractelement <2 x i32> [[TMP221]], i32 1 -; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] -; CHECK-NEXT: [[ADD112_5:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] -; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 
[[ADD112_5]], [[XOR_I63_1]] -; CHECK-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[TMP166]] -; CHECK-NEXT: [[TMP204:%.*]] = sub i32 [[TMP166]], [[SUB51_1]] -; CHECK-NEXT: [[TMP177:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 -; CHECK-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP177]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP179:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 -; CHECK-NEXT: [[TMP180:%.*]] = shufflevector <2 x i32> [[TMP179]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP199:%.*]] = add <2 x i32> [[TMP178]], [[TMP180]] -; CHECK-NEXT: [[TMP200:%.*]] = sub <2 x i32> [[TMP178]], [[TMP180]] -; CHECK-NEXT: [[TMP201:%.*]] = shufflevector <2 x i32> [[TMP199]], <2 x i32> [[TMP200]], <2 x i32> -; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[TMP204]] -; CHECK-NEXT: [[SUB106_2:%.*]] = sub i32 [[TMP204]], [[ADD112_1]] -; CHECK-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD113_1]] -; CHECK-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] -; CHECK-NEXT: [[TMP208:%.*]] = add <2 x i32> [[TMP165]], [[TMP201]] -; CHECK-NEXT: [[TMP209:%.*]] = xor <2 x i32> [[TMP208]], [[TMP131]] -; CHECK-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP120]], 15 -; CHECK-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 -; CHECK-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 -; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] -; CHECK-NEXT: [[XOR_I63_4:%.*]] = xor i32 [[ADD_I62_2]], [[TMP120]] -; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_2]] -; CHECK-NEXT: [[TMP211:%.*]] = extractelement <2 x i32> [[TMP209]], i32 0 -; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP211]] -; CHECK-NEXT: [[TMP212:%.*]] = extractelement <2 x i32> [[TMP209]], i32 1 -; CHECK-NEXT: [[ADD112_4:%.*]] = add i32 [[ADD110_2]], [[TMP212]] -; CHECK-NEXT: [[ADD113_4:%.*]] = add i32 [[ADD112_4]], [[XOR_I63_4]] -; CHECK-NEXT: [[ADD78_4:%.*]] = add i32 
[[SUB59_2]], [[SUB60]] -; CHECK-NEXT: [[SUB86_4:%.*]] = sub i32 [[SUB60]], [[SUB59_2]] -; CHECK-NEXT: [[TMP213:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_4]], i32 0 -; CHECK-NEXT: [[TMP214:%.*]] = shufflevector <2 x i32> [[TMP213]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP215:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_4]], i32 0 -; CHECK-NEXT: [[TMP216:%.*]] = shufflevector <2 x i32> [[TMP215]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP217:%.*]] = add <2 x i32> [[TMP214]], [[TMP216]] -; CHECK-NEXT: [[TMP218:%.*]] = sub <2 x i32> [[TMP214]], [[TMP216]] -; CHECK-NEXT: [[TMP219:%.*]] = shufflevector <2 x i32> [[TMP217]], <2 x i32> [[TMP218]], <2 x i32> -; CHECK-NEXT: [[ADD105_4:%.*]] = add i32 [[SUB102_3]], [[SUB86_4]] -; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_4]], [[SUB102_3]] -; CHECK-NEXT: [[ADD_I52_4:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_4]] -; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_4]], [[CONV1]] -; CHECK-NEXT: [[TMP185:%.*]] = lshr <2 x i32> [[TMP102]], splat (i32 15) -; CHECK-NEXT: [[TMP193:%.*]] = and <2 x i32> [[TMP185]], splat (i32 65537) -; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP193]], splat (i32 65535) -; CHECK-NEXT: [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP219]] -; CHECK-NEXT: [[TMP188:%.*]] = xor <2 x i32> [[TMP187]], [[TMP102]] -; CHECK-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 -; CHECK-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 -; CHECK-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 -; CHECK-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] -; CHECK-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] -; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_4]] -; CHECK-NEXT: [[TMP189:%.*]] = extractelement <2 x i32> [[TMP188]], i32 0 -; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP189]] -; CHECK-NEXT: [[TMP190:%.*]] = extractelement <2 x i32> [[TMP188]], i32 1 -; CHECK-NEXT: 
[[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP190]] -; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1 +; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4) +; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8) +; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12) +; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1 +; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0) +; CHECK-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4) +; CHECK-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP92]], i64 8) +; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP132]], i64 12) +; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) +; 
CHECK-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP135]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP95]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP3]], i32 5 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP52]], i32 9 +; CHECK-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32> +; CHECK-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0) +; CHECK-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4) +; CHECK-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP98]], i64 8) +; CHECK-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP138]], i64 12) +; CHECK-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16) +; CHECK-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]] +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP48]] +; 
CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP70]] +; CHECK-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP70]] +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = add <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15) +; CHECK-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) +; CHECK-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535) +; CHECK-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]] +; CHECK-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]] +; CHECK-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; CHECK-NEXT: ret i32 [[ADD113_3]] ; ; THR15-LABEL: define i32 @test( ; THR15-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; THR15-NEXT: entry: -; THR15-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1 -; THR15-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] 
to i32 ; THR15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 ; THR15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 -; THR15-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1 -; THR15-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 -; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 -; THR15-NEXT: [[CONV33:%.*]] = zext i8 [[TMP1]] to i32 ; THR15-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; THR15-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] -; THR15-NEXT: [[TMP2:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 -; THR15-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP2]] to i32 ; THR15-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 ; THR15-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 -; THR15-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 -; THR15-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 -; THR15-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 -; THR15-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP5]] to i32 ; THR15-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; THR15-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] ; THR15-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 ; THR15-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 -; THR15-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1 -; THR15-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; THR15-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 -; THR15-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 -; THR15-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP20:%.*]] = zext <2 x i8> [[TMP19]] to <2 x i32> -; THR15-NEXT: [[TMP87:%.*]] = zext i8 [[TMP6]] to i32 -; 
THR15-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP22:%.*]] = zext <2 x i8> [[TMP21]] to <2 x i32> -; THR15-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP20]], [[TMP22]] -; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; THR15-NEXT: [[TMP24:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP25:%.*]] = zext <2 x i8> [[TMP24]] to <2 x i32> -; THR15-NEXT: [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; THR15-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP27:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32> -; THR15-NEXT: [[TMP28:%.*]] = sub <2 x i32> [[TMP25]], [[TMP27]] -; THR15-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], splat (i32 16) -; THR15-NEXT: [[TMP59:%.*]] = add <2 x i32> [[TMP29]], [[TMP23]] -; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP32:%.*]] = zext <2 x i8> [[TMP31]] to <2 x i32> -; THR15-NEXT: [[TMP86:%.*]] = zext i8 [[TMP7]] to i32 -; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP34:%.*]] = zext <2 x i8> [[TMP33]] to <2 x i32> -; THR15-NEXT: [[TMP35:%.*]] = sub <2 x i32> [[TMP32]], [[TMP34]] -; THR15-NEXT: [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP37:%.*]] = zext <2 x i8> [[TMP36]] to <2 x i32> -; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> -; THR15-NEXT: [[TMP40:%.*]] = sub <2 x i32> [[TMP37]], [[TMP39]] -; THR15-NEXT: [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], splat (i32 16) -; THR15-NEXT: [[TMP76:%.*]] = add <2 x i32> [[TMP41]], [[TMP35]] -; THR15-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP76]], [[TMP59]] -; 
THR15-NEXT: [[TMP42:%.*]] = sub <2 x i32> [[TMP59]], [[TMP76]] -; THR15-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP30]], i32 0 -; THR15-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP30]], i32 1 -; THR15-NEXT: [[ADD44_2:%.*]] = add i32 [[TMP44]], [[TMP43]] -; THR15-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP42]], i32 0 -; THR15-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP42]], i32 1 -; THR15-NEXT: [[ADD46_2:%.*]] = add i32 [[TMP46]], [[TMP45]] ; THR15-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4 -; THR15-NEXT: [[TMP47:%.*]] = load <2 x i8>, ptr null, align 1 ; THR15-NEXT: [[TMP48:%.*]] = load i8, ptr null, align 1 -; THR15-NEXT: [[TMP49:%.*]] = zext <2 x i8> [[TMP47]] to <2 x i32> -; THR15-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP48]] to i32 -; THR15-NEXT: [[TMP50:%.*]] = load <2 x i8>, ptr null, align 1 -; THR15-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP50]] to <2 x i32> -; THR15-NEXT: [[TMP52:%.*]] = sub <2 x i32> [[TMP49]], [[TMP51]] -; THR15-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) -; THR15-NEXT: [[TMP54:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> -; THR15-NEXT: [[TMP77:%.*]] = shufflevector <2 x i32> [[TMP54]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP55:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; THR15-NEXT: [[TMP56:%.*]] = zext <2 x i8> [[TMP55]] to <2 x i32> -; THR15-NEXT: [[TMP57:%.*]] = sub <2 x i32> [[TMP77]], [[TMP56]] -; THR15-NEXT: [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], splat (i32 16) -; THR15-NEXT: [[TMP72:%.*]] = add <2 x i32> [[TMP58]], [[TMP52]] -; THR15-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 -; THR15-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 -; THR15-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 -; THR15-NEXT: [[TMP60:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1 -; THR15-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP60]] 
to <2 x i32> -; THR15-NEXT: [[TMP62:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 -; THR15-NEXT: [[TMP63:%.*]] = zext <2 x i8> [[TMP62]] to <2 x i32> -; THR15-NEXT: [[TMP64:%.*]] = sub <2 x i32> [[TMP61]], [[TMP63]] -; THR15-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) -; THR15-NEXT: [[TMP66:%.*]] = zext <2 x i8> [[TMP65]] to <2 x i32> -; THR15-NEXT: [[TMP67:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 -; THR15-NEXT: [[TMP68:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32> -; THR15-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP66]], [[TMP68]] -; THR15-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) -; THR15-NEXT: [[TMP73:%.*]] = add <2 x i32> [[TMP70]], [[TMP64]] -; THR15-NEXT: [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0 -; THR15-NEXT: [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1 -; THR15-NEXT: [[ADD48_3:%.*]] = add i32 [[TMP75]], [[TMP74]] -; THR15-NEXT: [[SUB45_3:%.*]] = sub i32 [[TMP74]], [[TMP75]] -; THR15-NEXT: [[TMP80:%.*]] = extractelement <2 x i32> [[TMP73]], i32 0 -; THR15-NEXT: [[TMP81:%.*]] = extractelement <2 x i32> [[TMP73]], i32 1 -; THR15-NEXT: [[ADD55_3:%.*]] = add i32 [[TMP81]], [[TMP80]] -; THR15-NEXT: [[SUB47_3:%.*]] = sub i32 [[TMP80]], [[TMP81]] -; THR15-NEXT: [[ADD48_4:%.*]] = add i32 [[ADD55_3]], [[ADD48_3]] -; THR15-NEXT: [[TMP78:%.*]] = shufflevector <2 x i32> [[TMP30]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP71:%.*]] = insertelement <2 x i32> [[TMP78]], i32 [[ADD48_3]], i32 0 -; THR15-NEXT: [[TMP83:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[ADD55_3]], i32 0 -; THR15-NEXT: [[TMP79:%.*]] = sub <2 x i32> [[TMP71]], [[TMP83]] -; THR15-NEXT: [[ADD55_4:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] -; THR15-NEXT: [[TMP137:%.*]] = shufflevector <2 x i32> [[TMP42]], <2 x i32> poison, <2 x i32> -; THR15-NEXT: [[TMP82:%.*]] = insertelement <2 x i32> [[TMP137]], i32 [[SUB45_3]], i32 0 -; THR15-NEXT: 
[[TMP84:%.*]] = insertelement <2 x i32> [[TMP42]], i32 [[SUB47_3]], i32 0 -; THR15-NEXT: [[TMP85:%.*]] = sub <2 x i32> [[TMP82]], [[TMP84]] -; THR15-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_4]], [[ADD44_2]] -; THR15-NEXT: [[SUB102:%.*]] = sub i32 [[ADD44_2]], [[ADD48_4]] -; THR15-NEXT: [[SHR_I:%.*]] = lshr i32 [[CONV_3]], 15 -; THR15-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 -; THR15-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535 -; THR15-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP44]], 15 -; THR15-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537 -; THR15-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535 -; THR15-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_4]], [[ADD46_2]] -; THR15-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD46_2]], [[ADD55_4]] -; THR15-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[TMP86]], 15 -; THR15-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 -; THR15-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 -; THR15-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[TMP87]], 15 -; THR15-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 -; THR15-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 -; THR15-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP79]], i32 0 -; THR15-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP79]], i32 1 -; THR15-NEXT: [[ADD94_2:%.*]] = add i32 [[TMP88]], [[TMP89]] -; THR15-NEXT: [[SUB102_2:%.*]] = sub i32 [[TMP89]], [[TMP88]] -; THR15-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 -; THR15-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 -; THR15-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 -; THR15-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP85]], i32 0 -; THR15-NEXT: [[TMP91:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 -; THR15-NEXT: [[ADD94_3:%.*]] = add i32 [[TMP90]], [[TMP91]] -; THR15-NEXT: [[SUB102_3:%.*]] = sub i32 [[TMP91]], [[TMP90]] -; THR15-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV]], 15 -; THR15-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 -; THR15-NEXT: [[MUL_I51_3:%.*]] = mul i32 
[[AND_I50_3]], 65535 -; THR15-NEXT: [[TMP92:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1 -; THR15-NEXT: [[TMP93:%.*]] = zext <2 x i8> [[TMP92]] to <2 x i32> +; THR15-NEXT: [[TMP1:%.*]] = load i8, ptr null, align 1 +; THR15-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 ; THR15-NEXT: [[TMP143:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 -; THR15-NEXT: [[TMP94:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP95:%.*]] = zext <2 x i8> [[TMP94]] to <2 x i32> ; THR15-NEXT: [[TMP146:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 -; THR15-NEXT: [[TMP96:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> ; THR15-NEXT: [[TMP147:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP99:%.*]] = zext <2 x i8> [[TMP98]] to <2 x i32> -; THR15-NEXT: [[TMP100:%.*]] = sub <2 x i32> [[TMP97]], [[TMP99]] -; THR15-NEXT: [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], splat (i32 16) -; THR15-NEXT: [[TMP102:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP102]] to <2 x i32> -; THR15-NEXT: [[TMP104:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP105:%.*]] = zext <2 x i8> [[TMP104]] to <2 x i32> -; THR15-NEXT: [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP107:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32> -; THR15-NEXT: [[TMP108:%.*]] = sub <2 x i32> [[TMP105]], [[TMP107]] -; THR15-NEXT: [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], splat (i32 16) -; THR15-NEXT: [[TMP110:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV33]], i32 1 -; THR15-NEXT: [[TMP111:%.*]] = sub <2 x i32> [[TMP110]], [[TMP103]] -; THR15-NEXT: [[TMP112:%.*]] = add <2 x i32> [[TMP109]], [[TMP111]] -; THR15-NEXT: 
[[TMP113:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV]], i32 0 -; THR15-NEXT: [[TMP114:%.*]] = sub <2 x i32> [[TMP113]], [[TMP95]] -; THR15-NEXT: [[TMP115:%.*]] = add <2 x i32> [[TMP101]], [[TMP114]] -; THR15-NEXT: [[TMP116:%.*]] = shufflevector <2 x i32> [[TMP112]], <2 x i32> [[TMP115]], <2 x i32> -; THR15-NEXT: [[TMP117:%.*]] = add <2 x i32> [[TMP112]], [[TMP115]] -; THR15-NEXT: [[TMP118:%.*]] = sub <2 x i32> [[TMP115]], [[TMP112]] -; THR15-NEXT: [[TMP119:%.*]] = extractelement <2 x i32> [[TMP117]], i32 0 -; THR15-NEXT: [[TMP120:%.*]] = extractelement <2 x i32> [[TMP117]], i32 1 -; THR15-NEXT: [[ADD48:%.*]] = add i32 [[TMP120]], [[TMP119]] -; THR15-NEXT: [[SUB51:%.*]] = sub i32 [[TMP119]], [[TMP120]] -; THR15-NEXT: [[TMP121:%.*]] = extractelement <2 x i32> [[TMP118]], i32 0 -; THR15-NEXT: [[TMP122:%.*]] = extractelement <2 x i32> [[TMP118]], i32 1 -; THR15-NEXT: [[ADD55:%.*]] = add i32 [[TMP122]], [[TMP121]] -; THR15-NEXT: [[SUB59:%.*]] = sub i32 [[TMP121]], [[TMP122]] -; THR15-NEXT: [[SHR_I59:%.*]] = lshr i32 [[TMP120]], 15 -; THR15-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 -; THR15-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 -; THR15-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[TMP122]], 15 -; THR15-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 -; THR15-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 -; THR15-NEXT: [[TMP123:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 -; THR15-NEXT: [[TMP124:%.*]] = zext <2 x i8> [[TMP123]] to <2 x i32> +; THR15-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 ; THR15-NEXT: [[TMP148:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 -; THR15-NEXT: [[TMP125:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP126:%.*]] = zext <2 x i8> [[TMP125]] to <2 x i32> ; THR15-NEXT: [[TMP152:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; THR15-NEXT: [[TMP127:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: 
[[TMP128:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32> ; THR15-NEXT: [[TMP153:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; THR15-NEXT: [[TMP129:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP130:%.*]] = zext <2 x i8> [[TMP129]] to <2 x i32> -; THR15-NEXT: [[TMP131:%.*]] = sub <2 x i32> [[TMP128]], [[TMP130]] -; THR15-NEXT: [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], splat (i32 16) -; THR15-NEXT: [[TMP138:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP139:%.*]] = zext <2 x i8> [[TMP138]] to <2 x i32> -; THR15-NEXT: [[TMP154:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP155:%.*]] = zext <2 x i8> [[TMP154]] to <2 x i32> -; THR15-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> -; THR15-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> -; THR15-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP155]], [[TMP134]] -; THR15-NEXT: [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) -; THR15-NEXT: [[TMP140:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV33_1]], i32 1 -; THR15-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP140]], [[TMP139]] -; THR15-NEXT: [[TMP171:%.*]] = add <2 x i32> [[TMP170]], [[TMP141]] -; THR15-NEXT: [[TMP186:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV_1]], i32 0 -; THR15-NEXT: [[TMP187:%.*]] = sub <2 x i32> [[TMP186]], [[TMP126]] -; THR15-NEXT: [[TMP142:%.*]] = add <2 x i32> [[TMP132]], [[TMP187]] -; THR15-NEXT: [[TMP136:%.*]] = add <2 x i32> [[TMP171]], [[TMP142]] -; THR15-NEXT: [[TMP149:%.*]] = sub <2 x i32> [[TMP142]], [[TMP171]] -; THR15-NEXT: [[TMP144:%.*]] = extractelement <2 x i32> [[TMP136]], i32 0 -; THR15-NEXT: [[TMP145:%.*]] = extractelement <2 x i32> [[TMP136]], i32 1 -; THR15-NEXT: [[ADD48_2:%.*]] = add i32 [[TMP145]], [[TMP144]] -; THR15-NEXT: [[SUB45_1:%.*]] = sub i32 [[TMP144]], [[TMP145]] -; THR15-NEXT: [[TMP150:%.*]] = extractelement <2 x 
i32> [[TMP149]], i32 0 -; THR15-NEXT: [[TMP151:%.*]] = extractelement <2 x i32> [[TMP149]], i32 1 -; THR15-NEXT: [[ADD48_1:%.*]] = add i32 [[TMP151]], [[TMP150]] -; THR15-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP150]], [[TMP151]] -; THR15-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP145]], 15 -; THR15-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 -; THR15-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 -; THR15-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP151]], 15 -; THR15-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 -; THR15-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; THR15-NEXT: [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], splat (i32 15) -; THR15-NEXT: [[TMP157:%.*]] = and <2 x i32> [[TMP156]], splat (i32 65537) -; THR15-NEXT: [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], splat (i32 65535) -; THR15-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_2]], [[ADD48]] -; THR15-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_2]] -; THR15-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] -; THR15-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] -; THR15-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]] -; THR15-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]] -; THR15-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] -; THR15-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[CONV_3]] -; THR15-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]] -; THR15-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP44]] -; THR15-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] -; THR15-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP145]] -; THR15-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] -; THR15-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[TMP120]] -; THR15-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] -; THR15-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] -; THR15-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]] -; THR15-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD48_1]], [[ADD55]] -; THR15-NEXT: [[SUB86_1:%.*]] = sub i32 
[[ADD55]], [[ADD48_1]] -; THR15-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]] -; THR15-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] -; THR15-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]] -; THR15-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] -; THR15-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]] -; THR15-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[TMP86]] -; THR15-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]] -; THR15-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[TMP87]] -; THR15-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] -; THR15-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[TMP151]] -; THR15-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] -; THR15-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[TMP122]] -; THR15-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD113]] -; THR15-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] -; THR15-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] -; THR15-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]] -; THR15-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB45_1]], [[SUB51]] -; THR15-NEXT: [[SUB86_2:%.*]] = sub i32 [[SUB51]], [[SUB45_1]] -; THR15-NEXT: [[TMP159:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0 -; THR15-NEXT: [[TMP160:%.*]] = shufflevector <2 x i32> [[TMP159]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP161:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0 -; THR15-NEXT: [[TMP162:%.*]] = shufflevector <2 x i32> [[TMP161]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP163:%.*]] = add <2 x i32> [[TMP160]], [[TMP162]] -; THR15-NEXT: [[TMP164:%.*]] = sub <2 x i32> [[TMP160]], [[TMP162]] -; THR15-NEXT: [[TMP165:%.*]] = shufflevector <2 x i32> [[TMP163]], <2 x i32> [[TMP164]], <2 x i32> -; THR15-NEXT: [[ADD105_2:%.*]] = add i32 [[SUB102_2]], [[SUB86_2]] -; THR15-NEXT: [[SUB106_2:%.*]] = sub i32 
[[SUB86_2]], [[SUB102_2]] -; THR15-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]] -; THR15-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] -; THR15-NEXT: [[TMP166:%.*]] = add <2 x i32> [[TMP158]], [[TMP165]] -; THR15-NEXT: [[TMP167:%.*]] = xor <2 x i32> [[TMP166]], [[TMP124]] -; THR15-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[TMP119]], 15 -; THR15-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 -; THR15-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 -; THR15-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] -; THR15-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[TMP119]] -; THR15-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]] -; THR15-NEXT: [[TMP168:%.*]] = extractelement <2 x i32> [[TMP167]], i32 0 -; THR15-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP168]] -; THR15-NEXT: [[TMP169:%.*]] = extractelement <2 x i32> [[TMP167]], i32 1 -; THR15-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP169]] -; THR15-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] -; THR15-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB51_1]], [[SUB59]] -; THR15-NEXT: [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB51_1]] -; THR15-NEXT: [[TMP172:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0 -; THR15-NEXT: [[TMP173:%.*]] = shufflevector <2 x i32> [[TMP172]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP174:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0 -; THR15-NEXT: [[TMP175:%.*]] = shufflevector <2 x i32> [[TMP174]], <2 x i32> poison, <2 x i32> zeroinitializer -; THR15-NEXT: [[TMP176:%.*]] = add <2 x i32> [[TMP173]], [[TMP175]] -; THR15-NEXT: [[TMP177:%.*]] = sub <2 x i32> [[TMP173]], [[TMP175]] -; THR15-NEXT: [[TMP178:%.*]] = shufflevector <2 x i32> [[TMP176]], <2 x i32> [[TMP177]], <2 x i32> -; THR15-NEXT: [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]] -; THR15-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] -; THR15-NEXT: [[ADD_I52_3:%.*]] = add i32 
[[MUL_I51_3]], [[ADD105_3]] -; THR15-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]] -; THR15-NEXT: [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], splat (i32 15) -; THR15-NEXT: [[TMP180:%.*]] = and <2 x i32> [[TMP179]], splat (i32 65537) -; THR15-NEXT: [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], splat (i32 65535) -; THR15-NEXT: [[TMP182:%.*]] = add <2 x i32> [[TMP181]], [[TMP178]] -; THR15-NEXT: [[TMP183:%.*]] = xor <2 x i32> [[TMP182]], [[TMP93]] -; THR15-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 -; THR15-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 -; THR15-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 -; THR15-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] -; THR15-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] -; THR15-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]] -; THR15-NEXT: [[TMP184:%.*]] = extractelement <2 x i32> [[TMP183]], i32 0 -; THR15-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP184]] -; THR15-NEXT: [[TMP185:%.*]] = extractelement <2 x i32> [[TMP183]], i32 1 -; THR15-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP185]] -; THR15-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] +; THR15-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; THR15-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; THR15-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; THR15-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; THR15-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP10]], i64 0) +; THR15-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP14]], i64 4) +; THR15-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP16]], <4 x i8> [[TMP2]], i64 8) +; THR15-NEXT: [[TMP18:%.*]] = call <16 x i8> 
@llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP17]], <4 x i8> [[TMP6]], i64 12) +; THR15-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> +; THR15-NEXT: [[TMP20:%.*]] = load <4 x i8>, ptr null, align 1 +; THR15-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP11]], i64 0) +; THR15-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP20]], i64 4) +; THR15-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP143]], i64 8) +; THR15-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP23]], <4 x i8> [[TMP148]], i64 12) +; THR15-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> +; THR15-NEXT: [[TMP26:%.*]] = sub <16 x i32> [[TMP19]], [[TMP25]] +; THR15-NEXT: [[TMP27:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP28:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) +; THR15-NEXT: [[TMP29:%.*]] = shufflevector <2 x i8> [[TMP28]], <2 x i8> poison, <4 x i32> +; THR15-NEXT: [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP29]], <16 x i32> +; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <16 x i32> +; THR15-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> +; THR15-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <16 x i32> +; THR15-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> +; THR15-NEXT: [[TMP35:%.*]] = insertelement <16 x i8> [[TMP34]], i8 [[TMP1]], i32 5 +; THR15-NEXT: [[TMP36:%.*]] = insertelement <16 x i8> [[TMP35]], i8 [[TMP48]], i32 9 +; THR15-NEXT: [[TMP37:%.*]] = zext <16 x i8> [[TMP36]] to <16 x i32> +; THR15-NEXT: [[TMP38:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; THR15-NEXT: 
[[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP13]], i64 0) +; THR15-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP38]], i64 4) +; THR15-NEXT: [[TMP41:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP147]], i64 8) +; THR15-NEXT: [[TMP42:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP41]], <4 x i8> [[TMP153]], i64 12) +; THR15-NEXT: [[TMP43:%.*]] = zext <16 x i8> [[TMP42]] to <16 x i32> +; THR15-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP45:%.*]] = sub <16 x i32> [[TMP37]], [[TMP44]] +; THR15-NEXT: [[TMP46:%.*]] = shl <16 x i32> [[TMP45]], splat (i32 16) +; THR15-NEXT: [[TMP47:%.*]] = add <16 x i32> [[TMP46]], [[TMP27]] +; THR15-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP49:%.*]] = add <16 x i32> [[TMP47]], [[TMP70]] +; THR15-NEXT: [[TMP50:%.*]] = sub <16 x i32> [[TMP47]], [[TMP70]] +; THR15-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> +; THR15-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP53:%.*]] = add <16 x i32> [[TMP51]], [[TMP52]] +; THR15-NEXT: [[TMP54:%.*]] = sub <16 x i32> [[TMP51]], [[TMP52]] +; THR15-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; THR15-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP57:%.*]] = sub <16 x i32> [[TMP55]], [[TMP56]] +; THR15-NEXT: [[TMP58:%.*]] = add <16 x i32> [[TMP55]], [[TMP56]] +; THR15-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> +; THR15-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; THR15-NEXT: [[TMP61:%.*]] = add <16 x i32> 
[[TMP59]], [[TMP60]] +; THR15-NEXT: [[TMP62:%.*]] = sub <16 x i32> [[TMP59]], [[TMP60]] +; THR15-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> +; THR15-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP19]], <16 x i32> +; THR15-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15) +; THR15-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) +; THR15-NEXT: [[TMP67:%.*]] = mul <16 x i32> [[TMP66]], splat (i32 65535) +; THR15-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP63]] +; THR15-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP64]] +; THR15-NEXT: [[ADD113_3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; THR15-NEXT: ret i32 [[ADD113_3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 5b0f4a69de4c3..7723746dda301 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -342,8 +342,8 @@ define void @reduce_or_2() { ; ZVFHMIN-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer ; ZVFHMIN-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; ZVFHMIN-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVFHMIN-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] +; ZVFHMIN-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVFHMIN-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] ; ZVFHMIN: 7: ; ZVFHMIN-NEXT: ret void ; ZVFHMIN: 8: @@ -356,8 +356,8 @@ define void @reduce_or_2() { ; ZVL128-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 ; ZVL128-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer ; ZVL128-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; 
ZVL128-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVL128-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] +; ZVL128-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) +; ZVL128-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] ; ZVL128: 7: ; ZVL128-NEXT: ret void ; ZVL128: 8: @@ -365,16 +365,14 @@ define void @reduce_or_2() { ; ; ZVL256-LABEL: @reduce_or_2( ; ZVL256-NEXT: [[TMP1:%.*]] = shl i64 0, 0 -; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 15 -; ZVL256-NEXT: [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer -; ZVL256-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> , i64 [[TMP1]], i32 6 -; ZVL256-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer -; ZVL256-NEXT: [[RDX_OP:%.*]] = or <16 x i1> [[TMP3]], [[TMP5]] -; ZVL256-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[RDX_OP]]) -; ZVL256-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] -; ZVL256: 7: +; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <32 x i64> , i64 [[TMP1]], i32 15 +; ZVL256-NEXT: [[TMP3:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> +; ZVL256-NEXT: [[TMP4:%.*]] = icmp ult <32 x i64> [[TMP3]], zeroinitializer +; ZVL256-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP4]]) +; ZVL256-NEXT: br i1 [[TMP5]], label [[TMP7:%.*]], label [[TMP6:%.*]] +; ZVL256: 6: ; ZVL256-NEXT: ret void -; ZVL256: 8: +; ZVL256: 7: ; ZVL256-NEXT: ret void ; ; ZVL512-LABEL: @reduce_or_2( From 86806baf878d523025426c8ff8299952dca132a3 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 28 Feb 2025 08:04:09 -0800 Subject: [PATCH 105/123] [ExecutionEngine] Avoid repeated hash lookups (NFC) (#129191) --- llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp 
b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 944fca000d61f..1989d8ca101e1 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -98,9 +98,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, std::lock_guard Lock(Mutex); // This is the maximum range whose permission have been possibly modified - Allocations[MinAddr].Size = MaxAddr - MinAddr; - Allocations[MinAddr].DeinitializationActions = - std::move(*DeinitializeActions); + auto &Alloc = Allocations[MinAddr]; + Alloc.Size = MaxAddr - MinAddr; + Alloc.DeinitializationActions = std::move(*DeinitializeActions); Reservations[AI.MappingBase.toPtr()].Allocations.push_back(MinAddr); } From 7c9ab982df132513e566252a1dd1843d0f131e56 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 28 Feb 2025 08:04:26 -0800 Subject: [PATCH 106/123] [MCA] Avoid repeated hash lookups (NFC) (#129192) --- llvm/lib/MCA/InstrBuilder.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 2cb1908695308..2bac99b6309af 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -634,16 +634,14 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI, bool IsVariadic = MCDesc.isVariadic(); if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) { auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID); - Descriptors[DKey] = std::move(ID); - return *Descriptors[DKey]; + return *(Descriptors[DKey] = std::move(ID)); } auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID); assert( !VariantDescriptors.contains(VDKey) && "Expected VariantDescriptors to not already have a value for this key."); - VariantDescriptors[VDKey] = std::move(ID); - return *VariantDescriptors[VDKey]; + return *(VariantDescriptors[VDKey] = std::move(ID)); } Expected From 1a1cc50b20ca76f7e789b6b7465133cca248d549 Mon Sep 17 00:00:00 2001 From: ShatianWang 
<38512325+ShatianWang@users.noreply.github.com> Date: Fri, 28 Feb 2025 11:06:52 -0500 Subject: [PATCH 107/123] [BOLT] Report flow conservation scores (#127954) Add two additional profile quality stats for CG (call graph) and CFG (control flow graph) flow conservations besides the CFG discontinuity stats introduced in #109683. The two new stats quantify how different "in-flow" is from "out-flow" in the following cases where they should be equal. The smaller the reported stats, the better the flow conservations are. CG flow conservation: for each function that is not a program entry, the number of times the function is called according to CG ("in-flow") should be equal to the number of times the transition from an entry basic block of the function to another basic block within the function is recorded ("out-flow"). CFG flow conservation: for each basic block that is not a function entry or exit, the number of times the transition into this basic block from another basic block within the function is recorded ("in-flow") should be equal to the number of times the transition from this basic block to another basic block within the function is recorded ("out-flow"). Use `-v=1` for more detailed bucketed stats, and use `-v=2` to dump functions / basic blocks with bad flow conservations. 
--- bolt/include/bolt/Passes/ContinuityStats.h | 61 -- .../include/bolt/Passes/ProfileQualityStats.h | 98 +++ bolt/lib/Passes/CMakeLists.txt | 2 +- bolt/lib/Passes/ContinuityStats.cpp | 250 -------- bolt/lib/Passes/ProfileQualityStats.cpp | 579 ++++++++++++++++++ bolt/lib/Rewrite/BinaryPassManager.cpp | 4 +- .../test/X86/cfg-discontinuity-reporting.test | 4 - bolt/test/X86/profile-quality-reporting.test | 4 + 8 files changed, 684 insertions(+), 318 deletions(-) delete mode 100644 bolt/include/bolt/Passes/ContinuityStats.h create mode 100644 bolt/include/bolt/Passes/ProfileQualityStats.h delete mode 100644 bolt/lib/Passes/ContinuityStats.cpp create mode 100644 bolt/lib/Passes/ProfileQualityStats.cpp delete mode 100644 bolt/test/X86/cfg-discontinuity-reporting.test create mode 100644 bolt/test/X86/profile-quality-reporting.test diff --git a/bolt/include/bolt/Passes/ContinuityStats.h b/bolt/include/bolt/Passes/ContinuityStats.h deleted file mode 100644 index bd4d491ad4a55..0000000000000 --- a/bolt/include/bolt/Passes/ContinuityStats.h +++ /dev/null @@ -1,61 +0,0 @@ -//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass checks how well the BOLT input profile satisfies the following -// "CFG continuity" property of a perfect profile: -// -// Each positive-execution-count block in the function’s CFG -// should be *reachable* from a positive-execution-count function -// entry block through a positive-execution-count path. -// -// More specifically, for each of the hottest 1000 functions, the pass -// calculates the function’s fraction of basic block execution counts -// that is *unreachable*. 
It then reports the 95th percentile of the -// distribution of the 1000 unreachable fractions in a single BOLT-INFO line. -// The smaller the reported value is, the better the BOLT profile -// satisfies the CFG continuity property. - -// The default value of 1000 above can be changed via the hidden BOLT option -// `-num-functions-for-continuity-check=[N]`. -// If more detailed stats are needed, `-v=1` can be used: the hottest N -// functions will be grouped into 5 equally-sized buckets, from the hottest -// to the coldest; for each bucket, various summary statistics of the -// distribution of the unreachable fractions and the raw unreachable execution -// counts will be reported. -// -//===----------------------------------------------------------------------===// - -#ifndef BOLT_PASSES_CONTINUITYSTATS_H -#define BOLT_PASSES_CONTINUITYSTATS_H - -#include "bolt/Passes/BinaryPasses.h" -#include - -namespace llvm { - -class raw_ostream; - -namespace bolt { -class BinaryContext; - -/// Compute and report to the user the function CFG continuity quality -class PrintContinuityStats : public BinaryFunctionPass { -public: - explicit PrintContinuityStats(const cl::opt &PrintPass) - : BinaryFunctionPass(PrintPass) {} - - bool shouldOptimize(const BinaryFunction &BF) const override; - const char *getName() const override { return "continuity-stats"; } - bool shouldPrint(const BinaryFunction &) const override { return false; } - Error runOnFunctions(BinaryContext &BC) override; -}; - -} // namespace bolt -} // namespace llvm - -#endif // BOLT_PASSES_CONTINUITYSTATS_H diff --git a/bolt/include/bolt/Passes/ProfileQualityStats.h b/bolt/include/bolt/Passes/ProfileQualityStats.h new file mode 100644 index 0000000000000..86fc88cefc10e --- /dev/null +++ b/bolt/include/bolt/Passes/ProfileQualityStats.h @@ -0,0 +1,98 @@ +//===- bolt/Passes/ProfileQualityStats.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass checks the BOLT input profile quality. +// +// Check 1: how well the input profile satisfies the following +// "CFG continuity" property of a perfect profile: +// +// Each positive-execution-count block in the function’s CFG +// is *reachable* from a positive-execution-count function +// entry block through a positive-execution-count path. +// +// More specifically, for each of the hottest 1000 functions, the pass +// calculates the function’s fraction of basic block execution counts +// that is *unreachable*. It then reports the 95th percentile of the +// distribution of the 1000 unreachable fractions in a single BOLT-INFO line. +// The smaller the reported value is, the better the BOLT profile +// satisfies the CFG continuity property. +// +// Check 2: how well the input profile satisfies the "call graph flow +// conservation" property of a perfect profile: +// +// For each function that is not a program entry, the number of times the +// function is called is equal to the net CFG outflow of the +// function's entry block(s). +// +// More specifically, for each of the hottest 1000 functions, the pass obtains +// A = number of times the function is called, B = the function's entry blocks' +// inflow, C = the function's entry blocks' outflow, where B and C are computed +// using the function's weighted CFG. It then computes gap = 1 - MIN(A,C-B) / +// MAX(A, C-B). The pass reports the 95th percentile of the distribution of the +// 1000 gaps in a single BOLT-INFO line. The smaller the reported value is, the +// better the BOLT profile satisfies the call graph flow conservation property. 
+// +// Check 3: how well the input profile satisfies the "function CFG flow +// conservation property" of a perfect profile: +// +// A non-entry non-exit basic block's inflow is equal to its outflow. +// +// More specifically, for each of the hottest 1000 functions, the pass loops +// over its basic blocks that are non-entry and non-exit, and for each block +// obtains a block gap = 1 - MIN(block inflow, block outflow, block call count +// if any) / MAX(block inflow, block outflow, block call count if any). It then +// aggregates the block gaps into 2 values for the function: "weighted" is the +// weighted average of the block conservation gaps, where the weights depend on +// each block's execution count and instruction count; "worst" is the worst +// (biggest) block gap acorss all basic blocks in the function with an execution +// count of > 500. The pass then reports the 95th percentile of the weighted and +// worst values of the 1000 functions in a single BOLT-INFO line. The smaller +// the reported values are, the better the BOLT profile satisfies the function +// CFG flow conservation property. +// +// The default value of 1000 above can be changed via the hidden BOLT option +// `-top-functions-for-profile-quality-check=[N]`. +// The default reporting of the 95th percentile can be changed via the hidden +// BOLT option `-percentile-for-profile-quality-check=[M]`. +// +// If more detailed stats are needed, `-v=1` can be used: the hottest N +// functions will be grouped into 5 equally-sized buckets, from the hottest +// to the coldest; for each bucket, various summary statistics of the +// profile quality will be reported. 
+// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_PASSES_PROFILEQUALITYSTATS_H +#define BOLT_PASSES_PROFILEQUALITYSTATS_H + +#include "bolt/Passes/BinaryPasses.h" +#include + +namespace llvm { + +class raw_ostream; + +namespace bolt { +class BinaryContext; + +/// Compute and report to the user the profile quality +class PrintProfileQualityStats : public BinaryFunctionPass { +public: + explicit PrintProfileQualityStats(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + + bool shouldOptimize(const BinaryFunction &BF) const override; + const char *getName() const override { return "profile-quality-stats"; } + bool shouldPrint(const BinaryFunction &) const override { return false; } + Error runOnFunctions(BinaryContext &BC) override; +}; + +} // namespace bolt +} // namespace llvm + +#endif // BOLT_PASSES_PROFILEQUALITYSTATS_H diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index adc91658050a6..3864255a09ebe 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -27,7 +27,7 @@ add_llvm_library(LLVMBOLTPasses PatchEntries.cpp PettisAndHansen.cpp PLTCall.cpp - ContinuityStats.cpp + ProfileQualityStats.cpp RegAnalysis.cpp RegReAssign.cpp ReorderAlgorithm.cpp diff --git a/bolt/lib/Passes/ContinuityStats.cpp b/bolt/lib/Passes/ContinuityStats.cpp deleted file mode 100644 index b32365b59065d..0000000000000 --- a/bolt/lib/Passes/ContinuityStats.cpp +++ /dev/null @@ -1,250 +0,0 @@ -//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the continuity stats calculation pass. 
-// -//===----------------------------------------------------------------------===// - -#include "bolt/Passes/ContinuityStats.h" -#include "bolt/Core/BinaryBasicBlock.h" -#include "bolt/Core/BinaryFunction.h" -#include "bolt/Utils/CommandLineOpts.h" -#include "llvm/Support/CommandLine.h" -#include -#include -#include - -#define DEBUG_TYPE "bolt-opts" - -using namespace llvm; -using namespace bolt; - -namespace opts { -extern cl::opt Verbosity; -cl::opt NumFunctionsForContinuityCheck( - "num-functions-for-continuity-check", - cl::desc("number of hottest functions to print aggregated " - "CFG discontinuity stats of."), - cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); -} // namespace opts - -namespace { -using FunctionListType = std::vector; -using function_iterator = FunctionListType::iterator; - -template -void printDistribution(raw_ostream &OS, std::vector &values, - bool Fraction = false) { - if (values.empty()) - return; - // Sort values from largest to smallest and print the MAX, TOP 1%, 5%, 10%, - // 20%, 50%, 80%, MIN. If Fraction is true, then values are printed as - // fractions instead of integers. 
- std::sort(values.begin(), values.end()); - - auto printLine = [&](std::string Text, double Percent) { - int Rank = int(values.size() * (1.0 - Percent / 100)); - if (Percent == 0) - Rank = values.size() - 1; - if (Fraction) - OS << " " << Text << std::string(9 - Text.length(), ' ') << ": " - << format("%.2lf%%", values[Rank] * 100) << "\n"; - else - OS << " " << Text << std::string(9 - Text.length(), ' ') << ": " - << values[Rank] << "\n"; - }; - - printLine("MAX", 0); - const int percentages[] = {1, 5, 10, 20, 50, 80}; - for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) { - printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]); - } - printLine("MIN", 100); -} - -void printCFGContinuityStats(raw_ostream &OS, - iterator_range &Functions) { - // Given a perfect profile, every positive-execution-count BB should be - // connected to an entry of the function through a positive-execution-count - // directed path in the control flow graph. - std::vector NumUnreachables; - std::vector SumECUnreachables; - std::vector FractionECUnreachables; - - for (auto it = Functions.begin(); it != Functions.end(); ++it) { - const BinaryFunction *Function = *it; - if (Function->size() <= 1) - continue; - - // Compute the sum of all BB execution counts (ECs). - size_t NumPosECBBs = 0; - size_t SumAllBBEC = 0; - for (const BinaryBasicBlock &BB : *Function) { - const size_t BBEC = BB.getKnownExecutionCount(); - NumPosECBBs += BBEC > 0 ? 1 : 0; - SumAllBBEC += BBEC; - } - - // Perform BFS on subgraph of CFG induced by positive weight edges. - // Compute the number of BBs reachable from the entry(s) of the function and - // the sum of their execution counts (ECs). - std::unordered_map IndexToBB; - std::unordered_set Visited; - std::queue Queue; - for (const BinaryBasicBlock &BB : *Function) { - // Make sure BB.getIndex() is not already in IndexToBB. 
- assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end()); - IndexToBB[BB.getIndex()] = &BB; - if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) { - Queue.push(BB.getIndex()); - Visited.insert(BB.getIndex()); - } - } - while (!Queue.empty()) { - const unsigned BBIndex = Queue.front(); - const BinaryBasicBlock *BB = IndexToBB[BBIndex]; - Queue.pop(); - auto SuccBIIter = BB->branch_info_begin(); - for (const BinaryBasicBlock *Succ : BB->successors()) { - const uint64_t Count = SuccBIIter->Count; - if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) { - ++SuccBIIter; - continue; - } - if (!Visited.insert(Succ->getIndex()).second) { - ++SuccBIIter; - continue; - } - Queue.push(Succ->getIndex()); - ++SuccBIIter; - } - } - - const size_t NumReachableBBs = Visited.size(); - - // Loop through Visited, and sum the corresponding BBs' execution counts - // (ECs). - size_t SumReachableBBEC = 0; - for (const unsigned BBIndex : Visited) { - const BinaryBasicBlock *BB = IndexToBB[BBIndex]; - SumReachableBBEC += BB->getKnownExecutionCount(); - } - - const size_t NumPosECBBsUnreachableFromEntry = - NumPosECBBs - NumReachableBBs; - const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; - const double FractionECUnreachable = - (double)SumUnreachableBBEC / SumAllBBEC; - - if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { - OS << "Non-trivial CFG discontinuity observed in function " - << Function->getPrintName() << "\n"; - LLVM_DEBUG(Function->dump()); - } - - NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry); - SumECUnreachables.push_back(SumUnreachableBBEC); - FractionECUnreachables.push_back(FractionECUnreachable); - } - - if (FractionECUnreachables.empty()) - return; - - std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end()); - const int Rank = int(FractionECUnreachables.size() * 0.95); - OS << format("top 5%% function CFG discontinuity is %.2lf%%\n", - FractionECUnreachables[Rank] * 100); - - if (opts::Verbosity >= 
1) { - OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n" - << "distribution of NUM(unreachable POS BBs) among all focal " - "functions\n"; - printDistribution(OS, NumUnreachables); - - OS << "distribution of SUM_EC(unreachable POS BBs) among all focal " - "functions\n"; - printDistribution(OS, SumECUnreachables); - - OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all " - "POS BBs))] among all focal functions\n"; - printDistribution(OS, FractionECUnreachables, /*Fraction=*/true); - } -} - -void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, - size_t NumTopFunctions) { - // Sort the list of functions by execution counts (reverse). - llvm::sort(ValidFunctions, - [&](const BinaryFunction *A, const BinaryFunction *B) { - return A->getKnownExecutionCount() > B->getKnownExecutionCount(); - }); - - const size_t RealNumTopFunctions = - std::min(NumTopFunctions, ValidFunctions.size()); - - iterator_range Functions( - ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions); - - BC.outs() << format("BOLT-INFO: among the hottest %zu functions ", - RealNumTopFunctions); - printCFGContinuityStats(BC.outs(), Functions); - - // Print more detailed bucketed stats if requested. - if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { - const size_t PerBucketSize = RealNumTopFunctions / 5; - BC.outs() << format( - "Detailed stats for 5 buckets, each with %zu functions:\n", - PerBucketSize); - - // For each bucket, print the CFG continuity stats of the functions in the - // bucket. 
- for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) { - const size_t StartIndex = BucketIndex * PerBucketSize; - const size_t EndIndex = StartIndex + PerBucketSize; - iterator_range Functions( - ValidFunctions.begin() + StartIndex, - ValidFunctions.begin() + EndIndex); - const size_t MaxFunctionExecutionCount = - ValidFunctions[StartIndex]->getKnownExecutionCount(); - const size_t MinFunctionExecutionCount = - ValidFunctions[EndIndex - 1]->getKnownExecutionCount(); - BC.outs() << format("----------------\n| Bucket %zu: " - "|\n----------------\n", - BucketIndex + 1) - << format( - "execution counts of the %zu functions in the bucket: " - "%zu-%zu\n", - EndIndex - StartIndex, MinFunctionExecutionCount, - MaxFunctionExecutionCount); - printCFGContinuityStats(BC.outs(), Functions); - } - } -} -} // namespace - -bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const { - if (BF.empty() || !BF.hasValidProfile()) - return false; - - return BinaryFunctionPass::shouldOptimize(BF); -} - -Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) { - // Create a list of functions with valid profiles. - FunctionListType ValidFunctions; - for (const auto &BFI : BC.getBinaryFunctions()) { - const BinaryFunction *Function = &BFI.second; - if (PrintContinuityStats::shouldOptimize(*Function)) - ValidFunctions.push_back(Function); - } - if (ValidFunctions.empty() || opts::NumFunctionsForContinuityCheck == 0) - return Error::success(); - - printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck); - return Error::success(); -} diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp new file mode 100644 index 0000000000000..78e6412f56ba1 --- /dev/null +++ b/bolt/lib/Passes/ProfileQualityStats.cpp @@ -0,0 +1,579 @@ +//===- bolt/Passes/ProfileQualityStats.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the profile quality stats calculation pass. +// +//===----------------------------------------------------------------------===// + +#include "bolt/Passes/ProfileQualityStats.h" +#include "bolt/Core/BinaryBasicBlock.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Utils/CommandLineOpts.h" +#include "llvm/Support/CommandLine.h" +#include +#include +#include + +using namespace llvm; +using namespace bolt; + +namespace opts { +extern cl::opt Verbosity; +cl::opt TopFunctionsForProfileQualityCheck( + "top-functions-for-profile-quality-check", + cl::desc("number of hottest functions to print aggregated " + "profile quality stats of."), + cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); +cl::opt PercentileForProfileQualityCheck( + "percentile-for-profile-quality-check", + cl::desc("Percentile of profile quality distributions over hottest " + "functions to report."), + cl::init(95), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); +} // namespace opts + +namespace { +using FunctionListType = std::vector; +using function_iterator = FunctionListType::iterator; + +// Function number -> vector of flows for BBs in the function +using TotalFlowMapTy = std::unordered_map>; +// Function number -> flow count +using FunctionFlowMapTy = std::unordered_map; +struct FlowInfo { + TotalFlowMapTy TotalIncomingFlows; + TotalFlowMapTy TotalOutgoingFlows; + TotalFlowMapTy TotalMaxCountMaps; + TotalFlowMapTy TotalMinCountMaps; + FunctionFlowMapTy CallGraphIncomingFlows; +}; + +template +void printDistribution(raw_ostream &OS, std::vector &values, + bool Fraction = false) { + // Assume values are sorted. 
+ if (values.empty()) + return; + + OS << " Length : " << values.size() << "\n"; + + auto printLine = [&](std::string Text, double Percent) { + int Rank = int(values.size() * (100 - Percent) / 100); + if (Percent == 0) + Rank = values.size() - 1; + if (Fraction) + OS << " " << Text << std::string(11 - Text.length(), ' ') << ": " + << formatv("{0:P}", values[Rank]) << "\n"; + else + OS << " " << Text << std::string(11 - Text.length(), ' ') << ": " + << values[Rank] << "\n"; + }; + + printLine("MAX", 0); + const int percentages[] = {1, 5, 10, 20, 50, 80}; + for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) { + printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]); + } + printLine("MIN", 100); +} + +void printCFGContinuityStats(raw_ostream &OS, + iterator_range &Functions) { + // Given a perfect profile, every positive-execution-count BB should be + // connected to an entry of the function through a positive-execution-count + // directed path in the control flow graph. + std::vector NumUnreachables; + std::vector SumECUnreachables; + std::vector FractionECUnreachables; + + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1) + continue; + + // Compute the sum of all BB execution counts (ECs). + size_t NumPosECBBs = 0; + size_t SumAllBBEC = 0; + for (const BinaryBasicBlock &BB : *Function) { + const size_t BBEC = BB.getKnownExecutionCount(); + NumPosECBBs += !!BBEC; + SumAllBBEC += BBEC; + } + + // Perform BFS on subgraph of CFG induced by positive weight edges. + // Compute the number of BBs reachable from the entry(s) of the function and + // the sum of their execution counts (ECs). 
+ std::unordered_set Visited; + std::queue Queue; + size_t SumReachableBBEC = 0; + + Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { + const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset); + if (!EntryBB || EntryBB->getKnownExecutionCount() == 0) + return true; + Queue.push(EntryBB->getLayoutIndex()); + Visited.insert(EntryBB->getLayoutIndex()); + SumReachableBBEC += EntryBB->getKnownExecutionCount(); + return true; + }); + + const FunctionLayout &Layout = Function->getLayout(); + + while (!Queue.empty()) { + const unsigned BBIndex = Queue.front(); + const BinaryBasicBlock *BB = Layout.getBlock(BBIndex); + Queue.pop(); + for (const auto &[Succ, BI] : + llvm::zip(BB->successors(), BB->branch_info())) { + const uint64_t Count = BI.Count; + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0 || + !Visited.insert(Succ->getLayoutIndex()).second) + continue; + SumReachableBBEC += Succ->getKnownExecutionCount(); + Queue.push(Succ->getLayoutIndex()); + } + } + + const size_t NumReachableBBs = Visited.size(); + + const size_t NumPosECBBsUnreachableFromEntry = + NumPosECBBs - NumReachableBBs; + const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; + const double FractionECUnreachable = + (double)SumUnreachableBBEC / SumAllBBEC; + + if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { + OS << "Non-trivial CFG discontinuity observed in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry); + SumECUnreachables.push_back(SumUnreachableBBEC); + FractionECUnreachables.push_back(FractionECUnreachable); + } + + if (FractionECUnreachables.empty()) + return; + + llvm::sort(FractionECUnreachables); + const int Rank = int(FractionECUnreachables.size() * + opts::PercentileForProfileQualityCheck / 100); + OS << formatv("function CFG discontinuity {0:P}; ", + FractionECUnreachables[Rank]); + if 
(opts::Verbosity >= 1) { + OS << "\nabbreviations: EC = execution count, POS BBs = positive EC BBs\n" + << "distribution of NUM(unreachable POS BBs) per function\n"; + llvm::sort(NumUnreachables); + printDistribution(OS, NumUnreachables); + + OS << "distribution of SUM_EC(unreachable POS BBs) per function\n"; + llvm::sort(SumECUnreachables); + printDistribution(OS, SumECUnreachables); + + OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all " + "POS BBs))] per function\n"; + printDistribution(OS, FractionECUnreachables, /*Fraction=*/true); + } +} + +void printCallGraphFlowConservationStats( + raw_ostream &OS, iterator_range &Functions, + FlowInfo &TotalFlowMap) { + std::vector CallGraphGaps; + + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1 || !Function->isSimple()) + continue; + + const uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &IncomingFlows = + TotalFlowMap.TotalIncomingFlows[FunctionNum]; + std::vector &OutgoingFlows = + TotalFlowMap.TotalOutgoingFlows[FunctionNum]; + FunctionFlowMapTy &CallGraphIncomingFlows = + TotalFlowMap.CallGraphIncomingFlows; + + // Only consider functions that are not a program entry. + if (CallGraphIncomingFlows.find(FunctionNum) != + CallGraphIncomingFlows.end()) { + uint64_t EntryInflow = 0; + uint64_t EntryOutflow = 0; + uint32_t NumConsideredEntryBlocks = 0; + + Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { + const BinaryBasicBlock *EntryBB = + Function->getBasicBlockAtOffset(Offset); + if (!EntryBB || EntryBB->succ_size() == 0) + return true; + NumConsideredEntryBlocks++; + EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()]; + EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()]; + return true; + }); + + uint64_t NetEntryOutflow = 0; + if (EntryOutflow < EntryInflow) { + if (opts::Verbosity >= 2) { + // We expect entry blocks' CFG outflow >= inflow, i.e., it has a + // non-negative net outflow. 
If this is not the case, then raise a + // warning if requested. + OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow " + "in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + } else { + NetEntryOutflow = EntryOutflow - EntryInflow; + } + if (NumConsideredEntryBlocks > 0) { + const uint64_t CallGraphInflow = + TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()]; + const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow); + const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow); + const double CallGraphGap = 1 - (double)Min / Max; + + if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) { + OS << "Nontrivial call graph gap of size " + << formatv("{0:P}", CallGraphGap) << " observed in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + CallGraphGaps.push_back(CallGraphGap); + } + } + } + + if (CallGraphGaps.empty()) + return; + + llvm::sort(CallGraphGaps); + const int Rank = + int(CallGraphGaps.size() * opts::PercentileForProfileQualityCheck / 100); + OS << formatv("call graph flow conservation gap {0:P}; ", + CallGraphGaps[Rank]); + if (opts::Verbosity >= 1) { + OS << "\ndistribution of function entry flow conservation gaps\n"; + printDistribution(OS, CallGraphGaps, /*Fraction=*/true); + } +} + +void printCFGFlowConservationStats(raw_ostream &OS, + iterator_range &Functions, + FlowInfo &TotalFlowMap) { + std::vector CFGGapsWeightedAvg; + std::vector CFGGapsWorst; + std::vector CFGGapsWorstAbs; + // We only consider blocks with execution counts > MinBlockCount when + // reporting the distribution of worst gaps. 
+ const uint16_t MinBlockCount = 500; + for (const BinaryFunction *Function : Functions) { + if (Function->size() <= 1 || !Function->isSimple()) + continue; + + const uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &MaxCountMaps = + TotalFlowMap.TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMaps = + TotalFlowMap.TotalMinCountMaps[FunctionNum]; + double WeightedGapSum = 0.0; + double WeightSum = 0.0; + double WorstGap = 0.0; + uint64_t WorstGapAbs = 0; + BinaryBasicBlock *BBWorstGap = nullptr; + BinaryBasicBlock *BBWorstGapAbs = nullptr; + for (BinaryBasicBlock &BB : *Function) { + // We don't consider function entry or exit blocks for CFG flow + // conservation + if (BB.isEntryPoint() || BB.succ_size() == 0) + continue; + + const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()]; + const uint64_t Min = MinCountMaps[BB.getLayoutIndex()]; + const double Gap = 1 - (double)Min / Max; + double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos(); + if (Weight == 0) + continue; + // We use log to prevent the stats from being dominated by extremely hot + // blocks + Weight = log(Weight); + WeightedGapSum += Gap * Weight; + WeightSum += Weight; + if (BB.getKnownExecutionCount() > MinBlockCount && Gap > WorstGap) { + WorstGap = Gap; + BBWorstGap = &BB; + } + if (BB.getKnownExecutionCount() > MinBlockCount && + Max - Min > WorstGapAbs) { + WorstGapAbs = Max - Min; + BBWorstGapAbs = &BB; + } + } + if (WeightSum > 0) { + const double WeightedGap = WeightedGapSum / WeightSum; + if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) { + OS << "Nontrivial CFG gap observed in function " + << Function->getPrintName() << "\n" + << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n"; + if (BBWorstGap) + OS << "Worst gap: " << formatv("{0:P}", WorstGap) + << " at BB with input offset: 0x" + << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n"; + if (BBWorstGapAbs) + OS << "Worst gap (absolute value): " << WorstGapAbs << " at 
BB with " + << "input offset 0x" + << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + CFGGapsWeightedAvg.push_back(WeightedGap); + CFGGapsWorst.push_back(WorstGap); + CFGGapsWorstAbs.push_back(WorstGapAbs); + } + } + + if (CFGGapsWeightedAvg.empty()) + return; + llvm::sort(CFGGapsWeightedAvg); + const int RankWA = int(CFGGapsWeightedAvg.size() * + opts::PercentileForProfileQualityCheck / 100); + llvm::sort(CFGGapsWorst); + const int RankW = + int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100); + OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst)\n", + CFGGapsWeightedAvg[RankWA], CFGGapsWorst[RankW]); + if (opts::Verbosity >= 1) { + OS << "distribution of weighted CFG flow conservation gaps\n"; + printDistribution(OS, CFGGapsWeightedAvg, /*Fraction=*/true); + OS << format("Consider only blocks with execution counts > %zu:\n", + MinBlockCount) + << "distribution of worst block flow conservation gap per " + "function \n"; + printDistribution(OS, CFGGapsWorst, /*Fraction=*/true); + OS << "distribution of worst block flow conservation gap (absolute " + "value) per function\n"; + llvm::sort(CFGGapsWorstAbs); + printDistribution(OS, CFGGapsWorstAbs, /*Fraction=*/false); + } +} + +void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) { + // Increment block inflow and outflow with CFG jump counts. 
+ TotalFlowMapTy &TotalIncomingFlows = TotalFlowMap.TotalIncomingFlows; + TotalFlowMapTy &TotalOutgoingFlows = TotalFlowMap.TotalOutgoingFlows; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + std::vector &IncomingFlows = + TotalIncomingFlows[Function->getFunctionNumber()]; + std::vector &OutgoingFlows = + TotalOutgoingFlows[Function->getFunctionNumber()]; + const uint64_t NumBlocks = Function->size(); + IncomingFlows.resize(NumBlocks, 0); + OutgoingFlows.resize(NumBlocks, 0); + if (Function->empty() || !Function->hasValidProfile()) + continue; + for (const BinaryBasicBlock &BB : *Function) { + uint64_t TotalOutgoing = 0ULL; + for (const auto &[Succ, BI] : + llvm::zip(BB.successors(), BB.branch_info())) { + const uint64_t Count = BI.Count; + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) + continue; + TotalOutgoing += Count; + IncomingFlows[Succ->getLayoutIndex()] += Count; + } + OutgoingFlows[BB.getLayoutIndex()] = TotalOutgoing; + } + } + // Initialize TotalMaxCountMaps and TotalMinCountMaps using + // TotalIncomingFlows and TotalOutgoingFlows + TotalFlowMapTy &TotalMaxCountMaps = TotalFlowMap.TotalMaxCountMaps; + TotalFlowMapTy &TotalMinCountMaps = TotalFlowMap.TotalMinCountMaps; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &IncomingFlows = TotalIncomingFlows[FunctionNum]; + std::vector &OutgoingFlows = TotalOutgoingFlows[FunctionNum]; + std::vector &MaxCountMap = TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMap = TotalMinCountMaps[FunctionNum]; + const uint64_t NumBlocks = Function->size(); + MaxCountMap.resize(NumBlocks, 0); + MinCountMap.resize(NumBlocks, 0); + if (Function->empty() || !Function->hasValidProfile()) + continue; + for (const BinaryBasicBlock &BB : *Function) { + uint64_t BBNum = BB.getLayoutIndex(); + MaxCountMap[BBNum] = 
std::max(IncomingFlows[BBNum], OutgoingFlows[BBNum]); + MinCountMap[BBNum] = std::min(IncomingFlows[BBNum], OutgoingFlows[BBNum]); + } + } + + // Modify TotalMaxCountMaps and TotalMinCountMaps using call counts and + // fill out CallGraphIncomingFlows + FunctionFlowMapTy &CallGraphIncomingFlows = + TotalFlowMap.CallGraphIncomingFlows; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + uint64_t FunctionNum = Function->getFunctionNumber(); + std::vector &MaxCountMap = TotalMaxCountMaps[FunctionNum]; + std::vector &MinCountMap = TotalMinCountMaps[FunctionNum]; + + // Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows + auto recordCall = [&](const BinaryBasicBlock *SourceBB, + const MCSymbol *DestSymbol, uint64_t Count, + uint64_t TotalCallCount) { + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE) + Count = 0; + const BinaryFunction *DstFunc = + DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr; + if (DstFunc) + CallGraphIncomingFlows[DstFunc->getFunctionNumber()] += Count; + if (SourceBB) { + unsigned BlockIndex = SourceBB->getLayoutIndex(); + MaxCountMap[BlockIndex] = + std::max(MaxCountMap[BlockIndex], TotalCallCount); + MinCountMap[BlockIndex] = + std::min(MinCountMap[BlockIndex], TotalCallCount); + } + }; + + // Get pairs of (symbol, count) for each target at this callsite. + // If the call is to an unknown function the symbol will be nullptr. + // If there is no profiling data the count will be COUNT_NO_PROFILE. 
+ using TargetDesc = std::pair; + using CallInfoTy = std::vector; + auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) { + CallInfoTy Counts; + const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst); + + if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) { + for (const auto &CSI : BC.MIB->getAnnotationAs( + Inst, "CallProfile")) + if (CSI.Symbol) + Counts.emplace_back(CSI.Symbol, CSI.Count); + } else { + const uint64_t Count = BB->getExecutionCount(); + Counts.emplace_back(DstSym, Count); + } + + return Counts; + }; + + // If the function has an invalid profile, try to use the perf data + // directly. The call EC is only used to update CallGraphIncomingFlows. + if (!Function->hasValidProfile() && !Function->getAllCallSites().empty()) { + for (const IndirectCallProfile &CSI : Function->getAllCallSites()) + if (CSI.Symbol) + recordCall(nullptr, CSI.Symbol, CSI.Count, CSI.Count); + continue; + } else { + // If the function has a valid profile + for (const BinaryBasicBlock &BB : *Function) { + for (const MCInst &Inst : BB) { + if (!BC.MIB->isCall(Inst)) + continue; + // Find call instructions and extract target symbols from each + // one. + const CallInfoTy CallInfo = getCallInfo(&BB, Inst); + // We need the total call count to update MaxCountMap and + // MinCountMap in recordCall for indirect calls + uint64_t TotalCallCount = 0; + for (const TargetDesc &CI : CallInfo) + TotalCallCount += CI.second; + for (const TargetDesc &CI : CallInfo) + recordCall(&BB, CI.first, CI.second, TotalCallCount); + } + } + } + } +} + +void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, + size_t NumTopFunctions) { + // Sort the list of functions by execution counts (reverse). 
+ llvm::sort(ValidFunctions, + [&](const BinaryFunction *A, const BinaryFunction *B) { + return A->getKnownExecutionCount() > B->getKnownExecutionCount(); + }); + + const size_t RealNumTopFunctions = + std::min(NumTopFunctions, ValidFunctions.size()); + + iterator_range Functions( + ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions); + + FlowInfo TotalFlowMap; + computeFlowMappings(BC, TotalFlowMap); + + BC.outs() << format("BOLT-INFO: profile quality metrics for the hottest %zu " + "functions (reporting top %zu%% values): ", + RealNumTopFunctions, + 100 - opts::PercentileForProfileQualityCheck); + printCFGContinuityStats(BC.outs(), Functions); + printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + + // Print more detailed bucketed stats if requested. + if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { + const size_t PerBucketSize = RealNumTopFunctions / 5; + BC.outs() << format( + "Detailed stats for 5 buckets, each with %zu functions:\n", + PerBucketSize); + + // For each bucket, print the CFG continuity stats of the functions in + // the bucket. 
+ for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) { + const size_t StartIndex = BucketIndex * PerBucketSize; + const size_t EndIndex = StartIndex + PerBucketSize; + iterator_range Functions( + ValidFunctions.begin() + StartIndex, + ValidFunctions.begin() + EndIndex); + const size_t MaxFunctionExecutionCount = + ValidFunctions[StartIndex]->getKnownExecutionCount(); + const size_t MinFunctionExecutionCount = + ValidFunctions[EndIndex - 1]->getKnownExecutionCount(); + BC.outs() << format("----------------\n| Bucket %zu: " + "|\n----------------\n", + BucketIndex + 1) + << format( + "execution counts of the %zu functions in the bucket: " + "%zu-%zu\n", + EndIndex - StartIndex, MinFunctionExecutionCount, + MaxFunctionExecutionCount); + printCFGContinuityStats(BC.outs(), Functions); + printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + } + } +} +} // namespace + +bool PrintProfileQualityStats::shouldOptimize(const BinaryFunction &BF) const { + if (BF.empty() || !BF.hasValidProfile()) + return false; + + return BinaryFunctionPass::shouldOptimize(BF); +} + +Error PrintProfileQualityStats::runOnFunctions(BinaryContext &BC) { + // Create a list of functions with valid profiles. 
+ FunctionListType ValidFunctions; + for (const auto &BFI : BC.getBinaryFunctions()) { + const BinaryFunction *Function = &BFI.second; + if (PrintProfileQualityStats::shouldOptimize(*Function)) + ValidFunctions.push_back(Function); + } + if (ValidFunctions.empty() || opts::TopFunctionsForProfileQualityCheck == 0) + return Error::success(); + + printAll(BC, ValidFunctions, opts::TopFunctionsForProfileQualityCheck); + return Error::success(); +} diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 2d851c751ae10..dd48653931eb9 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -12,7 +12,6 @@ #include "bolt/Passes/AllocCombiner.h" #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" -#include "bolt/Passes/ContinuityStats.h" #include "bolt/Passes/FixRISCVCallsPass.h" #include "bolt/Passes/FixRelaxationPass.h" #include "bolt/Passes/FrameOptimizer.h" @@ -27,6 +26,7 @@ #include "bolt/Passes/MCF.h" #include "bolt/Passes/PLTCall.h" #include "bolt/Passes/PatchEntries.h" +#include "bolt/Passes/ProfileQualityStats.h" #include "bolt/Passes/RegReAssign.h" #include "bolt/Passes/ReorderData.h" #include "bolt/Passes/ReorderFunctions.h" @@ -379,7 +379,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { if (opts::PrintProfileStats) Manager.registerPass(std::make_unique(NeverPrint)); - Manager.registerPass(std::make_unique(NeverPrint)); + Manager.registerPass(std::make_unique(NeverPrint)); Manager.registerPass(std::make_unique(NeverPrint)); diff --git a/bolt/test/X86/cfg-discontinuity-reporting.test b/bolt/test/X86/cfg-discontinuity-reporting.test deleted file mode 100644 index 4d7d3305cdb75..0000000000000 --- a/bolt/test/X86/cfg-discontinuity-reporting.test +++ /dev/null @@ -1,4 +0,0 @@ -## Check profile discontinuity reporting -RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe -RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | 
FileCheck %s -CHECK: among the hottest 5 functions top 5% function CFG discontinuity is 100.00% diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test new file mode 100644 index 0000000000000..2e15a6b245afa --- /dev/null +++ b/bolt/test/X86/profile-quality-reporting.test @@ -0,0 +1,4 @@ +## Check profile quality stats reporting +RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe +RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s +CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst) From bb6118faa61601aef6ae6b840985cbb83ece9658 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 28 Feb 2025 16:12:33 +0000 Subject: [PATCH 108/123] [gn build] Port 7e33bebe7c8c --- llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn index 19d2b5d27c33d..285f41ee70203 100644 --- a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn @@ -19,7 +19,6 @@ static_library("Passes") { "BinaryPasses.cpp", "CMOVConversion.cpp", "CacheMetrics.cpp", - "ContinuityStats.cpp", "DataflowAnalysis.cpp", "DataflowInfoManager.cpp", "FixRISCVCallsPass.cpp", @@ -41,6 +40,7 @@ static_library("Passes") { "PLTCall.cpp", "PatchEntries.cpp", "PettisAndHansen.cpp", + "ProfileQualityStats.cpp", "RegAnalysis.cpp", "RegReAssign.cpp", "ReorderAlgorithm.cpp", From b8ca8d60b9d176ba25dd40cf49d7459cf71d3c9b Mon Sep 17 00:00:00 2001 From: Michael Flanders Date: Fri, 28 Feb 2025 10:42:26 -0600 Subject: [PATCH 109/123] [analyzer] Do list initialization for CXXNewExpr with initializer list arg (#127702) Fixes #116444. 
Closed #127700 because I accidentally updated it in github UI. ### Current vs expected behavior Previously, the result of a `CXXNewExpr` was not always list initialized when using an initializer list. In this example: ``` struct S { int x; }; void F() { S *s = new S{1}; delete s; } ``` there would be a binding of `s` to `compoundVal{1}`, but this isn't used during later field binding lookup. After this PR, there is instead a binding of `s->x` to `1`. This is the cause of #116444 since the field binding lookup returns undefined in some cases currently. ### Changes This PR swaps around the handling of typed value regions (seems to be the usual region type when doing non-CXX-new-expr list initialization) and symbolic regions (the result of the CXX new expr), so that symbolic regions also get list initialized. In the below snippet, it swaps the order of the two conditionals. https://github.com/llvm/llvm-project/blob/8529bd7b964cc9fafe8fece84f7bd12dacb09560/clang/lib/StaticAnalyzer/Core/RegionStore.cpp#L2426-L2448 ### Followup work This PR only makes CSA do list init for `CXXNewExpr`s. After this, I would like to make some changes to `RegionStoreMananger::bind` in how it handles list initialization generally. I've added some straightforward test cases here for the `new` expr with a list initializer. I started adding some more before realizing that the current general (not just `new` expr) list initialization could be changed to handle more cases like list initialization of unions and arrays (like https://github.com/llvm/llvm-project/issues/54910). Lmk if it is preferred to then leave these test cases out for now. 
--- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 18 +- clang/test/Analysis/initializer.cpp | 218 ++++++++++++++++++ clang/test/Analysis/new-user-defined.cpp | 30 +++ 3 files changed, 257 insertions(+), 9 deletions(-) create mode 100644 clang/test/Analysis/new-user-defined.cpp diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 550a276c66c71..79cb5a07701fd 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -2533,6 +2533,15 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { const MemRegion *R = MemRegVal->getRegion(); + // Binding directly to a symbolic region should be treated as binding + // to element 0. + if (const auto *SymReg = dyn_cast(R)) { + QualType Ty = SymReg->getPointeeStaticType(); + if (Ty->isVoidType()) + Ty = StateMgr.getContext().CharTy; + R = GetElementZeroRegion(SymReg, Ty); + } + // Check if the region is a struct region. if (const TypedValueRegion* TR = dyn_cast(R)) { QualType Ty = TR->getValueType(); @@ -2546,15 +2555,6 @@ RegionStoreManager::bind(LimitedRegionBindingsConstRef B, Loc L, SVal V) { return bindAggregate(B, TR, V); } - // Binding directly to a symbolic region should be treated as binding - // to element 0. - if (const auto *SymReg = dyn_cast(R)) { - QualType Ty = SymReg->getPointeeStaticType(); - if (Ty->isVoidType()) - Ty = StateMgr.getContext().CharTy; - R = GetElementZeroRegion(SymReg, Ty); - } - assert((!isa(R) || !B.lookup(R)) && "'this' pointer is not an l-value and is not assignable"); diff --git a/clang/test/Analysis/initializer.cpp b/clang/test/Analysis/initializer.cpp index f50afff25d245..713e121168571 100644 --- a/clang/test/Analysis/initializer.cpp +++ b/clang/test/Analysis/initializer.cpp @@ -254,6 +254,224 @@ void foo() { } } // namespace CXX17_aggregate_construction +namespace newexpr_init_list_initialization { +template +void escape(FirstT first, Rest... 
args); + +struct S { + int foo; + int bar; +}; +void none_designated() { + S *s = new S{13,1}; + clang_analyzer_eval(13 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(1 == s->bar); // expected-warning{{TRUE}} + delete s; +} +void none_designated_swapped() { + S *s = new S{1,13}; + clang_analyzer_eval(1 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(13 == s->bar); // expected-warning{{TRUE}} + delete s; +} +void one_designated_one_not() { + S *s = new S{ 1, .bar = 13 }; + clang_analyzer_eval(1 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(13 == s->bar); // expected-warning{{TRUE}} + delete s; +} +void all_designated() { + S *s = new S{ + .foo = 13, + .bar = 1, + }; + clang_analyzer_eval(13 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(1 == s->bar); // expected-warning{{TRUE}} + delete s; +} +void non_designated_array_of_aggr_struct() { + S *s = new S[2] { {1, 2}, {3, 4} }; + clang_analyzer_eval(1 == s[0].foo); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == s[0].bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == s[1].foo); // expected-warning{{TRUE}} + clang_analyzer_eval(4 == s[1].bar); // expected-warning{{TRUE}} + delete[] s; +} + +struct WithGaps { + int foo; + int bar; + int baz; +}; +void out_of_order_designated_initializers_with_gaps() { + WithGaps *s = new WithGaps{ + .foo = 13, + .baz = 1, + }; + clang_analyzer_eval(13 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == s->bar); // expected-warning{{TRUE}} + clang_analyzer_eval(1 == s->baz); // expected-warning{{TRUE}} + delete s; +} + +// https://eel.is/c++draft/dcl.init.aggr#note-6: +// Static data members, non-static data members of anonymous +// union members, and unnamed bit-fields are not considered +// elements of the aggregate. 
+struct NonConsideredFields { + int i; + static int s; + int j; + int :17; + int k; +}; +void considered_fields_initd() { + auto S = new NonConsideredFields { 1, 2, 3 }; + clang_analyzer_eval(1 == S->i); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == S->j); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == S->k); // expected-warning{{TRUE}} + delete S; +} + +#if __cplusplus >= 201703L +enum Enum : int { +}; +void list_init_enum() { + Enum *E = new Enum{53}; + clang_analyzer_eval(53 == *E); // expected-warning{{TRUE}} + delete E; +} +#endif // __cplusplus >= 201703L + +class PubClass { +public: + int foo; + int bar; +}; +void public_class_designated_initializers() { + S *s = new S{ + .foo = 13, + .bar = 1, + }; + clang_analyzer_eval(13 == s->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(1 == s->bar); // expected-warning{{TRUE}} + delete s; +} + +union UnionTestTy { + int x; + char y; +}; +void new_expr_aggr_init_union_no_designator() { + UnionTestTy *u = new UnionTestTy{}; + clang_analyzer_eval(0 == u->x); // expected-warning{{UNKNOWN}} FIXME: should be TRUE + clang_analyzer_eval(u->y); // expected-warning{{UNKNOWN}} FIXME: should be undefined, warning + delete u; +} +void new_expr_aggr_init_union_designated_first_field() { + UnionTestTy *u = new UnionTestTy{ .x = 14 }; + clang_analyzer_eval(14 == u->x); // expected-warning{{UNKNOWN}} FIXME: should be TRUE + clang_analyzer_eval(u->y); // expected-warning{{UNKNOWN}} FIXME: should be undefined, warning + delete u; +} +void new_expr_aggr_init_union_designated_non_first_field() { + UnionTestTy *u = new UnionTestTy{ .y = 3 }; + clang_analyzer_eval(3 == u->y); // expected-warning{{UNKNOWN}} FIXME: should be TRUE + clang_analyzer_eval(u->x); // expected-warning{{UNKNOWN}} FIXME: should be undefined, warning + delete u; +} + +union UnionTestTyWithDefaultMemberInit { + int x; + char y = 14; +}; +void union_with_default_member_init_empty_init_list() { + auto U = new UnionTestTyWithDefaultMemberInit{}; + 
// clang_analyzer_eval(14 == U->y); // FIXME: Should be true + clang_analyzer_eval(U->x); // expected-warning{{UNKNOWN}} FIXME: should be undefined, warning + delete U; +} + +struct Inner { + int bar; +}; +struct Nested { + int foo; + Inner inner; + int baz; +}; +void nested_aggregates() { + auto N = new Nested{}; + clang_analyzer_eval(0 == N->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N->baz); // expected-warning{{TRUE}} + + auto N1 = new Nested{1}; + clang_analyzer_eval(1 == N1->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N1->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N1->baz); // expected-warning{{TRUE}} + + auto N2 = new Nested{.baz = 14}; + clang_analyzer_eval(0 == N2->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N2->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(14 == N2->baz); // expected-warning{{TRUE}} + + auto N3 = new Nested{1,2,3}; + clang_analyzer_eval(1 == N3->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == N3->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == N3->baz); // expected-warning{{TRUE}} + + auto N4 = new Nested{1, {}, 3}; + clang_analyzer_eval(1 == N4->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N4->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == N4->baz); // expected-warning{{TRUE}} + + auto N5 = new Nested{{},{},{}}; + clang_analyzer_eval(0 == N5->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N5->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(0 == N5->baz); // expected-warning{{TRUE}} + + auto N6 = new Nested{1, {.bar = 2}, 3}; + clang_analyzer_eval(1 == N6->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == N6->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == N6->baz); // expected-warning{{TRUE}} + + auto N7 = new Nested{1, {2}, 3}; + clang_analyzer_eval(1 == 
N7->foo); // expected-warning{{TRUE}} + clang_analyzer_eval(2 == N7->inner.bar); // expected-warning{{TRUE}} + clang_analyzer_eval(3 == N7->baz); // expected-warning{{TRUE}} + + escape(N,N1,N2,N3,N4,N5,N6,N7); +} +} // namespace newexpr_init_list_initialization + +namespace placement_new_initializer_list_arg { +struct S { + int x; +}; +void aggregate_struct() { + S s; + S *s_ptr = new (&s) S{1}; + clang_analyzer_eval(1 == s_ptr->x); // expected-warning{{TRUE}} + + S vi; + S *vi_ptr = new (&vi) S{}; + clang_analyzer_eval(0 == vi_ptr->x); // expected-warning{{TRUE}} + + S di; + S *di_ptr = new (&di) S; + int z = di_ptr->x + 1; // expected-warning{{The left operand of '+' is a garbage value}} +} +void initialize_non_zeroth_element(S arr[2]) { + S *s = new (&arr[1]) S{1}; + clang_analyzer_eval(1 == s->x); // expected-warning{{TRUE}} +} +void initialize_non_zeroth_argument_pointers(S *arr[2]) { + arr[1] = new (arr[1]) S{1}; + clang_analyzer_eval(1 == arr[1]->x); // expected-warning{{TRUE}} +} +} // namespace placement_new_initializer_list_arg + namespace CXX17_transparent_init_list_exprs { class A {}; diff --git a/clang/test/Analysis/new-user-defined.cpp b/clang/test/Analysis/new-user-defined.cpp new file mode 100644 index 0000000000000..8987ac078bf2c --- /dev/null +++ b/clang/test/Analysis/new-user-defined.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_analyze_cc1 -verify %s\ +// RUN: -analyzer-checker=core,debug.ExprInspection + +void clang_analyzer_eval(bool); + +using size_t = decltype(sizeof(int)); + +template +void escape(FirstT first, Rest... 
args); + +namespace CustomClassType { +struct S { + int x; + static void* operator new(size_t size) { + return ::operator new(size); + } +}; +void F() { + S *s = new S; + clang_analyzer_eval(s->x); // expected-warning{{UNKNOWN}} FIXME: should be an undefined warning + + S *s2 = new S{}; + clang_analyzer_eval(0 == s2->x); // expected-warning{{TRUE}} + + S *s3 = new S{1}; + clang_analyzer_eval(1 == s3->x); // expected-warning{{TRUE}} + + escape(s, s2, s3); +} +} // namespace CustomClassType From 4e4d77c8f8b1c3363d59c8b9f2b64bf545160fce Mon Sep 17 00:00:00 2001 From: Tristan Ross Date: Fri, 28 Feb 2025 08:43:33 -0800 Subject: [PATCH 110/123] [libc] Add UEFI headers (#127126) Originated from #120687 This PR simply adds the necessary headers for UEFI which defines all the necessary types. This PR unlocks the ability to work on other PR's for UEFI support. --- libc/include/CMakeLists.txt | 13 + libc/include/Uefi.h.def | 16 ++ libc/include/Uefi.yaml | 15 ++ libc/include/llvm-libc-macros/CMakeLists.txt | 6 + libc/include/llvm-libc-macros/EFIAPI-macros.h | 18 ++ libc/include/llvm-libc-types/CMakeLists.txt | 124 +++++++++ .../llvm-libc-types/EFI_ALLOCATE_TYPE.h | 19 ++ .../llvm-libc-types/EFI_BOOT_SERVICES.h | 250 ++++++++++++++++++ libc/include/llvm-libc-types/EFI_CAPSULE.h | 26 ++ .../llvm-libc-types/EFI_CONFIGURATION_TABLE.h | 19 ++ .../EFI_DEVICE_PATH_PROTOCOL.h | 23 ++ libc/include/llvm-libc-types/EFI_EVENT.h | 21 ++ libc/include/llvm-libc-types/EFI_GUID.h | 21 ++ libc/include/llvm-libc-types/EFI_HANDLE.h | 14 + .../llvm-libc-types/EFI_INTERFACE_TYPE.h | 16 ++ .../llvm-libc-types/EFI_LOCATE_SEARCH_TYPE.h | 18 ++ .../llvm-libc-types/EFI_MEMORY_DESCRIPTOR.h | 43 +++ .../include/llvm-libc-types/EFI_MEMORY_TYPE.h | 32 +++ .../EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h | 22 ++ .../llvm-libc-types/EFI_PHYSICAL_ADDRESS.h | 16 ++ .../llvm-libc-types/EFI_RUNTIME_SERVICES.h | 137 ++++++++++ .../EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h | 39 +++ .../EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h | 64 +++++ 
libc/include/llvm-libc-types/EFI_STATUS.h | 16 ++ .../llvm-libc-types/EFI_SYSTEM_TABLE.h | 65 +++++ .../llvm-libc-types/EFI_TABLE_HEADER.h | 22 ++ libc/include/llvm-libc-types/EFI_TIME.h | 37 +++ .../include/llvm-libc-types/EFI_TIMER_DELAY.h | 18 ++ libc/include/llvm-libc-types/EFI_TPL.h | 21 ++ .../llvm-libc-types/EFI_VIRTUAL_ADDRESS.h | 16 ++ 30 files changed, 1167 insertions(+) create mode 100644 libc/include/Uefi.h.def create mode 100644 libc/include/Uefi.yaml create mode 100644 libc/include/llvm-libc-macros/EFIAPI-macros.h create mode 100644 libc/include/llvm-libc-types/EFI_ALLOCATE_TYPE.h create mode 100644 libc/include/llvm-libc-types/EFI_BOOT_SERVICES.h create mode 100644 libc/include/llvm-libc-types/EFI_CAPSULE.h create mode 100644 libc/include/llvm-libc-types/EFI_CONFIGURATION_TABLE.h create mode 100644 libc/include/llvm-libc-types/EFI_DEVICE_PATH_PROTOCOL.h create mode 100644 libc/include/llvm-libc-types/EFI_EVENT.h create mode 100644 libc/include/llvm-libc-types/EFI_GUID.h create mode 100644 libc/include/llvm-libc-types/EFI_HANDLE.h create mode 100644 libc/include/llvm-libc-types/EFI_INTERFACE_TYPE.h create mode 100644 libc/include/llvm-libc-types/EFI_LOCATE_SEARCH_TYPE.h create mode 100644 libc/include/llvm-libc-types/EFI_MEMORY_DESCRIPTOR.h create mode 100644 libc/include/llvm-libc-types/EFI_MEMORY_TYPE.h create mode 100644 libc/include/llvm-libc-types/EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h create mode 100644 libc/include/llvm-libc-types/EFI_PHYSICAL_ADDRESS.h create mode 100644 libc/include/llvm-libc-types/EFI_RUNTIME_SERVICES.h create mode 100644 libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h create mode 100644 libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h create mode 100644 libc/include/llvm-libc-types/EFI_STATUS.h create mode 100644 libc/include/llvm-libc-types/EFI_SYSTEM_TABLE.h create mode 100644 libc/include/llvm-libc-types/EFI_TABLE_HEADER.h create mode 100644 libc/include/llvm-libc-types/EFI_TIME.h create mode 
100644 libc/include/llvm-libc-types/EFI_TIMER_DELAY.h create mode 100644 libc/include/llvm-libc-types/EFI_TPL.h create mode 100644 libc/include/llvm-libc-types/EFI_VIRTUAL_ADDRESS.h diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 867bd1e5ee20f..41f6d3b67c95b 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -724,6 +724,19 @@ add_header_macro( .llvm-libc-macros.poll-macros ) +# UEFI spec references "Uefi.h" so we use that name for compatibility +add_header_macro( + uefi + ../libc/include/Uefi.yaml + Uefi.h.def + Uefi.h + DEPENDS + .llvm_libc_common_h + .llvm-libc-types.EFI_GUID + .llvm-libc-types.EFI_STATUS + .llvm-libc-types.EFI_SYSTEM_TABLE +) + if(NOT LLVM_LIBC_FULL_BUILD) # We don't install headers in non-fullbuild mode. return() diff --git a/libc/include/Uefi.h.def b/libc/include/Uefi.h.def new file mode 100644 index 0000000000000..6655e13579cd8 --- /dev/null +++ b/libc/include/Uefi.h.def @@ -0,0 +1,16 @@ +//===-- UEFI header uefi.h --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UEFI_H +#define LLVM_LIBC_UEFI_H + +#include "__llvm-libc-common.h" + +%%public_api() + +#endif // LLVM_LIBC_UEFI_H diff --git a/libc/include/Uefi.yaml b/libc/include/Uefi.yaml new file mode 100644 index 0000000000000..28582eb2524b1 --- /dev/null +++ b/libc/include/Uefi.yaml @@ -0,0 +1,15 @@ +header: Uefi.h +standards: UEFI +macros: [] +types: + - type_name: EFI_BOOT_SERVICES + - type_name: EFI_GUID + - type_name: EFI_STATUS + - type_name: EFI_SYSTEM_TABLE +enums: [] +functions: [] +objects: + - object_name: efi_system_table + object_type: EFI_SYSTEM_TABLE * + - object_name: efi_image_handle + object_type: EFI_HANDLE diff --git a/libc/include/llvm-libc-macros/CMakeLists.txt b/libc/include/llvm-libc-macros/CMakeLists.txt index 8c1f7387f3b4d..7f10e773479a3 100644 --- a/libc/include/llvm-libc-macros/CMakeLists.txt +++ b/libc/include/llvm-libc-macros/CMakeLists.txt @@ -337,3 +337,9 @@ add_macro_header( HDR poll-macros.h ) + +add_macro_header( + EFIAPI_macros + HDR + EFIAPI-macros.h +) diff --git a/libc/include/llvm-libc-macros/EFIAPI-macros.h b/libc/include/llvm-libc-macros/EFIAPI-macros.h new file mode 100644 index 0000000000000..cb854928d0ab7 --- /dev/null +++ b/libc/include/llvm-libc-macros/EFIAPI-macros.h @@ -0,0 +1,18 @@ +//===-- Definition of EFIAPI macro ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_MACROS_EFIAPI_MACROS_H +#define LLVM_LIBC_MACROS_EFIAPI_MACROS_H + +#if defined(__x86_64__) && !defined(__ILP32__) +#define EFIAPI __attribute__((ms_abi)) +#else +#define EFIAPI +#endif + +#endif // LLVM_LIBC_MACROS_EFIAPI_MACROS_H diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 7ed69ab1af6d9..58761ac97d7cf 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -157,3 +157,127 @@ DEPENDS add_header(locale_t HDR locale_t.h) add_header(struct_lconv HDR struct_lconv.h) add_header(stdfix-types HDR stdfix-types.h) + +# UEFI +add_header(EFI_GUID HDR EFI_GUID.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) +add_header(EFI_CONFIGURATION_TABLE HDR EFI_CONFIGURATION_TABLE.h DEPENDS .EFI_GUID) + +add_header(EFI_PHYSICAL_ADDRESS HDR EFI_PHYSICAL_ADDRESS.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) +add_header(EFI_VIRTUAL_ADDRESS HDR EFI_VIRTUAL_ADDRESS.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) + +add_header(EFI_MEMORY_DESCRIPTOR + HDR + EFI_MEMORY_DESCRIPTOR.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros + .EFI_PHYSICAL_ADDRESS + .EFI_VIRTUAL_ADDRESS +) + +add_header(EFI_ALLOCATE_TYPE HDR EFI_ALLOCATE_TYPE.h) +add_header(EFI_EVENT HDR EFI_EVENT.h) +add_header(EFI_INTERFACE_TYPE HDR EFI_INTERFACE_TYPE.h) +add_header(EFI_LOCATE_SEARCH_TYPE HDR EFI_LOCATE_SEARCH_TYPE.h) +add_header(EFI_MEMORY_TYPE HDR EFI_MEMORY_TYPE.h) +add_header(EFI_HANDLE HDR EFI_HANDLE.h) +add_header(EFI_TIME HDR EFI_TIME.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) +add_header(EFI_TIMER_DELAY HDR EFI_TIMER_DELAY.h) +add_header(EFI_TPL HDR EFI_TPL.h DEPENDS .size_t) +add_header(EFI_STATUS HDR EFI_STATUS.h DEPENDS .size_t) + +add_header(EFI_OPEN_PROTOCOL_INFORMATION_ENTRY + HDR + 
EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros + .EFI_HANDLE +) + +add_header(EFI_CAPSULE + HDR + EFI_CAPSULE.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros + .EFI_GUID +) + +add_header(EFI_TABLE_HEADER + HDR + EFI_TABLE_HEADER.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros +) + +add_header(EFI_DEVICE_PATH_PROTOCOL + HDR + EFI_DEVICE_PATH_PROTOCOL.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros +) + +add_header(EFI_SIMPLE_TEXT_INPUT_PROTOCOL + HDR + EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h + DEPENDS + libc.include.llvm-libc-macros.EFIAPI_macros + libc.include.llvm-libc-macros.stdint_macros + .EFI_EVENT + .EFI_STATUS + .char16_t +) + +add_header(EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL + HDR + EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h + DEPENDS + libc.include.llvm-libc-macros.stdint_macros + .EFI_STATUS + .size_t +) + +add_header(EFI_BOOT_SERVICES + HDR + EFI_BOOT_SERVICES.h + DEPENDS + libc.include.llvm-libc-macros.EFIAPI_macros + .EFI_ALLOCATE_TYPE + .EFI_DEVICE_PATH_PROTOCOL + .EFI_EVENT + .EFI_INTERFACE_TYPE + .EFI_LOCATE_SEARCH_TYPE + .EFI_MEMORY_DESCRIPTOR + .EFI_MEMORY_TYPE + .EFI_OPEN_PROTOCOL_INFORMATION_ENTRY + .EFI_PHYSICAL_ADDRESS + .EFI_STATUS + .EFI_TABLE_HEADER + .EFI_TIMER_DELAY + .EFI_TPL + .char16_t +) + +add_header(EFI_RUNTIME_SERVICES + HDR + EFI_RUNTIME_SERVICES.h + DEPENDS + .EFI_CAPSULE + .EFI_STATUS + .EFI_TABLE_HEADER + .EFI_TIME + .char16_t +) + +add_header(EFI_SYSTEM_TABLE + HDR + EFI_SYSTEM_TABLE.h + DEPENDS + .EFI_BOOT_SERVICES + .EFI_CONFIGURATION_TABLE + .EFI_HANDLE + .EFI_RUNTIME_SERVICES + .EFI_SIMPLE_TEXT_INPUT_PROTOCOL + .EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL + .EFI_STATUS + .EFI_TABLE_HEADER + .char16_t +) diff --git a/libc/include/llvm-libc-types/EFI_ALLOCATE_TYPE.h b/libc/include/llvm-libc-types/EFI_ALLOCATE_TYPE.h new file mode 100644 index 0000000000000..90f23969678f4 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_ALLOCATE_TYPE.h @@ -0,0 +1,19 @@ +//===-- Definition of 
EFI_ALLOCATE_TYPE type ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_ALLOCATE_TYPE_H +#define LLVM_LIBC_TYPES_EFI_ALLOCATE_TYPE_H + +typedef enum { + AllocateAnyPages, + AllocateMaxAddress, + AllocateAddress, + MaxAllocateType +} EFI_ALLOCATE_TYPE; + +#endif // LLVM_LIBC_TYPES_EFI_ALLOCATE_TYPE_H diff --git a/libc/include/llvm-libc-types/EFI_BOOT_SERVICES.h b/libc/include/llvm-libc-types/EFI_BOOT_SERVICES.h new file mode 100644 index 0000000000000..8b7a6aadd7a24 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_BOOT_SERVICES.h @@ -0,0 +1,250 @@ +//===-- Definition of EFI_BOOT_SERVICES type ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_BOOT_SERVICES_H +#define LLVM_LIBC_TYPES_EFI_BOOT_SERVICES_H + +#include "../llvm-libc-macros/EFIAPI-macros.h" +#include "EFI_ALLOCATE_TYPE.h" +#include "EFI_DEVICE_PATH_PROTOCOL.h" +#include "EFI_EVENT.h" +#include "EFI_GUID.h" +#include "EFI_INTERFACE_TYPE.h" +#include "EFI_LOCATE_SEARCH_TYPE.h" +#include "EFI_MEMORY_DESCRIPTOR.h" +#include "EFI_MEMORY_TYPE.h" +#include "EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h" +#include "EFI_PHYSICAL_ADDRESS.h" +#include "EFI_STATUS.h" +#include "EFI_TABLE_HEADER.h" +#include "EFI_TIMER_DELAY.h" +#include "EFI_TPL.h" +#include "char16_t.h" +#include "size_t.h" + +#define EFI_BOOT_SERVICES_SIGNATURE 0x56524553544f4f42 +#define EFI_BOOT_SERVICES_REVISION EFI_SPECIFICATION_VERSION + +typedef EFI_TPL(EFIAPI *EFI_RAISE_TPL)(EFI_TPL NewTpl); +typedef void(EFIAPI *EFI_RESTORE_TPL)(EFI_TPL OldTpl); + +typedef EFI_STATUS(EFIAPI *EFI_ALLOCATE_PAGES)(EFI_ALLOCATE_TYPE Type, + EFI_MEMORY_TYPE MemoryType, + size_t Pages, + EFI_PHYSICAL_ADDRESS *Memory); +typedef EFI_STATUS(EFIAPI *EFI_FREE_PAGES)(EFI_PHYSICAL_ADDRESS Memory, + size_t Pages); +typedef EFI_STATUS(EFIAPI *EFI_GET_MEMORY_MAP)(size_t *MemoryMapSize, + EFI_MEMORY_DESCRIPTOR *MemoryMap, + size_t *MapKey, + size_t *DescriptorSize, + uint32_t *DescriptorVersion); + +typedef EFI_STATUS(EFIAPI *EFI_ALLOCATE_POOL)(EFI_MEMORY_TYPE PoolType, + size_t Size, void **Buffer); +typedef EFI_STATUS(EFIAPI *EFI_FREE_POOL)(void *Buffer); + +typedef void(EFIAPI *EFI_EVENT_NOTIFY)(EFI_EVENT Event, void *Context); + +typedef EFI_STATUS(EFIAPI *EFI_CREATE_EVENT)(uint32_t Type, EFI_TPL NotifyTpl, + EFI_EVENT_NOTIFY NotifyFunction, + void *NotifyContext, + EFI_EVENT *Event); +typedef EFI_STATUS(EFIAPI *EFI_SET_TIMER)(EFI_EVENT Event, EFI_TIMER_DELAY Type, + uint64_t TriggerTime); +typedef EFI_STATUS(EFIAPI 
*EFI_WAIT_FOR_EVENT)(size_t NumberOfEvents, + EFI_EVENT *Event, size_t *Index); +typedef EFI_STATUS(EFIAPI *EFI_SIGNAL_EVENT)(EFI_EVENT Event); +typedef EFI_STATUS(EFIAPI *EFI_CLOSE_EVENT)(EFI_EVENT Event); +typedef EFI_STATUS(EFIAPI *EFI_CHECK_EVENT)(EFI_EVENT Event); + +typedef EFI_STATUS(EFIAPI *EFI_INSTALL_PROTOCOL_INTERFACE)( + EFI_HANDLE *Handle, EFI_GUID *Protocol, EFI_INTERFACE_TYPE InterfaceType, + void *Interface); +typedef EFI_STATUS(EFIAPI *EFI_REINSTALL_PROTOCOL_INTERFACE)( + EFI_HANDLE Handle, EFI_GUID *Protocol, void *OldInterface, + void *NewInterface); +typedef EFI_STATUS(EFIAPI *EFI_UNINSTALL_PROTOCOL_INTERFACE)(EFI_HANDLE Handle, + EFI_GUID *Protocol, + void *Interface); + +typedef EFI_STATUS(EFIAPI *EFI_HANDLE_PROTOCOL)(EFI_HANDLE Handle, + EFI_GUID *Protocol, + void **Interface); +typedef EFI_STATUS(EFIAPI *EFI_REGISTER_PROTOCOL_NOTIFY)(EFI_GUID *Protocol, + EFI_EVENT Event, + void **Registration); + +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_HANDLE)(EFI_LOCATE_SEARCH_TYPE SearchType, + EFI_GUID *Protocol, + void *SearchKey, + size_t *BufferSize, + EFI_HANDLE *Buffer); +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_DEVICE_PATH)( + EFI_GUID *Protocol, EFI_DEVICE_PATH_PROTOCOL **DevicePath, + EFI_HANDLE *Device); + +typedef EFI_STATUS(EFIAPI *EFI_INSTALL_CONFIGURATION_TABLE)(EFI_GUID *Guid, + void *Table); +typedef EFI_STATUS(EFIAPI *EFI_IMAGE_UNLOAD)(EFI_HANDLE ImageHandle); +typedef EFI_STATUS(EFIAPI *EFI_IMAGE_START)(EFI_HANDLE ImageHandle, + size_t *ExitDataSize, + char16_t **ExitData); + +typedef EFI_STATUS(EFIAPI *EFI_EXIT)(EFI_HANDLE ImageHandle, + EFI_STATUS ExitStatus, size_t ExitDataSize, + char16_t *ExitData); +typedef EFI_STATUS(EFIAPI *EFI_EXIT_BOOT_SERVICES)(EFI_HANDLE ImageHandle, + size_t MapKey); +typedef EFI_STATUS(EFIAPI *EFI_GET_NEXT_MONOTONIC_COUNT)(uint64_t *Count); +typedef EFI_STATUS(EFIAPI *EFI_STALL)(size_t Microseconds); +typedef EFI_STATUS(EFIAPI *EFI_SET_WATCHDOG_TIMER)(size_t Timeout, + uint64_t WatchdogCode, + size_t DataSize, 
+ char16_t *WatchdogData); + +typedef EFI_STATUS(EFIAPI *EFI_CONNECT_CONTROLLER)( + EFI_HANDLE ControllerHandle, EFI_HANDLE *DriverImageHandle, + EFI_DEVICE_PATH_PROTOCOL *RemainingDevicePath, bool Recursive); + +typedef EFI_STATUS(EFIAPI *EFI_DISCONNECT_CONTROLLER)( + EFI_HANDLE ControllerHandle, EFI_HANDLE DriverImageHandle, + EFI_HANDLE ChildHandle); + +typedef EFI_STATUS(EFIAPI *EFI_OPEN_PROTOCOL)( + EFI_HANDLE Handle, EFI_GUID *Protocol, void **Interface, + EFI_HANDLE AgentHandle, EFI_HANDLE ControllerHandle, uint32_t Attributes); + +typedef EFI_STATUS(EFIAPI *EFI_CLOSE_PROTOCOL)(EFI_HANDLE Handle, + EFI_GUID *Protocol, + EFI_HANDLE AgentHandle, + EFI_HANDLE ControllerHandle); + +typedef EFI_STATUS(EFIAPI *EFI_OPEN_PROTOCOL_INFORMATION)( + EFI_HANDLE Handle, EFI_GUID *Protocol, + EFI_OPEN_PROTOCOL_INFORMATION_ENTRY **EntryBuffer, size_t *EntryCount); + +typedef EFI_STATUS(EFIAPI *EFI_PROTOCOLS_PER_HANDLE)( + EFI_HANDLE Handle, EFI_GUID ***ProtocolBuffer, size_t *ProtocolBufferCount); + +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_HANDLE_BUFFER)( + EFI_LOCATE_SEARCH_TYPE SearchType, EFI_GUID *Protocol, void *SearchKey, + size_t *NoHandles, EFI_HANDLE **Buffer); + +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_PROTOCOL)(EFI_GUID *Protocol, + void *Registration, + void **Interface); + +typedef EFI_STATUS(EFIAPI *EFI_UNINSTALL_MULTIPLE_PROTOCOL_INTERFACES)( + EFI_HANDLE Handle, ...); +typedef EFI_STATUS(EFIAPI *EFI_CALCULATE_CRC32)(void *Data, size_t DataSize, + uint32_t *Crc32); + +typedef void(EFIAPI *EFI_COPY_MEM)(void *Destination, void *Source, + size_t Length); +typedef void(EFIAPI *EFI_SET_MEM)(void *Buffer, size_t Size, uint8_t Value); + +typedef EFI_STATUS(EFIAPI *EFI_CREATE_EVENT_EX)( + uint32_t Type, EFI_TPL NotifyTpl, EFI_EVENT_NOTIFY NotifyFunction, + const void *NotifyContext, const EFI_GUID *EventGroup, EFI_EVENT *Event); + +typedef struct { + EFI_TABLE_HEADER Hdr; + + // + // Task Priority Services + // + EFI_RAISE_TPL RaiseTPL; // EFI 1.0+ + EFI_RESTORE_TPL 
RestoreTPL; // EFI 1.0+ + + // + // Memory Services + // + EFI_ALLOCATE_PAGES AllocatePages; // EFI 1.0+ + EFI_FREE_PAGES FreePages; // EFI 1.0+ + EFI_GET_MEMORY_MAP GetMemoryMap; // EFI 1.0+ + EFI_ALLOCATE_POOL AllocatePool; // EFI 1.0+ + EFI_FREE_POOL FreePool; // EFI 1.0+ + + // + // Event & Timer Services + // + EFI_CREATE_EVENT CreateEvent; // EFI 1.0+ + EFI_SET_TIMER SetTimer; // EFI 1.0+ + EFI_WAIT_FOR_EVENT WaitForEvent; // EFI 1.0+ + EFI_SIGNAL_EVENT SignalEvent; // EFI 1.0+ + EFI_CLOSE_EVENT CloseEvent; // EFI 1.0+ + EFI_CHECK_EVENT CheckEvent; // EFI 1.0+ + + // + // Protocol Handler Services + // + EFI_INSTALL_PROTOCOL_INTERFACE InstallProtocolInterface; // EFI 1.0+ + EFI_REINSTALL_PROTOCOL_INTERFACE ReinstallProtocolInterface; // EFI 1.0+ + EFI_UNINSTALL_PROTOCOL_INTERFACE UninstallProtocolInterface; // EFI 1.0+ + EFI_HANDLE_PROTOCOL HandleProtocol; // EFI 1.0+ + void *Reserved; // EFI 1.0+ + EFI_REGISTER_PROTOCOL_NOTIFY RegisterProtocolNotify; // EFI 1.0+ + EFI_LOCATE_HANDLE LocateHandle; // EFI 1.0+ + EFI_LOCATE_DEVICE_PATH LocateDevicePath; // EFI 1.0+ + EFI_INSTALL_CONFIGURATION_TABLE InstallConfigurationTable; // EFI 1.0+ + + // + // Image Services + // + EFI_IMAGE_UNLOAD LoadImage; // EFI 1.0+ + EFI_IMAGE_START StartImage; // EFI 1.0+ + EFI_EXIT Exit; // EFI 1.0+ + EFI_IMAGE_UNLOAD UnloadImage; // EFI 1.0+ + EFI_EXIT_BOOT_SERVICES ExitBootServices; // EFI 1.0+ + + // + // Miscellaneous Services + // + EFI_GET_NEXT_MONOTONIC_COUNT GetNextMonotonicCount; // EFI 1.0+ + EFI_STALL Stall; // EFI 1.0+ + EFI_SET_WATCHDOG_TIMER SetWatchdogTimer; // EFI 1.0+ + + // + // DriverSupport Services + // + EFI_CONNECT_CONTROLLER ConnectController; // EFI 1.1+ + EFI_DISCONNECT_CONTROLLER DisconnectController; // EFI 1.1+ + + // + // Open and Close Protocol Services + // + EFI_OPEN_PROTOCOL OpenProtocol; // EFI 1.1+ + EFI_CLOSE_PROTOCOL CloseProtocol; // EFI 1.1+ + EFI_OPEN_PROTOCOL_INFORMATION OpenProtocolInformation; // EFI 1.1+ + + // + // Library Services + // + 
EFI_PROTOCOLS_PER_HANDLE ProtocolsPerHandle; // EFI 1.1+ + EFI_LOCATE_HANDLE_BUFFER LocateHandleBuffer; // EFI 1.1+ + EFI_LOCATE_PROTOCOL LocateProtocol; // EFI 1.1+ + EFI_UNINSTALL_MULTIPLE_PROTOCOL_INTERFACES + InstallMultipleProtocolInterfaces; // EFI 1.1+ + EFI_UNINSTALL_MULTIPLE_PROTOCOL_INTERFACES + UninstallMultipleProtocolInterfaces; // EFI 1.1+* + + // + // 32-bit CRC Services + // + EFI_CALCULATE_CRC32 CalculateCrc32; // EFI 1.1+ + + // + // Miscellaneous Services + // + EFI_COPY_MEM CopyMem; // EFI 1.1+ + EFI_SET_MEM SetMem; // EFI 1.1+ + EFI_CREATE_EVENT_EX CreateEventEx; // UEFI 2.0+ +} EFI_BOOT_SERVICES; + +#endif // LLVM_LIBC_TYPES_EFI_BOOT_SERVICES_H diff --git a/libc/include/llvm-libc-types/EFI_CAPSULE.h b/libc/include/llvm-libc-types/EFI_CAPSULE.h new file mode 100644 index 0000000000000..c7440c9b03b75 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_CAPSULE.h @@ -0,0 +1,26 @@ +//===-- Definition of EFI_CAPSULE type ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_CAPSULE_H +#define LLVM_LIBC_TYPES_EFI_CAPSULE_H + +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_GUID.h" + +typedef struct { + EFI_GUID CapsuleGuid; + uint32_t HeaderSize; + uint32_t Flags; + uint32_t CapsuleImageSize; +} EFI_CAPSULE_HEADER; + +#define CAPSULE_FLAGS_PERSIST_ACROSS_RESET 0x00010000 +#define CAPSULE_FLAGS_POPULATE_SYSTEM_TABLE 0x00020000 +#define CAPSULE_FLAGS_INITIATE_RESET 0x00040000 + +#endif // LLVM_LIBC_TYPES_EFI_CAPSULE_H diff --git a/libc/include/llvm-libc-types/EFI_CONFIGURATION_TABLE.h b/libc/include/llvm-libc-types/EFI_CONFIGURATION_TABLE.h new file mode 100644 index 0000000000000..56cd3e4fbb587 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_CONFIGURATION_TABLE.h @@ -0,0 +1,19 @@ +//===-- Definition of EFI_CONFIGURATION_TABLE type ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_CONFIGURATION_TABLE_H +#define LLVM_LIBC_TYPES_EFI_CONFIGURATION_TABLE_H + +#include "EFI_GUID.h" + +typedef struct { + EFI_GUID VendorGuid; + void *VendorTable; +} EFI_CONFIGURATION_TABLE; + +#endif // LLVM_LIBC_TYPES_EFI_CONFIGURATION_TABLE_H diff --git a/libc/include/llvm-libc-types/EFI_DEVICE_PATH_PROTOCOL.h b/libc/include/llvm-libc-types/EFI_DEVICE_PATH_PROTOCOL.h new file mode 100644 index 0000000000000..f6a0b2e1f45c0 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_DEVICE_PATH_PROTOCOL.h @@ -0,0 +1,23 @@ +//===-- Definition of EFI_DEVICE_PATH_PROTOCOL type -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_DEVICE_PATH_PROTOCOL_H +#define LLVM_LIBC_TYPES_EFI_DEVICE_PATH_PROTOCOL_H + +#include "../llvm-libc-macros/stdint-macros.h" + +#define EFI_DEVICE_PATH_PROTOCOL_GUID \ + {0x09576e91, 0x6d3f, 0x11d2, {0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b}} + +typedef struct _EFI_DEVICE_PATH_PROTOCOL { + uint8_t Type; + uint8_t SubType; + uint8_t Length[2]; +} EFI_DEVICE_PATH_PROTOCOL; + +#endif // LLVM_LIBC_TYPES_EFI_DEVICE_PATH_PROTOCOL_H diff --git a/libc/include/llvm-libc-types/EFI_EVENT.h b/libc/include/llvm-libc-types/EFI_EVENT.h new file mode 100644 index 0000000000000..938856b8e791e --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_EVENT.h @@ -0,0 +1,21 @@ +//===-- Definition of EFI_EVENT type --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_EVENT_H +#define LLVM_LIBC_TYPES_EFI_EVENT_H + +typedef void *EFI_EVENT; + +#define EVT_TIMER 0x80000000 +#define EVT_RUNTIME 0x40000000 +#define EVT_NOTIFY_WAIT 0x00000100 +#define EVT_NOTIFY_SIGNAL 0x00000200 +#define EVT_SIGNAL_EXIT_BOOT_SERVICES 0x00000201 +#define EVT_SIGNAL_VIRTUAL_ADDRESS_CHANGE 0x60000202 + +#endif // LLVM_LIBC_TYPES_EFI_EVENT_H diff --git a/libc/include/llvm-libc-types/EFI_GUID.h b/libc/include/llvm-libc-types/EFI_GUID.h new file mode 100644 index 0000000000000..b3530008384dd --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_GUID.h @@ -0,0 +1,21 @@ +//===-- Definition of EFI_GUID type -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_GUID_H +#define LLVM_LIBC_TYPES_EFI_GUID_H + +#include "../llvm-libc-macros/stdint-macros.h" + +typedef struct { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; +} EFI_GUID; + +#endif // LLVM_LIBC_TYPES_EFI_GUID_H diff --git a/libc/include/llvm-libc-types/EFI_HANDLE.h b/libc/include/llvm-libc-types/EFI_HANDLE.h new file mode 100644 index 0000000000000..d4376dd247533 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_HANDLE.h @@ -0,0 +1,14 @@ +//===-- Definition of EFI_HANDLE type ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_HANDLE_H +#define LLVM_LIBC_TYPES_EFI_HANDLE_H + +typedef void *EFI_HANDLE; + +#endif // LLVM_LIBC_TYPES_EFI_HANDLE_H diff --git a/libc/include/llvm-libc-types/EFI_INTERFACE_TYPE.h b/libc/include/llvm-libc-types/EFI_INTERFACE_TYPE.h new file mode 100644 index 0000000000000..d463c5381b3f0 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_INTERFACE_TYPE.h @@ -0,0 +1,16 @@ +//===-- Definition of EFI_INTERFACE_TYPE type -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_INTERFACE_TYPE_H +#define LLVM_LIBC_TYPES_EFI_INTERFACE_TYPE_H + +typedef enum { + EFI_NATIVE_INTERFACE, +} EFI_INTERFACE_TYPE; + +#endif // LLVM_LIBC_TYPES_EFI_INTERFACE_TYPE_H diff --git a/libc/include/llvm-libc-types/EFI_LOCATE_SEARCH_TYPE.h b/libc/include/llvm-libc-types/EFI_LOCATE_SEARCH_TYPE.h new file mode 100644 index 0000000000000..3a8fd7bc3e776 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_LOCATE_SEARCH_TYPE.h @@ -0,0 +1,18 @@ +//===-- Definition of EFI_LOCATE_SEARCH_TYPE type -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_LOCATE_SEARCH_TYPE_H +#define LLVM_LIBC_TYPES_EFI_LOCATE_SEARCH_TYPE_H + +typedef enum { + AllHandles, + ByRegisterNotify, + ByProtocol, +} EFI_LOCATE_SEARCH_TYPE; + +#endif // LLVM_LIBC_TYPES_EFI_LOCATE_SEARCH_TYPE_H diff --git a/libc/include/llvm-libc-types/EFI_MEMORY_DESCRIPTOR.h b/libc/include/llvm-libc-types/EFI_MEMORY_DESCRIPTOR.h new file mode 100644 index 0000000000000..72d0579aef76c --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_MEMORY_DESCRIPTOR.h @@ -0,0 +1,43 @@ +//===-- Definition of EFI_MEMORY_DESCRIPTOR type --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_MEMORY_DESCRIPTOR_H +#define LLVM_LIBC_TYPES_EFI_MEMORY_DESCRIPTOR_H + +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_PHYSICAL_ADDRESS.h" +#include "EFI_VIRTUAL_ADDRESS.h" + +#define EFI_MEMORY_DESCRIPTOR_VERSION 1 + +#define EFI_MEMORY_UC 0x0000000000000001 +#define EFI_MEMORY_WC 0x0000000000000002 +#define EFI_MEMORY_WT 0x0000000000000004 +#define EFI_MEMORY_WB 0x0000000000000008 +#define EFI_MEMORY_UCE 0x0000000000000010 +#define EFI_MEMORY_WP 0x0000000000001000 +#define EFI_MEMORY_RP 0x0000000000002000 +#define EFI_MEMORY_XP 0x0000000000004000 +#define EFI_MEMORY_NV 0x0000000000008000 +#define EFI_MEMORY_MORE_RELIABLE 0x0000000000010000 +#define EFI_MEMORY_RO 0x0000000000020000 +#define EFI_MEMORY_SP 0x0000000000040000 +#define EFI_MEMORY_CPU_CRYPTO 0x0000000000080000 +#define EFI_MEMORY_RUNTIME 0x8000000000000000 +#define EFI_MEMORY_ISA_VALID 0x4000000000000000 +#define EFI_MEMORY_ISA_MASK 
0x0FFFF00000000000 + +typedef struct { + uint32_t Type; + EFI_PHYSICAL_ADDRESS PhysicalStart; + EFI_VIRTUAL_ADDRESS VirtualStart; + uint64_t NumberOfPages; + uint64_t Attribute; +} EFI_MEMORY_DESCRIPTOR; + +#endif // LLVM_LIBC_TYPES_EFI_MEMORY_DESCRIPTOR_H diff --git a/libc/include/llvm-libc-types/EFI_MEMORY_TYPE.h b/libc/include/llvm-libc-types/EFI_MEMORY_TYPE.h new file mode 100644 index 0000000000000..c8921cda2c388 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_MEMORY_TYPE.h @@ -0,0 +1,32 @@ +//===-- Definition of EFI_MEMORY_TYPE type --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_MEMORY_TYPE_H +#define LLVM_LIBC_TYPES_EFI_MEMORY_TYPE_H + +typedef enum { + EfiReservedMemoryType, + EfiLoaderCode, + EfiLoaderData, + EfiBootServicesCode, + EfiBootServicesData, + EfiRuntimeServicesCode, + EfiRuntimeServicesData, + EfiConventionalMemory, + EfiUnusableMemory, + EfiACPIReclaimMemory, + EfiACPIMemoryNVS, + EfiMemoryMappedIO, + EfiMemoryMappedIOPortSpace, + EfiPalCode, + EfiPersistentMemory, + EfiUnacceptedMemoryType, + EfiMaxMemoryType +} EFI_MEMORY_TYPE; + +#endif // LLVM_LIBC_TYPES_EFI_MEMORY_TYPE_H diff --git a/libc/include/llvm-libc-types/EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h b/libc/include/llvm-libc-types/EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h new file mode 100644 index 0000000000000..de0c59c139efb --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_OPEN_PROTOCOL_INFORMATION_ENTRY.h @@ -0,0 +1,22 @@ +//===-- Definition of EFI_OPEN_PROTOCOL_INFORMATION_ENTRY type ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_OPEN_PROTOCOL_INFORMATION_ENTRY_H +#define LLVM_LIBC_TYPES_EFI_OPEN_PROTOCOL_INFORMATION_ENTRY_H + +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_HANDLE.h" + +typedef struct { + EFI_HANDLE AgentHandle; + EFI_HANDLE ControllerHandle; + uint32_t Attributes; + uint32_t OpenCount; +} EFI_OPEN_PROTOCOL_INFORMATION_ENTRY; + +#endif // LLVM_LIBC_TYPES_EFI_OPEN_PROTOCOL_INFORMATION_ENTRY_H diff --git a/libc/include/llvm-libc-types/EFI_PHYSICAL_ADDRESS.h b/libc/include/llvm-libc-types/EFI_PHYSICAL_ADDRESS.h new file mode 100644 index 0000000000000..8880ee66c0f8d --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_PHYSICAL_ADDRESS.h @@ -0,0 +1,16 @@ +//===-- Definition of EFI_PHYSICAL_ADDRESS type ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_PHYSICAL_ADDRESS_H +#define LLVM_LIBC_TYPES_EFI_PHYSICAL_ADDRESS_H + +#include "../llvm-libc-macros/stdint-macros.h" + +typedef uint64_t EFI_PHYSICAL_ADDRESS; + +#endif // LLVM_LIBC_TYPES_EFI_PHYSICAL_ADDRESS_H diff --git a/libc/include/llvm-libc-types/EFI_RUNTIME_SERVICES.h b/libc/include/llvm-libc-types/EFI_RUNTIME_SERVICES.h new file mode 100644 index 0000000000000..8913118b0844c --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_RUNTIME_SERVICES.h @@ -0,0 +1,137 @@ +//===-- Definition of EFI_RUNTIME_SERVICES type ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_RUNTIME_SERVICES_H +#define LLVM_LIBC_TYPES_EFI_RUNTIME_SERVICES_H + +#include "../llvm-libc-macros/EFIAPI-macros.h" +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_CAPSULE.h" +#include "EFI_MEMORY_DESCRIPTOR.h" +#include "EFI_PHYSICAL_ADDRESS.h" +#include "EFI_STATUS.h" +#include "EFI_TABLE_HEADER.h" +#include "EFI_TIME.h" +#include "char16_t.h" +#include "size_t.h" + +#define EFI_RUNTIME_SERVICES_SIGNATURE 0x56524553544e5552 +#define EFI_RUNTIME_SERVICES_REVISION EFI_SPECIFICATION_VERSION + +#define EFI_VARIABLE_NON_VOLATILE 0x00000001 +#define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 +#define EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 +#define EFI_VARIABLE_HARDWARE_ERROR_RECORD 0x00000008 +// This attribute is identified by the mnemonic 'HR' elsewhere +// in this specification. +#define EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS 0x00000010 +// NOTE: EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS is deprecated +// and should be considered reserved. 
+#define EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS 0x00000020 +#define EFI_VARIABLE_APPEND_WRITE 0x00000040 +#define EFI_VARIABLE_ENHANCED_AUTHENTICATED_ACCESS 0x00000080 + +typedef enum { + EfiResetCold, + EfiResetWarm, + EfiResetShutdown, + EfiResetPlatformSpecific, +} EFI_RESET_TYPE; + +#define EFI_VARIABLE_AUTHENTICATION_3_CERT_ID_SHA256 1 + +typedef struct { + uint8_t Type; + uint32_t IdSize; + // Value is defined as: + // uint8_t Id[IdSize]; +} EFI_VARIABLE_AUTHENTICATION_3_CERT_ID; + +typedef EFI_STATUS(EFIAPI *EFI_GET_TIME)(EFI_TIME *Time, + EFI_TIME_CAPABILITIES *Capabilities); +typedef EFI_STATUS(EFIAPI *EFI_SET_TIME)(EFI_TIME *Time); +typedef EFI_STATUS(EFIAPI *EFI_GET_WAKEUP_TIME)(bool *Enabled, bool *Pending, + EFI_TIME *Time); +typedef EFI_STATUS(EFIAPI *EFI_SET_WAKEUP_TIME)(bool *Enabled, EFI_TIME *Time); + +typedef EFI_STATUS(EFIAPI *EFI_SET_VIRTUAL_ADDRESS_MAP)( + size_t MemoryMapSize, size_t DescriptorSize, uint32_t DescriptorVersion, + EFI_MEMORY_DESCRIPTOR *VirtualMap); +typedef EFI_STATUS(EFIAPI *EFI_CONVERT_POINTER)(size_t DebugDisposition, + void **Address); + +typedef EFI_STATUS(EFIAPI *EFI_GET_VARIABLE)(char16_t *VariableName, + EFI_GUID *VendorGuid, + uint32_t *Attributes, + size_t *DataSize, void *Data); +typedef EFI_STATUS(EFIAPI *EFI_GET_NEXT_VARIABLE_NAME)(size_t *VariableNameSize, + char16_t *VariableName, + EFI_GUID *VendorGuid); +typedef EFI_STATUS(EFIAPI *EFI_SET_VARIABLE)(char16_t *VariableName, + EFI_GUID *VendorGuid, + uint32_t Attributes, + size_t DataSize, void *Data); + +typedef EFI_STATUS(EFIAPI *EFI_GET_NEXT_HIGH_MONO_COUNT)(uint32_t *HighCount); +typedef void(EFIAPI *EFI_RESET_SYSTEM)(EFI_RESET_TYPE ResetType, + EFI_STATUS ResetStatus, size_t DataSize, + void *ResetData); + +typedef EFI_STATUS(EFIAPI *EFI_UPDATE_CAPSULE)( + EFI_CAPSULE_HEADER **CapsuleHeaderArray, size_t CapsuleCount, + EFI_PHYSICAL_ADDRESS ScatterGatherList); +typedef EFI_STATUS(EFIAPI *EFI_QUERY_CAPSULE_CAPABILITIES)( + EFI_CAPSULE_HEADER 
**CapsuleHeaderArray, size_t CapsuleCount, + uint64_t *MaximumCapsuleSize, EFI_RESET_TYPE ResetType); + +typedef EFI_STATUS(EFIAPI *EFI_QUERY_VARIABLE_INFO)( + uint32_t Attributes, uint64_t *MaximumVariableStorageSize, + uint64_t *RemainingVariableStorageSize, uint64_t *MaximumVariableSize); + +typedef struct { + EFI_TABLE_HEADER Hdr; + + /// + /// Time Services + EFI_GET_TIME GetTime; + EFI_SET_TIME SetTime; + EFI_GET_WAKEUP_TIME GetWakeupTime; + EFI_SET_WAKEUP_TIME SetWakeupTime; + + // + // Virtual Memory Services + // + EFI_SET_VIRTUAL_ADDRESS_MAP SetVirtualAddressMap; + EFI_CONVERT_POINTER ConvertPointer; + + // + // Variable Services + // + EFI_GET_VARIABLE GetVariable; + EFI_GET_NEXT_VARIABLE_NAME GetNextVariableName; + EFI_SET_VARIABLE SetVariable; + + // + // Miscellaneous Services + // + EFI_GET_NEXT_HIGH_MONO_COUNT GetNextHighMonotonicCount; + EFI_RESET_SYSTEM ResetSystem; + + // + // UEFI 2.0 Capsule Services + // + EFI_UPDATE_CAPSULE UpdateCapsule; + EFI_QUERY_CAPSULE_CAPABILITIES QueryCapsuleCapabilities; + + // + // Miscellaneous UEFI 2.0 Service + // + EFI_QUERY_VARIABLE_INFO QueryVariableInfo; +} EFI_RUNTIME_SERVICES; + +#endif // LLVM_LIBC_TYPES_EFI_RUNTIME_SERVICES_H diff --git a/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h b/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h new file mode 100644 index 0000000000000..a6dc0952b6310 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h @@ -0,0 +1,39 @@ +//===-- Definition of EFI_SIMPLE_TEXT_INPUT_PROTOCOL type -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_INPUT_PROTOCOL_H +#define LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_INPUT_PROTOCOL_H + +#include "../llvm-libc-macros/EFIAPI-macros.h" +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_EVENT.h" +#include "EFI_STATUS.h" +#include "char16_t.h" + +#define EFI_SIMPLE_TEXT_INPUT_PROTOCOL_GUID \ + {0x387477c1, 0x69c7, 0x11d2, {0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b}} + +typedef struct { + uint16_t ScanCode; + char16_t UnicodeChar; +} EFI_INPUT_KEY; + +struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL; + +typedef EFI_STATUS(EFIAPI *EFI_INPUT_RESET)( + struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL *This, bool ExtendedVerification); +typedef EFI_STATUS(EFIAPI *EFI_INPUT_READ_KEY)( + struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL *This, EFI_INPUT_KEY *Key); + +typedef struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL { + EFI_INPUT_RESET Reset; + EFI_INPUT_READ_KEY ReadKeyStroke; + EFI_EVENT WaitForKey; +} EFI_SIMPLE_TEXT_INPUT_PROTOCOL; + +#endif // LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_INPUT_PROTOCOL_H diff --git a/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h b/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h new file mode 100644 index 0000000000000..b5014c46a0722 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h @@ -0,0 +1,64 @@ +//===-- Definition of EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL type ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL_H +#define LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL_H + +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_STATUS.h" +#include "size_t.h" + +#define EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL_GUID \ + {0x387477c2, 0x69c7, 0x11d2, {0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b}} + +struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL; + +typedef EFI_STATUS(EFIAPI *EFI_TEXT_RESET)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, bool ExtendedVerification); +typedef EFI_STATUS(EFIAPI *EFI_TEXT_STRING)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, const char16_t *String); +typedef EFI_STATUS(EFIAPI *EFI_TEXT_TEST_STRING)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, const char16_t *String); +typedef EFI_STATUS(EFIAPI *EFI_TEXT_QUERY_MODE)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, size_t ModeNumber, + size_t *Columns, size_t *Rows); + +typedef EFI_STATUS(EFIAPI *EFI_TEXT_SET_MODE)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, size_t ModeNumber); +typedef EFI_STATUS(EFIAPI *EFI_TEXT_SET_ATTRIBUTE)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, size_t Attribute); +typedef EFI_STATUS(EFIAPI *EFI_TEXT_CLEAR_SCREEN)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This); +typedef EFI_STATUS(EFIAPI *EFI_TEXT_SET_CURSOR_POSITION)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, size_t Column, size_t Row); +typedef EFI_STATUS(EFIAPI *EFI_TEXT_ENABLE_CURSOR)( + struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, bool Visible); + +typedef struct { + int32_t MaxMode; + int32_t Mode; + int32_t Attribute; + int32_t CursorColumn; + int32_t CursorRow; + bool CursorVisible; +} SIMPLE_TEXT_OUTPUT_MODE; + +typedef struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL { + EFI_TEXT_RESET Reset; + EFI_TEXT_STRING OutputString; + EFI_TEXT_TEST_STRING TestString; + EFI_TEXT_QUERY_MODE 
QueryMode; + EFI_TEXT_SET_MODE SetMode; + EFI_TEXT_SET_ATTRIBUTE SetAttribute; + EFI_TEXT_CLEAR_SCREEN ClearScreen; + EFI_TEXT_SET_CURSOR_POSITION SetCursorPosition; + EFI_TEXT_ENABLE_CURSOR EnableCursor; + SIMPLE_TEXT_OUTPUT_MODE *Mode; +} EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL; + +#endif // LLVM_LIBC_TYPES_EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL_H diff --git a/libc/include/llvm-libc-types/EFI_STATUS.h b/libc/include/llvm-libc-types/EFI_STATUS.h new file mode 100644 index 0000000000000..f7fa6e52381e1 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_STATUS.h @@ -0,0 +1,16 @@ +//===-- Definition of EFI_STATUS type ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_STATUS_H +#define LLVM_LIBC_TYPES_EFI_STATUS_H + +#include "size_t.h" + +typedef size_t EFI_STATUS; + +#endif // LLVM_LIBC_TYPES_EFI_STATUS_H diff --git a/libc/include/llvm-libc-types/EFI_SYSTEM_TABLE.h b/libc/include/llvm-libc-types/EFI_SYSTEM_TABLE.h new file mode 100644 index 0000000000000..290067ad862e1 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_SYSTEM_TABLE.h @@ -0,0 +1,65 @@ +//===-- Definition of EFI_SYSTEM_TABLE type -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_SYSTEM_TABLE_H +#define LLVM_LIBC_TYPES_EFI_SYSTEM_TABLE_H + +#include "../llvm-libc-macros/stdint-macros.h" +#include "EFI_BOOT_SERVICES.h" +#include "EFI_CONFIGURATION_TABLE.h" +#include "EFI_HANDLE.h" +#include "EFI_RUNTIME_SERVICES.h" +#include "EFI_SIMPLE_TEXT_INPUT_PROTOCOL.h" +#include "EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.h" +#include "EFI_STATUS.h" +#include "EFI_TABLE_HEADER.h" + +#include "char16_t.h" +#include "size_t.h" + +#define EFI_SYSTEM_TABLE_SIGNATURE 0x5453595320494249 +#define EFI_2_100_SYSTEM_TABLE_REVISION ((2 << 16) | (100)) +#define EFI_2_90_SYSTEM_TABLE_REVISION ((2 << 16) | (90)) +#define EFI_2_80_SYSTEM_TABLE_REVISION ((2 << 16) | (80)) +#define EFI_2_70_SYSTEM_TABLE_REVISION ((2 << 16) | (70)) +#define EFI_2_60_SYSTEM_TABLE_REVISION ((2 << 16) | (60)) +#define EFI_2_50_SYSTEM_TABLE_REVISION ((2 << 16) | (50)) +#define EFI_2_40_SYSTEM_TABLE_REVISION ((2 << 16) | (40)) +#define EFI_2_31_SYSTEM_TABLE_REVISION ((2 << 16) | (31)) +#define EFI_2_30_SYSTEM_TABLE_REVISION ((2 << 16) | (30)) +#define EFI_2_20_SYSTEM_TABLE_REVISION ((2 << 16) | (20)) +#define EFI_2_10_SYSTEM_TABLE_REVISION ((2 << 16) | (10)) +#define EFI_2_00_SYSTEM_TABLE_REVISION ((2 << 16) | (00)) +#define EFI_1_10_SYSTEM_TABLE_REVISION ((1 << 16) | (10)) +#define EFI_1_02_SYSTEM_TABLE_REVISION ((1 << 16) | (02)) +#define EFI_SPECIFICATION_VERSION EFI_SYSTEM_TABLE_REVISION +#define EFI_SYSTEM_TABLE_REVISION EFI_2_100_SYSTEM_TABLE_REVISION + +typedef struct { + EFI_TABLE_HEADER Hdr; + + char16_t *FirmwareVendor; + uint32_t FirmwareRevision; + + EFI_HANDLE ConsoleInHandle; + EFI_SIMPLE_TEXT_INPUT_PROTOCOL *ConIn; + + EFI_HANDLE ConsoleOutHandle; + EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *ConOut; + + EFI_HANDLE StandardErrorHandle; + EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *StdErr; + + EFI_RUNTIME_SERVICES *RuntimeServices; + 
EFI_BOOT_SERVICES *BootServices; + + size_t NumberOfTableEntries; + EFI_CONFIGURATION_TABLE *ConfigurationTable; +} EFI_SYSTEM_TABLE; + +#endif // LLVM_LIBC_TYPES_EFI_SYSTEM_TABLE_H diff --git a/libc/include/llvm-libc-types/EFI_TABLE_HEADER.h b/libc/include/llvm-libc-types/EFI_TABLE_HEADER.h new file mode 100644 index 0000000000000..293968ecc4d1b --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_TABLE_HEADER.h @@ -0,0 +1,22 @@ +//===-- Definition of EFI_TABLE_HEADER type -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_TABLE_HEADER_H +#define LLVM_LIBC_TYPES_EFI_TABLE_HEADER_H + +#include "../llvm-libc-macros/stdint-macros.h" + +typedef struct { + uint64_t Signature; + uint32_t Revision; + uint32_t HeaderSize; + uint32_t CRC32; + uint32_t Reserved; +} EFI_TABLE_HEADER; + +#endif // LLVM_LIBC_TYPES_EFI_TABLE_HEADER_H diff --git a/libc/include/llvm-libc-types/EFI_TIME.h b/libc/include/llvm-libc-types/EFI_TIME.h new file mode 100644 index 0000000000000..b0e38b987d44e --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_TIME.h @@ -0,0 +1,37 @@ +//===-- Definition of EFI_TIME type ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_TIME_H +#define LLVM_LIBC_TYPES_EFI_TIME_H + +#include "../llvm-libc-macros/stdint-macros.h" + +typedef struct { + uint16_t Year; // 1900 - 9999 + uint8_t Month; // 1 - 12 + uint8_t Day; // 1 - 31 + uint8_t Hour; // 0 - 23 + uint8_t Minute; // 0 - 59 + uint8_t Second; // 0 - 59 + uint8_t Pad1; + uint32_t Nanosecond; // 0 - 999,999,999 + int16_t TimeZone; // -1440 to 1440 or 2047 +} EFI_TIME; + +#define EFI_TIME_ADJUST_DAYLIGHT 0x01 +#define EFI_TIME_IN_DAYLIGHT 0x02 + +#define EFI_UNSPECIFIED_TIMEZONE 0x07FF + +typedef struct { + uint32_t Resolution; + uint32_t Accuracy; + bool SetsToZero; +} EFI_TIME_CAPABILITIES; + +#endif // LLVM_LIBC_TYPES_EFI_TIME_H diff --git a/libc/include/llvm-libc-types/EFI_TIMER_DELAY.h b/libc/include/llvm-libc-types/EFI_TIMER_DELAY.h new file mode 100644 index 0000000000000..2a6872c69c8b3 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_TIMER_DELAY.h @@ -0,0 +1,18 @@ +//===-- Definition of EFI_TIMER_DELAY type --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_TIMER_DELAY_H +#define LLVM_LIBC_TYPES_EFI_TIMER_DELAY_H + +typedef enum { + TimerCancel, + TimerPeriodic, + TimerRelative, +} EFI_TIMER_DELAY; + +#endif // LLVM_LIBC_TYPES_EFI_TIMER_DELAY_H diff --git a/libc/include/llvm-libc-types/EFI_TPL.h b/libc/include/llvm-libc-types/EFI_TPL.h new file mode 100644 index 0000000000000..8361ccfacd6f5 --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_TPL.h @@ -0,0 +1,21 @@ +//===-- Definition of EFI_TPL type ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_TPL_H +#define LLVM_LIBC_TYPES_EFI_TPL_H + +#include "size_t.h" + +typedef size_t EFI_TPL; + +#define TPL_APPLICATION 4 +#define TPL_CALLBACK 8 +#define TPL_NOTIFY 16 +#define TPL_HIGH_LEVEL 31 + +#endif // LLVM_LIBC_TYPES_EFI_TPL_H diff --git a/libc/include/llvm-libc-types/EFI_VIRTUAL_ADDRESS.h b/libc/include/llvm-libc-types/EFI_VIRTUAL_ADDRESS.h new file mode 100644 index 0000000000000..46cbec734dadc --- /dev/null +++ b/libc/include/llvm-libc-types/EFI_VIRTUAL_ADDRESS.h @@ -0,0 +1,16 @@ +//===-- Definition of EFI_VIRTUAL_ADDRESS type ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_EFI_VIRTUAL_ADDRESS_H +#define LLVM_LIBC_TYPES_EFI_VIRTUAL_ADDRESS_H + +#include "../llvm-libc-macros/stdint-macros.h" + +typedef uint64_t EFI_VIRTUAL_ADDRESS; + +#endif // LLVM_LIBC_TYPES_EFI_VIRTUAL_ADDRESS_H From 511e7b73fdb906759c208f9752d4bc84871bbe8a Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 28 Feb 2025 16:55:36 +0000 Subject: [PATCH 111/123] [clang][HIP] Make some math not not work with AMDGCN SPIR-V (#128360) Do not hardcode `address_space(5)` (`private`) in the ROCDL interface, as that breaks SPIRV generation (the latter uses 0). Add test. In the long run we should stop using ROCDL inline. --- .../Headers/__clang_hip_libdevice_declares.h | 32 +- clang/lib/Headers/__clang_hip_math.h | 28 +- clang/test/Headers/__clang_hip_math.hip | 1655 +++++++++++++++++ 3 files changed, 1679 insertions(+), 36 deletions(-) diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h index f15198b3d9f93..fa8d918248dd0 100644 --- a/clang/lib/Headers/__clang_hip_libdevice_declares.h +++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h @@ -14,6 +14,8 @@ #include "hip/hip_version.h" #endif // __has_include("hip/hip_version.h") +#define __PRIVATE_AS __attribute__((opencl_private)) + #ifdef __cplusplus extern "C" { #endif @@ -55,8 +57,7 @@ __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); __device__ __attribute__((const)) __device__ float __ocml_fmod_f32(float, float); -__device__ float __ocml_frexp_f32(float, - __attribute__((address_space(5))) int *); +__device__ float __ocml_frexp_f32(float, __PRIVATE_AS int *); __device__ __attribute__((const)) float __ocml_hypot_f32(float, float); __device__ __attribute__((const)) int __ocml_ilogb_f32(float); 
__device__ __attribute__((const)) int __ocml_isfinite_f32(float); @@ -74,8 +75,7 @@ __device__ __attribute__((pure)) float __ocml_native_log2_f32(float); __device__ __attribute__((const)) float __ocml_logb_f32(float); __device__ __attribute__((pure)) float __ocml_log_f32(float); __device__ __attribute__((pure)) float __ocml_native_log_f32(float); -__device__ float __ocml_modf_f32(float, - __attribute__((address_space(5))) float *); +__device__ float __ocml_modf_f32(float, __PRIVATE_AS float *); __device__ __attribute__((const)) float __ocml_nearbyint_f32(float); __device__ __attribute__((const)) float __ocml_nextafter_f32(float, float); __device__ __attribute__((const)) float __ocml_len3_f32(float, float, float); @@ -87,8 +87,7 @@ __device__ __attribute__((pure)) float __ocml_pow_f32(float, float); __device__ __attribute__((pure)) float __ocml_pown_f32(float, int); __device__ __attribute__((pure)) float __ocml_rcbrt_f32(float); __device__ __attribute__((const)) float __ocml_remainder_f32(float, float); -__device__ float __ocml_remquo_f32(float, float, - __attribute__((address_space(5))) int *); +__device__ float __ocml_remquo_f32(float, float, __PRIVATE_AS int *); __device__ __attribute__((const)) float __ocml_rhypot_f32(float, float); __device__ __attribute__((const)) float __ocml_rint_f32(float); __device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float); @@ -99,10 +98,8 @@ __device__ __attribute__((pure)) float __ocml_rsqrt_f32(float); __device__ __attribute__((const)) float __ocml_scalb_f32(float, float); __device__ __attribute__((const)) float __ocml_scalbn_f32(float, int); __device__ __attribute__((const)) int __ocml_signbit_f32(float); -__device__ float __ocml_sincos_f32(float, - __attribute__((address_space(5))) float *); -__device__ float __ocml_sincospi_f32(float, - __attribute__((address_space(5))) float *); +__device__ float __ocml_sincos_f32(float, __PRIVATE_AS float *); +__device__ float __ocml_sincospi_f32(float, __PRIVATE_AS float 
*); __device__ float __ocml_sin_f32(float); __device__ float __ocml_native_sin_f32(float); __device__ __attribute__((pure)) float __ocml_sinh_f32(float); @@ -176,8 +173,7 @@ __device__ __attribute__((const)) double __ocml_fma_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fmax_f64(double, double); __device__ __attribute__((const)) double __ocml_fmin_f64(double, double); __device__ __attribute__((const)) double __ocml_fmod_f64(double, double); -__device__ double __ocml_frexp_f64(double, - __attribute__((address_space(5))) int *); +__device__ double __ocml_frexp_f64(double, __PRIVATE_AS int *); __device__ __attribute__((const)) double __ocml_hypot_f64(double, double); __device__ __attribute__((const)) int __ocml_ilogb_f64(double); __device__ __attribute__((const)) int __ocml_isfinite_f64(double); @@ -192,8 +188,7 @@ __device__ __attribute__((pure)) double __ocml_log1p_f64(double); __device__ __attribute__((pure)) double __ocml_log2_f64(double); __device__ __attribute__((const)) double __ocml_logb_f64(double); __device__ __attribute__((pure)) double __ocml_log_f64(double); -__device__ double __ocml_modf_f64(double, - __attribute__((address_space(5))) double *); +__device__ double __ocml_modf_f64(double, __PRIVATE_AS double *); __device__ __attribute__((const)) double __ocml_nearbyint_f64(double); __device__ __attribute__((const)) double __ocml_nextafter_f64(double, double); __device__ __attribute__((const)) double __ocml_len3_f64(double, double, @@ -206,8 +201,7 @@ __device__ __attribute__((pure)) double __ocml_pow_f64(double, double); __device__ __attribute__((pure)) double __ocml_pown_f64(double, int); __device__ __attribute__((pure)) double __ocml_rcbrt_f64(double); __device__ __attribute__((const)) double __ocml_remainder_f64(double, double); -__device__ double __ocml_remquo_f64(double, double, - __attribute__((address_space(5))) int *); +__device__ double __ocml_remquo_f64(double, double, __PRIVATE_AS int *); __device__ 
__attribute__((const)) double __ocml_rhypot_f64(double, double); __device__ __attribute__((const)) double __ocml_rint_f64(double); __device__ __attribute__((const)) double __ocml_rlen3_f64(double, double, @@ -219,10 +213,8 @@ __device__ __attribute__((pure)) double __ocml_rsqrt_f64(double); __device__ __attribute__((const)) double __ocml_scalb_f64(double, double); __device__ __attribute__((const)) double __ocml_scalbn_f64(double, int); __device__ __attribute__((const)) int __ocml_signbit_f64(double); -__device__ double __ocml_sincos_f64(double, - __attribute__((address_space(5))) double *); -__device__ double -__ocml_sincospi_f64(double, __attribute__((address_space(5))) double *); +__device__ double __ocml_sincos_f64(double, __PRIVATE_AS double *); +__device__ double __ocml_sincospi_f64(double, __PRIVATE_AS double *); __device__ double __ocml_sin_f64(double); __device__ __attribute__((pure)) double __ocml_sinh_f64(double); __device__ double __ocml_sinpi_f64(double); diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index 8468751d9de26..bf8517bc3a507 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -33,6 +33,9 @@ #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif +#pragma push_macro("__PRIVATE_AS") + +#define __PRIVATE_AS __attribute__((opencl_private)) // Device library provides fast low precision and slow full-recision // implementations for some functions. 
Which one gets selected depends on // __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if @@ -512,8 +515,7 @@ float modff(float __x, float *__iptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - float __r = - __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); + float __r = __ocml_modf_f32(__x, (__PRIVATE_AS float *)&__tmp); *__iptr = __tmp; return __r; } @@ -595,8 +597,7 @@ float remquof(float __x, float __y, int *__quo) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - float __r = __ocml_remquo_f32( - __x, __y, (__attribute__((address_space(5))) int *)&__tmp); + float __r = __ocml_remquo_f32(__x, __y, (__PRIVATE_AS int *)&__tmp); *__quo = __tmp; return __r; @@ -657,8 +658,7 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) { #ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__ __sincosf(__x, __sinptr, __cosptr); #else - *__sinptr = - __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); + *__sinptr = __ocml_sincos_f32(__x, (__PRIVATE_AS float *)&__tmp); *__cosptr = __tmp; #endif } @@ -669,8 +669,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincospi_f32( - __x, (__attribute__((address_space(5))) float *)&__tmp); + *__sinptr = __ocml_sincospi_f32(__x, (__PRIVATE_AS float *)&__tmp); *__cosptr = __tmp; } @@ -913,8 +912,7 @@ double modf(double __x, double *__iptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - double __r = - __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); + double __r = __ocml_modf_f64(__x, (__PRIVATE_AS double *)&__tmp); *__iptr = __tmp; return __r; @@ -1004,8 +1002,7 @@ double remquo(double __x, double __y, int *__quo) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) 
allocator(omp_thread_mem_alloc) #endif - double __r = __ocml_remquo_f64( - __x, __y, (__attribute__((address_space(5))) int *)&__tmp); + double __r = __ocml_remquo_f64(__x, __y, (__PRIVATE_AS int *)&__tmp); *__quo = __tmp; return __r; @@ -1065,8 +1062,7 @@ void sincos(double __x, double *__sinptr, double *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincos_f64( - __x, (__attribute__((address_space(5))) double *)&__tmp); + *__sinptr = __ocml_sincos_f64(__x, (__PRIVATE_AS double *)&__tmp); *__cosptr = __tmp; } @@ -1076,8 +1072,7 @@ void sincospi(double __x, double *__sinptr, double *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincospi_f64( - __x, (__attribute__((address_space(5))) double *)&__tmp); + *__sinptr = __ocml_sincospi_f64(__x, (__PRIVATE_AS double *)&__tmp); *__cosptr = __tmp; } @@ -1322,6 +1317,7 @@ __host__ inline static int max(int __arg1, int __arg2) { #endif #pragma pop_macro("__DEVICE__") +#pragma pop_macro("__PRIVATE_AS") #pragma pop_macro("__RETURN_TYPE") #pragma pop_macro("__FAST_OR_SLOW") diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index e4254d1e64bec..a375ea47b530d 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -26,6 +26,14 @@ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -fgpu-approx-transcendentals -o - \ // RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,APPROX %s +// Check that we use the AMDGCNSPIRV address space map +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple spirv64-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -emit-llvm %s -fcuda-is-device -O1 -o - \ +// RUN: -D__HIPCC_RTC__ | FileCheck 
-check-prefixes=AMDGCNSPIRV %s + #define BOOL_TYPE int typedef unsigned long long uint64_t; @@ -57,6 +65,30 @@ typedef unsigned long long uint64_t; // CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // +// AMDGCNSPIRV-LABEL: @test___make_mantissa_base8( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]] +// AMDGCNSPIRV: while.cond.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5:![0-9]+]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I]] +// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], -8 +// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp eq i8 [[TMP1]], 48 +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl i64 [[__R_0_I]], 3 +// AMDGCNSPIRV-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 +// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64 +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]] +// AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP8:![0-9]+]] +// AMDGCNSPIRV: _ZL21__make_mantissa_base8PKc.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] 
], [ [[__R_0_I]], [[WHILE_COND_I]] ] +// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] +// extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { return __make_mantissa_base8(p); } @@ -89,6 +121,30 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { // CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // +// AMDGCNSPIRV-LABEL: @test___make_mantissa_base10( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]] +// AMDGCNSPIRV: while.cond.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I]] +// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -48 +// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10 +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = mul i64 [[__R_0_I]], 10 +// AMDGCNSPIRV-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 +// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64 +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]] +// AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop 
[[LOOP11:![0-9]+]] +// AMDGCNSPIRV: _ZL22__make_mantissa_base10PKc.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] +// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] +// extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { return __make_mantissa_base10(p); } @@ -131,6 +187,44 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { // CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // +// AMDGCNSPIRV-LABEL: @test___make_mantissa_base16( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]] +// AMDGCNSPIRV: while.cond.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[CLEANUP_I:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_2_I:%.*]], [[CLEANUP_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] +// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -48 +// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[IF_END31_I:%.*]], label [[IF_ELSE_I:%.*]] +// AMDGCNSPIRV: if.else.i: +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = add i8 [[TMP0]], -97 +// AMDGCNSPIRV-NEXT: [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP2]], 6 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]] +// AMDGCNSPIRV: if.else17.i: +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP0]], -65 +// AMDGCNSPIRV-NEXT: [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP3]], 6 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I]], label 
[[IF_END31_I]], label [[CLEANUP_I]] +// AMDGCNSPIRV: if.end31.i: +// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ] +// AMDGCNSPIRV-NEXT: [[MUL24_I:%.*]] = shl i64 [[__R_0_I]], 4 +// AMDGCNSPIRV-NEXT: [[CONV25_I:%.*]] = zext nneg i8 [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]] +// AMDGCNSPIRV-NEXT: [[ADD28_I:%.*]] = add i64 [[ADD26_I]], [[CONV25_I]] +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 1 +// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I]] +// AMDGCNSPIRV: cleanup.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I]], [[IF_END31_I]] ], [ [[__TAGP_ADDR_0_I]], [[IF_ELSE17_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_2_I]] = phi i64 [ [[ADD28_I]], [[IF_END31_I]] ], [ [[__R_0_I]], [[IF_ELSE17_I]] ] +// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi i1 [ true, [[IF_END31_I]] ], [ false, [[IF_ELSE17_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], !llvm.loop [[LOOP12:![0-9]+]] +// AMDGCNSPIRV: _ZL22__make_mantissa_base16PKc.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] +// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] +// extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { return __make_mantissa_base16(p); } @@ -226,6 +320,89 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ 0, [[CLEANUP_I36_I]] ], [ [[__R_0_I32_I]], [[WHILE_COND_I30_I]] ], [ 0, [[CLEANUP_I20_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_0_I]] // +// AMDGCNSPIRV-LABEL: @test___make_mantissa( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) 
[[P:%.*]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I:%.*]] +// AMDGCNSPIRV: if.then.i: +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[P]], i64 1 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [ +// AMDGCNSPIRV-NEXT: i8 120, label [[WHILE_COND_I28_I_PREHEADER:%.*]] +// AMDGCNSPIRV-NEXT: i8 88, label [[WHILE_COND_I28_I_PREHEADER]] +// AMDGCNSPIRV-NEXT: ] +// AMDGCNSPIRV: while.cond.i28.i.preheader: +// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I28_I:%.*]] +// AMDGCNSPIRV: while.cond.i28.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I:%.*]], [[CLEANUP_I_I:%.*]] ], [ [[INCDEC_PTR_I]], [[WHILE_COND_I28_I_PREHEADER]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I30_I:%.*]] = phi i64 [ [[__R_2_I_I:%.*]], [[CLEANUP_I_I]] ], [ 0, [[WHILE_COND_I28_I_PREHEADER]] ] +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I:%.*]] = icmp eq i8 [[TMP2]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I:%.*]] +// AMDGCNSPIRV: while.body.i32.i: +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 +// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I:%.*]] = icmp ult i8 [[TMP3]], 10 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I]], label [[IF_END31_I_I:%.*]], label [[IF_ELSE_I_I:%.*]] +// AMDGCNSPIRV: if.else.i.i: +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 +// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I]], label [[IF_END31_I_I]], label [[IF_ELSE17_I_I:%.*]] +// AMDGCNSPIRV: if.else17.i.i: +// AMDGCNSPIRV-NEXT: 
[[TMP5:%.*]] = add i8 [[TMP2]], -65 +// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[CLEANUP_I_I]] +// AMDGCNSPIRV: if.end31.i.i: +// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ] +// AMDGCNSPIRV-NEXT: [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I30_I]], 4 +// AMDGCNSPIRV-NEXT: [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]] +// AMDGCNSPIRV-NEXT: [[ADD28_I_I:%.*]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]] +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I37_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I]], i64 1 +// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I_I]] +// AMDGCNSPIRV: cleanup.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I34_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I]], [[IF_END31_I_I]] ], [ [[__TAGP_ADDR_0_I29_I]], [[IF_ELSE17_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_2_I_I]] = phi i64 [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ [[__R_0_I30_I]], [[IF_ELSE17_I_I]] ] +// AMDGCNSPIRV-NEXT: [[COND_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I]] ], [ false, [[IF_ELSE17_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[COND_I_I]], label [[WHILE_COND_I28_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV: while.cond.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I:%.*]], [[WHILE_BODY_I_I:%.*]] ], [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], [[WHILE_BODY_I_I]] ], [ 0, [[IF_THEN_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I]] +// AMDGCNSPIRV: 
while.body.i.i: +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 +// AMDGCNSPIRV-NEXT: [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 +// AMDGCNSPIRV-NEXT: [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I]], 3 +// AMDGCNSPIRV-NEXT: [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48 +// AMDGCNSPIRV-NEXT: [[SUB_I_I:%.*]] = add i64 [[ADD_I_I]], [[CONV5_I_I]] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I]] to i64 +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], i64 [[__TAGP_ADDR_1_I_I_IDX]] +// AMDGCNSPIRV-NEXT: [[__R_1_I_I]] = select i1 [[OR_COND_I_I]], i64 [[SUB_I_I]], i64 [[__R_0_I_I]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I]], label [[WHILE_COND_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP8]] +// AMDGCNSPIRV: while.cond.i14.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I:%.*]], [[WHILE_BODY_I18_I:%.*]] ], [ [[P]], [[ENTRY:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I26_I:%.*]], [[WHILE_BODY_I18_I]] ], [ 0, [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP8]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I]] +// AMDGCNSPIRV: while.body.i18.i: +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 +// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP9]], 10 +// AMDGCNSPIRV-NEXT: [[MUL_I20_I:%.*]] = mul i64 [[__R_0_I16_I]], 10 +// AMDGCNSPIRV-NEXT: [[CONV5_I21_I:%.*]] = zext nneg i8 [[TMP8]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD_I22_I:%.*]] = add i64 [[MUL_I20_I]], -48 +// AMDGCNSPIRV-NEXT: [[SUB_I23_I:%.*]] = add i64 [[ADD_I22_I]], [[CONV5_I21_I]] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_IDX:%.*]] = 
zext i1 [[OR_COND_I19_I]] to i64 +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], i64 [[__TAGP_ADDR_1_I25_I_IDX]] +// AMDGCNSPIRV-NEXT: [[__R_1_I26_I]] = select i1 [[OR_COND_I19_I]], i64 [[SUB_I23_I]], i64 [[__R_0_I16_I]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I]], label [[WHILE_COND_I14_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV: _ZL15__make_mantissaPKc.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ 0, [[CLEANUP_I_I]] ], [ [[__R_0_I30_I]], [[WHILE_COND_I28_I]] ], [ 0, [[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ] +// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_0_I]] +// extern "C" __device__ uint64_t test___make_mantissa(const char *p) { return __make_mantissa(p); } @@ -235,6 +412,11 @@ extern "C" __device__ uint64_t test___make_mantissa(const char *p) { // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) // CHECK-NEXT: ret i32 [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_abs( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) addrspace(4) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) +// AMDGCNSPIRV-NEXT: ret i32 [[TMP0]] +// extern "C" __device__ int test_abs(int x) { return abs(x); } @@ -244,6 +426,11 @@ extern "C" __device__ int test_abs(int x) { // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) // CHECK-NEXT: ret i64 [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_labs( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// AMDGCNSPIRV-NEXT: ret i64 [[TMP0]] +// extern "C" __device__ long test_labs(long x) { return labs(x); } @@ -253,6 +440,11 @@ extern "C" 
__device__ long test_labs(long x) { // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) // CHECK-NEXT: ret i64 [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_llabs( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) +// AMDGCNSPIRV-NEXT: ret i64 [[TMP0]] +// extern "C" __device__ long long test_llabs(long x) { return llabs(x); } @@ -272,6 +464,11 @@ extern "C" __device__ long long test_llabs(long x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_acosf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_acosf(float x) { return acosf(x); } @@ -291,6 +488,11 @@ extern "C" __device__ float test_acosf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_acos( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_acos(double x) { return acos(x); } @@ -310,6 +512,11 @@ extern "C" __device__ double test_acos(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_acoshf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: 
[[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_acoshf(float x) { return acoshf(x); } @@ -329,6 +536,11 @@ extern "C" __device__ float test_acoshf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_acosh( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_acosh(double x) { return acosh(x); } @@ -348,6 +560,11 @@ extern "C" __device__ double test_acosh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_asinf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_asinf(float x) { return asinf(x); } @@ -367,6 +584,11 @@ extern "C" __device__ float test_asinf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_asin( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_asin(double x) { return asin(x); @@ -387,6 +609,11 @@ extern "C" __device__ 
double test_asin(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_asinhf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_asinhf(float x) { return asinhf(x); } @@ -406,6 +633,11 @@ extern "C" __device__ float test_asinhf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_asinh( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_asinh(double x) { return asinh(x); } @@ -425,6 +657,11 @@ extern "C" __device__ double test_asinh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_atan2f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_atan2f(float x, float y) { return atan2f(x, y); } @@ -444,6 +681,11 @@ extern "C" __device__ float test_atan2f(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // 
+// AMDGCNSPIRV-LABEL: @test_atan2( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_atan2(double x, double y) { return atan2(x, y); } @@ -463,6 +705,11 @@ extern "C" __device__ double test_atan2(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_atanf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_atanf(float x) { return atanf(x); } @@ -482,6 +729,11 @@ extern "C" __device__ float test_atanf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_atan( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_atan(double x) { return atan(x); } @@ -501,6 +753,11 @@ extern "C" __device__ double test_atan(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_atanhf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float 
[[CALL_I]] +// extern "C" __device__ float test_atanhf(float x) { return atanhf(x); } @@ -520,6 +777,11 @@ extern "C" __device__ float test_atanhf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_atanh( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_atanh(double x) { return atanh(x); } @@ -539,6 +801,11 @@ extern "C" __device__ double test_atanh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cbrtf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_cbrtf(float x) { return cbrtf(x); } @@ -558,6 +825,11 @@ extern "C" __device__ float test_cbrtf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cbrt( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_cbrt(double x) { return cbrt(x); } @@ -577,6 +849,11 @@ extern "C" __device__ double test_cbrt(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// 
AMDGCNSPIRV-LABEL: @test_ceilf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ceil.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_ceilf(float x) { return ceilf(x); } @@ -596,6 +873,11 @@ extern "C" __device__ float test_ceilf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_ceil( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ceil.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_ceil(double x) { return ceil(x); } @@ -615,6 +897,11 @@ extern "C" __device__ double test_ceil(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_copysignf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_copysignf(float x, float y) { return copysignf(x, y); } @@ -634,6 +921,11 @@ extern "C" __device__ float test_copysignf(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_copysign( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_copysign(double x, double y) { return copysign(x, y); } @@ -653,6 +945,11 @@ extern "C" __device__ double 
test_copysign(double x, double y) { // APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I1]] // +// AMDGCNSPIRV-LABEL: @test_cosf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_cosf(float x) { return cosf(x); } @@ -672,6 +969,11 @@ extern "C" __device__ float test_cosf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cos( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_cos(double x) { return cos(x); } @@ -691,6 +993,11 @@ extern "C" __device__ double test_cos(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_coshf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_coshf(float x) { return coshf(x); } @@ -710,6 +1017,11 @@ extern "C" __device__ float test_coshf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cosh( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: 
[[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_cosh(double x) { return cosh(x); } @@ -729,6 +1041,11 @@ extern "C" __device__ double test_cosh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cospif( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_cospif(float x) { return cospif(x); } @@ -748,10 +1065,16 @@ extern "C" __device__ float test_cospif(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cospi( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_cospi(double x) { return cospi(x); } +// // DEFAULT-LABEL: @test_cyl_bessel_i0f( // DEFAULT-NEXT: entry: // DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]] @@ -767,6 +1090,11 @@ extern "C" __device__ double test_cospi(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i0_f32(float 
noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_cyl_bessel_i0f(float x) { return cyl_bessel_i0f(x); } @@ -786,6 +1114,11 @@ extern "C" __device__ float test_cyl_bessel_i0f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_cyl_bessel_i0(double x) { return cyl_bessel_i0(x); } @@ -805,6 +1138,11 @@ extern "C" __device__ double test_cyl_bessel_i0(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_cyl_bessel_i1f(float x) { return cyl_bessel_i1f(x); } @@ -824,6 +1162,11 @@ extern "C" __device__ float test_cyl_bessel_i1f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_cyl_bessel_i1(double x) { return cyl_bessel_i1(x); } @@ -843,6 +1186,11 @@ extern "C" 
__device__ double test_cyl_bessel_i1(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_erfcf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_erfcf(float x) { return erfcf(x); } @@ -862,6 +1210,11 @@ extern "C" __device__ float test_erfcf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_erfc( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_erfc(double x) { return erfc(x); } @@ -881,6 +1234,11 @@ extern "C" __device__ double test_erfc(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_erfinvf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_erfinvf(float x) { return erfinvf(x); } @@ -900,6 +1258,11 @@ extern "C" __device__ float test_erfinvf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_erfinv( +// AMDGCNSPIRV-NEXT: entry: +// 
AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_erfinv(double x) { return erfinv(x); } @@ -919,6 +1282,11 @@ extern "C" __device__ double test_erfinv(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_exp10_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_exp10f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_exp10_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_exp10f(float x) { return exp10f(x); } @@ -938,6 +1306,11 @@ extern "C" __device__ float test_exp10f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_exp10( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_exp10(double x) { return exp10(x); } @@ -957,6 +1330,11 @@ extern "C" __device__ double test_exp10(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_exp2f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp2.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_exp2f(float x) { return exp2f(x); } @@ -976,6 +1354,11 @@ extern "C" __device__ float test_exp2f(float x) { // 
APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_exp2( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_exp2(double x) { return exp2(x); } @@ -995,6 +1378,11 @@ extern "C" __device__ double test_exp2(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_expf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_expf(float x) { return expf(x); } @@ -1014,6 +1402,11 @@ extern "C" __device__ float test_expf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_exp( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_exp(double x) { return exp(x); } @@ -1033,6 +1426,11 @@ extern "C" __device__ double test_exp(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_expm1f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]] 
+// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_expm1f(float x) { return expm1f(x); } @@ -1052,6 +1450,11 @@ extern "C" __device__ float test_expm1f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_expm1( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_expm1(double x) { return expm1(x); } @@ -1071,6 +1474,11 @@ extern "C" __device__ double test_expm1(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fabsf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_fabsf(float x) { return fabsf(x); } @@ -1090,6 +1498,11 @@ extern "C" __device__ float test_fabsf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fabs( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_fabs(double x) { return fabs(x); } @@ -1109,6 +1522,11 @@ extern "C" __device__ double test_fabs(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_fdimf( +// 
AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_fdimf(float x, float y) { return fdimf(x, y); } @@ -1128,6 +1546,11 @@ extern "C" __device__ float test_fdimf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_fdim( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_fdim(double x, double y) { return fdim(x, y); } @@ -1147,6 +1570,11 @@ extern "C" __device__ double test_fdim(double x, double y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[DIV_I]] // +// AMDGCNSPIRV-LABEL: @test_fdividef( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-NEXT: ret float [[DIV_I]] +// extern "C" __device__ float test_fdividef(float x, float y) { return fdividef(x, y); } @@ -1166,6 +1594,11 @@ extern "C" __device__ float test_fdividef(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_floorf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.floor.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_floorf(float x) { return floorf(x); } @@ -1185,6 +1618,11 @@ extern "C" 
__device__ float test_floorf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_floor( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.floor.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_floor(double x) { return floor(x); } @@ -1204,6 +1642,11 @@ extern "C" __device__ double test_floor(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fmaf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_fmaf(float x, float y, float z) { return fmaf(x, y, z); } @@ -1223,6 +1666,11 @@ extern "C" __device__ float test_fmaf(float x, float y, float z) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fma( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_fma(double x, double y, double z) { return fma(x, y, z); } @@ -1242,6 +1690,11 @@ extern "C" __device__ double test_fma(double x, double y, double z) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fma_rn( +// 
AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_fma_rn(double x, double y, double z) { return __fma_rn(x, y, z); } @@ -1261,6 +1714,11 @@ extern "C" __device__ double test_fma_rn(double x, double y, double z) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fmaxf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_fmaxf(float x, float y) { return fmaxf(x, y); } @@ -1280,10 +1738,16 @@ extern "C" __device__ float test_fmaxf(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fmax( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_fmax(double x, double y) { return fmax(x, y); } +// // DEFAULT-LABEL: @test_fminf( // DEFAULT-NEXT: entry: // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) @@ -1299,6 +1763,11 @@ extern "C" __device__ double test_fmax(double x, double y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fminf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract 
noundef addrspace(4) float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_fminf(float x, float y) { return fminf(x, y); } @@ -1318,6 +1787,11 @@ extern "C" __device__ float test_fminf(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_fmin( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_fmin(double x, double y) { return fmin(x, y); } @@ -1337,6 +1811,11 @@ extern "C" __device__ double test_fmin(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_fmodf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_fmodf(float x, float y) { return fmodf(x, y); } @@ -1356,6 +1835,11 @@ extern "C" __device__ float test_fmodf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_fmod( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double 
test_fmod(double x, double y) { return fmod(x, y); } @@ -1368,6 +1852,14 @@ extern "C" __device__ double test_fmod(double x, double y) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // CHECK-NEXT: ret float [[TMP2]] // +// AMDGCNSPIRV-LABEL: @test_frexpf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA13:![0-9]+]] +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 +// AMDGCNSPIRV-NEXT: ret float [[TMP2]] +// extern "C" __device__ float test_frexpf(float x, int* y) { return frexpf(x, y); } @@ -1380,6 +1872,14 @@ extern "C" __device__ float test_frexpf(float x, int* y) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // CHECK-NEXT: ret double [[TMP2]] // +// AMDGCNSPIRV-LABEL: @test_frexp( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 +// AMDGCNSPIRV-NEXT: ret double [[TMP2]] +// extern "C" __device__ double test_frexp(double x, int* y) { return frexp(x, y); } @@ -1399,6 +1899,11 @@ extern "C" __device__ double test_frexp(double x, int* y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_hypotf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float 
@__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_hypotf(float x, float y) { return hypotf(x, y); } @@ -1418,6 +1923,11 @@ extern "C" __device__ float test_hypotf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_hypot( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_hypot(double x, double y) { return hypot(x, y); } @@ -1437,6 +1947,11 @@ extern "C" __device__ double test_hypot(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret i32 [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_ilogbf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret i32 [[CALL_I]] +// extern "C" __device__ int test_ilogbf(float x) { return ilogbf(x); } @@ -1456,6 +1971,11 @@ extern "C" __device__ int test_ilogbf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret i32 [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_ilogb( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret i32 [[CALL_I]] +// extern "C" __device__ int test_ilogb(double x) { return ilogb(x); } @@ -1478,6 +1998,13 @@ extern "C" __device__ int 
test_ilogb(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // +// AMDGCNSPIRV-LABEL: @test___finitef( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000 +// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] +// extern "C" __device__ BOOL_TYPE test___finitef(float x) { return __finitef(x); } @@ -1500,6 +2027,13 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // +// AMDGCNSPIRV-LABEL: @test___finite( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000 +// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] +// extern "C" __device__ BOOL_TYPE test___finite(double x) { return __finite(x); } @@ -1522,6 +2056,13 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // +// AMDGCNSPIRV-LABEL: @test___isinff( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000 +// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] +// extern "C" __device__ BOOL_TYPE test___isinff(float x) { return __isinff(x); } @@ -1544,6 +2085,13 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // +// AMDGCNSPIRV-LABEL: @test___isinf( +// AMDGCNSPIRV-NEXT: entry: +// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000 +// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] +// extern "C" __device__ BOOL_TYPE test___isinf(double x) { return __isinf(x); } @@ -1564,6 +2112,12 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // +// AMDGCNSPIRV-LABEL: @test___isnanf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 +// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 +// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] +// extern "C" __device__ BOOL_TYPE test___isnanf(float x) { return __isnanf(x); } @@ -1584,6 +2138,12 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // +// AMDGCNSPIRV-LABEL: @test___isnan( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00 +// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 +// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] +// extern "C" __device__ BOOL_TYPE test___isnan(double x) { return __isnan(x); } @@ -1603,6 +2163,11 @@ extern "C" __device__ BOOL_TYPE test___isnan(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_j0f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_j0f(float x) { return j0f(x); } @@ -1622,6 +2187,11 @@ extern "C" __device__ float test_j0f(float x) { // 
APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_j0( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_j0(double x) { return j0(x); } @@ -1641,6 +2211,11 @@ extern "C" __device__ double test_j0(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_j1f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_j1f(float x) { return j1f(x); } @@ -1660,6 +2235,11 @@ extern "C" __device__ float test_j1f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_j1( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_j1(double x) { return j1(x); } @@ -1763,6 +2343,39 @@ extern "C" __device__ double test_j1(double x) { // APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // APPROX-NEXT: ret float [[RETVAL_0_I]] // +// AMDGCNSPIRV-LABEL: @test_jnf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], 
label [[IF_END4_I:%.*]] [ +// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] +// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// AMDGCNSPIRV-NEXT: ] +// AMDGCNSPIRV: if.then.i: +// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]] +// AMDGCNSPIRV: if.then2.i: +// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label [[_ZL3JNFIF_EXIT]] +// AMDGCNSPIRV: if.end4.i: +// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 +// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]] +// AMDGCNSPIRV: for.body.i: +// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] +// AMDGCNSPIRV-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] +// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] +// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 +// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] +// 
AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] +// AMDGCNSPIRV: _ZL3jnfif.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// AMDGCNSPIRV-NEXT: ret float [[RETVAL_0_I]] +// extern "C" __device__ float test_jnf(int x, float y) { return jnf(x, y); } @@ -1866,6 +2479,39 @@ extern "C" __device__ float test_jnf(int x, float y) { // APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // APPROX-NEXT: ret double [[RETVAL_0_I]] // +// AMDGCNSPIRV-LABEL: @test_jn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ +// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] +// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// AMDGCNSPIRV-NEXT: ] +// AMDGCNSPIRV: if.then.i: +// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label [[_ZL2JNID_EXIT:%.*]] +// AMDGCNSPIRV: if.then2.i: +// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label [[_ZL2JNID_EXIT]] +// AMDGCNSPIRV: if.end4.i: +// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 +// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]] +// AMDGCNSPIRV: 
for.body.i: +// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] +// AMDGCNSPIRV-NEXT: [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]] +// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] +// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 +// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] +// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]] +// AMDGCNSPIRV: _ZL2jnid.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// AMDGCNSPIRV-NEXT: ret double [[RETVAL_0_I]] +// extern "C" __device__ double test_jn(int x, double y) { return jn(x, y); } @@ -1885,6 +2531,11 @@ extern "C" __device__ double test_jn(int x, double y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_ldexpf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_ldexpf(float x, int y) { return ldexpf(x, y); } @@ -1904,6 +2555,11 @@ extern "C" __device__ float test_ldexpf(float x, int y) { 
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_ldexp( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_ldexp(double x, int y) { return ldexp(x, y); } @@ -1923,6 +2579,11 @@ extern "C" __device__ double test_ldexp(double x, int y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_lgammaf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_lgammaf(float x) { return lgammaf(x); } @@ -1942,6 +2603,11 @@ extern "C" __device__ float test_lgammaf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_lgamma( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_lgamma(double x) { return lgamma(x); } @@ -1964,6 +2630,12 @@ extern "C" __device__ double test_lgamma(double x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // +// AMDGCNSPIRV-LABEL: @test_llrintf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) 
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] +// extern "C" __device__ long long int test_llrintf(float x) { return llrintf(x); } @@ -1986,6 +2658,12 @@ extern "C" __device__ long long int test_llrintf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // +// AMDGCNSPIRV-LABEL: @test_llrint( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] +// extern "C" __device__ long long int test_llrint(double x) { return llrint(x); } @@ -2008,6 +2686,12 @@ extern "C" __device__ long long int test_llrint(double x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // +// AMDGCNSPIRV-LABEL: @test_llroundf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] +// extern "C" __device__ long long int test_llroundf(float x) { return llroundf(x); } @@ -2030,6 +2714,12 @@ extern "C" __device__ long long int test_llroundf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // +// AMDGCNSPIRV-LABEL: @test_llround( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] +// extern "C" __device__ long long int test_llround(double x) { return llround(x); } @@ -2049,6 +2739,11 @@ extern "C" __device__ long long int test_llround(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract 
noundef float @llvm.log10.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_log10f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_log10f(float x) { return log10f(x); } @@ -2068,6 +2763,11 @@ extern "C" __device__ float test_log10f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_log10( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_log10(double x) { return log10(x); } @@ -2087,6 +2787,11 @@ extern "C" __device__ double test_log10(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_log1pf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_log1pf(float x) { return log1pf(x); } @@ -2106,6 +2811,11 @@ extern "C" __device__ float test_log1pf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_log1p( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// 
AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_log1p(double x) { return log1p(x); } @@ -2125,6 +2835,11 @@ extern "C" __device__ double test_log1p(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_log2f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log2_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_log2f(float x) { return log2f(x); } @@ -2144,6 +2859,11 @@ extern "C" __device__ float test_log2f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_log2( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_log2(double x) { return log2(x); } @@ -2163,6 +2883,11 @@ extern "C" __device__ double test_log2(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_logbf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_logbf(float x) { return logbf(x); } @@ -2182,6 +2907,11 @@ extern "C" __device__ float test_logbf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]] // 
APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_logb( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_logb(double x) { return logb(x); } @@ -2201,6 +2931,11 @@ extern "C" __device__ double test_logb(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_logf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_logf(float x) { return logf(x); } @@ -2223,6 +2958,12 @@ extern "C" __device__ float test_logf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // +// AMDGCNSPIRV-LABEL: @test_lrintf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] +// extern "C" __device__ long int test_lrintf(float x) { return lrintf(x); } @@ -2245,6 +2986,12 @@ extern "C" __device__ long int test_lrintf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // +// AMDGCNSPIRV-LABEL: @test_lrint( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] +// extern "C" __device__ long int test_lrint(double x) { return lrint(x); } @@ -2267,6 +3014,12 @@ 
extern "C" __device__ long int test_lrint(double x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // +// AMDGCNSPIRV-LABEL: @test_lroundf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] +// extern "C" __device__ long int test_lroundf(float x) { return lroundf(x); } @@ -2289,6 +3042,12 @@ extern "C" __device__ long int test_lroundf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // +// AMDGCNSPIRV-LABEL: @test_lround( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] +// extern "C" __device__ long int test_lround(double x) { return lround(x); } @@ -2323,6 +3082,17 @@ extern "C" __device__ long int test_lround(double x) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_modff( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15:[0-9]+]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_modf_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17:![0-9]+]] +// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 4, 
!tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_modff(float x, float* y) { return modff(x, y); } @@ -2357,6 +3127,17 @@ extern "C" __device__ float test_modff(float x, float* y) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_modf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_modf_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19:![0-9]+]] +// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_modf(double x, double* y) { return modf(x, y); } @@ -2555,6 +3336,93 @@ extern "C" __device__ double test_modf(double x, double* y) { // APPROX-NEXT: [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float // APPROX-NEXT: ret float [[TMP10]] // +// AMDGCNSPIRV-LABEL: @test_nanf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG:%.*]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] +// AMDGCNSPIRV: 
if.then.i.i: +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ +// AMDGCNSPIRV-NEXT: i8 120, label [[WHILE_COND_I28_I_I_PREHEADER:%.*]] +// AMDGCNSPIRV-NEXT: i8 88, label [[WHILE_COND_I28_I_I_PREHEADER]] +// AMDGCNSPIRV-NEXT: ] +// AMDGCNSPIRV: while.cond.i28.i.i.preheader: +// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I28_I_I:%.*]] +// AMDGCNSPIRV: while.cond.i28.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I28_I_I_PREHEADER]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[WHILE_COND_I28_I_I_PREHEADER]] ] +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]] +// AMDGCNSPIRV: while.body.i32.i.i: +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 +// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] +// AMDGCNSPIRV: if.else.i.i.i: +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 +// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] +// AMDGCNSPIRV: if.else17.i.i.i: +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 +// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], 
label [[IF_END31_I_I_I]], label [[CLEANUP_I_I_I]] +// AMDGCNSPIRV: if.end31.i.i.i: +// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I]], 4 +// AMDGCNSPIRV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] +// AMDGCNSPIRV-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I37_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], i64 1 +// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I_I_I]] +// AMDGCNSPIRV: cleanup.i.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I34_I_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I29_I_I]], [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I30_I_I]], [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I28_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV: while.cond.i.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]] +// AMDGCNSPIRV: while.body.i.i.i: +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 +// 
AMDGCNSPIRV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 +// AMDGCNSPIRV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 +// AMDGCNSPIRV-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 +// AMDGCNSPIRV-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64 +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]] +// AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP8]] +// AMDGCNSPIRV: while.cond.i14.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]] +// AMDGCNSPIRV: while.body.i18.i.i: +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 +// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 +// AMDGCNSPIRV-NEXT: [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 +// AMDGCNSPIRV-NEXT: [[CONV5_I21_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD_I22_I_I:%.*]] = add i64 [[MUL_I20_I_I]], -48 +// AMDGCNSPIRV-NEXT: [[SUB_I23_I_I:%.*]] = add i64 [[ADD_I22_I_I]], [[CONV5_I21_I_I]] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 
[[OR_COND_I19_I_I]] to i64 +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]] +// AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV: _ZL4nanfPKc.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I30_I_I]], [[WHILE_COND_I28_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 +// AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 +// AMDGCNSPIRV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float +// AMDGCNSPIRV-NEXT: ret float [[TMP10]] +// extern "C" __device__ float test_nanf(const char *tag) { return nanf(tag); } @@ -2751,6 +3619,92 @@ extern "C" __device__ float test_nanf(const char *tag) { // APPROX-NEXT: [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double // APPROX-NEXT: ret double [[TMP10]] // +// AMDGCNSPIRV-LABEL: @test_nan( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG:%.*]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] +// AMDGCNSPIRV: if.then.i.i: +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label 
[[WHILE_COND_I_I_I:%.*]] [ +// AMDGCNSPIRV-NEXT: i8 120, label [[WHILE_COND_I28_I_I_PREHEADER:%.*]] +// AMDGCNSPIRV-NEXT: i8 88, label [[WHILE_COND_I28_I_I_PREHEADER]] +// AMDGCNSPIRV-NEXT: ] +// AMDGCNSPIRV: while.cond.i28.i.i.preheader: +// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I28_I_I:%.*]] +// AMDGCNSPIRV: while.cond.i28.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I28_I_I_PREHEADER]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[WHILE_COND_I28_I_I_PREHEADER]] ] +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]] +// AMDGCNSPIRV: while.body.i32.i.i: +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 +// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] +// AMDGCNSPIRV: if.else.i.i.i: +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 +// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] +// AMDGCNSPIRV: if.else17.i.i.i: +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 +// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I_I_I]] +// AMDGCNSPIRV: if.end31.i.i.i: +// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 
[[__R_0_I30_I_I]], 4 +// AMDGCNSPIRV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] +// AMDGCNSPIRV-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I37_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], i64 1 +// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I_I_I]] +// AMDGCNSPIRV: cleanup.i.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I34_I_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I29_I_I]], [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I30_I_I]], [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] +// AMDGCNSPIRV-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I28_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP12]] +// AMDGCNSPIRV: while.cond.i.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]] +// AMDGCNSPIRV: while.body.i.i.i: +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 +// AMDGCNSPIRV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 +// AMDGCNSPIRV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 +// AMDGCNSPIRV-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 +// 
AMDGCNSPIRV-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64 +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]] +// AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP8]] +// AMDGCNSPIRV: while.cond.i14.i.i: +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]] +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]] +// AMDGCNSPIRV: while.body.i18.i.i: +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 +// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 +// AMDGCNSPIRV-NEXT: [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 +// AMDGCNSPIRV-NEXT: [[CONV5_I21_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 +// AMDGCNSPIRV-NEXT: [[ADD_I22_I_I:%.*]] = add i64 [[MUL_I20_I_I]], -48 +// AMDGCNSPIRV-NEXT: [[SUB_I23_I_I:%.*]] = add i64 [[ADD_I22_I_I]], [[CONV5_I21_I_I]] +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64 +// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]] +// AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 
[[__R_0_I16_I_I]] +// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]] +// AMDGCNSPIRV: _ZL3nanPKc.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I30_I_I]], [[WHILE_COND_I28_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] +// AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 +// AMDGCNSPIRV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double +// AMDGCNSPIRV-NEXT: ret double [[TMP10]] +// extern "C" __device__ double test_nan(const char *tag) { return nan(tag); } @@ -2767,6 +3721,10 @@ extern "C" __device__ double test_nan(const char *tag) { // APPROX-NEXT: entry: // APPROX-NEXT: ret float 0x7FF8000000000000 // +// AMDGCNSPIRV-LABEL: @test_nanf_emptystr( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: ret float 0x7FF8000000000000 +// extern "C" __device__ float test_nanf_emptystr() { return nanf(""); } @@ -2783,6 +3741,10 @@ extern "C" __device__ float test_nanf_emptystr() { // APPROX-NEXT: entry: // APPROX-NEXT: ret double 0x7FF8000000000000 // +// AMDGCNSPIRV-LABEL: @test_nan_emptystr( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: ret double 0x7FF8000000000000 +// extern "C" __device__ double test_nan_emptystr() { return nan(""); } @@ -2799,6 +3761,10 @@ extern "C" __device__ double test_nan_emptystr() { // APPROX-NEXT: entry: // APPROX-NEXT: ret float 0x7FF8000000000000 // +// AMDGCNSPIRV-LABEL: @test_nanf_fill( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: ret float 0x7FF8000000000000 +// extern "C" __device__ float test_nanf_fill() { return nanf("0x456"); } @@ -2815,6 +3781,10 @@ extern "C" __device__ float test_nanf_fill() { // APPROX-NEXT: entry: // APPROX-NEXT: ret double 
0x7FF8000000000000 // +// AMDGCNSPIRV-LABEL: @test_nan_fill( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: ret double 0x7FF8000000000000 +// extern "C" __device__ double test_nan_fill() { return nan("0x123"); } @@ -2834,6 +3804,11 @@ extern "C" __device__ double test_nan_fill() { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_nearbyintf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.nearbyint.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_nearbyintf(float x) { return nearbyintf(x); } @@ -2853,6 +3828,11 @@ extern "C" __device__ float test_nearbyintf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_nearbyint( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.nearbyint.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_nearbyint(double x) { return nearbyint(x); } @@ -2872,6 +3852,11 @@ extern "C" __device__ double test_nearbyint(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_nextafterf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_nextafterf(float x, float y) { return nextafterf(x, y); } @@ -2891,6 +3876,11 @@ extern "C" __device__ float 
test_nextafterf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_nextafter( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_nextafter(double x, double y) { return nextafter(x, y); } @@ -2910,6 +3900,11 @@ extern "C" __device__ double test_nextafter(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_norm3df( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_norm3df(float x, float y, float z) { return norm3df(x, y, z); } @@ -2929,6 +3924,11 @@ extern "C" __device__ float test_norm3df(float x, float y, float z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_norm3d( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double 
test_norm3d(double x, double y, double z) { return norm3d(x, y, z); } @@ -2948,6 +3948,11 @@ extern "C" __device__ double test_norm3d(double x, double y, double z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_norm4df( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_norm4df(float x, float y, float z, float w) { return norm4df(x, y, z, w); } @@ -2967,6 +3972,11 @@ extern "C" __device__ float test_norm4df(float x, float y, float z, float w) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_norm4d( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_norm4d(double x, double y, double z, double w) { return norm4d(x, y, z, w); } @@ -2986,6 +3996,11 @@ extern "C" __device__ double test_norm4d(double x, double y, double z, double w) // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_normcdff( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: 
[[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_normcdff(float x) { return normcdff(x); } @@ -3005,6 +4020,11 @@ extern "C" __device__ float test_normcdff(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_normcdf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_normcdf(double x) { return normcdf(x); } @@ -3024,6 +4044,11 @@ extern "C" __device__ double test_normcdf(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_normcdfinvf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_normcdfinvf(float x) { return normcdfinvf(x); } @@ -3043,6 +4068,11 @@ extern "C" __device__ float test_normcdfinvf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_normcdfinv( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_normcdfinv(double x) { 
return normcdfinv(x); } @@ -3107,6 +4137,26 @@ extern "C" __device__ double test_normcdfinv(double x) { // APPROX-NEXT: [[TMP1:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]]) // APPROX-NEXT: ret float [[TMP1]] // +// AMDGCNSPIRV-LABEL: @test_normf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] +// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] +// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4 +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] +// AMDGCNSPIRV: _ZL5normfiPKf.exit: +// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP1]] +// extern "C" __device__ float test_normf(int x, const float *y) { return normf(x, y); } @@ -3171,6 +4221,26 @@ extern "C" __device__ float 
test_normf(int x, const float *y) { // APPROX-NEXT: [[TMP1:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]]) // APPROX-NEXT: ret double [[TMP1]] // +// AMDGCNSPIRV-LABEL: @test_norm( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] +// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] +// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8 +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] +// AMDGCNSPIRV: _ZL4normiPKd.exit: +// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP1]] +// extern "C" __device__ double test_norm(int x, const double *y) { return norm(x, y); } @@ -3190,6 +4260,11 @@ extern "C" __device__ double test_norm(int x, const double *y) { // APPROX-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_powf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_powf(float x, float y) { return powf(x, y); } @@ -3209,6 +4284,11 @@ extern "C" __device__ float test_powf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_pow( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_pow(double x, double y) { return pow(x, y); } @@ -3228,6 +4308,11 @@ extern "C" __device__ double test_pow(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_powif( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_powif(float x, int y) { return powif(x, y); } @@ -3247,6 +4332,11 @@ extern "C" __device__ float test_powif(float x, int y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef 
[[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_powi( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_powi(double x, int y) { return powi(x, y); } @@ -3266,6 +4356,11 @@ extern "C" __device__ double test_powi(double x, int y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rcbrtf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_rcbrtf(float x) { return rcbrtf(x); } @@ -3285,6 +4380,11 @@ extern "C" __device__ float test_rcbrtf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rcbrt( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_rcbrt(double x) { return rcbrt(x); } @@ -3304,6 +4404,11 @@ extern "C" __device__ double test_rcbrt(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_remainderf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef 
addrspace(4) float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_remainderf(float x, float y) { return remainderf(x, y); } @@ -3323,6 +4428,11 @@ extern "C" __device__ float test_remainderf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_remainder( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_remainder(double x, double y) { return remainder(x, y); } @@ -3357,6 +4467,17 @@ extern "C" __device__ double test_remainder(double x, double y) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_remquof( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] 
+// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_remquof(float x, float y, int* z) { return remquof(x, y, z); } @@ -3391,6 +4512,17 @@ extern "C" __device__ float test_remquof(float x, float y, int* z) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_remquo( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_remquo(double x, double y, int* z) { return remquo(x, y, z); } @@ -3410,6 +4542,11 @@ extern "C" __device__ double test_remquo(double x, double y, int* z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rhypotf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_rhypotf(float x, 
float y) { return rhypotf(x, y); } @@ -3429,6 +4566,11 @@ extern "C" __device__ float test_rhypotf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rhypot( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_rhypot(double x, double y) { return rhypot(x, y); } @@ -3448,6 +4590,11 @@ extern "C" __device__ double test_rhypot(double x, double y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_rintf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_rintf(float x) { return rintf(x); } @@ -3467,6 +4614,11 @@ extern "C" __device__ float test_rintf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_rint( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_rint(double x) { return rint(x); } @@ -3531,6 +4683,26 @@ extern "C" __device__ double test_rint(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rnormf( +// 
AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] +// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] +// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4 +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] +// AMDGCNSPIRV: _ZL6rnormfiPKf.exit: +// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_rnormf(int x, const float* y) { return rnormf(x, y); } @@ -3595,6 +4767,26 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: 
@test_rnorm( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] +// AMDGCNSPIRV: while.body.i: +// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] +// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] +// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] +// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] +// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8 +// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] +// AMDGCNSPIRV: _ZL5rnormiPKd.exit: +// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_rnorm(int x, const double* y) { return rnorm(x, y); } @@ -3614,6 +4806,11 @@ extern "C" __device__ double test_rnorm(int x, const double* y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] // 
APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rnorm3df( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_rnorm3df(float x, float y, float z) { return rnorm3df(x, y, z); } @@ -3633,6 +4830,11 @@ extern "C" __device__ float test_rnorm3df(float x, float y, float z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rnorm3d( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_rnorm3d(double x, double y, double z) { return rnorm3d(x, y, z); } @@ -3652,6 +4854,11 @@ extern "C" __device__ double test_rnorm3d(double x, double y, double z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rnorm4df( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_rnorm4df(float x, float y, float z, float w) { return rnorm4df(x, y, z, w); } @@ -3671,6 +4878,11 @@ extern "C" 
__device__ float test_rnorm4df(float x, float y, float z, float w) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rnorm4d( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_rnorm4d(double x, double y, double z, double w) { return rnorm4d(x, y, z, w); } @@ -3690,6 +4902,11 @@ extern "C" __device__ double test_rnorm4d(double x, double y, double z, double w // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_roundf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.round.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_roundf(float x) { return roundf(x); } @@ -3709,6 +4926,11 @@ extern "C" __device__ float test_roundf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_round( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.round.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_round(double x) { return round(x); } @@ -3728,6 +4950,11 @@ extern "C" __device__ double test_round(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef 
[[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rsqrtf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_rsqrtf(float x) { return rsqrtf(x); } @@ -3747,6 +4974,11 @@ extern "C" __device__ float test_rsqrtf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_rsqrt( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_rsqrt(double x) { return rsqrt(x); } @@ -3796,6 +5028,21 @@ extern "C" __device__ double test_rsqrt(double x) { // APPROX-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ] // APPROX-NEXT: ret float [[COND_I]] // +// AMDGCNSPIRV-LABEL: @test_scalblnf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]] +// AMDGCNSPIRV: cond.true.i: +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]]) +// AMDGCNSPIRV-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]] +// AMDGCNSPIRV: cond.false.i: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]] +// 
AMDGCNSPIRV: _ZL8scalblnffl.exit: +// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ] +// AMDGCNSPIRV-NEXT: ret float [[COND_I]] +// extern "C" __device__ float test_scalblnf(float x, long int y) { return scalblnf(x, y); } @@ -3845,6 +5092,21 @@ extern "C" __device__ float test_scalblnf(float x, long int y) { // APPROX-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ] // APPROX-NEXT: ret double [[COND_I]] // +// AMDGCNSPIRV-LABEL: @test_scalbln( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807 +// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]] +// AMDGCNSPIRV: cond.true.i: +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]]) +// AMDGCNSPIRV-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]] +// AMDGCNSPIRV: cond.false.i: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: br label [[_ZL7SCALBLNDL_EXIT]] +// AMDGCNSPIRV: _ZL7scalblndl.exit: +// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ] +// AMDGCNSPIRV-NEXT: ret double [[COND_I]] +// extern "C" __device__ double test_scalbln(double x, long int y) { return scalbln(x, y); } @@ -3864,6 +5126,11 @@ extern "C" __device__ double test_scalbln(double x, long int y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_scalbnf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef 
addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_scalbnf(float x, int y) { return scalbnf(x, y); } @@ -3883,6 +5150,11 @@ extern "C" __device__ float test_scalbnf(float x, int y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_scalbn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_scalbn(double x, int y) { return scalbn(x, y); } @@ -3893,6 +5165,12 @@ extern "C" __device__ double test_scalbn(double x, int y) { // CHECK-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 // CHECK-NEXT: ret i32 [[DOTLOBIT]] // +// AMDGCNSPIRV-LABEL: @test___signbitf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast float [[X:%.*]] to i32 +// AMDGCNSPIRV-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 +// AMDGCNSPIRV-NEXT: ret i32 [[DOTLOBIT]] +// extern "C" __device__ BOOL_TYPE test___signbitf(float x) { return __signbitf(x); } @@ -3904,6 +5182,13 @@ extern "C" __device__ BOOL_TYPE test___signbitf(float x) { // CHECK-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 // CHECK-NEXT: ret i32 [[CONV]] // +// AMDGCNSPIRV-LABEL: @test___signbit( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast double [[X:%.*]] to i64 +// AMDGCNSPIRV-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63 +// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 +// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] +// extern "C" __device__ BOOL_TYPE test___signbit(double x) { return __signbit(x); } @@ -3941,6 +5226,18 @@ extern "C" __device__ BOOL_TYPE test___signbit(double x) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 
4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // +// AMDGCNSPIRV-LABEL: @test_sincosf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: ret void +// extern "C" __device__ void test_sincosf(float x, float *y, float *z) { sincosf(x, y, z); } @@ -3978,6 +5275,18 @@ extern "C" __device__ void test_sincosf(float x, float *y, float *z) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // +// AMDGCNSPIRV-LABEL: @test_sincos( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = 
load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: ret void +// extern "C" __device__ void test_sincos(double x, double *y, double *z) { sincos(x, y, z); } @@ -4015,6 +5324,18 @@ extern "C" __device__ void test_sincos(double x, double *y, double *z) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // +// AMDGCNSPIRV-LABEL: @test_sincospif( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: ret void +// extern "C" __device__ void test_sincospif(float x, float *y, float *z) { sincospif(x, y, z); } @@ -4052,6 +5373,18 @@ extern "C" __device__ void test_sincospif(float x, float *y, float *z) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // +// AMDGCNSPIRV-LABEL: @test_sincospi( +// AMDGCNSPIRV-NEXT: entry: +// 
AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] +// AMDGCNSPIRV-NEXT: ret void +// extern "C" __device__ void test_sincospi(double x, double *y, double *z) { sincospi(x, y, z); } @@ -4071,6 +5404,11 @@ extern "C" __device__ void test_sincospi(double x, double *y, double *z) { // APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I1]] // +// AMDGCNSPIRV-LABEL: @test_sinf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_sinf(float x) { return sinf(x); } @@ -4090,6 +5428,11 @@ extern "C" __device__ float test_sinf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_sin( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double 
@__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_sin(double x) { return sin(x); } @@ -4109,6 +5452,11 @@ extern "C" __device__ double test_sin(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_sinpif( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_sinpif(float x) { return sinpif(x); } @@ -4128,6 +5476,11 @@ extern "C" __device__ float test_sinpif(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_sinpi( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_sinpi(double x) { return sinpi(x); } @@ -4147,6 +5500,11 @@ extern "C" __device__ double test_sinpi(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_sqrtf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_sqrtf(float x) { return sqrtf(x); } @@ -4166,6 +5524,11 @@ extern "C" __device__ float test_sqrtf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) // 
APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_sqrt( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_sqrt(double x) { return sqrt(x); } @@ -4185,6 +5548,11 @@ extern "C" __device__ double test_sqrt(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_tanf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_tanf(float x) { return tanf(x); } @@ -4204,6 +5572,11 @@ extern "C" __device__ float test_tanf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_tan( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_tan(double x) { return tan(x); } @@ -4223,6 +5596,11 @@ extern "C" __device__ double test_tan(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_tanhf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_tanhf(float x) { 
return tanhf(x); } @@ -4242,6 +5620,11 @@ extern "C" __device__ float test_tanhf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_tanh( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_tanh(double x) { return tanh(x); } @@ -4261,6 +5644,11 @@ extern "C" __device__ double test_tanh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_tgammaf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_tgammaf(float x) { return tgammaf(x); } @@ -4280,6 +5668,11 @@ extern "C" __device__ float test_tgammaf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_tgamma( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_tgamma(double x) { return tgamma(x); } @@ -4299,6 +5692,11 @@ extern "C" __device__ double test_tgamma(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_truncf( +// 
AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.trunc.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_truncf(float x) { return truncf(x); } @@ -4318,6 +5716,11 @@ extern "C" __device__ float test_truncf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_trunc( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.trunc.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_trunc(double x) { return trunc(x); } @@ -4337,6 +5740,11 @@ extern "C" __device__ double test_trunc(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_y0f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_y0f(float x) { return y0f(x); } @@ -4356,6 +5764,11 @@ extern "C" __device__ float test_y0f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_y0( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_y0(double x) { return y0(x); } @@ -4375,6 +5788,11 @@ extern "C" __device__ double test_y0(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_y1f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test_y1f(float x) { return y1f(x); } @@ -4394,6 +5812,11 @@ extern "C" __device__ float test_y1f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test_y1( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] +// extern "C" __device__ double test_y1(double x) { return y1(x); } @@ -4497,6 +5920,39 @@ extern "C" __device__ double test_y1(double x) { // APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // APPROX-NEXT: ret float [[RETVAL_0_I]] // +// AMDGCNSPIRV-LABEL: @test_ynf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ +// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] +// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// AMDGCNSPIRV-NEXT: ] +// AMDGCNSPIRV: if.then.i: +// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]] +// AMDGCNSPIRV: if.then2.i: +// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label 
[[_ZL3YNFIF_EXIT]] +// AMDGCNSPIRV: if.end4.i: +// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 +// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]] +// AMDGCNSPIRV: for.body.i: +// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] +// AMDGCNSPIRV-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] +// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] +// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 +// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] +// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] +// AMDGCNSPIRV: _ZL3ynfif.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// AMDGCNSPIRV-NEXT: ret float [[RETVAL_0_I]] +// extern "C" __device__ float test_ynf(int x, float y) { return ynf(x, y); } @@ -4600,6 +6056,39 @@ extern "C" __device__ float test_ynf(int x, float y) { // APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ 
[[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // APPROX-NEXT: ret double [[RETVAL_0_I]] // +// AMDGCNSPIRV-LABEL: @test_yn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ +// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] +// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] +// AMDGCNSPIRV-NEXT: ] +// AMDGCNSPIRV: if.then.i: +// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label [[_ZL2YNID_EXIT:%.*]] +// AMDGCNSPIRV: if.then2.i: +// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: br label [[_ZL2YNID_EXIT]] +// AMDGCNSPIRV: if.end4.i: +// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 +// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]] +// AMDGCNSPIRV: for.body.i: +// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] +// AMDGCNSPIRV-NEXT: 
[[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]] +// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] +// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 +// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] +// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]] +// AMDGCNSPIRV: _ZL2ynid.exit: +// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] +// AMDGCNSPIRV-NEXT: ret double [[RETVAL_0_I]] +// extern "C" __device__ double test_yn(int x, double y) { return yn(x, y); } @@ -4619,6 +6108,11 @@ extern "C" __device__ double test_yn(int x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test___cosf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test___cosf(float x) { return __cosf(x); } @@ -4641,6 +6135,12 @@ extern "C" __device__ float test___cosf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test___exp10f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test___exp10f(float x) { return __exp10f(x); } @@ -4663,6 +6163,12 @@ extern "C" __device__ 
float test___exp10f(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test___expf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test___expf(float x) { return __expf(x); } @@ -4682,6 +6188,11 @@ extern "C" __device__ float test___expf(float x) { // APPROX-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[ADD_I]] // +// AMDGCNSPIRV-LABEL: @test___fadd_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-NEXT: ret float [[ADD_I]] +// extern "C" __device__ float test___fadd_rn(float x, float y) { return __fadd_rn(x, y); } @@ -4701,6 +6212,11 @@ extern "C" __device__ float test___fadd_rn(float x, float y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[DIV_I]] // +// AMDGCNSPIRV-LABEL: @test___fdividef( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-NEXT: ret float [[DIV_I]] +// extern "C" __device__ float test___fdividef(float x, float y) { return __fdividef(x, y); } @@ -4720,6 +6236,11 @@ extern "C" __device__ float test___fdividef(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test__fmaf_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) +// 
AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test__fmaf_rn(float x, float y, float z) { return __fmaf_rn(x, y, z); } @@ -4739,6 +6260,11 @@ extern "C" __device__ float test__fmaf_rn(float x, float y, float z) { // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[MUL_I]] // +// AMDGCNSPIRV-LABEL: @test___fmul_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-NEXT: ret float [[MUL_I]] +// extern "C" __device__ float test___fmul_rn(float x, float y) { return __fmul_rn(x, y); } @@ -4758,6 +6284,11 @@ extern "C" __device__ float test___fmul_rn(float x, float y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]] // APPROX-NEXT: ret float [[DIV_I]] // +// AMDGCNSPIRV-LABEL: @test___frcp_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]] +// AMDGCNSPIRV-NEXT: ret float [[DIV_I]] +// extern "C" __device__ float test___frcp_rn(float x) { return __frcp_rn(x); } @@ -4777,6 +6308,11 @@ extern "C" __device__ float test___frcp_rn(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test___frsqrt_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.rsq.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test___frsqrt_rn(float x) { return __frsqrt_rn(x); } @@ -4796,6 +6332,11 @@ extern "C" __device__ float test___frsqrt_rn(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test___fsqrt_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail 
call contract spir_func noundef addrspace(4) float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test___fsqrt_rn(float x) { return __fsqrt_rn(x); } @@ -4815,6 +6356,11 @@ extern "C" __device__ float test___fsqrt_rn(float x) { // APPROX-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[SUB_I]] // +// AMDGCNSPIRV-LABEL: @test___fsub_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-NEXT: ret float [[SUB_I]] +// extern "C" __device__ float test___fsub_rn(float x, float y) { return __fsub_rn(x, y); } @@ -4834,6 +6380,11 @@ extern "C" __device__ float test___fsub_rn(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test___log10f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test___log10f(float x) { return __log10f(x); } @@ -4853,6 +6404,11 @@ extern "C" __device__ float test___log10f(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test___log2f( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.log.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test___log2f(float x) { return __log2f(x); } @@ -4872,6 +6428,11 @@ extern "C" __device__ float test___log2f(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: 
@test___logf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test___logf(float x) { return __logf(x); } @@ -4891,6 +6452,11 @@ extern "C" __device__ float test___logf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test___powf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test___powf(float x, float y) { return __powf(x, y); } @@ -4919,6 +6485,14 @@ extern "C" __device__ float test___powf(float x, float y) { // APPROX-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] // APPROX-NEXT: ret float [[COND5_I]] // +// AMDGCNSPIRV-LABEL: @test___saturatef( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00 +// AMDGCNSPIRV-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00 +// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]] +// AMDGCNSPIRV-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] +// AMDGCNSPIRV-NEXT: ret float [[COND5_I]] +// extern "C" __device__ float test___saturatef(float x) { return __saturatef(x); } @@ -4947,6 +6521,14 @@ extern "C" __device__ float test___saturatef(float x) { // APPROX-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] // APPROX-NEXT: ret void // +// AMDGCNSPIRV-LABEL: @test___sincosf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = 
tail call contract spir_func addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[CALL1_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: store float [[CALL1_I]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: ret void +// extern "C" __device__ void test___sincosf(float x, float *y, float *z) { __sincosf(x, y, z); } @@ -4966,6 +6548,11 @@ extern "C" __device__ void test___sincosf(float x, float *y, float *z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // +// AMDGCNSPIRV-LABEL: @test___sinf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] +// extern "C" __device__ float test___sinf(float x) { return __sinf(x); } @@ -4994,6 +6581,14 @@ extern "C" __device__ float test___sinf(float x) { // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] // APPROX-NEXT: ret float [[MUL_I]] // +// AMDGCNSPIRV-LABEL: @test___tanf( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[CALL_I3_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] +// AMDGCNSPIRV-NEXT: ret float [[MUL_I]] +// 
extern "C" __device__ float test___tanf(float x) { return __tanf(x); } @@ -5013,6 +6608,11 @@ extern "C" __device__ float test___tanf(float x) { // APPROX-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret double [[ADD_I]] // +// AMDGCNSPIRV-LABEL: @test___dadd_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-NEXT: ret double [[ADD_I]] +// extern "C" __device__ double test___dadd_rn(double x, double y) { return __dadd_rn(x, y); } @@ -5032,6 +6632,11 @@ extern "C" __device__ double test___dadd_rn(double x, double y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret double [[DIV_I]] // +// AMDGCNSPIRV-LABEL: @test___ddiv_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-NEXT: ret double [[DIV_I]] +// extern "C" __device__ double test___ddiv_rn(double x, double y) { return __ddiv_rn(x, y); } @@ -5051,6 +6656,11 @@ extern "C" __device__ double test___ddiv_rn(double x, double y) { // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret double [[MUL_I]] // +// AMDGCNSPIRV-LABEL: @test___dmul_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]] +// AMDGCNSPIRV-NEXT: ret double [[MUL_I]] +// extern "C" __device__ double test___dmul_rn(double x, double y) { return __dmul_rn(x, y); } @@ -5070,6 +6680,11 @@ extern "C" __device__ double test___dmul_rn(double x, double y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]] // APPROX-NEXT: ret double [[DIV_I]] // +// AMDGCNSPIRV-LABEL: @test___drcp_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]] +// AMDGCNSPIRV-NEXT: ret double [[DIV_I]] +// extern "C" __device__ double test___drcp_rn(double x) { return 
__drcp_rn(x); } @@ -5089,6 +6704,11 @@ extern "C" __device__ double test___drcp_rn(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test___dsqrt_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test___dsqrt_rn(double x) { return __dsqrt_rn(x); } @@ -5108,6 +6728,11 @@ extern "C" __device__ double test___dsqrt_rn(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test__fma_rn( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test__fma_rn(double x, double y, double z) { return __fma_rn(x, y, z); } @@ -5127,6 +6752,11 @@ extern "C" __device__ double test__fma_rn(double x, double y, double z) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_float_min( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_float_min(float x, float y) { return min(x, y); } @@ -5146,6 +6776,11 @@ extern "C" __device__ float test_float_min(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // +// 
AMDGCNSPIRV-LABEL: @test_float_max( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret float [[TMP0]] +// extern "C" __device__ float test_float_max(float x, float y) { return max(x, y); } @@ -5165,6 +6800,11 @@ extern "C" __device__ float test_float_max(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_double_min( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_double_min(double x, double y) { return min(x, y); } @@ -5184,6 +6824,11 @@ extern "C" __device__ double test_double_min(double x, double y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // +// AMDGCNSPIRV-LABEL: @test_double_max( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret double [[TMP0]] +// extern "C" __device__ double test_double_max(double x, double y) { return max(x, y); } @@ -5192,6 +6837,11 @@ extern "C" __device__ double test_double_max(double x, double y) { // CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) // CHECK-NEXT: ret i32 [[COND_I]] // +// AMDGCNSPIRV-LABEL: @test_int_min( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret i32 [[COND_I]] +// extern "C" __device__ int test_int_min(int x, 
int y) { return min(x, y); } @@ -5201,6 +6851,11 @@ extern "C" __device__ int test_int_min(int x, int y) { // CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) // CHECK-NEXT: ret i32 [[COND_I]] // +// AMDGCNSPIRV-LABEL: @test_int_max( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +// AMDGCNSPIRV-NEXT: ret i32 [[COND_I]] +// extern "C" __device__ int test_int_max(int x, int y) { return max(x, y); } From caa7d94199f62301f588e8e73d3bb198057c964f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 28 Feb 2025 17:01:19 +0000 Subject: [PATCH 112/123] [VPlan] Remove unneeded VPValue::getLiveInIRValue() const (NFC). The accessor is not needed/used. --- llvm/lib/Transforms/Vectorize/VPlanValue.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index a058b2a121d59..0a59b137bbd79 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -175,12 +175,7 @@ class VPValue { /// Returns the underlying IR value, if this VPValue is defined outside the /// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef /// inside a VPlan. - Value *getLiveInIRValue() { - assert(isLiveIn() && - "VPValue is not a live-in; it is defined by a VPDef inside a VPlan"); - return getUnderlyingValue(); - } - const Value *getLiveInIRValue() const { + Value *getLiveInIRValue() const { assert(isLiveIn() && "VPValue is not a live-in; it is defined by a VPDef inside a VPlan"); return getUnderlyingValue(); From 2ff1b19ab90c937016614ece7c1939e61fcc187f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 28 Feb 2025 09:05:40 -0800 Subject: [PATCH 113/123] [RISCV] Improve assembler error message for Zcmp stack adjustment. 
(#129180) Instead of referring the user to the spec, print the expected range. --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 32 ++++++++++++------- .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 14 -------- llvm/test/MC/RISCV/rv32xqccmp-invalid.s | 14 ++++---- llvm/test/MC/RISCV/rv32zcmp-invalid.s | 10 +++--- llvm/test/MC/RISCV/rv64xqccmp-invalid.s | 14 ++++---- llvm/test/MC/RISCV/rv64zcmp-invalid.s | 10 +++--- 6 files changed, 44 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 36e2fa0262f9d..85d53f0c5045c 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1684,14 +1684,9 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_InvalidStackAdj: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); - StringRef SpecName = "Zc"; - if (getSTI().hasFeature(RISCV::FeatureVendorXqccmp)) - SpecName = "Xqccmp"; - - return Error(ErrorLoc, - Twine("stack adjustment is invalid for this instruction") + - " and register list; refer to " + SpecName + - " spec for a detailed range of stack adjustment"); + return Error( + ErrorLoc, + "stack adjustment is invalid for this instruction and register list"); } } @@ -2771,12 +2766,25 @@ ParseStatus RISCVAsmParser::parseZcmpStackAdj(OperandVector &Operands, SMLoc S = getLoc(); int64_t StackAdjustment = getLexer().getTok().getIntVal(); - unsigned Spimm = 0; unsigned RlistVal = static_cast(Operands[1].get())->Rlist.Val; - if (Negative != ExpectNegative || - !RISCVZC::getSpimm(RlistVal, Spimm, StackAdjustment, isRV64())) - return ParseStatus::NoMatch; + assert(RlistVal != RISCVZC::INVALID_RLIST); + unsigned StackAdjBase = RISCVZC::getStackAdjBase(RlistVal, isRV64()); + if (Negative != ExpectNegative || StackAdjustment % 16 != 0 || + StackAdjustment < StackAdjBase || (StackAdjustment - StackAdjBase) > 48) 
{ + int64_t Lower = StackAdjBase; + int64_t Upper = StackAdjBase + 48; + if (ExpectNegative) { + Lower = -Lower; + Upper = -Upper; + std::swap(Lower, Upper); + } + return generateImmOutOfRangeError(S, Lower, Upper, + "stack adjustment for register list must " + "be a multiple of 16 bytes in the range"); + } + + unsigned Spimm = (StackAdjustment - StackAdjBase) / 16; Operands.push_back(RISCVOperand::createSpimm(Spimm << 4, S)); getLexer().Lex(); return ParseStatus::Success; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 433be1f1e87d4..82b562bec9bf6 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -637,20 +637,6 @@ inline static unsigned getStackAdjBase(unsigned RlistVal, bool IsRV64) { llvm_unreachable("Unexpected RlistVal"); } -inline static bool getSpimm(unsigned RlistVal, unsigned &SpimmVal, - int64_t StackAdjustment, bool IsRV64) { - if (RlistVal == RLISTENCODE::INVALID_RLIST) - return false; - unsigned StackAdjBase = getStackAdjBase(RlistVal, IsRV64); - StackAdjustment -= StackAdjBase; - if (StackAdjustment % 16 != 0) - return false; - SpimmVal = StackAdjustment / 16; - if (SpimmVal > 3) - return false; - return true; -} - void printRlist(unsigned SlistEncode, raw_ostream &OS); } // namespace RISCVZC diff --git a/llvm/test/MC/RISCV/rv32xqccmp-invalid.s b/llvm/test/MC/RISCV/rv32xqccmp-invalid.s index 899979cb4c601..74f96f076756c 100644 --- a/llvm/test/MC/RISCV/rv32xqccmp-invalid.s +++ b/llvm/test/MC/RISCV/rv32xqccmp-invalid.s @@ -13,23 +13,23 @@ qc.cm.mva01s a1, a2 # CHECK-ERROR: error: invalid register list, {ra, s0-s10} or {x1, x8-x9, x18-x26} is not supported qc.cm.popretz {ra, s0-s10}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a 
multiple of 16 bytes in the range [16, 64] qc.cm.popretz {ra, s0-s1}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.pushfp {ra, s0}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] qc.cm.pop {ra, s0-s1}, -32 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra}, -8 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.pushfp {ra, s0}, -12 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] qc.cm.pop {ra, s0-s1}, -40 diff --git a/llvm/test/MC/RISCV/rv32zcmp-invalid.s b/llvm/test/MC/RISCV/rv32zcmp-invalid.s index 0720a74a9b5c2..4115333fc738b 100644 --- a/llvm/test/MC/RISCV/rv32zcmp-invalid.s +++ 
b/llvm/test/MC/RISCV/rv32zcmp-invalid.s @@ -13,17 +13,17 @@ cm.mva01s a1, a2 # CHECK-ERROR: error: invalid register list, {ra, s0-s10} or {x1, x8-x9, x18-x26} is not supported cm.popretz {ra, s0-s10}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] cm.popretz {ra, s0-s1}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] cm.push {ra}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] cm.pop {ra, s0-s1}, -32 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] cm.push {ra}, -8 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [16, 64] cm.pop {ra, s0-s1}, -40 diff --git a/llvm/test/MC/RISCV/rv64xqccmp-invalid.s b/llvm/test/MC/RISCV/rv64xqccmp-invalid.s index e922572a44749..ba0ed29afa108 100644 --- a/llvm/test/MC/RISCV/rv64xqccmp-invalid.s +++ b/llvm/test/MC/RISCV/rv64xqccmp-invalid.s @@ -13,23 +13,23 @@ qc.cm.mva01s a1, a2 # CHECK-ERROR: error: invalid register list, {ra, s0-s10} or {x1, x8-x9, x18-x26} is not supported qc.cm.popretz {ra, 
s0-s10}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] qc.cm.popretz {ra, s0-s1}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.pushfp {ra, s0}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] qc.cm.pop {ra, s0-s1}, -32 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra}, -15 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] qc.cm.push {ra, s0}, -15 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Xqccmp spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in 
the range [32, 80] qc.cm.pop {ra, s0-s1}, -33 diff --git a/llvm/test/MC/RISCV/rv64zcmp-invalid.s b/llvm/test/MC/RISCV/rv64zcmp-invalid.s index 7e10ab5c2f902..804234d2c11e6 100644 --- a/llvm/test/MC/RISCV/rv64zcmp-invalid.s +++ b/llvm/test/MC/RISCV/rv64zcmp-invalid.s @@ -13,17 +13,17 @@ cm.mva01s a1, a2 # CHECK-ERROR: error: invalid register list, {ra, s0-s10} or {x1, x8-x9, x18-x26} is not supported cm.popretz {ra, s0-s10}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] cm.popretz {ra, s0-s1}, 112 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] cm.push {ra}, 16 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] cm.pop {ra, s0-s1}, -32 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [-64, -16] cm.push {ra}, -15 -# CHECK-ERROR: error: stack adjustment is invalid for this instruction and register list; refer to Zc spec for a detailed range of stack adjustment +# CHECK-ERROR: error: stack adjustment for register list must be a multiple of 16 bytes in the range [32, 80] cm.pop {ra, s0-s1}, -33 From 4f9c6956058fc34cadf0ef1034c55a9078fc68a8 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 28 Feb 2025 09:38:48 -0800 Subject: [PATCH 114/123] 
[llvm-objdump] Rework .gnu.version_d dumping and fix crash when vd_aux is invalid (#86611). vd_version, vd_flags, vd_ndx, and vd_cnt in Elf{32,64}_Verdef are 16-bit. Change VerDef to use uint16_t instead. vda_name specifies a NUL-terminated string. Update getVersionDefinitions to remove some `.c_str()`. Pull Request: https://github.com/llvm/llvm-project/pull/128434 --- llvm/include/llvm/Object/ELF.h | 12 +-- .../llvm-objdump/ELF/private-headers.test | 1 + .../llvm-objdump/ELF/verdef-invalid.test | 77 +++++++++++++++++++ llvm/test/tools/llvm-objdump/ELF/verdef.test | 28 ++++--- .../llvm-readobj/ELF/verdef-invalid.test | 36 ++++++++- llvm/tools/llvm-objdump/ELFDump.cpp | 54 +++++-------- llvm/tools/llvm-objdump/llvm-objdump.cpp | 2 +- llvm/tools/llvm-objdump/llvm-objdump.h | 1 + llvm/tools/llvm-readobj/ELFDumper.cpp | 2 +- 9 files changed, 160 insertions(+), 53 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/ELF/verdef-invalid.test diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index 3aa1d7864fcb7..57a6db6c4e5aa 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -41,10 +41,10 @@ struct VerdAux { struct VerDef { unsigned Offset; - unsigned Version; - unsigned Flags; - unsigned Ndx; - unsigned Cnt; + uint16_t Version; + uint16_t Flags; + uint16_t Ndx; + uint16_t Cnt; unsigned Hash; std::string Name; std::vector AuxV; @@ -1057,8 +1057,8 @@ ELFFile::getVersionDefinitions(const Elf_Shdr &Sec) const { VerdAux Aux; Aux.Offset = VerdauxBuf - Start; - if (Verdaux->vda_name <= StrTabOrErr->size()) - Aux.Name = std::string(StrTabOrErr->drop_front(Verdaux->vda_name)); + if (Verdaux->vda_name < StrTabOrErr->size()) + Aux.Name = std::string(StrTabOrErr->drop_front(Verdaux->vda_name).data()); else Aux.Name = ("vda_name) + ">").str(); return Aux; diff --git a/llvm/test/tools/llvm-objdump/ELF/private-headers.test b/llvm/test/tools/llvm-objdump/ELF/private-headers.test index c90cf59f4ed7d..157e2a2ea0490 100644 
--- a/llvm/test/tools/llvm-objdump/ELF/private-headers.test +++ b/llvm/test/tools/llvm-objdump/ELF/private-headers.test @@ -38,6 +38,7 @@ Sections: Value: 0x0 - Name: .gnu.version_d Type: SHT_GNU_verdef + AddressAlign: 4 Entries: - Version: 1 Flags: 1 diff --git a/llvm/test/tools/llvm-objdump/ELF/verdef-invalid.test b/llvm/test/tools/llvm-objdump/ELF/verdef-invalid.test new file mode 100644 index 0000000000000..45f2331eadc34 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/verdef-invalid.test @@ -0,0 +1,77 @@ +## Adapted from test/llvm-readobj/ELF/verdef-invalid.test +## Check that we report a warning when a SHT_GNU_verdef section contains a version definition +## that refers to an auxiliary entry that goes past the end of the section. + +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump -p %t 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t + +# AUX-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: version definition 1 refers to an auxiliary entry that goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Entries: + - Names: + - FOO + ## The correct sh_size is 28. + ShSize: 27 +DynamicSymbols: + - Name: foo + +## Check we report a warning when a version definition is not correctly aligned in memory. 
+ +# RUN: yaml2obj %s --docnum=2 -o %t2 +# RUN: llvm-objdump -p %t2 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t2 + +# MISALIGNED-DEF: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: found a misaligned version definition entry at offset 0x0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Type: Fill + Size: 0x1 + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Names: + - FOO +DynamicSymbols: + - Name: foo + +## Check we report "invalid vda_name" when vda_name = size(.dynstr) + +# RUN: yaml2obj %s --docnum=3 -o %t3 +# RUN: llvm-objdump -p %t3 2>&1 | FileCheck %s --check-prefix=VDANAME-PAST-END --implicit-check-not=warning: + +# VDANAME-PAST-END: Version definitions: +# VDANAME-PAST-END-NEXT: 0 0x00 0x00000000 V0 +# VDANAME-PAST-END-NEXT: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Link: .dynstr + Info: 0x1 +## The byte offset to the auxiliary entry is 0x13, i.e. it is not correctly aligned in memory. 
+ Content: "010000000000020000000000140000000000000004000000080000000700000000000000" +DynamicSymbols: + - Name: V1 + Binding: STB_GLOBAL + - Name: V0 + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-objdump/ELF/verdef.test b/llvm/test/tools/llvm-objdump/ELF/verdef.test index e4ae33853deb4..dbb10bf87cbea 100644 --- a/llvm/test/tools/llvm-objdump/ELF/verdef.test +++ b/llvm/test/tools/llvm-objdump/ELF/verdef.test @@ -1,12 +1,14 @@ # RUN: yaml2obj %s -o %t -# RUN: llvm-objdump -p %t | FileCheck --strict-whitespace %s +# RUN: llvm-objdump -p %t | FileCheck --match-full-lines --strict-whitespace %s -# CHECK: Dynamic Section: -# CHECK-EMPTY: -# CHECK-NEXT: Version definitions: -# CHECK-NEXT: 1 0x01 0x075bcd15 foo -# CHECK-NEXT: 2 0x02 0x3ade68b1 VERSION_1 -# CHECK-NEXT: VERSION_2 +# CHECK:Dynamic Section: +#CHECK-EMPTY: +# CHECK-NEXT:Version definitions: +# CHECK-NEXT:2 0x01 0x075bcd15 foo +# CHECK-NEXT:3 0x02 0x3ade68b1 VERSION_1 +# CHECK-NEXT: VERSION_2 +# CHECK-NEXT:4 0x00 0x0000007b VERSION_3 +# CHECK-NEXT: VERSION_4 VERSION_5 --- !ELF FileHeader: @@ -24,17 +26,25 @@ Sections: Entries: - Version: 1 Flags: 1 - VersionNdx: 1 + VersionNdx: 2 Hash: 123456789 Names: - foo - Version: 1 Flags: 2 - VersionNdx: 2 + VersionNdx: 3 Hash: 987654321 Names: - VERSION_1 - VERSION_2 + - Version: 1 + Flags: 0 + VersionNdx: 4 + Hash: 123 + Names: + - VERSION_3 + - VERSION_4 + - VERSION_5 DynamicSymbols: - Name: bar Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test b/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test index e8bd4d21f7429..e768e13f4a1ec 100644 --- a/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test +++ b/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test @@ -128,7 +128,8 @@ Sections: Entries: - Names: - FOO - ShSize: 21 + ## The correct sh_size is 28. 
+ ShSize: 27 DynamicSymbols: - Name: foo @@ -290,3 +291,36 @@ Sections: DynamicSymbols: - Name: foo Binding: STB_GLOBAL + +## Check we report "invalid vda_name" when vda_name = size(.dynstr) + +# RUN: yaml2obj %s --docnum=10 -o %t11 +# RUN: llvm-readobj -V %t11 2>&1 | FileCheck %s --check-prefix=VDANAME-PAST-END-LLVM -DFILE=%t11 --implicit-check-not=warning: +# RUN: llvm-readelf -V %t11 2>&1 | FileCheck %s --check-prefix=VDANAME-PAST-END-GNU -DFILE=%t11 --implicit-check-not=warning: + +# VDANAME-PAST-END-LLVM: Name: V0 +# VDANAME-PAST-END-LLVM-NEXT: Predecessors: [] + +# VDANAME-PAST-END-GNU: Version definition section '.gnu.version_d' contains 1 entries: +# VDANAME-PAST-END-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 3 (.dynstr) +# VDANAME-PAST-END-GNU-NEXT: 0x0000: Rev: 1 Flags: none Index: 0 Cnt: 2 Name: V0 +# VDANAME-PAST-END-GNU-NEXT: 0x001c: Parent 1: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Link: .dynstr + Info: 0x1 +## The byte offset to the auxiliary entry is 0x13, i.e. it is not correctly aligned in memory. + Content: "010000000000020000000000140000000000000004000000080000000700000000000000" +DynamicSymbols: + - Name: V1 + Binding: STB_GLOBAL + - Name: V0 + Binding: STB_GLOBAL diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index bce308c870ddf..b7899bc3bcbee 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -378,38 +378,6 @@ void ELFDumper::printSymbolVersionDependency( } } -template -static void printSymbolVersionDefinition(const typename ELFT::Shdr &Shdr, - ArrayRef Contents, - StringRef StrTab) { - outs() << "\nVersion definitions:\n"; - - const uint8_t *Buf = Contents.data(); - uint32_t VerdefIndex = 1; - // sh_info contains the number of entries in the SHT_GNU_verdef section. 
To - // make the index column have consistent width, we should insert blank spaces - // according to sh_info. - uint16_t VerdefIndexWidth = std::to_string(Shdr.sh_info).size(); - while (Buf) { - auto *Verdef = reinterpret_cast(Buf); - outs() << format_decimal(VerdefIndex++, VerdefIndexWidth) << " " - << format("0x%02" PRIx16 " ", (uint16_t)Verdef->vd_flags) - << format("0x%08" PRIx32 " ", (uint32_t)Verdef->vd_hash); - - const uint8_t *BufAux = Buf + Verdef->vd_aux; - uint16_t VerdauxIndex = 0; - while (BufAux) { - auto *Verdaux = reinterpret_cast(BufAux); - if (VerdauxIndex) - outs() << std::string(VerdefIndexWidth + 17, ' '); - outs() << StringRef(StrTab.drop_front(Verdaux->vda_name).data()) << '\n'; - BufAux = Verdaux->vda_next ? BufAux + Verdaux->vda_next : nullptr; - ++VerdauxIndex; - } - Buf = Verdef->vd_next ? Buf + Verdef->vd_next : nullptr; - } -} - template void ELFDumper::printSymbolVersion() { const ELFFile &Elf = getELFFile(); StringRef FileName = Obj.getFileName(); @@ -426,10 +394,26 @@ template void ELFDumper::printSymbolVersion() { unwrapOrError(Elf.getSection(Shdr.sh_link), FileName); StringRef StrTab = unwrapOrError(Elf.getStringTable(*StrTabSec), FileName); - if (Shdr.sh_type == ELF::SHT_GNU_verneed) + if (Shdr.sh_type == ELF::SHT_GNU_verneed) { printSymbolVersionDependency(Shdr); - else - printSymbolVersionDefinition(Shdr, Contents, StrTab); + } else { + OS << "\nVersion definitions:\n"; + Expected> V = + getELFFile().getVersionDefinitions(Shdr); + if (!V) { + this->reportUniqueWarning(V.takeError()); + continue; + } + for (const VerDef &Def : *V) { + OS << Def.Ndx << ' ' << format_hex(Def.Flags, 4) << ' ' + << format_hex(Def.Hash, 10) << ' ' << Def.Name << '\n'; + if (!Def.AuxV.empty()) { + for (auto [I, Aux] : enumerate(Def.AuxV)) + OS << (I ? 
' ' : '\t') << Aux.Name; + OS << '\n'; + } + } + } } } diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 99e0440dce78d..115f04a4df778 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -360,7 +360,7 @@ static StringRef ToolName; std::unique_ptr BIDFetcher; -Dumper::Dumper(const object::ObjectFile &O) : O(O) { +Dumper::Dumper(const object::ObjectFile &O) : O(O), OS(outs()) { WarningHandler = [this](const Twine &Msg) { if (Warnings.insert(Msg.str()).second) reportWarning(Msg, this->O.getFileName()); diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h index 7253cc3f4d91b..25d9c1e106a6c 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/llvm/tools/llvm-objdump/llvm-objdump.h @@ -77,6 +77,7 @@ class Dumper { StringSet<> Warnings; protected: + llvm::raw_ostream &OS; std::function WarningHandler; public: diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index fdae09ac767e6..e7825419ef9ec 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -7668,7 +7668,7 @@ void LLVMELFDumper::printVersionDefinitionSection(const Elf_Shdr *Sec) { W.printFlags("Flags", D.Flags, ArrayRef(SymVersionFlags)); W.printNumber("Index", D.Ndx); W.printNumber("Hash", D.Hash); - W.printString("Name", D.Name.c_str()); + W.printString("Name", D.Name); W.printList( "Predecessors", D.AuxV, [](raw_ostream &OS, const VerdAux &Aux) { OS << Aux.Name.c_str(); }); From 743d080c9e4c0367058f24a9141512a212727a12 Mon Sep 17 00:00:00 2001 From: Tai Ly Date: Fri, 28 Feb 2025 11:44:05 -0600 Subject: [PATCH 115/123] [mlir][tosa] Rename the result of MATMUL from `c` to `output` (#129274) This renames the output of TOSA MatMul operator from `c` to `output` to align to TOSA spec Co-authored-by: TatWai Chong --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index ddfec2c9bfcd3..1f4f1fc6b96c9 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -301,7 +301,7 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> { ); let results = (outs - Tosa_Tensor3D:$c + Tosa_Tensor3D:$output ); list availability = [ From 0e6b7ad1a4e649f2a70d8c99f3f8fa37991bb6bd Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 28 Feb 2025 18:12:39 +0000 Subject: [PATCH 116/123] Revert "[clang][HIP] Make some math not not work with AMDGCN SPIR-V" (#129280) Reverts llvm/llvm-project#128360 pending resolution of odd test break. --- .../Headers/__clang_hip_libdevice_declares.h | 32 +- clang/lib/Headers/__clang_hip_math.h | 28 +- clang/test/Headers/__clang_hip_math.hip | 1655 ----------------- 3 files changed, 36 insertions(+), 1679 deletions(-) diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h index fa8d918248dd0..f15198b3d9f93 100644 --- a/clang/lib/Headers/__clang_hip_libdevice_declares.h +++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h @@ -14,8 +14,6 @@ #include "hip/hip_version.h" #endif // __has_include("hip/hip_version.h") -#define __PRIVATE_AS __attribute__((opencl_private)) - #ifdef __cplusplus extern "C" { #endif @@ -57,7 +55,8 @@ __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); __device__ __attribute__((const)) __device__ float __ocml_fmod_f32(float, float); -__device__ float __ocml_frexp_f32(float, __PRIVATE_AS int *); +__device__ float __ocml_frexp_f32(float, + __attribute__((address_space(5))) int *); __device__ __attribute__((const)) float __ocml_hypot_f32(float, float); __device__ __attribute__((const)) int __ocml_ilogb_f32(float); __device__ __attribute__((const)) int 
__ocml_isfinite_f32(float); @@ -75,7 +74,8 @@ __device__ __attribute__((pure)) float __ocml_native_log2_f32(float); __device__ __attribute__((const)) float __ocml_logb_f32(float); __device__ __attribute__((pure)) float __ocml_log_f32(float); __device__ __attribute__((pure)) float __ocml_native_log_f32(float); -__device__ float __ocml_modf_f32(float, __PRIVATE_AS float *); +__device__ float __ocml_modf_f32(float, + __attribute__((address_space(5))) float *); __device__ __attribute__((const)) float __ocml_nearbyint_f32(float); __device__ __attribute__((const)) float __ocml_nextafter_f32(float, float); __device__ __attribute__((const)) float __ocml_len3_f32(float, float, float); @@ -87,7 +87,8 @@ __device__ __attribute__((pure)) float __ocml_pow_f32(float, float); __device__ __attribute__((pure)) float __ocml_pown_f32(float, int); __device__ __attribute__((pure)) float __ocml_rcbrt_f32(float); __device__ __attribute__((const)) float __ocml_remainder_f32(float, float); -__device__ float __ocml_remquo_f32(float, float, __PRIVATE_AS int *); +__device__ float __ocml_remquo_f32(float, float, + __attribute__((address_space(5))) int *); __device__ __attribute__((const)) float __ocml_rhypot_f32(float, float); __device__ __attribute__((const)) float __ocml_rint_f32(float); __device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float); @@ -98,8 +99,10 @@ __device__ __attribute__((pure)) float __ocml_rsqrt_f32(float); __device__ __attribute__((const)) float __ocml_scalb_f32(float, float); __device__ __attribute__((const)) float __ocml_scalbn_f32(float, int); __device__ __attribute__((const)) int __ocml_signbit_f32(float); -__device__ float __ocml_sincos_f32(float, __PRIVATE_AS float *); -__device__ float __ocml_sincospi_f32(float, __PRIVATE_AS float *); +__device__ float __ocml_sincos_f32(float, + __attribute__((address_space(5))) float *); +__device__ float __ocml_sincospi_f32(float, + __attribute__((address_space(5))) float *); __device__ float 
__ocml_sin_f32(float); __device__ float __ocml_native_sin_f32(float); __device__ __attribute__((pure)) float __ocml_sinh_f32(float); @@ -173,7 +176,8 @@ __device__ __attribute__((const)) double __ocml_fma_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fmax_f64(double, double); __device__ __attribute__((const)) double __ocml_fmin_f64(double, double); __device__ __attribute__((const)) double __ocml_fmod_f64(double, double); -__device__ double __ocml_frexp_f64(double, __PRIVATE_AS int *); +__device__ double __ocml_frexp_f64(double, + __attribute__((address_space(5))) int *); __device__ __attribute__((const)) double __ocml_hypot_f64(double, double); __device__ __attribute__((const)) int __ocml_ilogb_f64(double); __device__ __attribute__((const)) int __ocml_isfinite_f64(double); @@ -188,7 +192,8 @@ __device__ __attribute__((pure)) double __ocml_log1p_f64(double); __device__ __attribute__((pure)) double __ocml_log2_f64(double); __device__ __attribute__((const)) double __ocml_logb_f64(double); __device__ __attribute__((pure)) double __ocml_log_f64(double); -__device__ double __ocml_modf_f64(double, __PRIVATE_AS double *); +__device__ double __ocml_modf_f64(double, + __attribute__((address_space(5))) double *); __device__ __attribute__((const)) double __ocml_nearbyint_f64(double); __device__ __attribute__((const)) double __ocml_nextafter_f64(double, double); __device__ __attribute__((const)) double __ocml_len3_f64(double, double, @@ -201,7 +206,8 @@ __device__ __attribute__((pure)) double __ocml_pow_f64(double, double); __device__ __attribute__((pure)) double __ocml_pown_f64(double, int); __device__ __attribute__((pure)) double __ocml_rcbrt_f64(double); __device__ __attribute__((const)) double __ocml_remainder_f64(double, double); -__device__ double __ocml_remquo_f64(double, double, __PRIVATE_AS int *); +__device__ double __ocml_remquo_f64(double, double, + __attribute__((address_space(5))) int *); __device__ __attribute__((const)) double 
__ocml_rhypot_f64(double, double); __device__ __attribute__((const)) double __ocml_rint_f64(double); __device__ __attribute__((const)) double __ocml_rlen3_f64(double, double, @@ -213,8 +219,10 @@ __device__ __attribute__((pure)) double __ocml_rsqrt_f64(double); __device__ __attribute__((const)) double __ocml_scalb_f64(double, double); __device__ __attribute__((const)) double __ocml_scalbn_f64(double, int); __device__ __attribute__((const)) int __ocml_signbit_f64(double); -__device__ double __ocml_sincos_f64(double, __PRIVATE_AS double *); -__device__ double __ocml_sincospi_f64(double, __PRIVATE_AS double *); +__device__ double __ocml_sincos_f64(double, + __attribute__((address_space(5))) double *); +__device__ double +__ocml_sincospi_f64(double, __attribute__((address_space(5))) double *); __device__ double __ocml_sin_f64(double); __device__ __attribute__((pure)) double __ocml_sinh_f64(double); __device__ double __ocml_sinpi_f64(double); diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index bf8517bc3a507..8468751d9de26 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -33,9 +33,6 @@ #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif -#pragma push_macro("__PRIVATE_AS") - -#define __PRIVATE_AS __attribute__((opencl_private)) // Device library provides fast low precision and slow full-recision // implementations for some functions. 
Which one gets selected depends on // __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if @@ -515,7 +512,8 @@ float modff(float __x, float *__iptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - float __r = __ocml_modf_f32(__x, (__PRIVATE_AS float *)&__tmp); + float __r = + __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__iptr = __tmp; return __r; } @@ -597,7 +595,8 @@ float remquof(float __x, float __y, int *__quo) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - float __r = __ocml_remquo_f32(__x, __y, (__PRIVATE_AS int *)&__tmp); + float __r = __ocml_remquo_f32( + __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; return __r; @@ -658,7 +657,8 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) { #ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__ __sincosf(__x, __sinptr, __cosptr); #else - *__sinptr = __ocml_sincos_f32(__x, (__PRIVATE_AS float *)&__tmp); + *__sinptr = + __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; #endif } @@ -669,7 +669,8 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincospi_f32(__x, (__PRIVATE_AS float *)&__tmp); + *__sinptr = __ocml_sincospi_f32( + __x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; } @@ -912,7 +913,8 @@ double modf(double __x, double *__iptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - double __r = __ocml_modf_f64(__x, (__PRIVATE_AS double *)&__tmp); + double __r = + __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); *__iptr = __tmp; return __r; @@ -1002,7 +1004,8 @@ double remquo(double __x, double __y, int *__quo) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) 
allocator(omp_thread_mem_alloc) #endif - double __r = __ocml_remquo_f64(__x, __y, (__PRIVATE_AS int *)&__tmp); + double __r = __ocml_remquo_f64( + __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; return __r; @@ -1062,7 +1065,8 @@ void sincos(double __x, double *__sinptr, double *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincos_f64(__x, (__PRIVATE_AS double *)&__tmp); + *__sinptr = __ocml_sincos_f64( + __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } @@ -1072,7 +1076,8 @@ void sincospi(double __x, double *__sinptr, double *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif - *__sinptr = __ocml_sincospi_f64(__x, (__PRIVATE_AS double *)&__tmp); + *__sinptr = __ocml_sincospi_f64( + __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } @@ -1317,7 +1322,6 @@ __host__ inline static int max(int __arg1, int __arg2) { #endif #pragma pop_macro("__DEVICE__") -#pragma pop_macro("__PRIVATE_AS") #pragma pop_macro("__RETURN_TYPE") #pragma pop_macro("__FAST_OR_SLOW") diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index a375ea47b530d..e4254d1e64bec 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -26,14 +26,6 @@ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -fgpu-approx-transcendentals -o - \ // RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,APPROX %s -// Check that we use the AMDGCNSPIRV address space map -// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ -// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ -// RUN: -internal-isystem %S/Inputs/include \ -// RUN: -triple spirv64-amd-amdhsa -aux-triple x86_64-unknown-unknown \ -// RUN: -emit-llvm %s -fcuda-is-device -O1 -o - \ -// RUN: -D__HIPCC_RTC__ | FileCheck 
-check-prefixes=AMDGCNSPIRV %s - #define BOOL_TYPE int typedef unsigned long long uint64_t; @@ -65,30 +57,6 @@ typedef unsigned long long uint64_t; // CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // -// AMDGCNSPIRV-LABEL: @test___make_mantissa_base8( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]] -// AMDGCNSPIRV: while.cond.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5:![0-9]+]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], -8 -// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp eq i8 [[TMP1]], 48 -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl i64 [[__R_0_I]], 3 -// AMDGCNSPIRV-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 -// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64 -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]] -// AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP8:![0-9]+]] -// AMDGCNSPIRV: _ZL21__make_mantissa_base8PKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] 
], [ [[__R_0_I]], [[WHILE_COND_I]] ] -// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] -// extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { return __make_mantissa_base8(p); } @@ -121,30 +89,6 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { // CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // -// AMDGCNSPIRV-LABEL: @test___make_mantissa_base10( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]] -// AMDGCNSPIRV: while.cond.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[WHILE_BODY_I:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -48 -// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10 -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = mul i64 [[__R_0_I]], 10 -// AMDGCNSPIRV-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 -// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_IDX:%.*]] = zext i1 [[OR_COND_I]] to i64 -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 [[__TAGP_ADDR_1_I_IDX]] -// AMDGCNSPIRV-NEXT: [[__R_1_I]] = select i1 [[OR_COND_I]], i64 [[SUB_I]], i64 [[__R_0_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop 
[[LOOP11:![0-9]+]] -// AMDGCNSPIRV: _ZL22__make_mantissa_base10PKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] -// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] -// extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { return __make_mantissa_base10(p); } @@ -187,44 +131,6 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { // CHECK-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_2_I]] // -// AMDGCNSPIRV-LABEL: @test___make_mantissa_base16( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I:%.*]] -// AMDGCNSPIRV: while.cond.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[CLEANUP_I:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_2_I:%.*]], [[CLEANUP_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -48 -// AMDGCNSPIRV-NEXT: [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I]], label [[IF_END31_I:%.*]], label [[IF_ELSE_I:%.*]] -// AMDGCNSPIRV: if.else.i: -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = add i8 [[TMP0]], -97 -// AMDGCNSPIRV-NEXT: [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP2]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]] -// AMDGCNSPIRV: if.else17.i: -// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP0]], -65 -// AMDGCNSPIRV-NEXT: [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP3]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I]], label 
[[IF_END31_I]], label [[CLEANUP_I]] -// AMDGCNSPIRV: if.end31.i: -// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ] -// AMDGCNSPIRV-NEXT: [[MUL24_I:%.*]] = shl i64 [[__R_0_I]], 4 -// AMDGCNSPIRV-NEXT: [[CONV25_I:%.*]] = zext nneg i8 [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]] -// AMDGCNSPIRV-NEXT: [[ADD28_I:%.*]] = add i64 [[ADD26_I]], [[CONV25_I]] -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 1 -// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I]] -// AMDGCNSPIRV: cleanup.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I]], [[IF_END31_I]] ], [ [[__TAGP_ADDR_0_I]], [[IF_ELSE17_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_2_I]] = phi i64 [ [[ADD28_I]], [[IF_END31_I]] ], [ [[__R_0_I]], [[IF_ELSE17_I]] ] -// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi i1 [ true, [[IF_END31_I]] ], [ false, [[IF_ELSE17_I]] ] -// AMDGCNSPIRV-NEXT: br i1 [[COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], !llvm.loop [[LOOP12:![0-9]+]] -// AMDGCNSPIRV: _ZL22__make_mantissa_base16PKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ] -// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_2_I]] -// extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { return __make_mantissa_base16(p); } @@ -320,89 +226,6 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ 0, [[CLEANUP_I36_I]] ], [ [[__R_0_I32_I]], [[WHILE_COND_I30_I]] ], [ 0, [[CLEANUP_I20_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ] // CHECK-NEXT: ret i64 [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test___make_mantissa( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) 
[[P:%.*]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I:%.*]] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[P]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i8 120, label [[WHILE_COND_I28_I_PREHEADER:%.*]] -// AMDGCNSPIRV-NEXT: i8 88, label [[WHILE_COND_I28_I_PREHEADER]] -// AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: while.cond.i28.i.preheader: -// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I28_I:%.*]] -// AMDGCNSPIRV: while.cond.i28.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I:%.*]], [[CLEANUP_I_I:%.*]] ], [ [[INCDEC_PTR_I]], [[WHILE_COND_I28_I_PREHEADER]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I30_I:%.*]] = phi i64 [ [[__R_2_I_I:%.*]], [[CLEANUP_I_I]] ], [ 0, [[WHILE_COND_I28_I_PREHEADER]] ] -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I:%.*]] = icmp eq i8 [[TMP2]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I:%.*]] -// AMDGCNSPIRV: while.body.i32.i: -// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 -// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I:%.*]] = icmp ult i8 [[TMP3]], 10 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I]], label [[IF_END31_I_I:%.*]], label [[IF_ELSE_I_I:%.*]] -// AMDGCNSPIRV: if.else.i.i: -// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 -// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I]], label [[IF_END31_I_I]], label [[IF_ELSE17_I_I:%.*]] -// AMDGCNSPIRV: if.else17.i.i: -// AMDGCNSPIRV-NEXT: 
[[TMP5:%.*]] = add i8 [[TMP2]], -65 -// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[CLEANUP_I_I]] -// AMDGCNSPIRV: if.end31.i.i: -// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ] -// AMDGCNSPIRV-NEXT: [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I30_I]], 4 -// AMDGCNSPIRV-NEXT: [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]] -// AMDGCNSPIRV-NEXT: [[ADD28_I_I:%.*]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]] -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I37_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I]], i64 1 -// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I_I]] -// AMDGCNSPIRV: cleanup.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I34_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I]], [[IF_END31_I_I]] ], [ [[__TAGP_ADDR_0_I29_I]], [[IF_ELSE17_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_2_I_I]] = phi i64 [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ [[__R_0_I30_I]], [[IF_ELSE17_I_I]] ] -// AMDGCNSPIRV-NEXT: [[COND_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I]] ], [ false, [[IF_ELSE17_I_I]] ] -// AMDGCNSPIRV-NEXT: br i1 [[COND_I_I]], label [[WHILE_COND_I28_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP12]] -// AMDGCNSPIRV: while.cond.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I:%.*]], [[WHILE_BODY_I_I:%.*]] ], [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], [[WHILE_BODY_I_I]] ], [ 0, [[IF_THEN_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I]] -// AMDGCNSPIRV: 
while.body.i.i: -// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 -// AMDGCNSPIRV-NEXT: [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 -// AMDGCNSPIRV-NEXT: [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I]], 3 -// AMDGCNSPIRV-NEXT: [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48 -// AMDGCNSPIRV-NEXT: [[SUB_I_I:%.*]] = add i64 [[ADD_I_I]], [[CONV5_I_I]] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I]] to i64 -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], i64 [[__TAGP_ADDR_1_I_I_IDX]] -// AMDGCNSPIRV-NEXT: [[__R_1_I_I]] = select i1 [[OR_COND_I_I]], i64 [[SUB_I_I]], i64 [[__R_0_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I]], label [[WHILE_COND_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP8]] -// AMDGCNSPIRV: while.cond.i14.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I:%.*]], [[WHILE_BODY_I18_I:%.*]] ], [ [[P]], [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I26_I:%.*]], [[WHILE_BODY_I18_I]] ], [ 0, [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP8]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I]] -// AMDGCNSPIRV: while.body.i18.i: -// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 -// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP9]], 10 -// AMDGCNSPIRV-NEXT: [[MUL_I20_I:%.*]] = mul i64 [[__R_0_I16_I]], 10 -// AMDGCNSPIRV-NEXT: [[CONV5_I21_I:%.*]] = zext nneg i8 [[TMP8]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD_I22_I:%.*]] = add i64 [[MUL_I20_I]], -48 -// AMDGCNSPIRV-NEXT: [[SUB_I23_I:%.*]] = add i64 [[ADD_I22_I]], [[CONV5_I21_I]] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_IDX:%.*]] = 
zext i1 [[OR_COND_I19_I]] to i64 -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], i64 [[__TAGP_ADDR_1_I25_I_IDX]] -// AMDGCNSPIRV-NEXT: [[__R_1_I26_I]] = select i1 [[OR_COND_I19_I]], i64 [[SUB_I23_I]], i64 [[__R_0_I16_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I]], label [[WHILE_COND_I14_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]] -// AMDGCNSPIRV: _ZL15__make_mantissaPKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ 0, [[CLEANUP_I_I]] ], [ [[__R_0_I30_I]], [[WHILE_COND_I28_I]] ], [ 0, [[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ] -// AMDGCNSPIRV-NEXT: ret i64 [[RETVAL_0_I]] -// extern "C" __device__ uint64_t test___make_mantissa(const char *p) { return __make_mantissa(p); } @@ -412,11 +235,6 @@ extern "C" __device__ uint64_t test___make_mantissa(const char *p) { // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) // CHECK-NEXT: ret i32 [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_abs( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i32 0, -2147483648) addrspace(4) i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) -// AMDGCNSPIRV-NEXT: ret i32 [[TMP0]] -// extern "C" __device__ int test_abs(int x) { return abs(x); } @@ -426,11 +244,6 @@ extern "C" __device__ int test_abs(int x) { // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) // CHECK-NEXT: ret i64 [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_labs( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) -// AMDGCNSPIRV-NEXT: ret i64 [[TMP0]] -// extern "C" __device__ long test_labs(long x) { return labs(x); } @@ -440,11 +253,6 @@ extern "C" 
__device__ long test_labs(long x) { // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) // CHECK-NEXT: ret i64 [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_llabs( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call noundef range(i64 0, -9223372036854775808) addrspace(4) i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true) -// AMDGCNSPIRV-NEXT: ret i64 [[TMP0]] -// extern "C" __device__ long long test_llabs(long x) { return llabs(x); } @@ -464,11 +272,6 @@ extern "C" __device__ long long test_llabs(long x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_acosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_acosf(float x) { return acosf(x); } @@ -488,11 +291,6 @@ extern "C" __device__ float test_acosf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_acos( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_acos(double x) { return acos(x); } @@ -512,11 +310,6 @@ extern "C" __device__ double test_acos(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_acoshf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: 
[[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_acoshf(float x) { return acoshf(x); } @@ -536,11 +329,6 @@ extern "C" __device__ float test_acoshf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_acosh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_acosh(double x) { return acosh(x); } @@ -560,11 +348,6 @@ extern "C" __device__ double test_acosh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_asinf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_asinf(float x) { return asinf(x); } @@ -584,11 +367,6 @@ extern "C" __device__ float test_asinf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_asin( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_asin(double x) { return asin(x); @@ -609,11 +387,6 @@ extern "C" __device__ 
double test_asin(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_asinhf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_asinhf(float x) { return asinhf(x); } @@ -633,11 +406,6 @@ extern "C" __device__ float test_asinhf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_asinh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_asinh(double x) { return asinh(x); } @@ -657,11 +425,6 @@ extern "C" __device__ double test_asinh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atan2f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_atan2f(float x, float y) { return atan2f(x, y); } @@ -681,11 +444,6 @@ extern "C" __device__ float test_atan2f(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // 
-// AMDGCNSPIRV-LABEL: @test_atan2( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_atan2(double x, double y) { return atan2(x, y); } @@ -705,11 +463,6 @@ extern "C" __device__ double test_atan2(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_atanf(float x) { return atanf(x); } @@ -729,11 +482,6 @@ extern "C" __device__ float test_atanf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atan( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_atan(double x) { return atan(x); } @@ -753,11 +501,6 @@ extern "C" __device__ double test_atan(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atanhf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float 
[[CALL_I]] -// extern "C" __device__ float test_atanhf(float x) { return atanhf(x); } @@ -777,11 +520,6 @@ extern "C" __device__ float test_atanhf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_atanh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_atanh(double x) { return atanh(x); } @@ -801,11 +539,6 @@ extern "C" __device__ double test_atanh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cbrtf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_cbrtf(float x) { return cbrtf(x); } @@ -825,11 +558,6 @@ extern "C" __device__ float test_cbrtf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cbrt( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_cbrt(double x) { return cbrt(x); } @@ -849,11 +577,6 @@ extern "C" __device__ double test_cbrt(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ceil.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// 
AMDGCNSPIRV-LABEL: @test_ceilf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ceil.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_ceilf(float x) { return ceilf(x); } @@ -873,11 +596,6 @@ extern "C" __device__ float test_ceilf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ceil.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_ceil( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ceil.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_ceil(double x) { return ceil(x); } @@ -897,11 +615,6 @@ extern "C" __device__ double test_ceil(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_copysignf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_copysignf(float x, float y) { return copysignf(x, y); } @@ -921,11 +634,6 @@ extern "C" __device__ float test_copysignf(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_copysign( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.copysign.f64(double [[X:%.*]], double [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_copysign(double x, double y) { return copysign(x, y); } @@ -945,11 +653,6 @@ extern "C" __device__ double 
test_copysign(double x, double y) { // APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]] // APPROX-NEXT: ret float [[CALL_I1]] // -// AMDGCNSPIRV-LABEL: @test_cosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_cosf(float x) { return cosf(x); } @@ -969,11 +672,6 @@ extern "C" __device__ float test_cosf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cos( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_cos(double x) { return cos(x); } @@ -993,11 +691,6 @@ extern "C" __device__ double test_cos(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_coshf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_coshf(float x) { return coshf(x); } @@ -1017,11 +710,6 @@ extern "C" __device__ float test_coshf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cosh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: 
[[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_cosh(double x) { return cosh(x); } @@ -1041,11 +729,6 @@ extern "C" __device__ double test_cosh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cospif( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_cospif(float x) { return cospif(x); } @@ -1065,16 +748,10 @@ extern "C" __device__ float test_cospif(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cospi( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_cospi(double x) { return cospi(x); } -// // DEFAULT-LABEL: @test_cyl_bessel_i0f( // DEFAULT-NEXT: entry: // DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]] @@ -1090,11 +767,6 @@ extern "C" __device__ double test_cospi(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i0_f32(float 
noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_cyl_bessel_i0f(float x) { return cyl_bessel_i0f(x); } @@ -1114,11 +786,6 @@ extern "C" __device__ float test_cyl_bessel_i0f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_cyl_bessel_i0(double x) { return cyl_bessel_i0(x); } @@ -1138,11 +805,6 @@ extern "C" __device__ double test_cyl_bessel_i0(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_cyl_bessel_i1f(float x) { return cyl_bessel_i1f(x); } @@ -1162,11 +824,6 @@ extern "C" __device__ float test_cyl_bessel_i1f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_cyl_bessel_i1(double x) { return cyl_bessel_i1(x); } @@ -1186,11 +843,6 @@ extern "C" 
__device__ double test_cyl_bessel_i1(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_erfcf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_erfcf(float x) { return erfcf(x); } @@ -1210,11 +862,6 @@ extern "C" __device__ float test_erfcf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_erfc( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_erfc(double x) { return erfc(x); } @@ -1234,11 +881,6 @@ extern "C" __device__ double test_erfc(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_erfinvf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_erfinvf(float x) { return erfinvf(x); } @@ -1258,11 +900,6 @@ extern "C" __device__ float test_erfinvf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_erfinv( -// AMDGCNSPIRV-NEXT: entry: -// 
AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_erfinv(double x) { return erfinv(x); } @@ -1282,11 +919,6 @@ extern "C" __device__ double test_erfinv(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_exp10_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_exp10f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_exp10_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_exp10f(float x) { return exp10f(x); } @@ -1306,11 +938,6 @@ extern "C" __device__ float test_exp10f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_exp10( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_exp10(double x) { return exp10(x); } @@ -1330,11 +957,6 @@ extern "C" __device__ double test_exp10(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp2.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_exp2f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp2.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_exp2f(float x) { return exp2f(x); } @@ -1354,11 +976,6 @@ extern "C" __device__ float test_exp2f(float x) { // 
APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_exp2( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_exp2(double x) { return exp2(x); } @@ -1378,11 +995,6 @@ extern "C" __device__ double test_exp2(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.exp.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_expf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.exp.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_expf(float x) { return expf(x); } @@ -1402,11 +1014,6 @@ extern "C" __device__ float test_expf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_exp( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_exp(double x) { return exp(x); } @@ -1426,11 +1033,6 @@ extern "C" __device__ double test_exp(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_expm1f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]] 
-// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_expm1f(float x) { return expm1f(x); } @@ -1450,11 +1052,6 @@ extern "C" __device__ float test_expm1f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_expm1( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_expm1(double x) { return expm1(x); } @@ -1474,11 +1071,6 @@ extern "C" __device__ double test_expm1(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fabs.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fabsf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_fabsf(float x) { return fabsf(x); } @@ -1498,11 +1090,6 @@ extern "C" __device__ float test_fabsf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fabs.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fabs( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_fabs(double x) { return fabs(x); } @@ -1522,11 +1109,6 @@ extern "C" __device__ double test_fabs(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_fdimf( -// 
AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_fdimf(float x, float y) { return fdimf(x, y); } @@ -1546,11 +1128,6 @@ extern "C" __device__ float test_fdimf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_fdim( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_fdim(double x, double y) { return fdim(x, y); } @@ -1570,11 +1147,6 @@ extern "C" __device__ double test_fdim(double x, double y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test_fdividef( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] -// AMDGCNSPIRV-NEXT: ret float [[DIV_I]] -// extern "C" __device__ float test_fdividef(float x, float y) { return fdividef(x, y); } @@ -1594,11 +1166,6 @@ extern "C" __device__ float test_fdividef(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.floor.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_floorf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.floor.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_floorf(float x) { return floorf(x); } @@ -1618,11 +1185,6 @@ extern "C" 
__device__ float test_floorf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.floor.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_floor( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.floor.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_floor(double x) { return floor(x); } @@ -1642,11 +1204,6 @@ extern "C" __device__ double test_floor(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fmaf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_fmaf(float x, float y, float z) { return fmaf(x, y, z); } @@ -1666,11 +1223,6 @@ extern "C" __device__ float test_fmaf(float x, float y, float z) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fma( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_fma(double x, double y, double z) { return fma(x, y, z); } @@ -1690,11 +1242,6 @@ extern "C" __device__ double test_fma(double x, double y, double z) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fma_rn( -// 
AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_fma_rn(double x, double y, double z) { return __fma_rn(x, y, z); } @@ -1714,11 +1261,6 @@ extern "C" __device__ double test_fma_rn(double x, double y, double z) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fmaxf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_fmaxf(float x, float y) { return fmaxf(x, y); } @@ -1738,16 +1280,10 @@ extern "C" __device__ float test_fmaxf(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fmax( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_fmax(double x, double y) { return fmax(x, y); } -// // DEFAULT-LABEL: @test_fminf( // DEFAULT-NEXT: entry: // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) @@ -1763,11 +1299,6 @@ extern "C" __device__ double test_fmax(double x, double y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fminf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract 
noundef addrspace(4) float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_fminf(float x, float y) { return fminf(x, y); } @@ -1787,11 +1318,6 @@ extern "C" __device__ float test_fminf(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_fmin( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_fmin(double x, double y) { return fmin(x, y); } @@ -1811,11 +1337,6 @@ extern "C" __device__ double test_fmin(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_fmodf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_fmodf(float x, float y) { return fmodf(x, y); } @@ -1835,11 +1356,6 @@ extern "C" __device__ float test_fmodf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_fmod( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double 
test_fmod(double x, double y) { return fmod(x, y); } @@ -1852,14 +1368,6 @@ extern "C" __device__ double test_fmod(double x, double y) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 // CHECK-NEXT: ret float [[TMP2]] // -// AMDGCNSPIRV-LABEL: @test_frexpf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 -// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA13:![0-9]+]] -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP0]], 0 -// AMDGCNSPIRV-NEXT: ret float [[TMP2]] -// extern "C" __device__ float test_frexpf(float x, int* y) { return frexpf(x, y); } @@ -1872,14 +1380,6 @@ extern "C" __device__ float test_frexpf(float x, int* y) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 // CHECK-NEXT: ret double [[TMP2]] // -// AMDGCNSPIRV-LABEL: @test_frexp( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 -// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA13]] -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP0]], 0 -// AMDGCNSPIRV-NEXT: ret double [[TMP2]] -// extern "C" __device__ double test_frexp(double x, int* y) { return frexp(x, y); } @@ -1899,11 +1399,6 @@ extern "C" __device__ double test_frexp(double x, int* y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_hypotf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float 
@__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_hypotf(float x, float y) { return hypotf(x, y); } @@ -1923,11 +1418,6 @@ extern "C" __device__ float test_hypotf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_hypot( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_hypot(double x, double y) { return hypot(x, y); } @@ -1947,11 +1437,6 @@ extern "C" __device__ double test_hypot(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret i32 [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_ilogbf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret i32 [[CALL_I]] -// extern "C" __device__ int test_ilogbf(float x) { return ilogbf(x); } @@ -1971,11 +1456,6 @@ extern "C" __device__ int test_ilogbf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret i32 [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_ilogb( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call spir_func noundef addrspace(4) i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret i32 [[CALL_I]] -// extern "C" __device__ int test_ilogb(double x) { return ilogb(x); } @@ -1998,13 +1478,6 @@ extern "C" __device__ int 
test_ilogb(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___finitef( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp one float [[TMP0]], 0x7FF0000000000000 -// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 -// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] -// extern "C" __device__ BOOL_TYPE test___finitef(float x) { return __finitef(x); } @@ -2027,13 +1500,6 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___finite( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp one double [[TMP0]], 0x7FF0000000000000 -// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 -// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] -// extern "C" __device__ BOOL_TYPE test___finite(double x) { return __finite(x); } @@ -2056,13 +1522,6 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___isinff( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp oeq float [[TMP0]], 0x7FF0000000000000 -// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 -// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] -// extern "C" __device__ BOOL_TYPE test___isinff(float x) { return __isinff(x); } @@ -2085,13 +1544,6 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___isinf( -// AMDGCNSPIRV-NEXT: entry: -// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = fcmp oeq double [[TMP0]], 0x7FF0000000000000 -// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 -// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] -// extern "C" __device__ BOOL_TYPE test___isinf(double x) { return __isinf(x); } @@ -2112,12 +1564,6 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___isnanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 -// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 -// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] -// extern "C" __device__ BOOL_TYPE test___isnanf(float x) { return __isnanf(x); } @@ -2138,12 +1584,6 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___isnan( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00 -// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 -// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] -// extern "C" __device__ BOOL_TYPE test___isnan(double x) { return __isnan(x); } @@ -2163,11 +1603,6 @@ extern "C" __device__ BOOL_TYPE test___isnan(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_j0f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_j0f(float x) { return j0f(x); } @@ -2187,11 +1622,6 @@ extern "C" __device__ float test_j0f(float x) { // 
APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_j0( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_j0(double x) { return j0(x); } @@ -2211,11 +1641,6 @@ extern "C" __device__ double test_j0(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_j1f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_j1f(float x) { return j1f(x); } @@ -2235,11 +1660,6 @@ extern "C" __device__ float test_j1f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_j1( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_j1(double x) { return j1(x); } @@ -2343,39 +1763,6 @@ extern "C" __device__ double test_j1(double x) { // APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // APPROX-NEXT: ret float [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test_jnf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], 
label [[IF_END4_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] -// AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL3JNFIF_EXIT:%.*]] -// AMDGCNSPIRV: if.then2.i: -// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL3JNFIF_EXIT]] -// AMDGCNSPIRV: if.end4.i: -// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]] -// AMDGCNSPIRV: for.body.i: -// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] -// AMDGCNSPIRV-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] -// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] -// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 -// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// 
AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]] -// AMDGCNSPIRV: _ZL3jnfif.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] -// AMDGCNSPIRV-NEXT: ret float [[RETVAL_0_I]] -// extern "C" __device__ float test_jnf(int x, float y) { return jnf(x, y); } @@ -2479,39 +1866,6 @@ extern "C" __device__ float test_jnf(int x, float y) { // APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // APPROX-NEXT: ret double [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test_jn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] -// AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL2JNID_EXIT:%.*]] -// AMDGCNSPIRV: if.then2.i: -// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL2JNID_EXIT]] -// AMDGCNSPIRV: if.end4.i: -// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]] -// AMDGCNSPIRV: 
for.body.i: -// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] -// AMDGCNSPIRV-NEXT: [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]] -// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] -// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 -// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP16:![0-9]+]] -// AMDGCNSPIRV: _ZL2jnid.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] -// AMDGCNSPIRV-NEXT: ret double [[RETVAL_0_I]] -// extern "C" __device__ double test_jn(int x, double y) { return jn(x, y); } @@ -2531,11 +1885,6 @@ extern "C" __device__ double test_jn(int x, double y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_ldexpf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_ldexpf(float x, int y) { return ldexpf(x, y); } @@ -2555,11 +1904,6 @@ extern "C" __device__ float test_ldexpf(float x, int y) { 
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_ldexp( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_ldexp(double x, int y) { return ldexp(x, y); } @@ -2579,11 +1923,6 @@ extern "C" __device__ double test_ldexp(double x, int y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_lgammaf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_lgammaf(float x) { return lgammaf(x); } @@ -2603,11 +1942,6 @@ extern "C" __device__ float test_lgammaf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_lgamma( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_lgamma(double x) { return lgamma(x); } @@ -2630,12 +1964,6 @@ extern "C" __device__ double test_lgamma(double x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_llrintf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) 
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] -// extern "C" __device__ long long int test_llrintf(float x) { return llrintf(x); } @@ -2658,12 +1986,6 @@ extern "C" __device__ long long int test_llrintf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_llrint( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] -// extern "C" __device__ long long int test_llrint(double x) { return llrint(x); } @@ -2686,12 +2008,6 @@ extern "C" __device__ long long int test_llrint(double x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_llroundf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] -// extern "C" __device__ long long int test_llroundf(float x) { return llroundf(x); } @@ -2714,12 +2030,6 @@ extern "C" __device__ long long int test_llroundf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_llround( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] -// extern "C" __device__ long long int test_llround(double x) { return llround(x); } @@ -2739,11 +2049,6 @@ extern "C" __device__ long long int test_llround(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract 
noundef float @llvm.log10.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_log10f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_log10f(float x) { return log10f(x); } @@ -2763,11 +2068,6 @@ extern "C" __device__ float test_log10f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_log10( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_log10(double x) { return log10(x); } @@ -2787,11 +2087,6 @@ extern "C" __device__ double test_log10(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_log1pf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_log1pf(float x) { return log1pf(x); } @@ -2811,11 +2106,6 @@ extern "C" __device__ float test_log1pf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_log1p( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// 
AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_log1p(double x) { return log1p(x); } @@ -2835,11 +2125,6 @@ extern "C" __device__ double test_log1p(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_log2f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log2_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_log2f(float x) { return log2f(x); } @@ -2859,11 +2144,6 @@ extern "C" __device__ float test_log2f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_log2( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_log2(double x) { return log2(x); } @@ -2883,11 +2163,6 @@ extern "C" __device__ double test_log2(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_logbf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_logbf(float x) { return logbf(x); } @@ -2907,11 +2182,6 @@ extern "C" __device__ float test_logbf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]] // 
APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_logb( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_logb(double x) { return logb(x); } @@ -2931,11 +2201,6 @@ extern "C" __device__ double test_logb(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_logf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_log_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_logf(float x) { return logf(x); } @@ -2958,12 +2223,6 @@ extern "C" __device__ float test_logf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_lrintf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] -// extern "C" __device__ long int test_lrintf(float x) { return lrintf(x); } @@ -2986,12 +2245,6 @@ extern "C" __device__ long int test_lrintf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_lrint( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] -// extern "C" __device__ long int test_lrint(double x) { return lrint(x); } @@ -3014,12 +2267,6 @@ 
extern "C" __device__ long int test_lrint(double x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_lroundf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] -// extern "C" __device__ long int test_lroundf(float x) { return lroundf(x); } @@ -3042,12 +2289,6 @@ extern "C" __device__ long int test_lroundf(float x) { // APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // -// AMDGCNSPIRV-LABEL: @test_lround( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 -// AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] -// extern "C" __device__ long int test_lround(double x) { return lround(x); } @@ -3082,17 +2323,6 @@ extern "C" __device__ long int test_lround(double x) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_modff( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15:[0-9]+]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_modf_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17:![0-9]+]] -// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 4, 
!tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_modff(float x, float* y) { return modff(x, y); } @@ -3127,17 +2357,6 @@ extern "C" __device__ float test_modff(float x, float* y) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_modf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_modf_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19:![0-9]+]] -// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_modf(double x, double* y) { return modf(x, y); } @@ -3336,93 +2555,6 @@ extern "C" __device__ double test_modf(double x, double* y) { // APPROX-NEXT: [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float // APPROX-NEXT: ret float [[TMP10]] // -// AMDGCNSPIRV-LABEL: @test_nanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG:%.*]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] -// AMDGCNSPIRV: 
if.then.i.i: -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i8 120, label [[WHILE_COND_I28_I_I_PREHEADER:%.*]] -// AMDGCNSPIRV-NEXT: i8 88, label [[WHILE_COND_I28_I_I_PREHEADER]] -// AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: while.cond.i28.i.i.preheader: -// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I28_I_I:%.*]] -// AMDGCNSPIRV: while.cond.i28.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I28_I_I_PREHEADER]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[WHILE_COND_I28_I_I_PREHEADER]] ] -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]] -// AMDGCNSPIRV: while.body.i32.i.i: -// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 -// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] -// AMDGCNSPIRV: if.else.i.i.i: -// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 -// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// AMDGCNSPIRV: if.else17.i.i.i: -// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 -// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], 
label [[IF_END31_I_I_I]], label [[CLEANUP_I_I_I]] -// AMDGCNSPIRV: if.end31.i.i.i: -// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] -// AMDGCNSPIRV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I]], 4 -// AMDGCNSPIRV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] -// AMDGCNSPIRV-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I37_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], i64 1 -// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I_I_I]] -// AMDGCNSPIRV: cleanup.i.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I34_I_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I29_I_I]], [[IF_ELSE17_I_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I30_I_I]], [[IF_ELSE17_I_I_I]] ] -// AMDGCNSPIRV-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] -// AMDGCNSPIRV-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I28_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP12]] -// AMDGCNSPIRV: while.cond.i.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]] -// AMDGCNSPIRV: while.body.i.i.i: -// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 -// 
AMDGCNSPIRV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 -// AMDGCNSPIRV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 -// AMDGCNSPIRV-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 -// AMDGCNSPIRV-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64 -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]] -// AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP8]] -// AMDGCNSPIRV: while.cond.i14.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]] -// AMDGCNSPIRV: while.body.i18.i.i: -// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 -// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 -// AMDGCNSPIRV-NEXT: [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 -// AMDGCNSPIRV-NEXT: [[CONV5_I21_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD_I22_I_I:%.*]] = add i64 [[MUL_I20_I_I]], -48 -// AMDGCNSPIRV-NEXT: [[SUB_I23_I_I:%.*]] = add i64 [[ADD_I22_I_I]], [[CONV5_I21_I_I]] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 
[[OR_COND_I19_I_I]] to i64 -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]] -// AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]] -// AMDGCNSPIRV: _ZL4nanfPKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I30_I_I]], [[WHILE_COND_I28_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 -// AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 -// AMDGCNSPIRV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 -// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float -// AMDGCNSPIRV-NEXT: ret float [[TMP10]] -// extern "C" __device__ float test_nanf(const char *tag) { return nanf(tag); } @@ -3619,92 +2751,6 @@ extern "C" __device__ float test_nanf(const char *tag) { // APPROX-NEXT: [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double // APPROX-NEXT: ret double [[TMP10]] // -// AMDGCNSPIRV-LABEL: @test_nan( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[TAG:%.*]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] -// AMDGCNSPIRV: if.then.i.i: -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1 -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: switch i8 [[TMP1]], label 
[[WHILE_COND_I_I_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i8 120, label [[WHILE_COND_I28_I_I_PREHEADER:%.*]] -// AMDGCNSPIRV-NEXT: i8 88, label [[WHILE_COND_I28_I_I_PREHEADER]] -// AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: while.cond.i28.i.i.preheader: -// AMDGCNSPIRV-NEXT: br label [[WHILE_COND_I28_I_I:%.*]] -// AMDGCNSPIRV: while.cond.i28.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I29_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I28_I_I_PREHEADER]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I30_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[WHILE_COND_I28_I_I_PREHEADER]] ] -// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I31_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]] -// AMDGCNSPIRV: while.body.i32.i.i: -// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 -// AMDGCNSPIRV-NEXT: [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] -// AMDGCNSPIRV: if.else.i.i.i: -// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 -// AMDGCNSPIRV-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// AMDGCNSPIRV: if.else17.i.i.i: -// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 -// AMDGCNSPIRV-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I_I_I]] -// AMDGCNSPIRV: if.end31.i.i.i: -// AMDGCNSPIRV-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] -// AMDGCNSPIRV-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 
[[__R_0_I30_I_I]], 4 -// AMDGCNSPIRV-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] -// AMDGCNSPIRV-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I37_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], i64 1 -// AMDGCNSPIRV-NEXT: br label [[CLEANUP_I_I_I]] -// AMDGCNSPIRV: cleanup.i.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I34_I_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I29_I_I]], [[IF_ELSE17_I_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I30_I_I]], [[IF_ELSE17_I_I_I]] ] -// AMDGCNSPIRV-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] -// AMDGCNSPIRV-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I28_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP12]] -// AMDGCNSPIRV: while.cond.i.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]] -// AMDGCNSPIRV: while.body.i.i.i: -// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 -// AMDGCNSPIRV-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 -// AMDGCNSPIRV-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 -// AMDGCNSPIRV-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 -// 
AMDGCNSPIRV-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64 -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], i64 [[__TAGP_ADDR_1_I_I_I_IDX]] -// AMDGCNSPIRV-NEXT: [[__R_1_I_I_I]] = select i1 [[OR_COND_I_I_I]], i64 [[SUB_I_I_I]], i64 [[__R_0_I_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP8]] -// AMDGCNSPIRV: while.cond.i14.i.i: -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]] -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]] -// AMDGCNSPIRV: while.body.i18.i.i: -// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 -// AMDGCNSPIRV-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 -// AMDGCNSPIRV-NEXT: [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 -// AMDGCNSPIRV-NEXT: [[CONV5_I21_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 -// AMDGCNSPIRV-NEXT: [[ADD_I22_I_I:%.*]] = add i64 [[MUL_I20_I_I]], -48 -// AMDGCNSPIRV-NEXT: [[SUB_I23_I_I:%.*]] = add i64 [[ADD_I22_I_I]], [[CONV5_I21_I_I]] -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64 -// AMDGCNSPIRV-NEXT: [[__TAGP_ADDR_1_I25_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], i64 [[__TAGP_ADDR_1_I25_I_I_IDX]] -// AMDGCNSPIRV-NEXT: [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 
[[__R_0_I16_I_I]] -// AMDGCNSPIRV-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]] -// AMDGCNSPIRV: _ZL3nanPKc.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I30_I_I]], [[WHILE_COND_I28_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] -// AMDGCNSPIRV-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 -// AMDGCNSPIRV-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 -// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double -// AMDGCNSPIRV-NEXT: ret double [[TMP10]] -// extern "C" __device__ double test_nan(const char *tag) { return nan(tag); } @@ -3721,10 +2767,6 @@ extern "C" __device__ double test_nan(const char *tag) { // APPROX-NEXT: entry: // APPROX-NEXT: ret float 0x7FF8000000000000 // -// AMDGCNSPIRV-LABEL: @test_nanf_emptystr( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: ret float 0x7FF8000000000000 -// extern "C" __device__ float test_nanf_emptystr() { return nanf(""); } @@ -3741,10 +2783,6 @@ extern "C" __device__ float test_nanf_emptystr() { // APPROX-NEXT: entry: // APPROX-NEXT: ret double 0x7FF8000000000000 // -// AMDGCNSPIRV-LABEL: @test_nan_emptystr( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: ret double 0x7FF8000000000000 -// extern "C" __device__ double test_nan_emptystr() { return nan(""); } @@ -3761,10 +2799,6 @@ extern "C" __device__ double test_nan_emptystr() { // APPROX-NEXT: entry: // APPROX-NEXT: ret float 0x7FF8000000000000 // -// AMDGCNSPIRV-LABEL: @test_nanf_fill( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: ret float 0x7FF8000000000000 -// extern "C" __device__ float test_nanf_fill() { return nanf("0x456"); } @@ -3781,10 +2815,6 @@ extern "C" __device__ float test_nanf_fill() { // APPROX-NEXT: entry: // APPROX-NEXT: ret double 
0x7FF8000000000000 // -// AMDGCNSPIRV-LABEL: @test_nan_fill( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: ret double 0x7FF8000000000000 -// extern "C" __device__ double test_nan_fill() { return nan("0x123"); } @@ -3804,11 +2834,6 @@ extern "C" __device__ double test_nan_fill() { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.nearbyint.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_nearbyintf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.nearbyint.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_nearbyintf(float x) { return nearbyintf(x); } @@ -3828,11 +2853,6 @@ extern "C" __device__ float test_nearbyintf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.nearbyint.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_nearbyint( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.nearbyint.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_nearbyint(double x) { return nearbyint(x); } @@ -3852,11 +2872,6 @@ extern "C" __device__ double test_nearbyint(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_nextafterf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_nextafterf(float x, float y) { return nextafterf(x, y); } @@ -3876,11 +2891,6 @@ extern "C" __device__ float 
test_nextafterf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_nextafter( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_nextafter(double x, double y) { return nextafter(x, y); } @@ -3900,11 +2910,6 @@ extern "C" __device__ double test_nextafter(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_norm3df( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_norm3df(float x, float y, float z) { return norm3df(x, y, z); } @@ -3924,11 +2929,6 @@ extern "C" __device__ float test_norm3df(float x, float y, float z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_norm3d( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double 
test_norm3d(double x, double y, double z) { return norm3d(x, y, z); } @@ -3948,11 +2948,6 @@ extern "C" __device__ double test_norm3d(double x, double y, double z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_norm4df( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_norm4df(float x, float y, float z, float w) { return norm4df(x, y, z, w); } @@ -3972,11 +2967,6 @@ extern "C" __device__ float test_norm4df(float x, float y, float z, float w) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_norm4d( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_norm4d(double x, double y, double z, double w) { return norm4d(x, y, z, w); } @@ -3996,11 +2986,6 @@ extern "C" __device__ double test_norm4d(double x, double y, double z, double w) // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_normcdff( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: 
[[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_normcdff(float x) { return normcdff(x); } @@ -4020,11 +3005,6 @@ extern "C" __device__ float test_normcdff(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_normcdf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_normcdf(double x) { return normcdf(x); } @@ -4044,11 +3024,6 @@ extern "C" __device__ double test_normcdf(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_normcdfinvf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_normcdfinvf(float x) { return normcdfinvf(x); } @@ -4068,11 +3043,6 @@ extern "C" __device__ float test_normcdfinvf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_normcdfinv( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_normcdfinv(double x) { 
return normcdfinv(x); } @@ -4137,26 +3107,6 @@ extern "C" __device__ double test_normcdfinv(double x) { // APPROX-NEXT: [[TMP1:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]]) // APPROX-NEXT: ret float [[TMP1]] // -// AMDGCNSPIRV-LABEL: @test_normf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5NORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] -// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4 -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] -// AMDGCNSPIRV: _ZL5normfiPKf.exit: -// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP1]] -// extern "C" __device__ float test_normf(int x, const float *y) { return normf(x, y); } @@ -4221,26 +3171,6 @@ extern "C" __device__ float 
test_normf(int x, const float *y) { // APPROX-NEXT: [[TMP1:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]]) // APPROX-NEXT: ret double [[TMP1]] // -// AMDGCNSPIRV-LABEL: @test_norm( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL4NORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] -// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8 -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] -// AMDGCNSPIRV: _ZL4normiPKd.exit: -// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP1]] -// extern "C" __device__ double test_norm(int x, const double *y) { return norm(x, y); } @@ -4260,11 +3190,6 @@ extern "C" __device__ double test_norm(int x, const double *y) { // APPROX-NEXT: 
[[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_powf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_powf(float x, float y) { return powf(x, y); } @@ -4284,11 +3209,6 @@ extern "C" __device__ float test_powf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_pow( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_pow(double x, double y) { return pow(x, y); } @@ -4308,11 +3228,6 @@ extern "C" __device__ double test_pow(double x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_powif( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_powif(float x, int y) { return powif(x, y); } @@ -4332,11 +3247,6 @@ extern "C" __device__ float test_powif(float x, int y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef 
[[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_powi( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_powi(double x, int y) { return powi(x, y); } @@ -4356,11 +3266,6 @@ extern "C" __device__ double test_powi(double x, int y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rcbrtf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_rcbrtf(float x) { return rcbrtf(x); } @@ -4380,11 +3285,6 @@ extern "C" __device__ float test_rcbrtf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rcbrt( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_rcbrt(double x) { return rcbrt(x); } @@ -4404,11 +3304,6 @@ extern "C" __device__ double test_rcbrt(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_remainderf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef 
addrspace(4) float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_remainderf(float x, float y) { return remainderf(x, y); } @@ -4428,11 +3323,6 @@ extern "C" __device__ float test_remainderf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_remainder( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_remainder(double x, double y) { return remainder(x, y); } @@ -4467,17 +3357,6 @@ extern "C" __device__ double test_remainder(double x, double y) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_remquof( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]] -// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]] -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] 
-// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_remquof(float x, float y, int* z) { return remquof(x, y, z); } @@ -4512,17 +3391,6 @@ extern "C" __device__ float test_remquof(float x, float y, int* z) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_remquo( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]] -// AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]] -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_remquo(double x, double y, int* z) { return remquo(x, y, z); } @@ -4542,11 +3410,6 @@ extern "C" __device__ double test_remquo(double x, double y, int* z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rhypotf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_rhypotf(float x, 
float y) { return rhypotf(x, y); } @@ -4566,11 +3429,6 @@ extern "C" __device__ float test_rhypotf(float x, float y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rhypot( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_rhypot(double x, double y) { return rhypot(x, y); } @@ -4590,11 +3448,6 @@ extern "C" __device__ double test_rhypot(double x, double y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.rint.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_rintf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.rint.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_rintf(float x) { return rintf(x); } @@ -4614,11 +3467,6 @@ extern "C" __device__ float test_rintf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.rint.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_rint( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.rint.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_rint(double x) { return rint(x); } @@ -4683,26 +3531,6 @@ extern "C" __device__ double test_rint(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnormf( -// 
AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL6RNORMFIPKF_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi float [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] -// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4 -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] -// AMDGCNSPIRV: _ZL6rnormfiPKf.exit: -// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_rnormf(int x, const float* y) { return rnormf(x, y); } @@ -4767,26 +3595,6 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: 
@test_rnorm( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I1:%.*]] = icmp eq i32 [[X:%.*]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I1]], label [[_ZL5RNORMIPKD_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -// AMDGCNSPIRV: while.body.i: -// AMDGCNSPIRV-NEXT: [[__R_0_I4:%.*]] = phi double [ [[ADD_I:%.*]], [[WHILE_BODY_I]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -// AMDGCNSPIRV-NEXT: [[__A_ADDR_0_I3:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ [[Y:%.*]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[__DIM_ADDR_0_I2:%.*]] = phi i32 [ [[DEC_I:%.*]], [[WHILE_BODY_I]] ], [ [[X]], [[ENTRY]] ] -// AMDGCNSPIRV-NEXT: [[DEC_I]] = add nsw i32 [[__DIM_ADDR_0_I2]], -1 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] -// AMDGCNSPIRV-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] -// AMDGCNSPIRV-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8 -// AMDGCNSPIRV-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 -// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]] -// AMDGCNSPIRV: _ZL5rnormiPKd.exit: -// AMDGCNSPIRV-NEXT: [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_rnorm(int x, const double* y) { return rnorm(x, y); } @@ -4806,11 +3614,6 @@ extern "C" __device__ double test_rnorm(int x, const double* y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] // 
APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm3df( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_rnorm3df(float x, float y, float z) { return rnorm3df(x, y, z); } @@ -4830,11 +3633,6 @@ extern "C" __device__ float test_rnorm3df(float x, float y, float z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm3d( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_rnorm3d(double x, double y, double z) { return rnorm3d(x, y, z); } @@ -4854,11 +3652,6 @@ extern "C" __device__ double test_rnorm3d(double x, double y, double z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm4df( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_rnorm4df(float x, float y, float z, float w) { return rnorm4df(x, y, z, w); } @@ -4878,11 +3671,6 @@ extern "C" 
__device__ float test_rnorm4df(float x, float y, float z, float w) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rnorm4d( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_rnorm4d(double x, double y, double z, double w) { return rnorm4d(x, y, z, w); } @@ -4902,11 +3690,6 @@ extern "C" __device__ double test_rnorm4d(double x, double y, double z, double w // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.round.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_roundf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.round.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_roundf(float x) { return roundf(x); } @@ -4926,11 +3709,6 @@ extern "C" __device__ float test_roundf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.round.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_round( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.round.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_round(double x) { return round(x); } @@ -4950,11 +3728,6 @@ extern "C" __device__ double test_round(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef 
[[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rsqrtf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_rsqrtf(float x) { return rsqrtf(x); } @@ -4974,11 +3747,6 @@ extern "C" __device__ float test_rsqrtf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_rsqrt( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_rsqrt(double x) { return rsqrt(x); } @@ -5028,21 +3796,6 @@ extern "C" __device__ double test_rsqrt(double x) { // APPROX-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ] // APPROX-NEXT: ret float [[COND_I]] // -// AMDGCNSPIRV-LABEL: @test_scalblnf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]] -// AMDGCNSPIRV: cond.true.i: -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]]) -// AMDGCNSPIRV-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]] -// AMDGCNSPIRV: cond.false.i: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]] -// 
AMDGCNSPIRV: _ZL8scalblnffl.exit: -// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ] -// AMDGCNSPIRV-NEXT: ret float [[COND_I]] -// extern "C" __device__ float test_scalblnf(float x, long int y) { return scalblnf(x, y); } @@ -5092,21 +3845,6 @@ extern "C" __device__ float test_scalblnf(float x, long int y) { // APPROX-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ] // APPROX-NEXT: ret double [[COND_I]] // -// AMDGCNSPIRV-LABEL: @test_scalbln( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807 -// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]] -// AMDGCNSPIRV: cond.true.i: -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]]) -// AMDGCNSPIRV-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]] -// AMDGCNSPIRV: cond.false.i: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: br label [[_ZL7SCALBLNDL_EXIT]] -// AMDGCNSPIRV: _ZL7scalblndl.exit: -// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ] -// AMDGCNSPIRV-NEXT: ret double [[COND_I]] -// extern "C" __device__ double test_scalbln(double x, long int y) { return scalbln(x, y); } @@ -5126,11 +3864,6 @@ extern "C" __device__ double test_scalbln(double x, long int y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_scalbnf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef 
addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_scalbnf(float x, int y) { return scalbnf(x, y); } @@ -5150,11 +3883,6 @@ extern "C" __device__ float test_scalbnf(float x, int y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_scalbn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_scalbn(double x, int y) { return scalbn(x, y); } @@ -5165,12 +3893,6 @@ extern "C" __device__ double test_scalbn(double x, int y) { // CHECK-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 // CHECK-NEXT: ret i32 [[DOTLOBIT]] // -// AMDGCNSPIRV-LABEL: @test___signbitf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast float [[X:%.*]] to i32 -// AMDGCNSPIRV-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP0]], 31 -// AMDGCNSPIRV-NEXT: ret i32 [[DOTLOBIT]] -// extern "C" __device__ BOOL_TYPE test___signbitf(float x) { return __signbitf(x); } @@ -5182,13 +3904,6 @@ extern "C" __device__ BOOL_TYPE test___signbitf(float x) { // CHECK-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 // CHECK-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: @test___signbit( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = bitcast double [[X:%.*]] to i64 -// AMDGCNSPIRV-NEXT: [[DOTLOBIT:%.*]] = lshr i64 [[TMP0]], 63 -// AMDGCNSPIRV-NEXT: [[CONV:%.*]] = trunc nuw nsw i64 [[DOTLOBIT]] to i32 -// AMDGCNSPIRV-NEXT: ret i32 [[CONV]] -// extern "C" __device__ BOOL_TYPE test___signbit(double x) { return __signbit(x); } @@ -5226,18 +3941,6 @@ extern "C" __device__ BOOL_TYPE test___signbit(double x) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 
4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test_sincosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: ret void -// extern "C" __device__ void test_sincosf(float x, float *y, float *z) { sincosf(x, y, z); } @@ -5275,18 +3978,6 @@ extern "C" __device__ void test_sincosf(float x, float *y, float *z) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test_sincos( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = 
load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: ret void -// extern "C" __device__ void test_sincos(double x, double *y, double *z) { sincos(x, y, z); } @@ -5324,18 +4015,6 @@ extern "C" __device__ void test_sincos(double x, double *y, double *z) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test_sincospif( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: ret void -// extern "C" __device__ void test_sincospif(float x, float *y, float *z) { sincospif(x, y, z); } @@ -5373,18 +4052,6 @@ extern "C" __device__ void test_sincospif(float x, float *y, float *z) { // APPROX-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]] // APPROX-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test_sincospi( -// AMDGCNSPIRV-NEXT: entry: -// 
AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] -// AMDGCNSPIRV-NEXT: ret void -// extern "C" __device__ void test_sincospi(double x, double *y, double *z) { sincospi(x, y, z); } @@ -5404,11 +4071,6 @@ extern "C" __device__ void test_sincospi(double x, double *y, double *z) { // APPROX-NEXT: [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I1]] // -// AMDGCNSPIRV-LABEL: @test_sinf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_sinf(float x) { return sinf(x); } @@ -5428,11 +4090,6 @@ extern "C" __device__ float test_sinf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_sin( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double 
@__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_sin(double x) { return sin(x); } @@ -5452,11 +4109,6 @@ extern "C" __device__ double test_sin(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_sinpif( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_sinpif(float x) { return sinpif(x); } @@ -5476,11 +4128,6 @@ extern "C" __device__ float test_sinpif(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_sinpi( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_sinpi(double x) { return sinpi(x); } @@ -5500,11 +4147,6 @@ extern "C" __device__ double test_sinpi(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_sqrtf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_sqrtf(float x) { return sqrtf(x); } @@ -5524,11 +4166,6 @@ extern "C" __device__ float test_sqrtf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) // 
APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_sqrt( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_sqrt(double x) { return sqrt(x); } @@ -5548,11 +4185,6 @@ extern "C" __device__ double test_sqrt(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_tanf(float x) { return tanf(x); } @@ -5572,11 +4204,6 @@ extern "C" __device__ float test_tanf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tan( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_tan(double x) { return tan(x); } @@ -5596,11 +4223,6 @@ extern "C" __device__ double test_tan(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tanhf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_tanhf(float x) { 
return tanhf(x); } @@ -5620,11 +4242,6 @@ extern "C" __device__ float test_tanhf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tanh( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_tanh(double x) { return tanh(x); } @@ -5644,11 +4261,6 @@ extern "C" __device__ double test_tanh(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tgammaf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_tgammaf(float x) { return tgammaf(x); } @@ -5668,11 +4280,6 @@ extern "C" __device__ float test_tgammaf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_tgamma( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_tgamma(double x) { return tgamma(x); } @@ -5692,11 +4299,6 @@ extern "C" __device__ double test_tgamma(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.trunc.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_truncf( -// 
AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.trunc.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_truncf(float x) { return truncf(x); } @@ -5716,11 +4318,6 @@ extern "C" __device__ float test_truncf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.trunc.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_trunc( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.trunc.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_trunc(double x) { return trunc(x); } @@ -5740,11 +4337,6 @@ extern "C" __device__ double test_trunc(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_y0f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_y0f(float x) { return y0f(x); } @@ -5764,11 +4356,6 @@ extern "C" __device__ float test_y0f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_y0( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_y0(double x) { return y0(x); } @@ -5788,11 +4375,6 @@ extern "C" __device__ double test_y0(double x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call 
contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_y1f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test_y1f(float x) { return y1f(x); } @@ -5812,11 +4394,6 @@ extern "C" __device__ float test_y1f(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret double [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test_y1( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret double [[CALL_I]] -// extern "C" __device__ double test_y1(double x) { return y1(x); } @@ -5920,39 +4497,6 @@ extern "C" __device__ double test_y1(double x) { // APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // APPROX-NEXT: ret float [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test_ynf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] -// AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL3YNFIF_EXIT:%.*]] -// AMDGCNSPIRV: if.then2.i: -// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label 
[[_ZL3YNFIF_EXIT]] -// AMDGCNSPIRV: if.end4.i: -// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]] -// AMDGCNSPIRV: for.body.i: -// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]] -// AMDGCNSPIRV-NEXT: [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]] -// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]] -// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 -// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]] -// AMDGCNSPIRV: _ZL3ynfif.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] -// AMDGCNSPIRV-NEXT: ret float [[RETVAL_0_I]] -// extern "C" __device__ float test_ynf(int x, float y) { return ynf(x, y); } @@ -6056,39 +4600,6 @@ extern "C" __device__ float test_ynf(int x, float y) { // APPROX-NEXT: [[RETVAL_0_I:%.*]] = phi double [ 
[[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] // APPROX-NEXT: ret double [[RETVAL_0_I]] // -// AMDGCNSPIRV-LABEL: @test_yn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: switch i32 [[X:%.*]], label [[IF_END4_I:%.*]] [ -// AMDGCNSPIRV-NEXT: i32 0, label [[IF_THEN_I:%.*]] -// AMDGCNSPIRV-NEXT: i32 1, label [[IF_THEN2_I:%.*]] -// AMDGCNSPIRV-NEXT: ] -// AMDGCNSPIRV: if.then.i: -// AMDGCNSPIRV-NEXT: [[CALL_I20_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL2YNID_EXIT:%.*]] -// AMDGCNSPIRV: if.then2.i: -// AMDGCNSPIRV-NEXT: [[CALL_I22_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: br label [[_ZL2YNID_EXIT]] -// AMDGCNSPIRV: if.end4.i: -// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CALL_I21_I:%.*]] = tail call contract spir_func noundef addrspace(4) double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1 -// AMDGCNSPIRV-NEXT: br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]] -// AMDGCNSPIRV: for.body.i: -// AMDGCNSPIRV-NEXT: [[__I_0_I4:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY_I]] ], [ 1, [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ] -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1 -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]] -// AMDGCNSPIRV-NEXT: 
[[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]] -// AMDGCNSPIRV-NEXT: [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]] -// AMDGCNSPIRV-NEXT: [[INC_I]] = add nuw nsw i32 [[__I_0_I4]], 1 -// AMDGCNSPIRV-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]] -// AMDGCNSPIRV-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP26:![0-9]+]] -// AMDGCNSPIRV: _ZL2ynid.exit: -// AMDGCNSPIRV-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ] -// AMDGCNSPIRV-NEXT: ret double [[RETVAL_0_I]] -// extern "C" __device__ double test_yn(int x, double y) { return yn(x, y); } @@ -6108,11 +4619,6 @@ extern "C" __device__ double test_yn(int x, double y) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test___cosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test___cosf(float x) { return __cosf(x); } @@ -6135,12 +4641,6 @@ extern "C" __device__ float test___cosf(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___exp10f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x400A934F00000000 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test___exp10f(float x) { return __exp10f(x); } @@ -6163,12 +4663,6 @@ extern "C" __device__ 
float test___exp10f(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___expf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], 0x3FF7154760000000 -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.exp2.f32(float [[MUL_I]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test___expf(float x) { return __expf(x); } @@ -6188,11 +4682,6 @@ extern "C" __device__ float test___expf(float x) { // APPROX-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[ADD_I]] // -// AMDGCNSPIRV-LABEL: @test___fadd_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract float [[X:%.*]], [[Y:%.*]] -// AMDGCNSPIRV-NEXT: ret float [[ADD_I]] -// extern "C" __device__ float test___fadd_rn(float x, float y) { return __fadd_rn(x, y); } @@ -6212,11 +4701,6 @@ extern "C" __device__ float test___fadd_rn(float x, float y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test___fdividef( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float [[X:%.*]], [[Y:%.*]] -// AMDGCNSPIRV-NEXT: ret float [[DIV_I]] -// extern "C" __device__ float test___fdividef(float x, float y) { return __fdividef(x, y); } @@ -6236,11 +4720,6 @@ extern "C" __device__ float test___fdividef(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test__fmaf_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) -// 
AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test__fmaf_rn(float x, float y, float z) { return __fmaf_rn(x, y, z); } @@ -6260,11 +4739,6 @@ extern "C" __device__ float test__fmaf_rn(float x, float y, float z) { // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[MUL_I]] // -// AMDGCNSPIRV-LABEL: @test___fmul_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[X:%.*]], [[Y:%.*]] -// AMDGCNSPIRV-NEXT: ret float [[MUL_I]] -// extern "C" __device__ float test___fmul_rn(float x, float y) { return __fmul_rn(x, y); } @@ -6284,11 +4758,6 @@ extern "C" __device__ float test___fmul_rn(float x, float y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]] // APPROX-NEXT: ret float [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test___frcp_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract float 1.000000e+00, [[X:%.*]] -// AMDGCNSPIRV-NEXT: ret float [[DIV_I]] -// extern "C" __device__ float test___frcp_rn(float x) { return __frcp_rn(x); } @@ -6308,11 +4777,6 @@ extern "C" __device__ float test___frcp_rn(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.rsq.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___frsqrt_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.rsq.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test___frsqrt_rn(float x) { return __frsqrt_rn(x); } @@ -6332,11 +4796,6 @@ extern "C" __device__ float test___frsqrt_rn(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test___fsqrt_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail 
call contract spir_func noundef addrspace(4) float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test___fsqrt_rn(float x) { return __fsqrt_rn(x); } @@ -6356,11 +4815,6 @@ extern "C" __device__ float test___fsqrt_rn(float x) { // APPROX-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret float [[SUB_I]] // -// AMDGCNSPIRV-LABEL: @test___fsub_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[SUB_I:%.*]] = fsub contract float [[X:%.*]], [[Y:%.*]] -// AMDGCNSPIRV-NEXT: ret float [[SUB_I]] -// extern "C" __device__ float test___fsub_rn(float x, float y) { return __fsub_rn(x, y); } @@ -6380,11 +4834,6 @@ extern "C" __device__ float test___fsub_rn(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log10.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___log10f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log10.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test___log10f(float x) { return __log10f(x); } @@ -6404,11 +4853,6 @@ extern "C" __device__ float test___log10f(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.amdgcn.log.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___log2f( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.amdgcn.log.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test___log2f(float x) { return __log2f(x); } @@ -6428,11 +4872,6 @@ extern "C" __device__ float test___log2f(float x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.log.f32(float [[X:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: 
@test___logf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.log.f32(float [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test___logf(float x) { return __logf(x); } @@ -6452,11 +4891,6 @@ extern "C" __device__ float test___logf(float x) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test___powf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test___powf(float x, float y) { return __powf(x, y); } @@ -6485,14 +4919,6 @@ extern "C" __device__ float test___powf(float x, float y) { // APPROX-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] // APPROX-NEXT: ret float [[COND5_I]] // -// AMDGCNSPIRV-LABEL: @test___saturatef( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CMP_I:%.*]] = fcmp contract olt float [[X:%.*]], 0.000000e+00 -// AMDGCNSPIRV-NEXT: [[CMP1_I:%.*]] = fcmp contract ogt float [[X]], 1.000000e+00 -// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = select contract i1 [[CMP1_I]], float 1.000000e+00, float [[X]] -// AMDGCNSPIRV-NEXT: [[COND5_I:%.*]] = select contract i1 [[CMP_I]], float 0.000000e+00, float [[COND_I]] -// AMDGCNSPIRV-NEXT: ret float [[COND5_I]] -// extern "C" __device__ float test___saturatef(float x) { return __saturatef(x); } @@ -6521,14 +4947,6 @@ extern "C" __device__ float test___saturatef(float x) { // APPROX-NEXT: store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]] // APPROX-NEXT: ret void // -// AMDGCNSPIRV-LABEL: @test___sincosf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = 
tail call contract spir_func addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: [[CALL1_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: store float [[CALL1_I]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: ret void -// extern "C" __device__ void test___sincosf(float x, float *y, float *z) { __sincosf(x, y, z); } @@ -6548,11 +4966,6 @@ extern "C" __device__ void test___sincosf(float x, float *y, float *z) { // APPROX-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] // APPROX-NEXT: ret float [[CALL_I]] // -// AMDGCNSPIRV-LABEL: @test___sinf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: ret float [[CALL_I]] -// extern "C" __device__ float test___sinf(float x) { return __sinf(x); } @@ -6581,14 +4994,6 @@ extern "C" __device__ float test___sinf(float x) { // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] // APPROX-NEXT: ret float [[MUL_I]] // -// AMDGCNSPIRV-LABEL: @test___tanf( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[CALL_I3_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[CALL_I_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]]) -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]] -// AMDGCNSPIRV-NEXT: ret float [[MUL_I]] -// 
extern "C" __device__ float test___tanf(float x) { return __tanf(x); } @@ -6608,11 +5013,6 @@ extern "C" __device__ float test___tanf(float x) { // APPROX-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret double [[ADD_I]] // -// AMDGCNSPIRV-LABEL: @test___dadd_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[ADD_I:%.*]] = fadd contract double [[X:%.*]], [[Y:%.*]] -// AMDGCNSPIRV-NEXT: ret double [[ADD_I]] -// extern "C" __device__ double test___dadd_rn(double x, double y) { return __dadd_rn(x, y); } @@ -6632,11 +5032,6 @@ extern "C" __device__ double test___dadd_rn(double x, double y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret double [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test___ddiv_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double [[X:%.*]], [[Y:%.*]] -// AMDGCNSPIRV-NEXT: ret double [[DIV_I]] -// extern "C" __device__ double test___ddiv_rn(double x, double y) { return __ddiv_rn(x, y); } @@ -6656,11 +5051,6 @@ extern "C" __device__ double test___ddiv_rn(double x, double y) { // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]] // APPROX-NEXT: ret double [[MUL_I]] // -// AMDGCNSPIRV-LABEL: @test___dmul_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[MUL_I:%.*]] = fmul contract double [[X:%.*]], [[Y:%.*]] -// AMDGCNSPIRV-NEXT: ret double [[MUL_I]] -// extern "C" __device__ double test___dmul_rn(double x, double y) { return __dmul_rn(x, y); } @@ -6680,11 +5070,6 @@ extern "C" __device__ double test___dmul_rn(double x, double y) { // APPROX-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]] // APPROX-NEXT: ret double [[DIV_I]] // -// AMDGCNSPIRV-LABEL: @test___drcp_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[DIV_I:%.*]] = fdiv contract double 1.000000e+00, [[X:%.*]] -// AMDGCNSPIRV-NEXT: ret double [[DIV_I]] -// extern "C" __device__ double test___drcp_rn(double x) { return 
__drcp_rn(x); } @@ -6704,11 +5089,6 @@ extern "C" __device__ double test___drcp_rn(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[X:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test___dsqrt_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[X:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test___dsqrt_rn(double x) { return __dsqrt_rn(x); } @@ -6728,11 +5108,6 @@ extern "C" __device__ double test___dsqrt_rn(double x) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test__fma_rn( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.fma.f64(double [[X:%.*]], double [[Y:%.*]], double [[Z:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test__fma_rn(double x, double y, double z) { return __fma_rn(x, y, z); } @@ -6752,11 +5127,6 @@ extern "C" __device__ double test__fma_rn(double x, double y, double z) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_float_min( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_float_min(float x, float y) { return min(x, y); } @@ -6776,11 +5146,6 @@ extern "C" __device__ float test_float_min(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) // APPROX-NEXT: ret float [[TMP0]] // -// 
AMDGCNSPIRV-LABEL: @test_float_max( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret float [[TMP0]] -// extern "C" __device__ float test_float_max(float x, float y) { return max(x, y); } @@ -6800,11 +5165,6 @@ extern "C" __device__ float test_float_max(float x, float y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_double_min( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.minnum.f64(double [[X:%.*]], double [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_double_min(double x, double y) { return min(x, y); } @@ -6824,11 +5184,6 @@ extern "C" __device__ double test_double_min(double x, double y) { // APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) // APPROX-NEXT: ret double [[TMP0]] // -// AMDGCNSPIRV-LABEL: @test_double_max( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret double [[TMP0]] -// extern "C" __device__ double test_double_max(double x, double y) { return max(x, y); } @@ -6837,11 +5192,6 @@ extern "C" __device__ double test_double_max(double x, double y) { // CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) // CHECK-NEXT: ret i32 [[COND_I]] // -// AMDGCNSPIRV-LABEL: @test_int_min( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret i32 [[COND_I]] -// extern "C" __device__ int test_int_min(int x, 
int y) { return min(x, y); } @@ -6851,11 +5201,6 @@ extern "C" __device__ int test_int_min(int x, int y) { // CHECK-NEXT: [[COND_I:%.*]] = tail call noundef i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) // CHECK-NEXT: ret i32 [[COND_I]] // -// AMDGCNSPIRV-LABEL: @test_int_max( -// AMDGCNSPIRV-NEXT: entry: -// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = tail call noundef addrspace(4) i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) -// AMDGCNSPIRV-NEXT: ret i32 [[COND_I]] -// extern "C" __device__ int test_int_max(int x, int y) { return max(x, y); } From 393ef36e45f37176804fd21e2519f1515c51352f Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 28 Feb 2025 10:14:31 -0800 Subject: [PATCH 117/123] [Utils][UnifyLoopExits] Avoid costly updates if nothing changed (#129179) If the ControlFlowHub did not perform any change to the control flow, there is no need to repair SSA, update the loop structure, and verify a bunch of things. This is not completely NFC though, repairSSA introduced PHI nodes with a single entry that are now missing. My code went from 400+ seconds to 1 second, since no loop required the exits to be unified, but there were many "complex" loops. 
--- .../llvm/Transforms/Utils/ControlFlowUtils.h | 4 ++- .../lib/Transforms/Utils/ControlFlowUtils.cpp | 6 ++-- llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 6 +++- ...vergence-divergent-i1-used-outside-loop.ll | 6 ++-- .../CodeGen/AMDGPU/local-atomicrmw-fadd.ll | 32 +++++++++---------- .../CodeGen/AMDGPU/no-fold-accvgpr-mov.ll | 2 ++ 6 files changed, 33 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h index f789c3af75c68..810fef29f4010 100644 --- a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h +++ b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h @@ -110,7 +110,9 @@ struct ControlFlowHub { Branches.emplace_back(BB, Succ0, Succ1); } - BasicBlock * + /// Return the unified loop exit block and a flag indicating if the CFG was + /// changed at all. + std::pair finalize(DomTreeUpdater *DTU, SmallVectorImpl &GuardBlocks, const StringRef Prefix, std::optional MaxControlFlowBooleans = std::nullopt); diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp index 5ba626fa213ad..0155a7ba2570b 100644 --- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp +++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp @@ -270,7 +270,7 @@ static void reconnectPhis(BasicBlock *Out, BasicBlock *GuardBlock, } } -BasicBlock *ControlFlowHub::finalize( +std::pair ControlFlowHub::finalize( DomTreeUpdater *DTU, SmallVectorImpl &GuardBlocks, const StringRef Prefix, std::optional MaxControlFlowBooleans) { #ifndef NDEBUG @@ -289,7 +289,7 @@ BasicBlock *ControlFlowHub::finalize( } if (Outgoing.size() < 2) - return Outgoing.front(); + return {Outgoing.front(), false}; SmallVector Updates; if (DTU) { @@ -338,5 +338,5 @@ BasicBlock *ControlFlowHub::finalize( Inst->eraseFromParent(); } - return FirstGuardBlock; + return {FirstGuardBlock, true}; } diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp 
b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 856f3c3ed3e13..9f338dbc78cff 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -169,8 +169,12 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { SmallVector GuardBlocks; DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - BasicBlock *LoopExitBlock = CHub.finalize( + BasicBlock *LoopExitBlock; + bool ChangedCFG; + std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize( &DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue()); + if (!ChangedCFG) + return false; restoreSSA(DT, L, ExitingBlocks, LoopExitBlock); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll index 91e16d91ddd15..a66c21feb1cbc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll @@ -298,7 +298,7 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: v_mov_b32_e32 v4, s5 +; GFX10-NEXT: v_mov_b32_e32 v5, s5 ; GFX10-NEXT: s_branch .LBB4_2 ; GFX10-NEXT: .LBB4_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 @@ -312,6 +312,7 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_cbranch_execz .LBB4_6 ; GFX10-NEXT: .LBB4_2: ; %cond.block.0 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX10-NEXT: v_mov_b32_e32 v4, v5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4 ; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_4 @@ -328,11 +329,12 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_or_b32 exec_lo, 
exec_lo, s7 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4 ; GFX10-NEXT: s_mov_b32 s7, -1 +; GFX10-NEXT: ; implicit-def: $vgpr5 ; GFX10-NEXT: s_and_saveexec_b32 s8, s4 ; GFX10-NEXT: s_cbranch_execz .LBB4_1 ; GFX10-NEXT: ; %bb.5: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1 -; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4 +; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v4 ; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo ; GFX10-NEXT: s_and_b32 s7, exec_lo, 0 ; GFX10-NEXT: s_or_b32 s7, s4, s7 diff --git a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll index 50b6ad9f0cb37..15f5b2f76dfc5 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll @@ -7646,9 +7646,9 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX7-NEXT: s_or_b64 s[8:9], s[0:1], s[8:9] ; GFX7-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX7-NEXT: s_cbranch_execnz .LBB28_2 -; GFX7-NEXT: ; %bb.3: ; %Flow23 +; GFX7-NEXT: ; %bb.3: ; %Flow22 ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: .LBB28_4: ; %Flow24 +; GFX7-NEXT: .LBB28_4: ; %Flow23 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: s_mov_b64 s[8:9], exec ; GFX7-NEXT: v_readfirstlane_b32 s10, v1 @@ -7676,7 +7676,7 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX7-NEXT: v_mov_b32_e32 v3, v4 ; GFX7-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX7-NEXT: s_cbranch_execnz .LBB28_6 -; GFX7-NEXT: .LBB28_7: ; %Flow22 +; GFX7-NEXT: .LBB28_7: ; %Flow21 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 @@ -7725,7 +7725,7 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX7-NEXT: s_cbranch_execnz .LBB28_11 ; GFX7-NEXT: ; %bb.12: ; %Flow ; GFX7-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX7-NEXT: .LBB28_13: ; %Flow20 +; GFX7-NEXT: .LBB28_13: ; %Flow19 ; GFX7-NEXT: s_or_b64 exec, 
exec, s[6:7] ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: v_readfirstlane_b32 s4, v2 @@ -7770,9 +7770,9 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX6-NEXT: s_or_b64 s[8:9], s[0:1], s[8:9] ; GFX6-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX6-NEXT: s_cbranch_execnz .LBB28_2 -; GFX6-NEXT: ; %bb.3: ; %Flow21 +; GFX6-NEXT: ; %bb.3: ; %Flow20 ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: .LBB28_4: ; %Flow22 +; GFX6-NEXT: .LBB28_4: ; %Flow21 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: s_mov_b64 s[8:9], exec ; GFX6-NEXT: v_readfirstlane_b32 s10, v1 @@ -7800,7 +7800,7 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX6-NEXT: v_mov_b32_e32 v3, v4 ; GFX6-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX6-NEXT: s_cbranch_execnz .LBB28_6 -; GFX6-NEXT: .LBB28_7: ; %Flow20 +; GFX6-NEXT: .LBB28_7: ; %Flow19 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 @@ -7849,7 +7849,7 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX6-NEXT: s_cbranch_execnz .LBB28_11 ; GFX6-NEXT: ; %bb.12: ; %Flow ; GFX6-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX6-NEXT: .LBB28_13: ; %Flow18 +; GFX6-NEXT: .LBB28_13: ; %Flow17 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX6-NEXT: v_readfirstlane_b32 s4, v2 @@ -8483,9 +8483,9 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX7-NEXT: s_or_b64 s[8:9], s[0:1], s[8:9] ; GFX7-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX7-NEXT: s_cbranch_execnz .LBB29_2 -; GFX7-NEXT: ; %bb.3: ; %Flow23 +; GFX7-NEXT: ; %bb.3: ; %Flow22 ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: .LBB29_4: ; %Flow24 +; GFX7-NEXT: .LBB29_4: ; %Flow23 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: s_mov_b64 s[8:9], exec ; GFX7-NEXT: v_readfirstlane_b32 s10, v1 @@ -8513,7 +8513,7 
@@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX7-NEXT: v_mov_b32_e32 v3, v4 ; GFX7-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX7-NEXT: s_cbranch_execnz .LBB29_6 -; GFX7-NEXT: .LBB29_7: ; %Flow22 +; GFX7-NEXT: .LBB29_7: ; %Flow21 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 @@ -8562,7 +8562,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX7-NEXT: s_cbranch_execnz .LBB29_11 ; GFX7-NEXT: ; %bb.12: ; %Flow ; GFX7-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX7-NEXT: .LBB29_13: ; %Flow20 +; GFX7-NEXT: .LBB29_13: ; %Flow19 ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX7-NEXT: v_readfirstlane_b32 s4, v2 @@ -8607,9 +8607,9 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX6-NEXT: s_or_b64 s[8:9], s[0:1], s[8:9] ; GFX6-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX6-NEXT: s_cbranch_execnz .LBB29_2 -; GFX6-NEXT: ; %bb.3: ; %Flow21 +; GFX6-NEXT: ; %bb.3: ; %Flow20 ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: .LBB29_4: ; %Flow22 +; GFX6-NEXT: .LBB29_4: ; %Flow21 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: s_mov_b64 s[8:9], exec ; GFX6-NEXT: v_readfirstlane_b32 s10, v1 @@ -8637,7 +8637,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX6-NEXT: v_mov_b32_e32 v3, v4 ; GFX6-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GFX6-NEXT: s_cbranch_execnz .LBB29_6 -; GFX6-NEXT: .LBB29_7: ; %Flow20 +; GFX6-NEXT: .LBB29_7: ; %Flow19 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 @@ -8686,7 +8686,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX6-NEXT: s_cbranch_execnz .LBB29_11 ; GFX6-NEXT: ; %bb.12: ; %Flow ; GFX6-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX6-NEXT: .LBB29_13: ; 
%Flow18 +; GFX6-NEXT: .LBB29_13: ; %Flow17 ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX6-NEXT: v_readfirstlane_b32 s4, v2 diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll index a9b8663a48dea..dad59daaefb5f 100644 --- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll +++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll @@ -39,6 +39,7 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX942-NEXT: s_cbranch_vccz .LBB0_1 ; GFX942-NEXT: ; %bb.3: ; GFX942-NEXT: ; implicit-def: $sgpr3 +; GFX942-NEXT: ; implicit-def: $agpr0 ; GFX942-NEXT: .LBB0_4: ; %common.ret ; GFX942-NEXT: s_endpgm ; @@ -79,6 +80,7 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX908-NEXT: s_cbranch_vccz .LBB0_1 ; GFX908-NEXT: ; %bb.3: ; GFX908-NEXT: ; implicit-def: $sgpr3 +; GFX908-NEXT: ; implicit-def: $agpr0 ; GFX908-NEXT: .LBB0_4: ; %common.ret ; GFX908-NEXT: s_endpgm entry: From 5f9c172fab1f4481d868d15da40a68b7745a9649 Mon Sep 17 00:00:00 2001 From: Valentyn Yukhymenko Date: Fri, 28 Feb 2025 18:27:20 +0000 Subject: [PATCH 118/123] [clang-tidy] [dataflow] Cache reference accessors for `bugprone-unchecked-optional-access` (#128437) Fixes https://github.com/llvm/llvm-project/issues/126283 Extending https://github.com/llvm/llvm-project/pull/112605 to cache const getters which return references. 
Fixes false positives from const reference accessors to object containing optional member --- clang-tools-extra/docs/ReleaseNotes.rst | 3 +- .../Models/UncheckedOptionalAccessModel.cpp | 16 ++ .../UncheckedOptionalAccessModelTest.cpp | 194 ++++++++++++++++++ 3 files changed, 212 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index a8d17d19fda1d..07a79d6bbe807 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -112,7 +112,8 @@ Changes in existing checks ` fixing false positives from smart pointer accessors repeated in checking ``has_value`` and accessing ``value``. The option `IgnoreSmartPointerDereference` should - no longer be needed and will be removed. + no longer be needed and will be removed. Also fixing false positive from + const reference accessors to objects containing optional member. - Improved :doc:`bugprone-unsafe-functions ` check to allow specifying diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index e1394e28cd49a..9381c5c42e566 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -580,6 +580,22 @@ void handleConstMemberCall(const CallExpr *CE, return; } + // Cache if the const method returns a reference + if (RecordLoc != nullptr && CE->isGLValue()) { + const FunctionDecl *DirectCallee = CE->getDirectCallee(); + if (DirectCallee == nullptr) + return; + + StorageLocation &Loc = + State.Lattice.getOrCreateConstMethodReturnStorageLocation( + *RecordLoc, DirectCallee, State.Env, [&](StorageLocation &Loc) { + // no-op + }); + + State.Env.setStorageLocation(*CE, Loc); + return; + } + // Cache if the const method returns a boolean or pointer type. 
// We may decide to cache other return types in the future. if (RecordLoc != nullptr && diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp index 19c3ff49eab27..5031e17188e17 100644 --- a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp @@ -3863,6 +3863,200 @@ TEST_P(UncheckedOptionalAccessTest, ConstBoolAccessorWithModInBetween) { )cc"); } +TEST_P(UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaConstRefAccessorToHoldingObject) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + A a; + }; + + void target(B& b) { + if (b.getA().get().has_value()) { + b.getA().get().value(); + } + } + )cc"); +} + +TEST_P( + UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaConstRefAccessorToHoldingObjectWithoutValueCheck) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + A a; + }; + + void target(B& b) { + b.getA().get().value(); // [[unsafe]] + } + )cc"); +} + +TEST_P(UncheckedOptionalAccessTest, + ConstRefToOptionalSavedAsTemporaryVariable) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + A a; + }; + + void target(B& b) { + const auto& opt = b.getA().get(); + if (opt.has_value()) { + opt.value(); + } + } + )cc"); +} + +TEST_P(UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaAccessorToHoldingObjectByValue) { + 
ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A copyA() const { return a; } + + A a; + }; + + void target(B& b) { + if (b.copyA().get().has_value()) { + b.copyA().get().value(); // [[unsafe]] + } + } + )cc"); +} + +TEST_P(UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaNonConstRefAccessorToHoldingObject) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + A& getA() { return a; } + + A a; + }; + + void target(B& b) { + if (b.getA().get().has_value()) { + b.getA().get().value(); // [[unsafe]] + } + } + )cc"); +} + +TEST_P( + UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaConstRefAccessorToHoldingObjectWithModAfterCheck) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + A& getA() { return a; } + + void clear() { a = A{}; } + + A a; + }; + + void target(B& b) { + // changing field A via non-const getter after const getter check + if (b.getA().get().has_value()) { + b.getA() = A{}; + b.getA().get().value(); // [[unsafe]] + } + + // calling non-const method which might change field A + if (b.getA().get().has_value()) { + b.clear(); + b.getA().get().value(); // [[unsafe]] + } + } + )cc"); +} + +TEST_P( + UncheckedOptionalAccessTest, + ConstRefAccessorToOptionalViaConstRefAccessorToHoldingObjectWithAnotherConstCallAfterCheck) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + const $ns::$optional& get() const { return x; } + + $ns::$optional x; + }; + + struct B { + const A& getA() const { return a; } + + void callWithoutChanges() const { + // no-op + } + + A a; 
+ }; + + void target(B& b) { + if (b.getA().get().has_value()) { + b.callWithoutChanges(); // calling const method which cannot change A + b.getA().get().value(); + } + } + )cc"); +} + // FIXME: Add support for: // - constructors (copy, move) // - assignment operators (default, copy, move) From 06cf3406b38f1bc897f2345a800ffc260f8e5165 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Fri, 28 Feb 2025 10:33:32 -0800 Subject: [PATCH 119/123] [-Wunsafe-buffer-usage] Fix a potential overflow bug reported by #126334 (#129169) `MeasureTokenLength` may return an unsigned 0 representing failure in obtaining length of a token. The analysis now gives up on such cases. Otherwise, there might be issues caused by unsigned integer "overflow". --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index ff4f940a596e3..12e99143cb148 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -2364,12 +2364,13 @@ template static std::optional getEndCharLoc(const NodeTy *Node, const SourceManager &SM, const LangOptions &LangOpts) { - unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); - SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); - - if (Loc.isValid()) - return Loc; + if (unsigned TkLen = + Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts)) { + SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); + if (Loc.isValid()) + return Loc; + } return std::nullopt; } From c09cf63764e8dd58af59215f139c8b0e0327462d Mon Sep 17 00:00:00 2001 From: Jerry-Ge Date: Fri, 28 Feb 2025 10:38:12 -0800 Subject: [PATCH 120/123] [mlir][tosa] Remove out_shape from transpose_conv2d (#129133) --- .../mlir/Dialect/Tosa/IR/TosaOpBase.td | 4 +--- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 1 - mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 14 +++++------- 
mlir/test/Dialect/Tosa/invalid.mlir | 11 +--------- .../Tosa/tosa-decompose-transpose-conv.mlir | 22 ++++++++----------- mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir | 16 +++++++------- 6 files changed, 25 insertions(+), 43 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td index 23692478755c6..ce17ad9362227 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td @@ -148,13 +148,11 @@ def Tosa_TransConvOpQuantInfoBuilder : OpBuilder< "::mlir::Value":$weight, "mlir::Value":$bias, "::mlir::DenseI64ArrayAttr":$outpad, "::mlir::DenseI64ArrayAttr":$stride, - "::mlir::DenseI64ArrayAttr":$outputShape, "::mlir::TypeAttr":$acc_type), [{ buildTransConvOpWithQuantInfo($_builder, $_state, outputType, input, weight, bias, - outpad, stride, - outputShape, acc_type); + outpad, stride, acc_type); }]>; // The tosa.matmul op is also intended to be generated where a fully_connected diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 1f4f1fc6b96c9..f1a9d1fedac1b 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -408,7 +408,6 @@ def Tosa_TransposeConv2DOp : Tosa_ConvOp<"transpose_conv2d"> { Tosa_IntArrayAttr4:$out_pad, Tosa_IntArrayAttr2:$stride, - Tosa_IntArrayAttr4:$out_shape, TypeAttrOf:$acc_type, DefaultValuedOptionalAttr:$local_bound ); diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 7b50eceb081dd..54f9fa917f2e0 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -569,15 +569,15 @@ static void buildConvOpWithQuantInfo(OpBuilder &builder, OperationState &result, /// Handles tosa.transpose_conv2d which has outpad and output shape /// attributes. 
-static void buildTransConvOpWithQuantInfo( - OpBuilder &builder, OperationState &result, Type outputType, Value input, - Value weight, Value bias, DenseI64ArrayAttr outpad, - DenseI64ArrayAttr stride, DenseI64ArrayAttr outputShape, TypeAttr accType) { +static void +buildTransConvOpWithQuantInfo(OpBuilder &builder, OperationState &result, + Type outputType, Value input, Value weight, + Value bias, DenseI64ArrayAttr outpad, + DenseI64ArrayAttr stride, TypeAttr accType) { auto zps = createZPsAsConst(builder, input, weight); result.addOperands({input, weight, bias, zps.first, zps.second}); result.addAttribute("out_pad", outpad); result.addAttribute("stride", stride); - result.addAttribute("out_shape", outputShape); result.addAttribute("acc_type", accType); Type finalOutputType = outputType; auto quantAttr = buildConvOpQuantizationAttr(builder, input, weight); @@ -2327,9 +2327,7 @@ LogicalResult TransposeConv2DOp::inferReturnTypeComponents( MLIRContext *context, ::std::optional location, TransposeConv2DOp::Adaptor adaptor, SmallVectorImpl &inferredReturnShapes) { - // outputShape is mutable. 
- llvm::SmallVector outputShape = - convertToMlirShape(adaptor.getOutShape()); + llvm::SmallVector outputShape(4, ShapedType::kDynamic); int64_t inputWidth = ShapedType::kDynamic; int64_t inputHeight = ShapedType::kDynamic; diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index 123c65e1b4fcd..5b928a2489eea 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -168,7 +168,7 @@ func.func @test_depthwise_conv2d_acc_type(%arg0: tensor<1x4x4x4xi8>, %arg1: tens func.func @test_transpose_conv2d(%arg0: tensor<1x32x32x8xi8>, %arg1: tensor<16x1x1x8xi8>, %arg2: tensor<16xi8>) -> tensor<1x32x32x16xi8> { %zp = "tosa.const"() {value = dense<0> : tensor<1xi8>} : () -> tensor<1xi8> // expected-error@+1 {{'tosa.transpose_conv2d' op accumulator type for i8 tensor is not i32}} - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %zp, %zp {acc_type = f16, out_pad = array, out_shape = array, stride = array} : (tensor<1x32x32x8xi8>, tensor<16x1x1x8xi8>, tensor<16xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x32x32x16xi8> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %zp, %zp {acc_type = f16, out_pad = array, stride = array} : (tensor<1x32x32x8xi8>, tensor<16x1x1x8xi8>, tensor<16xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x32x32x16xi8> return %0 : tensor<1x32x32x16xi8> } @@ -741,15 +741,6 @@ func.func @test_table_io_shape_mismatch(%arg0: tensor, %arg1: tensor<6 // ----- -// CHECK-LABEL: test_transpose_conv2d_invalid_outshape -func.func @test_transpose_conv2d_invalid_outshape(%arg0: tensor<1x32x32x8xf32>, %arg1: tensor<16x1x1x8xf32>, %arg2: tensor<16xf32>) -> tensor<1x32x32x16xf32> { - // expected-error@+1 {{'tosa.transpose_conv2d' op attribute 'out_shape' failed to satisfy constraint: i64 dense array attribute with exactly 4 elements}} - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2 {out_pad = array, out_shape = array, stride = array} : (tensor<1x32x32x8xf32>, tensor<16x1x1x8xf32>, tensor<16xf32>) -> 
tensor<1x32x32x16xf32> - return %0 : tensor<1x32x32x16xf32> -} - -// ----- - // CHECK-LABEL: test_mul_type_mismatch func.func @test_mul_type_mismatch(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x1x3xf16>) -> tensor<13x21x3xf32> { %shift = "tosa.const"() {value = dense<0> : tensor<1xi8>} : () -> tensor<1xi8> diff --git a/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir b/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir index bb3c16cf52d63..0167bf10ed0ae 100644 --- a/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir +++ b/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir @@ -32,19 +32,16 @@ func.func @transpose_conv2d_quantized(%arg0: tensor<2x16x14x3xi8>, %arg1: tensor // CHECK-LABEL: @transpose_conv2d_quantized_padded func.func @transpose_conv2d_quantized_padded(%arg0: tensor<2x16x14x3xi8>, %arg1: tensor<5x3x6x3xi8>, %arg2: tensor<5xi32>) -> (tensor<2x21x26x5xi32>) { - // CHECK-DAG: %[[INPUT_ZP:.+]] = "tosa.const"() <{value = dense<-22> : tensor<1xi8>} - // CHECK-DAG: %[[WEIGHT_ZP:.+]] = "tosa.const"() <{value = dense<42> : tensor<1xi8>} - // CHECK-DAG: %[[REV0:.+]] = tosa.reverse %2 {axis = 2 : i32} - // CHECK-DAG: %[[REV1:.+]] = tosa.reverse %arg1 {axis = 1 : i32} - // CHECK: tosa.conv2d %arg0, %3, %arg2, %[[INPUT_ZP]], %[[WEIGHT_ZP]] - // CHECK-SAME: dilation = array, pad = array, - // CHECK-SAME: stride = array} - %input_zp = "tosa.const"() {value = dense<-22> : tensor<1xi8>} : () -> tensor<1xi8> - %weight_zp = "tosa.const"() {value = dense<42> : tensor<1xi8>} : () -> tensor<1xi8> + // CHECK-DAG: %[[INPUT_ZP:.+]] = "tosa.const"() <{value = dense<-22> : tensor<1xi8>}> : () -> tensor<1xi8> + // CHECK-DAG: %[[WEIGHT_ZP:.+]] = "tosa.const"() <{value = dense<42> : tensor<1xi8>}> : () -> tensor<1xi8> + // CHECK-DAG: %[[REV0:.+]] = tosa.reverse %arg1 {axis = 1 : i32} + // CHECK-DAG: %[[REV1:.+]] = tosa.reverse %[[REV0]] {axis = 2 : i32} + // CHECK: tosa.conv2d %arg0, %[[REV1]], %arg2, %[[INPUT_ZP]], %[[WEIGHT_ZP]] {acc_type = i32, 
dilation = array, pad = array, stride = array} + %input_zp = "tosa.const"() <{value = dense<-22> : tensor<1xi8>}> : () -> tensor<1xi8> + %weight_zp = "tosa.const"() <{value = dense<42> : tensor<1xi8>}> : () -> tensor<1xi8> %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %input_zp, %weight_zp { acc_type = i32, out_pad = array, - out_shape = array, stride = array} : (tensor<2x16x14x3xi8>, tensor<5x3x6x3xi8>, tensor<5xi32>, tensor<1xi8>, tensor<1xi8>) -> tensor<2x21x26x5xi32> return %0 : tensor<2x21x26x5xi32> } @@ -160,12 +157,11 @@ func.func @transpose_conv2d_strided_overpad(%arg0 : tensor<1x16x1x1xi8>, %arg1 : // CHECK: %[[PAD_RESULT:.+]] = tosa.pad %[[RESHAPE_RESULT_1]], %[[RESULT_PAD]] // CHECK: %[[RESHAPE_ARG2:.+]] = tosa.reshape %arg2, %[[CONST10]] // CHECK: %[[ADD:.+]] = tosa.add %[[PAD_RESULT]], %[[RESHAPE_ARG2]] - %input_zp = "tosa.const"() {value = dense<-103> : tensor<1xi8>} : () -> tensor<1xi8> - %weight_zp = "tosa.const"() {value = dense<93> : tensor<1xi8>} : () -> tensor<1xi8> + %input_zp = "tosa.const"() <{value = dense<-103> : tensor<1xi8>}> : () -> tensor<1xi8> + %weight_zp = "tosa.const"() <{value = dense<93> : tensor<1xi8>}> : () -> tensor<1xi8> %2 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %input_zp, %weight_zp { acc_type = i32, out_pad = array, - out_shape = array, stride = array} : (tensor<1x16x1x1xi8>, tensor<1x2x1x1xi8>, tensor<1xi32>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x19x2x1xi32> "func.return" (%2) : (tensor<1x19x2x1xi32>) -> () diff --git a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir index b87e9a78bf144..8a3dbfe17d686 100644 --- a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir +++ b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir @@ -907,7 +907,7 @@ func.func @depthwise_conv2d_strided(%arg0: tensor<1x13x14x1xf32>, %arg1: tensor< // CHECK-LABEL: @transpose_conv2d_out_shape func.func @transpose_conv2d_out_shape(%arg0: tensor<2x?x?x3xf32>, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, 
%arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x8x9x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x?x?x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x8x9x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x?x?x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x8x9x5xf32> return } @@ -916,7 +916,7 @@ func.func @transpose_conv2d_out_shape(%arg0: tensor<2x?x?x3xf32>, %arg1: tensor< // CHECK-LABEL: @transpose_conv2d_static func.func @transpose_conv2d_static(%arg0: tensor<2x16x14x3xf32>, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x18x19x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x16x14x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x16x14x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> return } @@ -925,7 +925,7 @@ func.func @transpose_conv2d_static(%arg0: tensor<2x16x14x3xf32>, %arg1: tensor<5 // CHECK-LABEL: @transpose_conv2d_static_strided func.func @transpose_conv2d_static_strided(%arg0: tensor<2x16x14x3xf32>, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x33x45x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x16x14x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> + %0 = 
tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x16x14x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> return } @@ -934,7 +934,7 @@ func.func @transpose_conv2d_static_strided(%arg0: tensor<2x16x14x3xf32>, %arg1: // CHECK-LABEL: @transpose_conv2d_dynamic_input func.func @transpose_conv2d_dynamic_input(%arg0: tensor, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor return } @@ -943,7 +943,7 @@ func.func @transpose_conv2d_dynamic_input(%arg0: tensor, %arg1: ten // CHECK-LABEL: @transpose_conv2d_dynamic_weights func.func @transpose_conv2d_dynamic_weights(%arg0: tensor<2x6x4x3xf32>, %arg1: tensor, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x?x?x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x6x4x3xf32>, tensor, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x6x4x3xf32>, tensor, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x?x?x5xf32> return } @@ -952,7 +952,7 @@ func.func @transpose_conv2d_dynamic_weights(%arg0: tensor<2x6x4x3xf32>, %arg1: t // CHECK-LABEL: @transpose_conv2d_dynamic_bias func.func @transpose_conv2d_dynamic_bias(%arg0: tensor<2x6x4x3xf32>, %arg1: 
tensor<5x3x6x3xf32>, %arg2: tensor, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x8x9x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x6x4x3xf32>, tensor<5x3x6x3xf32>, tensor, tensor<1xf32>, tensor<1xf32>) -> tensor<2x8x9x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x6x4x3xf32>, tensor<5x3x6x3xf32>, tensor, tensor<1xf32>, tensor<1xf32>) -> tensor<2x8x9x5xf32> return } @@ -961,14 +961,14 @@ func.func @transpose_conv2d_dynamic_bias(%arg0: tensor<2x6x4x3xf32>, %arg1: tens // CHECK-LABEL: @transpose_conv2d_padded func.func @transpose_conv2d_padded(%arg0: tensor<2x9x11x3xf32>, %arg1: tensor<5x3x6x3xf32>, %arg2: tensor<5xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<2x10x13x5xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<2x9x11x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x10x13x5xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = array} : (tensor<2x9x11x3xf32>, tensor<5x3x6x3xf32>, tensor<5xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x10x13x5xf32> return } // CHECK-LABEL: @transpose_conv2d_strided func.func @transpose_conv2d_strided(%arg0: tensor<1x5x7x1xf32>, %arg1: tensor<1x1x1x1xf32>, %arg2: tensor<1xf32>, %arg3: tensor<1xf32>, %arg4: tensor<1xf32>) { // CHECK: -> tensor<1x13x13x1xf32> - %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, out_shape = array, stride = array} : (tensor<1x5x7x1xf32>, tensor<1x1x1x1xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x13x13x1xf32> + %0 = tosa.transpose_conv2d %arg0, %arg1, %arg2, %arg3, %arg4 {acc_type = f32, out_pad = array, stride = 
array} : (tensor<1x5x7x1xf32>, tensor<1x1x1x1xf32>, tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x13x13x1xf32> return } From 4338e36927c5ccde9cbcbf8d260a54c42e97ae97 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Fri, 28 Feb 2025 10:46:17 -0800 Subject: [PATCH 121/123] [MLIR][LLVMIR] Add support for empty global ctor/dtor lists (#128969) LLVM IR emitted in from C++ may contain `@llvm.global_ctors = appending global [0 x { i32, ptr, ptr }] zeroinitializer`. Before this PR, if we try to roundtrip code like this from the importer, we'll end up with nothing in place. Note that `llvm::appendToGlobalCtors` ignores empty lists and this PR uses the same approach as `llvm-as`, which doesn't use the utilities from `llvm/lib/Transforms/Utils/ModuleUtils.cpp` in order to build this - it calls into creating a global variable from scratch. --- mlir/lib/Target/LLVMIR/ModuleImport.cpp | 16 ++++++-- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 39 ++++++++++++++----- mlir/test/Dialect/LLVMIR/global.mlir | 8 ++++ .../Target/LLVMIR/Import/global-variables.ll | 8 ++++ mlir/test/Target/LLVMIR/llvmir.mlir | 8 ++++ 5 files changed, 66 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index 8445e609c2244..7ea82f61fadbb 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -1071,11 +1071,21 @@ LogicalResult ModuleImport::convertGlobalCtorsAndDtors(llvm::GlobalVariable *globalVar) { if (!globalVar->hasInitializer() || !globalVar->hasAppendingLinkage()) return failure(); - auto *initializer = - dyn_cast(globalVar->getInitializer()); - if (!initializer) + llvm::Constant *initializer = globalVar->getInitializer(); + + bool knownInit = isa(initializer) || + isa(initializer); + if (!knownInit) return failure(); + // ConstantAggregateZero does not engage with the operand initialization + // in the loop that follows - there should be no operands. 
This implies + // empty ctor/dtor lists. + if (auto *caz = dyn_cast(initializer)) { + if (caz->getElementCount().getFixedValue() != 0) + return failure(); + } + SmallVector funcs; SmallVector priorities; for (llvm::Value *operand : initializer->operands()) { diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 5cd841ee2df91..eda6b51ff45ea 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1258,16 +1258,35 @@ LogicalResult ModuleTranslation::convertGlobalsAndAliases() { auto dtorOp = dyn_cast(op); if (!ctorOp && !dtorOp) continue; - auto range = ctorOp ? llvm::zip(ctorOp.getCtors(), ctorOp.getPriorities()) - : llvm::zip(dtorOp.getDtors(), dtorOp.getPriorities()); - auto appendGlobalFn = - ctorOp ? llvm::appendToGlobalCtors : llvm::appendToGlobalDtors; - for (auto symbolAndPriority : range) { - llvm::Function *f = lookupFunction( - cast(std::get<0>(symbolAndPriority)).getValue()); - appendGlobalFn(*llvmModule, f, - cast(std::get<1>(symbolAndPriority)).getInt(), - /*Data=*/nullptr); + + // The empty / zero initialized version of llvm.global_(c|d)tors cannot be + // handled by appendGlobalFn logic below, which just ignores empty (c|d)tor + // lists. Make sure it gets emitted. + if ((ctorOp && ctorOp.getCtors().empty()) || + (dtorOp && dtorOp.getDtors().empty())) { + llvm::IRBuilder builder( + llvmModule->getContext(), + llvm::TargetFolder(llvmModule->getDataLayout())); + llvm::Type *eltTy = llvm::StructType::get( + builder.getInt32Ty(), builder.getPtrTy(), builder.getPtrTy()); + llvm::ArrayType *at = llvm::ArrayType::get(eltTy, 0); + llvm::Constant *zeroInit = llvm::Constant::getNullValue(at); + (void)new llvm::GlobalVariable( + *llvmModule, zeroInit->getType(), false, + llvm::GlobalValue::AppendingLinkage, zeroInit, + ctorOp ? "llvm.global_ctors" : "llvm.global_dtors"); + } else { + auto range = ctorOp + ? 
llvm::zip(ctorOp.getCtors(), ctorOp.getPriorities()) + : llvm::zip(dtorOp.getDtors(), dtorOp.getPriorities()); + auto appendGlobalFn = + ctorOp ? llvm::appendToGlobalCtors : llvm::appendToGlobalDtors; + for (const auto &[sym, prio] : range) { + llvm::Function *f = + lookupFunction(cast(sym).getValue()); + appendGlobalFn(*llvmModule, f, cast(prio).getInt(), + /*Data=*/nullptr); + } } } diff --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir index 79d1cafabfbed..bd3584de9a405 100644 --- a/mlir/test/Dialect/LLVMIR/global.mlir +++ b/mlir/test/Dialect/LLVMIR/global.mlir @@ -233,6 +233,14 @@ llvm.mlir.global_ctors { ctors = [@ctor], priorities = [0 : i32]} // ----- +// CHECK: llvm.mlir.global_ctors {ctors = [], priorities = []} +llvm.mlir.global_ctors {ctors = [], priorities = []} + +// CHECK: llvm.mlir.global_dtors {dtors = [], priorities = []} +llvm.mlir.global_dtors {dtors = [], priorities = []} + +// ----- + llvm.func @dtor() { llvm.return } diff --git a/mlir/test/Target/LLVMIR/Import/global-variables.ll b/mlir/test/Target/LLVMIR/Import/global-variables.ll index fbeda4cd42af8..b809c93d772f5 100644 --- a/mlir/test/Target/LLVMIR/Import/global-variables.ll +++ b/mlir/test/Target/LLVMIR/Import/global-variables.ll @@ -256,6 +256,14 @@ define void @bar() { ; // ----- +; CHECK: llvm.mlir.global_ctors {ctors = [], priorities = []} +@llvm.global_ctors = appending global [0 x { i32, ptr, ptr }] zeroinitializer + +; CHECK: llvm.mlir.global_dtors {dtors = [], priorities = []} +@llvm.global_dtors = appending global [0 x { i32, ptr, ptr }] zeroinitializer + +; // ----- + ; Visibility attribute. 
; CHECK: llvm.mlir.global external hidden constant @hidden("string") diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 7f9a3ba79d724..db2e08742dbca 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1859,6 +1859,14 @@ llvm.func @foo() { // ----- +// CHECK: @llvm.global_ctors = appending global [0 x { i32, ptr, ptr }] zeroinitializer +llvm.mlir.global_ctors {ctors = [], priorities = []} + +// CHECK: @llvm.global_dtors = appending global [0 x { i32, ptr, ptr }] zeroinitializer +llvm.mlir.global_dtors {dtors = [], priorities = []} + +// ----- + // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @foo, ptr null }] llvm.mlir.global_dtors { dtors = [@foo], priorities = [0 : i32]} From 140a46b849267647a685fab19f9e77e790418ae3 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 28 Feb 2025 10:49:24 -0800 Subject: [PATCH 122/123] [lldb-dap] Adding server mode support to lldb-dap VSCode extension. (#128957) This adds support for launching lldb-dap in server mode. The extension will start lldb-dap in server mode on-demand and retain the server until the VSCode window is closed (when the extension context is disposed). While running in server mode, launch performance for binaries is greatly improved by improving caching between debug sessions. For example, on my local M1 Max laptop it takes ~5s to attach for the first attach to an iOS Simulator process and ~0.5s to attach each time after the first. 
--- lldb/tools/lldb-dap/package.json | 8 +- .../lldb-dap/src-ts/debug-adapter-factory.ts | 86 +++++++++++++------ lldb/tools/lldb-dap/src-ts/extension.ts | 9 +- 3 files changed, 73 insertions(+), 30 deletions(-) diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index 31d808eda4c35..cd450a614b3f7 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -88,6 +88,12 @@ "additionalProperties": { "type": "string" } + }, + "lldb-dap.serverMode": { + "scope": "resource", + "type": "boolean", + "markdownDescription": "Run lldb-dap in server mode.\n\nWhen enabled, lldb-dap will start a background server that will be reused between debug sessions. This allows caching of debug symbols between sessions and improves launch performance.", + "default": false } } }, @@ -543,4 +549,4 @@ } ] } -} +} \ No newline at end of file diff --git a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts index 36107336ebc4d..1f76fe31b00ad 100644 --- a/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts +++ b/lldb/tools/lldb-dap/src-ts/debug-adapter-factory.ts @@ -4,6 +4,8 @@ import * as vscode from "vscode"; import * as child_process from "child_process"; import * as fs from "node:fs/promises"; +const exec = util.promisify(child_process.execFile); + export async function isExecutable(path: string): Promise { try { await fs.access(path, fs.constants.X_OK); @@ -16,7 +18,6 @@ export async function isExecutable(path: string): Promise { async function findWithXcrun(executable: string): Promise { if (process.platform === "darwin") { try { - const exec = util.promisify(child_process.execFile); let { stdout, stderr } = await exec("/usr/bin/xcrun", [ "-find", executable, @@ -24,7 +25,7 @@ async function findWithXcrun(executable: string): Promise { if (stdout) { return stdout.toString().trimEnd(); } - } catch (error) {} + } catch (error) { } } return undefined; } @@ -97,8 +98,15 @@ async 
function getDAPExecutable( * depending on the session configuration. */ export class LLDBDapDescriptorFactory - implements vscode.DebugAdapterDescriptorFactory -{ + implements vscode.DebugAdapterDescriptorFactory, vscode.Disposable { + private server?: Promise<{ process: child_process.ChildProcess, host: string, port: number }>; + + dispose() { + this.server?.then(({ process }) => { + process.kill(); + }); + } + async createDebugAdapterDescriptor( session: vscode.DebugSession, executable: vscode.DebugAdapterExecutable | undefined, @@ -115,7 +123,18 @@ export class LLDBDapDescriptorFactory } const configEnvironment = config.get<{ [key: string]: string }>("environment") || {}; - const dapPath = await getDAPExecutable(session); + const dapPath = (await getDAPExecutable(session)) ?? executable?.command; + + if (!dapPath) { + LLDBDapDescriptorFactory.showLLDBDapNotFoundMessage(); + return undefined; + } + + if (!(await isExecutable(dapPath))) { + LLDBDapDescriptorFactory.showLLDBDapNotFoundMessage(dapPath); + return; + } + const dbgOptions = { env: { ...executable?.options?.env, @@ -123,33 +142,52 @@ export class LLDBDapDescriptorFactory ...env, }, }; - if (dapPath) { - if (!(await isExecutable(dapPath))) { - LLDBDapDescriptorFactory.showLLDBDapNotFoundMessage(dapPath); - return undefined; - } - return new vscode.DebugAdapterExecutable(dapPath, [], dbgOptions); - } else if (executable) { - if (!(await isExecutable(executable.command))) { - LLDBDapDescriptorFactory.showLLDBDapNotFoundMessage(executable.command); - return undefined; - } - return new vscode.DebugAdapterExecutable( - executable.command, - executable.args, - dbgOptions, - ); + const dbgArgs = executable?.args ?? 
[]; + + const serverMode = config.get('serverMode', false); + if (serverMode) { + const { host, port } = await this.startServer(dapPath, dbgArgs, dbgOptions); + return new vscode.DebugAdapterServer(port, host); } - return undefined; + + return new vscode.DebugAdapterExecutable(dapPath, dbgArgs, dbgOptions); + } + + startServer(dapPath: string, args: string[], options: child_process.CommonSpawnOptions): Promise<{ host: string, port: number }> { + if (this.server) return this.server; + + this.server = new Promise(resolve => { + args.push( + '--connection', + 'connect://localhost:0' + ); + const server = child_process.spawn(dapPath, args, options); + server.stdout!.setEncoding('utf8').once('data', (data: string) => { + const connection = /connection:\/\/\[([^\]]+)\]:(\d+)/.exec(data); + if (connection) { + const host = connection[1]; + const port = Number(connection[2]); + resolve({ process: server, host, port }); + } + }); + server.on('exit', () => { + this.server = undefined; + }) + }); + return this.server; } /** * Shows a message box when the debug adapter's path is not found */ - static async showLLDBDapNotFoundMessage(path: string) { + static async showLLDBDapNotFoundMessage(path?: string) { + const message = + path + ? 
`Debug adapter path: ${path} is not a valid file.` + : "Unable to find the path to the LLDB debug adapter executable."; const openSettingsAction = "Open Settings"; const callbackValue = await vscode.window.showErrorMessage( - `Debug adapter path: ${path} is not a valid file`, + message, openSettingsAction, ); diff --git a/lldb/tools/lldb-dap/src-ts/extension.ts b/lldb/tools/lldb-dap/src-ts/extension.ts index 71fd48298f8f5..a07bcdebcb68b 100644 --- a/lldb/tools/lldb-dap/src-ts/extension.ts +++ b/lldb/tools/lldb-dap/src-ts/extension.ts @@ -1,5 +1,3 @@ -import * as path from "path"; -import * as util from "util"; import * as vscode from "vscode"; import { @@ -15,13 +13,14 @@ import { DisposableContext } from "./disposable-context"; export class LLDBDapExtension extends DisposableContext { constructor() { super(); + const factory = new LLDBDapDescriptorFactory(); + this.pushSubscription(factory); this.pushSubscription( vscode.debug.registerDebugAdapterDescriptorFactory( "lldb-dap", - new LLDBDapDescriptorFactory(), - ), + factory, + ) ); - this.pushSubscription( vscode.workspace.onDidChangeConfiguration(async (event) => { if (event.affectsConfiguration("lldb-dap.executable-path")) { From 12941c5320ce430658eb249f1bcab436fcc61f16 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 28 Feb 2025 10:53:05 -0800 Subject: [PATCH 123/123] [RISCV] Remove non-portable vsetvli instructions from llvm-mca test. NFC (#129134) Not all fractional LMULs are required to be support for all SEWs. This test previously printed a warning for these cases. 
--- .../tools/llvm-mca/RISCV/SiFiveP600/div.s | 194 +----------------- 1 file changed, 7 insertions(+), 187 deletions(-) diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s index c0b702ef0449b..83de52b3c2cbc 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/div.s @@ -17,8 +17,6 @@ vsetvli zero, zero, e8, m4, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e8, m8, tu, mu vdiv.vv v8, v16, v24 -vsetvli zero, zero, e16, mf8, tu, mu -vdiv.vv v8, v16, v24 vsetvli zero, zero, e16, mf4, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e16, mf2, tu, mu @@ -33,10 +31,6 @@ vsetvli zero, zero, e16, m4, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e16, m8, tu, mu vdiv.vv v8, v16, v24 -vsetvli zero, zero, e32, mf8, tu, mu -vdiv.vv v8, v16, v24 -vsetvli zero, zero, e32, mf4, tu, mu -vdiv.vv v8, v16, v24 vsetvli zero, zero, e32, mf2, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e32, m1, tu, mu @@ -49,12 +43,6 @@ vsetvli zero, zero, e32, m4, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e32, m8, tu, mu vdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf8, tu, mu -vdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf4, tu, mu -vdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf2, tu, mu -vdiv.vv v8, v16, v24 vsetvli zero, zero, e64, m1, tu, mu vdiv.vv v8, v16, v24 vsetvli zero, zero, e64, m1, tu, mu @@ -82,8 +70,6 @@ vsetvli zero, zero, e8, m4, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e8, m8, tu, mu vdiv.vx v8, v16, a0 -vsetvli zero, zero, e16, mf8, tu, mu -vdiv.vx v8, v16, a0 vsetvli zero, zero, e16, mf4, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e16, mf2, tu, mu @@ -98,10 +84,6 @@ vsetvli zero, zero, e16, m4, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e16, m8, tu, mu vdiv.vx v8, v16, a0 -vsetvli zero, zero, e32, mf8, tu, mu -vdiv.vx v8, v16, a0 -vsetvli zero, zero, e32, mf4, tu, mu -vdiv.vx v8, v16, a0 vsetvli zero, zero, e32, mf2, tu, mu vdiv.vx v8, 
v16, a0 vsetvli zero, zero, e32, m1, tu, mu @@ -114,12 +96,6 @@ vsetvli zero, zero, e32, m4, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e32, m8, tu, mu vdiv.vx v8, v16, a0 -vsetvli zero, zero, e64, mf8, tu, mu -vdiv.vx v8, v16, a0 -vsetvli zero, zero, e64, mf4, tu, mu -vdiv.vx v8, v16, a0 -vsetvli zero, zero, e64, mf2, tu, mu -vdiv.vx v8, v16, a0 vsetvli zero, zero, e64, m1, tu, mu vdiv.vx v8, v16, a0 vsetvli zero, zero, e64, m1, tu, mu @@ -147,8 +123,6 @@ vsetvli zero, zero, e8, m4, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e8, m8, tu, mu vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e16, mf8, tu, mu -vfdiv.vv v8, v16, v24 vsetvli zero, zero, e16, mf4, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e16, mf2, tu, mu @@ -163,10 +137,6 @@ vsetvli zero, zero, e16, m4, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e16, m8, tu, mu vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e32, mf8, tu, mu -vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e32, mf4, tu, mu -vfdiv.vv v8, v16, v24 vsetvli zero, zero, e32, mf2, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e32, m1, tu, mu @@ -179,12 +149,6 @@ vsetvli zero, zero, e32, m4, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e32, m8, tu, mu vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf8, tu, mu -vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf4, tu, mu -vfdiv.vv v8, v16, v24 -vsetvli zero, zero, e64, mf2, tu, mu -vfdiv.vv v8, v16, v24 vsetvli zero, zero, e64, m1, tu, mu vfdiv.vv v8, v16, v24 vsetvli zero, zero, e64, m1, tu, mu @@ -212,8 +176,6 @@ vsetvli zero, zero, e8, m4, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e8, m8, tu, mu vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e16, mf8, tu, mu -vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e16, mf4, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e16, mf2, tu, mu @@ -228,10 +190,6 @@ vsetvli zero, zero, e16, m4, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e16, m8, tu, mu vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e32, mf8, tu, mu -vfdiv.vf v8, v16, fa0 
-vsetvli zero, zero, e32, mf4, tu, mu -vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e32, mf2, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e32, m1, tu, mu @@ -244,12 +202,6 @@ vsetvli zero, zero, e32, m4, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e32, m8, tu, mu vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e64, mf8, tu, mu -vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e64, mf4, tu, mu -vfdiv.vf v8, v16, fa0 -vsetvli zero, zero, e64, mf2, tu, mu -vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e64, m1, tu, mu vfdiv.vf v8, v16, fa0 vsetvli zero, zero, e64, m1, tu, mu @@ -277,8 +229,6 @@ vsetvli zero, zero, e8, m4, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e8, m8, tu, mu vfsqrt.v v8, v16 -vsetvli zero, zero, e16, mf8, tu, mu -vfsqrt.v v8, v16 vsetvli zero, zero, e16, mf4, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e16, mf2, tu, mu @@ -293,10 +243,6 @@ vsetvli zero, zero, e16, m4, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e16, m8, tu, mu vfsqrt.v v8, v16 -vsetvli zero, zero, e32, mf8, tu, mu -vfsqrt.v v8, v16 -vsetvli zero, zero, e32, mf4, tu, mu -vfsqrt.v v8, v16 vsetvli zero, zero, e32, mf2, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e32, m1, tu, mu @@ -309,12 +255,6 @@ vsetvli zero, zero, e32, m4, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e32, m8, tu, mu vfsqrt.v v8, v16 -vsetvli zero, zero, e64, mf8, tu, mu -vfsqrt.v v8, v16 -vsetvli zero, zero, e64, mf4, tu, mu -vfsqrt.v v8, v16 -vsetvli zero, zero, e64, mf2, tu, mu -vfsqrt.v v8, v16 vsetvli zero, zero, e64, m1, tu, mu vfsqrt.v v8, v16 vsetvli zero, zero, e64, m1, tu, mu @@ -327,14 +267,14 @@ vsetvli zero, zero, e64, m8, tu, mu vfsqrt.v v8, v16 # CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 320 -# CHECK-NEXT: Total Cycles: 14435 -# CHECK-NEXT: Total uOps: 320 +# CHECK-NEXT: Instructions: 260 +# CHECK-NEXT: Total Cycles: 10243 +# CHECK-NEXT: Total uOps: 260 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.02 -# CHECK-NEXT: IPC: 0.02 -# CHECK-NEXT: Block RThroughput: 14361.0 +# CHECK-NEXT: uOps 
Per Cycle: 0.03 +# CHECK-NEXT: IPC: 0.03 +# CHECK-NEXT: Block RThroughput: 10185.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -361,8 +301,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 204 204.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 45 45.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -377,10 +315,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 180 180.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 360 360.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 42 42.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -393,12 +327,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 168 168.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 336 336.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 72 72.00 vdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -425,8 +353,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 204 204.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 
1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 45 45.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -441,10 +367,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 180 180.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 360 360.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 42 42.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -457,12 +379,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 168 168.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 336 336.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 408 408.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 72 72.00 vdiv.vx v8, v16, a0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -489,8 +405,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, 
e16, mf4, tu, mu # CHECK-NEXT: 1 29 29.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -505,10 +419,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 116 116.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 25 25.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -521,12 +431,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 100 100.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 200 200.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 37 37.00 vfdiv.vv v8, v16, v24 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -553,8 +457,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 29 29.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -569,10 +471,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 116 116.00 vfdiv.vf v8, v16, fa0 # 
CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 25 25.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -585,12 +483,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 100 100.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 200 200.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 232 232.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 37 37.00 vfdiv.vf v8, v16, fa0 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -617,8 +509,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: 1 29 29.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, tu, mu @@ -633,10 +523,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 116 116.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli 
zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: 1 25 25.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu @@ -649,12 +535,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: 1 100 100.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: 1 200 200.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 -# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: 1 232 232.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: 1 37 37.00 vfsqrt.v v8, v16 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, tu, mu @@ -686,7 +566,7 @@ vfsqrt.v v8, v16 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 160.00 - - - - - 12186.00 - 725.00 14361.00 - - +# CHECK-NEXT: - - - - 130.00 - - - - - 7290.00 - 485.00 10185.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: @@ -706,8 +586,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 204.00 - 4.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - 45.00 - 1.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - 
- - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -722,10 +600,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 180.00 - 4.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 360.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - 42.00 - 1.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -738,12 +612,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 168.00 - 4.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 336.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - 72.00 - 1.00 - - - vdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu @@ -770,8 +638,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 204.00 - 4.00 - - - vdiv.vx v8, 
v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - 45.00 - 1.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -786,10 +652,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 180.00 - 4.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 360.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - 42.00 - 1.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -802,12 +664,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - 168.00 - 4.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - 336.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, 
v16, a0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - 408.00 - 8.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - 72.00 - 1.00 - - - vdiv.vx v8, v16, a0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu @@ -834,8 +690,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 29.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -850,10 +704,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 116.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 25.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -866,12 +716,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - 
- - - - - - - - - - - 4.00 100.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 200.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 37.00 - - vfdiv.vv v8, v16, v24 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu @@ -898,8 +742,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 29.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -914,10 +756,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 116.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, 
tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 25.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -930,12 +768,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 100.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 200.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 37.00 - - vfdiv.vf v8, v16, fa0 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu @@ -962,8 +794,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e8, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, 
e16, mf4, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 29.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, mf2, tu, mu @@ -978,10 +808,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 116.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e16, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, mf2, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 25.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu @@ -994,12 +820,6 @@ vfsqrt.v v8, v16 # CHECK-NEXT: - - - - - - - - - - - - 4.00 100.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m8, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 8.00 200.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf8, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf4, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, mf2, tu, mu -# CHECK-NEXT: - - - - - - - - - - - - 8.00 232.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - - 1.00 37.00 - - vfsqrt.v v8, v16 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e64, m1, tu, mu